# In [None]:
# 🚀 PRODUCTION PROMOTION - NEW WORKFLOW (CONFIG-DRIVEN)
import mlflow
from mlflow.tracking import MlflowClient
import time
import yaml
import sys
import traceback
import requests
from typing import Optional, Dict, Tuple
from datetime import datetime
from pyspark.sql import SparkSession

print("=" * 80)
print("üöÄ PRODUCTION PROMOTION (NEW WORKFLOW)")
print("=" * 80)

# Load the pipeline configuration. Everything below reads its settings from
# the resulting module-level ``pipeline_cfg`` mapping.

print("\nüìã Step 1: Loading configuration from pipeline_config.yml...")

try:
    # Explicit UTF-8 so parsing does not depend on the platform's default
    # locale encoding.
    with open("pipeline_config.yml", "r", encoding="utf-8") as f:
        pipeline_cfg = yaml.safe_load(f)

    # safe_load returns None for an empty document and may return a scalar or
    # list for other content; fail here with a clear message instead of an
    # opaque TypeError when the sections are indexed later.
    if not isinstance(pipeline_cfg, dict):
        raise ValueError(
            "pipeline_config.yml must contain a top-level mapping, "
            f"got {type(pipeline_cfg).__name__}"
        )

    print(f"‚úÖ Configuration loaded successfully!")

except FileNotFoundError:
    print("‚ùå ERROR: pipeline_config.yml not found!")
    print("üí° Please ensure pipeline_config.yml is in the notebook directory")
    sys.exit(1)
except Exception as e:
    # Any other failure (YAML syntax error, wrong top-level type, I/O error)
    # aborts the run with a traceback.
    print(f"‚ùå ERROR loading configuration: {e}")
    traceback.print_exc()
    sys.exit(1)
 
# ✅ CONFIGURATION CLASS

class Config:
    """Pipeline settings resolved from the parsed ``pipeline_config.yml``.

    Reads the module-level ``pipeline_cfg`` mapping and exposes the values as
    upper-case attributes (model name/type, aliases, metric settings, UAT
    settings, table names, Slack settings, comparison tolerance). Prints a
    short summary once everything is resolved.
    """

    # Model types that use the "classification" metric and threshold sections;
    # every other model type uses the "regression" sections.
    _CLASSIFIER_TYPES = ("random_forest", "logistic_regression", "xgboost_classifier")

    def __init__(self):
        # --- Model identity -------------------------------------------------
        model_section = pipeline_cfg["model"]
        kind = model_section["type"]
        catalog = model_section["catalog"]
        schema = model_section["schema"]
        stem = model_section["base_name"]

        # Fully qualified name: <catalog>.<schema>.<base_name>_<type>
        self.MODEL_NAME = f"{catalog}.{schema}.{stem}_{kind}"
        self.MODEL_TYPE = kind

        # --- Registry aliases -----------------------------------------------
        alias_section = pipeline_cfg["aliases"]
        self.STAGING_ALIAS = alias_section["staging"]
        self.PRODUCTION_ALIAS = alias_section["production"]
        self.BEST_ALIAS = alias_section["best"]

        # --- Metrics (section chosen by model family) -----------------------
        family = "classification" if kind in self._CLASSIFIER_TYPES else "regression"
        metric_section = pipeline_cfg["metrics"][family]
        self.PRIMARY_METRIC = metric_section["primary_metric"]
        self.DIRECTION = metric_section["direction"]
        self.TRACKED_METRICS = metric_section["tracked_metrics"]

        # --- UAT ------------------------------------------------------------
        uat_section = pipeline_cfg["uat"]
        self.UAT_ENABLED = uat_section["enabled"]
        self.UAT_THRESHOLDS = uat_section[f"{family}_thresholds"]

        # --- Tables ---------------------------------------------------------
        table_section = pipeline_cfg["tables"]
        self.UAT_RESULTS_TABLE = table_section["uat_results"]
        self.EVALUATION_LOG_TABLE = table_section["evaluation_log"]

        # --- Slack ----------------------------------------------------------
        # SLACK_ENABLED must be set first: the webhook lookup reads it.
        self.SLACK_ENABLED = pipeline_cfg["notifications"]["enabled"]
        self.SLACK_WEBHOOK_URL = self._get_slack_webhook()

        # Two metric values closer than this are treated as equal.
        self.TOLERANCE = 1e-6

        uat_label = 'ENABLED' if self.UAT_ENABLED else 'DISABLED'
        slack_label = 'ENABLED' if self.SLACK_WEBHOOK_URL else 'DISABLED'
        print(f"\nüìä Configuration Summary:")
        print(f"   Model Type: {self.MODEL_TYPE.upper()}")
        print(f"   Model Name: {self.MODEL_NAME}")
        print(f"   Staging Alias: @{self.STAGING_ALIAS}")
        print(f"   Production Alias: @{self.PRODUCTION_ALIAS}")
        print(f"   Primary Metric: {self.PRIMARY_METRIC} ({self.DIRECTION})")
        print(f"   UAT Validation: {uat_label}")
        print(f"   Slack Notifications: {slack_label}")

    def _get_slack_webhook(self) -> Optional[str]:
        """Return the Slack webhook URL from Databricks secrets, or None.

        Returns None immediately when notifications are disabled. Otherwise
        tries a fixed list of secret scopes in order and returns the first
        non-blank ``SLACK_WEBHOOK_URL``; a scope that raises is skipped.
        """
        if not self.SLACK_ENABLED:
            return None

        try:
            for scope in ("shared-scope", "dev-scope", "prod-scope", "ml-scope"):
                try:
                    candidate = dbutils.secrets.get(scope, "SLACK_WEBHOOK_URL")
                    if candidate and candidate.strip():
                        print(f"   ‚úÖ Slack webhook found in scope '{scope}'")
                        return candidate
                except Exception:
                    # Scope missing or not readable: move on to the next one.
                    continue

            print("   ‚ÑπÔ∏è  No Slack webhook found in secrets")
            return None

        except Exception as err:
            print(f"   ‚ö†Ô∏è  Could not access secrets: {err}")
            return None

# Initialize config
# Module-level settings object used by every function below. Constructing it
# reads ``pipeline_cfg`` and prints the configuration summary.
config = Config()

print("=" * 80)

# 📢 SLACK NOTIFICATION HELPER

class SlackNotifier:
    """Posts pipeline status messages to a Slack incoming webhook.

    When no usable webhook URL is supplied the notifier is disabled: messages
    are echoed to stdout and every ``send*`` call returns False.
    """

    def __init__(self, webhook_url: Optional[str]):
        """Store the webhook and enable sending only for a non-blank URL."""
        self.webhook_url = webhook_url
        self.enabled = bool(webhook_url is not None and webhook_url.strip())

    def send(self, message: str, level: str = "info", extra_fields: Optional[Dict] = None) -> bool:
        """Send one notification.

        Args:
            message: Headline text (rendered bold).
            level: Selects the leading icon; unknown levels use the info icon.
            extra_fields: Optional mapping rendered as one bullet per entry.

        Returns:
            True only when the webhook answered with HTTP 200; False when the
            notifier is disabled, on any other status, or on a request error.
        """
        if not self.enabled:
            print(f"üì¢ [SLACK DISABLED] {message}")
            return False

        icons = {
            "info": "‚ÑπÔ∏è",
            "success": "‚úÖ",
            "warning": "‚ö†Ô∏è",
            "error": "‚ùå",
            "rocket": "üöÄ",
            "trophy": "üèÜ"
        }
        icon = icons.get(level, '‚ÑπÔ∏è')
        text = f"{icon} *{message}*"

        if extra_fields:
            # A blank line, then one bullet per field.
            bullets = "".join(
                f"\n‚Ä¢ *{label}:* {content}"
                for label, content in extra_fields.items()
            )
            text = text + "\n" + bullets

        body = {
            "text": text,
            "username": "ML Pipeline Bot",
            "icon_emoji": ":rocket:"
        }

        try:
            reply = requests.post(self.webhook_url, json=body, timeout=5)
            delivered = reply.status_code == 200
            if delivered:
                print("üì¢ Slack notification sent successfully")
            else:
                print(f"‚ö†Ô∏è  Slack error: {reply.status_code}")
            return delivered
        except Exception as err:
            # Notifications are best-effort: never let Slack break the pipeline.
            print(f"‚ùå Slack notification failed: {err}")
            return False

    def send_promotion_success(self, model_name: str, version: int, metrics: Dict) -> bool:
        """Announce a successful production promotion, including metrics.

        Metrics whose value is None are omitted; floats are shown with four
        decimals, everything else via ``str``.
        """
        fields = {
            "Model": model_name,
            "Version": f"v{version}",
            "Status": "LIVE in Production üéâ",
            "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }
        fields.update({
            label: f"{reading:.4f}" if isinstance(reading, float) else str(reading)
            for label, reading in metrics.items()
            if reading is not None
        })

        return self.send("Production Deployment Successful", level="rocket", extra_fields=fields)

    def send_promotion_blocked(self, model_name: str, reason: str) -> bool:
        """Announce that a promotion was blocked, with the reason."""
        fields = {
            "Model": model_name,
            "Reason": reason,
            "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }
        return self.send("Production Promotion Blocked", level="warning", extra_fields=fields)

    def send_error(self, error_message: str, details: Optional[str] = None) -> bool:
        """Announce a pipeline failure; ``details`` is added when non-empty."""
        fields = {
            "Error": error_message,
            "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }
        if details:
            fields["Details"] = details

        return self.send("Production Promotion Failed", level="error", extra_fields=fields)

# Initialize Slack notifier
# Uses whatever webhook Config resolved; when that is None the notifier only
# echoes messages to stdout.
slack = SlackNotifier(config.SLACK_WEBHOOK_URL)

# Initialize MLflow and Spark

print("\nüîß Step 2: Initializing MLflow and Spark...")

try:
    spark = SparkSession.builder.appName("ProductionPromotion").getOrCreate()
    mlflow.set_tracking_uri("databricks")
    # NOTE(review): "databricks-uc" presumably selects the Unity Catalog model
    # registry (model names here are built as catalog.schema.name) - confirm.
    mlflow.set_registry_uri("databricks-uc")
    # Module-level client shared by all registry/run lookups below.
    client = MlflowClient()
    
    print("‚úÖ MLflow and Spark initialized successfully")

except Exception as e:
    # Nothing below works without Spark and the MLflow client: report and abort.
    print(f"‚ùå Failed to initialize: {e}")
    slack.send_error("Initialization failed", str(e))
    sys.exit(1)

# Send startup notification
slack.send(
    "Production Promotion Pipeline Started",
    level="info",
    extra_fields={
        "Model": config.MODEL_NAME,
        "Model Type": config.MODEL_TYPE.upper()
    }
)
 
# 🔧 HELPER FUNCTIONS

def wait_until_ready(version: int, timeout: int = 300) -> bool:
    """Poll the registry until the model version reports status READY.

    Args:
        version: Version of ``config.MODEL_NAME`` to poll.
        timeout: Maximum number of seconds to keep polling.

    Returns:
        True once the status is "READY"; False if the status becomes
        "FAILED_REGISTRATION" or the timeout elapses. Lookup errors are
        printed and retried. Polls every 5 seconds.
    """
    print(f"\n‚è≥ Waiting for model v{version} to become READY...")

    began = time.time()
    while time.time() - began < timeout:
        try:
            state = client.get_model_version(config.MODEL_NAME, version).status

            if state == "READY":
                print(f"   ‚úÖ Model v{version} is READY")
                return True
            if state == "FAILED_REGISTRATION":
                print(f"   ‚ùå Model v{version} registration FAILED")
                return False

            print(f"   ‚è≥ Status: {state} (waiting...)")
        except Exception as err:
            print(f"   ‚ö†Ô∏è  Error checking status: {err}")

        # Same pause whether the status was non-terminal or the lookup failed.
        time.sleep(5)

    print(f"   ‚è∞ Timeout: Model v{version} not ready after {timeout}s")
    return False


def get_metric_from_run(run_id: str) -> Optional[float]:
    """Return the value of ``config.PRIMARY_METRIC`` logged on an MLflow run.

    Returns None when the run has no such metric or when the run lookup
    raises (the error is printed, not propagated).
    """
    try:
        logged_metrics = client.get_run(run_id).data.metrics
        return logged_metrics.get(config.PRIMARY_METRIC)
    except Exception as err:
        print(f"   ‚ö†Ô∏è  Could not get metric from run {run_id}: {err}")
        return None
 
# 📋 STEP 1: GET STAGING MODEL
 
def get_staging_model() -> Optional[Dict]:
    """Look up the model version currently carrying the staging alias.

    Returns:
        A dict with keys ``version`` (int), ``run_id``, ``status`` and
        ``metric`` (primary metric of the source run, possibly None), or None
        when the alias lookup fails for any reason.
    """
    rule = "=" * 70
    print(f"\n{rule}")
    print("üìã STEP 1: Finding Staging Model")
    print(rule)

    try:
        print(f"üîç Looking for: {config.MODEL_NAME}@{config.STAGING_ALIAS}")

        # Resolve the alias to a concrete model version.
        aliased = client.get_model_version_by_alias(
            config.MODEL_NAME,
            config.STAGING_ALIAS
        )

        found = {
            'version': int(aliased.version),
            'run_id': aliased.run_id,
            'status': aliased.status,
        }

        print(f"‚úÖ Staging model found:")
        print(f"   Version: v{found['version']}")
        print(f"   Run ID: {found['run_id']}")
        print(f"   Status: {found['status']}")

        # Primary metric comes from the run that produced this version.
        found['metric'] = get_metric_from_run(found['run_id'])
        if found['metric'] is not None:
            print(f"   {config.PRIMARY_METRIC}: {found['metric']:.4f}")

        return found

    except Exception as err:
        print(f"‚ùå No staging model found: {err}")
        print(f"üí° Please run uat_staging.py first to promote a model to @{config.STAGING_ALIAS}")
        return None
 
# 📋 STEP 2: CHECK UAT STATUS

def check_uat_status(staging_version: int) -> Tuple[bool, Optional[Dict]]:
    """Check whether the staging model version passed UAT validation.

    Reads ``config.UAT_RESULTS_TABLE`` through Spark, selects the most recent
    row (by ``timestamp``) whose ``model_version`` matches ``staging_version``
    (stored as either string or int) and inspects its ``uat_status``.

    Args:
        staging_version: Version number of the model currently in staging.

    Returns:
        ``(passed, metrics)``. ``passed`` is True only when UAT is disabled in
        the config or the selected row's ``uat_status`` equals ``"PASSED"``.
        ``metrics`` is the metric dict read from that row; it is None when UAT
        is disabled, when no matching row exists, or when the lookup raised.
    """
    import json
    import math

    def _cell_as_float(value, default):
        """Convert a result cell to float; null, NaN or unparseable -> default."""
        if value is None:
            return default
        try:
            number = float(value)
        except (TypeError, ValueError):
            return default
        return default if math.isnan(number) else number

    print(f"\n{'='*70}")
    print("üìã STEP 2: Checking UAT Status")
    print(f"{'='*70}")

    if not config.UAT_ENABLED:
        print("‚ÑπÔ∏è  UAT validation disabled in config")
        return True, None

    try:
        print(f"üîç Reading UAT results from: {config.UAT_RESULTS_TABLE}")

        # Read UAT results table
        uat_df = spark.table(config.UAT_RESULTS_TABLE).toPandas()

        if uat_df.empty:
            print(f"‚ö†Ô∏è  No UAT results found in table")
            print(f"üí° Please run uat_inference.py first")
            return False, None

        print(f"üìä Found {len(uat_df)} UAT result(s) in total")

        # Handle both string and int version formats
        version_str = str(staging_version)
        version_int = int(staging_version)

        # Filter for this version
        version_results = uat_df[
            (uat_df['model_version'] == version_str) |
            (uat_df['model_version'] == version_int)
        ]

        if version_results.empty:
            print(f"‚ö†Ô∏è  No UAT results found for version v{staging_version}")
            print(f"\nüìã Available UAT results:")
            print(uat_df[['timestamp', 'model_version', 'uat_status']].to_string(index=False))
            print(f"\nüí° Run uat_inference.py for this version first")
            return False, None

        # Get latest result for this version
        latest_result = version_results.sort_values('timestamp', ascending=False).iloc[0]
        uat_status = latest_result['uat_status']

        print(f"\nüìä UAT Results for v{staging_version}:")
        print(f"   Timestamp: {latest_result['timestamp']}")
        print(f"   UAT Status: {uat_status}")
        print(f"   Model Type: {latest_result.get('model_type', config.MODEL_TYPE)}")

        # Extract metrics based on model type. A null cell (None or NaN) is
        # treated like a missing column, so it can neither crash the check nor
        # leak "nan" into the report.
        metrics = {}

        if config.MODEL_TYPE in ["random_forest", "logistic_regression", "xgboost_classifier"]:
            # Classification metrics
            roc_auc = _cell_as_float(latest_result.get('roc_auc'), None)
            metrics = {
                'accuracy': _cell_as_float(latest_result.get('accuracy'), 0.0),
                'precision': _cell_as_float(latest_result.get('precision'), 0.0),
                'recall': _cell_as_float(latest_result.get('recall'), 0.0),
                'f1': _cell_as_float(latest_result.get('f1'), 0.0),
                # ROC-AUC is optional: absent, null or zero is reported as None.
                'roc_auc': roc_auc if roc_auc else None
            }

            print(f"\n   Classification Metrics:")
            print(f"     ‚Ä¢ Accuracy:  {metrics['accuracy']:.4f}")
            print(f"     ‚Ä¢ Precision: {metrics['precision']:.4f}")
            print(f"     ‚Ä¢ Recall:    {metrics['recall']:.4f}")
            print(f"     ‚Ä¢ F1 Score:  {metrics['f1']:.4f}")
            if metrics['roc_auc']:
                print(f"     ‚Ä¢ ROC-AUC:   {metrics['roc_auc']:.4f}")
        else:
            # Regression metrics
            metrics = {
                'mae': _cell_as_float(latest_result.get('mae'), 0.0),
                'rmse': _cell_as_float(latest_result.get('rmse'), 0.0),
                'r2': _cell_as_float(latest_result.get('r2'), 0.0),
                'mape': _cell_as_float(latest_result.get('mape'), 0.0)
            }

            print(f"\n   Regression Metrics:")
            print(f"     ‚Ä¢ MAE:  {metrics['mae']:.4f}")
            print(f"     ‚Ä¢ RMSE: {metrics['rmse']:.4f}")
            print(f"     ‚Ä¢ R¬≤:   {metrics['r2']:.4f}")
            print(f"     ‚Ä¢ MAPE: {metrics['mape']:.2f}%")

        if uat_status == "PASSED":
            print(f"\n‚úÖ Model v{staging_version} PASSED UAT validation")
            return True, metrics
        else:
            print(f"\n‚ùå Model v{staging_version} FAILED UAT validation")

            # Show failed checks if available. Only text is parsed, so a null
            # cell (NaN after toPandas) is skipped, and malformed JSON is shown
            # raw instead of discarding the metrics gathered above.
            raw_checks = latest_result.get('failed_checks_json')
            if isinstance(raw_checks, (str, bytes, bytearray)) and raw_checks:
                try:
                    failed_checks = json.loads(raw_checks)
                except ValueError:  # json.JSONDecodeError is a ValueError
                    print(f"\n   Failed checks (unparsed): {raw_checks}")
                else:
                    if not isinstance(failed_checks, (list, tuple, dict)):
                        failed_checks = [failed_checks]
                    print(f"\n   Failed checks ({len(failed_checks)}):")
                    for check in failed_checks:
                        print(f"     ‚Ä¢ {check}")

            return False, metrics

    except Exception as e:
        # Any failure to read or interpret the table counts as "not passed".
        print(f"‚ùå Failed to check UAT status: {e}")
        traceback.print_exc()
        return False, None

# 📋 STEP 3: GET CURRENT PRODUCTION MODEL (OPTIONAL)

def get_current_production_model() -> Optional[Dict]:
    """Look up the model version currently carrying the production alias.

    Returns:
        A dict with keys ``version`` (int), ``run_id`` and ``metric`` (primary
        metric of the source run, possibly None), or None when the alias
        lookup raises.
    """
    print(f"\n{'='*70}")
    print("üìã STEP 3: Checking Current Production Model")
    print(f"{'='*70}")

    try:
        prod_mv = client.get_model_version_by_alias(
            config.MODEL_NAME,
            config.PRODUCTION_ALIAS
        )

        version = int(prod_mv.version)
        run_id = prod_mv.run_id
        metric_value = get_metric_from_run(run_id)

        print(f"‚ÑπÔ∏è  Current production model:")
        print(f"   Version: v{version}")
        print(f"   Run ID: {run_id}")
        if metric_value is not None:
            print(f"   {config.PRIMARY_METRIC}: {metric_value:.4f}")

        return {
            'version': version,
            'run_id': run_id,
            'metric': metric_value
        }

    except Exception as e:
        # NOTE(review): every lookup failure (not only "alias does not exist")
        # lands here and is reported as a first deployment, which makes
        # should_promote() skip the metric comparison. The underlying error is
        # printed so an outage or permission problem is at least visible in
        # the log; consider failing hard on errors other than not-found.
        print("‚ÑπÔ∏è  No production model exists yet (first deployment)")
        print(f"   Lookup detail: {e}")
        return None
 
# 📋 STEP 4: COMPARE STAGING VS PRODUCTION (OPTIONAL)
 

def should_promote(staging: Dict, production: Optional[Dict]) -> Tuple[bool, str]:
    """Decide whether the staging model should replace production.

    Compares the ``metric`` entries of both dicts according to
    ``config.DIRECTION`` ("maximize" means higher is better; any other value
    is treated as minimize).

    Args:
        staging: Staging model info; only its ``metric`` entry is read.
        production: Production model info, or None when there is none.

    Returns:
        ``(promote, reason)``. Promotion is approved when there is no
        production model, when either metric is unavailable, or when staging
        is strictly better by more than ``config.TOLERANCE``.
    """
    print(f"\n{'='*70}")
    print("üìã STEP 4: Performance Comparison")
    print(f"{'='*70}")

    if production is None:
        print("‚úÖ First production deployment - proceeding")
        return True, "First production deployment"

    staging_metric = staging.get('metric')
    prod_metric = production.get('metric')

    if staging_metric is None or prod_metric is None:
        print("‚ö†Ô∏è  Cannot compare metrics - proceeding anyway")
        return True, "Metrics unavailable for comparison"

    print(f"\nüìä Metric Comparison ({config.PRIMARY_METRIC}):")
    print(f"   Staging:    {staging_metric:.4f}")
    print(f"   Production: {prod_metric:.4f}")

    # Check if metrics are essentially equal
    if abs(staging_metric - prod_metric) <= config.TOLERANCE:
        print(f"\n‚ö†Ô∏è  Metrics are equal (within tolerance {config.TOLERANCE})")
        return False, "No improvement - metrics equal"

    # Compare based on direction
    if config.DIRECTION == "maximize":
        staging_is_better = staging_metric > prod_metric
    else:  # minimize
        staging_is_better = staging_metric < prod_metric

    # The sign shows which way the raw metric moved relative to production.
    sign = "+" if staging_metric > prod_metric else "-"
    delta = abs(staging_metric - prod_metric)
    # Magnitude of the baseline, so a negative production metric (e.g. a
    # negative R2) does not flip the sign of the reported percentage.
    baseline = abs(prod_metric)

    if baseline == 0:
        # A relative change is undefined against a zero baseline; report the
        # absolute difference instead of dividing by zero.
        if staging_is_better:
            print(f"\n‚úÖ Staging is better ({sign}{delta:.4f} absolute; production baseline is 0)")
            return True, f"Performance improved by {delta:.4f} (absolute)"
        print(f"\n‚ùå Staging is worse ({sign}{delta:.4f} absolute; production baseline is 0)")
        return False, f"Performance declined by {delta:.4f} (absolute)"

    change_pct = (delta / baseline) * 100

    if staging_is_better:
        print(f"\n‚úÖ Staging is better ({sign}{change_pct:.2f}% improvement)")
        return True, f"Performance improved by {change_pct:.2f}%"

    print(f"\n‚ùå Staging is worse ({sign}{change_pct:.2f}% decline)")
    return False, f"Performance declined by {change_pct:.2f}%"
 
# 📋 STEP 5: PROMOTE TO PRODUCTION
 
def promote_to_production(staging: Dict, uat_metrics: Optional[Dict]) -> bool:
    """Point the production alias at the staging model version.

    Waits for the version to be READY, sets ``config.PRODUCTION_ALIAS`` on
    it, prints a summary and sends a Slack notification.

    Args:
        staging: Staging model info; reads ``version``, ``run_id`` and the
            optional ``metric`` entry.
        uat_metrics: Metrics from UAT to include in the summary and the Slack
            message, or None.

    Returns:
        True when the alias was set; False when the version never became
        READY or setting the alias raised (both cases notify Slack).
    """
    print(f"\n{'='*70}")
    print("üìã STEP 5: Promoting to Production")
    print(f"{'='*70}")

    version = staging['version']

    # Wait for model to be ready
    print(f"\n‚è≥ Ensuring model v{version} is READY...")
    if not wait_until_ready(version):
        print(f"‚ùå Model v{version} is not ready for promotion")
        slack.send_promotion_blocked(
            config.MODEL_NAME,
            f"Model v{version} is not in READY state"
        )
        return False

    try:
        print(f"\nüöÄ Setting @{config.PRODUCTION_ALIAS} alias to v{version}...")

        client.set_registered_model_alias(
            name=config.MODEL_NAME,
            alias=config.PRODUCTION_ALIAS,
            version=version
        )

        print(f"\n{'='*70}")
        print("‚úÖ‚úÖ PRODUCTION PROMOTION SUCCESSFUL ‚úÖ‚úÖ")
        print(f"{'='*70}")
        print(f"\nüéâ Model Deployed to Production!")
        print(f"   Model: {config.MODEL_NAME}")
        print(f"   Model Type: {config.MODEL_TYPE.upper()}")
        print(f"   Version: v{version}")
        print(f"   Promoted: @{config.STAGING_ALIAS} ‚Üí @{config.PRODUCTION_ALIAS}")
        print(f"   Run ID: {staging['run_id']}")

        # Compare against None, not truthiness, so a legitimate metric of 0.0
        # is still reported (matches the other metric checks in this file).
        if staging.get('metric') is not None:
            print(f"   {config.PRIMARY_METRIC}: {staging['metric']:.4f}")

        if uat_metrics:
            print(f"\nüìä UAT Performance Metrics:")
            for metric_name, metric_value in uat_metrics.items():
                if metric_value is not None:
                    if isinstance(metric_value, float):
                        print(f"   {metric_name}: {metric_value:.4f}")
                    else:
                        print(f"   {metric_name}: {metric_value}")

        print(f"{'='*70}")

        # Send success notification
        slack.send_promotion_success(
            config.MODEL_NAME,
            version,
            uat_metrics or {}
        )

        return True

    except Exception as e:
        print(f"\n‚ùå Failed to promote model: {e}")
        traceback.print_exc()

        slack.send_error(
            "Production promotion failed",
            str(e)
        )

        return False
 
# 🎬 MAIN EXECUTION
 
def main():
    """Run the production promotion pipeline end to end.

    Steps: find the staging model, require a passed UAT, look up the current
    production model, compare metrics, then set the production alias.

    Always terminates through ``sys.exit``: status 0 when the model was
    promoted or the promotion was skipped because staging is not better,
    status 1 when there is no staging model, UAT did not pass, the promotion
    failed, or an unexpected error occurred.
    """
    try:
        print("\n" + "="*80)
        print("üé¨ STARTING PRODUCTION PROMOTION PIPELINE")
        print("="*80 + "\n")

        # Step 1: Get staging model
        staging = get_staging_model()
        if not staging:
            error_msg = f"No staging model found - please run uat_staging.py first"
            print(f"\n‚ùå {error_msg}")
            slack.send_promotion_blocked(config.MODEL_NAME, error_msg)
            sys.exit(1)

        # Step 2: Check UAT status
        uat_passed, uat_metrics = check_uat_status(staging['version'])

        if not uat_passed:
            warning_msg = (
                f"UAT validation not passed for v{staging['version']}"
            )
            print(f"\n‚ö†Ô∏è  {warning_msg}")
            print(f"üí° Model cannot be promoted to production")
            print(f"üí° Please ensure UAT validation passes before promotion")

            slack.send_promotion_blocked(config.MODEL_NAME, warning_msg)

            print("\nüõë Stopping execution - UAT validation required")
            sys.exit(1)

        print(f"\n‚úÖ UAT validation passed - proceeding with promotion")

        # Step 3: Get current production model (optional comparison)
        production = get_current_production_model()

        # Step 4: Compare performance (optional)
        should_proceed, reason = should_promote(staging, production)

        if not should_proceed:
            print(f"\n‚ö†Ô∏è  Promotion skipped: {reason}")
            print(f"üí° Staging model does not improve upon current production")

            slack.send_promotion_blocked(config.MODEL_NAME, reason)

            # Save task values. Best-effort: outside a Databricks job
            # ``dbutils`` is not available. Catch Exception rather than using
            # a bare except so SystemExit/KeyboardInterrupt are never
            # swallowed, and say so instead of passing silently.
            try:
                dbutils.jobs.taskValues.set(key="promotion_status", value="SKIPPED")
                dbutils.jobs.taskValues.set(key="reason", value=reason)
            except Exception:
                print("\n‚ÑπÔ∏è  Not running in workflow - skipping task values")

            sys.exit(0)

        # Step 5: Promote to production
        success = promote_to_production(staging, uat_metrics)

        if success:
            print(f"\n‚ú® Production promotion completed successfully!")
            print(f"\nüìå Next Steps:")
            print(f"   1. Monitor model performance in production")
            print(f"   2. Set up model serving endpoint (if needed)")
            print(f"   3. Update API/application to use new version")

            # Save task values for workflow (best-effort, see above).
            try:
                dbutils.jobs.taskValues.set(key="production_version", value=staging['version'])
                dbutils.jobs.taskValues.set(key="model_type", value=config.MODEL_TYPE)
                dbutils.jobs.taskValues.set(key="promotion_status", value="SUCCESS")
                print("\n‚úÖ Task values saved for workflow")
            except Exception:
                print("\n‚ÑπÔ∏è  Not running in workflow - skipping task values")

            sys.exit(0)
        else:
            print(f"\n‚ùå Production promotion failed")
            sys.exit(1)

    except Exception as e:
        # SystemExit is not an Exception subclass, so the sys.exit() calls
        # above pass straight through this handler.
        print("\n" + "="*80)
        print("‚ùå PRODUCTION PROMOTION FAILED")
        print("="*80)
        print(f"Error: {str(e)}")
        print("="*80 + "\n")

        slack.send_error(
            "Production promotion pipeline failed",
            str(e)
        )

        traceback.print_exc()
        sys.exit(1)
 
# Execute
# Run the pipeline only when this module is executed as __main__; main()
# always ends through sys.exit().

if __name__ == "__main__":
    main()