In [None]:
# 🧪 UAT MODEL INFERENCE - NEW WORKFLOW (CONFIG-DRIVEN)

import mlflow
from mlflow.tracking import MlflowClient
import pandas as pd
import numpy as np
import math
from sklearn.metrics import (
    accuracy_score, 
    precision_score, 
    recall_score, 
    f1_score,
    roc_auc_score,
    confusion_matrix
)
from pyspark.sql import SparkSession
from pyspark.ml.linalg import VectorUDT, Vectors
from datetime import datetime
import warnings
import sys
import traceback
import yaml
import json
import requests
from typing import Dict, Optional, Tuple

# Silence all library warnings for the rest of the run.
warnings.filterwarnings("ignore")

print("=" * 80)
print("üß™ UAT MODEL INFERENCE (NEW WORKFLOW)")
print("=" * 80)

# -----------------------------------------------------------------------------------
# LOAD CONFIG
# -----------------------------------------------------------------------------------
# Parses pipeline_config.yml into the module-level `pipeline_cfg` mapping.
# Any failure is reported on stdout and terminates the script with exit code 1.

print("\nüìã Step 1: Loading configuration from pipeline_config.yml...")

try:
    import os

    # Default location of the pipeline configuration. The PIPELINE_CONFIG_PATH
    # environment variable, when set to a non-empty value, overrides it so the
    # script is not tied to a single user's Repos checkout.
    config_path = os.environ.get("PIPELINE_CONFIG_PATH") or (
        "/Workspace/Repos/vipultak7171@gmail.com/ml-credit-risk/dev_env/pipeline_config.yml"
    )

    if not os.path.exists(config_path):
        raise FileNotFoundError(f"Config not found at: {config_path}")

    with open(config_path, "r", encoding="utf-8") as f:
        pipeline_cfg = yaml.safe_load(f)

    # yaml.safe_load returns None for an empty document; fail here with a clear
    # message instead of an opaque subscript error when the settings are read.
    if not isinstance(pipeline_cfg, dict):
        raise ValueError(
            f"Config at {config_path} must be a YAML mapping, "
            f"got {type(pipeline_cfg).__name__}"
        )

    print(f"‚úÖ Configuration loaded successfully from ‚Üí {config_path}")

except FileNotFoundError as e:
    print("‚ùå ERROR: pipeline_config.yml not found!")
    # Include the path that was checked so the failure is actionable.
    print(f"   {e}")
    sys.exit(1)
except Exception as e:
    print(f"‚ùå ERROR loading configuration: {e}")
    traceback.print_exc()
    sys.exit(1)


# -----------------------------------------------------------------------------------
# CONFIG CLASS
# -----------------------------------------------------------------------------------

class Config:
    """UAT settings resolved from the parsed pipeline configuration.

    Attributes set by ``__init__``:
        MODEL_NAME: ``<catalog>.<schema>.<base_name>_<type>`` built from the
            ``model`` section.
        MODEL_TYPE: value of ``model.type``.
        STAGING_ALIAS / PRODUCTION_ALIAS: values from the ``aliases`` section.
        UAT_INPUT_TABLE: value of ``data.preprocessed_table``.
        LABEL_COL: fixed to ``"label"``.
        PRIMARY_METRIC / DIRECTION / TRACKED_METRICS: values from
            ``metrics.classification``.
        UAT_THRESHOLDS: value of ``uat.classification_thresholds``.
        UAT_RESULTS_TABLE: value of ``tables.uat_results``.
        SLACK_ENABLED: value of ``notifications.enabled``.
        SLACK_WEBHOOK_URL: always initialised to ``None`` here.
    """

    def __init__(self, cfg=None):
        """Populate the settings and print a short summary.

        Args:
            cfg: Parsed configuration mapping. When omitted, the module-level
                ``pipeline_cfg`` loaded from pipeline_config.yml is used, which
                keeps ``Config()`` working exactly as before.

        Raises:
            KeyError: If a required section or key is missing from the mapping.
        """
        if cfg is None:
            cfg = pipeline_cfg

        model_cfg = cfg["model"]
        model_type = model_cfg["type"]
        uc_catalog = model_cfg["catalog"]
        uc_schema = model_cfg["schema"]
        base_name = model_cfg["base_name"]

        self.MODEL_NAME = f"{uc_catalog}.{uc_schema}.{base_name}_{model_type}"
        self.MODEL_TYPE = model_type

        self.STAGING_ALIAS = cfg["aliases"]["staging"]
        self.PRODUCTION_ALIAS = cfg["aliases"]["production"]

        self.UAT_INPUT_TABLE = cfg["data"]["preprocessed_table"]
        self.LABEL_COL = "label"

        classification_cfg = cfg["metrics"]["classification"]
        self.PRIMARY_METRIC = classification_cfg["primary_metric"]
        self.DIRECTION = classification_cfg["direction"]
        self.TRACKED_METRICS = classification_cfg["tracked_metrics"]
        self.UAT_THRESHOLDS = cfg["uat"]["classification_thresholds"]

        self.UAT_RESULTS_TABLE = cfg["tables"]["uat_results"]

        self.SLACK_ENABLED = cfg["notifications"]["enabled"]
        self.SLACK_WEBHOOK_URL = None

        print("\nüìä Configuration Summary:")
        print(f"   Model: {self.MODEL_NAME}")
        print(f"   Alias: @{self.STAGING_ALIAS}")
        print(f"   UAT Input: {self.UAT_INPUT_TABLE}")
        print(f"   Primary Metric: {self.PRIMARY_METRIC}")

# Build the module-level settings object that the pipeline functions read.
config = Config()

# Visual separator closing the configuration summary printed by Config().
print("=" * 80)


# -----------------------------------------------------------------------------------
# SLACK NOTIFICATION (FROM V1) — ADDED
# -----------------------------------------------------------------------------------

def get_slack_webhook():
    """Return the Slack webhook URL from the first secret scope that has one.

    The ``SLACK_WEBHOOK_URL`` secret is requested from ``shared-scope`` and
    then ``dev-scope`` through ``dbutils.secrets.get``. A scope is skipped when
    the lookup raises or the stored value is blank.

    Returns:
        The webhook string as stored, or ``None`` when no scope supplies one.
    """
    candidate_scopes = ("shared-scope", "dev-scope")

    for scope in candidate_scopes:
        try:
            webhook = dbutils.secrets.get(scope, "SLACK_WEBHOOK_URL")
            if not webhook.strip():
                continue
            print(f"‚úì Slack webhook found from scope: {scope}")
            return webhook
        except Exception:
            # Best effort: any failure for this scope just moves on to the next.
            continue

    print("‚ö† Slack webhook not found. Notifications disabled.")
    return None

# Resolved once at start-up; the notification helper reads this global.
SLACK_WEBHOOK_URL = get_slack_webhook()

def send_slack_notification(message, level="info"):
    """Post ``message`` to Slack, or print it when no webhook is configured.

    Args:
        message: Text to send.
        level: One of ``"info"``, ``"success"``, ``"warning"``, ``"error"``;
            selects the icon prefixed to the message. Unknown levels use the
            ``"info"`` icon.

    Delivery problems are printed and never raised to the caller.
    """
    if SLACK_WEBHOOK_URL:
        level_icons = {
            "info": "‚ÑπÔ∏è",
            "success": "‚úÖ",
            "warning": "‚ö†Ô∏è",
            "error": "‚ùå",
        }
        icon = level_icons.get(level, level_icons["info"])
        body = {"text": f"{icon} {message}"}

        try:
            response = requests.post(SLACK_WEBHOOK_URL, json=body, timeout=5)
            if response.status_code == 200:
                print("üì® Slack Notification Sent")
            else:
                print(f"‚ö† Slack Error: {response.status_code}")
        except Exception as err:
            print(f"‚ö† Slack send failed: {err}")
    else:
        # No webhook available: echo the message locally instead.
        print(f"üì¢ Slack Disabled ‚Äî {message}")


# -----------------------------------------------------------------------------------
# SPARK + MLFLOW INIT
# -----------------------------------------------------------------------------------

print("\nüîß Step 2: Initializing MLflow and Spark...")

# Create (or reuse) the Spark session, point MLflow at the "databricks" tracking
# URI and the "databricks-uc" registry URI, and build the registry client used
# by the functions below. Any failure is reported and aborts the run.
try:
    spark = SparkSession.builder.appName("UAT_Inference").getOrCreate()
    mlflow.set_tracking_uri("databricks")
    mlflow.set_registry_uri("databricks-uc")
    client = MlflowClient()
    
    print("‚úÖ MLflow and Spark initialized successfully")
    send_slack_notification("üöÄ UAT Pipeline Started", "info")

except Exception as e:
    send_slack_notification(f"‚ùå MLflow/Spark Init Failed: {e}", "error")
    # Print the stack trace like the other top-level handlers in this script do,
    # so the failure can be diagnosed from the job output.
    traceback.print_exc()
    sys.exit(1)


# -----------------------------------------------------------------------------------
# HELPER FUNCTIONS
# -----------------------------------------------------------------------------------

def vector_to_array(v):
    """Convert ``v`` to a NumPy array.

    Objects exposing a ``toArray`` method are converted by calling it; anything
    else is passed to ``np.array``.
    """
    if hasattr(v, "toArray"):
        return v.toArray()
    return np.array(v)


# -----------------------------------------------------------------------------------
# STEP 1: LOAD MODEL
# -----------------------------------------------------------------------------------

def load_staging_model():
    """Load the model version that the staging alias currently points to.

    The alias is resolved once through the registry client and the model is
    then loaded by that explicit version number.

    Returns:
        A tuple ``(model, version, run_id)``: the loaded pyfunc model, the
        version as an ``int``, and the ``run_id`` recorded on that version.
    """
    print("\nüìç Loading model from Unity Catalog...")
    
    model_version = client.get_model_version_by_alias(config.MODEL_NAME, config.STAGING_ALIAS)
    # Load by the resolved version rather than resolving the alias a second
    # time: if the alias is moved between the two calls, the artifact loaded
    # would otherwise differ from the version that is reported and recorded.
    model_uri = f"models:/{config.MODEL_NAME}/{model_version.version}"

    print(f"üî• Loaded Version: {model_version.version}")    
    model = mlflow.pyfunc.load_model(model_uri)

    send_slack_notification(f"üì¶ Model Loaded: {config.MODEL_NAME} v{model_version.version}", "info")

    return model, int(model_version.version), model_version.run_id


# -----------------------------------------------------------------------------------
# STEP 2: LOAD UAT DATA
# -----------------------------------------------------------------------------------

def load_uat_data():
    """Read the UAT input table and split it into features and labels.

    The Delta table named by ``config.UAT_INPUT_TABLE`` is collected into a
    pandas DataFrame. Each entry of its ``features`` column is converted with
    ``vector_to_array`` and the results are combined into one NumPy array.

    Returns:
        A tuple ``(df, X, y_true)``: the pandas DataFrame, the feature array,
        and the values of the ``config.LABEL_COL`` column.
    """
    print("\nüìç Loading UAT Dataset...")

    uat_pdf = (
        spark.read
        .format("delta")
        .table(config.UAT_INPUT_TABLE)
        .toPandas()
    )

    feature_matrix = np.array(list(map(vector_to_array, uat_pdf["features"])))
    labels = uat_pdf[config.LABEL_COL].values

    return uat_pdf, feature_matrix, labels


# -----------------------------------------------------------------------------------
# STEP 3: RUN INFERENCE
# -----------------------------------------------------------------------------------

def run_inference(model, X):
    """Run the model on ``X`` and return predictions plus positive-class scores.

    Args:
        model: Object with a ``predict`` method. If its ``_model_impl``
            attribute offers ``predict_proba``, column 1 of that output is used
            as the score.
        X: Feature input passed unchanged to ``predict`` / ``predict_proba``.

    Returns:
        A tuple ``(y_pred, y_pred_proba)``. When probabilities cannot be
        obtained, ``y_pred_proba`` is the predictions converted to a float
        NumPy array.
    """
    y_pred = model.predict(X)

    try:
        y_pred_proba = model._model_impl.predict_proba(X)[:, 1]
    except Exception as exc:
        # Best-effort fallback. `except Exception` (not a bare except) lets
        # KeyboardInterrupt/SystemExit propagate, and np.array also handles
        # predictions that are not ndarrays (e.g. a plain list).
        print(
            f"predict_proba unavailable ({type(exc).__name__}); "
            "using hard predictions as scores"
        )
        y_pred_proba = np.array(y_pred, dtype=float)

    return y_pred, y_pred_proba


# -----------------------------------------------------------------------------------
# STEP 4: CALCULATE METRICS
# -----------------------------------------------------------------------------------

def calculate_metrics(y_true, y_pred, y_pred_proba):
    """Compute the classification metrics for one UAT run.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        y_pred_proba: Scores used for ROC AUC.

    Returns:
        A dict with ``accuracy``, ``precision``, ``recall``, ``f1``,
        ``roc_auc`` (``None`` when it cannot be computed) and
        ``confusion_matrix`` as a nested list.
    """
    metrics = {
        "accuracy": accuracy_score(y_true, y_pred),
        "precision": precision_score(y_true, y_pred, zero_division=0),
        "recall": recall_score(y_true, y_pred, zero_division=0),
        "f1": f1_score(y_true, y_pred, zero_division=0),
    }

    try:
        roc_auc = roc_auc_score(y_true, y_pred_proba)
    except Exception as exc:
        # ROC AUC is optional: report why it is missing instead of silently
        # swallowing everything (a bare except would also trap Ctrl-C).
        print(f"ROC AUC could not be computed: {exc}")
        roc_auc = None
    else:
        # NOTE(review): some scikit-learn versions may return NaN rather than
        # raise for an undefined AUC - confirm. Normalise to None so callers
        # only have one "undefined" value to handle.
        if math.isnan(roc_auc):
            roc_auc = None
    metrics["roc_auc"] = roc_auc

    metrics["confusion_matrix"] = confusion_matrix(y_true, y_pred).tolist()

    return metrics


# -----------------------------------------------------------------------------------
# STEP 5: VALIDATE UAT
# -----------------------------------------------------------------------------------

def validate_uat(metrics, version):
    """Check the metrics against the configured minimum thresholds.

    Every key in ``config.UAT_THRESHOLDS`` that starts with ``min_`` names a
    metric (the key without that prefix) and its minimum acceptable value.
    Keys without the prefix are ignored.

    Args:
        metrics: Mapping of metric name to value. A metric that is absent is
            treated as 0; a metric that is ``None`` or NaN fails its check.
        version: Model version, used only in the Slack message.

    Returns:
        A tuple ``(status, failed)`` where ``status`` is ``"PASSED"`` or
        ``"FAILED"`` and ``failed`` lists the threshold keys that were not met.
    """
    prefix = "min_"
    failed = []

    for key, min_val in config.UAT_THRESHOLDS.items():
        if not key.startswith(prefix):
            continue

        # Strip only the leading prefix; str.replace would also remove any
        # later "min_" occurring inside the metric name.
        metric_name = key[len(prefix):]
        value = metrics.get(metric_name, 0)

        # None (e.g. an uncomputable roc_auc) cannot be compared with a number,
        # and NaN compares False against everything; both must count as failed.
        if value is None or not (value >= min_val):
            failed.append(key)

    status = "PASSED" if not failed else "FAILED"

    if status == "PASSED":
        send_slack_notification(f"üéâ Model PASSED UAT ‚Äî v{version}", "success")
    else:
        send_slack_notification(f"‚ùå Model FAILED UAT ‚Äî v{version}", "error")

    return status, failed


# -----------------------------------------------------------------------------------
# STEP 6: STORE RESULTS
# -----------------------------------------------------------------------------------

def log_results(version, run_id, metrics, status, failed_checks):
    """Append one UAT result row to ``config.UAT_RESULTS_TABLE``.

    Args:
        version: Model version; stored as a string.
        run_id: Run id associated with the model version.
        metrics: Metric mapping; ``accuracy``, ``precision``, ``recall`` and
            ``f1`` default to 0 when absent, ``roc_auc`` is stored as null when
            absent or ``None``.
        status: UAT outcome string.
        failed_checks: Threshold keys that failed; stored as JSON, or null
            when empty.
    """

    print("\nüìç Logging UAT Results into Delta Table...")

    from pyspark.sql.types import (
        DoubleType,
        StringType,
        StructField,
        StructType,
        TimestampType,
    )

    # `is not None` rather than truthiness, so a legitimate AUC of 0.0 is kept.
    roc_auc = metrics.get("roc_auc")

    result = {
        "timestamp": datetime.now(),
        "model_name": config.MODEL_NAME,
        "model_type": config.MODEL_TYPE,
        "model_version": str(version),
        "run_id": run_id,
        "uat_status": status,
        "accuracy": float(metrics.get("accuracy", 0)),
        "precision": float(metrics.get("precision", 0)),
        "recall": float(metrics.get("recall", 0)),
        "f1": float(metrics.get("f1", 0)),
        "roc_auc": float(roc_auc) if roc_auc is not None else None,
        "confusion_matrix_json": json.dumps(metrics.get("confusion_matrix", [])),
        "failed_checks_json": json.dumps(failed_checks) if failed_checks else None,
    }

    # Explicit schema: with a single row, a column whose only value is None
    # (failed_checks_json on a passing run, or roc_auc) gives schema inference
    # nothing to determine a type from.
    double_cols = {"accuracy", "precision", "recall", "f1", "roc_auc"}
    fields = []
    for name in result:
        if name == "timestamp":
            dtype = TimestampType()
        elif name in double_cols:
            dtype = DoubleType()
        else:
            dtype = StringType()
        fields.append(StructField(name, dtype, True))
    schema = StructType(fields)

    row = tuple(result[field.name] for field in schema.fields)
    df = spark.createDataFrame([row], schema=schema)

    # Append-mode saveAsTable creates the table when it does not exist yet.
    # The previous fallback overwrote the table on *any* error, so a failed
    # append could wipe the accumulated UAT history; errors now propagate.
    (
        df.write
        .mode("append")
        .option("mergeSchema", "true")
        .saveAsTable(config.UAT_RESULTS_TABLE)
    )

    print("üìå Results logged.")


# -----------------------------------------------------------------------------------
# MAIN EXECUTION
# -----------------------------------------------------------------------------------

def main():
    """Run the UAT pipeline end to end.

    Steps, in order: load the staging model, load the UAT data, run inference,
    compute metrics, validate them against the thresholds, and store the
    result. Any exception is sent to Slack, its traceback printed, and the
    process exits with code 1.
    """
    try:
        staged_model, model_ver, source_run = load_staging_model()
        _, features, labels = load_uat_data()
        predictions, scores = run_inference(staged_model, features)
        uat_metrics = calculate_metrics(labels, predictions, scores)
        verdict, failures = validate_uat(uat_metrics, model_ver)
        log_results(model_ver, source_run, uat_metrics, verdict, failures)

        print("\nüéØ Final UAT Status:", verdict)

    except Exception as err:
        send_slack_notification(f"üî• UAT Pipeline Failed ‚Äî {err}", "error")
        traceback.print_exc()
        sys.exit(1)


# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()


ModuleNotFoundError: No module named 'pyspark'