# In [None]:
# 🎯 MODEL EVALUATION SCRIPT

import mlflow
from mlflow.tracking import MlflowClient
import pandas as pd
import json
import yaml
import sys
from datetime import datetime
from pyspark.sql import SparkSession
from typing import Dict, List

# Opening banner: the pipeline title framed by an 80-character rule above and
# below, emitted with one call (newline-separated, so stdout is unchanged).
print(
    "=" * 80,
    "üéØ MODEL EVALUATION PIPELINE ‚Äî LOG ONLY (NO REGISTRATION)",
    "=" * 80,
    sep="\n",
)

# -------------------------
# 1️⃣ Load Config
# -------------------------

# Read the pipeline settings from pipeline_config.yml in the working directory
# into the module-level `pipeline_cfg` mapping.
print("\nüìã Loading configuration from pipeline_config.yml...")

try:
    # Pin the encoding so parsing does not depend on the platform's locale
    # default (the file is expected to be UTF-8).
    with open("pipeline_config.yml", "r", encoding="utf-8") as f:
        pipeline_cfg = yaml.safe_load(f)

    # safe_load returns None for an empty document and a scalar/list for a
    # non-mapping one. Fail here with a clear message instead of an unrelated
    # TypeError when the config is indexed later.
    if not isinstance(pipeline_cfg, dict):
        raise ValueError(
            "pipeline_config.yml must contain a top-level mapping, "
            f"got {type(pipeline_cfg).__name__}"
        )

    print("‚úÖ Configuration Loaded\n")

except Exception as e:
    # Top-level boundary: report any failure (missing file, YAML syntax error,
    # wrong document shape) and stop with a non-zero exit status.
    print(f"‚ùå Error loading config: {e}")
    sys.exit(1)

class Config:
    """Flat, attribute-style view of the nested pipeline configuration.

    The constructor reads the nested ``cfg`` mapping, stores the values on
    upper-case attributes, and prints a short summary.

    Attributes:
        MODEL_TYPE: ``cfg["model"]["type"]``.
        MODEL_NAME: ``"<catalog>.<schema>.<base_name>_<type>"`` built from the
            ``model`` section.
        EXPERIMENT_NAME: ``cfg["experiment"]["name"]``.
        ARTIFACT_PATH: ``cfg["experiment"]["artifact_path"]``.
        PRIMARY_METRIC, TRACKED_METRICS, DIRECTION, THRESHOLD_METRICS: the
            ``primary_metric``, ``tracked_metrics``, ``direction`` and
            ``threshold_metrics`` entries of ``cfg["metrics"]["classification"]``.
        EVALUATION_LOG_TABLE: ``cfg["tables"]["evaluation_log"]``.
        RECENT_N: ``cfg["comparison"]["recent_n"]``.

    Raises:
        KeyError: if any of the keys listed above is missing from ``cfg``.
    """

    def __init__(self, cfg):
        # --- model identity -------------------------------------------------
        model_section = cfg["model"]
        self.MODEL_TYPE = model_section["type"]
        catalog = model_section["catalog"]
        schema = model_section["schema"]
        base_name = model_section["base_name"]
        self.MODEL_NAME = "{}.{}.{}_{}".format(
            catalog, schema, base_name, self.MODEL_TYPE
        )

        # --- experiment -----------------------------------------------------
        experiment_section = cfg["experiment"]
        self.EXPERIMENT_NAME = experiment_section["name"]
        self.ARTIFACT_PATH = experiment_section["artifact_path"]

        # --- metrics (only the "classification" section is read) ------------
        classification = cfg["metrics"]["classification"]
        self.PRIMARY_METRIC = classification["primary_metric"]
        self.TRACKED_METRICS = classification["tracked_metrics"]
        self.DIRECTION = classification["direction"]
        self.THRESHOLD_METRICS = classification["threshold_metrics"]

        # --- output table and comparison window ------------------------------
        self.EVALUATION_LOG_TABLE = cfg["tables"]["evaluation_log"]
        self.RECENT_N = cfg["comparison"]["recent_n"]

        # Echo the key settings so they appear in the run output.
        summary_lines = (
            "\nüìå Evaluation Config Summary:",
            f"   Model: {self.MODEL_NAME}",
            f"   Primary Metric: {self.PRIMARY_METRIC} ({self.DIRECTION})",
            f"   Logging Table: {self.EVALUATION_LOG_TABLE}",
        )
        print("\n".join(summary_lines))

# Build the module-level settings object from the parsed YAML; the functions
# in this file read it through the global name `config`.
config = Config(pipeline_cfg)
print("=" * 80)  # closing rule under the summary printed by Config.__init__

# -------------------------
# 2️⃣ Initialize MLflow & Spark
# -------------------------

# Bring up the Spark session and the MLflow client, then resolve the configured
# experiment. Any failure in this section is reported and ends the script with
# exit status 1.
try:
    spark = (
        SparkSession.builder
        .appName("ModelEvaluationOnly")
        .getOrCreate()
    )

    mlflow.set_tracking_uri("databricks")
    client = MlflowClient()

    # A None result is treated as "the experiment does not exist".
    experiment = mlflow.get_experiment_by_name(config.EXPERIMENT_NAME)
    if experiment is None:
        raise Exception(f"Experiment not found: {config.EXPERIMENT_NAME}")

    print("\nüî• MLflow + Spark loaded successfully\n")
except Exception as init_error:
    print(f"‚ùå MLflow Init Failed: {init_error}")
    sys.exit(1)

# -------------------------
# 3️⃣ Fetch Runs
# -------------------------

def get_recent_runs():
    """Fetch up to ``config.RECENT_N`` experiment runs ordered by the primary metric.

    The search is sorted on ``metrics.<config.PRIMARY_METRIC>``: descending when
    ``config.DIRECTION`` equals ``"maximize"``, ascending for any other value.

    Returns:
        list[dict]: one dict per run with the keys ``run_id``, ``run_name``,
        ``primary_metric``, ``all_metrics``, ``params``, ``model_uri`` and
        ``timestamp``. Metrics a run did not log appear as ``None``. An empty
        list is returned when the search yields no runs.
    """
    print("\nüìç Fetching Experiment Runs...")

    sort_keyword = "DESC" if config.DIRECTION == "maximize" else "ASC"
    ordering = f"metrics.{config.PRIMARY_METRIC} {sort_keyword}"

    found = client.search_runs(
        [experiment.experiment_id],
        order_by=[ordering],
        max_results=config.RECENT_N,
    )
    if not found:
        print("‚ö† No model runs found.")
        return []

    def describe(run):
        """Reduce one MLflow run to the plain dict used by the later steps."""
        logged = run.data.metrics
        run_id = run.info.run_id
        return {
            "run_id": run_id,
            "run_name": run.info.run_name,
            "primary_metric": logged.get(config.PRIMARY_METRIC),
            "all_metrics": {name: logged.get(name) for name in config.TRACKED_METRICS},
            "params": run.data.params,
            "model_uri": f"runs:/{run_id}/{config.ARTIFACT_PATH}",
            # start_time is divided by 1000 before conversion, i.e. it is
            # handled as epoch milliseconds.
            "timestamp": datetime.fromtimestamp(run.info.start_time / 1000),
        }

    run_list = [describe(run) for run in found]

    print(f"üìå {len(run_list)} runs fetched and ready for logging.")
    return run_list

# -------------------------
# 4️⃣ Log Evaluation Results
# -------------------------

def log_results(run_list):
    """Append one row per evaluated run to the evaluation-log Delta table.

    Args:
        run_list: run dicts as produced by ``get_recent_runs``. Each must
            provide ``run_id``, ``run_name``, ``primary_metric``,
            ``all_metrics``, ``params`` and ``model_uri``.

    Returns:
        None. Rows are appended to ``config.EVALUATION_LOG_TABLE``. An empty
        ``run_list`` is a no-op.
    """
    if not run_list:
        # An empty list yields a pandas frame with no rows or columns, which
        # gives Spark nothing to build a table write from. Skip the write
        # instead of failing.
        print("No runs to log; skipping Delta write.")
        return

    print("\nüìù Logging evaluation results to Delta table...")

    # Stamp the whole batch once so every row written by this evaluation
    # carries the same logging time.
    logged_at = datetime.now()

    records = [
        {
            "timestamp": logged_at,
            "run_id": run["run_id"],
            "run_name": run["run_name"],
            "model_name": config.MODEL_NAME,
            "primary_metric": config.PRIMARY_METRIC,
            "primary_metric_value": run["primary_metric"],
            # Nested structures are stored as JSON text columns.
            "all_metrics_json": json.dumps(run["all_metrics"]),
            "params_json": json.dumps(run["params"]),
            "model_uri": run["model_uri"],
        }
        for run in run_list
    ]

    df = pd.DataFrame(records)
    (
        spark.createDataFrame(df)
        .write.format("delta")
        .mode("append")
        .saveAsTable(config.EVALUATION_LOG_TABLE)
    )
    print("‚úÖ Evaluation logged successfully.")

# -------------------------
# 5️⃣ Display Summary
# -------------------------

def show_summary(run_list):
    """Print a ranked list of the first ten runs with their primary metric.

    Args:
        run_list: run dicts carrying at least ``run_name`` and
            ``primary_metric``. They are printed in the order given; only the
            first ten entries are shown.

    A run whose ``primary_metric`` is ``None`` (the metric was never logged for
    it) is shown as ``N/A`` instead of a number.
    """
    print("\nüìä TOP MODEL RESULTS:\n")
    for rank, run in enumerate(run_list[:10], 1):
        value = run["primary_metric"]
        # Applying ":.4f" to None raises TypeError, so format only real numbers.
        shown = "N/A" if value is None else f"{value:.4f}"
        print(f"{rank}. {run['run_name']} ‚Üí {config.PRIMARY_METRIC}: {shown}")

# -------------------------
# 🚀 MAIN EXECUTION
# -------------------------

def main():
    """Run the evaluation: fetch runs, log them, then print the summary.

    When no runs are fetched, a notice is printed and nothing is logged.
    """
    fetched = get_recent_runs()

    if fetched:
        log_results(fetched)
        show_summary(fetched)

        print("\nüéâ Evaluation Completed ‚Äî No Models Registered (As Expected)")
        print("=" * 80)
    else:
        print("‚ùå No runs found. Exiting.")

# Entry point: run the pipeline only when this module is executed as
# `__main__`, not when it is imported.
if __name__ == "__main__":
    main()
