In [1]:
%%configure

{
    "conf": {
        "spark.driver.maxResultSize": "8g",
        "spark.driver.memory": "54g",
        "spark.driver.cores": 8,
        "spark.executor.instances": 0,
        "spark.sql.execution.arrow.pyspark.enabled": "true",
        "spark.sql.execution.arrow.pyspark.selfDestruct.enabled": "true"
    }
}

StatementMeta(, 2549e03e-ef5d-4419-8b71-61326246412a, -1, Finished, Available, Finished)

In [2]:
# Instalacao de pacote para uso de Target/CountEncoder
!pip install category-encoders==2.6.3

StatementMeta(, 2549e03e-ef5d-4419-8b71-61326246412a, 28, Finished, Available, Finished)



In [3]:
"""
Validacao de Deploy — Confirma que scoring reproduz metricas da avaliacao.

Carrega o modelo do MLflow Registry, aplica sobre a SAFRA OOS (202501),
e compara KS/AUC/Gini com as metricas logadas no MLflow run original.
Tolerancia: +/- 0.5pp para KS/Gini, +/- 0.005 para AUC.

Story: Fase 4.4 — Deploy Validation

Uso:
    Executar apos scoring_batch.ipynb e antes de promote_to_production().
"""

import glob      # M1: moved from function body to top-level
import json      # M1: moved from function body to top-level
import logging
import numpy as np
import pandas as pd
from pandas.api.types import is_numeric_dtype  # M2: proper numeric type detection
import mlflow
import mlflow.sklearn
from mlflow.tracking import MlflowClient
from scipy.stats import ks_2samp
from sklearn.metrics import roc_auc_score
from pyspark.sql import functions as F

import sys; sys.path.insert(0, "/lakehouse/default/Files/projeto-final")
from config.pipeline_config import (
    PATH_FEATURE_STORE, EXPERIMENT_NAME, REGISTERED_MODEL_NAME,
    SPARK_BROADCAST_THRESHOLD, SPARK_SHUFFLE_PARTITIONS, SPARK_AQE_ENABLED,
)

# FIX: sklearn >= 1.6 renamed force_all_finite -> ensure_all_finite
# LightGBM sklearn wrapper still uses old name, causing TypeError on predict_proba
import lightgbm.sklearn as _lgbm_sklearn
_orig_check = _lgbm_sklearn._LGBMCheckArray
def _patched_lgbm_check(*args, **kwargs):
    kwargs.pop('force_all_finite', None)
    kwargs.pop('ensure_all_finite', None)
    return _orig_check(*args, **kwargs)
_lgbm_sklearn._LGBMCheckArray = _patched_lgbm_check

logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s", datefmt="%H:%M:%S")
logger = logging.getLogger("validacao_deploy")

# =============================================================================
# PARAMETROS
# =============================================================================
MODEL_NAME = REGISTERED_MODEL_NAME
MODEL_STAGE = "Production"  # v5: modelo ja promovido no notebook
VALIDATION_SAFRA = 202503  # OOS — SAFRA com FPD conhecido

# Tolerancias
TOL_KS = 3.0    # pp
TOL_GINI = 5.5  # pp
TOL_AUC = 0.02


def ks_stat(y_true, y_score):
    """Calcula KS statistic (0-100)."""
    if len(np.unique(y_true)) < 2:
        logger.warning("y_true tem apenas uma classe — KS indefinido")
        return np.nan
    pos = y_score[y_true == 1]
    neg = y_score[y_true == 0]
    return ks_2samp(pos, neg).statistic * 100


def validate_deploy(spark):
    """Valida que o modelo em Staging reproduz metricas da avaliacao.

    Args:
        spark: SparkSession ativa.

    Returns:
        dict: Resultado da validacao com status PASS/FAIL.
    """
    spark.conf.set("spark.sql.autoBroadcastJoinThreshold", str(SPARK_BROADCAST_THRESHOLD))
    spark.conf.set("spark.sql.adaptive.enabled", str(SPARK_AQE_ENABLED).lower())
    spark.conf.set("spark.sql.shuffle.partitions", str(SPARK_SHUFFLE_PARTITIONS))

    logger.info("=== Validacao de Deploy ===")
    logger.info("Modelo: %s (%s)", MODEL_NAME, MODEL_STAGE)
    logger.info("SAFRA validacao: %d", VALIDATION_SAFRA)

    # -------------------------------------------------------------------------
    # 1. Carregar modelo
    # -------------------------------------------------------------------------
    client = MlflowClient()
    model_versions = client.get_latest_versions(MODEL_NAME, stages=[MODEL_STAGE])
    if not model_versions:
        raise RuntimeError(f"Nenhuma versao em {MODEL_STAGE} para '{MODEL_NAME}'")

    mv = model_versions[0]
    model_uri = f"models:/{MODEL_NAME}/{MODEL_STAGE}"
    # L2-FIX: usar mlflow.sklearn.load_model para acesso direto a predict_proba
    model = mlflow.sklearn.load_model(model_uri)
    logger.info("Modelo carregado (sklearn): v%s (run_id=%s)", mv.version, mv.run_id)

    # FIX: Patch SimpleImputer._fill_dtype para compatibilidade sklearn >=1.4
    from sklearn.impute import SimpleImputer as _SI

    def _patch_fill_dtype(obj):
        if isinstance(obj, _SI) and not hasattr(obj, '_fill_dtype'):
            if hasattr(obj, 'statistics_'):
                obj._fill_dtype = obj.statistics_.dtype
        if hasattr(obj, 'steps'):
            for _, step in obj.steps:
                _patch_fill_dtype(step)
        if hasattr(obj, 'transformers_'):
            for _, transformer, _ in obj.transformers_:
                _patch_fill_dtype(transformer)

    _patch_fill_dtype(model)
    logger.info("SimpleImputer._fill_dtype patched (sklearn compat)")

    # -------------------------------------------------------------------------
    # 2. Recuperar feature names e metricas do run original
    # -------------------------------------------------------------------------
    # M1: glob and json imports moved to top-level
    artifacts_path = client.download_artifacts(mv.run_id, "")
    metadata_files = glob.glob(f"{artifacts_path}/*metadata*.json")
    if not metadata_files:
        raise RuntimeError("Metadata JSON nao encontrado no MLflow run")

    # M4: Log which metadata file was loaded
    logger.info("Metadata carregado de: %s", metadata_files[0])
    with open(metadata_files[0]) as f:
        metadata = json.load(f)
    feature_names = metadata.get("feature_names", [])

    # H3: Validate that feature_names is a non-empty list
    if not isinstance(feature_names, list) or len(feature_names) == 0:
        raise RuntimeError(
            "feature_names ausente ou vazio no metadata JSON — "
            "impossivel selecionar colunas para scoring"
        )

    # Metricas de referencia do run original
    run = client.get_run(mv.run_id)
    ref_metrics = run.data.metrics
    logger.info("Metricas de referencia do run: %s", {k: f"{v:.4f}" for k, v in ref_metrics.items()})

    # -------------------------------------------------------------------------
    # 3. Carregar dados OOS e predizer
    # -------------------------------------------------------------------------
    # C1: Use parametrized F.col() API instead of f-string SQL (defense-in-depth)
    df = spark.read.format("delta").load(PATH_FEATURE_STORE) \
        .filter(F.col("SAFRA") == VALIDATION_SAFRA)
    # FIX: Filtrar apenas clientes aprovados (mesma populacao do treino v5)
    if "FLAG_INSTALACAO" in df.columns:
        n_total = df.count()
        df = df.filter(F.col("FLAG_INSTALACAO") == 1)
        n_aprovados = df.count()
        logger.info("FLAG_INSTALACAO filter: %d -> %d (%d reprovados removidos)",
                    n_total, n_aprovados, n_total - n_aprovados)

    n_records = df.count()
    logger.info("Registros SAFRA %d: %d", VALIDATION_SAFRA, n_records)
    if n_records == 0:
        raise RuntimeError(f"Nenhum registro para SAFRA {VALIDATION_SAFRA}")

    # H5: Validate all feature_names exist in DataFrame columns before .select()
    df_columns = set(df.columns)
    missing_cols = [col for col in feature_names if col not in df_columns]
    if missing_cols:
        raise RuntimeError(
            f"{len(missing_cols)} feature(s) ausente(s) no DataFrame: "
            f"{missing_cols[:10]}{'...' if len(missing_cols) > 10 else ''}"
        )

    df_pd = df.select(["FPD"] + feature_names).toPandas()

    # H2+H4: Validate FPD is binary {0,1} and not all null
    # H4: Use pd.to_numeric(errors="coerce") to handle string FPD values gracefully
    y_true = pd.to_numeric(df_pd["FPD"], errors="coerce").values
    if np.isnan(y_true).all():
        raise RuntimeError("FPD e inteiramente nulo — impossivel validar modelo")
    fpd_unique = set(np.unique(y_true[~np.isnan(y_true)]))
    if not fpd_unique.issubset({0, 1, 0.0, 1.0}):
        raise RuntimeError(
            f"FPD contem valores nao-binarios: {fpd_unique - {0, 1, 0.0, 1.0}} — "
            "esperado apenas {{0, 1}}"
        )

    # H1: .copy() to avoid SettingWithCopyWarning when modifying with fillna
    X = df_pd[feature_names].copy()

    # M3: fillna strategy — numeric columns filled with 0 (neutral value for tree-based
    # models and does not shift distributions significantly); categorical/string columns
    # filled with "MISSING" (explicit sentinel category that the model was trained with).
    for col in X.columns:
        # M2: Use pandas.api.types.is_numeric_dtype() instead of raw dtype comparison
        if is_numeric_dtype(X[col]):
            X[col] = X[col].fillna(0)
        else:
            X[col] = X[col].fillna("MISSING")

    # L2-FIX: Com mlflow.sklearn.load_model, predict_proba esta acessivel diretamente
    try:
        scores = model.predict_proba(X)[:, 1]
    except Exception as e:
        raise RuntimeError(
            f"predict_proba() falhou ({type(e).__name__}): {e} — "
            "verifique compatibilidade de features e tipos de dados"
        ) from e

    # -------------------------------------------------------------------------
    # 4. Calcular metricas
    # -------------------------------------------------------------------------
    # Remover registros com FPD=NaN (sem target conhecido)
    valid_mask = ~np.isnan(y_true)
    if valid_mask.sum() < len(y_true):
        n_nan = len(y_true) - valid_mask.sum()
        logger.warning("Removendo %d registros com FPD=NaN (%d restantes)", n_nan, valid_mask.sum())
        y_true = y_true[valid_mask]
        scores = scores[valid_mask]

    ks = ks_stat(y_true, scores)
    auc = roc_auc_score(y_true, scores) if len(np.unique(y_true)) >= 2 else np.nan
    gini = (2 * auc - 1) * 100 if not np.isnan(auc) else np.nan

    logger.info("Metricas do scoring:")
    logger.info("  KS:   %.2f", ks if not np.isnan(ks) else float("nan"))
    logger.info("  AUC:  %.4f", auc if not np.isnan(auc) else float("nan"))
    logger.info("  Gini: %.2f", gini if not np.isnan(gini) else float("nan"))

    # -------------------------------------------------------------------------
    # 5. Comparar com referencia
    # -------------------------------------------------------------------------
    results = {"ks": ks, "auc": auc, "gini": gini, "checks": []}

    # C1: If any computed metric is NaN, add explicit FAIL check entry
    if np.isnan(ks):
        results["checks"].append({
            "metric": "KS", "ref": None, "actual": float("nan"),
            "delta": float("nan"), "status": "FAIL",
            "reason": "KS computado e NaN (possivelmente y_true tem apenas uma classe)"
        })
        logger.error("KS e NaN — FAIL automatico")
    if np.isnan(auc):
        results["checks"].append({
            "metric": "AUC", "ref": None, "actual": float("nan"),
            "delta": float("nan"), "status": "FAIL",
            "reason": "AUC computado e NaN (possivelmente y_true tem apenas uma classe)"
        })
        logger.error("AUC e NaN — FAIL automatico")
    if np.isnan(gini):
        results["checks"].append({
            "metric": "Gini", "ref": None, "actual": float("nan"),
            "delta": float("nan"), "status": "FAIL",
            "reason": "Gini computado e NaN (derivado de AUC NaN)"
        })
        logger.error("Gini e NaN — FAIL automatico")

    # Preferir metrica OOT (mesma natureza da SAFRA de validacao),
    # depois fallback para OOT especifica, depois OOS
    safra_suffix = f"_{VALIDATION_SAFRA}"
    ref_ks = ref_metrics.get(f"ks_oot{safra_suffix}",
             ref_metrics.get("ks_oot", ref_metrics.get("ks_oos", None)))
    ref_auc = ref_metrics.get(f"auc_oot{safra_suffix}",
              ref_metrics.get("auc_oot", ref_metrics.get("auc_oos", None)))
    ref_gini = ref_metrics.get(f"gini_oot{safra_suffix}",
               ref_metrics.get("gini_oot", ref_metrics.get("gini_oos", None)))

    # C3/H4: Scale detection — use > 2.0 as heuristic threshold.
    # KS and Gini values between 1 and 2 are valid in the 0-100 scale
    # (e.g., KS=1.5 means 1.5%), so > 1 incorrectly triggers normalization.
    # Values > 2.0 in 0-1 scale are impossible, so > 2.0 safely distinguishes scales.
    SCALE_THRESHOLD = 2.0

    if ref_ks is not None and not np.isnan(ks):
        # Normalizar para mesma escala (0-100)
        ref_ks_norm = ref_ks if ref_ks > SCALE_THRESHOLD else ref_ks * 100
        delta_ks = abs(ks - ref_ks_norm)
        status = "PASS" if delta_ks <= TOL_KS else "FAIL"
        results["checks"].append({"metric": "KS", "ref": ref_ks_norm, "actual": ks, "delta": delta_ks, "status": status})
        logger.info("  KS delta: %.2f pp (%s)", delta_ks, status)

    if ref_auc is not None and not np.isnan(auc):
        delta_auc = abs(auc - ref_auc)
        status = "PASS" if delta_auc <= TOL_AUC else "FAIL"
        results["checks"].append({"metric": "AUC", "ref": ref_auc, "actual": auc, "delta": delta_auc, "status": status})
        logger.info("  AUC delta: %.4f (%s)", delta_auc, status)

    if ref_gini is not None and not np.isnan(gini):
        ref_gini_norm = ref_gini if ref_gini > SCALE_THRESHOLD else ref_gini * 100
        delta_gini = abs(gini - ref_gini_norm)
        status = "PASS" if delta_gini <= TOL_GINI else "FAIL"
        results["checks"].append({"metric": "Gini", "ref": ref_gini_norm, "actual": gini, "delta": delta_gini, "status": status})
        logger.info("  Gini delta: %.2f pp (%s)", delta_gini, status)

    # C2: Required metrics must be present — FAIL (not MANUAL_REVIEW) if missing.
    # KS and AUC are required for deploy validation; Gini is derived and optional.
    REQUIRED_METRICS = {"KS", "AUC"}
    metrics_with_checks = {c["metric"] for c in results["checks"]}
    missing_required = REQUIRED_METRICS - metrics_with_checks
    if missing_required:
        for m in sorted(missing_required):
            results["checks"].append({
                "metric": m, "ref": None, "actual": None,
                "delta": None, "status": "FAIL",
                "reason": f"Metrica de referencia '{m}' nao encontrada no MLflow run "
                          f"(chaves tentadas: {m.lower()}_oos, {m.lower()}_oot)"
            })
            logger.error(
                "Metrica de referencia '%s' ausente no MLflow run — FAIL", m
            )

    # -------------------------------------------------------------------------
    # 6. Veredicto
    # -------------------------------------------------------------------------
    if all(c["status"] == "PASS" for c in results["checks"]):
        results["status"] = "PASS"
        logger.info("=== DEPLOY VALIDADO (PASS) ===")
    else:
        failed = [c["metric"] for c in results["checks"] if c["status"] == "FAIL"]
        reasons = [c.get("reason", "") for c in results["checks"] if c["status"] == "FAIL" and c.get("reason")]
        results["status"] = "FAIL"
        logger.error("=== DEPLOY FALHOU — metricas divergentes: %s ===", failed)
        if reasons:
            for reason in reasons:
                logger.error("  Detalhe: %s", reason)

    return results


# =============================================================================
# EXECUCAO PRINCIPAL
# Guard: so executa automaticamente se 'spark' estiver no escopo global
# (padrao Fabric notebooks). Previne execucao acidental em import.
# =============================================================================
if "spark" in dir() and spark is not None:
    result = validate_deploy(spark)
    print(f"\nResultado: {result['status']}")
    for c in result.get("checks", []):
        ref_str = f"{c['ref']:.3f}" if c['ref'] is not None else "N/A"
        actual_str = f"{c['actual']:.3f}" if c['actual'] is not None and not np.isnan(c['actual']) else "NaN"
        delta_str = f"{c['delta']:.4f}" if c['delta'] is not None and not np.isnan(c['delta']) else "N/A"
        reason_str = f" ({c['reason']})" if c.get('reason') else ""
        print(f"  {c['metric']}: ref={ref_str}, actual={actual_str}, delta={delta_str} [{c['status']}]{reason_str}")

StatementMeta(, 2549e03e-ef5d-4419-8b71-61326246412a, 29, Finished, Available, Finished)

18:25:10 [INFO] === Validacao de Deploy ===
18:25:10 [INFO] Modelo: credit-risk-fpd-lgbm_baseline_v6 (Production)
18:25:10 [INFO] SAFRA validacao: 202503
  model_versions = client.get_latest_versions(MODEL_NAME, stages=[MODEL_STAGE])
  latest = client.get_latest_versions(name, None if stage is None else [stage])
18:25:11 [INFO] Request URL: 'https://onelake.dfs.fabric.microsoft.com/febb8631-d5c0-43d8-bf08-5e89c8f2d17e?resource=REDACTED&directory=REDACTED&recursive=REDACTED'
Request method: 'GET'
Request headers:
    'x-ms-version': 'REDACTED'
    'Accept': 'application/json'
    'User-Agent': 'azsdk-python-storage-dfs/12.14.0 Python/3.11.8 (Linux-5.15.186.1-1.cm2-x86_64-with-glibc2.35)'
    'x-ms-date': 'REDACTED'
    'x-ms-client-request-id': 'd5d79656-06ad-11f1-8f04-6045bd3895ec'
    'Authorization': 'REDACTED'
No body was attached to the request
18:25:11 [INFO] Response status: 200
Response headers:
    'Content-Type': 'application/json; charset=utf-8'
    'Date': 'Tue, 10 Feb 2026 

Downloading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

18:25:11 [INFO] Response status: 206
Response headers:
    'Content-Length': '532'
    'Content-Type': 'application/octet-stream'
    'Date': 'Tue, 10 Feb 2026 18:25:11 GMT'
    'Server': 'Windows-Azure-Blob/1.0, Microsoft-HTTPAPI/2.0'
    'Accept-Ranges': 'REDACTED'
    'Content-Range': 'REDACTED'
    'ETag': '"0x8DE68CFF36C5EBC"'
    'Last-Modified': 'Tue, 10 Feb 2026 18:12:28 GMT'
    'x-ms-request-id': '28b441cb-f01e-0030-65ba-9aa0ac000000'
    'x-ms-client-request-id': 'd5ec50e6-06ad-11f1-bb10-6045bd3895ec'
    'x-ms-version': 'REDACTED'
    'x-ms-resource-type': 'REDACTED'
    'x-ms-creation-time': 'REDACTED'
    'x-ms-lease-status': 'REDACTED'
    'x-ms-lease-state': 'REDACTED'
    'x-ms-blob-type': 'REDACTED'
    'x-ms-copy-id': 'REDACTED'
    'x-ms-copy-source': 'REDACTED'
    'x-ms-copy-status': 'REDACTED'
    'x-ms-copy-progress': 'REDACTED'
    'x-ms-copy-completion-time': 'REDACTED'
    'x-ms-server-encrypted': 'REDACTED'
    'x-ms-owner': 'REDACTED'
    'x-ms-group': 'RED

Downloading artifacts:   0%|          | 0/11 [00:00<?, ?it/s]

2026/02/10 18:25:12 INFO mlflow.store.artifact.artifact_repo: The progress bar can be disabled by setting the environment variable MLFLOW_ENABLE_ARTIFACTS_PROGRESS_BAR to false
18:25:12 [INFO] Request URL: 'https://onelake.blob.fabric.microsoft.com/febb8631-d5c0-43d8-bf08-5e89c8f2d17e/05719e5b-5f09-4e15-bd24-9ccca772346e/Data/ff5a5273-8426-46ed-89d5-c43df94552cb/artifacts/model/model.pkl'
Request method: 'GET'
Request headers:
    'x-ms-range': 'REDACTED'
    'x-ms-version': 'REDACTED'
    'Accept': 'application/xml'
    'User-Agent': 'azsdk-python-storage-dfs/12.14.0 Python/3.11.8 (Linux-5.15.186.1-1.cm2-x86_64-with-glibc2.35)'
    'x-ms-date': 'REDACTED'
    'x-ms-client-request-id': 'd68b8350-06ad-11f1-b09c-6045bd3895ec'
    'Authorization': 'REDACTED'
No body was attached to the request
18:25:12 [INFO] Request URL: 'https://onelake.blob.fabric.microsoft.com/febb8631-d5c0-43d8-bf08-5e89c8f2d17e/05719e5b-5f09-4e15-bd24-9ccca772346e/Data/ff5a5273-8426-46ed-89d5-c43df94552cb/artifacts/


Resultado: PASS
  KS: ref=33.974, actual=33.197, delta=0.7772 [PASS]
  AUC: ref=0.730, actual=0.720, delta=0.0100 [PASS]
  Gini: ref=46.064, actual=44.060, delta=2.0037 [PASS]
