In [1]:
import numpy as np
import pandas as pd
import mlflow
import os
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder, RobustScaler
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split

# Establishing baseline experiment

In [None]:
# --- 1. VISUALIZATION & PREPROCESSING HELPERS ---
from sklearn.model_selection import train_test_split, cross_val_score
#from sklearn.tree import DecisionTreeClassifier

def get_preprocessor(X):
    """
    Creates a ColumnTransformer preprocessor based on column types in X.
    """
    # Identify categorical and numerical features

    numerical_features = ['tenure', 'MonthlyCharges', 'TotalCharges']
    categorical_features = [
            'gender', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines',
            'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection',
            'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract',
            'PaperlessBilling', 'PaymentMethod', 'SeniorCitizen'
        ]

    X.drop(columns=['customerID'], inplace=True)

    from sklearn.impute import SimpleImputer

    X['TotalCharges'] = pd.to_numeric(X['TotalCharges'], errors='coerce')

    imputer = SimpleImputer(strategy='median')
    X[numerical_features] = imputer.fit_transform(X[numerical_features])


    #categorical_features = X.select_dtypes(include=['object', 'category']).columns.tolist()
    #numerical_features = X.select_dtypes(include=['int64', 'float64']).columns.tolist()

    # Create transformers
    numeric_transformer = Pipeline(steps=[
        ('scaler', StandardScaler())
    ])

    categorical_transformer = Pipeline(steps=[
        ('onehot', OneHotEncoder(handle_unknown='ignore'))
    ])

    # Create the column transformer
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, numerical_features),
            ('cat', categorical_transformer, categorical_features)
        ],
        remainder='drop' # drop everything
    )
    return preprocessor

def plot_confusion_matrix(y_true, y_pred, title, save_path):
    """
    Generates and saves a confusion matrix plot.
    """
    cm = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
                xticklabels=['No Churn', 'Churn'], 
                yticklabels=['No Churn', 'Churn'])
    plt.title(title)
    plt.ylabel('Actual')
    plt.xlabel('Predicted')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved visualization to {save_path}")

def plot_drift_distribution(df_original, df_drifted, feature_name, save_path):
    """
    Generates and saves a KDE plot comparing a feature's distribution
    before and after drift.
    """
    plt.figure(figsize=(10, 6))
    sns.kdeplot(df_original[feature_name], label='Original Test Data', color='blue', shade=True)
    sns.kdeplot(df_drifted[feature_name], label='Drifted Test Data', color='red', shade=True)
    plt.title(f'Distribution Shift for Feature: {feature_name}')
    plt.legend()
    plt.savefig(save_path)
    plt.close()
    print(f"Saved visualization to {save_path}")

# --- 2. BASELINE TRAINING FUNCTION ---

def run_baseline_training(preprocessed_data_path):
    """
    Trains the initial baseline model.
    - Encodes y to 0/1.
    - Trains and evaluates the model on the test set.
    - Logs model, metrics, visualizations, and data artifacts to MLflow.
    """
    print("--- Running Part 1: Baseline Training ---")

    mlflow.set_tracking_uri("http://localhost:5000")
    mlflow.set_experiment("telco-baseline")
    
    data = pd.read_csv(preprocessed_data_path)
    X = data.drop('Churn', axis=1)
    
    # --- Clean Data Prep ---
    # Encode y to numeric (0/1) from the start.
    # This is cleaner and avoids all downstream errors.
    le = LabelEncoder()
    y = le.fit_transform(data['Churn'])
    print(f"Target 'Churn' encoded. Positive class ('{le.classes_[1]}') is 1.")

    # Get the preprocessor
    preprocessor = get_preprocessor(X)
    
    # Split into train and test
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

    # Define the model
    rf = RandomForestClassifier(random_state=42, n_estimators=100, max_depth=10)
    # Subbing this with LR for testing

    #from sklearn.linear_model import LogisticRegression
    # lr = LogisticRegression(penalty='elasticnet', solver='saga', l1_ratio=0.5, random_state=42)

    # Create the full pipeline
    model_pipeline = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('classifier', rf)
    ])

    # Start MLflow run
    with mlflow.start_run(run_name="Baseline-Model-Setup") as run:
        print(f"MLflow Run ID: {run.info.run_id}")
        
        # Train the model
        model_pipeline.fit(X_train, y_train)

        from mlflow.models.signature import infer_signature
        signature = infer_signature(X_train, y_train)

        # Log parameters
        mlflow.log_params(rf.get_params())

        # Make predictions
        y_pred = model_pipeline.predict(X_test)
        y_pred_proba = model_pipeline.predict_proba(X_test)[:, 1]

        #print("--Building analytic dataset for DDLA--")

        #error_mask = (y_pred != y_test)
        #correct_mask = (y_pred == y_test)

        #X_test_analytic = X_test.copy()

        #X_test_analytic['ddla_target'] = 0
        #X_test_analytic.loc[error_mask, 'ddla_target'] = 1

        #analytic_path = "analytic_dataset.csv"
        #X_test_analytic.to_csv(analytic_path, index=False)
        #mlflow.log_artifact(analytic_path)

        #if os.path.exists(analytic_path):
        #    os.remove(analytic_path)
        
        # Log metrics
        accuracy = accuracy_score(y_test, y_pred)
        roc_auc = roc_auc_score(y_test, y_pred_proba)
        
        print(f"Baseline Model Accuracy: {accuracy:.4f}")
        print(f"Baseline Model ROC AUC: {roc_auc:.4f}")
        mlflow.log_metric("baseline_accuracy", accuracy)
        mlflow.log_metric("baseline_roc_auc", roc_auc)

        # Log the model pipeline
        #mlflow.sklearn.log_model(model_pipeline, "model_pipeline")
        mlflow.sklearn.log_model(
            model_pipeline, 
            'model_pipeline',
            signature=signature, 
            registered_model_name='telco-baseline'
        )

        # --- Save and log visualizations ---
        cm_path = "confusion_matrix_baseline.png"
        plot_confusion_matrix(y_test, y_pred, "Baseline Model Confusion Matrix", cm_path)
        mlflow.log_artifact(cm_path)

        # --- Save and log the data artifacts (with y as 0/1) ---
        X_train.to_csv("X_train.csv", index=False)
        pd.Series(y_train, name="Churn").to_csv("y_train.csv", index=False, header=True)
        X_test.to_csv("X_test.csv", index=False)
        pd.Series(y_test, name="Churn").to_csv("y_test.csv", index=False, header=True)
        
        mlflow.log_artifact("X_train.csv")
        mlflow.log_artifact("y_train.csv")
        mlflow.log_artifact("X_test.csv")
        mlflow.log_artifact("y_test.csv")
        
        print("Logged model pipeline, parameters, metrics, visualizations, and data artifacts.")
        
    # Clean up local files
    for f in ["X_train.csv", "y_train.csv", "X_test.csv", "y_test.csv", cm_path]:
        if os.path.exists(f):
            os.remove(f)
            
    return run.info.run_id

# --- 3. DRIFT SIMULATION FUNCTIONS ---
import numpy as np
import pandas as pd

def simulate_drift(df, target_col="Churn", drift_type="covariate", drift_fraction=0.3, intensity=0.3,
    thresholds=None, random_state=None):

    np.random.seed(random_state)
    df_drifted = df.copy()

    # Select subset to drift
    drift_indices = np.random.choice(
        df_drifted.index,
        size=int(len(df_drifted) * drift_fraction),
        replace=False
    )

    # Set default thresholds if not provided
    if thresholds is None:
        thresholds = {
            "tenure": 12,
            "MonthlyCharges": df_drifted["MonthlyCharges"].median(),
            "TotalCharges": df_drifted["TotalCharges"].median(),
            "SeniorCitizen": 0.5,
        }

    # === Covariate Drift ===
    if drift_type == "covariate":
        # Demographic drift
        if "tenure" in df_drifted.columns:
            mask = df_drifted.index.isin(drift_indices)
            df_drifted.loc[mask, "tenure"] *= np.random.uniform(0.5, 1 - intensity)

        if "SeniorCitizen" in df_drifted.columns:
            mask = df_drifted.index.isin(drift_indices)
            df_drifted.loc[mask, "SeniorCitizen"] = np.random.choice(
                [0, 1],
                size=mask.sum(),
                p=[1 - thresholds["SeniorCitizen"], thresholds["SeniorCitizen"]]
            )

        # Plan drift (pricing + contracts)
        if "Contract" in df_drifted.columns:
            mask = df_drifted.index.isin(drift_indices)
            df_drifted.loc[mask, "Contract"] = np.random.choice(
                ["Month-to-month", "One year", "Two year"],
                size=mask.sum(),
                p=[0.7, 0.2, 0.1]
            )

        if "MonthlyCharges" in df_drifted.columns:
            mask = df_drifted.index.isin(drift_indices)
            df_drifted.loc[mask, "MonthlyCharges"] *= np.random.uniform(0.8, 1 - intensity / 2)
            if "TotalCharges" in df_drifted.columns:
                df_drifted.loc[mask, "TotalCharges"] = (
                    df_drifted.loc[mask, "MonthlyCharges"] * df_drifted.loc[mask, "tenure"]
                )

        # Payment behavior drift
        if "PaymentMethod" in df_drifted.columns:
            mask = df_drifted.index.isin(drift_indices)
            df_drifted.loc[mask, "PaymentMethod"] = np.random.choice(
                [
                    "Electronic check",
                    "Mailed check",
                    "Bank transfer (automatic)",
                    "Credit card (automatic)",
                ],
                size=mask.sum(),
                p=[0.6, 0.1, 0.15, 0.15],
            )

        if "PaperlessBilling" in df_drifted.columns:
            df_drifted.loc[drift_indices, "PaperlessBilling"] = "Yes"

    # === Concept Drift ===
    elif drift_type == "concept":
        flip_mask = np.zeros(len(df_drifted), dtype=bool)

        # Economic drift ‚Äî churn increases for high cost and long tenure
        if "MonthlyCharges" in df_drifted.columns and "tenure" in df_drifted.columns:
            flip_mask |= (
                (df_drifted["MonthlyCharges"] > thresholds["MonthlyCharges"]) &
                (df_drifted["tenure"] > thresholds["tenure"])
            )

        # Service quality drift ‚Äî churn increases for Fiber + no support
        if {"InternetService", "TechSupport"}.issubset(df_drifted.columns):
            flip_mask |= (
                (df_drifted["InternetService"] == "Fiber optic") &
                (df_drifted["TechSupport"] == "No")
            )

        # Retention drift ‚Äî churn decreases for long tenure
        if "tenure" in df_drifted.columns:
            flip_mask |= (df_drifted["tenure"] > 24) & (df_drifted[target_col] == "Yes")

        # Apply drift only to fraction of data
        indices_to_flip = df_drifted[flip_mask].sample(
            frac=intensity, random_state=random_state
        ).index if flip_mask.sum() > 0 else []

        if df_drifted[target_col].dtype == "object":
            df_drifted.loc[indices_to_flip, target_col] = df_drifted.loc[
                indices_to_flip, target_col
            ].apply(lambda x: "No" if x == "Yes" else "Yes")
        else:
            df_drifted.loc[indices_to_flip, target_col] = 1 - df_drifted.loc[
                indices_to_flip, target_col
            ]

    else:
        raise ValueError("drift_type must be either 'covariate' or 'concept'")

    return df_drifted


# --- 4. DRIFT EVALUATION FUNCTION ---

def run_drift_evaluation(setup_run_id, drift_type):
    """
    Simulates a drift scenario and evaluates the baseline model's performance on it.
    - Loads the baseline model and test data from the 'setup_run_id'.
    - Applies the specified 'drift_type' to the test data.
    - Logs the model's new, drifted performance and comparison visualizations.
    """
    print(f"\n--- Running Part 1: Evaluation for: {drift_type} drift ---")
    
    client = mlflow.tracking.MlflowClient()
    local_download_path = "."
    cleanup_files = []

    try:
        # --- 1. SETUP: Load Baseline Assets ---
        print(f"Loading assets from setup run: {setup_run_id}")
        
        model_pipeline_uri = f"runs:/{setup_run_id}/model_pipeline"
        baseline_model = mlflow.sklearn.load_model(model_pipeline_uri)
        
        # Download artifacts to the current directory
        for f in ["X_train.csv", "X_test.csv", "y_test.csv"]:
            client.download_artifacts(setup_run_id, f, local_download_path)
            cleanup_files.append(f)
        
        # Load the downloaded files
        X_train_original = pd.read_csv("X_train.csv") # For viz
        X_test_original = pd.read_csv("X_test.csv")
        y_test_original = pd.read_csv("y_test.csv").squeeze() # Squeeze to make it a Series
        
        # Reconstruct the original test set for drift simulation (all numeric)
        test_data_original = X_test_original.copy()
        test_data_original['Churn'] = y_test_original
        
    except Exception as e:
        print(f"Error loading artifacts: {e}")
        return
    
    # --- 2. SIMULATE DRIFT ---
    
    if drift_type == 'covariate':
        drifted_test_data = simulate_drift(test_data_original, drift_type='covariate', drift_fraction=0.4, intensity=0.5, random_state=42)
    elif drift_type == 'concept':
        drifted_test_data = simulate_drift(test_data_original, drift_type='concept', drift_fraction=0.4, intensity=0.5, thresholds= {"tenure": 12, "MonthlyCharges": 75}, 
                                           random_state=42)
    else:
        pass
        
    X_test_drifted = drifted_test_data.drop('Churn', axis=1)
    y_test_drifted = drifted_test_data['Churn'] # This is the new "ground truth"

    # --- 3. MONITOR: Evaluate Baseline Model on New Data ---
    
    with mlflow.start_run(run_name=f"Sim-Evaluate-{drift_type}-drift", nested=True) as run:
        print(f"Logging simulation run: {run.info.run_id}")
        mlflow.log_param("drift_type", drift_type)
        mlflow.log_param("parent_setup_run_id", setup_run_id)
        
        # Get "ground truth" performance of the old model on the new data
        y_pred_drifted = baseline_model.predict(X_test_drifted)
        y_pred_proba_drifted = baseline_model.predict_proba(X_test_drifted)[:, 1]
        
        drifted_accuracy = accuracy_score(y_test_drifted, y_pred_drifted)
        drifted_roc_auc = roc_auc_score(y_test_drifted, y_pred_proba_drifted)
    
        print(f"Baseline model accuracy on {drift_type} data: {drifted_accuracy:.4f}")
        mlflow.log_metric("drifted_accuracy", drifted_accuracy)
        mlflow.log_metric("drifted_roc_auc", drifted_roc_auc)

        # --- 4. LOG VISUALIZATIONS ---
        
        # Log new confusion matrix
        cm_drifted_path = f"confusion_matrix_{drift_type}.png"
        plot_confusion_matrix(y_test_drifted, y_pred_drifted, 
                              f"Model Performance on {drift_type} Drift", cm_drifted_path)
        mlflow.log_artifact(cm_drifted_path)
        cleanup_files.append(cm_drifted_path)
        
        # Log distribution shift plots for key features
        if drift_type in ['covariate', 'combined']:
            for feature in ['tenure', 'MonthlyCharges']:
                dist_path = f"drift_distribution_{feature}_{drift_type}.png"
                plot_drift_distribution(X_test_original, X_test_drifted, feature, dist_path)
                mlflow.log_artifact(dist_path)
                cleanup_files.append(dist_path)

    # --- 5. CLEANUP ---
#    finally:
#        for f in cleanup_files:
#            if os.path.exists(f):
#                os.remove(f)

In [None]:
file_path = 'C:/Users/ldmag/Documents/GitHub/Code-Assignments-Projects/Projects/MLOps Drift Detection and Pipeline Optimization/data/Telco-Churn.csv'
run_id = run_baseline_training(file_path)

run_drift_evaluation(run_id, 'covariate')
run_drift_evaluation(run_id, 'concept')
#run_drift_evaluation(run_id, 'combined')

--- Running Part 1: Baseline Training ---
Target 'Churn' encoded. Positive class ('Yes') is 1.
MLflow Run ID: da7ff7eedbc9485fa1f7a25cbaf06021




Baseline Model Accuracy: 0.7963
Baseline Model ROC AUC: 0.8369


Registered model 'telco-baseline' already exists. Creating a new version of this model...
2025/11/09 21:46:58 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: telco-baseline, version 30
Created version '30' of model 'telco-baseline'.


Saved visualization to confusion_matrix_baseline.png
Logged model pipeline, parameters, metrics, visualizations, and data artifacts.
üèÉ View run Baseline-Model-Setup at: http://localhost:5000/#/experiments/1/runs/da7ff7eedbc9485fa1f7a25cbaf06021
üß™ View experiment at: http://localhost:5000/#/experiments/1

--- Running Part 1: Evaluation for: covariate drift ---
Loading assets from setup run: da7ff7eedbc9485fa1f7a25cbaf06021


Downloading artifacts:   0%|          | 0/1 [04:08<?, ?it/s]
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 5/5 [00:01<00:00,  3.48it/s]
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:01<00:00,  1.44s/it]
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  2.70it/s]
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  2.71it/s]


Logging simulation run: 0cebdcce8fca479b8db390b748d8a699
Baseline model accuracy on covariate data: 0.7786
Saved visualization to confusion_matrix_covariate.png
Saved visualization to drift_distribution_tenure_covariate.png



`shade` is now deprecated in favor of `fill`; setting `fill=True`.
This will become an error in seaborn v0.14.0; please update your code.

  sns.kdeplot(df_original[feature_name], label='Original Test Data', color='blue', shade=True)

`shade` is now deprecated in favor of `fill`; setting `fill=True`.
This will become an error in seaborn v0.14.0; please update your code.

  sns.kdeplot(df_drifted[feature_name], label='Drifted Test Data', color='red', shade=True)

`shade` is now deprecated in favor of `fill`; setting `fill=True`.
This will become an error in seaborn v0.14.0; please update your code.

  sns.kdeplot(df_original[feature_name], label='Original Test Data', color='blue', shade=True)

`shade` is now deprecated in favor of `fill`; setting `fill=True`.
This will become an error in seaborn v0.14.0; please update your code.

  sns.kdeplot(df_drifted[feature_name], label='Drifted Test Data', color='red', shade=True)


Saved visualization to drift_distribution_MonthlyCharges_covariate.png
üèÉ View run Sim-Evaluate-covariate-drift at: http://localhost:5000/#/experiments/1/runs/0cebdcce8fca479b8db390b748d8a699
üß™ View experiment at: http://localhost:5000/#/experiments/1

--- Running Part 1: Evaluation for: concept drift ---
Loading assets from setup run: da7ff7eedbc9485fa1f7a25cbaf06021


Downloading artifacts:   0%|          | 0/1 [04:07<?, ?it/s]
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 5/5 [00:01<00:00,  3.52it/s]
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:01<00:00,  1.44s/it]
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  2.72it/s]
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  2.67it/s]


Logging simulation run: 2c564d5c16ac4139b75b1ccf59226c63
Baseline model accuracy on concept data: 0.6870
Saved visualization to confusion_matrix_concept.png
üèÉ View run Sim-Evaluate-concept-drift at: http://localhost:5000/#/experiments/1/runs/2c564d5c16ac4139b75b1ccf59226c63
üß™ View experiment at: http://localhost:5000/#/experiments/1


# Monitoring and retraining for drift

In [3]:
# helpers for this approach

def calculate_psi(expected, actual, bins=10):
    """
    Calculate Population Stability Index (PSI) between two distributions.
    
    Parameters:
    -----------
    expected : array-like
        Reference distribution (e.g., training data)
    actual : array-like
        Current distribution (e.g., test data)
    bins : int
        Number of bins for discretization
        
    Returns:
    --------
    float : PSI value
        - PSI < 0.1: No significant drift
        - 0.1 <= PSI < 0.25: Moderate drift
        - PSI >= 0.25: Significant drift
    """
    def psi_calc(e_perc, a_perc):
        if a_perc == 0:
            a_perc = 0.0001
        if e_perc == 0:
            e_perc = 0.0001
        return (e_perc - a_perc) * np.log(e_perc / a_perc)
    
    # Create breakpoints
    breakpoints = np.linspace(
        min(expected.min(), actual.min()),
        max(expected.max(), actual.max()),
        bins + 1
    )
    
    # Calculate percentages in each bin
    expected_percents = np.histogram(expected, breakpoints)[0] / len(expected)
    actual_percents = np.histogram(actual, breakpoints)[0] / len(actual)
    
    # Calculate PSI
    psi = sum([psi_calc(e, a) for e, a in zip(expected_percents, actual_percents)])
    
    return psi

def calculate_all_psi_scores(X_train, X_test):
    """
    Calculate PSI for all features (numerical and categorical).
    
    Returns:
    --------
    dict : Feature name -> PSI score mapping
    """
    psi_scores = {}
    
    # Numerical features
    numerical_features = ['tenure', 'MonthlyCharges', 'TotalCharges']
    for feature in numerical_features:
        if feature in X_train.columns:
            psi = calculate_psi(X_train[feature].values, X_test[feature].values)
            psi_scores[feature] = psi
    
    # Categorical features
    categorical_features = [
        'gender', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines',
        'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection',
        'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract',
        'PaperlessBilling', 'PaymentMethod', 'SeniorCitizen'
    ]
    
    for feature in categorical_features:
        if feature in X_train.columns:
            psi = calculate_psi_categorical(X_train[feature], X_test[feature])
            psi_scores[feature] = psi
    
    return psi_scores

def adversarial_validation_score(X_train, X_test, preprocessor=None, n_estimators=100, max_depth=5, random_state=42):
    """
    Train an adversarial classifier to distinguish train from test data.
    Returns the AUC score as a drift metric.
    
    Parameters:
    -----------
    X_train : pd.DataFrame
        Training data features
    X_test : pd.DataFrame
        Test data features
    n_estimators : int
        Number of trees in random forest
    max_depth : int
        Maximum depth of trees
    random_state : int
        Random seed
        
    Returns:
    --------
    float : AUC score
        - AUC ‚âà 0.5: No drift (can't distinguish train from test)
        - AUC ‚Üí 1.0: Significant drift (easy to distinguish)
    """
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import roc_auc_score

    if preprocessor is None:
        preprocessor = get_preprocessor(X_train)

    X_train_cleaned = preprocessor.fit_transform(X_train)
    X_test_cleaned = preprocessor.fit_transform(X_test)

    if hasattr(X_train_cleaned, 'toarray'):
        X_train_cleaned = X_train_cleaned.toarray()
        X_test_cleaned = X_test_cleaned.toarray()
    
    # Combine data
    X_combined = np.vstack([X_train_cleaned, X_test_cleaned])
    
    # Create labels: 0 = train, 1 = test
    y_domain = np.concatenate([
        np.zeros(len(X_train_cleaned)),
        np.ones(len(X_test_cleaned))
    ])
    
    # Train adversarial classifier
    adv_classifier = RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        random_state=random_state,
        n_jobs=-1
    )
    adv_classifier.fit(X_combined, y_domain)
    
    # Get predictions
    y_pred_proba = adv_classifier.predict_proba(X_combined)[:, 1]
    
    # Calculate AUC
    auc_score = roc_auc_score(y_domain, y_pred_proba)

    # feature name handling
    feature_names = get_feature_names(preprocessor, X_train)
    feature_imp = dict(zip(feature_names, adv_classifier.feature_importances_))
    feature_imp = dict(sorted(feature_imp.items(), key=lambda x: x[1], reverse=True))
    
    return auc_score, adv_classifier, feature_imp

def get_feature_names(preprocessor, X):
    feature_names = []
    
    for name, transformer, columns in preprocessor.transformers_:
        if name == 'num':
            # Numerical features keep their names
            feature_names.extend(columns)
        elif name == 'cat':
            # Categorical features get one-hot encoded names
            if hasattr(transformer.named_steps['onehot'], 'get_feature_names_out'):
                cat_features = transformer.named_steps['onehot'].get_feature_names_out(columns)
                feature_names.extend(cat_features)
            else:
                # Fallback for older sklearn versions
                feature_names.extend([f"{col}_{val}" for col in columns 
                                     for val in transformer.named_steps['onehot'].categories_])
    
    return feature_names

def calculate_psi_categorical(expected, actual):
    """
    Calculate PSI for categorical features.
    
    Parameters:
    -----------
    expected : pd.Series or np.array
        Reference categorical distribution (training)
    actual : pd.Series or np.array
        Current categorical distribution (test)
        
    Returns:
    --------
    float : PSI value
    """
    # Get unique categories from both
    all_categories = sorted(set(expected) | set(actual))
    
    # Calculate proportions
    expected_counts = pd.Series(expected).value_counts(normalize=True)
    actual_counts = pd.Series(actual).value_counts(normalize=True)
    
    psi = 0.0
    epsilon = 0.0001  # To avoid log(0)
    
    for category in all_categories:
        e_perc = expected_counts.get(category, 0) + epsilon
        a_perc = actual_counts.get(category, 0) + epsilon
        
        psi += (e_perc - a_perc) * np.log(e_perc / a_perc)
    
    return psi

def estimate_density_ratio_adversarial(X_train, X_test, preprocessor=None, n_estimators=100, max_depth=3):
    """
    Estimate density ratios using adversarial validation approach.
    More stable than direct density estimation methods.
    
    Parameters:
    -----------
    X_train : pd.DataFrame
        Training data features
    X_test : pd.DataFrame
        Test data features
        
    Returns:
    --------
    np.array : Density ratios for training samples
    """
    from sklearn.ensemble import GradientBoostingClassifier
    
    # Combine data
    #X_combined = pd.concat([X_train, X_test], axis=0)
    if preprocessor is None:
        preprocessor = get_preprocessor(X_train)

    X_train_transformed = preprocessor.fit_transform(X_train)
    X_test_transformed = preprocessor.fit_transform(X_test)

    if hasattr(X_train_transformed, 'toarray'):
        X_train_transformed = X_train_transformed.toarray()
    if hasattr(X_test_transformed, 'toarray'):
        X_test_transformed = X_test_transformed.toarray()

    X_combined = np.vstack([X_train_transformed, X_test_transformed])

    y_domain = np.concatenate([
        np.zeros(len(X_train_transformed)),
        np.ones(len(X_test_transformed))
    ])
    
    # Train discriminator
    discriminator = GradientBoostingClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        learning_rate=0.1,
        random_state=42
    )
    discriminator.fit(X_combined, y_domain)
    
    # Get probabilities for training data
    test_proba = discriminator.predict_proba(X_train_transformed)[:, 1]
    
    # Calculate density ratio: r(x) = P(test|x) / P(train|x)
    epsilon = 1e-6  # for numerical stability
    density_ratios = (test_proba + epsilon) / (1 - test_proba + epsilon)
    
    # Clip extreme values to reduce variance
    density_ratios = np.clip(density_ratios, 0.3, 3.0)
    
    return density_ratios

def plot_adversarial_auc_over_batches(auc_scores, drift_type, save_path):
    """
    Plot adversarial validation AUC scores over batches/time.
    """
    plt.figure(figsize=(12, 6))
    
    batches = np.arange(len(auc_scores))
    plt.plot(batches, auc_scores, marker='o', linewidth=2, markersize=8, 
             label=f'{drift_type.capitalize()} Drift AUC', color='red')
    
    # Add reference line at 0.5 (no drift)
    plt.axhline(y=0.5, color='green', linestyle='--', linewidth=2, 
                label='No Drift (AUC=0.5)')
    
    # Add threshold lines
    plt.axhline(y=0.7, color='orange', linestyle=':', linewidth=1.5, 
                label='Moderate Drift Threshold (0.7)')
    plt.axhline(y=0.8, color='darkred', linestyle=':', linewidth=1.5, 
                label='Severe Drift Threshold (0.8)')
    
    plt.xlabel('Batch Index', fontsize=12)
    plt.ylabel('Adversarial AUC Score', fontsize=12)
    plt.title(f'Adversarial Validation Drift Detection: {drift_type.capitalize()} Drift', 
              fontsize=14, fontweight='bold')
    plt.legend(loc='best', fontsize=10)
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.savefig(save_path, dpi=300)
    plt.close()
    print(f"Saved visualization to {save_path}")


def plot_psi_scores(psi_scores_dict, drift_type, save_path):
    """
    Plot PSI scores for multiple features.
    """
    features = list(psi_scores_dict.keys())
    psi_values = list(psi_scores_dict.values())
    
    plt.figure(figsize=(12, 6))
    colors = ['green' if psi < 0.1 else 'orange' if psi < 0.25 else 'red' 
              for psi in psi_values]
    
    bars = plt.bar(features, psi_values, color=colors, alpha=0.7, edgecolor='black')
    
    # Add reference lines
    plt.axhline(y=0.1, color='orange', linestyle='--', linewidth=1.5, 
                label='Moderate Drift (0.1)')
    plt.axhline(y=0.25, color='red', linestyle='--', linewidth=1.5, 
                label='Severe Drift (0.25)')
    
    plt.xlabel('Features', fontsize=12)
    plt.ylabel('PSI Score', fontsize=12)
    plt.title(f'Population Stability Index by Feature: {drift_type.capitalize()} Drift', 
              fontsize=14, fontweight='bold')
    plt.xticks(rotation=45, ha='right')
    plt.legend(loc='best', fontsize=10)
    plt.grid(True, alpha=0.3, axis='y')
    plt.tight_layout()
    plt.savefig(save_path, dpi=300)
    plt.close()
    print(f"Saved visualization to {save_path}")


def plot_model_performance_comparison(baseline_metrics, adapted_metrics, drift_type, save_path):
    """
    Compare baseline vs adapted model performance.
    """
    metrics = ['Accuracy', 'ROC-AUC']
    baseline_vals = [baseline_metrics['accuracy'], baseline_metrics['roc_auc']]
    adapted_vals = [adapted_metrics['accuracy'], adapted_metrics['roc_auc']]
    
    x = np.arange(len(metrics))
    width = 0.35
    
    fig, ax = plt.subplots(figsize=(10, 6))
    bars1 = ax.bar(x - width/2, baseline_vals, width, label='Baseline Model', 
                   color='steelblue', alpha=0.8)
    bars2 = ax.bar(x + width/2, adapted_vals, width, label='Adapted Model (Reweighted)', 
                   color='forestgreen', alpha=0.8)
    
    ax.set_xlabel('Metrics', fontsize=12)
    ax.set_ylabel('Score', fontsize=12)
    ax.set_title(f'Model Performance Comparison: {drift_type.capitalize()} Drift', 
                 fontsize=14, fontweight='bold')
    ax.set_xticks(x)
    ax.set_xticklabels(metrics)
    ax.legend(loc='best', fontsize=10)
    ax.grid(True, alpha=0.3, axis='y')
    
    # Add value labels on bars
    for bars in [bars1, bars2]:
        for bar in bars:
            height = bar.get_height()
            ax.text(bar.get_x() + bar.get_width()/2., height,
                   f'{height:.3f}', ha='center', va='bottom', fontsize=9)
    
    plt.tight_layout()
    plt.savefig(save_path, dpi=300)
    plt.close()
    print(f"Saved visualization to {save_path}")

def run_adversarial_drift_experiment(setup_run_id, drift_type='covariate', 
                                      drift_threshold=0.7, use_reweighting=True, drift_intensity=0.5):
    """
    Complete experimental pipeline for ensemble drift detection with adversarial validation.
    
    Parameters:
    -----------
    setup_run_id : str
        MLflow run ID from baseline training
    drift_type : str
        Type of drift to simulate ('covariate' or 'concept')
    drift_threshold : float
        Adversarial AUC threshold to trigger reweighting (default: 0.7)
    use_reweighting : bool
        Whether to apply density ratio reweighting when drift detected
    drift_intensity: float
        Sets intensity of the drift when calling the simulate_drift function (default: 0.5)
        
    Returns:
    --------
    dict : Experiment results including metrics and drift scores
    """
    print(f"\n{'='*80}")
    print(f"ADVERSARIAL DRIFT DETECTION EXPERIMENT: {drift_type.upper()} DRIFT")
    print(f"{'='*80}\n")
    
    # Setup MLflow
    mlflow.set_tracking_uri("http://localhost:5000")
    mlflow.set_experiment("telco-adversarial-drift-detection")
    
    client = mlflow.tracking.MlflowClient()
    local_download_path = "."
    cleanup_files = []

    #model_pipeline_uri = f"runs:/{setup_run_id}/model_pipeline"
    #baseline_model = mlflow.sklearn.load_model(model_pipeline_uri)
    
    try:
        print("STEP 1: Loading baseline model and data...")
        
        model_pipeline_uri = f"runs:/{setup_run_id}/model_pipeline"
        baseline_model = mlflow.sklearn.load_model(model_pipeline_uri)

        preprocessor = baseline_model.named_steps['preprocessor']
        
        for f in ["X_train.csv", "X_test.csv", "y_train.csv", "y_test.csv"]:
            client.download_artifacts(setup_run_id, f, local_download_path)
            cleanup_files.append(f)
        
        X_train_original = pd.read_csv("X_train.csv")
        y_train_original = pd.read_csv("y_train.csv").squeeze()
        X_test_original = pd.read_csv("X_test.csv")
        y_test_original = pd.read_csv("y_test.csv").squeeze()
        
        test_data_original = X_test_original.copy()
        test_data_original['Churn'] = y_test_original
        
        print(f"Loaded {len(X_train_original)} training samples")
        print(f"Loaded {len(X_test_original)} test samples\n")
        
        print(f"STEP 2: Simulating {drift_type} drift...")
        
        if drift_type == 'covariate':
            drifted_test_data = simulate_drift(
                test_data_original, 
                drift_type='covariate',
                drift_fraction=0.4, 
                intensity=drift_intensity, 
                random_state=42
            )
        elif drift_type == 'concept':
            drifted_test_data = simulate_drift(
                test_data_original, 
                drift_type='concept',
                drift_fraction=0.4, 
                intensity=drift_intensity, 
                thresholds={"tenure": 12, "MonthlyCharges": 75},
                random_state=42
            )
        else:
            raise ValueError("drift_type must be 'covariate' or 'concept'")
        
        X_test_drifted = drifted_test_data.drop('Churn', axis=1)
        y_test_drifted = drifted_test_data['Churn']
        
        print(f"Applied {drift_type} drift to test data\n")
        
        print("STEP 3: Running adversarial validation...")
        
        # Get preprocessor from baseline model
        preprocessor = baseline_model.named_steps['preprocessor']

        print("Running validation on features")

        adv_auc, adv_classifier, feature_importances = adversarial_validation_score(
            X_train_original,
            X_test_drifted,
            preprocessor=preprocessor
        )

        #print("Drifted feature importances:")
        #for i, (feature, importance) in enumerate(list(feature_importances.items())[:5]):
        #    print(f"  {i+1}. {feature}: {importance:.4f}")
        
        # Transform data for adversarial validation (use numerical features only)
        #numerical_features = ['tenure', 'MonthlyCharges', 'TotalCharges']
        #X_train_num = X_train_original[numerical_features]
        #X_test_drifted_num = X_test_drifted[numerical_features]
        
        # Calculate adversarial AUC
        #adv_auc, adv_classifier = adversarial_validation_score(
        #    X_train_num, 
        #    X_test_drifted_num,
        #    preprocessor=preprocessor
        #)
        
        print(f"Adversarial AUC Score: {adv_auc:.4f}")
        
        if adv_auc < 0.6:
            drift_severity = "MINIMAL"
        elif adv_auc < 0.7:
            drift_severity = "MODERATE"
        elif adv_auc < 0.8:
            drift_severity = "SIGNIFICANT"
        else:
            drift_severity = "SEVERE"
        
        print(f"  ‚Üí Drift Severity: {drift_severity}\n")

        print("STEP 4: Calculating PSI scores per feature...")
        
        psi_scores = calculate_all_psi_scores(X_train_original, X_test_drifted)
        #for feature in numerical_features:
        #    psi = calculate_psi(
        #        X_train_original[feature].values,
        #        X_test_drifted[feature].values
        #    )
        #    psi_scores[feature] = psi
        #    print(f"  {feature}: PSI = {psi:.4f}")
        
        avg_psi = np.mean(list(psi_scores.values()))
        print(f"Average PSI: {avg_psi:.4f}\n")

        print("STEP 5: Evaluating baseline model on drifted data...")
        
        y_pred_baseline = baseline_model.predict(X_test_drifted)
        y_pred_proba_baseline = baseline_model.predict_proba(X_test_drifted)[:, 1]
        
        baseline_metrics = {
            'accuracy': accuracy_score(y_test_drifted, y_pred_baseline),
            'roc_auc': roc_auc_score(y_test_drifted, y_pred_proba_baseline)
        }
        
        print(f"‚úì Baseline Accuracy: {baseline_metrics['accuracy']:.4f}")
        print(f"‚úì Baseline ROC-AUC: {baseline_metrics['roc_auc']:.4f}\n")

        adapted_metrics = None
        density_ratios = None
        
        if use_reweighting and adv_auc >= drift_threshold:
            print(f"STEP 6: Drift detected (AUC={adv_auc:.4f} >= {drift_threshold})!")
            print("         Applying density ratio reweighting...\n")
            
            # Estimate density ratios
            density_ratios = estimate_density_ratio_adversarial(
                X_train_original, 
                X_test_drifted,
                preprocessor=preprocessor
            )
            
            print(f"‚úì Density ratios computed")
            print(f"  ‚Üí Mean weight: {density_ratios.mean():.3f}")
            print(f"  ‚Üí Std weight: {density_ratios.std():.3f}")
            print(f"  ‚Üí Min/Max weight: {density_ratios.min():.3f} / {density_ratios.max():.3f}\n")
            
            # Retrain with importance weighting
            #from sklearn.linear_model import LogisticRegression
            from sklearn.base import clone

            base_classifier = baseline_model.named_steps['classifier']
            adapted_classifier = clone(base_classifier)
            
            X_train_transformed = preprocessor.transform(X_train_original)

            adapted_classifier.fit(X_train_transformed, y_train_original, sample_weight=density_ratios)
            #adapted_model = LogisticRegression(
            #    penalty='elasticnet',
            #    solver='saga',
            #    l1_ratio=0.5,
            #    random_state=42,
            #    max_iter=1000
            #)
            
            # Create new pipeline with adapted model
            adapted_pipeline = Pipeline(steps=[
                ('preprocessor', preprocessor),
                ('classifier', adapted_classifier)
            ])
            
            # Fit with sample weights
            # Note: We need to pass weights through the pipeline
            #X_train_transformed = preprocessor.transform(X_train_original)
            #adapted_model.fit(X_train_transformed, y_test_original[:len(X_train_original)], 
            #                 sample_weight=density_ratios)
            
            # Evaluate adapted model
            y_pred_adapted = adapted_pipeline.predict(X_test_drifted)
            y_pred_proba_adapted = adapted_pipeline.predict_proba(X_test_drifted)[:, 1]
            
            adapted_metrics = {
                'accuracy': accuracy_score(y_test_drifted, y_pred_adapted),
                'roc_auc': roc_auc_score(y_test_drifted, y_pred_proba_adapted)
            }
            
            print(f"‚úì Adapted Model Accuracy: {adapted_metrics['accuracy']:.4f}")
            print(f"‚úì Adapted Model ROC-AUC: {adapted_metrics['roc_auc']:.4f}")
            
            improvement_acc = adapted_metrics['accuracy'] - baseline_metrics['accuracy']
            improvement_auc = adapted_metrics['roc_auc'] - baseline_metrics['roc_auc']
            
            print(f"\n{'‚îÄ'*60}")
            print(f"IMPROVEMENT: Accuracy = {improvement_acc:+.4f}, ROC-AUC = {improvement_auc:+.4f}")
            print(f"{'‚îÄ'*60}\n")
        
        else:
            print(f"STEP 6: No significant drift detected (AUC={adv_auc:.4f} < {drift_threshold})")
            print("         Skipping reweighting.\n")

        print("STEP 7: Logging results to MLflow...")
        
        with mlflow.start_run(run_name=f"Adversarial-Drift-{drift_type}") as run:
            # Log parameters
            mlflow.log_param("drift_type", drift_type)
            mlflow.log_param("drift_threshold", drift_threshold)
            mlflow.log_param("use_reweighting", use_reweighting)
            mlflow.log_param("parent_setup_run_id", setup_run_id)
            
            # Log drift metrics
            mlflow.log_metric("adversarial_auc", adv_auc)
            mlflow.log_metric("avg_psi", avg_psi)
            mlflow.log_metric("drift_severity_code", 
                             {"MINIMAL": 0, "MODERATE": 1, "SIGNIFICANT": 2, "SEVERE": 3}[drift_severity])
            
            for feature, psi in psi_scores.items():
                mlflow.log_metric(f"psi_{feature}", psi)
            
            # Log baseline metrics
            mlflow.log_metric("baseline_accuracy", baseline_metrics['accuracy'])
            mlflow.log_metric("baseline_roc_auc", baseline_metrics['roc_auc'])
            
            # Log adapted metrics if available
            if adapted_metrics:
                mlflow.log_metric("adapted_accuracy", adapted_metrics['accuracy'])
                mlflow.log_metric("adapted_roc_auc", adapted_metrics['roc_auc'])
                mlflow.log_metric("accuracy_improvement", 
                                 adapted_metrics['accuracy'] - baseline_metrics['accuracy'])
                mlflow.log_metric("roc_auc_improvement", 
                                 adapted_metrics['roc_auc'] - baseline_metrics['roc_auc'])
            
            # Create and log visualizations
            print("  Creating visualizations...")
            
            # 1. Adversarial AUC plot (single point for now, but structured for batches)
            auc_path = f"adversarial_auc_{drift_type}.png"
            plot_adversarial_auc_over_batches([adv_auc], drift_type, auc_path)
            mlflow.log_artifact(auc_path)
            cleanup_files.append(auc_path)
            
            # 2. PSI scores plot
            psi_path = f"psi_scores_{drift_type}.png"
            plot_psi_scores(psi_scores, drift_type, psi_path)
            mlflow.log_artifact(psi_path)
            cleanup_files.append(psi_path)
            
            # 3. Performance comparison (if reweighting was applied)
            if adapted_metrics:
                perf_path = f"performance_comparison_{drift_type}.png"
                plot_model_performance_comparison(baseline_metrics, adapted_metrics, 
                                                 drift_type, perf_path)
                mlflow.log_artifact(perf_path)
                cleanup_files.append(perf_path)
            
            # 4. Confusion matrices
            cm_baseline_path = f"cm_baseline_{drift_type}.png"
            plot_confusion_matrix(y_test_drifted, y_pred_baseline,
                                f"Baseline Model: {drift_type.capitalize()} Drift",
                                cm_baseline_path)
            mlflow.log_artifact(cm_baseline_path)
            cleanup_files.append(cm_baseline_path)
            
            if adapted_metrics:
                cm_adapted_path = f"cm_adapted_{drift_type}.png"
                plot_confusion_matrix(y_test_drifted, y_pred_adapted,
                                    f"Adapted Model: {drift_type.capitalize()} Drift",
                                    cm_adapted_path)
                mlflow.log_artifact(cm_adapted_path)
                cleanup_files.append(cm_adapted_path)
            
            # 5. Distribution shift plots (reuse from your notebook)
            if drift_type in ['covariate']:
                for feature in ['tenure', 'MonthlyCharges']:
                    dist_path = f"drift_distribution_{feature}_{drift_type}.png"
                    plot_drift_distribution(X_test_original, X_test_drifted, 
                                          feature, dist_path)
                    mlflow.log_artifact(dist_path)
                    cleanup_files.append(dist_path)
            
            print(f"Logged {len(cleanup_files)} artifacts to MLflow")
            print(f"MLflow Run ID: {run.info.run_id}\n")
        
        print(f"{'='*80}")
        print("EXPERIMENT COMPLETED SUCCESSFULLY!")
        print(f"{'='*80}\n")
        
        # Return results
        return {
            'adversarial_auc': adv_auc,
            'drift_severity': drift_severity,
            'psi_scores': psi_scores,
            'avg_psi': avg_psi,
            'baseline_metrics': baseline_metrics,
            'adapted_metrics': adapted_metrics,
            'density_ratios': density_ratios
        }
    
    finally:
        # Cleanup local files
        for f in cleanup_files:
            if os.path.exists(f):
                os.remove(f)

In [22]:
#file_path = 'C:/Users/ldmag/Documents/GitHub/Code-Assignments-Projects/Projects/MLOps Drift Detection and Pipeline Optimization/data/Telco-Churn.csv'
#run_id = run_baseline_training(file_path)

results = run_adversarial_drift_experiment(setup_run_id=run_id, drift_type='covariate', drift_threshold=0.7, use_reweighting=True, drift_intensity=0.9)


ADVERSARIAL DRIFT DETECTION EXPERIMENT: COVARIATE DRIFT

STEP 1: Loading baseline model and data...


Downloading artifacts:   0%|          | 0/1 [04:07<?, ?it/s]
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 5/5 [00:01<00:00,  3.13it/s]
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:01<00:00,  1.56s/it]
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  2.38it/s]
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:01<00:00,  1.92s/it]
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  2.56it/s]


Loaded 5634 training samples
Loaded 1409 test samples

STEP 2: Simulating covariate drift...
Applied covariate drift to test data

STEP 3: Running adversarial validation...
Running validation on features
Adversarial AUC Score: 0.9224
  ‚Üí Drift Severity: SEVERE

STEP 4: Calculating PSI scores per feature...
Average PSI: 0.0476

STEP 5: Evaluating baseline model on drifted data...
‚úì Baseline Accuracy: 0.7757
‚úì Baseline ROC-AUC: 0.7902

STEP 6: Drift detected (AUC=0.9224 >= 0.7)!
         Applying density ratio reweighting...

‚úì Density ratios computed
  ‚Üí Mean weight: 0.316
  ‚Üí Std weight: 0.093
  ‚Üí Min/Max weight: 0.300 / 2.785

‚úì Adapted Model Accuracy: 0.7743
‚úì Adapted Model ROC-AUC: 0.8021

‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
IMPROVEMENT: Accuracy = -0.0014, ROC-AUC = +0.0119
‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ


`shade` is now deprecated in favor of `fill`; setting `fill=True`.
This will become an error in seaborn v0.14.0; please update your code.

  sns.kdeplot(df_original[feature_name], label='Original Test Data', color='blue', shade=True)

`shade` is now deprecated in favor of `fill`; setting `fill=True`.
This will become an error in seaborn v0.14.0; please update your code.

  sns.kdeplot(df_drifted[feature_name], label='Drifted Test Data', color='red', shade=True)

`shade` is now deprecated in favor of `fill`; setting `fill=True`.
This will become an error in seaborn v0.14.0; please update your code.

  sns.kdeplot(df_original[feature_name], label='Original Test Data', color='blue', shade=True)


Saved visualization to drift_distribution_tenure_covariate.png
Saved visualization to drift_distribution_MonthlyCharges_covariate.png
Logged 11 artifacts to MLflow
MLflow Run ID: 212683cd5a3848f18348a86315010d9c

üèÉ View run Adversarial-Drift-covariate at: http://localhost:5000/#/experiments/3/runs/212683cd5a3848f18348a86315010d9c
üß™ View experiment at: http://localhost:5000/#/experiments/3
EXPERIMENT COMPLETED SUCCESSFULLY!




`shade` is now deprecated in favor of `fill`; setting `fill=True`.
This will become an error in seaborn v0.14.0; please update your code.

  sns.kdeplot(df_drifted[feature_name], label='Drifted Test Data', color='red', shade=True)


In [23]:
results = run_adversarial_drift_experiment(setup_run_id=run_id, drift_type='covariate', drift_threshold=0.7, use_reweighting=True, drift_intensity=0.2)


ADVERSARIAL DRIFT DETECTION EXPERIMENT: COVARIATE DRIFT

STEP 1: Loading baseline model and data...


Downloading artifacts:   0%|          | 0/1 [04:06<?, ?it/s]
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 5/5 [00:01<00:00,  3.46it/s]
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:01<00:00,  1.52s/it]
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  2.19it/s]
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:01<00:00,  1.53s/it]
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  2.80it/s]


Loaded 5634 training samples
Loaded 1409 test samples

STEP 2: Simulating covariate drift...
Applied covariate drift to test data

STEP 3: Running adversarial validation...
Running validation on features
Adversarial AUC Score: 0.8408
  ‚Üí Drift Severity: SEVERE

STEP 4: Calculating PSI scores per feature...
Average PSI: 0.0283

STEP 5: Evaluating baseline model on drifted data...
‚úì Baseline Accuracy: 0.7864
‚úì Baseline ROC-AUC: 0.7986

STEP 6: Drift detected (AUC=0.8408 >= 0.7)!
         Applying density ratio reweighting...

‚úì Density ratios computed
  ‚Üí Mean weight: 0.316
  ‚Üí Std weight: 0.087
  ‚Üí Min/Max weight: 0.300 / 2.530

‚úì Adapted Model Accuracy: 0.7878
‚úì Adapted Model ROC-AUC: 0.8142

‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
IMPROVEMENT: Accuracy = +0.0014, ROC-AUC = +0.0156
‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ


`shade` is now deprecated in favor of `fill`; setting `fill=True`.
This will become an error in seaborn v0.14.0; please update your code.

  sns.kdeplot(df_original[feature_name], label='Original Test Data', color='blue', shade=True)

`shade` is now deprecated in favor of `fill`; setting `fill=True`.
This will become an error in seaborn v0.14.0; please update your code.

  sns.kdeplot(df_drifted[feature_name], label='Drifted Test Data', color='red', shade=True)

`shade` is now deprecated in favor of `fill`; setting `fill=True`.
This will become an error in seaborn v0.14.0; please update your code.

  sns.kdeplot(df_original[feature_name], label='Original Test Data', color='blue', shade=True)

`shade` is now deprecated in favor of `fill`; setting `fill=True`.
This will become an error in seaborn v0.14.0; please update your code.

  sns.kdeplot(df_drifted[feature_name], label='Drifted Test Data', color='red', shade=True)


Saved visualization to drift_distribution_MonthlyCharges_covariate.png
Logged 11 artifacts to MLflow
MLflow Run ID: 4adf16604e674ce99247cd7a86298b67

üèÉ View run Adversarial-Drift-covariate at: http://localhost:5000/#/experiments/3/runs/4adf16604e674ce99247cd7a86298b67
üß™ View experiment at: http://localhost:5000/#/experiments/3
EXPERIMENT COMPLETED SUCCESSFULLY!



In [24]:
results = run_adversarial_drift_experiment(setup_run_id=run_id, drift_type='covariate', drift_threshold=0.7, use_reweighting=True, drift_intensity=0.5)


ADVERSARIAL DRIFT DETECTION EXPERIMENT: COVARIATE DRIFT

STEP 1: Loading baseline model and data...


Downloading artifacts:   0%|          | 0/1 [04:06<?, ?it/s]
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 5/5 [00:01<00:00,  3.62it/s]
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:01<00:00,  1.44s/it]
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  2.56it/s]
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:01<00:00,  1.43s/it]
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  2.77it/s]


Loaded 5634 training samples
Loaded 1409 test samples

STEP 2: Simulating covariate drift...
Applied covariate drift to test data

STEP 3: Running adversarial validation...
Running validation on features
Adversarial AUC Score: 0.8733
  ‚Üí Drift Severity: SEVERE

STEP 4: Calculating PSI scores per feature...
Average PSI: 0.0355

STEP 5: Evaluating baseline model on drifted data...
‚úì Baseline Accuracy: 0.7842
‚úì Baseline ROC-AUC: 0.7947

STEP 6: Drift detected (AUC=0.8733 >= 0.7)!
         Applying density ratio reweighting...

‚úì Density ratios computed
  ‚Üí Mean weight: 0.316
  ‚Üí Std weight: 0.077
  ‚Üí Min/Max weight: 0.300 / 1.714

‚úì Adapted Model Accuracy: 0.7793
‚úì Adapted Model ROC-AUC: 0.8155

‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
IMPROVEMENT: Accuracy = -0.0050, ROC-AUC = +0.0208
‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ


`shade` is now deprecated in favor of `fill`; setting `fill=True`.
This will become an error in seaborn v0.14.0; please update your code.

  sns.kdeplot(df_original[feature_name], label='Original Test Data', color='blue', shade=True)

`shade` is now deprecated in favor of `fill`; setting `fill=True`.
This will become an error in seaborn v0.14.0; please update your code.

  sns.kdeplot(df_drifted[feature_name], label='Drifted Test Data', color='red', shade=True)

`shade` is now deprecated in favor of `fill`; setting `fill=True`.
This will become an error in seaborn v0.14.0; please update your code.

  sns.kdeplot(df_original[feature_name], label='Original Test Data', color='blue', shade=True)

`shade` is now deprecated in favor of `fill`; setting `fill=True`.
This will become an error in seaborn v0.14.0; please update your code.

  sns.kdeplot(df_drifted[feature_name], label='Drifted Test Data', color='red', shade=True)


Saved visualization to drift_distribution_MonthlyCharges_covariate.png
Logged 11 artifacts to MLflow
MLflow Run ID: cb45078bcb064cefa9e142543c49f20b

üèÉ View run Adversarial-Drift-covariate at: http://localhost:5000/#/experiments/3/runs/cb45078bcb064cefa9e142543c49f20b
üß™ View experiment at: http://localhost:5000/#/experiments/3
EXPERIMENT COMPLETED SUCCESSFULLY!



## Comparing across intensities

In [25]:
def run_multi_intensity_drift_experiment(setup_run_id, drift_type='covariate'):
    """
    Run drift detection and adaptation experiments across multiple drift intensities.
    
    This creates a comprehensive evaluation showing:
    1. How drift severity affects detection (adversarial AUC, PSI)
    2. How baseline model degrades with increasing drift
    3. Whether adaptation effectiveness scales with drift intensity
    4. The bias-variance tradeoff of importance weighting
    
    Parameters:
    -----------
    setup_run_id : str
        MLflow run ID from baseline training
    drift_type : str
        'covariate' or 'concept'
        
    Returns:
    --------
    pd.DataFrame : Results across all intensities
    """
    print(f"\n{'='*80}")
    print(f"MULTI-INTENSITY DRIFT EXPERIMENT: {drift_type.upper()} DRIFT")
    print(f"{'='*80}\n")
    
    # Define intensity levels to test
    intensities = [0.0, 0.2, 0.3, 0.5, 0.7, 0.9]
    drift_fractions = [0.0, 0.2, 0.3, 0.4, 0.5, 0.6]
    
    # Results storage
    results = []
    
    # Setup MLflow
    mlflow.set_tracking_uri("http://localhost:5000")
    mlflow.set_experiment("telco-multi-intensity-drift")
    
    client = mlflow.tracking.MlflowClient()
    local_download_path = "."
    
    # Load baseline assets once
    print("Loading baseline model and data...")
    model_pipeline_uri = f"runs:/{setup_run_id}/model_pipeline"
    baseline_model = mlflow.sklearn.load_model(model_pipeline_uri)
    preprocessor = baseline_model.named_steps['preprocessor']
    
    for f in ["X_train.csv", "y_train.csv", "X_test.csv", "y_test.csv"]:
        client.download_artifacts(setup_run_id, f, local_download_path)
    
    X_train_original = pd.read_csv("X_train.csv")
    y_train_original = pd.read_csv("y_train.csv").squeeze()
    X_test_original = pd.read_csv("X_test.csv")
    y_test_original = pd.read_csv("y_test.csv").squeeze()
    
    test_data_original = X_test_original.copy()
    test_data_original['Churn'] = y_test_original
    
    print(f"‚úì Loaded {len(X_train_original)} training samples")
    print(f"‚úì Loaded {len(X_test_original)} test samples\n")
    
    # Test each intensity level
    for intensity, drift_fraction in zip(intensities, drift_fractions):
        print(f"\n{'‚îÄ'*80}")
        print(f"Testing Intensity: {intensity:.1f}, Drift Fraction: {drift_fraction:.1f}")
        print(f"{'‚îÄ'*80}\n")
        
        # Simulate drift
        if intensity == 0.0:
            # No drift scenario
            X_test_drifted = X_test_original.copy()
            y_test_drifted = y_test_original.copy()
        else:
            if drift_type == 'covariate':
                drifted_test_data = simulate_drift(
                    test_data_original,
                    drift_type='covariate',
                    drift_fraction=drift_fraction,
                    intensity=intensity,
                    random_state=42
                )
            else:  # concept drift
                drifted_test_data = simulate_drift(
                    test_data_original,
                    drift_type='concept',
                    drift_fraction=drift_fraction,
                    intensity=intensity,
                    thresholds={"tenure": 12, "MonthlyCharges": 75},
                    random_state=42
                )
            
            X_test_drifted = drifted_test_data.drop('Churn', axis=1)
            y_test_drifted = drifted_test_data['Churn']
        
        # Adversarial validation for drift detection
        adv_auc, adv_classifier, feature_importances = adversarial_validation_score(
            X_train_original,
            X_test_drifted,
            preprocessor=preprocessor
        )
        
        # Calculate PSI
        numerical_features = ['tenure', 'MonthlyCharges', 'TotalCharges']
        psi_scores = {}
        for feature in numerical_features:
            psi = calculate_psi(
                X_train_original[feature].values,
                X_test_drifted[feature].values
            )
            psi_scores[feature] = psi
        avg_psi = np.mean(list(psi_scores.values()))
        
        # Evaluate baseline model
        y_pred_baseline = baseline_model.predict(X_test_drifted)
        y_pred_proba_baseline = baseline_model.predict_proba(X_test_drifted)[:, 1]
        
        baseline_acc = accuracy_score(y_test_drifted, y_pred_baseline)
        baseline_auc = roc_auc_score(y_test_drifted, y_pred_proba_baseline)
        
        # Adaptation (only if drift detected)
        adapted_acc = None
        adapted_auc = None
        ess_ratio = None
        weight_cv = None
        
        if adv_auc >= 0.6:  # Threshold for attempting adaptation
            try:
                # Estimate density ratios
                density_ratios = estimate_density_ratio_adversarial(
                    X_train_original,
                    X_test_drifted,
                    preprocessor=preprocessor
                )
                
                # Calculate weight diagnostics
                mean_w = density_ratios.mean()
                std_w = density_ratios.std()
                weight_cv = std_w / mean_w
                ess = (density_ratios.sum() ** 2) / (density_ratios ** 2).sum()
                ess_ratio = ess / len(density_ratios)
                
                # Retrain with weights
                from sklearn.base import clone
                base_classifier = baseline_model.named_steps['classifier']
                adapted_classifier = clone(base_classifier)
                
                X_train_transformed = preprocessor.transform(X_train_original)
                adapted_classifier.fit(
                    X_train_transformed,
                    y_train_original,
                    sample_weight=density_ratios
                )
                
                adapted_pipeline = Pipeline(steps=[
                    ('preprocessor', preprocessor),
                    ('classifier', adapted_classifier)
                ])
                
                # Evaluate adapted model
                y_pred_adapted = adapted_pipeline.predict(X_test_drifted)
                y_pred_proba_adapted = adapted_pipeline.predict_proba(X_test_drifted)[:, 1]
                
                adapted_acc = accuracy_score(y_test_drifted, y_pred_adapted)
                adapted_auc = roc_auc_score(y_test_drifted, y_pred_proba_adapted)
                
                print(f"‚úì Adaptation completed (ESS ratio: {ess_ratio:.2%}, CV: {weight_cv:.2f})")
                
            except Exception as e:
                print(f"‚ö† Adaptation failed: {str(e)}")
        
        # Store results
        result = {
            'intensity': intensity,
            'drift_fraction': drift_fraction,
            'adversarial_auc': adv_auc,
            'avg_psi': avg_psi,
            'baseline_accuracy': baseline_acc,
            'baseline_roc_auc': baseline_auc,
            'adapted_accuracy': adapted_acc,
            'adapted_roc_auc': adapted_auc,
            'accuracy_improvement': (adapted_acc - baseline_acc) if adapted_acc else 0,
            'roc_auc_improvement': (adapted_auc - baseline_auc) if adapted_auc else 0,
            'ess_ratio': ess_ratio,
            'weight_cv': weight_cv
        }
        results.append(result)
        
        # Print summary
        print(f"\nResults for intensity {intensity:.1f}:")
        print(f"  Adversarial AUC: {adv_auc:.4f}")
        print(f"  Average PSI: {avg_psi:.4f}")
        print(f"  Baseline ROC-AUC: {baseline_auc:.4f}")
        if adapted_auc:
            print(f"  Adapted ROC-AUC: {adapted_auc:.4f}")
            print(f"  Improvement: {adapted_auc - baseline_auc:+.4f}")
    
    # Convert to DataFrame
    results_df = pd.DataFrame(results)
    
    # Log comprehensive results to MLflow
    with mlflow.start_run(run_name=f"Multi-Intensity-{drift_type}"):
        mlflow.log_param("drift_type", drift_type)
        mlflow.log_param("num_intensities", len(intensities))
        
        # Log summary table
        results_df.to_csv("multi_intensity_results.csv", index=False)
        mlflow.log_artifact("multi_intensity_results.csv")
        
        # Create comprehensive visualizations
        create_multi_intensity_visualizations(results_df, drift_type)
    
    # Cleanup
    for f in ["X_train.csv", "y_train.csv", "X_test.csv", "y_test.csv", 
              "multi_intensity_results.csv"]:
        if os.path.exists(f):
            os.remove(f)
    
    print(f"\n{'='*80}")
    print("MULTI-INTENSITY EXPERIMENT COMPLETED!")
    print(f"{'='*80}\n")
    
    return results_df


def create_multi_intensity_visualizations(results_df, drift_type):
    """
    Create comprehensive visualizations for multi-intensity experiment.
    """
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    
    # Plot 1: Detection Metrics vs Intensity
    ax1 = axes[0, 0]
    ax1.plot(results_df['intensity'], results_df['adversarial_auc'], 
             marker='o', linewidth=2, markersize=8, label='Adversarial AUC', color='red')
    ax1.plot(results_df['intensity'], results_df['avg_psi'], 
             marker='s', linewidth=2, markersize=8, label='Average PSI', color='orange')
    ax1.axhline(y=0.7, color='gray', linestyle='--', alpha=0.5, label='Detection Threshold')
    ax1.set_xlabel('Drift Intensity', fontsize=12)
    ax1.set_ylabel('Detection Metric Value', fontsize=12)
    ax1.set_title('Drift Detection Metrics vs Intensity', fontsize=14, fontweight='bold')
    ax1.legend()
    ax1.grid(True, alpha=0.3)
    
    # Plot 2: Model Performance Degradation
    ax2 = axes[0, 1]
    ax2.plot(results_df['intensity'], results_df['baseline_roc_auc'], 
             marker='o', linewidth=2, markersize=8, label='Baseline Model', color='steelblue')
    ax2.plot(results_df['intensity'], results_df['adapted_roc_auc'], 
             marker='^', linewidth=2, markersize=8, label='Adapted Model', color='forestgreen')
    ax2.set_xlabel('Drift Intensity', fontsize=12)
    ax2.set_ylabel('ROC-AUC Score', fontsize=12)
    ax2.set_title('Model Performance vs Drift Intensity', fontsize=14, fontweight='bold')
    ax2.legend()
    ax2.grid(True, alpha=0.3)
    
    # Plot 3: Improvement from Adaptation
    ax3 = axes[1, 0]
    colors = ['green' if x > 0 else 'red' for x in results_df['roc_auc_improvement']]
    ax3.bar(results_df['intensity'], results_df['roc_auc_improvement'], 
            color=colors, alpha=0.7, edgecolor='black')
    ax3.axhline(y=0, color='black', linestyle='-', linewidth=1)
    ax3.set_xlabel('Drift Intensity', fontsize=12)
    ax3.set_ylabel('ROC-AUC Improvement', fontsize=12)
    ax3.set_title('Adaptation Benefit vs Drift Intensity', fontsize=14, fontweight='bold')
    ax3.grid(True, alpha=0.3, axis='y')
    
    # Plot 4: Weight Quality Metrics
    ax4 = axes[1, 1]
    ax4_twin = ax4.twinx()
    
    # Filter out None values
    valid_data = results_df[results_df['ess_ratio'].notna()]
    
    ax4.plot(valid_data['intensity'], valid_data['ess_ratio'], 
             marker='o', linewidth=2, markersize=8, label='ESS Ratio', color='purple')
    ax4_twin.plot(valid_data['intensity'], valid_data['weight_cv'], 
                  marker='s', linewidth=2, markersize=8, label='Weight CV', color='darkorange')
    
    ax4.set_xlabel('Drift Intensity', fontsize=12)
    ax4.set_ylabel('Effective Sample Size Ratio', fontsize=12, color='purple')
    ax4_twin.set_ylabel('Coefficient of Variation', fontsize=12, color='darkorange')
    ax4.set_title('Weight Quality vs Drift Intensity', fontsize=14, fontweight='bold')
    ax4.tick_params(axis='y', labelcolor='purple')
    ax4_twin.tick_params(axis='y', labelcolor='darkorange')
    
    # Combine legends
    lines1, labels1 = ax4.get_legend_handles_labels()
    lines2, labels2 = ax4_twin.get_legend_handles_labels()
    ax4.legend(lines1 + lines2, labels1 + labels2, loc='upper left')
    ax4.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.savefig(f"multi_intensity_analysis_{drift_type}.png", dpi=300)
    mlflow.log_artifact(f"multi_intensity_analysis_{drift_type}.png")
    plt.close()
    
    print(f"‚úì Saved multi-intensity visualization")
    
    # Cleanup
    if os.path.exists(f"multi_intensity_analysis_{drift_type}.png"):
        os.remove(f"multi_intensity_analysis_{drift_type}.png")

In [26]:
results_df = run_multi_intensity_drift_experiment(
    setup_run_id=run_id,
    drift_type='covariate'
)

2025/11/09 22:15:42 INFO mlflow.tracking.fluent: Experiment with name 'telco-multi-intensity-drift' does not exist. Creating a new experiment.



MULTI-INTENSITY DRIFT EXPERIMENT: COVARIATE DRIFT

Loading baseline model and data...


Downloading artifacts:   0%|          | 0/1 [04:06<?, ?it/s]
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 5/5 [00:01<00:00,  3.65it/s]
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:01<00:00,  1.50s/it]
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:01<00:00,  1.36s/it]
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  2.63it/s]
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  2.73it/s]


‚úì Loaded 5634 training samples
‚úì Loaded 1409 test samples


‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
Testing Intensity: 0.0, Drift Fraction: 0.0
‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

‚úì Adaptation completed (ESS ratio: 99.22%, CV: 0.09)

Results for intensity 0.0:
  Adversarial AUC: 0.7382
  Average PSI: 0.0106
  Baseline ROC-AUC: 0.8303
  Adapted ROC-AUC: 0.8376
  Improvement: +0.0072

‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

In [6]:
def estimate_doubly_robust_weights(X_train, y_train, X_test, preprocessor=None,
                                    n_estimators=100, max_depth=3, cv_folds=5):
    """
    Estimate doubly robust importance weights for covariate shift adaptation.
    
    Combines:
    1. Density ratio estimation (via adversarial validation)
    2. Outcome model estimation (via cross-validation)
    3. Bias correction term
    
    This provides robustness to misspecification in either the density ratio
    or outcome model.
    
    Parameters:
    -----------
    X_train : pd.DataFrame
        Training features
    y_train : pd.Series or np.array
        Training labels
    X_test : pd.DataFrame
        Test features (drifted)
    preprocessor : ColumnTransformer
        Feature preprocessor
    n_estimators : int
        Trees for gradient boosting discriminator
    max_depth : int
        Max depth for discriminator
    cv_folds : int
        Cross-validation folds for outcome model
        
    Returns:
    --------
    np.array : Doubly robust importance weights
    dict : Diagnostic information
    """
    from sklearn.ensemble import GradientBoostingClassifier
    from sklearn.model_selection import cross_val_predict
    from sklearn.base import clone
    
    print("  ‚Üí Estimating doubly robust importance weights...")
    
    # =========================================================================
    # STEP 1: Preprocess data
    # =========================================================================
    if 'customerID' in X_train.columns:
        X_train = X_train.drop(columns=['customerID'])
    if 'customerID' in X_test.columns:
        X_test = X_test.drop(columns=['customerID'])
    
    if preprocessor is None:
        preprocessor = get_preprocessor(X_train)
    
    X_train_transformed = preprocessor.fit_transform(X_train)
    X_test_transformed = preprocessor.transform(X_test)
    
    # Convert sparse to dense
    if hasattr(X_train_transformed, 'toarray'):
        X_train_transformed = X_train_transformed.toarray()
    if hasattr(X_test_transformed, 'toarray'):
        X_test_transformed = X_test_transformed.toarray()
    
    # =========================================================================
    # STEP 2: Estimate density ratios (propensity model)
    # =========================================================================
    print("  ‚Üí Step 1/3: Estimating density ratios...")
    
    # Combine data for adversarial validation
    X_combined = np.vstack([X_train_transformed, X_test_transformed])
    y_domain = np.concatenate([
        np.zeros(len(X_train_transformed)),
        np.ones(len(X_test_transformed))
    ])
    
    # Train discriminator
    discriminator = GradientBoostingClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        learning_rate=0.1,
        random_state=42,
        subsample=0.8
    )
    discriminator.fit(X_combined, y_domain)
    
    # Estimate density ratios for training data
    test_proba = discriminator.predict_proba(X_train_transformed)[:, 1]
    epsilon = 1e-6
    density_ratios = (test_proba + epsilon) / (1 - test_proba + epsilon)
    density_ratios = np.clip(density_ratios, 0.1, 10)
    
    print(f"     ‚úì Density ratios: mean={density_ratios.mean():.3f}, "
          f"std={density_ratios.std():.3f}")
    
    # =========================================================================
    # STEP 3: Estimate outcome model via cross-validation
    # =========================================================================
    print("  ‚Üí Step 2/3: Estimating outcome model (CV)...")
    
    # Use same classifier type as baseline for outcome model
    from sklearn.ensemble import RandomForestClassifier
    outcome_model = RandomForestClassifier(random_state=42, n_estimators=100, max_depth=10)
    
    # Get out-of-fold predictions (unbiased estimates)
    y_pred_cv = cross_val_predict(
        outcome_model,
        X_train_transformed,
        y_train,
        cv=cv_folds,
        method='predict_proba',
        n_jobs=-1
    )[:, 1]  # Probability of positive class
    
    print(f"     ‚úì CV predictions: mean={y_pred_cv.mean():.3f}, "
          f"std={y_pred_cv.std():.3f}")
    
    # =========================================================================
    # STEP 4: Compute doubly robust weights
    # =========================================================================
    print("  ‚Üí Step 3/3: Computing doubly robust correction...")
    
    # Convert labels to array
    y_train_array = np.array(y_train)
    
    # Compute residuals (prediction errors)
    residuals = y_train_array - y_pred_cv
    
    # Doubly robust weight formula:
    # w_DR(x) = w(x) * [1 + (y - ≈∑(x))]
    # where w(x) is density ratio, ≈∑(x) is outcome model prediction
    
    dr_weights = density_ratios * (1 + residuals)
    
    # Normalize weights to sum to n (standard practice)
    dr_weights = dr_weights / dr_weights.sum() * len(dr_weights)
    
    # Clip extreme values for stability
    dr_weights = np.clip(dr_weights, 0.05, 15)
    
    print(f"     ‚úì DR weights: mean={dr_weights.mean():.3f}, "
          f"std={dr_weights.std():.3f}")
    
    # =========================================================================
    # STEP 5: Calculate diagnostics
    # =========================================================================
    
    # Effective sample size
    ess = (dr_weights.sum() ** 2) / (dr_weights ** 2).sum()
    ess_ratio = ess / len(dr_weights)
    
    # Weight statistics
    weight_cv = dr_weights.std() / dr_weights.mean()
    
    # Prediction quality (outcome model)
    from sklearn.metrics import roc_auc_score
    outcome_auc = roc_auc_score(y_train_array, y_pred_cv)
    
    diagnostics = {
        'density_ratios_mean': density_ratios.mean(),
        'density_ratios_std': density_ratios.std(),
        'dr_weights_mean': dr_weights.mean(),
        'dr_weights_std': dr_weights.std(),
        'ess': ess,
        'ess_ratio': ess_ratio,
        'weight_cv': weight_cv,
        'outcome_model_auc': outcome_auc,
        'avg_residual': np.abs(residuals).mean()
    }
    
    print(f"\n Diagnostics:")
    print(f"     ‚Ä¢ ESS Ratio: {ess_ratio:.2%}")
    print(f"     ‚Ä¢ Weight CV: {weight_cv:.3f}")
    print(f"     ‚Ä¢ Outcome Model AUC: {outcome_auc:.4f}")
    print(f"     ‚Ä¢ Avg Absolute Residual: {diagnostics['avg_residual']:.4f}")
    
    return dr_weights, diagnostics


def run_doubly_robust_experiment(setup_run_id, drift_type='covariate',
                                   drift_threshold=0.7, compare_methods=True):
    """
    Run experiment comparing standard importance weighting vs doubly robust.
    
    Parameters:
    -----------
    setup_run_id : str
        MLflow run ID from baseline training
    drift_type : str
        'covariate' or 'concept'
    drift_threshold : float
        Adversarial AUC threshold for triggering adaptation
    compare_methods : bool
        If True, compares standard IW vs DR-IW
        
    Returns:
    --------
    dict : Results comparing both methods
    """
    print(f"\n{'='*80}")
    print(f"DOUBLY ROBUST IMPORTANCE WEIGHTING EXPERIMENT: {drift_type.upper()} DRIFT")
    print(f"{'='*80}\n")
    
    # Setup MLflow
    mlflow.set_tracking_uri("http://localhost:5000")
    mlflow.set_experiment("telco-doubly-robust-drift")
    
    client = mlflow.tracking.MlflowClient()
    local_download_path = "."
    cleanup_files = []
    
    try:
        # =====================================================================
        # STEP 1: Load Baseline Assets
        # =====================================================================
        print("STEP 1: Loading baseline model and data...")
        
        model_pipeline_uri = f"runs:/{setup_run_id}/model_pipeline"
        baseline_model = mlflow.sklearn.load_model(model_pipeline_uri)
        preprocessor = baseline_model.named_steps['preprocessor']
        
        for f in ["X_train.csv", "y_train.csv", "X_test.csv", "y_test.csv"]:
            client.download_artifacts(setup_run_id, f, local_download_path)
            cleanup_files.append(f)
        
        X_train_original = pd.read_csv("X_train.csv")
        y_train_original = pd.read_csv("y_train.csv").squeeze()
        X_test_original = pd.read_csv("X_test.csv")
        y_test_original = pd.read_csv("y_test.csv").squeeze()
        
        if 'customerID' in X_train_original.columns:
            X_train_original = X_train_original.drop(columns=['customerID'])
        if 'customerID' in X_test_original.columns:
            X_test_original = X_test_original.drop(columns=['customerID'])
        
        test_data_original = X_test_original.copy()
        test_data_original['Churn'] = y_test_original
        
        print(f"‚úì Loaded {len(X_train_original)} training samples")
        print(f"‚úì Loaded {len(X_test_original)} test samples\n")
        
        # =====================================================================
        # STEP 2: Simulate Drift
        # =====================================================================
        print(f"STEP 2: Simulating {drift_type} drift...")
        
        if drift_type == 'covariate':
            drifted_test_data = simulate_drift(
                test_data_original,
                drift_type='covariate',
                drift_fraction=0.4,
                intensity=0.5,
                random_state=42
            )
        else:
            drifted_test_data = simulate_drift(
                test_data_original,
                drift_type='concept',
                drift_fraction=0.4,
                intensity=0.5,
                thresholds={"tenure": 12, "MonthlyCharges": 75},
                random_state=42
            )
        
        X_test_drifted = drifted_test_data.drop('Churn', axis=1)
        y_test_drifted = drifted_test_data['Churn']
        
        print(f"‚úì Applied {drift_type} drift to test data\n")
        
        # =====================================================================
        # STEP 3: Drift Detection
        # =====================================================================
        print("STEP 3: Running drift detection...")
        
        adv_auc, adv_classifier, feature_importances = adversarial_validation_score(
            X_train_original,
            X_test_drifted,
            preprocessor=preprocessor
        )
        
        print(f"‚úì Adversarial AUC: {adv_auc:.4f}\n")
        
        # =====================================================================
        # STEP 4: Baseline Evaluation
        # =====================================================================
        print("STEP 4: Evaluating baseline model...")
        
        y_pred_baseline = baseline_model.predict(X_test_drifted)
        y_pred_proba_baseline = baseline_model.predict_proba(X_test_drifted)[:, 1]
        
        baseline_metrics = {
            'accuracy': accuracy_score(y_test_drifted, y_pred_baseline),
            'roc_auc': roc_auc_score(y_test_drifted, y_pred_proba_baseline)
        }
        
        print(f"‚úì Baseline ROC-AUC: {baseline_metrics['roc_auc']:.4f}\n")
        
        # =====================================================================
        # STEP 5: Adaptation with Different Methods
        # =====================================================================
        
        results = {
            'baseline': baseline_metrics,
            'standard_iw': None,
            'doubly_robust': None
        }
        
        if adv_auc >= drift_threshold:
            print(f"STEP 5: Drift detected (AUC={adv_auc:.4f})!")
            print("         Testing adaptation methods...\n")
            
            # -----------------------------------------------------------------
            # Method 1: Standard Importance Weighting
            # -----------------------------------------------------------------
            if compare_methods:
                print("  [Method 1/2] Standard Importance Weighting...")
                
                density_ratios_std = estimate_density_ratio_adversarial(
                    X_train_original,
                    X_test_drifted,
                    preprocessor=preprocessor
                )
                
                # Train model with standard IW
                from sklearn.base import clone
                classifier_std = clone(baseline_model.named_steps['classifier'])
                X_train_transformed = preprocessor.transform(X_train_original)
                
                classifier_std.fit(
                    X_train_transformed,
                    y_train_original,
                    sample_weight=density_ratios_std
                )
                
                pipeline_std = Pipeline(steps=[
                    ('preprocessor', preprocessor),
                    ('classifier', classifier_std)
                ])
                
                # Evaluate
                y_pred_std = pipeline_std.predict(X_test_drifted)
                y_pred_proba_std = pipeline_std.predict_proba(X_test_drifted)[:, 1]
                
                std_iw_metrics = {
                    'accuracy': accuracy_score(y_test_drifted, y_pred_std),
                    'roc_auc': roc_auc_score(y_test_drifted, y_pred_proba_std),
                    'improvement_acc': accuracy_score(y_test_drifted, y_pred_std) - baseline_metrics['accuracy'],
                    'improvement_auc': roc_auc_score(y_test_drifted, y_pred_proba_std) - baseline_metrics['roc_auc']
                }
                
                results['standard_iw'] = std_iw_metrics
                
                print(f"     ‚úì Standard IW ROC-AUC: {std_iw_metrics['roc_auc']:.4f}")
                print(f"       Improvement: {std_iw_metrics['improvement_auc']:+.4f}\n")
            
            # -----------------------------------------------------------------
            # Method 2: Doubly Robust Importance Weighting
            # -----------------------------------------------------------------
            print("  [Method 2/2] Doubly Robust Importance Weighting...")
            
            dr_weights, dr_diagnostics = estimate_doubly_robust_weights(
                X_train_original,
                y_train_original,
                X_test_drifted,
                preprocessor=preprocessor
            )
            
            # Train model with DR weights
            classifier_dr = clone(baseline_model.named_steps['classifier'])
            
            classifier_dr.fit(
                X_train_transformed,
                y_train_original,
                sample_weight=dr_weights
            )
            
            pipeline_dr = Pipeline(steps=[
                ('preprocessor', preprocessor),
                ('classifier', classifier_dr)
            ])
            
            # Evaluate
            y_pred_dr = pipeline_dr.predict(X_test_drifted)
            y_pred_proba_dr = pipeline_dr.predict_proba(X_test_drifted)[:, 1]
            
            dr_metrics = {
                'accuracy': accuracy_score(y_test_drifted, y_pred_dr),
                'roc_auc': roc_auc_score(y_test_drifted, y_pred_proba_dr),
                'improvement_acc': accuracy_score(y_test_drifted, y_pred_dr) - baseline_metrics['accuracy'],
                'improvement_auc': roc_auc_score(y_test_drifted, y_pred_proba_dr) - baseline_metrics['roc_auc'],
                **dr_diagnostics
            }
            
            results['doubly_robust'] = dr_metrics
            
            print(f"\n     ‚úì Doubly Robust ROC-AUC: {dr_metrics['roc_auc']:.4f}")
            print(f"       Improvement: {dr_metrics['improvement_auc']:+.4f}\n")
            
            # -----------------------------------------------------------------
            # Comparison Summary
            # -----------------------------------------------------------------
            if compare_methods and results['standard_iw']:
                print(f"\n{'‚îÄ'*60}")
                print("COMPARISON SUMMARY")
                print(f"{'‚îÄ'*60}")
                print(f"Baseline ROC-AUC:          {baseline_metrics['roc_auc']:.4f}")
                print(f"Standard IW ROC-AUC:       {std_iw_metrics['roc_auc']:.4f} "
                      f"({std_iw_metrics['improvement_auc']:+.4f})")
                print(f"Doubly Robust ROC-AUC:     {dr_metrics['roc_auc']:.4f} "
                      f"({dr_metrics['improvement_auc']:+.4f})")
                
                if dr_metrics['roc_auc'] > std_iw_metrics['roc_auc']:
                    advantage = dr_metrics['roc_auc'] - std_iw_metrics['roc_auc']
                    print(f"\n‚úì Doubly Robust WINS by {advantage:+.4f} AUC points!")
                elif std_iw_metrics['roc_auc'] > dr_metrics['roc_auc']:
                    advantage = std_iw_metrics['roc_auc'] - dr_metrics['roc_auc']
                    print(f"\n‚úì Standard IW WINS by {advantage:+.4f} AUC points!")
                else:
                    print(f"\n‚âà Methods perform equally")
                print(f"{'‚îÄ'*60}\n")
        
        else:
            print(f"STEP 5: No drift detected (AUC={adv_auc:.4f} < {drift_threshold})\n")
        
        # =====================================================================
        # STEP 6: Log Results to MLflow
        # =====================================================================
        print("STEP 6: Logging results to MLflow...")
        
        with mlflow.start_run(run_name=f"DoublyRobust-{drift_type}") as run:
            # Log parameters
            mlflow.log_param("drift_type", drift_type)
            mlflow.log_param("drift_threshold", drift_threshold)
            mlflow.log_param("compare_methods", compare_methods)
            
            # Log drift metrics
            mlflow.log_metric("adversarial_auc", adv_auc)
            
            # Log baseline
            mlflow.log_metric("baseline_roc_auc", baseline_metrics['roc_auc'])
            mlflow.log_metric("baseline_accuracy", baseline_metrics['accuracy'])
            
            # Log standard IW if available
            if results['standard_iw']:
                mlflow.log_metric("std_iw_roc_auc", results['standard_iw']['roc_auc'])
                mlflow.log_metric("std_iw_improvement", results['standard_iw']['improvement_auc'])
            
            # Log doubly robust
            if results['doubly_robust']:
                mlflow.log_metric("dr_roc_auc", results['doubly_robust']['roc_auc'])
                mlflow.log_metric("dr_improvement", results['doubly_robust']['improvement_auc'])
                mlflow.log_metric("dr_ess_ratio", results['doubly_robust']['ess_ratio'])
                mlflow.log_metric("dr_weight_cv", results['doubly_robust']['weight_cv'])
                mlflow.log_metric("outcome_model_auc", results['doubly_robust']['outcome_model_auc'])
            
            # Create comparison visualization
            if compare_methods and results['standard_iw'] and results['doubly_robust']:
                create_method_comparison_plot(results, drift_type)
            
            print(f"‚úì MLflow Run ID: {run.info.run_id}\n")
        
        print(f"{'='*80}")
        print("DOUBLY ROBUST EXPERIMENT COMPLETED!")
        print(f"{'='*80}\n")
        
        return results
    
    finally:
        # Cleanup
        for f in cleanup_files:
            if os.path.exists(f):
                os.remove(f)


def create_method_comparison_plot(results, drift_type):
    """
    Create visualization comparing Standard IW vs Doubly Robust.
    """
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))
    
    # Plot 1: ROC-AUC Comparison
    ax1 = axes[0]
    methods = ['Baseline', 'Standard IW', 'Doubly Robust']
    auc_scores = [
        results['baseline']['roc_auc'],
        results['standard_iw']['roc_auc'],
        results['doubly_robust']['roc_auc']
    ]
    colors = ['steelblue', 'orange', 'forestgreen']
    
    bars = ax1.bar(methods, auc_scores, color=colors, alpha=0.8, edgecolor='black')
    ax1.set_ylabel('ROC-AUC Score', fontsize=12)
    ax1.set_title(f'Performance Comparison: {drift_type.capitalize()} Drift', 
                  fontsize=14, fontweight='bold')
    ax1.grid(True, alpha=0.3, axis='y')
    
    # Add value labels on bars
    for bar in bars:
        height = bar.get_height()
        ax1.text(bar.get_x() + bar.get_width()/2., height,
                f'{height:.4f}', ha='center', va='bottom', fontsize=10)
    
    # Plot 2: Improvement Comparison
    ax2 = axes[1]
    improvements = [
        0,  # Baseline
        results['standard_iw']['improvement_auc'],
        results['doubly_robust']['improvement_auc']
    ]
    colors = ['gray' if x == 0 else 'green' if x > 0 else 'red' for x in improvements]
    
    bars = ax2.bar(methods, improvements, color=colors, alpha=0.8, edgecolor='black')
    ax2.axhline(y=0, color='black', linestyle='-', linewidth=1)
    ax2.set_ylabel('ROC-AUC Improvement', fontsize=12)
    ax2.set_title('Improvement Over Baseline', fontsize=14, fontweight='bold')
    ax2.grid(True, alpha=0.3, axis='y')
    
    # Add value labels
    for bar in bars:
        height = bar.get_height()
        if height != 0:
            ax2.text(bar.get_x() + bar.get_width()/2., height,
                    f'{height:+.4f}', ha='center', 
                    va='bottom' if height > 0 else 'top', fontsize=10)
    
    plt.tight_layout()
    plt.savefig(f"method_comparison_{drift_type}.png", dpi=300)
    mlflow.log_artifact(f"method_comparison_{drift_type}.png")
    plt.close()
    
    if os.path.exists(f"method_comparison_{drift_type}.png"):
        os.remove(f"method_comparison_{drift_type}.png")


In [7]:
file_path = 'C:/Users/ldmag/Documents/GitHub/Code-Assignments-Projects/Projects/MLOps Drift Detection and Pipeline Optimization/data/Telco-Churn.csv'
run_id = run_baseline_training(file_path)

# Run doubly robust experiment
results = run_doubly_robust_experiment(
    setup_run_id=run_id,
    drift_type='covariate',
    drift_threshold=0.7,
    compare_methods=True  # Compare with standard IW
)

--- Running Part 1: Baseline Training ---
Target 'Churn' encoded. Positive class ('Yes') is 1.
MLflow Run ID: ea43fd86247b46cfa281c0ec738fb22f




Baseline Model Accuracy: 0.7963
Baseline Model ROC AUC: 0.8369


Registered model 'telco-baseline' already exists. Creating a new version of this model...
2025/11/10 19:27:49 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: telco-baseline, version 32
Created version '32' of model 'telco-baseline'.


Saved visualization to confusion_matrix_baseline.png
Logged model pipeline, parameters, metrics, visualizations, and data artifacts.
üèÉ View run Baseline-Model-Setup at: http://localhost:5000/#/experiments/1/runs/ea43fd86247b46cfa281c0ec738fb22f
üß™ View experiment at: http://localhost:5000/#/experiments/1

DOUBLY ROBUST IMPORTANCE WEIGHTING EXPERIMENT: COVARIATE DRIFT

STEP 1: Loading baseline model and data...


Downloading artifacts:   0%|          | 0/1 [04:06<?, ?it/s]
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 5/5 [00:01<00:00,  3.41it/s]
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:01<00:00,  1.67s/it]
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:01<00:00,  1.34s/it]
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  2.74it/s]
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  2.94it/s]


‚úì Loaded 5634 training samples
‚úì Loaded 1409 test samples

STEP 2: Simulating covariate drift...
‚úì Applied covariate drift to test data

STEP 3: Running drift detection...
‚úì Adversarial AUC: 0.8733

STEP 4: Evaluating baseline model...
‚úì Baseline ROC-AUC: 0.7947

STEP 5: Drift detected (AUC=0.8733)!
         Testing adaptation methods...

  [Method 1/2] Standard Importance Weighting...
     ‚úì Standard IW ROC-AUC: 0.8155
       Improvement: +0.0208

  [Method 2/2] Doubly Robust Importance Weighting...
  ‚Üí Estimating doubly robust importance weights...
  ‚Üí Step 1/3: Estimating density ratios...
     ‚úì Density ratios: mean=0.177, std=0.095
  ‚Üí Step 2/3: Estimating outcome model (CV)...
     ‚úì CV predictions: mean=0.269, std=0.241
  ‚Üí Step 3/3: Computing doubly robust correction...
     ‚úì DR weights: mean=0.997, std=0.674

 Diagnostics:
     ‚Ä¢ ESS Ratio: 68.67%
     ‚Ä¢ Weight CV: 0.675
     ‚Ä¢ Outcome Model AUC: 0.8421
     ‚Ä¢ Avg Absolute Residual: 0.2745

 