### 0. Library Imports

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from sklearn.metrics import roc_auc_score, brier_score_loss
from sklearn.calibration import CalibratedClassifierCV, calibration_curve
from scipy.stats import entropy

### 1. Input Validation & Schema Checks

In [None]:
# Feature columns every incoming batch is expected to contain; the pipeline
# selects them from the input frame in exactly this order.
EXPECTED_COLUMNS = [
    'GarmentType',
    'Material',
    'Weight',
    'Size',
    'Collection',
    'SupplierScore',
    'IncidentCount',
    'OnTimeRate',
    'SeasonalityScore',
]

In [None]:
def validate_input_schema(df, expected=EXPECTED_COLUMNS):
    """Report how the columns of ``df`` differ from the expected schema.

    Prints a warning for columns that are not listed in ``expected``, an
    error for listed columns that ``df`` lacks, and an OK message when
    neither kind of mismatch exists. Nothing is returned or raised.

    Args:
        df: Object exposing a ``columns`` attribute (e.g. a DataFrame).
        expected: Iterable of required column names.
    """
    actual_names = set(df.columns)
    required_names = set(expected)

    unexpected = actual_names - required_names
    absent = required_names - actual_names

    if unexpected:
        print(f"[WARNING] Unexpected columns found: {unexpected}")
    if absent:
        print(f"[ERROR] Missing required columns: {absent}")
    if not (unexpected or absent):
        print("[OK] Input schema validated successfully.")

In [None]:
def validate_input_quality(X):
    """Print basic data-quality findings for a feature batch.

    Emits an error line when any cell of ``X`` is null and a warning line
    when the batch holds fewer than ten records. Nothing is returned.

    Args:
        X: Feature table supporting ``isna()`` and ``len()``.
    """
    has_missing = X.isna().to_numpy().any()
    row_count = len(X)

    if has_missing:
        print("[ERROR] Missing values found in input features.")
    if row_count < 10:
        print("[WARNING] Very few records (<10) in batch. Metrics may be unstable.")

### 2. Batch Inference & Performance Evaluation

In [None]:
def run_batch_inference(model, X, y_true=None):
    """Score a batch and return the second column of ``predict_proba``.

    When ``y_true`` is supplied, the ROC AUC of the scores against it is
    printed as well.

    Args:
        model: Object exposing ``predict_proba(X)`` that returns a 2-D array.
        X: Feature batch passed straight to the model.
        y_true: Optional ground-truth labels used only for the AUC printout.

    Returns:
        The ``[:, 1]`` slice of the model's ``predict_proba`` output.
    """
    scores = model.predict_proba(X)[:, 1]
    if y_true is None:
        return scores

    batch_auc = roc_auc_score(y_true, scores)
    print(f"[INFO] AUC: {batch_auc:.4f}")
    return scores

In [None]:
def monitor_performance_metrics(model, X, y_true, threshold_auc=0.7):
    """Score a labelled batch and report AUC and Brier score.

    Scoring is delegated to ``run_batch_inference`` (which prints the AUC);
    this function then prints the Brier score and a status line that depends
    on whether the AUC is below ``threshold_auc``.

    Args:
        model: Classifier exposing ``predict_proba``.
        X: Feature batch.
        y_true: Ground-truth labels for the batch.
        threshold_auc: AUC below which a warning is printed.

    Returns:
        Tuple ``(auc, brier, y_pred)``.
    """
    scores = run_batch_inference(model, X, y_true)

    auc_value = roc_auc_score(y_true, scores)
    brier_value = brier_score_loss(y_true, scores)
    print(f"[INFO] Brier Score: {brier_value:.4f}")

    status_line = (
        "[WARNING] AUC below threshold. Consider recalibration or retraining."
        if auc_value < threshold_auc
        else "[OK] Model performance is within acceptable range."
    )
    print(status_line)

    return auc_value, brier_value, scores

### 3. Drift Detection

In [None]:
def _aligned_distributions(ref_values, new_values, n_bins):
    """Return reference/new relative frequencies over one shared support."""
    both_numeric = (
        pd.api.types.is_numeric_dtype(ref_values)
        and pd.api.types.is_numeric_dtype(new_values)
    )
    if both_numeric and ref_values.nunique() > n_bins:
        ref_arr = ref_values.to_numpy(dtype=float)
        new_arr = new_values.to_numpy(dtype=float)
        # Only the interior edges are used, so the first and last bins are
        # open-ended and new values outside the reference range still count.
        interior_edges = np.histogram_bin_edges(ref_arr, bins=n_bins)[1:-1]
        ref_counts = np.bincount(
            np.searchsorted(interior_edges, ref_arr, side="right"), minlength=n_bins
        )
        new_counts = np.bincount(
            np.searchsorted(interior_edges, new_arr, side="right"), minlength=n_bins
        )
        return ref_counts / ref_counts.sum(), new_counts / new_counts.sum()

    ref_freq = ref_values.value_counts(normalize=True)
    new_freq = new_values.value_counts(normalize=True)
    # Union of categories: a value that vanished from the new batch must
    # still contribute to the divergence.
    support = ref_freq.index.union(new_freq.index, sort=False)
    return (
        ref_freq.reindex(support, fill_value=0.0).to_numpy(dtype=float),
        new_freq.reindex(support, fill_value=0.0).to_numpy(dtype=float),
    )


def detect_distribution_drift(reference_df, new_df, columns, threshold=0.1, *, n_bins=10, epsilon=0.001):
    """Compare per-column distributions of two frames using KL divergence.

    For each column the reference and new distributions are built over the
    same support and KL(reference || new) is computed with
    ``scipy.stats.entropy``. Columns that are numeric in both frames and have
    more than ``n_bins`` distinct reference values are bucketed into
    ``n_bins`` bins derived from the reference data; all other columns are
    compared category by category over the union of observed values. Null
    values are ignored.

    Args:
        reference_df: Frame holding the baseline data.
        new_df: Frame holding the batch being checked.
        columns: Column names to analyse; each must exist in both frames.
        threshold: KL divergence above which a drift warning is printed.
        n_bins: Number of bins used for high-cardinality numeric columns.
        epsilon: Probability mass substituted for empty categories/bins so
            the divergence stays finite.

    Returns:
        Dict mapping each column name to its KL divergence (``nan`` for a
        column with no non-null values in either frame).
    """
    print("[DRIFT CHECK] Starting drift analysis...")
    divergences = {}
    for col in columns:
        ref_values = reference_df[col].dropna()
        new_values = new_df[col].dropna()
        if ref_values.empty or new_values.empty:
            print(f"[WARNING] Skipping '{col}': no non-null values to compare.")
            divergences[col] = float("nan")
            continue

        ref_dist, new_dist = _aligned_distributions(ref_values, new_values, n_bins)
        # Zero mass would make the divergence infinite (or silently drop the
        # term), so empty cells get a small floor; entropy() renormalises.
        ref_dist = np.where(ref_dist > 0, ref_dist, epsilon)
        new_dist = np.where(new_dist > 0, new_dist, epsilon)

        kl_div = float(entropy(ref_dist, new_dist))
        divergences[col] = kl_div
        if kl_div > threshold:
            print(f"[WARNING] Drift detected in '{col}' (KL divergence: {kl_div:.4f})")
        else:
            print(f"[OK] '{col}' distribution stable (KL divergence: {kl_div:.4f})")
    return divergences

### 4. Calibration Monitoring

In [None]:
def recalibrate_model(model, X_val, y_val):
    """Fit a sigmoid calibrator on top of an already-trained model.

    Args:
        model: Pre-fitted classifier to wrap.
        X_val: Validation features used to fit the calibrator.
        y_val: Validation labels used to fit the calibrator.

    Returns:
        The fitted ``CalibratedClassifierCV`` wrapping ``model``.
    """
    # The wrapped estimator is passed positionally: the keyword was renamed
    # from ``base_estimator`` to ``estimator`` in scikit-learn, and the old
    # name has since been removed, so either keyword breaks on some versions.
    # NOTE(review): cv='prefit' is itself deprecated in recent scikit-learn
    # releases - confirm the installed version before upgrading.
    calibrator = CalibratedClassifierCV(model, cv='prefit', method='sigmoid')
    calibrator.fit(X_val, y_val)
    print("[INFO] Model recalibrated using validation data.")
    return calibrator

In [None]:
def plot_probability_distribution(y_true, y_pred):
    """Overlay histograms of predicted probabilities and true labels.

    Args:
        y_true: Ground-truth labels, drawn as a discrete orange histogram.
        y_pred: Predicted probabilities, drawn with 25 bins and a KDE curve.
    """
    _, axis = plt.subplots(figsize=(10, 5))
    sns.histplot(y_pred, bins=25, kde=True, label="Predicted Probabilities", ax=axis)
    sns.histplot(y_true, bins=2, color='orange', label="True Labels", discrete=True, ax=axis)
    axis.set_title("Probability Distribution")
    axis.legend()
    plt.show()

In [None]:
def plot_calibration_curve(y_true, y_prob):
    """Plot a 10-bin reliability curve against the perfect-calibration diagonal.

    Args:
        y_true: Ground-truth labels.
        y_prob: Predicted probabilities for the positive class.
    """
    fraction_positive, mean_predicted = calibration_curve(y_true, y_prob, n_bins=10)

    _, axis = plt.subplots(figsize=(6, 6))
    axis.plot(mean_predicted, fraction_positive, marker='o', label='Calibration Curve')
    axis.plot([0, 1], [0, 1], linestyle='--', label='Perfect Calibration')
    axis.set_title('Calibration Curve')
    axis.set_xlabel('Mean Predicted Probability')
    axis.set_ylabel('Fraction of Positives')
    axis.legend()
    axis.grid()
    plt.show()

### 5. Logging, Versioning & Retraining Trigger

In [None]:
def log_model_metadata(model_name, version, auc, brier, timestamp=None):
    """Print a one-line summary of a model's identity and metrics.

    Args:
        model_name: Name shown in the log line.
        version: Version label shown in the log line.
        auc: AUC value, printed with four decimals.
        brier: Brier score, printed with four decimals.
        timestamp: Text to show as the time; when falsy, the current local
            time in ISO format is used instead.
    """
    if not timestamp:
        timestamp = datetime.now().isoformat()
    log_line = f"[MODEL LOG] Name: {model_name} | Version: {version} | AUC: {auc:.4f} | Brier: {brier:.4f} | Time: {timestamp}"
    print(log_line)

In [None]:
def save_monitoring_log(output_dict, filepath="monitoring_log.csv"):
    """Append one monitoring record to a CSV log, creating it if needed.

    The header row is written whenever the target file does not exist yet or
    exists but is empty, so the log always starts with column names.

    Args:
        output_dict: Mapping of column name to value for the single row.
        filepath: Path of the CSV log file.
    """
    record = pd.DataFrame([output_dict])
    # A zero-byte file (e.g. pre-created by a scheduler) still needs a header;
    # checking existence alone would leave the log headerless forever.
    needs_header = not os.path.exists(filepath) or os.path.getsize(filepath) == 0
    record.to_csv(filepath, mode='a', header=needs_header, index=False)

In [None]:

def trigger_retraining_if_needed(auc, threshold=0.7):
    """Print a retraining alert when ``auc`` is below ``threshold``.

    Args:
        auc: Observed AUC for the latest batch.
        threshold: Value below which the alert is printed.
    """
    below_threshold = auc < threshold
    if not below_threshold:
        return

    print("[ALERT] Retraining required! AUC dropped below threshold.")
    # Optional: trigger pipeline or workflow
    # subprocess.call(['python', 'retrain_model.py'])

### 6. Full Maintenance Pipeline

In [None]:
def full_monitoring_pipeline(model, new_data_df, y_true=None, reference_df=None, model_name="PackagingModel", version="1.0"):
    """Run validation, drift detection and performance monitoring on a batch.

    Steps, in order:
      1. Validate the batch schema and the quality of the expected columns.
      2. If ``reference_df`` is given, run drift detection on the expected
         columns.
      3. If ``y_true`` is given, compute AUC/Brier, draw the probability and
         calibration plots, log and persist the metrics, and print a
         retraining alert when needed.

    The run stops right after schema validation when a required column is
    missing, because every later step selects those columns.

    Args:
        model: Classifier exposing ``predict_proba``.
        new_data_df: Batch to monitor.
        y_true: Optional ground-truth labels for the batch.
        reference_df: Optional baseline frame for drift detection.
        model_name: Name recorded in the logs.
        version: Version label recorded in the logs.
    """
    print(f"\n[MONITORING PIPELINE] - Run Date: {datetime.today().strftime('%Y-%m-%d')}")

    validate_input_schema(new_data_df)
    missing_cols = [col for col in EXPECTED_COLUMNS if col not in new_data_df.columns]
    if missing_cols:
        # The schema check has already listed them; selecting the expected
        # columns below would otherwise fail with an opaque KeyError.
        print("[ERROR] Aborting monitoring run: required columns are missing.")
        return

    features = new_data_df[EXPECTED_COLUMNS]
    validate_input_quality(features)

    if reference_df is not None:
        detect_distribution_drift(reference_df, new_data_df, columns=EXPECTED_COLUMNS)

    if y_true is None:
        print("[NOTE] No ground truth provided. Skipping performance metrics.")
        return

    auc, brier, y_pred = monitor_performance_metrics(model, features, y_true)
    plot_probability_distribution(y_true, y_pred)
    plot_calibration_curve(y_true, y_pred)
    log_model_metadata(model_name, version, auc, brier)
    save_monitoring_log({
        "model": model_name,
        "version": version,
        "date": datetime.now().isoformat(),
        "AUC": auc,
        "BrierScore": brier
    })
    trigger_retraining_if_needed(auc)

### 7. Pipeline activation

In [None]:
# Example usage:
# full_monitoring_pipeline(model, new_data_df, y_true=actual_labels, reference_df=train_data_df)