In [1]:
# %%
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import (
    brier_score_loss,
    log_loss
)

# Helper for ECE/MCE
def calibration_errors(y_true, y_prob, n_bins=10):
    """
    Returns Expected Calibration Error and Maximum Calibration Error.

    Predictions are grouped into ``n_bins`` equal-width bins over [0, 1].
    For every non-empty bin the absolute gap between the mean predicted
    probability and the mean of ``y_true`` is computed. ECE is the
    sample-weighted average of those gaps; MCE is the largest gap.

    Parameters
    ----------
    y_true : array-like
        Observed outcomes, aligned element-wise with ``y_prob``.
    y_prob : array-like
        Predicted probabilities.
    n_bins : int, default=10
        Number of equal-width bins.

    Returns
    -------
    (ece, mce) : tuple of float
        Both are 0.0 when the input is empty.
    """
    # Accept lists / pandas objects as well as ndarrays.
    y_true = np.asarray(y_true, dtype=float)
    y_prob = np.asarray(y_prob, dtype=float)

    edges = np.linspace(0, 1, n_bins + 1)
    # np.digitize uses half-open intervals [lo, hi), so a probability of
    # exactly 1.0 would get index n_bins and be skipped by the loop below.
    # Clamp the ids so such predictions fall into the last bin instead of
    # silently disappearing from the metric.
    binids = np.clip(np.digitize(y_prob, edges) - 1, 0, n_bins - 1)

    n_samples = len(y_true)
    ece = 0.0
    mce = 0.0

    for i in range(n_bins):
        mask = (binids == i)
        if not np.any(mask):
            continue

        avg_confidence = y_prob[mask].mean()
        observed_rate = y_true[mask].mean()
        gap = abs(avg_confidence - observed_rate)

        # Weight each bin by the share of samples it holds.
        ece += (mask.sum() / n_samples) * gap
        mce = max(mce, gap)

    return ece, mce

# %%
# Read the account-level feature table from disk
df = pd.read_csv("data/features_account_level.csv")

# Every column except the target and the `account_id` column is a model input
non_feature_cols = {"is_mule", "account_id"}
feature_cols = [col for col in df.columns if col not in non_feature_cols]

X = df[feature_cols]
y = df["is_mule"]

# 80/20 split, stratified on the target, with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# %%
#############################
# 1) Logistic Regression
#############################
# Baseline: standardise the features, then fit a class-weighted logistic
# regression. Unlike the RF and XGBoost models in this notebook, this pipeline
# is NOT wrapped in CalibratedClassifierCV, so y_prob_lr holds the raw
# probabilities of a model trained with class_weight="balanced".
# NOTE(review): n_jobs was dropped from LogisticRegression -- a single binary
# fit has nothing to parallelise over; confirm against the installed
# scikit-learn version's documentation.
lr_model = Pipeline([
    ("scaler", StandardScaler()),
    ("logreg", LogisticRegression(
        class_weight="balanced",
        max_iter=1000
    ))
])

lr_model.fit(X_train, y_train)
# Column 1 of predict_proba -- presumably P(is_mule == 1); verify class order.
y_prob_lr = lr_model.predict_proba(X_test)[:, 1]

#############################
# 2) Random Forest + Calibration
#############################
# Forest hyper-parameters gathered in one mapping
rf_params = {
    "n_estimators": 300,
    "min_samples_split": 4,
    "min_samples_leaf": 2,
    "class_weight": "balanced",
    "random_state": 42,
    "n_jobs": -1,
}
rf = RandomForestClassifier(**rf_params)

# Wrap the forest in an isotonic calibrator (cv=5) and fit on the training split
rf_cal = CalibratedClassifierCV(estimator=rf, method="isotonic", cv=5).fit(
    X_train, y_train
)

# Keep column 1 of the calibrated test-set probabilities
rf_test_proba = rf_cal.predict_proba(X_test)
y_prob_rf = rf_test_proba[:, 1]

#############################
# 3) XGBoost + Calibration
#############################
# Negative-to-positive ratio of the training labels, handed to XGBoost as
# scale_pos_weight (assumes y_train is coded 0/1 so that sum() counts positives)
pos = y_train.sum()
neg = len(y_train) - pos
scale_pos_weight = neg / pos

xgb = XGBClassifier(
    # objective / evaluation
    objective="binary:logistic",
    eval_metric="logloss",
    # boosting schedule
    n_estimators=400,
    learning_rate=0.05,
    # tree shape and row/column subsampling
    max_depth=4,
    subsample=0.8,
    colsample_bytree=0.8,
    # class imbalance, reproducibility, parallelism
    scale_pos_weight=scale_pos_weight,
    random_state=42,
    n_jobs=-1,
)

# Isotonic calibrator (cv=5) around the booster, fitted on the training split
xgb_cal = CalibratedClassifierCV(estimator=xgb, method="isotonic", cv=5)
xgb_cal.fit(X_train, y_train)

# Keep column 1 of the calibrated test-set probabilities
xgb_test_proba = xgb_cal.predict_proba(X_test)
y_prob_xgb = xgb_test_proba[:, 1]

# %%
#############################
# Compute Calibration Metrics
#############################

# Test-set probabilities of each fitted model, keyed by the label used in the table
probs_by_model = {
    "LogReg": y_prob_lr,
    "RF_cal": y_prob_rf,
    "XGB_cal": y_prob_xgb,
}

# One row per model: ECE/MCE from the local helper, Brier score and log loss
# from scikit-learn
records = []
for name, y_prob in probs_by_model.items():
    ece, mce = calibration_errors(y_test.values, y_prob)
    brier = brier_score_loss(y_test, y_prob)
    ll = log_loss(y_test, y_prob)
    records.append(dict(model=name, ECE=ece, MCE=mce, Brier=brier, LogLoss=ll))

calib_df = pd.DataFrame(records)
calib_df

Unnamed: 0,model,ECE,MCE,Brier,LogLoss
0,LogReg,0.256729,0.640753,0.107368,0.368029
1,RF_cal,0.005975,0.635438,0.020068,0.130126
2,XGB_cal,0.005776,0.411367,0.019722,0.131674
