# ML Model Comparison: LightGBM vs CatBoost

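Compare LightGBM and CatBoost on credit risk prediction using the same data, the same train/validation split, and the same early-stopping setup. LightGBM runs with tuned hyperparameters; CatBoost runs with its default settings.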

In [None]:
import sys
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import lightgbm as lgb
from catboost import CatBoostClassifier
from sklearn.metrics import roc_auc_score, log_loss, brier_score_loss
from sklearn.model_selection import train_test_split
from IPython.core.display import HTML

# Treat the parent of the current working directory as the project root and
# add it to sys.path (once) so the `credit_risk_xai` imports below resolve.
# NOTE(review): assumes the notebook is launched from a directory directly
# under the project root — confirm.
PROJ_ROOT = Path.cwd().parent
if str(PROJ_ROOT) not in sys.path:
    sys.path.append(str(PROJ_ROOT))

from credit_risk_xai.config import FEATURE_CACHE_PATH, ACTIVE_FEATURES
from credit_risk_xai.features.engineer import prepare_modeling_data
from credit_risk_xai.modeling import compute_ece

## 1. Load and Prepare Data

In [None]:
# Load the cached feature table.
df = pd.read_parquet(FEATURE_CACHE_PATH)

# Row filters that define the modelling population; a row is kept only if
# all four conditions hold.
is_active = df["ser_aktiv"] == 1
is_small_or_medium = df["sme_category"].isin(["Small", "Medium"])
is_kncfall = df["knc_kncfall"] == 1
# NOTE(review): the comparison is against the string "40.0"; this only drops
# rows if the column holds string-like values — confirm the column dtype.
not_code_40 = df["bransch_borsbransch_konv"] != "40.0"

df = df[is_active & is_small_or_medium & is_kncfall & not_code_40]

# Split the filtered table into the feature matrix and the target.
X, y = prepare_modeling_data(df)

# Quick summary of sample size, positive rate, and negative:positive ratio.
print(f"Data: {len(X):,} samples, {X.shape[1]} features")
print(f"Default rate: {y.mean()*100:.2f}%")
print(f"Class imbalance: {(y==0).sum()/(y==1).sum():.0f}:1")

In [None]:
# Hold out 20% of the rows for validation, stratified on the target, with a
# fixed seed so the split is reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Categorical columns for CatBoost, collected in one pass over the dtypes:
# both the column names and their positional indices in X.
categorical_columns = [
    (position, column)
    for position, (column, dtype) in enumerate(X.dtypes.items())
    if dtype.name == 'category'
]
cat_features = [column for _, column in categorical_columns]
cat_indices = [position for position, _ in categorical_columns]

print(f"Train: {len(X_train):,}, Val: {len(X_val):,}")
print(f"Categorical features: {cat_features}")

## 2. Train Models

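Both models share the same iteration budget (10,000), early-stopping patience (50 rounds), random seed (42), and validation set. LightGBM uses tuned hyperparameters, while CatBoost uses its default settings.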

In [None]:
# LightGBM with tuned hyperparameters.
# The iteration count is only an upper bound: training stops early once the
# validation log loss has not improved for 50 rounds (see the callbacks below).
lgbm_params = {
    "objective": "binary",
    "n_estimators": 10_000,
    # Use LightGBM's documented name for binary log loss; "logloss" is only
    # recognised as an `eval_metric` shorthand by the scikit-learn wrapper.
    "metric": "binary_logloss",
    "random_state": 42,
    "n_jobs": -1,
    "verbosity": -1,
    "learning_rate": 0.0567,
    # NOTE(review): with max_depth=6 a tree has at most 2**6 = 64 leaves, so
    # num_leaves=214 is presumably not the binding constraint — confirm this
    # is the intended outcome of tuning.
    "num_leaves": 214,
    "max_depth": 6,
    "min_child_samples": 97,
    "min_child_weight": 0.308,
    "reg_alpha": 4.764,
    "reg_lambda": 9.83e-05,
    "min_split_gain": 0.846,
    "subsample": 0.826,
    "subsample_freq": 3,
    "colsample_bytree": 0.505,
}

lgbm_model = lgb.LGBMClassifier(**lgbm_params)
# NOTE(review): the validation split drives early stopping here and is also
# the split the models are scored on later, so the reported metrics may be
# slightly optimistic — consider a separate test split.
lgbm_model.fit(
    X_train, y_train,
    eval_set=[(X_val, y_val)],
    eval_metric='logloss',
    callbacks=[
        lgb.early_stopping(stopping_rounds=50, verbose=False),
        lgb.log_evaluation(period=50)
    ]
)

print(f"LightGBM best iteration: {lgbm_model.best_iteration_}")

In [None]:
# CatBoost baseline: no model hyperparameters are tuned. Only the iteration
# cap, seed, logging period, and early-stopping behaviour are set explicitly.
catboost_params = dict(
    iterations=10_000,
    random_seed=42,
    verbose=50,
    early_stopping_rounds=50,
    use_best_model=True,
)

catboost_model = CatBoostClassifier(**catboost_params)

# Train on the training split; the validation split is passed as the eval set
# and categorical columns are identified by positional index.
catboost_model.fit(
    X_train,
    y_train,
    cat_features=cat_indices,
    eval_set=(X_val, y_val),
)

print(f"CatBoost best iteration: {catboost_model.best_iteration_}")

## 3. Evaluate Models

In [None]:
# Positive-class probabilities on the validation split (column 1 of
# predict_proba) for each model.
lgbm_proba = lgbm_model.predict_proba(X_val)[:, 1]
catboost_proba = catboost_model.predict_proba(X_val)[:, 1]

# Column label -> scoring function; every function is called as
# fn(y_true, y_prob).
metric_functions = {
    'AUC': roc_auc_score,
    'Log Loss': log_loss,
    'Brier Score': brier_score_loss,
    'ECE': compute_ece,
}

# Each metric column holds [LightGBM score, CatBoost score, difference],
# matching the three row labels in 'Model'.
results = {'Model': ['LightGBM', 'CatBoost', 'Δ (LGBM - CB)']}
for col, metric_fn in metric_functions.items():
    lgbm_score = metric_fn(y_val, lgbm_proba)
    catboost_score = metric_fn(y_val, catboost_proba)
    results[col] = [lgbm_score, catboost_score, lgbm_score - catboost_score]

results_df = pd.DataFrame(results)
HTML(results_df.to_html(index=False))

In [None]:
from sklearn.calibration import calibration_curve

# (legend label, validation probabilities, colour) for each model; both
# panels iterate over this list so the models are drawn consistently.
model_series = [
    ('LightGBM', lgbm_proba, 'steelblue'),
    ('CatBoost', catboost_proba, 'darkorange'),
]

fig, axes = plt.subplots(1, 2, figsize=(12, 5))
calibration_ax, distribution_ax = axes

# Left panel: calibration curves over 50 quantile bins, with each model's
# ECE shown in the legend.
for name, proba, color in model_series:
    observed_rate, predicted_mean = calibration_curve(
        y_val, proba, n_bins=50, strategy='quantile'
    )
    ece = compute_ece(y_val, proba)
    calibration_ax.plot(
        predicted_mean,
        observed_rate,
        's-',
        label=f'{name} (ECE={ece:.4f})',
        color=color,
        linewidth=2,
    )

# Diagonal reference line: predicted probability equals observed frequency.
calibration_ax.plot([0, 1], [0, 1], 'k--', alpha=0.5, label='Perfect calibration')
calibration_ax.set_xlabel('Mean predicted probability')
calibration_ax.set_ylabel('Fraction of positives')
calibration_ax.set_title('Calibration Curves')
calibration_ax.legend()
calibration_ax.grid(alpha=0.3)

# Right panel: overlaid, density-normalised histograms of the predictions.
for name, proba, color in model_series:
    distribution_ax.hist(proba, bins=50, alpha=0.5, label=name, color=color, density=True)
distribution_ax.set_xlabel('Predicted probability')
distribution_ax.set_ylabel('Density')
distribution_ax.set_title('Prediction Distributions')
distribution_ax.legend()
distribution_ax.grid(alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# Per-model importance tables, one row per column of X.
lgbm_importance = pd.DataFrame(
    {'feature': X.columns, 'lgbm': lgbm_model.feature_importances_}
)
catboost_importance = pd.DataFrame(
    {'feature': X.columns, 'catboost': catboost_model.feature_importances_}
)

# Join the two tables on the feature name, then rank each model's importances
# (rank 1 = most important) and take the rank difference.
# NOTE(review): the raw importance values may be on different scales in the
# two libraries, so the ranks are presumably the meaningful comparison here.
importance_comparison = pd.merge(
    lgbm_importance, catboost_importance, on='feature'
).assign(
    lgbm_rank=lambda table: table['lgbm'].rank(ascending=False),
    catboost_rank=lambda table: table['catboost'].rank(ascending=False),
    rank_diff=lambda table: table['lgbm_rank'] - table['catboost_rank'],
)

HTML(importance_comparison.to_html(index=False))