# Hyperparameter Tuning with Optuna

This notebook performs hyperparameter optimization for the LightGBM model using Optuna.
We optimize all relevant hyperparameters to find the best configuration for credit risk prediction.

In [11]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import lightgbm as lgb
import optuna
from optuna.integration import LightGBMPruningCallback
from sklearn.metrics import (
    roc_auc_score,
    average_precision_score,
    log_loss,
    brier_score_loss,
)
from sklearn.model_selection import StratifiedKFold
import matplotlib.pyplot as plt

from credit_risk_xai.config import FEATURE_CACHE_PATH
from credit_risk_xai.features.engineer import prepare_modeling_data
from credit_risk_xai.modeling.utils import split_train_validation

## Load and Prepare Data

In [12]:
# Load and filter data (same filters as in 05a notebook)
df = pd.read_parquet(FEATURE_CACHE_PATH)
df = df[
    (df["ser_aktiv"] == 1) & 
    (df["sme_category"].isin(["Small", "Medium"])) & 
    (df["knc_kncfall"] == 1) &
    (df["bransch_borsbransch_konv"] != "40.0")
]

X, y = prepare_modeling_data(df)
print(f"Dataset shape: {X.shape}")
print(f"Target distribution:\n{y.value_counts(normalize=True)}")

Dataset shape: (304149, 29)
Target distribution:
target_next_year
0    0.982397
1    0.017603
Name: proportion, dtype: Float64


In [13]:
# Split into train and validation (same split as training)
X_train, X_val, y_train, y_val = split_train_validation(
    X, y, test_size=0.2, random_state=42
)

print(f"Training set: {X_train.shape[0]} samples")
print(f"Validation set: {X_val.shape[0]} samples")
print(f"Training positive rate: {y_train.mean():.4f}")
print(f"Validation positive rate: {y_val.mean():.4f}")

Training set: 243319 samples
Validation set: 60830 samples
Training positive rate: 0.0176
Validation positive rate: 0.0176


## Define Optuna Objective Function

We optimize all relevant LightGBM hyperparameters:
- **Tree structure**: `num_leaves`, `max_depth`, `min_child_samples`, `min_child_weight`
- **Regularization**: `reg_alpha` (L1), `reg_lambda` (L2), `min_split_gain`
- **Sampling**: `subsample` (bagging), `colsample_bytree` (feature fraction), `subsample_freq`
- **Learning**: `learning_rate`, `n_estimators` (with early stopping)

In [25]:
def objective(trial: optuna.Trial) -> float:
    """
    Optuna objective function for LightGBM hyperparameter optimization.
    Uses cross-validation for robust evaluation. Minimizes log loss.
    """
    # Hyperparameter search space
    params = {
        "objective": "binary",
        "metric": "binary_logloss",
        "verbosity": -1,
        "random_state": 42,
        "n_jobs": -1,
        
        # Learning rate and boosting rounds
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
        "n_estimators": 10_000,  # Use early stopping
        
        # Tree structure
        "num_leaves": trial.suggest_int("num_leaves", 8, 256),
        "max_depth": trial.suggest_int("max_depth", 3, 12),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
        "min_child_weight": trial.suggest_float("min_child_weight", 1e-3, 10.0, log=True),
        
        # Regularization
        "reg_alpha": trial.suggest_float("reg_alpha", 1e-8, 10.0, log=True),
        "reg_lambda": trial.suggest_float("reg_lambda", 1e-8, 10.0, log=True),
        "min_split_gain": trial.suggest_float("min_split_gain", 0.0, 1.0),
        
        # Sampling
        "subsample": trial.suggest_float("subsample", 0.5, 1.0),
        "subsample_freq": trial.suggest_int("subsample_freq", 0, 10),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
        
        # Class imbalance handling
        "is_unbalance": False,
    }
    
    
    # Cross-validation with stratified folds
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    cv_scores = []
    
    for fold, (train_idx, val_idx) in enumerate(cv.split(X_train, y_train)):
        X_fold_train = X_train.iloc[train_idx]
        y_fold_train = y_train.iloc[train_idx]
        X_fold_val = X_train.iloc[val_idx]
        y_fold_val = y_train.iloc[val_idx]
        
        model = lgb.LGBMClassifier(**params)
        
        # Pruning callback using log loss (aligned with study direction=minimize)
        pruning_callback = LightGBMPruningCallback(trial, "binary_logloss")
        
        model.fit(
            X_fold_train,
            y_fold_train,
            eval_set=[(X_fold_val, y_fold_val)],
            callbacks=[
                lgb.early_stopping(stopping_rounds=50, verbose=False),
                pruning_callback,
            ],
        )
        
        # Evaluate on fold validation set
        y_pred_proba = model.predict_proba(X_fold_val)[:, 1]
        fold_score = log_loss(y_fold_val, y_pred_proba)
        cv_scores.append(fold_score)
    
    return np.mean(cv_scores)

## Run Hyperparameter Optimization

In [26]:
# Create Optuna study
study = optuna.create_study(
    direction="minimize",  # Minimize log loss
    study_name="lightgbm_credit_risk",
    sampler=optuna.samplers.TPESampler(seed=42),
    pruner=optuna.pruners.MedianPruner(n_startup_trials=10, n_warmup_steps=20),
)

# Run optimization
study.optimize(
    objective,
    n_trials=100,  # Adjust based on available compute time
    show_progress_bar=True,
    n_jobs=1,  # Sequential trials (parallelism handled within each trial)
)

[I 2025-12-01 08:40:10,935] A new study created in memory with name: lightgbm_credit_risk
Best trial: 0. Best value: 0.0654981:   1%|          | 1/100 [00:28<46:52, 28.41s/it]

[I 2025-12-01 08:40:39,344] Trial 0 finished with value: 0.0654981454829895 and parameters: {'learning_rate': 0.03574712922600244, 'num_leaves': 244, 'max_depth': 10, 'min_child_samples': 62, 'min_child_weight': 0.004207988669606638, 'reg_alpha': 2.5348407664333426e-07, 'reg_lambda': 3.3323645788192616e-08, 'min_split_gain': 0.8661761457749352, 'subsample': 0.8005575058716043, 'subsample_freq': 7, 'colsample_bytree': 0.5102922471479012}. Best is trial 0 with value: 0.0654981454829895.


Best trial: 0. Best value: 0.0654981:   2%|▏         | 2/100 [00:31<21:48, 13.35s/it]

[I 2025-12-01 08:40:42,154] Trial 1 finished with value: 0.06952755029379128 and parameters: {'learning_rate': 0.2708160864249968, 'num_leaves': 215, 'max_depth': 5, 'min_child_samples': 22, 'min_child_weight': 0.00541524411940254, 'reg_alpha': 5.472429642032198e-06, 'reg_lambda': 0.00052821153945323, 'min_split_gain': 0.43194501864211576, 'subsample': 0.645614570099021, 'subsample_freq': 6, 'colsample_bytree': 0.569746930326021}. Best is trial 0 with value: 0.0654981454829895.


Best trial: 2. Best value: 0.0645705:   3%|▎         | 3/100 [00:52<27:12, 16.83s/it]

[I 2025-12-01 08:41:03,131] Trial 2 finished with value: 0.06457048936389305 and parameters: {'learning_rate': 0.027010527749605478, 'num_leaves': 99, 'max_depth': 7, 'min_child_samples': 80, 'min_child_weight': 0.006290644294586149, 'reg_alpha': 0.00042472707398058225, 'reg_lambda': 0.0021465011216654484, 'min_split_gain': 0.046450412719997725, 'subsample': 0.8037724259507192, 'subsample_freq': 1, 'colsample_bytree': 0.5325257964926398}. Best is trial 2 with value: 0.06457048936389305.


Best trial: 2. Best value: 0.0645705:   4%|▍         | 4/100 [01:03<23:26, 14.66s/it]

[I 2025-12-01 08:41:14,449] Trial 3 finished with value: 0.08224760438090972 and parameters: {'learning_rate': 0.2521267904777921, 'num_leaves': 248, 'max_depth': 11, 'min_child_samples': 34, 'min_child_weight': 0.002458603276328005, 'reg_alpha': 0.014391207615728067, 'reg_lambda': 9.148975058772307e-05, 'min_split_gain': 0.12203823484477883, 'subsample': 0.7475884550556351, 'subsample_freq': 0, 'colsample_bytree': 0.954660201039391}. Best is trial 2 with value: 0.06457048936389305.


Best trial: 2. Best value: 0.0645705:   5%|▌         | 5/100 [01:22<25:29, 16.10s/it]

[I 2025-12-01 08:41:33,114] Trial 4 finished with value: 0.06477274013767753 and parameters: {'learning_rate': 0.024112898115291985, 'num_leaves': 172, 'max_depth': 6, 'min_child_samples': 54, 'min_child_weight': 0.1537592023548176, 'reg_alpha': 4.609885087947832e-07, 'reg_lambda': 5.324289357128436, 'min_split_gain': 0.7751328233611146, 'subsample': 0.9697494707820946, 'subsample_freq': 9, 'colsample_bytree': 0.7989499894055425}. Best is trial 2 with value: 0.06457048936389305.


Best trial: 2. Best value: 0.0645705:   6%|▌         | 6/100 [01:24<17:58, 11.47s/it]

[I 2025-12-01 08:41:35,597] Trial 5 finished with value: 0.06808200333570644 and parameters: {'learning_rate': 0.22999586428143728, 'num_leaves': 30, 'max_depth': 4, 'min_child_samples': 9, 'min_child_weight': 0.02001342062287998, 'reg_alpha': 3.148441347423712e-05, 'reg_lambda': 2.7678419414850017e-06, 'min_split_gain': 0.8287375091519293, 'subsample': 0.6783766633467947, 'subsample_freq': 3, 'colsample_bytree': 0.7713480415791243}. Best is trial 2 with value: 0.06457048936389305.


Best trial: 2. Best value: 0.0645705:   7%|▋         | 7/100 [01:41<20:22, 13.15s/it]

[I 2025-12-01 08:41:52,188] Trial 6 finished with value: 0.06512427447342065 and parameters: {'learning_rate': 0.016149614799999188, 'num_leaves': 207, 'max_depth': 3, 'min_child_samples': 99, 'min_child_weight': 1.2273800987852967, 'reg_alpha': 6.143857495033091e-07, 'reg_lambda': 1.1212412169964432e-08, 'min_split_gain': 0.8154614284548342, 'subsample': 0.8534286719238086, 'subsample_freq': 8, 'colsample_bytree': 0.8856351733429728}. Best is trial 2 with value: 0.06457048936389305.


Best trial: 2. Best value: 0.0645705:   8%|▊         | 8/100 [02:02<24:07, 15.73s/it]

[I 2025-12-01 08:42:13,462] Trial 7 finished with value: 0.06498433356968869 and parameters: {'learning_rate': 0.012863908101989912, 'num_leaves': 97, 'max_depth': 4, 'min_child_samples': 87, 'min_child_weight': 0.3113095956122124, 'reg_alpha': 9.507847858536042e-06, 'reg_lambda': 3.732717755563729e-08, 'min_split_gain': 0.3109823217156622, 'subsample': 0.6625916610133735, 'subsample_freq': 8, 'colsample_bytree': 0.8187787356776066}. Best is trial 2 with value: 0.06457048936389305.


Best trial: 2. Best value: 0.0645705:   9%|▉         | 9/100 [02:06<18:14, 12.03s/it]

[I 2025-12-01 08:42:17,336] Trial 8 finished with value: 0.06598111376217289 and parameters: {'learning_rate': 0.20441878352493792, 'num_leaves': 125, 'max_depth': 4, 'min_child_samples': 73, 'min_child_weight': 1.1044350847124695, 'reg_alpha': 0.0011258869313624912, 'reg_lambda': 0.08683696167603723, 'min_split_gain': 0.49379559636439074, 'subsample': 0.7613664146909971, 'subsample_freq': 4, 'colsample_bytree': 0.5127095633720475}. Best is trial 2 with value: 0.06457048936389305.


Best trial: 9. Best value: 0.0643781:  10%|█         | 10/100 [02:48<31:45, 21.17s/it]

[I 2025-12-01 08:42:58,992] Trial 9 finished with value: 0.06437810578120436 and parameters: {'learning_rate': 0.01443340240633889, 'num_leaves': 15, 'max_depth': 9, 'min_child_samples': 35, 'min_child_weight': 0.1082138291061399, 'reg_alpha': 1.4726456718740824, 'reg_lambda': 1.7523871598466864e-06, 'min_split_gain': 0.41038292303562973, 'subsample': 0.8777755692715243, 'subsample_freq': 2, 'colsample_bytree': 0.5384899549143964}. Best is trial 9 with value: 0.06437810578120436.


Best trial: 9. Best value: 0.0643781:  11%|█         | 11/100 [02:54<24:29, 16.52s/it]

[I 2025-12-01 08:43:04,947] Trial 10 finished with value: 0.06523439408089429 and parameters: {'learning_rate': 0.10806495824733199, 'num_leaves': 9, 'max_depth': 9, 'min_child_samples': 38, 'min_child_weight': 6.208276873294917, 'reg_alpha': 1.475649304728376, 'reg_lambda': 2.0957649527062313e-06, 'min_split_gain': 0.6379717552110609, 'subsample': 0.5484843898741398, 'subsample_freq': 2, 'colsample_bytree': 0.6403804771198521}. Best is trial 9 with value: 0.06437810578120436.


Best trial: 9. Best value: 0.0643781:  12%|█▏        | 12/100 [03:06<22:19, 15.22s/it]

[I 2025-12-01 08:43:17,203] Trial 11 finished with value: 0.06465239197441772 and parameters: {'learning_rate': 0.05262281255053803, 'num_leaves': 67, 'max_depth': 8, 'min_child_samples': 74, 'min_child_weight': 0.0314128357270742, 'reg_alpha': 8.311476367194397, 'reg_lambda': 0.0021142323970375567, 'min_split_gain': 0.006526520049464649, 'subsample': 0.9014455515734231, 'subsample_freq': 0, 'colsample_bytree': 0.6608835888001324}. Best is trial 9 with value: 0.06437810578120436.


Best trial: 9. Best value: 0.0643781:  13%|█▎        | 13/100 [03:06<15:34, 10.74s/it]

[I 2025-12-01 08:43:17,641] Trial 12 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  14%|█▍        | 14/100 [03:07<10:58,  7.65s/it]

[I 2025-12-01 08:43:18,162] Trial 13 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  15%|█▌        | 15/100 [03:20<13:23,  9.46s/it]

[I 2025-12-01 08:43:31,792] Trial 14 finished with value: 0.06505285562995594 and parameters: {'learning_rate': 0.03906124460339477, 'num_leaves': 71, 'max_depth': 8, 'min_child_samples': 60, 'min_child_weight': 0.0011065136021090993, 'reg_alpha': 0.0011119316820459558, 'reg_lambda': 1.6189116655480906e-05, 'min_split_gain': 0.6318201239960418, 'subsample': 0.8124624230002184, 'subsample_freq': 5, 'colsample_bytree': 0.6964466627628921}. Best is trial 9 with value: 0.06437810578120436.


Best trial: 9. Best value: 0.0643781:  16%|█▌        | 16/100 [03:32<14:05, 10.06s/it]

[I 2025-12-01 08:43:43,263] Trial 15 finished with value: 0.06613843102376403 and parameters: {'learning_rate': 0.08801819602632835, 'num_leaves': 167, 'max_depth': 9, 'min_child_samples': 78, 'min_child_weight': 0.07539864683189276, 'reg_alpha': 1.2747154038311253e-08, 'reg_lambda': 0.21766039991025712, 'min_split_gain': 0.013179214380923565, 'subsample': 0.9065559864045057, 'subsample_freq': 1, 'colsample_bytree': 0.5870784569531349}. Best is trial 9 with value: 0.06437810578120436.


Best trial: 9. Best value: 0.0643781:  17%|█▋        | 17/100 [03:32<09:56,  7.19s/it]

[I 2025-12-01 08:43:43,776] Trial 16 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  18%|█▊        | 18/100 [03:33<07:04,  5.18s/it]

[I 2025-12-01 08:43:44,269] Trial 17 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  19%|█▉        | 19/100 [03:34<05:13,  3.87s/it]

[I 2025-12-01 08:43:45,108] Trial 18 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  20%|██        | 20/100 [03:34<03:56,  2.96s/it]

[I 2025-12-01 08:43:45,915] Trial 19 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  21%|██        | 21/100 [03:47<07:30,  5.70s/it]

[I 2025-12-01 08:43:58,019] Trial 20 finished with value: 0.06473613667527041 and parameters: {'learning_rate': 0.049014400339869126, 'num_leaves': 27, 'max_depth': 6, 'min_child_samples': 66, 'min_child_weight': 0.22382977000298435, 'reg_alpha': 0.00045135953361543263, 'reg_lambda': 1.938418664214207e-05, 'min_split_gain': 0.37775911476591023, 'subsample': 0.7950223784771869, 'subsample_freq': 4, 'colsample_bytree': 0.5091560990452557}. Best is trial 9 with value: 0.06437810578120436.


Best trial: 9. Best value: 0.0643781:  22%|██▏       | 22/100 [03:57<09:25,  7.25s/it]

[I 2025-12-01 08:44:08,871] Trial 21 finished with value: 0.06501376076081883 and parameters: {'learning_rate': 0.07482227022847905, 'num_leaves': 62, 'max_depth': 8, 'min_child_samples': 83, 'min_child_weight': 0.02881328776398132, 'reg_alpha': 6.574194346314073, 'reg_lambda': 0.001359746046772486, 'min_split_gain': 0.011026506906002438, 'subsample': 0.9298511164779876, 'subsample_freq': 0, 'colsample_bytree': 0.6522273130007921}. Best is trial 9 with value: 0.06437810578120436.


Best trial: 9. Best value: 0.0643781:  23%|██▎       | 23/100 [03:59<07:14,  5.64s/it]

[I 2025-12-01 08:44:10,780] Trial 22 pruned. Trial was pruned at iteration 133.


Best trial: 9. Best value: 0.0643781:  24%|██▍       | 24/100 [04:00<05:15,  4.16s/it]

[I 2025-12-01 08:44:11,468] Trial 23 pruned. Trial was pruned at iteration 129.


Best trial: 9. Best value: 0.0643781:  25%|██▌       | 25/100 [04:16<09:37,  7.70s/it]

[I 2025-12-01 08:44:27,429] Trial 24 finished with value: 0.0646321707541869 and parameters: {'learning_rate': 0.04808168512897296, 'num_leaves': 45, 'max_depth': 7, 'min_child_samples': 77, 'min_child_weight': 0.11379410566589125, 'reg_alpha': 2.438254986307382, 'reg_lambda': 0.00013649562061832058, 'min_split_gain': 0.07392131180409385, 'subsample': 0.9005784996140654, 'subsample_freq': 3, 'colsample_bytree': 0.6095043058550895}. Best is trial 9 with value: 0.06437810578120436.


Best trial: 9. Best value: 0.0643781:  26%|██▌       | 26/100 [04:16<06:48,  5.51s/it]

[I 2025-12-01 08:44:27,846] Trial 25 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  27%|██▋       | 27/100 [04:17<04:49,  3.96s/it]

[I 2025-12-01 08:44:28,180] Trial 26 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  28%|██▊       | 28/100 [04:17<03:30,  2.93s/it]

[I 2025-12-01 08:44:28,704] Trial 27 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  29%|██▉       | 29/100 [04:18<02:32,  2.15s/it]

[I 2025-12-01 08:44:29,031] Trial 28 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  30%|███       | 30/100 [04:18<01:53,  1.63s/it]

[I 2025-12-01 08:44:29,446] Trial 29 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  31%|███       | 31/100 [04:20<02:00,  1.75s/it]

[I 2025-12-01 08:44:31,464] Trial 30 pruned. Trial was pruned at iteration 77.


Best trial: 9. Best value: 0.0643781:  32%|███▏      | 32/100 [04:36<06:55,  6.11s/it]

[I 2025-12-01 08:44:47,758] Trial 31 finished with value: 0.06478117122263463 and parameters: {'learning_rate': 0.04468106676763554, 'num_leaves': 85, 'max_depth': 8, 'min_child_samples': 76, 'min_child_weight': 0.03702826912151809, 'reg_alpha': 6.5857833925072296, 'reg_lambda': 0.0004121766042748579, 'min_split_gain': 0.05175347285981846, 'subsample': 0.9071657516456156, 'subsample_freq': 0, 'colsample_bytree': 0.678459764825505}. Best is trial 9 with value: 0.06437810578120436.


Best trial: 9. Best value: 0.0643781:  33%|███▎      | 33/100 [04:38<05:25,  4.86s/it]

[I 2025-12-01 08:44:49,708] Trial 32 pruned. Trial was pruned at iteration 95.


Best trial: 9. Best value: 0.0643781:  34%|███▍      | 34/100 [04:39<03:54,  3.55s/it]

[I 2025-12-01 08:44:50,189] Trial 33 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  35%|███▌      | 35/100 [04:40<03:01,  2.79s/it]

[I 2025-12-01 08:44:51,211] Trial 34 pruned. Trial was pruned at iteration 137.


Best trial: 9. Best value: 0.0643781:  36%|███▌      | 36/100 [04:40<02:17,  2.15s/it]

[I 2025-12-01 08:44:51,851] Trial 35 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  37%|███▋      | 37/100 [04:43<02:25,  2.32s/it]

[I 2025-12-01 08:44:54,559] Trial 36 pruned. Trial was pruned at iteration 123.


Best trial: 9. Best value: 0.0643781:  38%|███▊      | 38/100 [04:44<01:55,  1.87s/it]

[I 2025-12-01 08:44:55,382] Trial 37 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  39%|███▉      | 39/100 [04:45<01:33,  1.53s/it]

[I 2025-12-01 08:44:56,139] Trial 38 pruned. Trial was pruned at iteration 57.


Best trial: 9. Best value: 0.0643781:  40%|████      | 40/100 [04:54<03:51,  3.86s/it]

[I 2025-12-01 08:45:05,414] Trial 39 finished with value: 0.06483382492784782 and parameters: {'learning_rate': 0.06755657172306859, 'num_leaves': 238, 'max_depth': 5, 'min_child_samples': 74, 'min_child_weight': 0.05246554944323991, 'reg_alpha': 8.492469797939589e-07, 'reg_lambda': 0.00020000537293217037, 'min_split_gain': 0.11495103686786867, 'subsample': 0.9193542080601775, 'subsample_freq': 4, 'colsample_bytree': 0.5285320251016934}. Best is trial 9 with value: 0.06437810578120436.


Best trial: 9. Best value: 0.0643781:  41%|████      | 41/100 [04:55<02:50,  2.89s/it]

[I 2025-12-01 08:45:06,041] Trial 40 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  42%|████▏     | 42/100 [04:55<02:02,  2.12s/it]

[I 2025-12-01 08:45:06,361] Trial 41 pruned. Trial was pruned at iteration 21.


Best trial: 9. Best value: 0.0643781:  43%|████▎     | 43/100 [04:55<01:30,  1.59s/it]

[I 2025-12-01 08:45:06,718] Trial 42 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  44%|████▍     | 44/100 [04:56<01:08,  1.23s/it]

[I 2025-12-01 08:45:07,108] Trial 43 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  45%|████▌     | 45/100 [04:56<00:53,  1.02it/s]

[I 2025-12-01 08:45:07,511] Trial 44 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  46%|████▌     | 46/100 [04:57<00:50,  1.07it/s]

[I 2025-12-01 08:45:08,344] Trial 45 pruned. Trial was pruned at iteration 120.


Best trial: 9. Best value: 0.0643781:  47%|████▋     | 47/100 [04:57<00:40,  1.32it/s]

[I 2025-12-01 08:45:08,687] Trial 46 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  48%|████▊     | 48/100 [04:57<00:31,  1.66it/s]

[I 2025-12-01 08:45:08,922] Trial 47 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  49%|████▉     | 49/100 [04:58<00:32,  1.55it/s]

[I 2025-12-01 08:45:09,662] Trial 48 pruned. Trial was pruned at iteration 89.


Best trial: 9. Best value: 0.0643781:  50%|█████     | 50/100 [04:59<00:26,  1.86it/s]

[I 2025-12-01 08:45:09,956] Trial 49 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  51%|█████     | 51/100 [04:59<00:23,  2.08it/s]

[I 2025-12-01 08:45:10,303] Trial 50 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  52%|█████▏    | 52/100 [04:59<00:22,  2.12it/s]

[I 2025-12-01 08:45:10,753] Trial 51 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  53%|█████▎    | 53/100 [05:00<00:24,  1.91it/s]

[I 2025-12-01 08:45:11,394] Trial 52 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  54%|█████▍    | 54/100 [05:00<00:22,  2.07it/s]

[I 2025-12-01 08:45:11,784] Trial 53 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  55%|█████▌    | 55/100 [05:01<00:23,  1.94it/s]

[I 2025-12-01 08:45:12,372] Trial 54 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  56%|█████▌    | 56/100 [05:01<00:20,  2.12it/s]

[I 2025-12-01 08:45:12,741] Trial 55 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  57%|█████▋    | 57/100 [05:02<00:20,  2.07it/s]

[I 2025-12-01 08:45:13,252] Trial 56 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  58%|█████▊    | 58/100 [05:02<00:18,  2.21it/s]

[I 2025-12-01 08:45:13,633] Trial 57 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  59%|█████▉    | 59/100 [05:03<00:18,  2.21it/s]

[I 2025-12-01 08:45:14,086] Trial 58 pruned. Trial was pruned at iteration 32.


Best trial: 9. Best value: 0.0643781:  60%|██████    | 60/100 [05:03<00:18,  2.19it/s]

[I 2025-12-01 08:45:14,556] Trial 59 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  61%|██████    | 61/100 [05:05<00:31,  1.22it/s]

[I 2025-12-01 08:45:16,220] Trial 60 pruned. Trial was pruned at iteration 91.


Best trial: 9. Best value: 0.0643781:  62%|██████▏   | 62/100 [05:05<00:26,  1.44it/s]

[I 2025-12-01 08:45:16,625] Trial 61 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  63%|██████▎   | 63/100 [05:07<00:42,  1.15s/it]

[I 2025-12-01 08:45:18,820] Trial 62 pruned. Trial was pruned at iteration 137.


Best trial: 9. Best value: 0.0643781:  64%|██████▍   | 64/100 [05:08<00:34,  1.05it/s]

[I 2025-12-01 08:45:19,323] Trial 63 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  65%|██████▌   | 65/100 [05:08<00:27,  1.25it/s]

[I 2025-12-01 08:45:19,757] Trial 64 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  66%|██████▌   | 66/100 [05:10<00:37,  1.09s/it]

[I 2025-12-01 08:45:21,536] Trial 65 pruned. Trial was pruned at iteration 76.


Best trial: 9. Best value: 0.0643781:  67%|██████▋   | 67/100 [05:10<00:28,  1.14it/s]

[I 2025-12-01 08:45:21,914] Trial 66 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  68%|██████▊   | 68/100 [05:12<00:37,  1.18s/it]

[I 2025-12-01 08:45:23,813] Trial 67 pruned. Trial was pruned at iteration 90.


Best trial: 9. Best value: 0.0643781:  69%|██████▉   | 69/100 [05:13<00:28,  1.08it/s]

[I 2025-12-01 08:45:24,139] Trial 68 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  70%|███████   | 70/100 [05:13<00:23,  1.26it/s]

[I 2025-12-01 08:45:24,615] Trial 69 pruned. Trial was pruned at iteration 20.


Best trial: 9. Best value: 0.0643781:  71%|███████   | 71/100 [05:15<00:27,  1.04it/s]

[I 2025-12-01 08:45:25,984] Trial 70 pruned. Trial was pruned at iteration 96.


Best trial: 9. Best value: 0.0643781:  72%|███████▏  | 72/100 [05:16<00:28,  1.03s/it]

[I 2025-12-01 08:45:27,170] Trial 71 pruned. Trial was pruned at iteration 123.


Best trial: 9. Best value: 0.0643781:  73%|███████▎  | 73/100 [05:17<00:26,  1.02it/s]

[I 2025-12-01 08:45:28,031] Trial 72 pruned. Trial was pruned at iteration 123.


Best trial: 9. Best value: 0.0643781:  74%|███████▍  | 74/100 [05:18<00:26,  1.00s/it]

[I 2025-12-01 08:45:29,081] Trial 73 pruned. Trial was pruned at iteration 123.


Best trial: 9. Best value: 0.0643781:  75%|███████▌  | 75/100 [05:34<02:19,  5.56s/it]

[I 2025-12-01 08:45:45,291] Trial 74 finished with value: 0.06447126939447739 and parameters: {'learning_rate': 0.05573111574663507, 'num_leaves': 235, 'max_depth': 6, 'min_child_samples': 80, 'min_child_weight': 0.10696497549201958, 'reg_alpha': 5.500455215925543, 'reg_lambda': 0.0003254792362017694, 'min_split_gain': 0.03364475913300199, 'subsample': 0.8779303161375109, 'subsample_freq': 4, 'colsample_bytree': 0.5079777475673698}. Best is trial 9 with value: 0.06437810578120436.


Best trial: 9. Best value: 0.0643781:  76%|███████▌  | 76/100 [05:48<03:11,  8.00s/it]

[I 2025-12-01 08:45:58,960] Trial 75 finished with value: 0.06446706418381043 and parameters: {'learning_rate': 0.054976726739633296, 'num_leaves': 200, 'max_depth': 6, 'min_child_samples': 80, 'min_child_weight': 0.10231157181367961, 'reg_alpha': 6.7803582486948155, 'reg_lambda': 0.00038932956541993705, 'min_split_gain': 0.8718792628031496, 'subsample': 0.8786718760221435, 'subsample_freq': 3, 'colsample_bytree': 0.509151504273782}. Best is trial 9 with value: 0.06437810578120436.


Best trial: 76. Best value: 0.0643569:  77%|███████▋  | 77/100 [06:04<03:59, 10.41s/it]

[I 2025-12-01 08:46:15,002] Trial 76 finished with value: 0.06435685167659946 and parameters: {'learning_rate': 0.056692846707476825, 'num_leaves': 214, 'max_depth': 6, 'min_child_samples': 97, 'min_child_weight': 0.30784186040552297, 'reg_alpha': 4.763643552050694, 'reg_lambda': 9.834121930599604e-05, 'min_split_gain': 0.8460048017532359, 'subsample': 0.8260476911307014, 'subsample_freq': 3, 'colsample_bytree': 0.5051959552498168}. Best is trial 76 with value: 0.06435685167659946.


Best trial: 76. Best value: 0.0643569:  78%|███████▊  | 78/100 [06:07<03:00,  8.20s/it]

[I 2025-12-01 08:46:18,060] Trial 77 pruned. Trial was pruned at iteration 269.


Best trial: 76. Best value: 0.0643569:  79%|███████▉  | 79/100 [06:16<03:02,  8.70s/it]

[I 2025-12-01 08:46:27,905] Trial 78 finished with value: 0.0649135486261934 and parameters: {'learning_rate': 0.07905303645861181, 'num_leaves': 242, 'max_depth': 6, 'min_child_samples': 81, 'min_child_weight': 0.1883710101904615, 'reg_alpha': 1.9738971026090413, 'reg_lambda': 0.0010504071482077563, 'min_split_gain': 0.9223532293506662, 'subsample': 0.8018144048643758, 'subsample_freq': 3, 'colsample_bytree': 0.5105359104910981}. Best is trial 76 with value: 0.06435685167659946.


Best trial: 76. Best value: 0.0643569:  80%|████████  | 80/100 [06:17<02:04,  6.22s/it]

[I 2025-12-01 08:46:28,334] Trial 79 pruned. Trial was pruned at iteration 20.


Best trial: 76. Best value: 0.0643569:  81%|████████  | 81/100 [06:31<02:41,  8.51s/it]

[I 2025-12-01 08:46:42,185] Trial 80 finished with value: 0.06453435432415566 and parameters: {'learning_rate': 0.05647021057387591, 'num_leaves': 201, 'max_depth': 6, 'min_child_samples': 92, 'min_child_weight': 0.11372952362917932, 'reg_alpha': 5.614396162067026, 'reg_lambda': 9.869689112262341e-05, 'min_split_gain': 0.9137249202430281, 'subsample': 0.8746554997351402, 'subsample_freq': 2, 'colsample_bytree': 0.5357330818407356}. Best is trial 76 with value: 0.06435685167659946.


Best trial: 76. Best value: 0.0643569:  82%|████████▏ | 82/100 [06:34<02:03,  6.88s/it]

[I 2025-12-01 08:46:45,259] Trial 81 pruned. Trial was pruned at iteration 267.


Best trial: 76. Best value: 0.0643569:  83%|████████▎ | 83/100 [06:34<01:24,  4.97s/it]

[I 2025-12-01 08:46:45,793] Trial 82 pruned. Trial was pruned at iteration 20.


Best trial: 76. Best value: 0.0643569:  84%|████████▍ | 84/100 [06:49<02:07,  7.97s/it]

[I 2025-12-01 08:47:00,745] Trial 83 finished with value: 0.0644354938087951 and parameters: {'learning_rate': 0.06357229803225518, 'num_leaves': 202, 'max_depth': 7, 'min_child_samples': 97, 'min_child_weight': 0.11795486689212152, 'reg_alpha': 4.791690696672405, 'reg_lambda': 0.00015882726967554004, 'min_split_gain': 0.852960163238523, 'subsample': 0.8341511558473634, 'subsample_freq': 2, 'colsample_bytree': 0.5320584104855287}. Best is trial 76 with value: 0.06435685167659946.


Best trial: 76. Best value: 0.0643569:  85%|████████▌ | 85/100 [06:52<01:37,  6.47s/it]

[I 2025-12-01 08:47:03,733] Trial 84 pruned. Trial was pruned at iteration 216.


Best trial: 76. Best value: 0.0643569:  86%|████████▌ | 86/100 [06:54<01:08,  4.91s/it]

[I 2025-12-01 08:47:05,011] Trial 85 pruned. Trial was pruned at iteration 95.


Best trial: 76. Best value: 0.0643569:  87%|████████▋ | 87/100 [06:55<00:51,  3.94s/it]

[I 2025-12-01 08:47:06,667] Trial 86 pruned. Trial was pruned at iteration 103.


Best trial: 76. Best value: 0.0643569:  88%|████████▊ | 88/100 [07:06<01:11,  5.96s/it]

[I 2025-12-01 08:47:17,349] Trial 87 finished with value: 0.0645772119645282 and parameters: {'learning_rate': 0.07520651714710837, 'num_leaves': 185, 'max_depth': 6, 'min_child_samples': 100, 'min_child_weight': 0.0951616088845103, 'reg_alpha': 5.225926592041438, 'reg_lambda': 0.0012035292899894425, 'min_split_gain': 0.9930332779188475, 'subsample': 0.8095207672047176, 'subsample_freq': 3, 'colsample_bytree': 0.5266511004917029}. Best is trial 76 with value: 0.06435685167659946.


Best trial: 76. Best value: 0.0643569:  89%|████████▉ | 89/100 [07:08<00:52,  4.79s/it]

[I 2025-12-01 08:47:19,402] Trial 88 pruned. Trial was pruned at iteration 267.


Best trial: 76. Best value: 0.0643569:  90%|█████████ | 90/100 [07:21<01:13,  7.34s/it]

[I 2025-12-01 08:47:32,692] Trial 89 finished with value: 0.06509586089411533 and parameters: {'learning_rate': 0.0791929047190248, 'num_leaves': 168, 'max_depth': 12, 'min_child_samples': 95, 'min_child_weight': 0.3465329529849674, 'reg_alpha': 4.911613917384222, 'reg_lambda': 0.005411452690011253, 'min_split_gain': 0.935294474556492, 'subsample': 0.8099417837613144, 'subsample_freq': 3, 'colsample_bytree': 0.570821107036454}. Best is trial 76 with value: 0.06435685167659946.


Best trial: 76. Best value: 0.0643569:  91%|█████████ | 91/100 [07:22<00:49,  5.49s/it]

[I 2025-12-01 08:47:33,874] Trial 90 pruned. Trial was pruned at iteration 93.


Best trial: 76. Best value: 0.0643569:  92%|█████████▏| 92/100 [07:40<01:13,  9.17s/it]

[I 2025-12-01 08:47:51,614] Trial 91 finished with value: 0.06446845826867749 and parameters: {'learning_rate': 0.05562331355076857, 'num_leaves': 204, 'max_depth': 6, 'min_child_samples': 88, 'min_child_weight': 0.09100299811713242, 'reg_alpha': 5.721240923053518, 'reg_lambda': 0.0023539978049270157, 'min_split_gain': 0.9862579559127019, 'subsample': 0.8842384480744665, 'subsample_freq': 3, 'colsample_bytree': 0.521709530053008}. Best is trial 76 with value: 0.06435685167659946.


Best trial: 76. Best value: 0.0643569:  93%|█████████▎| 93/100 [07:43<00:50,  7.16s/it]

[I 2025-12-01 08:47:54,084] Trial 92 pruned. Trial was pruned at iteration 200.


Best trial: 76. Best value: 0.0643569:  94%|█████████▍| 94/100 [07:43<00:30,  5.13s/it]

[I 2025-12-01 08:47:54,496] Trial 93 pruned. Trial was pruned at iteration 20.


Best trial: 76. Best value: 0.0643569:  95%|█████████▌| 95/100 [07:45<00:20,  4.16s/it]

[I 2025-12-01 08:47:56,386] Trial 94 pruned. Trial was pruned at iteration 200.


Best trial: 76. Best value: 0.0643569:  96%|█████████▌| 96/100 [07:46<00:12,  3.23s/it]

[I 2025-12-01 08:47:57,436] Trial 95 pruned. Trial was pruned at iteration 110.


Best trial: 76. Best value: 0.0643569:  97%|█████████▋| 97/100 [07:48<00:08,  2.97s/it]

[I 2025-12-01 08:47:59,791] Trial 96 pruned. Trial was pruned at iteration 127.


Best trial: 76. Best value: 0.0643569:  98%|█████████▊| 98/100 [07:49<00:04,  2.37s/it]

[I 2025-12-01 08:48:00,779] Trial 97 pruned. Trial was pruned at iteration 20.


Best trial: 76. Best value: 0.0643569:  99%|█████████▉| 99/100 [07:50<00:01,  1.82s/it]

[I 2025-12-01 08:48:01,308] Trial 98 pruned. Trial was pruned at iteration 20.


Best trial: 76. Best value: 0.0643569: 100%|██████████| 100/100 [07:51<00:00,  4.71s/it]

[I 2025-12-01 08:48:01,960] Trial 99 pruned. Trial was pruned at iteration 20.





In [27]:
# Display best results
print("Best trial:")
print(f"  Value (Log Loss): {study.best_trial.value:.4f}")
print("\nBest hyperparameters:")
for key, value in study.best_trial.params.items():
    print(f"  {key}: {value}")

# Quick AUC check with best params
best_params_check = {
    "objective": "binary",
    "metric": "binary_logloss",
    "verbosity": -1,
    "random_state": 42,
    "n_jobs": -1,
    "n_estimators": 10_000,
    **study.best_trial.params,
}

model_check = lgb.LGBMClassifier(**best_params_check)
model_check.fit(
    X_train,
    y_train,
    eval_set=[(X_val, y_val)],
    callbacks=[lgb.early_stopping(stopping_rounds=50, verbose=False)],
)

y_pred_check = model_check.predict_proba(X_val)[:, 1]
print(f"\nValidation metrics with best params:")
print(f"  ROC-AUC: {roc_auc_score(y_val, y_pred_check):.4f}")
print(f"  Log Loss: {log_loss(y_val, y_pred_check):.4f}")
print(f"  Best iteration: {model_check.best_iteration_}")

Best trial:
  Value (Log Loss): 0.0644

Best hyperparameters:
  learning_rate: 0.056692846707476825
  num_leaves: 214
  max_depth: 6
  min_child_samples: 97
  min_child_weight: 0.30784186040552297
  reg_alpha: 4.763643552050694
  reg_lambda: 9.834121930599604e-05
  min_split_gain: 0.8460048017532359
  subsample: 0.8260476911307014
  subsample_freq: 3
  colsample_bytree: 0.5051959552498168

Validation metrics with best params:
  ROC-AUC: 0.8990
  Log Loss: 0.0655
  Best iteration: 142


## Visualize Optimization Results

In [28]:
# Optimization history
import plotly
fig = optuna.visualization.plot_optimization_history(study)
fig.show()

ImportError: Tried to import 'plotly' but failed. Please make sure that the package is installed correctly to use this feature. Actual error: No module named 'plotly'.

In [None]:
# Parameter importances
fig = optuna.visualization.plot_param_importances(study)
fig.show()

In [None]:
# Parallel coordinate plot
fig = optuna.visualization.plot_parallel_coordinate(study)
fig.show()

In [None]:
# Slice plot for key parameters
fig = optuna.visualization.plot_slice(
    study, 
    params=["learning_rate", "num_leaves", "max_depth", "reg_alpha", "reg_lambda"]
)
fig.show()

## Train Final Model with Best Parameters

In [None]:
# Construct best parameters dict
best_params = {
    "objective": "binary",
    "metric": "binary_logloss",
    "verbosity": -1,
    "random_state": 42,
    "n_jobs": -1,
    "n_estimators": 10_000,
    **study.best_trial.params,
}

print("Best parameters for final model:")
for k, v in best_params.items():
    print(f"  {k}: {v}")

In [None]:
# Train final model on full training set
final_model = lgb.LGBMClassifier(**best_params)

final_model.fit(
    X_train,
    y_train,
    eval_set=[(X_train, y_train), (X_val, y_val)],
    callbacks=[
        lgb.early_stopping(stopping_rounds=50, verbose=False),
        lgb.log_evaluation(period=100),
    ],
)

print(f"\nBest iteration: {final_model.best_iteration_}")

In [None]:
# Evaluate on held-out validation set
y_pred_proba = final_model.predict_proba(X_val)[:, 1]

metrics = {
    "ROC-AUC": roc_auc_score(y_val, y_pred_proba),
    "PR-AUC": average_precision_score(y_val, y_pred_proba),
    "Log Loss": log_loss(y_val, y_pred_proba),
    "Brier Score": brier_score_loss(y_val, y_pred_proba),
}

print("\nFinal Model Validation Metrics:")
for metric, value in metrics.items():
    print(f"  {metric}: {value:.4f}")

## Export Best Parameters

In [None]:
# Save best parameters for use in other notebooks
import json
from credit_risk_xai.config import PROJ_ROOT

output_path = PROJ_ROOT / "models" / "best_lgbm_params.json"
output_path.parent.mkdir(parents=True, exist_ok=True)

# Convert to serializable format
params_to_save = {k: v for k, v in best_params.items() if k not in ["verbosity", "n_jobs"]}

with open(output_path, "w") as f:
    json.dump(params_to_save, f, indent=2)

print(f"Best parameters saved to: {output_path}")

In [None]:
# Print parameters in a format ready to copy into code
print("\n# Copy these parameters to use in run_lightgbm_training():")
print("params = {")
for k, v in study.best_trial.params.items():
    if isinstance(v, str):
        print(f'    "{k}": "{v}",')
    elif isinstance(v, bool):
        print(f'    "{k}": {v},')
    elif isinstance(v, float):
        print(f'    "{k}": {v:.6g},')
    else:
        print(f'    "{k}": {v},')
print("}")

## Compare with Default Parameters

In [None]:
# Train model with default parameters for comparison
default_params = {
    "objective": "binary",
    "n_estimators": 10_000,
    "learning_rate": 0.05,
    "num_leaves": 31,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "random_state": 42,
    "n_jobs": -1,
    "verbosity": -1,
    "is_unbalance": False,
    "metric": "binary_logloss",
}

default_model = lgb.LGBMClassifier(**default_params)
default_model.fit(
    X_train,
    y_train,
    eval_set=[(X_val, y_val)],
    callbacks=[
        lgb.early_stopping(stopping_rounds=50, verbose=False),
    ],
)

y_pred_default = default_model.predict_proba(X_val)[:, 1]

default_metrics = {
    "ROC-AUC": roc_auc_score(y_val, y_pred_default),
    "PR-AUC": average_precision_score(y_val, y_pred_default),
    "Log Loss": log_loss(y_val, y_pred_default),
    "Brier Score": brier_score_loss(y_val, y_pred_default),
}

In [None]:
# Comparison table
comparison = pd.DataFrame({
    "Default": default_metrics,
    "Optimized": metrics,
}).T

comparison["Δ ROC-AUC"] = comparison["ROC-AUC"] - comparison.loc["Default", "ROC-AUC"]
comparison["Δ Log Loss"] = comparison["Log Loss"] - comparison.loc["Default", "Log Loss"]

print("\nComparison: Default vs Optimized Parameters")
print("="*60)
print(comparison.round(4).to_string())

## Summary

The Optuna optimization searched over:
- **Learning rate**: 0.01 - 0.3 (log scale)
- **Tree structure**: num_leaves (8-256), max_depth (3-12), min_child_samples (5-100)
- **Regularization**: L1 (reg_alpha), L2 (reg_lambda), min_split_gain
- **Sampling**: subsample (0.5-1.0), colsample_bytree (0.5-1.0)
- **Class imbalance**: is_unbalance or scale_pos_weight

The best parameters have been saved to `models/best_lgbm_params.json` for use in other notebooks.