In [11]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
from pathlib import Path
import warnings

In [12]:
warnings.simplefilter(action='ignore', category=FutureWarning)

In [13]:
# Importing libraries
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score

In [14]:
%run features_testing.ipynb

In [15]:
# Split the data into explanatory and target variables
X = train.drop("TARGET", axis=1).values
y = train["TARGET"].values
X_test = test.values

In [16]:
#from sklearn.preprocessing import PowerTransformer
#
#pt = PowerTransformer(method='yeo-johnson')
#X_pt = pt.fit_transform(X)
#X_test_pt = pt.transform(X_test)

In [17]:
# Standardization
sc = StandardScaler()
sc.fit(X)
X_std = sc.transform(X)
X_test_std = sc.transform(X_test)

In [18]:
# Split the original data into the training data and the validation data
X_train, X_valid, y_train, y_valid = train_test_split(X_std, y, test_size=0.3, stratify=y, random_state=0)

In [19]:
lgb_params = {
    "objective": "binary",
    "metric": "auc",
    "boosting_type": "gbdt",
    "learning_rate": 0.03,

    "num_leaves": 16,          
    "max_depth": 4,              
    "min_child_samples": 40,
    "min_child_weight": 1e-3,

    "subsample": 0.8,
    "subsample_freq": 1,
    "colsample_bytree": 0.7,

    "reg_alpha": 0.4,    
    "reg_lambda": 0.6,

    "n_estimators": 2000,
    "early_stopping_rounds": 200,
    "random_state": 42,
    "verbose": -1
}

In [20]:
# this will become important when the data grow
import optuna
import lightgbm as lgb

def objective(trial):

    params = {
        "objective": "binary",
        "metric": "auc",
        "boosting_type": "gbdt",

        "num_leaves": trial.suggest_int("num_leaves", 16, 256),
        "max_depth": trial.suggest_int("max_depth", -1, 12),
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.2, log=True),

        "feature_fraction": trial.suggest_float("feature_fraction", 0.6, 1.0),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.6, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 10),

        "min_child_samples": trial.suggest_int("min_child_samples", 10, 200),
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 10.0, log=True),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),

        "n_estimators": trial.suggest_int("n_estimators", 200, 3000),
        "verbose": -1
    }

    model = lgb.LGBMClassifier(**params)

    model.fit(
        X_train,
        y_train,
        eval_set=[(X_valid, y_valid)],
        callbacks=[
            lgb.early_stopping(stopping_rounds=100),
            lgb.log_evaluation(period=0)
        ]
    )

    preds = model.predict_proba(X_valid)[:, 1]
    auc = roc_auc_score(y_valid, preds)

    return auc



study = optuna.create_study(
    direction="maximize",
    study_name="lgbm_optimization"
)

study.optimize(objective, n_trials=50)

print("\nBest AUC:", study.best_value)
print("Best hyperparameters:\n", study.best_params)


  from .autonotebook import tqdm as notebook_tqdm
[I 2025-11-25 01:39:11,205] A new study created in memory with name: lgbm_optimization


Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[413]	valid_0's auc: 0.736492


[I 2025-11-25 01:39:28,072] Trial 0 finished with value: 0.7364922901207483 and parameters: {'num_leaves': 238, 'max_depth': 0, 'learning_rate': 0.0022026225789126717, 'feature_fraction': 0.7574867242714163, 'bagging_fraction': 0.7633346838980356, 'bagging_freq': 2, 'min_child_samples': 54, 'lambda_l1': 9.314393674822856e-08, 'lambda_l2': 2.7190394968328172e-08, 'n_estimators': 413}. Best is trial 0 with value: 0.7364922901207483.


Training until validation scores don't improve for 100 rounds


[I 2025-11-25 01:39:32,899] Trial 1 finished with value: 0.7450032461762222 and parameters: {'num_leaves': 79, 'max_depth': 7, 'learning_rate': 0.01927977029974666, 'feature_fraction': 0.6338454780170465, 'bagging_fraction': 0.6088939163390285, 'bagging_freq': 4, 'min_child_samples': 91, 'lambda_l1': 0.0003919059768423908, 'lambda_l2': 2.1664408558999384e-06, 'n_estimators': 381}. Best is trial 1 with value: 0.7450032461762222.


Did not meet early stopping. Best iteration is:
[381]	valid_0's auc: 0.745003
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[683]	valid_0's auc: 0.742541


[I 2025-11-25 01:39:57,198] Trial 2 finished with value: 0.7425411636549437 and parameters: {'num_leaves': 178, 'max_depth': 0, 'learning_rate': 0.0068072980075061015, 'feature_fraction': 0.8024324759594217, 'bagging_fraction': 0.6290721105776597, 'bagging_freq': 8, 'min_child_samples': 148, 'lambda_l1': 0.0005643016731020475, 'lambda_l2': 3.23566155452872e-05, 'n_estimators': 1591}. Best is trial 1 with value: 0.7450032461762222.


Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[2185]	valid_0's auc: 0.744724


[I 2025-11-25 01:40:12,200] Trial 3 finished with value: 0.7447240632716501 and parameters: {'num_leaves': 252, 'max_depth': 5, 'learning_rate': 0.002893489646080138, 'feature_fraction': 0.7723090916658713, 'bagging_fraction': 0.7416177705639683, 'bagging_freq': 7, 'min_child_samples': 158, 'lambda_l1': 0.44171806309779654, 'lambda_l2': 1.6574317711985213e-07, 'n_estimators': 2185}. Best is trial 1 with value: 0.7450032461762222.


Training until validation scores don't improve for 100 rounds


[I 2025-11-25 01:40:14,885] Trial 4 finished with value: 0.7458637983229818 and parameters: {'num_leaves': 194, 'max_depth': 5, 'learning_rate': 0.06595498431940933, 'feature_fraction': 0.634129276904048, 'bagging_fraction': 0.6613819314639441, 'bagging_freq': 4, 'min_child_samples': 144, 'lambda_l1': 0.2346772693785257, 'lambda_l2': 0.13549104307844156, 'n_estimators': 793}. Best is trial 4 with value: 0.7458637983229818.


Early stopping, best iteration is:
[326]	valid_0's auc: 0.745864
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[780]	valid_0's auc: 0.747795


[I 2025-11-25 01:40:21,653] Trial 5 finished with value: 0.7477948453408648 and parameters: {'num_leaves': 24, 'max_depth': -1, 'learning_rate': 0.016815022427841293, 'feature_fraction': 0.9286011938642567, 'bagging_fraction': 0.7414020303838763, 'bagging_freq': 4, 'min_child_samples': 68, 'lambda_l1': 1.9404250040959405, 'lambda_l2': 0.0018170031688758647, 'n_estimators': 945}. Best is trial 5 with value: 0.7477948453408648.


Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[376]	valid_0's auc: 0.739041


[I 2025-11-25 01:40:27,812] Trial 6 finished with value: 0.73904053973216 and parameters: {'num_leaves': 66, 'max_depth': 0, 'learning_rate': 0.006550598270112647, 'feature_fraction': 0.6929893623182717, 'bagging_fraction': 0.9498288231888623, 'bagging_freq': 6, 'min_child_samples': 92, 'lambda_l1': 1.5798530295120148, 'lambda_l2': 2.3825230458696613, 'n_estimators': 376}. Best is trial 5 with value: 0.7477948453408648.


Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[2078]	valid_0's auc: 0.744935


[I 2025-11-25 01:40:50,020] Trial 7 finished with value: 0.7449353801740035 and parameters: {'num_leaves': 40, 'max_depth': 8, 'learning_rate': 0.0023172216313901467, 'feature_fraction': 0.8136625542969602, 'bagging_fraction': 0.7001089483716304, 'bagging_freq': 4, 'min_child_samples': 173, 'lambda_l1': 9.074421674912926e-07, 'lambda_l2': 1.0974935609033548e-05, 'n_estimators': 2078}. Best is trial 5 with value: 0.7477948453408648.


Training until validation scores don't improve for 100 rounds


[I 2025-11-25 01:40:53,162] Trial 8 finished with value: 0.7425340169077022 and parameters: {'num_leaves': 79, 'max_depth': -1, 'learning_rate': 0.08981309788476642, 'feature_fraction': 0.8785647481712429, 'bagging_fraction': 0.9916556052285209, 'bagging_freq': 7, 'min_child_samples': 26, 'lambda_l1': 0.0003273149937851225, 'lambda_l2': 4.8471298021199525, 'n_estimators': 748}. Best is trial 5 with value: 0.7477948453408648.


Early stopping, best iteration is:
[92]	valid_0's auc: 0.742534
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[2871]	valid_0's auc: 0.743531


[I 2025-11-25 01:42:18,493] Trial 9 finished with value: 0.7435308527734821 and parameters: {'num_leaves': 168, 'max_depth': 0, 'learning_rate': 0.0018319126170573163, 'feature_fraction': 0.8204787753664164, 'bagging_fraction': 0.7485103921777201, 'bagging_freq': 1, 'min_child_samples': 185, 'lambda_l1': 7.321461665307442e-05, 'lambda_l2': 0.17396656834620358, 'n_estimators': 2883}. Best is trial 5 with value: 0.7477948453408648.


Training until validation scores don't improve for 100 rounds


[I 2025-11-25 01:42:25,485] Trial 10 finished with value: 0.7419007405184657 and parameters: {'num_leaves': 124, 'max_depth': 11, 'learning_rate': 0.029314134194362393, 'feature_fraction': 0.9348842029733666, 'bagging_fraction': 0.8749533876835338, 'bagging_freq': 10, 'min_child_samples': 55, 'lambda_l1': 0.033032064447148526, 'lambda_l2': 0.0008370915677162866, 'n_estimators': 1293}. Best is trial 5 with value: 0.7477948453408648.


Early stopping, best iteration is:
[184]	valid_0's auc: 0.741901
Training until validation scores don't improve for 100 rounds


[I 2025-11-25 01:42:26,708] Trial 11 finished with value: 0.7466367300203588 and parameters: {'num_leaves': 200, 'max_depth': 4, 'learning_rate': 0.17808380583812738, 'feature_fraction': 0.9810438460751256, 'bagging_fraction': 0.8297804956275368, 'bagging_freq': 4, 'min_child_samples': 124, 'lambda_l1': 8.324197093640466, 'lambda_l2': 0.005766872952064445, 'n_estimators': 1055}. Best is trial 5 with value: 0.7477948453408648.


Early stopping, best iteration is:
[112]	valid_0's auc: 0.746637
Training until validation scores don't improve for 100 rounds


[I 2025-11-25 01:42:28,483] Trial 12 finished with value: 0.7487711962485042 and parameters: {'num_leaves': 140, 'max_depth': 3, 'learning_rate': 0.1259671614041704, 'feature_fraction': 0.9938684922111991, 'bagging_fraction': 0.8361031789382992, 'bagging_freq': 3, 'min_child_samples': 113, 'lambda_l1': 4.721064816423877, 'lambda_l2': 0.0016724856068595002, 'n_estimators': 1118}. Best is trial 12 with value: 0.7487711962485042.


Early stopping, best iteration is:
[295]	valid_0's auc: 0.748771
Training until validation scores don't improve for 100 rounds


[I 2025-11-25 01:42:32,978] Trial 13 finished with value: 0.7493300677960161 and parameters: {'num_leaves': 127, 'max_depth': 3, 'learning_rate': 0.043389608037253447, 'feature_fraction': 0.9515165198313991, 'bagging_fraction': 0.84473812145531, 'bagging_freq': 2, 'min_child_samples': 62, 'lambda_l1': 0.012270243725199761, 'lambda_l2': 0.0008086739529999648, 'n_estimators': 1489}. Best is trial 13 with value: 0.7493300677960161.


Early stopping, best iteration is:
[908]	valid_0's auc: 0.74933
Training until validation scores don't improve for 100 rounds


[I 2025-11-25 01:42:37,152] Trial 14 finished with value: 0.74931993770325 and parameters: {'num_leaves': 126, 'max_depth': 3, 'learning_rate': 0.04820752098004546, 'feature_fraction': 0.979570081443408, 'bagging_fraction': 0.8871639736245507, 'bagging_freq': 2, 'min_child_samples': 15, 'lambda_l1': 0.016426458283575873, 'lambda_l2': 0.00010776877226595583, 'n_estimators': 1775}. Best is trial 13 with value: 0.7493300677960161.


Early stopping, best iteration is:
[800]	valid_0's auc: 0.74932
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1831]	valid_0's auc: 0.749033


[I 2025-11-25 01:42:43,614] Trial 15 finished with value: 0.7490326732017258 and parameters: {'num_leaves': 113, 'max_depth': 2, 'learning_rate': 0.043809120432981664, 'feature_fraction': 0.9063947400915662, 'bagging_fraction': 0.8918965111551096, 'bagging_freq': 1, 'min_child_samples': 12, 'lambda_l1': 0.006090324034558029, 'lambda_l2': 7.240515169981834e-05, 'n_estimators': 2102}. Best is trial 13 with value: 0.7493300677960161.


Training until validation scores don't improve for 100 rounds


[I 2025-11-25 01:42:50,072] Trial 16 finished with value: 0.748980704753057 and parameters: {'num_leaves': 151, 'max_depth': 2, 'learning_rate': 0.042138800665017295, 'feature_fraction': 0.874361507451198, 'bagging_fraction': 0.9145839293900287, 'bagging_freq': 2, 'min_child_samples': 33, 'lambda_l1': 1.5379824314892766e-05, 'lambda_l2': 1.5192922650920505e-06, 'n_estimators': 1727}. Best is trial 13 with value: 0.7493300677960161.


Did not meet early stopping. Best iteration is:
[1680]	valid_0's auc: 0.748981
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1485]	valid_0's auc: 0.746561


[I 2025-11-25 01:43:10,871] Trial 17 finished with value: 0.7465610480615432 and parameters: {'num_leaves': 109, 'max_depth': 7, 'learning_rate': 0.007110226264493366, 'feature_fraction': 0.9605252007857523, 'bagging_fraction': 0.8120799796774799, 'bagging_freq': 2, 'min_child_samples': 69, 'lambda_l1': 0.010152539831488616, 'lambda_l2': 0.03140378509634037, 'n_estimators': 2585}. Best is trial 13 with value: 0.7493300677960161.


Training until validation scores don't improve for 100 rounds


[I 2025-11-25 01:43:14,410] Trial 18 finished with value: 0.7428696381707617 and parameters: {'num_leaves': 103, 'max_depth': 10, 'learning_rate': 0.05890993509035041, 'feature_fraction': 0.8603804843412043, 'bagging_fraction': 0.8656692401135277, 'bagging_freq': 1, 'min_child_samples': 37, 'lambda_l1': 0.033607984220462024, 'lambda_l2': 0.0001545391428404014, 'n_estimators': 1520}. Best is trial 13 with value: 0.7493300677960161.


Early stopping, best iteration is:
[99]	valid_0's auc: 0.74287
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1481]	valid_0's auc: 0.748713


[I 2025-11-25 01:43:21,188] Trial 19 finished with value: 0.7487127758042912 and parameters: {'num_leaves': 218, 'max_depth': 3, 'learning_rate': 0.025555419183197002, 'feature_fraction': 0.956981974444725, 'bagging_fraction': 0.9291764573298673, 'bagging_freq': 3, 'min_child_samples': 13, 'lambda_l1': 0.0026732465419935206, 'lambda_l2': 4.808026322931121e-06, 'n_estimators': 1875}. Best is trial 13 with value: 0.7493300677960161.


Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[2379]	valid_0's auc: 0.716731


[I 2025-11-25 01:43:28,930] Trial 20 finished with value: 0.7167313577557968 and parameters: {'num_leaves': 157, 'max_depth': 2, 'learning_rate': 0.001021967218000572, 'feature_fraction': 0.9984721882970703, 'bagging_fraction': 0.962631419134815, 'bagging_freq': 5, 'min_child_samples': 82, 'lambda_l1': 2.562134435007091e-05, 'lambda_l2': 3.7379980735042294e-07, 'n_estimators': 2379}. Best is trial 13 with value: 0.7493300677960161.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1704]	valid_0's auc: 0.748807


[I 2025-11-25 01:43:34,806] Trial 21 finished with value: 0.7488065264157111 and parameters: {'num_leaves': 120, 'max_depth': 2, 'learning_rate': 0.04357552037970935, 'feature_fraction': 0.9177511193882806, 'bagging_fraction': 0.8787524640652083, 'bagging_freq': 1, 'min_child_samples': 11, 'lambda_l1': 0.004891124341269195, 'lambda_l2': 8.587309616443709e-05, 'n_estimators': 1914}. Best is trial 13 with value: 0.7493300677960161.


Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[1429]	valid_0's auc: 0.748223


[I 2025-11-25 01:43:42,649] Trial 22 finished with value: 0.7482226489155565 and parameters: {'num_leaves': 93, 'max_depth': 4, 'learning_rate': 0.011358013770769584, 'feature_fraction': 0.9020286337815847, 'bagging_fraction': 0.8990447430238113, 'bagging_freq': 2, 'min_child_samples': 45, 'lambda_l1': 0.06102888818449037, 'lambda_l2': 0.00022757817916255175, 'n_estimators': 1432}. Best is trial 13 with value: 0.7493300677960161.


Training until validation scores don't improve for 100 rounds


[I 2025-11-25 01:43:45,814] Trial 23 finished with value: 0.7435004292901392 and parameters: {'num_leaves': 135, 'max_depth': 1, 'learning_rate': 0.09636813939518193, 'feature_fraction': 0.8436592959238607, 'bagging_fraction': 0.7827009526534341, 'bagging_freq': 3, 'min_child_samples': 22, 'lambda_l1': 0.002177227976913047, 'lambda_l2': 4.282511786994453e-05, 'n_estimators': 2208}. Best is trial 13 with value: 0.7493300677960161.


Early stopping, best iteration is:
[1028]	valid_0's auc: 0.7435
Training until validation scores don't improve for 100 rounds


[I 2025-11-25 01:43:50,373] Trial 24 finished with value: 0.7493641259546003 and parameters: {'num_leaves': 57, 'max_depth': 4, 'learning_rate': 0.03277842323048595, 'feature_fraction': 0.9569717768433371, 'bagging_fraction': 0.8506767379185446, 'bagging_freq': 1, 'min_child_samples': 48, 'lambda_l1': 0.17636584028657937, 'lambda_l2': 0.025244809309038312, 'n_estimators': 2486}. Best is trial 24 with value: 0.7493641259546003.


Early stopping, best iteration is:
[677]	valid_0's auc: 0.749364
Training until validation scores don't improve for 100 rounds


[I 2025-11-25 01:43:55,838] Trial 25 finished with value: 0.7468062545430249 and parameters: {'num_leaves': 54, 'max_depth': 6, 'learning_rate': 0.02802574979425312, 'feature_fraction': 0.9569469811594381, 'bagging_fraction': 0.8506204374964985, 'bagging_freq': 2, 'min_child_samples': 68, 'lambda_l1': 0.33472593852890514, 'lambda_l2': 0.03292450866729531, 'n_estimators': 2970}. Best is trial 24 with value: 0.7493641259546003.


Early stopping, best iteration is:
[464]	valid_0's auc: 0.746806
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[2279]	valid_0's auc: 0.749092


[I 2025-11-25 01:44:07,737] Trial 26 finished with value: 0.7490924014138403 and parameters: {'num_leaves': 38, 'max_depth': 4, 'learning_rate': 0.012196227519051403, 'feature_fraction': 0.952574603050908, 'bagging_fraction': 0.8016996568082096, 'bagging_freq': 3, 'min_child_samples': 49, 'lambda_l1': 0.14586512717947225, 'lambda_l2': 0.011647035023484988, 'n_estimators': 2631}. Best is trial 24 with value: 0.7493641259546003.


Training until validation scores don't improve for 100 rounds


[I 2025-11-25 01:44:10,482] Trial 27 finished with value: 0.7478644584392639 and parameters: {'num_leaves': 19, 'max_depth': 6, 'learning_rate': 0.06261403658439246, 'feature_fraction': 0.7165411679890249, 'bagging_fraction': 0.8374916359351381, 'bagging_freq': 5, 'min_child_samples': 37, 'lambda_l1': 0.986142172388348, 'lambda_l2': 0.324211535776139, 'n_estimators': 2471}. Best is trial 24 with value: 0.7493641259546003.


Early stopping, best iteration is:
[371]	valid_0's auc: 0.747864
Training until validation scores don't improve for 100 rounds


[I 2025-11-25 01:44:12,316] Trial 28 finished with value: 0.7384020373797868 and parameters: {'num_leaves': 88, 'max_depth': 9, 'learning_rate': 0.17625906316535592, 'feature_fraction': 0.8981567780076831, 'bagging_fraction': 0.9245258903619051, 'bagging_freq': 1, 'min_child_samples': 78, 'lambda_l1': 0.0231221185379126, 'lambda_l2': 0.8952757307709621, 'n_estimators': 1800}. Best is trial 24 with value: 0.7493641259546003.


Early stopping, best iteration is:
[27]	valid_0's auc: 0.738402
Training until validation scores don't improve for 100 rounds


[I 2025-11-25 01:44:13,972] Trial 29 finished with value: 0.7473350321034031 and parameters: {'num_leaves': 134, 'max_depth': 3, 'learning_rate': 0.1019348744158506, 'feature_fraction': 0.9818922385363813, 'bagging_fraction': 0.7788302219746206, 'bagging_freq': 2, 'min_child_samples': 58, 'lambda_l1': 6.784898059886176e-07, 'lambda_l2': 0.0007374788413606581, 'n_estimators': 1415}. Best is trial 24 with value: 0.7493641259546003.


Early stopping, best iteration is:
[279]	valid_0's auc: 0.747335
Training until validation scores don't improve for 100 rounds


[I 2025-11-25 01:44:17,132] Trial 30 finished with value: 0.7476918279678324 and parameters: {'num_leaves': 223, 'max_depth': 5, 'learning_rate': 0.03455096975462144, 'feature_fraction': 0.766687591202518, 'bagging_fraction': 0.7013867893641085, 'bagging_freq': 10, 'min_child_samples': 103, 'lambda_l1': 0.05256043438003856, 'lambda_l2': 2.2824601103903205e-08, 'n_estimators': 1180}. Best is trial 24 with value: 0.7493641259546003.


Early stopping, best iteration is:
[410]	valid_0's auc: 0.747692
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[2127]	valid_0's auc: 0.748786


[I 2025-11-25 01:44:28,805] Trial 31 finished with value: 0.7487858371188361 and parameters: {'num_leaves': 44, 'max_depth': 4, 'learning_rate': 0.00965988182158067, 'feature_fraction': 0.9504625880359644, 'bagging_fraction': 0.7991943775444823, 'bagging_freq': 3, 'min_child_samples': 43, 'lambda_l1': 0.15319476347258082, 'lambda_l2': 0.008292902299721907, 'n_estimators': 2783}. Best is trial 24 with value: 0.7493641259546003.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1110]	valid_0's auc: 0.74816


[I 2025-11-25 01:44:35,835] Trial 32 finished with value: 0.7481599322494493 and parameters: {'num_leaves': 62, 'max_depth': 4, 'learning_rate': 0.019517078267781762, 'feature_fraction': 0.9316644907502492, 'bagging_fraction': 0.8103761430634416, 'bagging_freq': 2, 'min_child_samples': 51, 'lambda_l1': 0.0011560449301821695, 'lambda_l2': 0.01648676945374737, 'n_estimators': 2640}. Best is trial 24 with value: 0.7493641259546003.


Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[2359]	valid_0's auc: 0.73989


[I 2025-11-25 01:44:43,198] Trial 33 finished with value: 0.739889664235189 and parameters: {'num_leaves': 32, 'max_depth': 1, 'learning_rate': 0.01382318092697137, 'feature_fraction': 0.9769654411126706, 'bagging_fraction': 0.85967959196105, 'bagging_freq': 3, 'min_child_samples': 61, 'lambda_l1': 0.14951086106314931, 'lambda_l2': 0.004625081141518512, 'n_estimators': 2359}. Best is trial 24 with value: 0.7493641259546003.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[462]	valid_0's auc: 0.745721


[I 2025-11-25 01:44:50,457] Trial 34 finished with value: 0.7457208378358098 and parameters: {'num_leaves': 74, 'max_depth': 7, 'learning_rate': 0.022247206039548762, 'feature_fraction': 0.9437606417799129, 'bagging_fraction': 0.7799360788093087, 'bagging_freq': 1, 'min_child_samples': 26, 'lambda_l1': 0.009806519466116864, 'lambda_l2': 0.0640956264447563, 'n_estimators': 2656}. Best is trial 24 with value: 0.7493641259546003.


Training until validation scores don't improve for 100 rounds


[I 2025-11-25 01:44:53,872] Trial 35 finished with value: 0.7482091472336698 and parameters: {'num_leaves': 51, 'max_depth': 3, 'learning_rate': 0.0505566704994927, 'feature_fraction': 0.9690073216342452, 'bagging_fraction': 0.8259810876462601, 'bagging_freq': 3, 'min_child_samples': 87, 'lambda_l1': 0.5392557667457328, 'lambda_l2': 0.0005127366749406588, 'n_estimators': 1652}. Best is trial 24 with value: 0.7493641259546003.


Early stopping, best iteration is:
[660]	valid_0's auc: 0.748209
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[2253]	valid_0's auc: 0.74711


[I 2025-11-25 01:45:12,119] Trial 36 finished with value: 0.7471100551566437 and parameters: {'num_leaves': 98, 'max_depth': 5, 'learning_rate': 0.004777039456929772, 'feature_fraction': 0.8823430129750676, 'bagging_fraction': 0.8977862517752162, 'bagging_freq': 2, 'min_child_samples': 47, 'lambda_l1': 1.4862235064883385e-08, 'lambda_l2': 0.0021449096917335445, 'n_estimators': 2253}. Best is trial 24 with value: 0.7493641259546003.


Training until validation scores don't improve for 100 rounds


[I 2025-11-25 01:45:13,974] Trial 37 finished with value: 0.7267205359764141 and parameters: {'num_leaves': 33, 'max_depth': 1, 'learning_rate': 0.01463292479176534, 'feature_fraction': 0.8508695481525097, 'bagging_fraction': 0.8519775243788696, 'bagging_freq': 5, 'min_child_samples': 100, 'lambda_l1': 0.11236572273867719, 'lambda_l2': 1.8450184373277247e-05, 'n_estimators': 602}. Best is trial 24 with value: 0.7493641259546003.


Did not meet early stopping. Best iteration is:
[602]	valid_0's auc: 0.726721
Training until validation scores don't improve for 100 rounds


[I 2025-11-25 01:45:18,468] Trial 38 finished with value: 0.7459074501851538 and parameters: {'num_leaves': 181, 'max_depth': 6, 'learning_rate': 0.034357461445899885, 'feature_fraction': 0.6091327593184231, 'bagging_fraction': 0.7197611977220365, 'bagging_freq': 9, 'min_child_samples': 75, 'lambda_l1': 0.0007707247283374971, 'lambda_l2': 0.011284640802421116, 'n_estimators': 1975}. Best is trial 24 with value: 0.7493641259546003.


Early stopping, best iteration is:
[336]	valid_0's auc: 0.745907
Training until validation scores don't improve for 100 rounds


[I 2025-11-25 01:45:21,216] Trial 39 finished with value: 0.7474686236595954 and parameters: {'num_leaves': 16, 'max_depth': 4, 'learning_rate': 0.07788998041925643, 'feature_fraction': 0.9180550303772709, 'bagging_fraction': 0.7611951986953762, 'bagging_freq': 6, 'min_child_samples': 20, 'lambda_l1': 3.687827690848575, 'lambda_l2': 0.5374738178536352, 'n_estimators': 2778}. Best is trial 24 with value: 0.7493641259546003.


Early stopping, best iteration is:
[382]	valid_0's auc: 0.747469
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[2508]	valid_0's auc: 0.747332


[I 2025-11-25 01:45:38,245] Trial 40 finished with value: 0.7473321866865242 and parameters: {'num_leaves': 65, 'max_depth': 5, 'learning_rate': 0.0048314262292841826, 'feature_fraction': 0.7397899439348093, 'bagging_fraction': 0.9443557084297107, 'bagging_freq': 1, 'min_child_samples': 32, 'lambda_l1': 0.01509565322355215, 'lambda_l2': 0.09789005748588163, 'n_estimators': 2509}. Best is trial 24 with value: 0.7493641259546003.


Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[2089]	valid_0's auc: 0.748931


[I 2025-11-25 01:45:44,701] Trial 41 finished with value: 0.7489307234985817 and parameters: {'num_leaves': 112, 'max_depth': 2, 'learning_rate': 0.03748640354473076, 'feature_fraction': 0.9091589999885559, 'bagging_fraction': 0.9056251310224116, 'bagging_freq': 1, 'min_child_samples': 11, 'lambda_l1': 0.004179032640144249, 'lambda_l2': 0.00021964535434422885, 'n_estimators': 2096}. Best is trial 24 with value: 0.7493641259546003.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1949]	valid_0's auc: 0.749144


[I 2025-11-25 01:45:52,997] Trial 42 finished with value: 0.7491443698625091 and parameters: {'num_leaves': 150, 'max_depth': 3, 'learning_rate': 0.02049390069572915, 'feature_fraction': 0.9342990924060958, 'bagging_fraction': 0.8876728334939769, 'bagging_freq': 1, 'min_child_samples': 17, 'lambda_l1': 0.00014155650173610856, 'lambda_l2': 5.075656694673525e-05, 'n_estimators': 2293}. Best is trial 24 with value: 0.7493641259546003.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[2078]	valid_0's auc: 0.749059


[I 2025-11-25 01:46:03,134] Trial 43 finished with value: 0.7490585527024589 and parameters: {'num_leaves': 165, 'max_depth': 3, 'learning_rate': 0.020517618845168856, 'feature_fraction': 0.9991109986675908, 'bagging_fraction': 0.8804273883466922, 'bagging_freq': 2, 'min_child_samples': 25, 'lambda_l1': 0.00014112202424702524, 'lambda_l2': 0.0025238919834331065, 'n_estimators': 2346}. Best is trial 24 with value: 0.7493641259546003.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[2272]	valid_0's auc: 0.748683


[I 2025-11-25 01:46:16,162] Trial 44 finished with value: 0.7486828350712087 and parameters: {'num_leaves': 150, 'max_depth': 4, 'learning_rate': 0.011992946411674185, 'feature_fraction': 0.9386186603145337, 'bagging_fraction': 0.9840957981339732, 'bagging_freq': 4, 'min_child_samples': 131, 'lambda_l1': 2.9262118919777514e-06, 'lambda_l2': 4.130448413266834e-06, 'n_estimators': 2741}. Best is trial 24 with value: 0.7493641259546003.


Training until validation scores don't improve for 100 rounds


[I 2025-11-25 01:46:17,535] Trial 45 finished with value: 0.735850850399066 and parameters: {'num_leaves': 189, 'max_depth': 3, 'learning_rate': 0.01769932378780898, 'feature_fraction': 0.9738453882115179, 'bagging_fraction': 0.7984159991128977, 'bagging_freq': 2, 'min_child_samples': 58, 'lambda_l1': 0.0001898764580932504, 'lambda_l2': 1.582158420816704e-05, 'n_estimators': 255}. Best is trial 24 with value: 0.7493641259546003.


Did not meet early stopping. Best iteration is:
[255]	valid_0's auc: 0.735851
Training until validation scores don't improve for 100 rounds


[I 2025-11-25 01:46:23,712] Trial 46 finished with value: 0.7353049032765154 and parameters: {'num_leaves': 253, 'max_depth': 1, 'learning_rate': 0.008041261335894556, 'feature_fraction': 0.9307646148428685, 'bagging_fraction': 0.631900468628867, 'bagging_freq': 3, 'min_child_samples': 40, 'lambda_l1': 0.8287766647367913, 'lambda_l2': 0.0008862585821533026, 'n_estimators': 2014}. Best is trial 24 with value: 0.7493641259546003.


Did not meet early stopping. Best iteration is:
[2007]	valid_0's auc: 0.735305
Training until validation scores don't improve for 100 rounds


[I 2025-11-25 01:46:32,126] Trial 47 finished with value: 0.744161947846727 and parameters: {'num_leaves': 127, 'max_depth': 12, 'learning_rate': 0.025931741698638225, 'feature_fraction': 0.6639191111821765, 'bagging_fraction': 0.820587760757351, 'bagging_freq': 1, 'min_child_samples': 20, 'lambda_l1': 0.00046607538422255166, 'lambda_l2': 8.676186970194065e-07, 'n_estimators': 1675}. Best is trial 24 with value: 0.7493641259546003.


Early stopping, best iteration is:
[244]	valid_0's auc: 0.744162
Training until validation scores don't improve for 100 rounds


[I 2025-11-25 01:46:34,387] Trial 48 finished with value: 0.7463979551005476 and parameters: {'num_leaves': 82, 'max_depth': 5, 'learning_rate': 0.07500022822432262, 'feature_fraction': 0.8000669775833891, 'bagging_fraction': 0.8402379037558965, 'bagging_freq': 7, 'min_child_samples': 31, 'lambda_l1': 6.398845010008367e-05, 'lambda_l2': 9.788326602571199e-08, 'n_estimators': 1265}. Best is trial 24 with value: 0.7493641259546003.


Early stopping, best iteration is:
[195]	valid_0's auc: 0.746398
Training until validation scores don't improve for 100 rounds


[I 2025-11-25 01:46:42,844] Trial 49 finished with value: 0.7423499996602869 and parameters: {'num_leaves': 143, 'max_depth': 0, 'learning_rate': 0.0313592964327413, 'feature_fraction': 0.8857724801907787, 'bagging_fraction': 0.8739765942957983, 'bagging_freq': 4, 'min_child_samples': 49, 'lambda_l1': 0.0015927009556688623, 'lambda_l2': 8.841955516123053e-05, 'n_estimators': 2923}. Best is trial 24 with value: 0.7493641259546003.


Early stopping, best iteration is:
[199]	valid_0's auc: 0.74235

Best AUC: 0.7493641259546003
Best hyperparameters:
 {'num_leaves': 57, 'max_depth': 4, 'learning_rate': 0.03277842323048595, 'feature_fraction': 0.9569717768433371, 'bagging_fraction': 0.8506767379185446, 'bagging_freq': 1, 'min_child_samples': 48, 'lambda_l1': 0.17636584028657937, 'lambda_l2': 0.025244809309038312, 'n_estimators': 2486}


In [None]:
# lightGBM cause seem like this is the best for the old model
import lightgbm as lgb
# optuna para {'num_leaves': 57, 'max_depth': 4, 'learning_rate': 0.03277842323048595, 'feature_fraction': 0.9569717768433371, 'bagging_fraction': 0.8506767379185446, 'bagging_freq': 1, 'min_child_samples': 48, 'lambda_l1': 0.17636584028657937, 'lambda_l2': 0.025244809309038312, 'n_estimators': 2486}

lgbm = lgb.LGBMClassifier()
lgbm.fit(X_train, y_train, eval_set= [(X_valid, y_valid)])

lgbm_train_pred = lgbm.predict_proba(X_train)[:, 1]
lgbm_valid_pred = lgbm.predict_proba(X_valid)[:, 1]

print(f"Train Score: {roc_auc_score(y_train, lgbm_train_pred)}")
print(f"Valid Score: {roc_auc_score(y_valid, lgbm_valid_pred)}")

Train Score: 0.8122957326179272
Valid Score: 0.7411931027094857




### under this is the output part


In [22]:
# Make predictions for the test data
# Change model name if needed
pred = lgbm.predict_proba(X_test_std)[:, 1]



In [23]:
# Put the prediction into the format of submission
sample_sub['TARGET'] = pred
sample_sub

Unnamed: 0,SK_ID_CURR,TARGET
0,171202,0.028224
1,171203,0.110668
2,171204,0.121044
3,171205,0.099954
4,171206,0.195951
...,...,...
61495,232697,0.270269
61496,232698,0.057006
61497,232699,0.046150
61498,232700,0.083779


In [24]:
# Create the "output" directory if it doesn't exist
output_dir = Path.cwd() / "output"
os.makedirs(output_dir, exist_ok=True)

# Specify the new output file path
output_file = output_dir / "submission.csv"

# Save the CSV file to the "output" directory
sample_sub.to_csv(output_file, index=False)