In [1]:
import pandas as pd
import numpy as np
import optuna
from lightgbm import LGBMRegressor
from sklearn.model_selection import cross_val_score, KFold
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt

In [2]:
train = pd.read_csv(r"C:\Users\YILMAZ\Desktop\train_fe.csv")
test = pd.read_csv(r"C:\Users\YILMAZ\Desktop\test_fe.csv")

target = "price_z"
X_train = train.drop(columns=[target])
y_train = train[target]

In [3]:
def objective(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'max_depth': trial.suggest_int('max_depth', 3, 12),
        'learning_rate': trial.suggest_float('learning_rate', 1e-3, 0.3, log=True),
        'num_leaves': trial.suggest_int('num_leaves', 20, 150),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.0, 10.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.0, 10.0),
        'random_state': 42,
        'n_jobs': -1
    }

    model = LGBMRegressor(**params)
    cv = KFold(n_splits=5, shuffle=True, random_state=42)
    rmse = -cross_val_score(model, X_train, y_train, scoring='neg_root_mean_squared_error', cv=cv).mean()
    return rmse

In [4]:
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50, timeout=900)

[I 2025-05-21 21:27:23,216] A new study created in memory with name: no-name-d26fe1b9-958f-4ca9-9107-78489665fd5a


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.022060 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3366
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 784183.422197
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.022433 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3363
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 785279.908805
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.024442 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3364
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [In

[I 2025-05-21 21:27:36,638] Trial 0 finished with value: 94205.197529458 and parameters: {'n_estimators': 216, 'max_depth': 11, 'learning_rate': 0.12762356244624928, 'num_leaves': 44, 'min_child_samples': 78, 'subsample': 0.7339339189404508, 'colsample_bytree': 0.8580187706686745, 'reg_alpha': 7.016924267691681, 'reg_lambda': 7.446007108106598}. Best is trial 0 with value: 94205.197529458.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.032499 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3366
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 784183.422197
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.023296 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3363
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 785279.908805
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.026455 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3364
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [In

[I 2025-05-21 21:29:00,386] Trial 1 finished with value: 94251.54154349663 and parameters: {'n_estimators': 895, 'max_depth': 7, 'learning_rate': 0.005669930382079966, 'num_leaves': 96, 'min_child_samples': 45, 'subsample': 0.6486528923097437, 'colsample_bytree': 0.8213326713780422, 'reg_alpha': 1.1965129303529665, 'reg_lambda': 6.701494064867212}. Best is trial 0 with value: 94205.197529458.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.012883 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3366
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 784183.422197
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.027221 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3363
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 785279.908805
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.025361 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3364
[LightGBM] [Info] Number of data points in t

[I 2025-05-21 21:29:43,052] Trial 2 finished with value: 97886.06878545726 and parameters: {'n_estimators': 711, 'max_depth': 4, 'learning_rate': 0.005118293178176607, 'num_leaves': 93, 'min_child_samples': 13, 'subsample': 0.6906202254572965, 'colsample_bytree': 0.8895986396626805, 'reg_alpha': 7.610065978397626, 'reg_lambda': 2.6886893867053665}. Best is trial 0 with value: 94205.197529458.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.027283 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3366
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 784183.422197
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.023502 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3363
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 785279.908805
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.024487 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3364
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [In

[I 2025-05-21 21:30:59,657] Trial 3 finished with value: 150993.077588351 and parameters: {'n_estimators': 715, 'max_depth': 10, 'learning_rate': 0.0019263938199856438, 'num_leaves': 129, 'min_child_samples': 82, 'subsample': 0.9192997486350867, 'colsample_bytree': 0.8038264191048234, 'reg_alpha': 7.798923717403284, 'reg_lambda': 8.78655734745278}. Best is trial 0 with value: 94205.197529458.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.025736 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3366
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 784183.422197
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.023049 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3363
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 785279.908805
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.021885 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3364
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [In

[I 2025-05-21 21:31:31,422] Trial 4 finished with value: 94320.79281478716 and parameters: {'n_estimators': 853, 'max_depth': 4, 'learning_rate': 0.018635445418757553, 'num_leaves': 45, 'min_child_samples': 53, 'subsample': 0.7581647643939697, 'colsample_bytree': 0.5961491881997947, 'reg_alpha': 3.359429190724077, 'reg_lambda': 5.022561023463538}. Best is trial 0 with value: 94205.197529458.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.023846 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3366
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 784183.422197
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.025670 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3363
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 785279.908805
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.022514 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3364
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [In

[I 2025-05-21 21:32:50,036] Trial 5 finished with value: 93352.88658386005 and parameters: {'n_estimators': 967, 'max_depth': 11, 'learning_rate': 0.011610775274909365, 'num_leaves': 120, 'min_child_samples': 51, 'subsample': 0.9203178743271226, 'colsample_bytree': 0.6891595666364607, 'reg_alpha': 5.892746039084277, 'reg_lambda': 6.113568412640022}. Best is trial 5 with value: 93352.88658386005.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.025155 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3366
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 784183.422197
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.022918 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3363
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 785279.908805
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.027250 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3364
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [In

[I 2025-05-21 21:33:05,567] Trial 6 finished with value: 95370.1681738162 and parameters: {'n_estimators': 405, 'max_depth': 5, 'learning_rate': 0.23671546252261624, 'num_leaves': 25, 'min_child_samples': 77, 'subsample': 0.8693827151355996, 'colsample_bytree': 0.6602177299846226, 'reg_alpha': 6.245940677996648, 'reg_lambda': 8.485479811859866}. Best is trial 5 with value: 93352.88658386005.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.023656 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3366
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 784183.422197
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.022822 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3363
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 785279.908805
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.024150 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3364
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [In

[I 2025-05-21 21:33:24,272] Trial 7 finished with value: 299466.15953724895 and parameters: {'n_estimators': 211, 'max_depth': 11, 'learning_rate': 0.0019351210194307755, 'num_leaves': 66, 'min_child_samples': 69, 'subsample': 0.552810360038358, 'colsample_bytree': 0.9088927594022602, 'reg_alpha': 9.497432782926836, 'reg_lambda': 8.99795519978554}. Best is trial 5 with value: 93352.88658386005.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.022874 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3366
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 784183.422197
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.023498 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3363
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 785279.908805
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.021660 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3364
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [In

[I 2025-05-21 21:33:43,347] Trial 8 finished with value: 234681.4691972187 and parameters: {'n_estimators': 254, 'max_depth': 12, 'learning_rate': 0.0031571385222636875, 'num_leaves': 57, 'min_child_samples': 39, 'subsample': 0.5496395320175216, 'colsample_bytree': 0.6350520776982482, 'reg_alpha': 2.36778252750081, 'reg_lambda': 3.3950739889933947}. Best is trial 5 with value: 93352.88658386005.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.020048 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3366
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 784183.422197
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.021134 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3363
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 785279.908805
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007552 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3364
[LightGBM] [Info] Number of data points in t

[I 2025-05-21 21:34:15,111] Trial 9 finished with value: 96306.18766350248 and parameters: {'n_estimators': 788, 'max_depth': 5, 'learning_rate': 0.2437336488455838, 'num_leaves': 81, 'min_child_samples': 60, 'subsample': 0.5426485307152031, 'colsample_bytree': 0.5562418330007106, 'reg_alpha': 9.98258829708535, 'reg_lambda': 8.90073315102932}. Best is trial 5 with value: 93352.88658386005.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.023600 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3366
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 784183.422197
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.023302 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3363
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 785279.908805
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.023748 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3364
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [In

[I 2025-05-21 21:35:05,613] Trial 10 finished with value: 93607.07810097105 and parameters: {'n_estimators': 590, 'max_depth': 9, 'learning_rate': 0.02846202966735235, 'num_leaves': 149, 'min_child_samples': 21, 'subsample': 0.9901186715640484, 'colsample_bytree': 0.7028377525304326, 'reg_alpha': 3.9749511497133345, 'reg_lambda': 0.43702467047337734}. Best is trial 5 with value: 93352.88658386005.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.023638 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3366
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 784183.422197
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.021246 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3363
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 785279.908805
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.025052 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3364
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [In

[I 2025-05-21 21:35:51,019] Trial 11 finished with value: 93789.89947954225 and parameters: {'n_estimators': 518, 'max_depth': 9, 'learning_rate': 0.03017384865751673, 'num_leaves': 150, 'min_child_samples': 19, 'subsample': 0.9915704992936621, 'colsample_bytree': 0.7194368165765554, 'reg_alpha': 4.352898912791896, 'reg_lambda': 0.0083664643313896}. Best is trial 5 with value: 93352.88658386005.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.022591 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3366
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 784183.422197
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.022952 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3363
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 785279.908805
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.022651 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3364
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [In

[I 2025-05-21 21:36:55,343] Trial 12 finished with value: 93935.78657089625 and parameters: {'n_estimators': 955, 'max_depth': 8, 'learning_rate': 0.04111013553449341, 'num_leaves': 123, 'min_child_samples': 30, 'subsample': 0.9974083337378798, 'colsample_bytree': 0.7145275311515701, 'reg_alpha': 4.9617136154107495, 'reg_lambda': 0.6236483695081829}. Best is trial 5 with value: 93352.88658386005.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.023073 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3366
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 784183.422197
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.023086 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3363
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 785279.908805
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.023157 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3364
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [In

[I 2025-05-21 21:37:51,400] Trial 13 finished with value: 94471.41858279235 and parameters: {'n_estimators': 577, 'max_depth': 7, 'learning_rate': 0.010197644989066543, 'num_leaves': 142, 'min_child_samples': 98, 'subsample': 0.850401566624756, 'colsample_bytree': 0.9919253956309229, 'reg_alpha': 0.12337929828608551, 'reg_lambda': 5.489490134634739}. Best is trial 5 with value: 93352.88658386005.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007803 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3366
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 784183.422197
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008254 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3363
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 785279.908805
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.020185 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [In

[I 2025-05-21 21:38:56,289] Trial 14 finished with value: 94330.51484915911 and parameters: {'n_estimators': 993, 'max_depth': 9, 'learning_rate': 0.06824521259305277, 'num_leaves': 116, 'min_child_samples': 5, 'subsample': 0.9194284745492438, 'colsample_bytree': 0.5062285599776729, 'reg_alpha': 5.684172946176784, 'reg_lambda': 3.3848665828527933}. Best is trial 5 with value: 93352.88658386005.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.022628 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3366
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 784183.422197
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.023409 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3363
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 785279.908805
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.022670 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3364
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [In

[I 2025-05-21 21:39:46,696] Trial 15 finished with value: 93956.61316891911 and parameters: {'n_estimators': 584, 'max_depth': 12, 'learning_rate': 0.011705436139395, 'num_leaves': 108, 'min_child_samples': 31, 'subsample': 0.8248293807247787, 'colsample_bytree': 0.762493899962537, 'reg_alpha': 3.3776562293164014, 'reg_lambda': 1.5433882211730394}. Best is trial 5 with value: 93352.88658386005.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.022820 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3366
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 784183.422197
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.023928 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3363
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 785279.908805
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.021907 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3364
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [In

[I 2025-05-21 21:40:24,068] Trial 16 finished with value: 310745.3115288614 and parameters: {'n_estimators': 385, 'max_depth': 10, 'learning_rate': 0.0010360535989072095, 'num_leaves': 135, 'min_child_samples': 24, 'subsample': 0.9395507261194118, 'colsample_bytree': 0.6742022847593492, 'reg_alpha': 3.435073016856766, 'reg_lambda': 6.2263780879761885}. Best is trial 5 with value: 93352.88658386005.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.022934 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3366
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 784183.422197
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.022360 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3363
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 785279.908805
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.022464 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3364
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [In

[I 2025-05-21 21:41:10,151] Trial 17 finished with value: 94107.85071147415 and parameters: {'n_estimators': 681, 'max_depth': 8, 'learning_rate': 0.057774937205620956, 'num_leaves': 149, 'min_child_samples': 42, 'subsample': 0.7966717660278891, 'colsample_bytree': 0.7576600560687006, 'reg_alpha': 8.565382738841306, 'reg_lambda': 4.475849614406517}. Best is trial 5 with value: 93352.88658386005.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.021870 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3366
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 784183.422197
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.022569 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3363
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 785279.908805
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.022274 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3364
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [In

[I 2025-05-21 21:41:22,023] Trial 18 finished with value: 110872.87330506762 and parameters: {'n_estimators': 116, 'max_depth': 10, 'learning_rate': 0.022667461440981477, 'num_leaves': 122, 'min_child_samples': 61, 'subsample': 0.9494695546103851, 'colsample_bytree': 0.5935386391379371, 'reg_alpha': 6.33985207677242, 'reg_lambda': 1.9638484704485841}. Best is trial 5 with value: 93352.88658386005.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.022680 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3366
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 784183.422197
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.019760 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3363
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 785279.908805
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.021792 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3364
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [In

[I 2025-05-21 21:41:58,731] Trial 19 finished with value: 95789.24488001899 and parameters: {'n_estimators': 426, 'max_depth': 9, 'learning_rate': 0.009757094214536565, 'num_leaves': 103, 'min_child_samples': 5, 'subsample': 0.90996177895638, 'colsample_bytree': 0.6836638098846465, 'reg_alpha': 4.480928608771438, 'reg_lambda': 4.217079204589635}. Best is trial 5 with value: 93352.88658386005.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.022530 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3366
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 784183.422197
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.019373 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3363
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 785279.908805
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.023535 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3364
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [In

[I 2025-05-21 21:42:33,576] Trial 20 finished with value: 94964.60586080166 and parameters: {'n_estimators': 809, 'max_depth': 6, 'learning_rate': 0.12127481163045466, 'num_leaves': 78, 'min_child_samples': 100, 'subsample': 0.8752743518051377, 'colsample_bytree': 0.6133647418009582, 'reg_alpha': 2.2701551316863204, 'reg_lambda': 7.474663822421812}. Best is trial 5 with value: 93352.88658386005.


In [5]:
print("\n Best hyperparameters found by Optuna:")
print(study.best_params)

best_model = LGBMRegressor(**study.best_params, random_state=42, n_jobs=-1)
best_model.fit(X_train, y_train)


 Best hyperparameters found by Optuna:
{'n_estimators': 967, 'max_depth': 11, 'learning_rate': 0.011610775274909365, 'num_leaves': 120, 'min_child_samples': 51, 'subsample': 0.9203178743271226, 'colsample_bytree': 0.6891595666364607, 'reg_alpha': 5.892746039084277, 'reg_lambda': 6.113568412640022}
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.033005 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3368
[LightGBM] [Info] Number of data points in the train set: 156454, number of used features: 31
[LightGBM] [Info] Start training from score 784808.026204


In [7]:
cv = KFold(n_splits=5, shuffle=True, random_state=42)
cv_rmse = -cross_val_score(best_model, X_train, y_train, scoring='neg_root_mean_squared_error', cv=cv)
cv_mae = -cross_val_score(best_model, X_train, y_train, scoring='neg_mean_absolute_error', cv=cv)
cv_r2 = cross_val_score(best_model, X_train, y_train, scoring='r2', cv=cv)

print("\n Tuned LightGBM 5-Fold Cross-Validation Results:")
print(f"🔹 RMSE: {cv_rmse.mean():,.2f} ± {cv_rmse.std():,.2f}")
print(f"🔹 MAE: {cv_mae.mean():,.2f} ± {cv_mae.std():,.2f}")
print(f"🔹 R²: {cv_r2.mean():.4f} ± {cv_r2.std():.4f}")

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.021804 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3366
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 784183.422197
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.022107 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3363
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [Info] Start training from score 785279.908805
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.023063 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3364
[LightGBM] [Info] Number of data points in the train set: 125163, number of used features: 31
[LightGBM] [In