In [1]:
import pandas as pd
import optuna

import sklearn.linear_model as sklm
import sklearn.model_selection as skms
import sklearn.metrics as metrics
import sklearn.pipeline as skpl
import sklearn.preprocessing as skpp
import sklearn.compose as skcmp
import category_encoders as ce

import lightgbm as lgb

from preprocessing import *
from ucimlrepo import fetch_ucirepo

import gc

In [2]:
train_df = pd.read_csv("data/train.csv", index_col="id")
test_df = pd.read_csv("data/test.csv", index_col="id")
orig_df = fetch_ucirepo(id=848)['data']['original']

CONT_FEATS = [
    "cap-diameter",
    "stem-height",
    "stem-width"
]
CAT_FEATS = [c for c in train_df.columns if c not in CONT_FEATS]
RESPONSE_COL = "class"

train_df = convert_cols(train_df, CONT_FEATS, CAT_FEATS)
test_df = convert_cols(test_df, CONT_FEATS, CAT_FEATS)
orig_df = convert_cols(orig_df, CONT_FEATS, CAT_FEATS)

train_df = null_all_non_original_categories(train_df, orig_df, CAT_FEATS)
test_df = null_all_non_original_categories(test_df, orig_df, CAT_FEATS)



In [3]:
for df in [train_df, orig_df]:
    df["class"] = df["class"].cat.rename_categories({"e":0, "p":1})

In [6]:
x_train, x_val, y_train, y_val = skms.train_test_split(train_df.drop(columns=[RESPONSE_COL]), train_df[RESPONSE_COL], stratify = train_df[RESPONSE_COL])


def objective(trial):
    model = lgb.LGBMClassifier(
        boosting_type="dart",
        n_jobs=-1,

        num_leaves=trial.suggest_int("num_leaves", 15, 45),
        max_depth=trial.suggest_int("max_depth", -1, 50),
        learning_rate=trial.suggest_float("learning_rate", 1e-3, 1e-1),
        n_estimators=trial.suggest_int("n_estimators", 1, 500),
        colsample_bytree=trial.suggest_float("colsample_bytree", 0.05, 1),
        reg_lambda=trial.suggest_int("reg_lambda", 0, 20),
        reg_alpha=trial.suggest_int("reg_alpha", 0, 20),
    )

    model.fit(x_train, y_train, eval_metric="mcc")

    preds = model.predict(x_val)

    return metrics.matthews_corrcoef(y_val, preds)


In [7]:
# optuna.delete_study(study_name="dart_tuning", storage="sqlite:///optuna.sqlite3")

study = optuna.create_study(
    direction="maximize",
    storage="sqlite:///optuna.sqlite3",
    study_name="dart_tuning",
    load_if_exists=True
)
study.optimize(objective, n_trials=50)
gc.collect()

[I 2024-08-08 14:37:14,504] Using an existing study with name 'dart_tuning' instead of creating a new one.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.066093 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 14:38:50,343] Trial 5 finished with value: 0.9754634649449754 and parameters: {'num_leaves': 39, 'max_depth': 25, 'learning_rate': 0.05163482386743505, 'n_estimators': 209, 'colsample_bytree': 0.8930219666873452, 'reg_lambda': 10, 'reg_alpha': 14}. Best is trial 5 with value: 0.9754634649449754.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.067631 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 14:39:56,093] Trial 6 finished with value: 0.8770478526595541 and parameters: {'num_leaves': 15, 'max_depth': 36, 'learning_rate': 0.017944482876098282, 'n_estimators': 204, 'colsample_bytree': 0.3417536931845077, 'reg_lambda': 14, 'reg_alpha': 7}. Best is trial 5 with value: 0.9754634649449754.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.058186 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 14:44:37,688] Trial 7 finished with value: 0.9754814295962444 and parameters: {'num_leaves': 20, 'max_depth': 13, 'learning_rate': 0.04589367702208301, 'n_estimators': 472, 'colsample_bytree': 0.724880335325746, 'reg_lambda': 7, 'reg_alpha': 20}. Best is trial 7 with value: 0.9754814295962444.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.066573 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 14:46:08,067] Trial 8 finished with value: 0.9454646708113577 and parameters: {'num_leaves': 23, 'max_depth': 50, 'learning_rate': 0.0297725838867489, 'n_estimators': 230, 'colsample_bytree': 0.9850569477712566, 'reg_lambda': 10, 'reg_alpha': 0}. Best is trial 7 with value: 0.9754814295962444.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.324128 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 14:51:08,993] Trial 9 finished with value: 0.9713371503936649 and parameters: {'num_leaves': 21, 'max_depth': 44, 'learning_rate': 0.03680685986015522, 'n_estimators': 421, 'colsample_bytree': 0.7390817528559167, 'reg_lambda': 17, 'reg_alpha': 9}. Best is trial 7 with value: 0.9754814295962444.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.080284 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 14:54:12,249] Trial 10 finished with value: 0.9796151506941281 and parameters: {'num_leaves': 38, 'max_depth': 50, 'learning_rate': 0.06130631787309337, 'n_estimators': 300, 'colsample_bytree': 0.9144580053827588, 'reg_lambda': 16, 'reg_alpha': 1}. Best is trial 10 with value: 0.9796151506941281.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.056733 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 14:57:07,572] Trial 11 finished with value: 0.6543374261452406 and parameters: {'num_leaves': 33, 'max_depth': 32, 'learning_rate': 0.0541205864423562, 'n_estimators': 491, 'colsample_bytree': 0.06684026213180642, 'reg_lambda': 20, 'reg_alpha': 2}. Best is trial 10 with value: 0.9796151506941281.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.053164 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 14:58:28,560] Trial 12 finished with value: 0.9246543251687251 and parameters: {'num_leaves': 20, 'max_depth': 9, 'learning_rate': 0.0699413613493366, 'n_estimators': 237, 'colsample_bytree': 0.16260984151369418, 'reg_lambda': 8, 'reg_alpha': 12}. Best is trial 10 with value: 0.9796151506941281.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.062308 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 15:04:00,783] Trial 13 finished with value: 0.9810742034523615 and parameters: {'num_leaves': 43, 'max_depth': 8, 'learning_rate': 0.06553735925142665, 'n_estimators': 446, 'colsample_bytree': 0.8321627127625991, 'reg_lambda': 4, 'reg_alpha': 10}. Best is trial 13 with value: 0.9810742034523615.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.062875 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 15:07:14,553] Trial 14 finished with value: 0.9639754441968934 and parameters: {'num_leaves': 21, 'max_depth': 15, 'learning_rate': 0.033125600839828545, 'n_estimators': 355, 'colsample_bytree': 0.7118566769223883, 'reg_lambda': 0, 'reg_alpha': 10}. Best is trial 13 with value: 0.9810742034523615.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.070170 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 15:07:33,916] Trial 15 finished with value: 0.9775545811197386 and parameters: {'num_leaves': 44, 'max_depth': -1, 'learning_rate': 0.09927547752346737, 'n_estimators': 76, 'colsample_bytree': 0.4650568916476167, 'reg_lambda': 2, 'reg_alpha': 17}. Best is trial 13 with value: 0.9810742034523615.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.068236 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 15:11:22,918] Trial 16 finished with value: 0.9817058968394945 and parameters: {'num_leaves': 45, 'max_depth': 0, 'learning_rate': 0.07361339804970247, 'n_estimators': 344, 'colsample_bytree': 0.8373868685358983, 'reg_lambda': 5, 'reg_alpha': 6}. Best is trial 16 with value: 0.9817058968394945.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.252305 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 15:16:02,427] Trial 17 finished with value: 0.9821855161877332 and parameters: {'num_leaves': 45, 'max_depth': -1, 'learning_rate': 0.08129726913499627, 'n_estimators': 379, 'colsample_bytree': 0.6136041408046933, 'reg_lambda': 4, 'reg_alpha': 5}. Best is trial 17 with value: 0.9821855161877332.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.067343 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 15:19:50,235] Trial 18 finished with value: 0.9806744481382205 and parameters: {'num_leaves': 32, 'max_depth': -1, 'learning_rate': 0.08566382196254124, 'n_estimators': 349, 'colsample_bytree': 0.5548004341117614, 'reg_lambda': 5, 'reg_alpha': 5}. Best is trial 17 with value: 0.9821855161877332.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.078885 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 15:22:18,438] Trial 19 finished with value: 0.9403021423383244 and parameters: {'num_leaves': 39, 'max_depth': 4, 'learning_rate': 0.07774765974912792, 'n_estimators': 373, 'colsample_bytree': 0.5628291188528354, 'reg_lambda': 4, 'reg_alpha': 5}. Best is trial 17 with value: 0.9821855161877332.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.071256 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 15:22:57,515] Trial 20 finished with value: 0.9786555240674493 and parameters: {'num_leaves': 45, 'max_depth': 21, 'learning_rate': 0.09581574029768312, 'n_estimators': 115, 'colsample_bytree': 0.6405869495925147, 'reg_lambda': 1, 'reg_alpha': 4}. Best is trial 17 with value: 0.9821855161877332.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.067229 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 15:25:52,817] Trial 21 finished with value: 0.9806008313667253 and parameters: {'num_leaves': 36, 'max_depth': 20, 'learning_rate': 0.082141199221582, 'n_estimators': 303, 'colsample_bytree': 0.40725620840232823, 'reg_lambda': 7, 'reg_alpha': 7}. Best is trial 17 with value: 0.9821855161877332.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.065556 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 15:28:37,747] Trial 22 finished with value: 0.9464448523952584 and parameters: {'num_leaves': 28, 'max_depth': 4, 'learning_rate': 0.08899896756626091, 'n_estimators': 404, 'colsample_bytree': 0.3092252498144175, 'reg_lambda': 13, 'reg_alpha': 3}. Best is trial 17 with value: 0.9821855161877332.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.069476 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 15:29:04,596] Trial 23 finished with value: 0.8107729207557599 and parameters: {'num_leaves': 42, 'max_depth': 3, 'learning_rate': 0.07398920775242102, 'n_estimators': 148, 'colsample_bytree': 0.8126451118254574, 'reg_lambda': 3, 'reg_alpha': 7}. Best is trial 17 with value: 0.9821855161877332.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.065800 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 15:31:35,006] Trial 24 finished with value: 0.9768730167178747 and parameters: {'num_leaves': 28, 'max_depth': 13, 'learning_rate': 0.05985907656064933, 'n_estimators': 285, 'colsample_bytree': 0.6486719244034339, 'reg_lambda': 5, 'reg_alpha': 13}. Best is trial 17 with value: 0.9821855161877332.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.072187 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 15:35:25,044] Trial 25 finished with value: 0.9817190208460181 and parameters: {'num_leaves': 35, 'max_depth': 30, 'learning_rate': 0.08977388575093496, 'n_estimators': 336, 'colsample_bytree': 0.9839458835952926, 'reg_lambda': 8, 'reg_alpha': 6}. Best is trial 17 with value: 0.9821855161877332.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.074925 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 15:39:23,853] Trial 26 finished with value: 0.9820033606065605 and parameters: {'num_leaves': 41, 'max_depth': 27, 'learning_rate': 0.08851687251713937, 'n_estimators': 335, 'colsample_bytree': 0.9843028042905162, 'reg_lambda': 7, 'reg_alpha': 6}. Best is trial 17 with value: 0.9821855161877332.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.077074 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 15:44:13,832] Trial 27 finished with value: 0.9822904169520692 and parameters: {'num_leaves': 35, 'max_depth': 31, 'learning_rate': 0.09140367196108835, 'n_estimators': 392, 'colsample_bytree': 0.9968773034086398, 'reg_lambda': 8, 'reg_alpha': 9}. Best is trial 27 with value: 0.9822904169520692.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.071517 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 15:49:30,805] Trial 28 finished with value: 0.983139447515153 and parameters: {'num_leaves': 41, 'max_depth': 38, 'learning_rate': 0.09916410190820278, 'n_estimators': 408, 'colsample_bytree': 0.9978610959534197, 'reg_lambda': 12, 'reg_alpha': 9}. Best is trial 28 with value: 0.983139447515153.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.076589 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 15:54:57,761] Trial 29 finished with value: 0.9831967766274115 and parameters: {'num_leaves': 41, 'max_depth': 38, 'learning_rate': 0.09995702439200448, 'n_estimators': 413, 'colsample_bytree': 0.906889542836681, 'reg_lambda': 12, 'reg_alpha': 9}. Best is trial 29 with value: 0.9831967766274115.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.069719 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 16:00:44,568] Trial 30 finished with value: 0.9831812421139969 and parameters: {'num_leaves': 36, 'max_depth': 38, 'learning_rate': 0.0997638536404341, 'n_estimators': 441, 'colsample_bytree': 0.8886836017230948, 'reg_lambda': 13, 'reg_alpha': 9}. Best is trial 29 with value: 0.9831967766274115.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.067740 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 16:05:47,839] Trial 31 finished with value: 0.9444601468697996 and parameters: {'num_leaves': 40, 'max_depth': 38, 'learning_rate': 0.005107393619034006, 'n_estimators': 445, 'colsample_bytree': 0.8962487524061308, 'reg_lambda': 12, 'reg_alpha': 12}. Best is trial 29 with value: 0.9831967766274115.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.076685 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 16:13:01,187] Trial 32 finished with value: 0.9834509029861053 and parameters: {'num_leaves': 37, 'max_depth': 43, 'learning_rate': 0.09915861490219136, 'n_estimators': 497, 'colsample_bytree': 0.7772016275449934, 'reg_lambda': 15, 'reg_alpha': 15}. Best is trial 32 with value: 0.9834509029861053.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.067025 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 16:18:57,517] Trial 33 finished with value: 0.98323296502723 and parameters: {'num_leaves': 37, 'max_depth': 44, 'learning_rate': 0.09943627257334023, 'n_estimators': 454, 'colsample_bytree': 0.7844117185334076, 'reg_lambda': 16, 'reg_alpha': 15}. Best is trial 32 with value: 0.9834509029861053.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.066372 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 16:25:22,162] Trial 34 finished with value: 0.9827469665594281 and parameters: {'num_leaves': 31, 'max_depth': 44, 'learning_rate': 0.09286865363779692, 'n_estimators': 495, 'colsample_bytree': 0.7856046917816106, 'reg_lambda': 17, 'reg_alpha': 15}. Best is trial 32 with value: 0.9834509029861053.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.082566 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 16:31:33,870] Trial 35 finished with value: 0.9826616429513683 and parameters: {'num_leaves': 38, 'max_depth': 44, 'learning_rate': 0.08159998905741965, 'n_estimators': 470, 'colsample_bytree': 0.6876442474688477, 'reg_lambda': 15, 'reg_alpha': 16}. Best is trial 32 with value: 0.9834509029861053.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.077216 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 16:37:10,241] Trial 36 finished with value: 0.9830830552832658 and parameters: {'num_leaves': 36, 'max_depth': 41, 'learning_rate': 0.09978362134255381, 'n_estimators': 443, 'colsample_bytree': 0.8969412436459783, 'reg_lambda': 19, 'reg_alpha': 19}. Best is trial 32 with value: 0.9834509029861053.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.078030 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 16:42:38,521] Trial 37 finished with value: 0.9828248885093918 and parameters: {'num_leaves': 37, 'max_depth': 35, 'learning_rate': 0.09431821256976969, 'n_estimators': 433, 'colsample_bytree': 0.7739717725249988, 'reg_lambda': 14, 'reg_alpha': 14}. Best is trial 32 with value: 0.9834509029861053.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.073148 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 16:48:47,227] Trial 38 finished with value: 0.9829486861916246 and parameters: {'num_leaves': 34, 'max_depth': 41, 'learning_rate': 0.09506023156675573, 'n_estimators': 472, 'colsample_bytree': 0.8768589220735435, 'reg_lambda': 11, 'reg_alpha': 18}. Best is trial 32 with value: 0.9834509029861053.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.078391 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 16:48:51,449] Trial 39 finished with value: 0.9243702062176659 and parameters: {'num_leaves': 30, 'max_depth': 47, 'learning_rate': 0.08508958696023163, 'n_estimators': 13, 'colsample_bytree': 0.9343563299709413, 'reg_lambda': 14, 'reg_alpha': 11}. Best is trial 32 with value: 0.9834509029861053.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.079465 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 16:54:43,648] Trial 40 finished with value: 0.9798144803322592 and parameters: {'num_leaves': 39, 'max_depth': 36, 'learning_rate': 0.04205487544419201, 'n_estimators': 461, 'colsample_bytree': 0.7568435258165399, 'reg_lambda': 18, 'reg_alpha': 14}. Best is trial 32 with value: 0.9834509029861053.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.076466 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 16:56:04,564] Trial 41 finished with value: 0.9795297573402163 and parameters: {'num_leaves': 33, 'max_depth': 47, 'learning_rate': 0.09984609658038712, 'n_estimators': 199, 'colsample_bytree': 0.8546447521965546, 'reg_lambda': 16, 'reg_alpha': 16}. Best is trial 32 with value: 0.9834509029861053.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.069250 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 16:59:29,518] Trial 42 finished with value: 0.9423306840493085 and parameters: {'num_leaves': 16, 'max_depth': 41, 'learning_rate': 0.02523782264194894, 'n_estimators': 427, 'colsample_bytree': 0.9322342010926034, 'reg_lambda': 10, 'reg_alpha': 8}. Best is trial 32 with value: 0.9834509029861053.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.062496 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 17:05:29,030] Trial 43 finished with value: 0.9817902500201433 and parameters: {'num_leaves': 29, 'max_depth': 47, 'learning_rate': 0.07852815554619427, 'n_estimators': 492, 'colsample_bytree': 0.735410716231764, 'reg_lambda': 14, 'reg_alpha': 20}. Best is trial 32 with value: 0.9834509029861053.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.071172 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 17:12:02,174] Trial 44 finished with value: 0.981303963058073 and parameters: {'num_leaves': 38, 'max_depth': 39, 'learning_rate': 0.05043471363349209, 'n_estimators': 499, 'colsample_bytree': 0.6772120729365863, 'reg_lambda': 16, 'reg_alpha': 12}. Best is trial 32 with value: 0.9834509029861053.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.066007 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 17:16:13,337] Trial 45 finished with value: 0.9795532423963217 and parameters: {'num_leaves': 25, 'max_depth': 34, 'learning_rate': 0.06894269020361117, 'n_estimators': 412, 'colsample_bytree': 0.8089910825752761, 'reg_lambda': 11, 'reg_alpha': 11}. Best is trial 32 with value: 0.9834509029861053.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.076675 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 17:21:11,726] Trial 46 finished with value: 0.9831008139472147 and parameters: {'num_leaves': 41, 'max_depth': 38, 'learning_rate': 0.09618682723187721, 'n_estimators': 409, 'colsample_bytree': 0.9404192601608118, 'reg_lambda': 12, 'reg_alpha': 9}. Best is trial 32 with value: 0.9834509029861053.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.068580 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 17:27:28,848] Trial 47 finished with value: 0.9831837625427172 and parameters: {'num_leaves': 42, 'max_depth': 50, 'learning_rate': 0.0865746233825821, 'n_estimators': 467, 'colsample_bytree': 0.8766847180013405, 'reg_lambda': 13, 'reg_alpha': 8}. Best is trial 32 with value: 0.9834509029861053.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.075549 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 17:33:51,889] Trial 48 finished with value: 0.9833132365587254 and parameters: {'num_leaves': 43, 'max_depth': 50, 'learning_rate': 0.08783948406243992, 'n_estimators': 468, 'colsample_bytree': 0.8525315351652176, 'reg_lambda': 15, 'reg_alpha': 8}. Best is trial 32 with value: 0.9834509029861053.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.073609 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 17:39:57,787] Trial 49 finished with value: 0.9832846107114538 and parameters: {'num_leaves': 42, 'max_depth': 50, 'learning_rate': 0.08747251485982972, 'n_estimators': 460, 'colsample_bytree': 0.8287392350642944, 'reg_lambda': 15, 'reg_alpha': 8}. Best is trial 32 with value: 0.9834509029861053.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.071527 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 17:46:34,871] Trial 50 finished with value: 0.983488929774915 and parameters: {'num_leaves': 43, 'max_depth': 50, 'learning_rate': 0.09213233096596282, 'n_estimators': 474, 'colsample_bytree': 0.8039493032525874, 'reg_lambda': 17, 'reg_alpha': 11}. Best is trial 50 with value: 0.983488929774915.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.073001 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 17:53:08,192] Trial 51 finished with value: 0.9833751809514968 and parameters: {'num_leaves': 43, 'max_depth': 49, 'learning_rate': 0.09157082375179112, 'n_estimators': 477, 'colsample_bytree': 0.7279252703107777, 'reg_lambda': 18, 'reg_alpha': 14}. Best is trial 50 with value: 0.983488929774915.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.077988 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 17:59:41,364] Trial 52 finished with value: 0.9828295487390025 and parameters: {'num_leaves': 43, 'max_depth': 50, 'learning_rate': 0.07681294405202405, 'n_estimators': 478, 'colsample_bytree': 0.7140804801027253, 'reg_lambda': 20, 'reg_alpha': 13}. Best is trial 50 with value: 0.983488929774915.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.076960 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 18:03:56,572] Trial 53 finished with value: 0.9810241220490888 and parameters: {'num_leaves': 43, 'max_depth': 47, 'learning_rate': 0.06422674141877682, 'n_estimators': 371, 'colsample_bytree': 0.5806730374571512, 'reg_lambda': 18, 'reg_alpha': 13}. Best is trial 50 with value: 0.983488929774915.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.072177 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-08 18:10:33,468] Trial 54 finished with value: 0.9831456792835306 and parameters: {'num_leaves': 44, 'max_depth': 48, 'learning_rate': 0.08439389512528117, 'n_estimators': 479, 'colsample_bytree': 0.47844417766558034, 'reg_lambda': 18, 'reg_alpha': 11}. Best is trial 50 with value: 0.983488929774915.


3599

In [8]:
best_model = lgb.LGBMClassifier(
    boosting_type="dart",
    n_jobs=-1,

    **study.best_params
)
best_model.fit(train_df.drop(columns=["class"]), train_df["class"])
preds = best_model.predict(test_df)
out_df = pd.DataFrame({"class":preds}, index=test_df.index)

[LightGBM] [Info] Number of positive: 1705396, number of negative: 1411549
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.098972 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 898
[LightGBM] [Info] Number of data points in the train set: 3116945, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


In [9]:
out_df["class"] = out_df["class"].replace({0:"e", 1:"p"})
out_df.to_csv("tuned_dart_100_v1.csv")