In [1]:
import os
import sys
module_path = os.path.abspath(os.path.join("../helpers"))
if module_path not in sys.path:
    sys.path.append(module_path)

import pandas as pd
import optuna
import functools

import sklearn.linear_model as sklm
import sklearn.model_selection as skms
import sklearn.metrics as metrics
import sklearn.pipeline as skpl
import sklearn.preprocessing as skpp
import sklearn.compose as skcmp
import sklearn.impute as skim
import category_encoders as ce

import lightgbm as lgb
import xgboost as xgb

from preprocessing import *
from ucimlrepo import fetch_ucirepo

import gc

### Load and process data

In [2]:
# Load the train, test and original datasets; each file's "id" column becomes the row index.
train_df = pd.read_csv("../data/train.csv", index_col="id")
test_df = pd.read_csv("../data/test.csv", index_col="id")
orig_df = pd.read_csv("../data/orig.csv", index_col="id")

# Columns handled as continuous features by the preprocessing pipeline.
CONT_FEATS = [
    "cap-diameter",
    "stem-height",
    "stem-width"
]
# Every other training column is treated as categorical. Because the list is
# derived from train_df.columns it also contains RESPONSE_COL, which the
# one-hot encoder's column selection filters out again.
CAT_FEATS = [c for c in train_df.columns if c not in CONT_FEATS]
# Name of the target column.
RESPONSE_COL = "class"

In [3]:
# These two helpers are not defined in this notebook; presumably they come from
# `from preprocessing import *`.
# NOTE(review): their return values are discarded, so they presumably modify the
# frames in place — confirm against the helper module.
fix_categories(orig_df, train_df, test_df, CAT_FEATS, nan_as_cat=True)
fix_that_one_mushroom_in_test(test_df)

# Preprocessing: median-impute and standardize the continuous columns, one-hot
# encode the categorical columns (excluding the response), pass anything else
# through unchanged, and emit pandas DataFrames.
pipeline = skpl.make_pipeline(
    skcmp.ColumnTransformer([
        (
            "float", 
            skpl.make_pipeline
            (
                skim.SimpleImputer(add_indicator=True, strategy="median"),
                skpp.StandardScaler().set_output(transform="pandas") # NOTE: the scaler also standardizes the missing-value indicator columns added by the imputer; unclear whether that matters
            ),
            CONT_FEATS
        ),
        (
            "cat", 
            skpp.OneHotEncoder(
                sparse_output=False,
                # NOTE(review): `np` is not imported explicitly in the import cell;
                # presumably it arrives via `from preprocessing import *` — confirm.
                dtype=np.int8,
                handle_unknown='infrequent_if_exist'),
            [c for c in CAT_FEATS if c != RESPONSE_COL]
        ),
    ], remainder="passthrough").set_output(transform="pandas"),
    )



### Helper functions for creating the models to be fed into Optuna

In [4]:
def make_xgb(**kwargs):
    """Build an ``xgb.XGBClassifier`` with this notebook's fixed settings.

    The fixed settings are ``enable_categorical=True``, ``random_state=0``
    and ``n_jobs=-1``. Every keyword argument supplied by the caller is
    forwarded unchanged; passing one of the fixed settings again raises
    ``TypeError`` because the keyword would be given twice.
    """
    fixed_settings = {
        "enable_categorical": True,
        "random_state": 0,
        "n_jobs": -1,
    }
    return xgb.XGBClassifier(**fixed_settings, **kwargs)

def optuna_xgb(trial):
    """Sample one XGBoost configuration from ``trial`` and build the model.

    Each hyperparameter is drawn through the trial's ``suggest_*`` methods
    under the parameter's own name, then handed to ``make_xgb``.

    Integer bounds are written as int literals: ``suggest_int`` is declared
    with integer ``low``/``high``, and float literals such as ``1e4`` only
    work through implicit coercion. The ranges themselves are unchanged.

    Args:
        trial: object exposing ``suggest_int``, ``suggest_float`` and
            ``suggest_categorical`` (an Optuna trial in this notebook).

    Returns:
        Whatever ``make_xgb`` returns for the sampled parameters.
    """
    return make_xgb(
        n_estimators=trial.suggest_int("n_estimators", 1, 10_000),
        eta=trial.suggest_float("eta", 0.01, 1),
        gamma=trial.suggest_float("gamma", 0, 20),
        max_depth=trial.suggest_int("max_depth", 3, 50),
        max_leaves=trial.suggest_int("max_leaves", 0, 10_000),
        colsample_bytree=trial.suggest_float("colsample_bytree", 0.05, 1),
        colsample_bylevel=trial.suggest_float("colsample_bylevel", 0.05, 1),
        colsample_bynode=trial.suggest_float("colsample_bynode", 0.05, 1),
        # Kept as integer draws so the parameter's distribution kind stays the
        # same for studies that already contain trials.
        reg_lambda=trial.suggest_int("reg_lambda", 0, 20),
        reg_alpha=trial.suggest_int("reg_alpha", 0, 20),
        grow_policy=trial.suggest_categorical(
            "grow_policy", ["depthwise", "lossguide"]
        ),
        min_child_weight=trial.suggest_float("min_child_weight", 0, 1e3),
        subsample=trial.suggest_float("subsample", 0.01, 1),
        max_delta_step=trial.suggest_float("max_delta_step", 0, 1e2)
    )

def make_lgbm(**kwargs):
    """Build an ``lgb.LGBMClassifier`` using the ``"gbdt"`` boosting type.

    ``boosting_type="gbdt"`` and ``n_jobs=-1`` are fixed; all caller-supplied
    keyword arguments are forwarded unchanged. Supplying either fixed
    keyword again raises ``TypeError``.
    """
    fixed_settings = {"boosting_type": "gbdt", "n_jobs": -1}
    return lgb.LGBMClassifier(**fixed_settings, **kwargs)

def optuna_lgbm(trial):
    """Sample one LightGBM (gbdt) configuration from ``trial`` and build the model.

    Each hyperparameter is drawn through the trial's ``suggest_*`` methods
    under the parameter's own name, then handed to ``make_lgbm``.

    The upper bound of ``n_estimators`` is written as an int literal:
    ``suggest_int`` is declared with integer bounds, and ``1e4`` is a float
    that only works through implicit coercion. The range is unchanged.

    Args:
        trial: object exposing ``suggest_int`` and ``suggest_float``
            (an Optuna trial in this notebook).

    Returns:
        Whatever ``make_lgbm`` returns for the sampled parameters.
    """
    return make_lgbm(
        num_leaves=trial.suggest_int("num_leaves", 15, 45),
        max_depth=trial.suggest_int("max_depth", -1, 50),
        learning_rate=trial.suggest_float("learning_rate", 1e-3, 1e-1),
        n_estimators=trial.suggest_int("n_estimators", 1, 10_000),
        colsample_bytree=trial.suggest_float("colsample_bytree", 0.05, 1),
        # Kept as integer draws so the parameter's distribution kind stays the
        # same for studies that already contain trials.
        reg_lambda=trial.suggest_int("reg_lambda", 0, 20),
        reg_alpha=trial.suggest_int("reg_alpha", 0, 20),
    )

def make_dart(**kwargs):
    """Build an ``lgb.LGBMClassifier`` using the ``"dart"`` boosting type.

    ``boosting_type="dart"`` and ``n_jobs=-1`` are fixed; all caller-supplied
    keyword arguments are forwarded unchanged. Supplying either fixed
    keyword again raises ``TypeError``.
    """
    fixed_settings = {"boosting_type": "dart", "n_jobs": -1}
    return lgb.LGBMClassifier(**fixed_settings, **kwargs)

def optuna_dart(trial):
    """Sample one LightGBM (dart) configuration from ``trial`` and build the model.

    The parameters are drawn in the order listed below, each under its own
    name, and the resulting mapping is expanded into ``make_dart``.
    """
    sampled = {
        "num_leaves": trial.suggest_int("num_leaves", 15, 45),
        "max_depth": trial.suggest_int("max_depth", -1, 50),
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 1e-1),
        "n_estimators": trial.suggest_int("n_estimators", 1, 500),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.05, 1),
        "reg_lambda": trial.suggest_int("reg_lambda", 0, 20),
        "reg_alpha": trial.suggest_int("reg_alpha", 0, 20),
    }
    return make_dart(**sampled)

In [5]:
def objective_xgb(trial, tx, ty, vx, vy):
    """Optuna objective for XGBoost: fit on (tx, ty), score on (vx, vy).

    Returns the Matthews correlation coefficient between ``vy`` and the
    model's predictions for ``vx``.
    """
    classifier = optuna_xgb(trial)
    classifier.fit(tx, ty)
    return metrics.matthews_corrcoef(vy, classifier.predict(vx))

def objective_lgbm(trial, tx, ty, vx, vy):
    """Optuna objective for LightGBM (gbdt) with early stopping.

    The model is fit on (tx, ty) and evaluated on (vx, vy) during training.
    Training stops once the validation metric has not improved for 5 rounds.
    The returned score is the Matthews correlation coefficient between
    ``vy`` and the model's predictions for ``vx``.

    The early-stopping metric is ``binary_logloss``. The previous
    ``eval_metric="mcc"`` is not a built-in LightGBM metric and had no
    effect: this notebook's training log reports only
    ``valid_0's binary_logloss``. Naming the metric explicitly keeps the
    behaviour and makes the code say what actually happens.

    NOTE(review): the sampled ``n_estimators`` is an upper bound here; the
    iteration at which training stopped is not recorded on the trial.
    """
    model = optuna_lgbm(trial)
    model.fit(
        tx,
        ty,
        # Documented form is a list of (X, y) pairs.
        eval_set=[(vx, vy)],
        eval_metric="binary_logloss",
        callbacks=[lgb.early_stopping(stopping_rounds=5)],
    )
    preds = model.predict(vx)
    return metrics.matthews_corrcoef(vy, preds)

def objective_dart(trial, tx, ty, vx, vy):
    """Optuna objective for LightGBM (dart): fit on (tx, ty), score on (vx, vy).

    Returns the Matthews correlation coefficient between ``vy`` and the
    model's predictions for ``vx``.

    No ``eval_metric`` is passed to ``fit``. The previous ``"mcc"`` value is
    not a built-in LightGBM metric, and no ``eval_set`` is supplied, so the
    argument had no effect on training or on the returned score.
    """
    model = optuna_dart(trial)
    model.fit(tx, ty)
    preds = model.predict(vx)
    return metrics.matthews_corrcoef(vy, preds)

In [6]:
def _predictions_frame(model, features):
    """Return the model's class predictions and per-class probabilities for ``features``.

    The frame is indexed like ``features``. It has one ``RESPONSE_COL`` column
    holding ``model.predict`` output and one ``pp_<i>`` column per column of
    ``model.predict_proba`` output.
    """
    frame = pd.DataFrame({RESPONSE_COL: model.predict(features)}, index=features.index)
    proba = model.predict_proba(features)
    for i in range(proba.shape[1]):
        frame[f"pp_{i}"] = proba[:, i]
    return frame


def refit_save_predictions(model, train_x, train_y, test_x, path, file_prefix):
    """Refit ``model`` on the given training data and write its predictions to CSV.

    Two files are written under ``path``: ``<file_prefix>_train.csv`` and
    ``<file_prefix>_test.csv``. Each holds the predicted class and one
    ``pp_<i>`` probability column per class, indexed like the input frame.

    The output directory is created before fitting. A missing directory
    would otherwise only surface as a write error after the refit finished.

    Args:
        model: estimator exposing ``fit``, ``predict`` and ``predict_proba``.
        train_x: training features (DataFrame; its index is written out).
        train_y: training labels.
        test_x: test features (DataFrame; its index is written out).
        path: output directory; created if it does not exist.
        file_prefix: prefix for the two output file names.
    """
    root_dir = os.path.abspath(path)
    os.makedirs(root_dir, exist_ok=True)
    train_file = os.path.join(root_dir, f"{file_prefix}_train.csv")
    test_file = os.path.join(root_dir, f"{file_prefix}_test.csv")

    model.fit(train_x, train_y)

    # The train predictions are made on the same rows the model was fit on.
    _predictions_frame(model, train_x).to_csv(train_file)
    _predictions_frame(model, test_x).to_csv(test_file)

### Model training

In [7]:
# Fit the preprocessing pipeline on the training features (response dropped)
# and apply the fitted pipeline to the test set.
x_train = pipeline.fit_transform(train_df.drop(columns=[RESPONSE_COL]))
y_train = train_df[RESPONSE_COL]
# Encode the two class labels as integers: "e" -> 0, "p" -> 1.
y_train = y_train.replace({"e":0, "p":1})

x_test = pipeline.transform(test_df)

# Stratified hold-out split used by the Optuna objectives.
# The seed is fixed because the studies are persisted in SQLite and resumed
# across sessions (load_if_exists=True). With a fresh random split per session,
# trials in one study would be scored on different validation rows and their
# values would not be comparable.
x_tr, x_v, y_tr, y_v = skms.train_test_split(
    x_train,
    y_train,
    stratify=y_train,
    random_state=0,
)

#### XGB

In [8]:
# Uncomment to delete the stored study before creating it again:
# optuna.delete_study(study_name="xgb_tuning_nan_cat", storage="sqlite:///optuna.sqlite3")

# Create the study in the local SQLite store, or reuse the stored one with the
# same name so tuning can continue across sessions.
study = optuna.create_study(
    direction="maximize",
    storage="sqlite:///optuna.sqlite3",
    study_name="xgb_tuning_nan_cat",
    load_if_exists=True
)

# Bind the train/validation split so the objective only takes the trial.
obj = functools.partial(objective_xgb, tx=x_tr, ty=y_tr, vx=x_v, vy=y_v)
study.optimize(obj, n_trials=1)
# As the cell's last expression, the return value of gc.collect() is displayed as output.
gc.collect()

[I 2024-08-16 10:34:49,718] Using an existing study with name 'xgb_tuning_nan_cat' instead of creating a new one.
[I 2024-08-16 10:43:01,141] Trial 51 finished with value: 0.9839299090349225 and parameters: {'n_estimators': 3937, 'eta': 0.05957287291848978, 'gamma': 0.8345920072739275, 'max_depth': 14, 'max_leaves': 4275, 'colsample_bytree': 0.8102869845591929, 'colsample_bylevel': 0.8965487399448485, 'colsample_bynode': 0.6181478505746691, 'reg_lambda': 12, 'reg_alpha': 1, 'grow_policy': 'depthwise', 'min_child_weight': 395.6856055888533, 'subsample': 0.8922052974031557, 'max_delta_step': 63.36878846075586}. Best is trial 50 with value: 0.9843358600724808.


2930

In [9]:
# Reload the stored study, rebuild the model from its best parameters, refit it
# on the full training set and write train/test predictions under ../predictions/v2.
study = optuna.load_study(study_name="xgb_tuning_nan_cat", storage="sqlite:///optuna.sqlite3")
model = make_xgb(**study.best_params)
refit_save_predictions(model, x_train, y_train, x_test, "../predictions/v2", "xgb")

#### LGBM

In [10]:
# Uncomment to delete the stored study before creating it again:
# optuna.delete_study(study_name="lgbm_tuning_nan_cat", storage="sqlite:///optuna.sqlite3")

# Create the study in the local SQLite store, or reuse the stored one with the
# same name so tuning can continue across sessions.
study = optuna.create_study(
    direction="maximize",
    storage="sqlite:///optuna.sqlite3",
    study_name="lgbm_tuning_nan_cat",
    load_if_exists=True
)

# Bind the train/validation split so the objective only takes the trial.
obj = functools.partial(objective_lgbm, tx=x_tr, ty=y_tr, vx=x_v, vy=y_v)
study.optimize(obj, n_trials=1)
# As the cell's last expression, the return value of gc.collect() is displayed as output.
gc.collect()

[I 2024-08-16 11:01:34,403] Using an existing study with name 'lgbm_tuning_nan_cat' instead of creating a new one.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.118004 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
Training until validation scores don't improve for 5 rounds
Early stopping, best iteration is:
[871]	valid_0's binary_logloss: 0.0366412


[I 2024-08-16 11:02:28,699] Trial 57 finished with value: 0.9843347982816744 and parameters: {'num_leaves': 35, 'max_depth': 46, 'learning_rate': 0.08898471672808295, 'n_estimators': 2849, 'colsample_bytree': 0.2855155587749621, 'reg_lambda': 8, 'reg_alpha': 1}. Best is trial 57 with value: 0.9843347982816744.


6794

In [11]:
# Reload the stored study, rebuild the model from its best parameters, refit it
# on the full training set and write train/test predictions under ../predictions/v2.
# NOTE(review): objective_lgbm scored each trial with early stopping, but this
# refit passes no eval_set or callbacks, so it trains the full `n_estimators`
# from best_params — confirm that is intended.
study = optuna.load_study(study_name="lgbm_tuning_nan_cat", storage="sqlite:///optuna.sqlite3")
model = make_lgbm(**study.best_params)
refit_save_predictions(model, x_train, y_train, x_test, "../predictions/v2", "lgbm")

[LightGBM] [Info] Number of positive: 1705396, number of negative: 1411549
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.145431 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 3116945, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


#### Dart

In [12]:
# Uncomment to delete the stored study before creating it again:
# optuna.delete_study(study_name="dart_tuning_nan_cat", storage="sqlite:///optuna.sqlite3")

# Create the study in the local SQLite store, or reuse the stored one with the
# same name so tuning can continue across sessions.
study = optuna.create_study(
    direction="maximize",
    storage="sqlite:///optuna.sqlite3",
    study_name="dart_tuning_nan_cat",
    load_if_exists=True
)

# Bind the train/validation split so the objective only takes the trial.
obj = functools.partial(objective_dart, tx=x_tr, ty=y_tr, vx=x_v, vy=y_v)
study.optimize(obj, n_trials=50)
# As the cell's last expression, the return value of gc.collect() is displayed as output.
gc.collect()

[I 2024-08-16 11:08:25,029] A new study created in RDB with name: dart_tuning_nan_cat


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.080582 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 11:08:49,696] Trial 0 finished with value: 0.9063121082471411 and parameters: {'num_leaves': 22, 'max_depth': 34, 'learning_rate': 0.06255115741113876, 'n_estimators': 129, 'colsample_bytree': 0.10744878781062826, 'reg_lambda': 12, 'reg_alpha': 16}. Best is trial 0 with value: 0.9063121082471411.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.087064 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 11:11:51,997] Trial 1 finished with value: 0.9557578761046263 and parameters: {'num_leaves': 45, 'max_depth': 27, 'learning_rate': 0.027250464572051627, 'n_estimators': 382, 'colsample_bytree': 0.13296948700495573, 'reg_lambda': 5, 'reg_alpha': 20}. Best is trial 1 with value: 0.9557578761046263.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.099544 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 11:12:04,527] Trial 2 finished with value: 0.9664177336383916 and parameters: {'num_leaves': 39, 'max_depth': 22, 'learning_rate': 0.07278762212597599, 'n_estimators': 69, 'colsample_bytree': 0.674145152033546, 'reg_lambda': 8, 'reg_alpha': 15}. Best is trial 2 with value: 0.9664177336383916.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.094469 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 11:13:54,324] Trial 3 finished with value: 0.9697996779396293 and parameters: {'num_leaves': 25, 'max_depth': 10, 'learning_rate': 0.06056297058489939, 'n_estimators': 324, 'colsample_bytree': 0.8438854084003525, 'reg_lambda': 5, 'reg_alpha': 12}. Best is trial 3 with value: 0.9697996779396293.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.090130 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 11:14:57,262] Trial 4 finished with value: 0.9773574579609249 and parameters: {'num_leaves': 44, 'max_depth': 47, 'learning_rate': 0.09524298390732745, 'n_estimators': 198, 'colsample_bytree': 0.8956763711705841, 'reg_lambda': 10, 'reg_alpha': 14}. Best is trial 4 with value: 0.9773574579609249.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.088485 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 11:19:30,499] Trial 5 finished with value: 0.9822310490038294 and parameters: {'num_leaves': 40, 'max_depth': 0, 'learning_rate': 0.09021182630257812, 'n_estimators': 496, 'colsample_bytree': 0.3794545472501709, 'reg_lambda': 14, 'reg_alpha': 4}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.098073 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 11:21:54,747] Trial 6 finished with value: 0.9522328550116848 and parameters: {'num_leaves': 38, 'max_depth': 44, 'learning_rate': 0.011122007171469817, 'n_estimators': 349, 'colsample_bytree': 0.3372581940046325, 'reg_lambda': 5, 'reg_alpha': 15}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.088705 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 11:22:55,168] Trial 7 finished with value: 0.9665124218983571 and parameters: {'num_leaves': 19, 'max_depth': 39, 'learning_rate': 0.08731183432937162, 'n_estimators': 242, 'colsample_bytree': 0.7904841123024959, 'reg_lambda': 5, 'reg_alpha': 19}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.093347 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 11:23:05,942] Trial 8 finished with value: 0.8899242215921729 and parameters: {'num_leaves': 34, 'max_depth': 36, 'learning_rate': 0.056775180036356364, 'n_estimators': 64, 'colsample_bytree': 0.08374643013093558, 'reg_lambda': 13, 'reg_alpha': 10}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.360027 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 11:25:46,478] Trial 9 finished with value: 0.8947540040474304 and parameters: {'num_leaves': 17, 'max_depth': 39, 'learning_rate': 0.010770634963579214, 'n_estimators': 460, 'colsample_bytree': 0.2513282055316664, 'reg_lambda': 20, 'reg_alpha': 9}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.092213 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 11:29:36,456] Trial 10 finished with value: 0.973506507580526 and parameters: {'num_leaves': 29, 'max_depth': 0, 'learning_rate': 0.03801151531714432, 'n_estimators': 492, 'colsample_bytree': 0.45867817397322336, 'reg_lambda': 17, 'reg_alpha': 3}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.103503 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 11:30:44,886] Trial 11 finished with value: 0.9791581206772775 and parameters: {'num_leaves': 45, 'max_depth': 50, 'learning_rate': 0.09813031554552043, 'n_estimators': 208, 'colsample_bytree': 0.5864923081356743, 'reg_lambda': 0, 'reg_alpha': 0}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.105991 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 11:31:21,103] Trial 12 finished with value: 0.9741696596864255 and parameters: {'num_leaves': 40, 'max_depth': 14, 'learning_rate': 0.09862183021125753, 'n_estimators': 146, 'colsample_bytree': 0.5820575052059694, 'reg_lambda': 15, 'reg_alpha': 0}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.094596 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 11:32:52,742] Trial 13 finished with value: 0.9757308480187842 and parameters: {'num_leaves': 33, 'max_depth': -1, 'learning_rate': 0.07967263692735571, 'n_estimators': 272, 'colsample_bytree': 0.4754272748358866, 'reg_lambda': 2, 'reg_alpha': 5}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.093896 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 11:36:01,862] Trial 14 finished with value: 0.9805455770218777 and parameters: {'num_leaves': 42, 'max_depth': 10, 'learning_rate': 0.08133634874956494, 'n_estimators': 419, 'colsample_bytree': 0.669996050705324, 'reg_lambda': 0, 'reg_alpha': 0}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.103301 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 11:38:59,616] Trial 15 finished with value: 0.9775910243727789 and parameters: {'num_leaves': 35, 'max_depth': 9, 'learning_rate': 0.07496459499018301, 'n_estimators': 425, 'colsample_bytree': 0.9912919161149794, 'reg_lambda': 19, 'reg_alpha': 6}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.100698 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 11:41:14,958] Trial 16 finished with value: 0.9624908581386198 and parameters: {'num_leaves': 28, 'max_depth': 6, 'learning_rate': 0.04423096913746475, 'n_estimators': 422, 'colsample_bytree': 0.34409475156250413, 'reg_lambda': 9, 'reg_alpha': 3}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.106525 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 11:45:59,577] Trial 17 finished with value: 0.9821132939016208 and parameters: {'num_leaves': 42, 'max_depth': 18, 'learning_rate': 0.08509696195838269, 'n_estimators': 497, 'colsample_bytree': 0.704050632620229, 'reg_lambda': 14, 'reg_alpha': 6}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.095284 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 11:50:20,620] Trial 18 finished with value: 0.9805649762733314 and parameters: {'num_leaves': 37, 'max_depth': 19, 'learning_rate': 0.06936689286792934, 'n_estimators': 480, 'colsample_bytree': 0.7451705982872827, 'reg_lambda': 16, 'reg_alpha': 7}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.099960 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 11:52:31,491] Trial 19 finished with value: 0.9801660028191321 and parameters: {'num_leaves': 41, 'max_depth': 28, 'learning_rate': 0.08697615748059863, 'n_estimators': 314, 'colsample_bytree': 0.39865270846339407, 'reg_lambda': 13, 'reg_alpha': 3}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.099522 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 11:52:35,508] Trial 20 finished with value: 0.336882817059092 and parameters: {'num_leaves': 32, 'max_depth': 16, 'learning_rate': 0.049746621647546924, 'n_estimators': 3, 'colsample_bytree': 0.21616443677753983, 'reg_lambda': 18, 'reg_alpha': 8}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.107954 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 11:56:55,513] Trial 21 finished with value: 0.9804559426442443 and parameters: {'num_leaves': 37, 'max_depth': 18, 'learning_rate': 0.06861559912593498, 'n_estimators': 483, 'colsample_bytree': 0.7647142403943668, 'reg_lambda': 16, 'reg_alpha': 6}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.099410 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 12:01:28,182] Trial 22 finished with value: 0.9819122228094959 and parameters: {'num_leaves': 37, 'max_depth': 23, 'learning_rate': 0.08743971470355316, 'n_estimators': 499, 'colsample_bytree': 0.6728833142982438, 'reg_lambda': 15, 'reg_alpha': 7}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.101788 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 12:04:35,416] Trial 23 finished with value: 0.9807083621166418 and parameters: {'num_leaves': 42, 'max_depth': 29, 'learning_rate': 0.08766338575724267, 'n_estimators': 382, 'colsample_bytree': 0.5501044801405625, 'reg_lambda': 11, 'reg_alpha': 11}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.090592 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 12:08:18,284] Trial 24 finished with value: 0.9814744908580192 and parameters: {'num_leaves': 36, 'max_depth': 23, 'learning_rate': 0.09073530201724263, 'n_estimators': 444, 'colsample_bytree': 0.6610875478992451, 'reg_lambda': 15, 'reg_alpha': 4}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.105996 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 12:09:39,336] Trial 25 finished with value: 0.927325071845873 and parameters: {'num_leaves': 42, 'max_depth': 4, 'learning_rate': 0.08039672831219741, 'n_estimators': 377, 'colsample_bytree': 0.4809570895254721, 'reg_lambda': 14, 'reg_alpha': 8}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.104117 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 12:13:47,232] Trial 26 finished with value: 0.9814984024874929 and parameters: {'num_leaves': 31, 'max_depth': 32, 'learning_rate': 0.09363491462766091, 'n_estimators': 499, 'colsample_bytree': 0.6254714091328996, 'reg_lambda': 12, 'reg_alpha': 2}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.114903 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 12:17:37,816] Trial 27 finished with value: 0.9810966763016155 and parameters: {'num_leaves': 39, 'max_depth': 13, 'learning_rate': 0.08019621518487612, 'n_estimators': 449, 'colsample_bytree': 0.9506430442707818, 'reg_lambda': 18, 'reg_alpha': 5}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.100286 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 12:20:47,643] Trial 28 finished with value: 0.9699302695642358 and parameters: {'num_leaves': 43, 'max_depth': 21, 'learning_rate': 0.023829818634408785, 'n_estimators': 403, 'colsample_bytree': 0.7191874088031278, 'reg_lambda': 8, 'reg_alpha': 7}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.095338 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 12:24:07,348] Trial 29 finished with value: 0.9760211270116786 and parameters: {'num_leaves': 27, 'max_depth': 25, 'learning_rate': 0.06362608049462273, 'n_estimators': 456, 'colsample_bytree': 0.41786086875316614, 'reg_lambda': 12, 'reg_alpha': 12}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.100327 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 12:25:01,039] Trial 30 finished with value: 0.8168176685722258 and parameters: {'num_leaves': 35, 'max_depth': 3, 'learning_rate': 0.08559311120798427, 'n_estimators': 334, 'colsample_bytree': 0.811560878669874, 'reg_lambda': 14, 'reg_alpha': 2}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.087565 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 12:29:22,490] Trial 31 finished with value: 0.9817931883587114 and parameters: {'num_leaves': 31, 'max_depth': 32, 'learning_rate': 0.09730859791081854, 'n_estimators': 500, 'colsample_bytree': 0.6220243417320632, 'reg_lambda': 11, 'reg_alpha': 1}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.093186 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 12:32:56,199] Trial 32 finished with value: 0.9796598915065928 and parameters: {'num_leaves': 22, 'max_depth': 32, 'learning_rate': 0.09988234606152516, 'n_estimators': 498, 'colsample_bytree': 0.5262735587657058, 'reg_lambda': 11, 'reg_alpha': 2}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.085439 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 12:37:09,724] Trial 33 finished with value: 0.9821365813589652 and parameters: {'num_leaves': 40, 'max_depth': 26, 'learning_rate': 0.09354048816793495, 'n_estimators': 460, 'colsample_bytree': 0.7186315204006831, 'reg_lambda': 14, 'reg_alpha': 5}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.095595 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 12:41:34,734] Trial 34 finished with value: 0.9811019323277846 and parameters: {'num_leaves': 39, 'max_depth': 27, 'learning_rate': 0.07577278693302794, 'n_estimators': 464, 'colsample_bytree': 0.8515164252298568, 'reg_lambda': 16, 'reg_alpha': 5}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.093493 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 12:45:02,586] Trial 35 finished with value: 0.9817587812982205 and parameters: {'num_leaves': 45, 'max_depth': 24, 'learning_rate': 0.09044451126934897, 'n_estimators': 399, 'colsample_bytree': 0.7121492232552729, 'reg_lambda': 14, 'reg_alpha': 9}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.109075 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 12:48:52,834] Trial 36 finished with value: 0.9804782504273059 and parameters: {'num_leaves': 40, 'max_depth': 19, 'learning_rate': 0.0685184043585949, 'n_estimators': 438, 'colsample_bytree': 0.8895311892674845, 'reg_lambda': 17, 'reg_alpha': 6}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.097250 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 12:52:46,943] Trial 37 finished with value: 0.9771519166831799 and parameters: {'num_leaves': 38, 'max_depth': 13, 'learning_rate': 0.08427810846662519, 'n_estimators': 464, 'colsample_bytree': 0.18032908113387822, 'reg_lambda': 13, 'reg_alpha': 4}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.109310 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 12:54:45,310] Trial 38 finished with value: 0.980458297787616 and parameters: {'num_leaves': 43, 'max_depth': 22, 'learning_rate': 0.09377620262691592, 'n_estimators': 296, 'colsample_bytree': 0.2805607710290076, 'reg_lambda': 9, 'reg_alpha': 7}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.096284 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 12:57:58,880] Trial 39 finished with value: 0.9786243813857644 and parameters: {'num_leaves': 40, 'max_depth': 41, 'learning_rate': 0.060383703346349266, 'n_estimators': 398, 'colsample_bytree': 0.6961253597046151, 'reg_lambda': 15, 'reg_alpha': 10}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.097647 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 13:01:06,939] Trial 40 finished with value: 0.980410529886635 and parameters: {'num_leaves': 44, 'max_depth': 37, 'learning_rate': 0.07469276132178536, 'n_estimators': 375, 'colsample_bytree': 0.5952644628957268, 'reg_lambda': 17, 'reg_alpha': 4}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.090120 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 13:04:41,751] Trial 41 finished with value: 0.9799429270341792 and parameters: {'num_leaves': 25, 'max_depth': 35, 'learning_rate': 0.0939844057026004, 'n_estimators': 477, 'colsample_bytree': 0.6668781091736904, 'reg_lambda': 11, 'reg_alpha': 1}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.097789 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 13:09:11,446] Trial 42 finished with value: 0.981294245895654 and parameters: {'num_leaves': 34, 'max_depth': 33, 'learning_rate': 0.09027400035602594, 'n_estimators': 499, 'colsample_bytree': 0.6304037898593672, 'reg_lambda': 13, 'reg_alpha': 19}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.093005 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 13:11:42,486] Trial 43 finished with value: 0.9793935294216739 and parameters: {'num_leaves': 30, 'max_depth': 30, 'learning_rate': 0.09523490631908899, 'n_estimators': 356, 'colsample_bytree': 0.8019974885306833, 'reg_lambda': 10, 'reg_alpha': 1}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.093970 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 13:15:26,338] Trial 44 finished with value: 0.981033666673983 and parameters: {'num_leaves': 37, 'max_depth': 30, 'learning_rate': 0.08518554124933503, 'n_estimators': 436, 'colsample_bytree': 0.5394421511018147, 'reg_lambda': 6, 'reg_alpha': 5}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.105105 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 13:19:33,448] Trial 45 finished with value: 0.9672332113334041 and parameters: {'num_leaves': 41, 'max_depth': 26, 'learning_rate': 0.017960064214155927, 'n_estimators': 473, 'colsample_bytree': 0.7410406179307849, 'reg_lambda': 14, 'reg_alpha': 8}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.104328 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 13:20:05,052] Trial 46 finished with value: 0.9435739327003017 and parameters: {'num_leaves': 15, 'max_depth': 44, 'learning_rate': 0.09988601835814673, 'n_estimators': 161, 'colsample_bytree': 0.6300332300978687, 'reg_lambda': 15, 'reg_alpha': 3}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.088927 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 13:20:23,270] Trial 47 finished with value: 0.9438396173936542 and parameters: {'num_leaves': 33, 'max_depth': 8, 'learning_rate': 0.0388004077703815, 'n_estimators': 98, 'colsample_bytree': 0.5034271561757141, 'reg_lambda': 12, 'reg_alpha': 1}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.098141 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 13:23:46,947] Trial 48 finished with value: 0.9804686524852635 and parameters: {'num_leaves': 38, 'max_depth': 16, 'learning_rate': 0.07771744613521209, 'n_estimators': 416, 'colsample_bytree': 0.42842936275090526, 'reg_lambda': 7, 'reg_alpha': 4}. Best is trial 5 with value: 0.9822310490038294.


[LightGBM] [Info] Number of positive: 1279047, number of negative: 1058661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.112118 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 2337708, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110


[I 2024-08-16 13:26:56,790] Trial 49 finished with value: 0.975078417197354 and parameters: {'num_leaves': 21, 'max_depth': 21, 'learning_rate': 0.08188529711264368, 'n_estimators': 463, 'colsample_bytree': 0.36522077245398255, 'reg_lambda': 10, 'reg_alpha': 6}. Best is trial 5 with value: 0.9822310490038294.


9848

In [14]:
# Reopen the persisted Optuna study for the DART tuning run instead of
# re-running the search; the trials live in the local SQLite database.
study = optuna.load_study(
    storage="sqlite:///optuna.sqlite3",
    study_name="dart_tuning_nan_cat",
)

# Build a fresh model from the hyperparameters of the study's best trial.
model = make_dart(
    **study.best_params,
)

# NOTE(review): refit_save_predictions is defined outside this cell; presumably
# it refits `model` on the full training data and writes test-set predictions
# under the given directory using the "dart" tag -- confirm against the helper.
refit_save_predictions(
    model,
    x_train,
    y_train,
    x_test,
    "../predictions/v2",
    "dart",
)

[LightGBM] [Info] Number of positive: 1705396, number of negative: 1411549
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.125574 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1029
[LightGBM] [Info] Number of data points in the train set: 3116945, number of used features: 135
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.547137 -> initscore=0.189110
[LightGBM] [Info] Start training from score 0.189110
