In [262]:
print('------------Training LgbmBoost------------')
%run ./Preprocessing.ipynb
import optuna

------------Training LgbmBoost------------
Preprocessed : xtrain, xtest, ytrain, ytest
(266776, 13) (114333, 13) (266776,) (114333,)


In [263]:
import lightgbm as lgb

from lightgbm import (LGBMClassifier, Dataset, cv, early_stopping)

from sklearn.metrics import roc_auc_score, confusion_matrix, accuracy_score

In [264]:
static_param = {
    'objective': 'binary',
    'metric': 'auc',
    "verbose": -1,
}

opt_grid = {
    'early_stopping': 10,
    'number_of_trials': 10,
    'shuffle': True,
    'folds' : 3, 
    'cv': KFold(n_splits=10, random_state=None, shuffle=True),
    'time_constraint': 60 * 1
}


In [265]:
def objective(trial):
        param_grid = {
            "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.05),
            "max_depth": trial.suggest_int("max_depth", 3, 6),
            'min_child_weight': trial.suggest_float("min_child_weight", 0.1, 10),
            "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 0.9),
            "subsample": trial.suggest_float("subsample", 0.2, 0.7),
            "reg_alpha": trial.suggest_float("reg_alpha", 1, 5),
            "reg_lambda": trial.suggest_float("reg_lambda", 1, 5),
            'max_bin': trial.suggest_int('max_bin', 50, 100)
        }

        pruning_callback = optuna.integration.LightGBMPruningCallback(trial, "auc")
        early_callback = lgb.early_stopping(1)

        full_grid = {**param_grid, **static_param}

        train_set = lgb.Dataset(xtrain, label=ytrain)

        cv_results = lgb.cv(full_grid,
                            train_set,
                            folds=opt_grid['cv'],
                            nfold=opt_grid['folds'],
                            shuffle=False,
                            callbacks=[pruning_callback, early_callback]
                            )

        scores = np.array(cv_results['%s-mean' % full_grid['metric']])

        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

        return np.mean(scores)


In [255]:
study = optuna.create_study(sampler=optuna.samplers.TPESampler(), direction='maximize')
study.optimize(objective, n_trials=opt_grid['number_of_trials'], show_progress_bar=False, timeout=opt_grid['time_constraint'])
model_parameters = study.best_params

[32m[I 2022-07-31 14:16:14,812][0m A new study created in memory with name: no-name-fa7c1b16-c71a-406b-bff5-8e235c775415[0m


Training until validation scores don't improve for 1 rounds


[32m[I 2022-07-31 14:16:15,653][0m Trial 0 finished with value: 0.8309232292249811 and parameters: {'learning_rate': 0.011537205722821851, 'max_depth': 3, 'min_child_weight': 7.954099566707031, 'colsample_bytree': 0.733672365168599, 'subsample': 0.6900622034832957, 'reg_alpha': 4.036749483746876, 'reg_lambda': 3.969924909919909, 'max_bin': 83}. Best is trial 0 with value: 0.8309232292249811.[0m


Early stopping, best iteration is:
[4]	cv_agg's auc: 0.836384 + 0.00383355
Training until validation scores don't improve for 1 rounds


[32m[I 2022-07-31 14:16:16,631][0m Trial 1 finished with value: 0.8449104238813271 and parameters: {'learning_rate': 0.016150676747690502, 'max_depth': 6, 'min_child_weight': 0.20972595047337791, 'colsample_bytree': 0.8795136918477197, 'subsample': 0.5514966593830863, 'reg_alpha': 1.7634068298562733, 'reg_lambda': 1.6892169378933892, 'max_bin': 69}. Best is trial 1 with value: 0.8449104238813271.[0m


Early stopping, best iteration is:
[4]	cv_agg's auc: 0.848495 + 0.00466603
Training until validation scores don't improve for 1 rounds
Early stopping, best iteration is:
[4]	cv_agg's auc: 0.839674 + 0.00483368


[32m[I 2022-07-31 14:16:17,371][0m Trial 2 finished with value: 0.8352712504218565 and parameters: {'learning_rate': 0.030866487405698965, 'max_depth': 4, 'min_child_weight': 7.411908396581043, 'colsample_bytree': 0.7003758472001314, 'subsample': 0.3364176747222951, 'reg_alpha': 1.9502451020249287, 'reg_lambda': 3.3769413137169324, 'max_bin': 91}. Best is trial 1 with value: 0.8449104238813271.[0m


Training until validation scores don't improve for 1 rounds


[32m[I 2022-07-31 14:16:18,316][0m Trial 3 finished with value: 0.8352969555703206 and parameters: {'learning_rate': 0.03904287258134346, 'max_depth': 4, 'min_child_weight': 4.129095091119126, 'colsample_bytree': 0.5111818704000444, 'subsample': 0.2574129124375931, 'reg_alpha': 4.0164688313823, 'reg_lambda': 2.9429016314678687, 'max_bin': 62}. Best is trial 1 with value: 0.8449104238813271.[0m


Early stopping, best iteration is:
[3]	cv_agg's auc: 0.844623 + 0.00564766
Training until validation scores don't improve for 1 rounds


[32m[I 2022-07-31 14:16:19,251][0m Trial 4 finished with value: 0.8311055431732333 and parameters: {'learning_rate': 0.01190135815233842, 'max_depth': 3, 'min_child_weight': 9.799367830810862, 'colsample_bytree': 0.6944134768283282, 'subsample': 0.4517184140238902, 'reg_alpha': 1.2171154063032499, 'reg_lambda': 3.0186211988477822, 'max_bin': 74}. Best is trial 1 with value: 0.8449104238813271.[0m


Early stopping, best iteration is:
[4]	cv_agg's auc: 0.83648 + 0.00438103


[32m[I 2022-07-31 14:16:19,836][0m Trial 5 pruned. Trial was pruned at iteration 0.[0m


Training until validation scores don't improve for 1 rounds
Early stopping, best iteration is:
[3]	cv_agg's auc: 0.843605 + 0.00412567


[32m[I 2022-07-31 14:16:20,764][0m Trial 6 finished with value: 0.8353841601732223 and parameters: {'learning_rate': 0.04013532680023088, 'max_depth': 4, 'min_child_weight': 2.6480330309702325, 'colsample_bytree': 0.6735319973235452, 'subsample': 0.6464900437019108, 'reg_alpha': 3.760996949664157, 'reg_lambda': 2.224027073272176, 'max_bin': 91}. Best is trial 1 with value: 0.8449104238813271.[0m
[32m[I 2022-07-31 14:16:21,288][0m Trial 7 pruned. Trial was pruned at iteration 0.[0m
[32m[I 2022-07-31 14:16:22,080][0m Trial 8 finished with value: 0.8427624283564704 and parameters: {'learning_rate': 0.0236690238971873, 'max_depth': 6, 'min_child_weight': 0.6598697346867007, 'colsample_bytree': 0.6301268809059893, 'subsample': 0.5832819542193141, 'reg_alpha': 1.8564722267445184, 'reg_lambda': 3.881784152233115, 'max_bin': 75}. Best is trial 1 with value: 0.8449104238813271.[0m


Training until validation scores don't improve for 1 rounds
Early stopping, best iteration is:
[3]	cv_agg's auc: 0.850211 + 0.00305505


[32m[I 2022-07-31 14:16:22,603][0m Trial 9 pruned. Trial was pruned at iteration 0.[0m


In [257]:
lgbm_model = LGBMClassifier()

In [258]:
lgbm_model.set_params(**model_parameters)

LGBMClassifier(colsample_bytree=0.8449269793735279,
               learning_rate=0.019706375683285406, max_bin=54, max_depth=6,
               min_child_weight=7.524256091143642, reg_alpha=4.202447242672279,
               reg_lambda=4.617876854761777, subsample=0.2599315427861408)

In [266]:
model_lgbm=lgbm_model.fit(xtrain,ytrain)
pred_lgbm=model_lgbm.predict(xtest)
probs_lgbm = model_lgbm.predict_proba(xtest)[:, 1]

print('-----------model_lgbm, pred_lgbm, probs_lgbm loaded-----------')

-----------model_lgbm, pred_lgbm, probs_lgbm loaded-----------
