In [1]:
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
import sys, gc, warnings, random, math, time, datetime, os
from tqdm import tqdm_notebook
warnings.filterwarnings('ignore')

from sklearn import metrics, preprocessing
from sklearn.model_selection import train_test_split, KFold, StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import roc_auc_score
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectFromModel, RFECV

import eli5
from eli5.sklearn import PermutationImportance

import xgboost as xgb
import lightgbm as lgb
from bayes_opt import BayesianOptimization


pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)

Using TensorFlow backend.


In [2]:
# read df_train and df_test
df_train = pd.read_pickle("../features/df_train.pkl")
df_test = pd.read_pickle("../features/df_test.pkl")
TARGET = "hospital_death"

# read features cols and cat cols
all_features = np.load("../features/all_features.npy", allow_pickle=True).tolist()
categorical_features = np.load("../features/categorical_features.npy", allow_pickle=True).tolist()

In [3]:
def bayes_parameter_opt_lgb(X,
                            y,
                            init_round=15, # how many steps of random exploration
                            opt_round=25, # how many steps of bayes optimization
                            n_folds=5,
                            random_seed=6,
                            n_estimators=10000,
                            learning_rate=0.05,
                            output_process=False):
    # prepare data
    train_data = xgb.DMatrix(data=X,
                             label=y)

    # parameters
    def lgb_eval(num_leaves, feature_fraction, bagging_fraction, max_depth,
                 lambda_l1, lambda_l2, min_split_gain, min_child_weight):
        params = {
            'objective': 'binary:logistic',
            'num_iterations': n_estimators,
            'learning_rate': learning_rate,
            'early_stopping_round': 200,
            'metric': 'auc',
            'tree_method':'gpu_hist'
        }
        params["num_leaves"] = int(round(num_leaves))
        params['feature_fraction'] = max(min(feature_fraction, 1), 0)
        params['bagging_fraction'] = max(min(bagging_fraction, 1), 0)
        params['max_depth'] = int(round(max_depth))
        params['lambda_l1'] = max(lambda_l1, 0)
        params['lambda_l2'] = max(lambda_l2, 0)
        params['min_split_gain'] = min_split_gain
        params['min_child_weight'] = min_child_weight

        # modify here
        cv_result = xgb.cv(params,
                           train_data,
                           nfold=n_folds,
                           num_boost_round = 5000,
                           early_stopping_rounds = 200,
                           seed=random_seed,
                           stratified=True,
                           verbose_eval=200,
                           metrics=['auc'])
        
        return max(cv_result['test-auc-mean'])

    # range
    lgbBO = BayesianOptimization(lgb_eval, {
        'num_leaves': (32, 128),
        'feature_fraction': (0.1, 0.9),
        'bagging_fraction': (0.1, 0.9),
        'max_depth': (6, 10),
        'lambda_l1': (1, 5),
        'lambda_l2': (1, 5),
        'min_split_gain': (0.001, 0.1),
        'min_child_weight': (8, 64)
    },
                                 random_state=42)
    # optimize
    lgbBO.maximize(init_points=init_round, n_iter=opt_round)

    # output optimization process
    #     if output_process==True: lgbBO.points_to_csv("bayes_opt_result.csv")

    # return best parameters
    return lgbBO.max

In [4]:
X = df_train[all_features]
y = df_train[TARGET]

opt_params = bayes_parameter_opt_lgb(X,
                                     y,
                                     init_round=10, # how many steps of random exploration
                                     opt_round=30, # how many steps of bayes optimization
                                     n_folds=5,
                                     random_seed=42,
                                     n_estimators=5000,
                                     learning_rate=0.01)

|   iter    |  target   | baggin... | featur... | lambda_l1 | lambda_l2 | max_depth | min_ch... | min_sp... | num_le... |
-------------------------------------------------------------------------------------------------------------------------
[0]	train-auc:0.866577+0.00191764	test-auc:0.855491+0.00112836
[200]	train-auc:0.909405+0.000774684	test-auc:0.885226+0.00229203
[400]	train-auc:0.927916+0.000450204	test-auc:0.893858+0.00196582
[600]	train-auc:0.940208+0.000350584	test-auc:0.898939+0.00224328
[800]	train-auc:0.947663+0.000400631	test-auc:0.901138+0.00196124
[1000]	train-auc:0.953117+0.000398757	test-auc:0.902211+0.00187289
[1200]	train-auc:0.957695+0.000275195	test-auc:0.902849+0.00173209
[1400]	train-auc:0.961673+0.000327045	test-auc:0.903289+0.00167954
[1600]	train-auc:0.965286+0.000476612	test-auc:0.903573+0.00166421
[1800]	train-auc:0.968564+0.000517767	test-auc:0.903809+0.00162088
[2000]	train-auc:0.971513+0.000447107	test-auc:0.904006+0.0016261
[2200]	train-auc:0.974311+0.

[0]	train-auc:0.858664+0.00173286	test-auc:0.85188+0.00139235
[200]	train-auc:0.894987+0.000383349	test-auc:0.882243+0.00248489
[400]	train-auc:0.908173+0.00055565	test-auc:0.891182+0.00178747
[600]	train-auc:0.917501+0.000411436	test-auc:0.896982+0.00178955
[800]	train-auc:0.923143+0.000387873	test-auc:0.899661+0.00171157
[1000]	train-auc:0.927362+0.000405335	test-auc:0.901123+0.00166939
[1200]	train-auc:0.930928+0.000357604	test-auc:0.902208+0.00162389
[1400]	train-auc:0.934181+0.000352655	test-auc:0.903016+0.00165773
[1600]	train-auc:0.936993+0.000311952	test-auc:0.90357+0.00164786
[1800]	train-auc:0.939493+0.000333758	test-auc:0.904008+0.00157497
[2000]	train-auc:0.94197+0.000380051	test-auc:0.904374+0.00152389
[2200]	train-auc:0.944264+0.000398228	test-auc:0.904539+0.00152854
[2400]	train-auc:0.94631+0.000441588	test-auc:0.904665+0.00153012
[2600]	train-auc:0.948298+0.000516625	test-auc:0.904804+0.00151683
[2800]	train-auc:0.950254+0.000588433	test-auc:0.904893+0.00154355
[3000]	t

[800]	train-auc:0.932762+0.000457573	test-auc:0.900955+0.00196293
[1000]	train-auc:0.937574+0.000394441	test-auc:0.902369+0.001887
[1200]	train-auc:0.94188+0.000463157	test-auc:0.903298+0.00182095
[1400]	train-auc:0.945502+0.00046094	test-auc:0.90391+0.0017713
[1600]	train-auc:0.948878+0.000537331	test-auc:0.90432+0.00177521
[1800]	train-auc:0.951919+0.000605956	test-auc:0.904555+0.00178201
[2000]	train-auc:0.954619+0.000665421	test-auc:0.904732+0.00174913
[2200]	train-auc:0.957203+0.000727862	test-auc:0.904873+0.00175444
[2400]	train-auc:0.95964+0.000832694	test-auc:0.904874+0.0017116
| [0m 16      [0m | [0m 0.9049  [0m | [0m 0.2981  [0m | [0m 0.2995  [0m | [0m 2.99    [0m | [0m 1.899   [0m | [0m 8.34    [0m | [0m 63.6    [0m | [0m 0.05839 [0m | [0m 127.8   [0m |
[0]	train-auc:0.871989+0.00155409	test-auc:0.858871+0.00138601
[200]	train-auc:0.905247+0.00054783	test-auc:0.885694+0.00199735
[400]	train-auc:0.917108+0.000395105	test-auc:0.893118+0.00178793
[600]	train

| [95m 23      [0m | [95m 0.9051  [0m | [95m 0.8485  [0m | [95m 0.7255  [0m | [95m 2.464   [0m | [95m 1.488   [0m | [95m 9.589   [0m | [95m 63.78   [0m | [95m 0.04592 [0m | [95m 127.3   [0m |
[0]	train-auc:0.871792+0.00154431	test-auc:0.858796+0.00149168
[200]	train-auc:0.905172+0.000546482	test-auc:0.885694+0.00203006
[400]	train-auc:0.916956+0.000387206	test-auc:0.893115+0.00181876
[600]	train-auc:0.926508+0.000349659	test-auc:0.898507+0.0019555
[800]	train-auc:0.932692+0.000482201	test-auc:0.90094+0.00178094
[1000]	train-auc:0.937556+0.000441557	test-auc:0.902347+0.00171581
[1200]	train-auc:0.941789+0.000435055	test-auc:0.903299+0.00165261
[1400]	train-auc:0.945452+0.000496597	test-auc:0.903924+0.00167753
[1600]	train-auc:0.948752+0.00051985	test-auc:0.9043+0.00165174
[1800]	train-auc:0.951738+0.000601388	test-auc:0.904537+0.00158555
[2000]	train-auc:0.954424+0.000643544	test-auc:0.904686+0.00154715
[2200]	train-auc:0.956939+0.000731412	test-auc:0.904785+0.001508

[1000]	train-auc:0.941327+0.000469596	test-auc:0.902912+0.00185892
[1200]	train-auc:0.945813+0.000441325	test-auc:0.903767+0.00178967
[1400]	train-auc:0.94976+0.000533042	test-auc:0.904335+0.00176781
[1600]	train-auc:0.953287+0.000617783	test-auc:0.904668+0.0017848
[1800]	train-auc:0.956319+0.000618057	test-auc:0.904845+0.00173507
[2000]	train-auc:0.959216+0.000703869	test-auc:0.904954+0.00173012
[2200]	train-auc:0.961864+0.000759505	test-auc:0.905023+0.00167728
| [0m 31      [0m | [0m 0.905   [0m | [0m 0.7085  [0m | [0m 0.3494  [0m | [0m 1.183   [0m | [0m 1.975   [0m | [0m 9.421   [0m | [0m 63.94   [0m | [0m 0.088   [0m | [0m 127.3   [0m |
[0]	train-auc:0.871792+0.00154431	test-auc:0.858796+0.00149168
[200]	train-auc:0.905191+0.000572229	test-auc:0.885694+0.00207957
[400]	train-auc:0.916998+0.000416917	test-auc:0.893193+0.00183408
[600]	train-auc:0.926497+0.000359282	test-auc:0.898572+0.00197301
[800]	train-auc:0.932651+0.000441651	test-auc:0.901056+0.00190594
[100

KeyboardInterrupt: 

In [None]:
# |  27       |  0.9052   |  0.7126   |  0.146    |  4.098    |  2.762    |  8.576    |  63.95    |  0.02228  |  127.0    |
opt_params