In [1]:
%matplotlib inline

import pandas as pd
import numpy as np
import os
from tqdm import tqdm
import xgboost as xgb
from sklearn.model_selection import StratifiedKFold
from sklearn import metrics
import warnings

warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', 100)

In [2]:
train_label = pd.read_hdf('C:/Users/f3107/Desktop/hy_data/train_204.h5')
test_label = pd.read_hdf('C:/Users/f3107/Desktop/hy_data/test_204.h5')

In [3]:
#LabelEncoder
type_map = dict(zip(train_label['type'].unique(), np.arange(3)))
type_map_rev = {v:k for k,v in type_map.items()}
train_label['type'] = train_label['type'].map(type_map)

In [4]:
features = [x for x in train_label.columns if x not in ['ship','x','y','v','d','datetime','type','t','d_d','d_t',
                                                        'd_x','v_x','d_y','v_y','hour','date','diff_time']]
target = 'type'

In [5]:
def LGB_bayesian(    
    eta,
    max_depth,
    n_estimators
    ):
    
    # LightGBM expects next three parameters need to be integer. So we make them integer
    n_estimators = int(n_estimators)
    max_depth = int(max_depth)

    assert type(n_estimators) == int
    assert type(max_depth) == int

    params = {
        
        'eta':eta,
        'booster': 'gbtree',
        'objective': 'multi:softmax',
        'num_class': 3,
        'max_depth': max_depth,
        'early_stopping_rounds': 100,
        'n_estimators':n_estimators

    }    

    fold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    X = train_label[features].copy()
    y = train_label[target]
    score = []

    for index, (train_idx, val_idx) in enumerate(fold.split(X, y)):

        X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
        X_val, y_val = X.iloc[val_idx], y.iloc[val_idx]

        xgb_train = xgb.DMatrix(X_train, y_train)
        xgb_eval = xgb.DMatrix(X_val, y_val)

        model = xgb.train(params, xgb_train, evals=[(xgb_train, 'train'), (xgb_eval, 'val')], verbose_eval=False)

        val_pred = model.predict(xgb.DMatrix(X_val))

        val_y = y.iloc[val_idx]
        f1 = metrics.f1_score(val_y, val_pred, average='macro')

        score.append(f1)
    
    return sum(score)/len(score)

In [6]:
bounds_LGB = {
    
    'eta': (0.3, 0.45),#[default = 0.3]
    'max_depth':(40,100), #[default=6]
    'n_estimators': (1,2000)
    
}

In [7]:
from bayes_opt import BayesianOptimization
LGB_BO = BayesianOptimization(LGB_bayesian, bounds_LGB, random_state=6)  

In [8]:
LGB_BO.maximize()

|   iter    |  target   |    eta    | max_depth | n_esti... |
-------------------------------------------------------------
|  1        |  0.8851   |  0.4339   |  59.92    |  1.643e+0 |
|  2        |  0.883    |  0.3063   |  46.46    |  1.191e+0 |
|  3        |  0.8813   |  0.3795   |  65.13    |  671.5    |
|  4        |  0.8829   |  0.3934   |  66.29    |  1.472e+0 |
|  5        |  0.883    |  0.3777   |  74.73    |  1.291e+0 |
|  6        |  0.8826   |  0.3385   |  99.02    |  1.999e+0 |
|  7        |  0.8845   |  0.3846   |  40.53    |  2e+03    |
|  8        |  0.8822   |  0.3879   |  40.5     |  1.998e+0 |
|  9        |  0.881    |  0.3364   |  99.91    |  2e+03    |
|  10       |  0.8803   |  0.4079   |  40.29    |  2.872    |
|  11       |  0.8836   |  0.4436   |  40.08    |  1.999e+0 |
|  12       |  0.8841   |  0.3728   |  40.16    |  1.996e+0 |
|  13       |  0.8837   |  0.3055   |  40.38    |  1.998e+0 |
|  14       |  0.887    |  0.4256   |  40.25    |  2e+03    |
|  15   

In [11]:
print(LGB_BO.maximize.__defaults__)

(5, 25, 'ucb', 2.576, 0.0)


In [9]:
LGB_BO.max['target']

0.8869846185695899

In [10]:
LGB_BO.max['params']

{'eta': 0.42563552598003573,
 'max_depth': 40.245863353591176,
 'n_estimators': 1999.5103231174214}

0.8888253675125923

{'eta': 0.38689139253372246,
 'max_depth': 69.90202031263354,
 'n_estimators': 1017.6257117512797}