In [1]:
from hyperopt import tpe, hp, fmin
from lightgbm import LGBMRegressor

from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.metrics import mean_squared_error as mse

import numpy as np

Metrics: https://scikit-learn.org/stable/modules/model_evaluation.html

LGBM: https://github.com/microsoft/LightGBM/blob/master/docs/Parameters.rst

In [2]:
x, y = load_boston(return_X_y=True)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

In [3]:
ESTIMATOR = LGBMRegressor
INT_PARAMS = ['max_depth', 'n_estimators']
CV_SCORING_METRIC = 'neg_mean_squared_error'
N_SPLITS = 5
MAX_EVALS = 100

In [4]:
space = {
           'objective' : hp.choice('objective', ['regression']),
           'metric' : hp.choice('metric', ['mse']),
           'n_jobs' : hp.choice('n_jobs', [3]),
    
           'colsample_bytree' : hp.uniform('colsample_bytree', 0.2, 0.8),
           'learning_rate' : hp.uniform('learning_rate', 0.001, 0.5),
           'subsample' : hp.uniform('subsample', 0.2, 0.8),

           'max_depth' : hp.quniform('max_depth', 2, 9, 1), # 1 stands for q
           'n_estimators' : hp.quniform('n_estimators', 50, 500, 1)
        }

In [5]:
def objective_func(params):
    
    for param_name in INT_PARAMS:
        params[param_name] = int(params[param_name])
    
    estim = ESTIMATOR(**params)
    
    score = cross_val_score(estim, 
                            x_train, 
                            y_train, 
                            scoring=CV_SCORING_METRIC, 
                            cv=KFold(n_splits=N_SPLITS, 
                                     shuffle=True, 
                                     random_state=0)).mean()

    return -score # minus because of 'NEG' in neg_mean_squared_error

In [6]:
best_classifier = fmin(objective_func,
                       space,
                       algo=tpe.suggest,
                       max_evals=MAX_EVALS, 
                       rstate=np.random.RandomState(0))

100%|██████████| 100/100 [00:19<00:00,  4.71it/s, best loss: 8.466234990016847]


In [7]:
best_classifier

{'colsample_bytree': 0.6720306038517232,
 'learning_rate': 0.07864416385719122,
 'max_depth': 5.0,
 'metric': 0,
 'n_estimators': 407.0,
 'n_jobs': 0,
 'objective': 0,
 'subsample': 0.5116384130392916}

See 'hyperopt' notebook for more details