In [1]:
from hyperopt import tpe, hp, fmin, space_eval, Trials

import numpy as np

from sklearn.model_selection import train_test_split, cross_val_score, KFold, StratifiedKFold

# Datasets
from sklearn.datasets import load_boston, load_iris

# Regression Models
from sklearn.linear_model import Ridge
from lightgbm import LGBMRegressor

# Classification Models
from sklearn.svm import SVC
from lightgbm import LGBMClassifier

CV Metrics: https://scikit-learn.org/stable/modules/model_evaluation.html

HyperOpt: https://github.com/hyperopt/hyperopt/wiki/FMin

In [2]:
N_SPLITS = 5 # for cross validation
MAX_EVALS = 100 # # of hyperopt optimization rounds

In [3]:
def objective_func(params):
    for param_name in params['int_param_names']:
        params['params'][param_name] = int(params['params'][param_name])
    
    estim = params['model'](**params['params'])
    
    score = cross_val_score(estim, 
                            x_train, 
                            y_train, 
                            scoring=CV_SCORING_METRIC, 
                            cv=CV(n_splits=N_SPLITS, 
                                     shuffle=True, 
                                     random_state=0)).mean()

    return -score if IS_HIGHER_BETTER else score

In [4]:
def get_best_estimator():
    trials = Trials()
    best_estimator = fmin(objective_func,
                           space,
                           algo=tpe.suggest,
                           max_evals=MAX_EVALS, 
                           trials=trials,
                           rstate=np.random.RandomState(0))
    
    model = space_eval(space, best_estimator)['model']
    params = space_eval(space, best_estimator)['params']

    estimator = model(**params)
    
    return estimator

# Regression

In [5]:
x, y = load_boston(return_X_y=True)

In [6]:
CV_SCORING_METRIC = 'neg_mean_squared_error'
IS_HIGHER_BETTER = True # this will add '-' (minus) to the loss as we have 'NEG' in 'neg_mean_squared_error'
                        # because hyperopt tries to MINIMIZE objective function
CV = KFold

In [7]:
space = hp.choice('estimator',
        [
            {'model': LGBMRegressor,
             'params': {
                        'objective' : hp.choice('objective_LGBM', ['regression']),
                        'metric' : hp.choice('metric_LGBM', ['mse']),
                        'n_jobs' : hp.choice('n_jobs_LGBM', [3]),
                        'random_state' : hp.choice('random_state_LGBM', [0]),

                        'colsample_bytree' : hp.uniform('colsample_bytree_LGBM', 0.2, 0.8),
                        'learning_rate' : hp.uniform('learning_rate_LGBM', 0.001, 0.5),
                        'subsample' : hp.uniform('subsample_LGBM', 0.2, 0.8),

                        'max_depth' : hp.quniform('max_depth_LGBM', 2, 9, 1), # 1 stands for q
                        'n_estimators' : hp.quniform('n_estimators_LGBM', 50, 500, 1)
                      },
             'int_param_names' : ['max_depth', 'n_estimators']
            },

            {'model': Ridge,
             'params': {
                           'normalize' : hp.choice('normalize_RIDGE', [True, False]),
                           'solver' : hp.choice('solver_RIDGE', ['svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']),
                           'random_state' : hp.choice('random_state_RIDGE', [0]),

                           'alpha' : hp.loguniform('alpha_RIDGE', -5, 5)
                      },
             'int_param_names' : []
        }
        ])
# Labels should have unique names (i.e. if we put simply 'random_state' in both LGBMRegressor and Ridge -> error)

In [8]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

estimator = get_best_estimator()
estimator

100%|██████████| 100/100 [00:15<00:00,  5.59it/s, best loss: 8.392130215229862]


LGBMRegressor(boosting_type='gbdt', class_weight=None,
              colsample_bytree=0.7182480942124142, importance_type='split',
              learning_rate=0.16040147643869565, max_depth=4.0, metric='mse',
              min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
              n_estimators=358.0, n_jobs=3, num_leaves=31,
              objective='regression', random_state=0, reg_alpha=0.0,
              reg_lambda=0.0, silent=True, subsample=0.7030235795002958,
              subsample_for_bin=200000, subsample_freq=0)

# Classification

In [9]:
x, y = load_iris(return_X_y=True)

In [10]:
CV_SCORING_METRIC = 'balanced_accuracy'
IS_HIGHER_BETTER = True # this will add '-' (minus) to the loss as we have 'NEG' in 'neg_mean_squared_error'
                        # because hyperopt tries to MINIMIZE objective function
CV = StratifiedKFold

In [11]:
space = hp.choice('estimator',
        [
            {'model': LGBMClassifier,
             'params': {
                        'objective' : hp.choice('objective_LGBM', ['multiclass']),
                        'metric' : hp.choice('metric_LGBM', ['multi_logloss']),
                        'n_jobs' : hp.choice('n_jobs_LGBM', [3]),
                        'random_state' : hp.choice('random_state_LGBM', [0]),

                        'colsample_bytree' : hp.uniform('colsample_bytree_LGBM', 0.2, 0.8),
                        'learning_rate' : hp.uniform('learning_rate_LGBM', 0.001, 0.5),
                        'subsample' : hp.uniform('subsample_LGBM', 0.2, 0.8),

                        'max_depth' : hp.quniform('max_depth_LGBM', 2, 9, 1), # 1 stands for q
                        'n_estimators' : hp.quniform('n_estimators_LGBM', 50, 500, 1)
                      },
             'int_param_names' : ['max_depth', 'n_estimators']
            },

            {'model': SVC,
             'params': {
                       'C'      : hp.lognormal('C_SVC', 0,1),
                       'kernel' : hp.choice('kernel_SVC', ['rbf', 'poly', 'rbf', 'sigmoid']),
                       'degree' : hp.choice('degree_SVC', range(1,15)),
                       'gamma'  : hp.uniform('gamma_SVC', 0.001,10000)
                      },
             'int_param_names' : []
        }
        ])
# Labels should have unique names (i.e. if we put simply 'random_state' in both LGBMRegressor and Ridge -> error)

In [12]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

estimator = get_best_estimator()
estimator

100%|██████████| 100/100 [00:14<00:00,  5.03it/s, best loss: -0.9580687830687831]


LGBMClassifier(boosting_type='gbdt', class_weight=None,
               colsample_bytree=0.6362652862491109, importance_type='split',
               learning_rate=0.49849818403162566, max_depth=5.0,
               metric='multi_logloss', min_child_samples=20,
               min_child_weight=0.001, min_split_gain=0.0, n_estimators=349.0,
               n_jobs=3, num_leaves=31, objective='multiclass', random_state=0,
               reg_alpha=0.0, reg_lambda=0.0, silent=True,
               subsample=0.45608066870051306, subsample_for_bin=200000,
               subsample_freq=0)