In [25]:
from hyperopt import tpe, hp, fmin, space_eval, Trials

In [26]:
class ModelTraining:
    """Tune an ``LGBMRegressor`` with hyperopt's TPE algorithm.

    Every candidate parameter set is scored with ``cross_val_score`` on
    ``(X, y)``. The mean cross-validation score is negated and handed to
    ``fmin`` as the loss, so the search maximises the scorer. This assumes
    a scorer for which larger values are better (e.g.
    ``'neg_mean_squared_error'``).

    Parameters
    ----------
    X, y
        Training features and target, passed unchanged to
        ``cross_val_score``.
    params_space
        hyperopt search space. Each sampled point is forwarded to
        ``LGBMRegressor`` as keyword arguments.
    n_trials
        Passed to ``fmin`` as ``max_evals``.
    cv_scoring_metric
        Passed to ``cross_val_score`` as ``scoring``.
    cv
        Passed to ``cross_val_score`` as ``cv``.
    random_state : int, default 1
        Seed used for both the estimator and the TPE sampler. The default
        keeps the value that used to be hard-coded in both places.

    Attributes
    ----------
    trials
        The ``Trials`` object handed to ``fmin``; created once per instance.
    """

    def __init__(self, X, y, params_space, n_trials, cv_scoring_metric, cv,
                 random_state=1):
        self.X = X
        self.y = y
        self.params_space = params_space
        self.n_trials = n_trials
        self.cv_scoring_metric = cv_scoring_metric
        self.cv = cv
        self.random_state = random_state
        self.trials = Trials()

    def _objective(self, params):
        """Return the loss for one sampled parameter set.

        The loss is the negated mean cross-validation score of an
        ``LGBMRegressor`` built from ``params``.
        """
        estimator = LGBMRegressor(**params, random_state=self.random_state)
        mean_score = cross_val_score(estimator, self.X, self.y,
                                     scoring=self.cv_scoring_metric,
                                     cv=self.cv).mean()
        # fmin minimises, so flip the sign of the score.
        return -mean_score

    def optimize(self):
        """Run the TPE search and return whatever ``fmin`` returns.

        As the notebook points out further down, ``hp.choice`` parameters
        come back as option indices, not values; map the result through
        ``space_eval`` to obtain the real parameter values.
        """
        # NOTE(review): newer hyperopt releases (0.2.7+) appear to expect a
        # numpy Generator (np.random.default_rng) for `rstate` -- confirm
        # against the installed version before upgrading.
        return fmin(self._objective,
                    self.params_space,
                    algo=tpe.suggest,
                    max_evals=self.n_trials,
                    trials=self.trials,
                    rstate=np.random.RandomState(self.random_state))

In [27]:
# Search space explored by the optimiser:
#   * learning_rate -- drawn uniformly between 0.05 and 0.5
#   * max_depth     -- one of the options in np.arange(2, 7), i.e. 2..6
params_space = {
    'learning_rate': hp.uniform('learning_rate', 0.05, 0.5),
    'max_depth': hp.choice('max_depth', np.arange(2, 7)),
}

In [28]:
# Configure the search: 50 evaluations, each scored by cross-validated
# negative MSE on the training split using the `kfold` splitter.
model_training = ModelTraining(
    X=X_train,
    y=y_train,
    params_space=params_space,
    n_trials=50,
    cv_scoring_metric='neg_mean_squared_error',
    cv=kfold,
)

In [29]:
# Run the TPE search (up to n_trials evaluations) and keep fmin's result.
# Note: hp.choice parameters are reported as option indices, not values.
best_params = model_training.optimize()

100%|███████████████████████████████████████████████████| 50/50 [00:08<00:00,  5.97it/s, best loss: 11.870490843729723]


In [30]:
best_params
# Keep in mind that when 'hp.choice' is used, the dict returned by fmin contains
# the *index* of the selected option within the hp.choice list, not the option itself.
# For example, 'max_depth': 1 means "the option at index 1 of np.arange(2,7) was picked".
# As np.arange(2,7) -> [2,3,4,5,6] and indexing starts at 0, index 1 is the value 3.

{'learning_rate': 0.2556072866275003, 'max_depth': 1}

In [31]:
# space_eval maps fmin's result back onto the search space, so hp.choice
# indices are replaced by the actual parameter values they refer to:
space_eval(params_space, best_params)

{'learning_rate': 0.2556072866275003, 'max_depth': 3}

In [32]:
# Refit on the full training split with the tuned parameter values
# (decoded from indices via space_eval) and the same seed as the search.
best_model = LGBMRegressor(
    random_state=1,
    **space_eval(params_space, best_params),
)
best_model.fit(X_train, y_train)

LGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
              importance_type='split', learning_rate=0.2556072866275003,
              max_depth=3, min_child_samples=20, min_child_weight=0.001,
              min_split_gain=0.0, n_estimators=100, n_jobs=-1, num_leaves=31,
              objective=None, random_state=1, reg_alpha=0.0, reg_lambda=0.0,
              silent=True, subsample=1.0, subsample_for_bin=200000,
              subsample_freq=0)

In [33]:
# Error of the tuned model on the data it was fitted on.
# `mse` is defined outside this section -- presumably mean squared error; verify.
mse(y_train, best_model.predict(X_train))

2.0059894557008233

In [34]:
# Error of the tuned model on the held-out test split; compare with the
# training-set figure above to judge how well the model generalises.
mse(y_test, best_model.predict(X_test))

11.54208304337233