In [35]:
import optuna
# Silence Optuna's per-trial INFO logs; only warnings and errors are shown.
# Use the library's level constant: set_verbosity is documented to take an int.
optuna.logging.set_verbosity(optuna.logging.WARNING)

In [36]:
from optuna import create_study, samplers

In [37]:
class ModelTraining:
    """Tune an ``LGBMRegressor`` with Optuna using cross-validated scores.

    Each trial samples ``learning_rate`` and ``max_depth``, scores the
    resulting model with ``cross_val_score`` on ``(X, y)`` and reports the
    mean of the fold scores. The study *maximises* that value, so
    ``cv_scoring_metric`` must be a "higher is better" scorer
    (e.g. ``'neg_mean_squared_error'``).

    Parameters
    ----------
    X, y
        Features and target, forwarded unchanged to ``cross_val_score``.
    n_trials
        Number of trials run by :meth:`optimize`.
    cv_scoring_metric
        Passed as ``scoring=`` to ``cross_val_score``.
    cv
        Passed as ``cv=`` to ``cross_val_score``.
    sampler_seed
        Seed given to ``TPESampler`` so the search is repeatable.

    Attributes
    ----------
    study
        ``None`` until :meth:`optimize` has been called; afterwards the
        Optuna study holding all trials.
    """

    # Search space and model seed, named so they are not buried in the
    # objective as magic numbers.
    _LEARNING_RATE_BOUNDS = (0.05, 0.5)
    _MAX_DEPTH_BOUNDS = (2, 6)  # suggest_int treats both ends as inclusive
    _MODEL_RANDOM_STATE = 1

    def __init__(self, X, y, n_trials, cv_scoring_metric, cv,
                 sampler_seed=1):
        self.X = X
        self.y = y
        self.n_trials = n_trials
        self._cv_scoring_metric = cv_scoring_metric
        self._cv = cv
        self.study = None
        self._sampler_seed = sampler_seed

    def _objective(self, trial):
        """Return the mean cross-validation score for one sampled configuration."""
        # ``suggest_uniform`` is deprecated in favour of ``suggest_float``;
        # fall back only on Optuna releases that predate ``suggest_float``.
        suggest_float = getattr(trial, 'suggest_float', None) or trial.suggest_uniform

        params = {
            'learning_rate': suggest_float('learning_rate',
                                           *self._LEARNING_RATE_BOUNDS),
            'max_depth': trial.suggest_int('max_depth',
                                           *self._MAX_DEPTH_BOUNDS),
        }

        model = LGBMRegressor(**params, random_state=self._MODEL_RANDOM_STATE)
        fold_scores = cross_val_score(model,
                                      self.X,
                                      self.y,
                                      scoring=self._cv_scoring_metric,
                                      cv=self._cv)
        return fold_scores.mean()

    def optimize(self):
        """Create a fresh, seeded study and run ``n_trials`` trials.

        The study is stored on ``self.study`` (replacing any previous one);
        nothing is returned.
        """
        sampler = samplers.TPESampler(seed=self._sampler_seed)
        self.study = create_study(sampler=sampler, direction='maximize')
        self.study.optimize(self._objective, n_trials=self.n_trials)

In [38]:
# Configure the search: 50 trials, each scored by cross-validation with
# negative MSE (a "higher is better" scorer, matching the maximising study).
model_training = ModelTraining(
    X=X_train,
    y=y_train,
    n_trials=50,
    cv_scoring_metric='neg_mean_squared_error',
    cv=kfold,
)

In [39]:
# Run the search configured above; the resulting study is stored on
# `model_training.study` (the call itself returns nothing).
model_training.optimize()

In [40]:
# Hyperparameters (learning_rate, max_depth) of the best trial in the study.
model_training.study.best_params

{'learning_rate': 0.24545070025296128, 'max_depth': 3}

In [41]:
# Objective value of the best trial: the mean cross-validated score. It is
# negative because the scorer configured above is 'neg_mean_squared_error'.
model_training.study.best_value

-11.623597255551626

In [42]:
# Refit a single model on the whole training split using the best
# hyperparameters from the study (same seed as used inside the search).
best_model = LGBMRegressor(
    random_state=1,
    **model_training.study.best_params,
)
best_model.fit(X_train, y_train)

LGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
              importance_type='split', learning_rate=0.24545070025296128,
              max_depth=3, min_child_samples=20, min_child_weight=0.001,
              min_split_gain=0.0, n_estimators=100, n_jobs=-1, num_leaves=31,
              objective=None, random_state=1, reg_alpha=0.0, reg_lambda=0.0,
              silent=True, subsample=1.0, subsample_for_bin=200000,
              subsample_freq=0)

In [43]:
# Error of the refit model on the data it was trained on.
# NOTE(review): `mse` is defined outside this excerpt — presumably mean squared error; confirm.
mse(y_train, best_model.predict(X_train))

2.22107239312464

In [44]:
# Same metric on the test split (`X_test`, `y_test`); compare with the
# training-split value above to gauge how much the model overfits.
mse(y_test, best_model.predict(X_test))

11.41184036504707