In [3]:
import sklearn
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_squared_error, make_scorer
from sklearn.datasets import make_regression 

import xgboost as xgb

from hyperopt import fmin, tpe, Trials, hp, STATUS_OK

import numpy as np

# Some constants

In [4]:
# Size of the synthetic dataset and the fraction of it held out for testing.
NUMBER_SAMPLES = 200
TEST_SIZE = 0.2

# Seed passed as random_state to the data generator and the train/test split.
SEED = 314

# Evaluation budget given to hyperopt's fmin.
MAX_EVALS = 100

# The hyperparameter search space

In [6]:
# Search space handed to hyperopt's fmin; one entry per tuned XGBoost parameter.
# NOTE(review): hp.loguniform(label, low, high) draws exp(uniform(low, high)),
# and transform_params applies np.log to 'learning_rate' and 'gamma', so the
# model ends up with values spread uniformly over [low, high] rather than
# log-uniformly -- confirm this is the intended distribution.
hyperopt_hp_grid = dict(
    n_estimators=hp.quniform('n_estimators', 10, 1000, 1),
    learning_rate=hp.loguniform('learning_rate', 0.001, 0.1),
    max_depth=hp.quniform('max_depth', 3, 15, 1),
    gamma=hp.loguniform('gamma', 0.01, 1),
)

# Generate some data

In [7]:
# Synthetic regression problem; seeded so the same data is produced on every run.
features, targets = make_regression(n_samples=NUMBER_SAMPLES, random_state=SEED)

# Train/test split

In [8]:
# Hold out TEST_SIZE of the rows for testing; the seed fixes the split.
(train_features, test_features,
 train_targets, test_targets) = train_test_split(
    features,
    targets,
    test_size=TEST_SIZE,
    random_state=SEED,
)

# Define the score function 

In [9]:
# Wrap mean_squared_error as a scorer object so it can be passed as the
# `scoring` argument of cross_val_score.
mse_scorer = make_scorer(score_func=mean_squared_error)

# Hyperopt methods

In [30]:
def transform_params(params):
    """Convert a raw hyperopt sample into keyword arguments for the model.

    'gamma' and 'learning_rate' are mapped through np.log, and
    'n_estimators' and 'max_depth' are cast to int.

    The input dict is NOT modified: the conversion is done on a shallow copy.
    Mutating the caller's dict made the function non-idempotent, because a
    second call on the same object would apply the logarithm twice.

    Parameters
    ----------
    params : dict
        Mapping containing at least the keys 'gamma', 'learning_rate',
        'n_estimators' and 'max_depth'. Any other keys are passed through.

    Returns
    -------
    dict
        A new dict with the four entries converted.

    Raises
    ------
    KeyError
        If one of the four expected keys is missing.
    """
    transformed = dict(params)  # shallow copy keeps the caller's dict intact
    for name in ('gamma', 'learning_rate'):
        transformed[name] = np.log(transformed[name])
    for name in ('n_estimators', 'max_depth'):
        transformed[name] = int(transformed[name])
    return transformed

In [31]:
def loss(params):
    """Hyperopt objective: mean cross-validated score of an XGBoost regressor.

    The raw sample is first converted with transform_params, then an
    XGBRegressor built from it is scored with 5-fold cross-validation on the
    training data using mse_scorer.

    Parameters
    ----------
    params : dict
        Raw hyperparameter sample supplied by hyperopt.

    Returns
    -------
    dict
        {'loss': mean of the per-fold scores, 'status': STATUS_OK}.
    """
    model_kwargs = transform_params(params)
    regressor = xgb.XGBRegressor(silent=False, **model_kwargs)
    fold_scores = cross_val_score(
        regressor,
        train_features,
        train_targets,
        scoring=mse_scorer,
        cv=5,
        n_jobs=4,
    )
    return dict(loss=fold_scores.mean(), status=STATUS_OK)

In [40]:
def optimize(trials, space, max_evals=None, rstate=None):
    """Run a TPE search over `space`, minimising `loss`.

    Parameters
    ----------
    trials : hyperopt.Trials
        Object passed to fmin as its `trials` argument.
    space : dict
        Hyperopt search space.
    max_evals : int, optional
        Evaluation budget. When omitted, the module-level MAX_EVALS is read
        at call time, which matches the previous hard-coded behaviour.
    rstate : optional
        Random state forwarded unchanged to fmin so a caller can seed the
        search. The accepted type depends on the installed hyperopt version;
        None keeps fmin's default.

    Returns
    -------
    dict
        Whatever fmin returns for the best trial found.
    """
    if max_evals is None:
        max_evals = MAX_EVALS
    return fmin(
        loss,
        space,
        algo=tpe.suggest,
        max_evals=max_evals,
        trials=trials,
        rstate=rstate,
    )

# Run the optimization

In [36]:
# Container handed to fmin (through optimize) as its `trials` argument.
trials = Trials()

In [41]:
# Run the search, then map the raw best sample through transform_params.
hyperopt_opt_hp = transform_params(optimize(trials, hyperopt_hp_grid))

In [42]:
# Display the selected hyperparameters as the cell output.
hyperopt_opt_hp

{'gamma': 0.55199695557430306,
 'learning_rate': 0.046608179354446229,
 'max_depth': 3,
 'n_estimators': 420}