In [29]:
import pandas
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import time
from xgboost import XGBClassifier
from sklearn.model_selection import RandomizedSearchCV
from scipy import stats
from hyperopt import hp, fmin, tpe, Trials, STATUS_OK

In [30]:
dataset = pandas.read_csv("diabetes.csv")

X = dataset[dataset.columns.difference(["Outcome"])]
target = dataset["Outcome"]
X_train, X_test, Y_train, Y_test = train_test_split(X, target, test_size=0.2, random_state=0)

In [31]:
classifier = XGBClassifier()
param_distributions  = {
    'max_depth': range(1, 20),
    'reg_alpha': stats.uniform(0, 1000),
    'reg_lambda': stats.uniform(0, 1000),
    'n_estimators': range(1, 20),
}

startTime = time.time()
random_search = RandomizedSearchCV(classifier, param_distributions, n_iter=100, random_state=42)
random_search.fit(X_train, Y_train)
searchTime = time.time() - startTime
print("Best params:", random_search.best_params_)
print("Time:", searchTime, "seconds")

Best params: {'max_depth': 7, 'n_estimators': 15, 'reg_alpha': np.float64(731.9939418114051), 'reg_lambda': np.float64(598.6584841970366)}
Time: 5.909210205078125 seconds


In [32]:
space = {
    'max_depth': hp.choice('max_depth', range(1, 20)),
    'reg_alpha': hp.uniform('reg_alpha', 0, 1000),
    'reg_lambda': hp.uniform('reg_lambda', 0, 1000),
    'n_estimators': hp.choice('n_estimators', range(1, 20)),
}

def objective(parameters):
    classifier = XGBClassifier(**parameters)
    classifier.fit(X_train, Y_train)
    Y_predicted = classifier.predict(X_test)
    accuracy = accuracy_score(Y_test, Y_predicted)
    
    return {'loss': -accuracy, 'status': STATUS_OK}


startTime = time.time()

trials = Trials()
best_parameters = fmin(
    fn=objective,
    space=space,
    algo=tpe.suggest,
    max_evals=100,
    trials=trials,
)

searchTime = time.time() - startTime

print("Best params:", best_parameters)
print("Time:", searchTime, "seconds")

100%|██████████| 100/100 [00:01<00:00, 65.96trial/s, best loss: -0.6948051948051948]
Best params: {'max_depth': np.int64(8), 'n_estimators': np.int64(0), 'reg_alpha': np.float64(485.05774248022647), 'reg_lambda': np.float64(335.14578479857704)}
Time: 1.5250003337860107 seconds


Как можно заметить, разные алгоритмы подобрали разные значения гиперпараметров, при чем второй алгоритм сделал это в несколько раз быстрее.