In [None]:
from timeit import default_timer as timer
# Search space for the random-forest random search. Every key must be a valid
# RandomForestClassifier constructor argument, because a sampled dict is
# unpacked straight into the constructor below.
# XGBoost-only settings (eta, min_child_weight, sampling_method,
# colsample_bytree, reg_alpha, reg_lambda) do not belong here:
# RandomForestClassifier rejects them with a TypeError.
param_grid = {
    'n_estimators': [50, 100, 200, 300, 400, 500, 600],
    'max_depth': [5, 10, 15, 30, 40, 80, 100],
    'min_samples_leaf': [3, 6, 10, 15, 20, 30, 40],
    'min_samples_split': [3, 6, 10, 15, 20],
    'criterion': ['entropy', 'gini'],
    # 'auto' was an alias of 'sqrt' for classifiers and is no longer accepted
    # by recent scikit-learn releases, so only the explicit names are listed.
    'max_features': ['sqrt', 'log2'],
}

# Baseline: score one random draw from the grid so later results have a reference.
params = {name: random.choice(options) for name, options in param_grid.items()}
rand = sklearn.ensemble.RandomForestClassifier(**params, n_jobs=-1)
acc = cross_val_score(rand, X_final, y_final, scoring="accuracy", n_jobs=-1).mean()
r_best = acc

# One pre-allocated row per random-search iteration.
random_results = pd.DataFrame(columns=['loss', 'params', 'iteration', 'estimators', 'time'],
                              index=list(range(1000)))

# Candidate row-subsampling fractions drawn from during the search.
subsample_dist = list(np.linspace(0.5, 1, 100))

def random_objective(params, iteration, n_folds = 10):
    """Evaluate one random-search candidate with cross-validation.

    A fresh RandomForestClassifier is built from ``params`` for every call, so
    each iteration scores the hyperparameters it was actually given.

    Parameters
    ----------
    params : dict
        Sampled hyperparameters. Keys that RandomForestClassifier does not
        accept are ignored rather than raising.
    iteration : int
        Index of the search iteration; passed through to the result row.
    n_folds : int, default 10
        Number of cross-validation folds.

    Returns
    -------
    list
        ``[loss, params, iteration, n_estimators, seconds]`` where ``loss`` is
        ``1 - mean accuracy``, ``params`` holds only the hyperparameters that
        were actually applied, and ``n_estimators`` is the forest size used.
    """
    # Keep only arguments the estimator understands so a stray key in the
    # sampled dict cannot abort the whole search.
    supported = sklearn.ensemble.RandomForestClassifier().get_params()
    rf_params = {name: value for name, value in params.items() if name in supported}
    model = sklearn.ensemble.RandomForestClassifier(**{**rf_params, 'n_jobs': -1})

    start = timer()
    fold_scores = cross_val_score(model, X_final, y_final, cv=n_folds,
                                  scoring="accuracy", n_jobs=-1)
    end = timer()

    # Loss must be minimized, so convert mean accuracy into an error rate.
    loss = 1 - float(np.mean(fold_scores))

    # Report the real forest size instead of an arg-max over a scalar.
    n_estimators = int(model.n_estimators)

    return [loss, rf_params, iteration, n_estimators, end - start]

# Run one random-search trial per pre-allocated row of `random_results`.
for i in random_results.index:

    # Randomly sample one value per hyperparameter.
    params = {name: random.choice(options) for name, options in param_grid.items()}

    # Row-subsampling fraction. Random forests call this `max_samples`
    # (`subsample` is the gradient-boosting name and is not accepted).
    params['max_samples'] = random.choice(subsample_dist)

    # Store [loss, params, iteration, estimators, time] in row i.
    random_results.loc[i, :] = random_objective(params, i)

# Best (lowest-loss) trials first, with a clean 0..n-1 index.
random_results = random_results.sort_values('loss', ascending=True).reset_index(drop=True)

# Only the last expression of a cell is rendered, so show the table explicitly.
display(random_results.head())
random_results['params'][0]

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from xgboost import XGBClassifier
XGB = XGBClassifier(random_state=0)

# Grid limited to hyperparameters XGBClassifier actually consumes.
# The random-forest names (min_samples_leaf, min_samples_split, criterion,
# max_features) are not XGBoost parameters; keeping them only multiplied the
# number of fits without changing the fitted models.
# NOTE(review): the three added value lists are starting points, not tuned
# choices - adjust them to the compute budget (288 combinations x 10 folds).
parameter_grid = {
    'n_estimators': [100, 200, 300, 400, 500, 600],
    'max_depth': [5, 10, 15, 30, 40, 80],
    'learning_rate': [0.1, 0.3],
    'min_child_weight': [1, 5],
    'colsample_bytree': [0.6, 0.8],
}

# Shuffled, seeded stratified folds keep the comparison reproducible.
cross_validation = StratifiedKFold(n_splits=10, random_state=0, shuffle=True)

grid_search_XGB = GridSearchCV(XGB,
                               param_grid=parameter_grid,
                               cv=cross_validation,
                               scoring="accuracy",
                               n_jobs=-1,
                               verbose=0)

grid_search_XGB.fit(X_final, y_final)
print('Best score: {}'.format(grid_search_XGB.best_score_))
print('Best parameters: {}'.format(grid_search_XGB.best_params_))

In [None]:
!pip install scikit-optimize
import skopt
from skopt import gp_minimize
from skopt.space import Integer, Categorical
from skopt.utils import use_named_args
from skopt.plots import plot_convergence
# Bayesian-optimisation search space for RandomForestClassifier. Each
# dimension's `name` must match a constructor argument, because
# `use_named_args` forwards the dimensions as keyword arguments.
space = [
    Integer(100, 1000, name="n_estimators"),
    Integer(10, 80, name="max_depth"),
    # 'auto' was an alias of 'sqrt' for classifiers and is no longer accepted
    # by recent scikit-learn releases, so it is not offered as a choice.
    Categorical(["sqrt", "log2"], name="max_features"),
    Categorical(["gini", "entropy"], name="criterion"),
    Integer(4, 15, name="min_samples_split"),
    Integer(5, 9, name="min_samples_leaf"),
]
@use_named_args(space)
def objective(**params):
    """Return the negated mean 5-fold CV accuracy of a random forest.

    `gp_minimize` minimizes its objective, so accuracy is negated: a lower
    return value means a better model. Accuracy is used (rather than mean
    absolute error) because the estimator is a classifier and the other
    searches in this notebook are scored on accuracy as well.

    Parameters
    ----------
    **params
        Hyperparameters named after the dimensions of `space`; forwarded
        unchanged to RandomForestClassifier.

    Returns
    -------
    float
        ``-mean(accuracy)`` over the 5 folds.
    """
    rand = sklearn.ensemble.RandomForestClassifier(**params, n_jobs=-1)
    fold_accuracy = cross_val_score(rand, X_final, y_final, cv=5,
                                    n_jobs=-1,
                                    scoring="accuracy")
    return -np.mean(fold_accuracy)

# Seeded so the sequence of sampled points is repeatable.
tune_rand_gp = gp_minimize(objective, space, random_state=1234)

In [None]:
from hyperopt import hp, fmin, tpe, STATUS_OK, Trials
from sklearn.metrics import accuracy_score
from hyperopt.pyll import scope

# Hyperopt search space for RandomForestClassifier.
space = {
    "n_estimators": hp.choice("n_estimators", [200, 600, 900, 1200, 1500]),
    # hp.quniform yields floats (10.0, 15.0, ...); scope.int converts them to
    # the integers that max_depth requires.
    "max_depth": scope.int(hp.quniform("max_depth", 10, 80, 5)),
    # 'auto' was an alias of 'sqrt' for classifiers and is no longer accepted
    # by recent scikit-learn releases, so it is not offered as a choice.
    "max_features": hp.choice("max_features", ["sqrt", "log2"]),
    "criterion": hp.choice("criterion", ["gini", "entropy"]),
    "min_samples_split": hp.choice("min_samples_split", [2, 5, 10, 12, 15]),
    "min_samples_leaf": hp.choice("min_samples_leaf", [1, 2, 4, 7, 9]),
}
def tune_random(params):
    """Hyperopt objective: negated mean CV accuracy of a random forest.

    Parameters
    ----------
    params : dict
        Hyperparameters sampled by hyperopt, used as RandomForestClassifier
        keyword arguments.

    Returns
    -------
    dict
        ``{"loss": -accuracy, "status": STATUS_OK}``; the loss is negated
        because hyperopt minimizes.
    """
    rf_params = dict(params)  # copy so the caller's dict is left untouched
    # Quantized hyperopt distributions produce floats, but max_depth must be
    # an integer (or None).
    if rf_params.get("max_depth") is not None:
        rf_params["max_depth"] = int(rf_params["max_depth"])

    rand = sklearn.ensemble.RandomForestClassifier(**rf_params, n_jobs=-1)
    acc = cross_val_score(rand, X_final, y_final, scoring="accuracy", n_jobs=-1).mean()
    return {"loss": -acc, "status": STATUS_OK}
from hyperopt import space_eval

# Trials keeps the full evaluation history of the search.
trials = Trials()
best = fmin(fn=tune_random, space=space, algo=tpe.suggest,
            max_evals=100, trials=trials)

# fmin reports hp.choice dimensions as list indices, not as the chosen values;
# space_eval maps the result back onto the actual hyperparameter values.
print("Best: {}".format(space_eval(space, best)))