In [1]:
from xgboost import XGBClassifier

In [19]:
from src.utils.feats import load_gei
from sklearn.model_selection import KFold, cross_val_score
from src.utils.feats import load_gei

from sklearn.pipeline import Pipeline

from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer
from skopt.plots import plot_objective, plot_histogram

import pandas as pd

In [14]:
# Pickled GEI features to load, plus the two options that are forwarded
# unchanged to load_gei.
# NOTE(review): `dim` is presumably the target image size - confirm against
# load_gei, which is defined outside this notebook.
datapath = "../data/feats/database24_gei_480x640.pkl"
dim = (64, 48)
crop_person = True

# X and y are what the hyper-parameter search is fitted on further down.
X, y = load_gei(datapath, dim=dim, crop_person=crop_person)

In [15]:
# Shuffled K-fold splitter with a fixed seed, so every candidate evaluated by
# the search is scored on the same folds.
n_splits = 3
cv = KFold(n_splits, shuffle=True, random_state=42)

In [16]:
# The estimator is wrapped in a one-step Pipeline so that the search space can
# address (and swap) the model itself through the 'model' parameter.
pipe = Pipeline(steps=[('model', XGBClassifier())])

In [17]:
# Search space for the 'model' step of the pipeline, written with explicit
# skopt dimension classes:
#   - 'model'                : the estimator itself (a single seeded XGBClassifier)
#   - 'model__n_estimators'  : integer in [1, 400]
#   - 'model__learning_rate' : real in [1e-6, 1], sampled log-uniformly
xgb_search = {
    'model': Categorical([XGBClassifier(random_state=0)]),
    'model__n_estimators': Integer(1, 400),
    'model__learning_rate': Real(1e-6, 1e+0, prior='log-uniform'),
}

opt = BayesSearchCV(
    pipe,
    # list of (parameter space, number of evaluations) pairs
    [(xgb_search, 16)],
    cv=cv,
    scoring='accuracy',
    # Seed the optimizer as well: without it the sampled hyper-parameters
    # differ on every run even though the folds and the model are seeded.
    random_state=42,
)

In [18]:
# Run the hyper-parameter search on the loaded data. The call is the cell's
# last expression, so the repr of the returned search object is displayed.
opt.fit(X, y)





































BayesSearchCV(cv=KFold(n_splits=3, random_state=42, shuffle=True),
              estimator=Pipeline(steps=[('model',
                                         XGBClassifier(base_score=None,
                                                       booster=None,
                                                       colsample_bylevel=None,
                                                       colsample_bynode=None,
                                                       colsample_bytree=None,
                                                       gamma=None, gpu_id=None,
                                                       importance_type='gain',
                                                       interaction_constraints=None,
                                                       learning_rate=None,
                                                       max_delta_step=None,
                                                       max_depth=None,
                                         

In [20]:
# Summarise the search: one row per evaluated parameter setting, with its mean
# and standard deviation of the test score across folds and its rank, ordered
# from best to worst mean score.
cv_results = opt.cv_results_

df = (
    pd.DataFrame(cv_results['params'])
    .assign(
        mean_test_score=cv_results['mean_test_score'],
        std_test_score=cv_results['std_test_score'],
        rank=cv_results['rank_test_score'],
    )
    .sort_values(by='mean_test_score', ascending=False)
)

df

Unnamed: 0,model,model__learning_rate,model__n_estimators,mean_test_score,std_test_score,rank
13,"XGBClassifier(base_score=0.5, booster='gbtree'...",0.148282,131,0.703839,0.035148,1
12,"XGBClassifier(base_score=0.5, booster='gbtree'...",0.114134,400,0.700183,0.035202,2
14,"XGBClassifier(base_score=0.5, booster='gbtree'...",0.242274,400,0.698355,0.037621,3
10,"XGBClassifier(base_score=0.5, booster='gbtree'...",0.080982,284,0.696527,0.035445,4
11,"XGBClassifier(base_score=0.5, booster='gbtree'...",0.049565,400,0.694698,0.036492,5
8,"XGBClassifier(base_score=0.5, booster='gbtree'...",0.059201,158,0.691042,0.042703,6
5,"XGBClassifier(base_score=0.5, booster='gbtree'...",0.737305,308,0.645338,0.04624,7
2,"XGBClassifier(base_score=0.5, booster='gbtree'...",0.005136,340,0.628885,0.048743,8
4,"XGBClassifier(base_score=0.5, booster='gbtree'...",0.902259,272,0.6234,0.063282,9
6,"XGBClassifier(base_score=0.5, booster='gbtree'...",0.013379,65,0.599634,0.055236,10
