In [1]:
from xgboost import XGBClassifier

In [2]:
from src.utils.feats import load_gei
from sklearn.model_selection import KFold, cross_val_score
from src.utils.feats import load_gei

from sklearn.pipeline import Pipeline

from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer
from skopt.plots import plot_objective, plot_histogram

from src.utils.results import df_results
import pandas as pd

In [3]:
datapath = "../data/feats/database24_gei_480x640.pkl"

dim = (64, 48)

crop_person = True

X, y = load_gei(datapath, dim=dim, crop_person=crop_person) 

In [4]:
# Kfold
n_splits = 3
cv = KFold(n_splits=n_splits, random_state=42, shuffle=True)

In [5]:
res = cross_val_score(XGBClassifier(
    objective='multi:softmax',
    eval_metric='merror',
    use_label_encoder=False,
    random_state=0,
    learning_rate=1e-2,
    n_estimators=1000,
    max_depth=3, 
    subsample=0.8,
    colsample_bytree=1,
    gamma=1
), X, y, cv=cv)

In [6]:
print(f"Mean acc: {res.mean():.5f} +/- {res.std():.5f}")

Mean acc: 0.69298 +/- 0.04641


In [7]:
# pipeline class is used as estimator to enable
# search over different model types

pipe = Pipeline([
    ('model', XGBClassifier())
])

In [15]:
# explicit dimension classes can be specified like this

xgb_search = {
    'model': Categorical([XGBClassifier(
        objective='multi:softmax',
        eval_metric='merror',
        use_label_encoder=False,
        random_state=0)]),    
    'model__n_estimators': Integer(600, 1200, prior='log-uniform'),     
    'model__learning_rate': Real(0.09, 0.15, prior='uniform'),
    'model__booster': Categorical(['dart']), # 'dart' presented best results. 
#     Rashmi et al. DART: Dropouts meet Multiple Additive Regression Trees 
#     18th International Conference on Artificial Intelligence and Statistics (AISTATS) 2015, San Diego.
    'model__min_child_leaf': Integer(50, 150),
    'model__max_depth': Integer(5, 12),
    'model__gamma': Real(1e-8, 1e-3, prior='uniform'),
    'model__subsample': Real(0.3, 0.8, prior='uniform'),
    'model__colsample_bytree': Real(0.3, 0.8, prior='uniform'),
}

opt = BayesSearchCV(
    pipe,
    # (parameter space, # of evaluations)    
    [(xgb_search, 256)],
    cv=cv,
    scoring='accuracy'
)

In [None]:
opt.fit(X, y)

Parameters: { min_child_leaf } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { min_child_leaf } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { min_child_leaf } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { min_child_leaf } might not be used.

  This may not be accurate due to some parameters are only used in language bin

Parameters: { min_child_leaf } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { min_child_leaf } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { min_child_leaf } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { min_child_leaf } might not be used.

  This may not be accurate due to some parameters are only used in language bin

Parameters: { min_child_leaf } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { min_child_leaf } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { min_child_leaf } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { min_child_leaf } might not be used.

  This may not be accurate due to some parameters are only used in language bin

Parameters: { min_child_leaf } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { min_child_leaf } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { min_child_leaf } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { min_child_leaf } might not be used.

  This may not be accurate due to some parameters are only used in language bin

In [20]:
df = df_results(opt)
df.to_csv('results_XGBoost.csv')
df[:20]

Unnamed: 0,model,model__booster,model__colsample_bytree,model__gamma,model__learning_rate,model__max_depth,model__min_child_leaf,model__n_estimators,model__subsample,mean_test_score,std_test_score,rank
254,"XGBClassifier(base_score=0.5, booster='dart', ...",dart,0.375165,1e-08,0.099129,5,66,695,0.327273,0.793419,0.015286,1
201,"XGBClassifier(base_score=0.5, booster='dart', ...",dart,0.412522,1e-08,0.129058,5,50,1200,0.3,0.79159,0.012965,2
54,"XGBClassifier(base_score=0.5, booster='dart', ...",dart,0.359399,1e-08,0.136385,5,143,726,0.3,0.787934,0.024197,3
226,"XGBClassifier(base_score=0.5, booster='dart', ...",dart,0.3,0.000189377,0.097761,5,123,1088,0.315144,0.787934,0.028665,3
88,"XGBClassifier(base_score=0.5, booster='dart', ...",dart,0.3,1e-08,0.09,12,50,1200,0.3,0.786106,0.026664,5
106,"XGBClassifier(base_score=0.5, booster='dart', ...",dart,0.3,1e-08,0.09,12,150,1200,0.3,0.786106,0.026664,5
105,"XGBClassifier(base_score=0.5, booster='dart', ...",dart,0.3,1e-08,0.09,12,50,1200,0.3,0.786106,0.026664,5
205,"XGBClassifier(base_score=0.5, booster='dart', ...",dart,0.3,1e-08,0.09,12,50,1200,0.3,0.786106,0.026664,5
100,"XGBClassifier(base_score=0.5, booster='dart', ...",dart,0.3,1e-08,0.09,5,150,1200,0.3,0.786106,0.026664,5
212,"XGBClassifier(base_score=0.5, booster='dart', ...",dart,0.3,1e-08,0.09,12,50,1200,0.3,0.786106,0.026664,5
