In [1]:
#importing libraries
import numpy as np
from collections import Counter
import pandas as pd

import lightgbm as lgb

from sklearn.datasets import load_breast_cancer,load_boston,load_wine
from sklearn.model_selection import train_test_split, KFold, StratifiedKFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import mean_squared_error,roc_auc_score,precision_score, accuracy_score
pd.options.display.max_columns = 999
import pickle
import cv2

from skopt import BayesSearchCV

from skopt.space import Real, Categorical, Integer


from src.utils.feats import load_gei
from src.utils.results import df_results
    
import pandas as pd

import matplotlib.pyplot as plt

In [2]:
datapath = "../data/feats/database24_gei_480x640.pkl"

dim = (64, 48)
crop_person = True

X, y = load_gei(datapath, dim=dim, crop_person=crop_person) 

In [3]:
n_splits = 3
cv = KFold(n_splits=n_splits, random_state=42, shuffle=True)

In [4]:
#setting up the parameters
params={}
params['learning_rate']=0.03
params['boosting_type']='gbdt' #GradientBoostingDecisionTree
params['objective']='multiclass' #Multi-class target feature
params['metric']='multi_logloss' #metric for multi-class
params['max_depth']=10
params['num_class']=24 #no.of unique values in the target class not inclusive of the end value

In [5]:
# pipeline class is used as estimator to enable
# search over different model types

pipe = Pipeline([
    ('model', lgb.LGBMClassifier())
])

In [None]:
X, y = load_gei(datapath, dim=dim, crop_person=crop_person) 

lgb_search = {
    'model': Categorical([lgb.LGBMClassifier(
        boosting_type='gbdt',
        objective='multiclass',
        metric='multi_logloss',
        num_classes=24,
        random_state=0)]),
    'model__min_data_in_leaf': Integer(75, 90),  
    'model__learning_rate': Real(0.1, 0.3, prior='uniform'),
    'model__n_estimators': Integer(500, 1100),    
    'model__num_leaves': Integer(20, 50)    
}

opt = BayesSearchCV(
    pipe,
    # (parameter space, # of evaluations)    
    [(lgb_search, 256),],
    cv=cv,
    scoring='accuracy'
)

opt.fit(X, y)







In [12]:
df = df_results(opt)
df.to_csv('results_lightGBM.csv')
df

Unnamed: 0,model,model__learning_rate,model__min_data_in_leaf,model__n_estimators,model__num_leaves,mean_test_score,std_test_score,rank
48,LGBMClassifier(learning_rate=0.170947423212654...,0.170947,86,810,50,0.822669,0.031320,1
227,LGBMClassifier(learning_rate=0.170947423212654...,0.258007,86,772,50,0.822669,0.038656,1
82,LGBMClassifier(learning_rate=0.170947423212654...,0.141029,78,615,50,0.822669,0.030010,1
131,LGBMClassifier(learning_rate=0.170947423212654...,0.238254,81,831,20,0.820841,0.035700,4
163,LGBMClassifier(learning_rate=0.170947423212654...,0.100445,80,707,50,0.820841,0.033740,4
...,...,...,...,...,...,...,...,...
46,LGBMClassifier(learning_rate=0.170947423212654...,0.300000,75,609,32,0.793419,0.028238,248
70,LGBMClassifier(learning_rate=0.170947423212654...,0.100000,90,675,47,0.791590,0.042616,253
26,LGBMClassifier(learning_rate=0.170947423212654...,0.268945,90,1027,21,0.791590,0.034406,253
56,LGBMClassifier(learning_rate=0.170947423212654...,0.231607,84,789,20,0.787934,0.043595,255
