# Across-Model Hyperparameter Optimization

### Method used:
* Wrapper function for GridSearchCV

In [4]:
import numpy as np
import pandas as pd

from sklearn.model_selection import GridSearchCV
from sklearn import datasets

from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC


# Load scikit-learn's bundled breast-cancer dataset and split it into the
# feature matrix and the target vector used by the grid searches below.
breast_cancer = datasets.load_breast_cancer()
X_cancer, y_cancer = breast_cancer.data, breast_cancer.target

# Preview the first rows of the feature matrix.
display(pd.DataFrame(X_cancer).head())

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


### Wrapper Class & respective Functions

Here we define a helper class that wraps `GridSearchCV`: it runs one grid search per classifier and merges the cross-validation results of all searches into a single summary table.

In [37]:
class EstimatorSelectionHelper:
    """Run one ``GridSearchCV`` per estimator and compare the results.

    Parameters
    ----------
    models : dict
        Maps a display name to an unfitted estimator instance.
    params : dict
        Maps the same names to the parameter grid (a dict, or a list of
        dicts) handed to ``GridSearchCV`` for that estimator.

    Attributes
    ----------
    keys
        Key view of ``models``; decides which searches ``fit`` runs.
    grid_searches : dict
        Fitted ``GridSearchCV`` objects keyed by estimator name. Empty until
        ``fit`` has been called.
    """

    def __init__(self, models, params):
        self.models = models
        self.params = params
        self.keys = models.keys()
        self.grid_searches = {}

    def fit(self, X, y, **grid_kwargs):
        """Fit a ``GridSearchCV`` for every configured estimator.

        Parameters
        ----------
        X, y
            Training data, passed unchanged to ``GridSearchCV.fit``.
        **grid_kwargs
            Extra keyword arguments for the ``GridSearchCV`` constructor
            (e.g. ``cv``, ``scoring``, ``n_jobs``). ``return_train_score``
            defaults to ``True`` but may be overridden here.

        Raises
        ------
        KeyError
            If an estimator name has no entry in ``params``.
        """
        # Merge instead of hard-coding the keyword next to **grid_kwargs:
        # passing return_train_score explicitly used to raise
        # "TypeError: got multiple values for keyword argument".
        search_kwargs = {'return_train_score': True, **grid_kwargs}

        # Echo the caller's settings so the run log records them.
        print(grid_kwargs)
        for key in self.keys:
            print('Running GridSearchCV for %s.' % key)
            grid_search = GridSearchCV(self.models[key], self.params[key],
                                       **search_kwargs)
            grid_search.fit(X, y)
            self.grid_searches[key] = grid_search
        print('Done.')

    def score_summary(self, sort_by='mean_test_score'):
        """Return the CV results of all fitted searches as one DataFrame.

        Parameters
        ----------
        sort_by : str
            Name of the ``cv_results_`` column to sort by, best (highest)
            value first.

        Returns
        -------
        pandas.DataFrame
            One row per evaluated parameter combination, with ``estimator``
            as the first column and a fresh 0..n-1 index. The individual
            ``param_*`` columns are omitted (the ``params`` dict column is
            kept), as are the ``rank_test_*`` columns.

        Raises
        ------
        ValueError
            If no grid search has been fitted yet.
        """
        if not self.grid_searches:
            # pd.concat([]) would raise an opaque "No objects to concatenate".
            raise ValueError('No grid search results available; call fit() first.')

        frames = []
        for name, grid_search in self.grid_searches.items():
            frame = pd.DataFrame(grid_search.cv_results_)
            # Keep every column whose name does not contain "param_".
            frame = frame.filter(regex='^(?!.*param_).*$')
            frames.append(frame.assign(estimator=name))

        df = pd.concat(frames)
        df = df.sort_values([sort_by], ascending=False)
        df = df.reset_index(drop=True)

        # Ranks are computed within a single search, so they are meaningless
        # once several estimators are merged. Keyword `columns=` is required:
        # pandas 2.x no longer accepts `axis` as a positional argument.
        rank_columns = [c for c in df.columns if c.startswith('rank_test_')]
        df = df.drop(columns=rank_columns)

        columns = ['estimator'] + [c for c in df.columns if c != 'estimator']
        return df[columns]

# Define the PARAMS and CLASSIFIERS list

In this step we define the classifiers on which we will run hyperparameter optimization (HPO), along with the parameter grid to search for each one.



In [34]:
# Fixed seed so the randomized ensembles produce the same scores on every
# Restart & Run All; without it the summary table changes between runs.
SEED = 42

# Classifiers to tune, keyed by the name shown in the score summary.
models1 = {
    'ExtraTreesClassifier': ExtraTreesClassifier(random_state=SEED),
    'RandomForestClassifier': RandomForestClassifier(random_state=SEED),
    'AdaBoostClassifier': AdaBoostClassifier(random_state=SEED),
    'GradientBoostingClassifier': GradientBoostingClassifier(random_state=SEED),
    # Per the scikit-learn docs, SVC only uses random_state for probability
    # estimates, which are off by default, so no seed is needed here.
    'SVC': SVC()
}

# Parameter grid per classifier; the keys must match those of `models1`.
params1 = {
    'ExtraTreesClassifier': {'n_estimators': [16, 32]},
    'RandomForestClassifier': {'n_estimators': [16, 32]},
    'AdaBoostClassifier': {'n_estimators': [16, 32]},
    'GradientBoostingClassifier': {'n_estimators': [16, 32], 'learning_rate': [0.8, 1.0]},
    # A list of grids: each dict is searched separately, so `gamma` is only
    # explored together with the rbf kernel.
    'SVC': [
        {'kernel': ['linear'], 'C': [1, 10]},
        {'kernel': ['rbf'], 'C': [1, 10], 'gamma': [0.001, 0.0001]},
    ]
}

In [39]:
# Grid-search every configured classifier: F1 scoring, cv=5, two parallel jobs.
helper1 = EstimatorSelectionHelper(models=models1, params=params1)
helper1.fit(
    X_cancer,
    y_cancer,
    scoring='f1',
    n_jobs=2,
    cv=5,
)

# Combined results of all searches, best mean test score first.
helper1.score_summary(sort_by='mean_test_score')

{'scoring': 'f1', 'n_jobs': 2, 'cv': 5}
Running GridSearchCV for ExtraTreesClassifier.
Running GridSearchCV for RandomForestClassifier.




Running GridSearchCV for AdaBoostClassifier.
Running GridSearchCV for GradientBoostingClassifier.
Running GridSearchCV for SVC.
Done.


Unnamed: 0,estimator,mean_fit_time,std_fit_time,mean_score_time,std_score_time,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
0,AdaBoostClassifier,0.090869,0.018745,0.003125,0.006249,{'n_estimators': 32},0.957746,0.965986,0.985915,0.972222,0.97931,0.972163,0.00987,1.0,1.0,1.0,1.0,1.0,1.0,0.0
1,ExtraTreesClassifier,0.027614,0.004445,0.003133,0.006266,{'n_estimators': 32},0.945205,0.959459,0.986111,0.97931,0.985714,0.971028,0.016231,1.0,1.0,1.0,1.0,1.0,1.0,0.0
2,AdaBoostClassifier,0.038266,0.008078,0.003124,0.006248,{'n_estimators': 16},0.951049,0.965986,0.986111,0.972222,0.971831,0.969363,0.011342,0.998249,0.998249,0.993056,1.0,0.996516,0.997214,0.002353
3,RandomForestClassifier,0.04406,0.008089,0.001612,0.003225,{'n_estimators': 32},0.930556,0.965986,0.985915,0.965035,0.979021,0.965183,0.019128,1.0,0.998243,1.0,0.996516,1.0,0.998952,0.001395
4,GradientBoostingClassifier,0.055161,0.016293,0.0,0.0,"{'learning_rate': 1.0, 'n_estimators': 32}",0.951049,0.945205,0.986111,0.972222,0.971831,0.965163,0.015054,1.0,1.0,1.0,1.0,1.0,1.0,0.0
5,ExtraTreesClassifier,0.027599,0.004461,0.003127,0.006253,{'n_estimators': 16},0.957746,0.952381,0.978723,0.958333,0.978723,0.96511,0.011244,1.0,1.0,1.0,1.0,1.0,1.0,0.0
6,SVC,5.538546,1.557422,0.000415,0.00083,"{'C': 10, 'kernel': 'linear'}",0.952381,0.952381,0.979021,0.957143,0.971429,0.9624,0.010822,0.977153,0.982517,0.974093,0.97747,0.970537,0.976354,0.00397
7,RandomForestClassifier,0.021339,0.008592,0.0,0.0,{'n_estimators': 16},0.923077,0.938776,0.985915,0.965517,0.985714,0.959597,0.025242,0.994746,0.998243,0.998255,1.0,1.0,0.998249,0.001918
8,GradientBoostingClassifier,0.042889,0.014629,0.0,0.0,"{'learning_rate': 1.0, 'n_estimators': 16}",0.957143,0.932432,0.971429,0.965035,0.964539,0.958022,0.013653,1.0,1.0,1.0,1.0,1.0,1.0,0.0
9,SVC,1.796168,0.694563,0.001606,0.003211,"{'C': 1, 'kernel': 'linear'}",0.96,0.945946,0.979021,0.937063,0.964029,0.957182,0.014544,0.977312,0.980668,0.965398,0.980936,0.977312,0.976325,0.005683
