In [None]:
import sys
import IPython
import numpy as np
import pandas as pd
import sklearn as sk

# Record the interpreter and library versions this notebook was executed with.
print('Python version: %s.%s.%s' % sys.version_info[:3])
for label, module in (
    ('IPython version:', IPython),
    ('numpy version:', np),
    ('pandas version:', pd),
    ('scikit-learn version:', sk),
):
    print(label, module.__version__)

In [None]:
# Show full cell contents (e.g. the long ``params`` dicts in the score summary)
# instead of truncating them.
# ``None`` is the "no limit" value since pandas 1.0; the old ``-1`` sentinel was
# deprecated there and is rejected by pandas 2.x, so it is only used as a
# fallback for older releases whose validator refuses ``None``.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

In [None]:
from sklearn.model_selection import GridSearchCV

class EstimatorSelectionHelper:
    """Run one ``GridSearchCV`` per estimator and gather all results in one table.

    Parameters
    ----------
    models : dict
        Maps an estimator name to the estimator instance to tune.
    params : dict
        Maps an estimator name to the parameter grid passed to ``GridSearchCV``.
        Every key of ``models`` must be present; extra entries are ignored.

    Raises
    ------
    ValueError
        If at least one key of ``models`` has no entry in ``params``.
    """

    def __init__(self, models, params):
        # Walk ``models`` in its own order so the error message is deterministic;
        # a set difference would list the missing names in arbitrary order.
        missing_params = [key for key in models if key not in params]
        if missing_params:
            raise ValueError("Some estimators are missing parameters: %s" % missing_params)
        self.models = models
        self.params = params
        self.keys = models.keys()
        # Filled by ``fit``: estimator name -> fitted GridSearchCV object.
        self.grid_searches = {}

    def fit(self, X, y, cv=3, n_jobs=1, verbose=1, scoring=None, refit=False):
        """Fit a ``GridSearchCV`` for every estimator in ``models``.

        ``X`` and ``y`` are passed to ``GridSearchCV.fit``; ``cv``, ``n_jobs``,
        ``verbose``, ``scoring`` and ``refit`` are forwarded to the
        ``GridSearchCV`` constructor unchanged. Train scores are always
        requested. Each fitted search is stored in ``self.grid_searches``
        under the estimator's name, replacing any earlier result for that name.
        """
        for key in self.keys:
            print("Running GridSearchCV for %s." % key)
            gs = GridSearchCV(self.models[key], self.params[key], cv=cv, n_jobs=n_jobs,
                              verbose=verbose, scoring=scoring, refit=refit,
                              return_train_score=True)
            gs.fit(X, y)
            self.grid_searches[key] = gs

    def score_summary(self, sort_by=None):
        """Return the ``cv_results_`` of every grid search as a single DataFrame.

        Each row is one parameter combination; an ``estimator`` column holds the
        name of the estimator it belongs to. The per-estimator frames are
        concatenated as-is, so index labels repeat across estimators.

        Parameters
        ----------
        sort_by : str or list of str, optional
            Column(s) to sort by in descending order. No sorting when falsy.

        Raises
        ------
        KeyError
            If an estimator in ``models`` has no stored grid search, i.e.
            ``fit`` has not been run for it.
        """
        frames = [
            pd.DataFrame.from_dict({'estimator': key, **self.grid_searches[key].cv_results_})
            for key in self.keys
        ]
        scores = pd.concat(frames)
        if sort_by:
            scores = scores.sort_values(sort_by, ascending=False)
        return scores

In [None]:
# Load the pre-generated training features and labels, one pair per subset.
# 'iq' subset
features_iq_train, labels_iq_train = (
    pd.read_csv('./generated/dengue_features_train_s1_iq.csv'),
    pd.read_csv('./generated/dengue_labels_train_iq.csv'),
)
# 'sj' subset
features_sj_train, labels_sj_train = (
    pd.read_csv('./generated/dengue_features_train_s1_sj.csv'),
    pd.read_csv('./generated/dengue_labels_train_sj.csv'),
)

In [None]:
from sklearn import preprocessing

In [None]:
# Build the model inputs from the 'iq' training data only: standardised
# features as X and the 'total_cases' label column as the regression target y.
# NOTE(review): the scaling is computed on the full training set before it is
# handed to the cross-validated grid search, so every CV fold shares the same
# scaling statistics -- confirm this is acceptable or move scaling into a Pipeline.
X = preprocessing.scale(features_iq_train)
y = labels_iq_train['total_cases']

In [None]:
from sklearn.ensemble import GradientBoostingRegressor, AdaBoostRegressor, RandomForestRegressor
from sklearn.neural_network import MLPRegressor

# Estimators to tune. Only gradient boosting is active; the commented-out
# entries can be re-enabled because params2 already holds a grid for each.
models2 = {
    'GB_R': GradientBoostingRegressor(),
    # 'AB_R': AdaBoostRegressor(),
    # 'RF_R': RandomForestRegressor(),
    # 'MLP_R': MLPRegressor()
}

# Parameter grids, keyed by the same names as models2.
params2 = {
    # Wide search space for gradient boosting. A dict literal can hold only one
    # 'GB_R' entry (a repeated key is silently overridden by the last one), so
    # this grid is kept as a comment; swap it with the pinned grid below to
    # re-run the full search.
    # 'GB_R': { 'learning_rate': np.logspace(-1.5, 0.5, 10), 'n_estimators': np.linspace(25, 75, endpoint=False, num=15).astype(int), 'min_samples_leaf': [6, 8, 10]},
    #
    # Pinned single configuration for gradient boosting (the effective grid).
    # NOTE(review): scikit-learn 1.0 renamed 'ls' and 'mse' to 'squared_error'
    # and 1.2 removed the old spellings -- update these two values if this
    # notebook is run on a newer release.
    'GB_R': {
        'learning_rate': [0.1],
        'n_estimators': [48],
        'min_samples_leaf': [8],
        'loss': ['ls'],
        'criterion': ['mse'],
    },
    'AB_R': {
        'learning_rate': np.logspace(-1.5, 0.5, 10),
        'n_estimators': np.linspace(25, 75, endpoint=False, num=15).astype(int),
    },
    'RF_R': {
        'n_estimators': np.linspace(25, 75, endpoint=False, num=15).astype(int),
        'min_samples_leaf': [6, 8, 10],
    },
    # Empty grid: the estimator is evaluated with its default parameters only.
    'MLP_R': {}
}

In [None]:
# Grid-search every configured estimator with 5-fold CV on all available cores,
# scoring each candidate by negated MAE and negated MSE.
helper2 = EstimatorSelectionHelper(models=models2, params=params2)
helper2.fit(
    X,
    y,
    cv=5,
    n_jobs=-1,
    scoring=['neg_mean_absolute_error', 'neg_mean_squared_error'],
)

In [None]:
# Show the ten best parameter combinations, ranked by mean test negated MAE
# (sorted descending, so values closest to zero come first).
summary_columns = [
    'estimator',
    'mean_test_neg_mean_absolute_error',
    'mean_test_neg_mean_squared_error',
    'params',
]
helper2.score_summary(sort_by='mean_test_neg_mean_absolute_error')[summary_columns].head(10)

Useless