In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_boston, load_iris, load_breast_cancer
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import f1_score, roc_auc_score
from sklearn.model_selection import KFold

from nested_cv import NestedCV

# When using Random Search, we get a user warning with this little number of hyperparameters
# Suppress it
import warnings
warnings.simplefilter(action='ignore', category=UserWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)

*Note: trial edit (test modification).*

# Regression Example

In [6]:
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell needs an older scikit-learn release to run.
boston = load_boston()
X = boston.data
y = boston.target

# Hyperparameter search space for the random forest
param_grid = {
    'max_depth': [3, 7, 10, None],
    'n_estimators': [100, 200],
    'min_samples_split': [2, 3, 5, 7, 10],
}

# outer_cv / inner_cv take either a splitting strategy or a plain number of folds.
# Here explicit, seeded KFold strategies are passed.
outer_cv = KFold(n_splits=5, shuffle=True, random_state=123)
inner_cv = KFold(n_splits=5, shuffle=True, random_state=123)

# Seed the forest with the same value as the splitters so the model's own
# randomness does not add run-to-run noise to the outer scores.
# NOTE(review): hyperparameter sampling inside NestedCV may still be unseeded,
# so scores are not guaranteed to be bit-identical between runs; confirm.
NCV = NestedCV(
    model=RandomForestRegressor(random_state=123),
    params_grid=param_grid,
    outer_cv=outer_cv,
    inner_cv=inner_cv,
    n_jobs=-1,
    cv_options={
        'sqrt_of_score': True,
        'recursive_feature_elimination': False,
        # presumably unused while recursive_feature_elimination is False; verify
        'rfe_n_features': 2,
    },
)
NCV.fit(X=X, y=y)

# One score per outer fold
NCV.outer_scores

[4.2847157727008085,
 3.4072642252185354,
 2.8051399371930317,
 2.2417498868498766,
 3.4797203328262443]

# Classification Example

## Breast Cancer (2 Classes)

In [3]:
# Binary classification
cancer = load_breast_cancer()
X = cancer.data
y = cancer.target

# Hyperparameter search space (2 x 2 = 4 combinations)
param_grid = {
    'max_depth': [3, None],
    'n_estimators': [10, 20],
}

# The classifier is seeded so the forest's own randomness does not add
# run-to-run noise to the outer scores.
# NOTE(review): with integer outer_cv / inner_cv the fold assignment is chosen
# by NestedCV and may still vary between runs; confirm.
NCV = NestedCV(
    model=RandomForestClassifier(random_state=123),
    params_grid=param_grid,
    outer_cv=5,
    inner_cv=5,
    cv_options={
        'metric': roc_auc_score,
        # presumably marks the metric as "higher is better"; verify against NestedCV
        'metric_score_indicator_lower': False,
        'randomized_search_iter': 30,
        'predict_proba': True,
    },
)
NCV.fit(X=X, y=y)

# One score per outer fold
NCV.outer_scores

[0.9912337662337662,
 0.974025974025974,
 0.9803240740740741,
 0.9963247577681257,
 0.9969551282051282]

## Iris (3 Classes)

In [4]:
# Multiclass classification
iris = load_iris()
X = iris.data
y = iris.target

# Hyperparameter search space (2 x 2 = 4 combinations)
param_grid = {
    'max_depth': [3, None],
    'n_estimators': [10, 20],
}

# The classifier is seeded so the forest's own randomness does not add
# run-to-run noise to the outer scores.
# NOTE(review): with integer outer_cv / inner_cv the fold assignment is chosen
# by NestedCV and may still vary between runs; confirm.
NCV = NestedCV(
    model=RandomForestClassifier(random_state=123),
    params_grid=param_grid,
    outer_cv=5,
    inner_cv=5,
    cv_options={
        'metric': f1_score,
        # presumably marks the metric as "higher is better"; verify against NestedCV
        'metric_score_indicator_lower': False,
        'randomized_search_iter': 30,
        'predict_proba': False,
        'multiclass_average': 'micro',
    },
)
NCV.fit(X=X, y=y)

# One score per outer fold
NCV.outer_scores

[0.9333333333333333,
 0.9333333333333333,
 0.9333333333333333,
 0.9333333333333333,
 0.9666666666666667]