# GridSearch 활용 예제

In [1]:
from sklearn.model_selection import GridSearchCV

In [7]:
param_grid = {
    'n_estimators': [100, 150, 200, 250],
    'max_depth': [None, 6, 9, 12],
    'min_samples_split': [0.01, 0.05, 0.1],
    'max_features': ['auto', 'sqrt'],
}

In [5]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import make_regression
X, y = make_regression(n_features=4, n_informative=2, random_state=0, shuffle=False)
estimator = RandomForestRegressor(max_depth=2, random_state=0)
estimator.fit(X, y)
print(estimator.predict([[0, 0, 0, 0]]))

[-8.32987858]


In [6]:
estimator.feature_importances_

array([0.18146984, 0.81473937, 0.00145312, 0.00233767])

In [8]:
from sklearn.model_selection import KFold

kf = KFold(random_state=30,
           n_splits=10,
           shuffle=True,
          )

In [9]:
# define grid_search
grid_search = GridSearchCV(estimator=estimator, 
                           param_grid=param_grid, 
                           cv=kf, 
                           n_jobs=-1, 
                           verbose=2
                          )

# fit with (x_train, y_train)
grid_search.fit(X, y)

Fitting 10 folds for each of 96 candidates, totalling 960 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    2.4s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:    7.2s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:   15.4s
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:   26.7s
[Parallel(n_jobs=-1)]: Done 960 out of 960 | elapsed:   39.9s finished


GridSearchCV(cv=KFold(n_splits=10, random_state=30, shuffle=True),
             estimator=RandomForestRegressor(max_depth=2, random_state=0),
             n_jobs=-1,
             param_grid={'max_depth': [None, 6, 9, 12],
                         'max_features': ['auto', 'sqrt'],
                         'min_samples_split': [0.01, 0.05, 0.1],
                         'n_estimators': [100, 150, 200, 250]},
             verbose=2)

In [10]:
grid_search.best_params_

{'max_depth': 9,
 'max_features': 'auto',
 'min_samples_split': 0.01,
 'n_estimators': 200}

In [11]:
estimator = RandomForestRegressor(n_estimators=200,max_depth=9, max_features= 'auto', min_samples_split=0.01, random_state=0)
estimator.fit(X, y)
print(estimator.predict([[0, 0, 0, 0]]))

[-0.21872506]
