#### sklearn.model_selection.GridSearchCV
- class sklearn.model_selection.GridSearchCV(estimator, param_grid, *, scoring=None, n_jobs=None, refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs', error_score=nan, return_train_score=False)

In [6]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score,  GridSearchCV, train_test_split
from sklearn.datasets import load_iris
import numpy as np

In [8]:
# Load the iris dataset and pull out the feature matrix and class labels.
iris_data = load_iris()
data, label = iris_data.data, iris_data.target

# A fixed random_state keeps the tree construction reproducible between runs.
dt_clf = DecisionTreeClassifier(random_state=156)

# Evaluate the classifier with 3-fold cross-validation, scored by accuracy.
scores = cross_val_score(dt_clf, data, label, cv=3, scoring='accuracy')

# Report the per-fold accuracies and their mean, rounded to 4 decimals.
print('교차 검증별 정확도: ', np.round(scores, 4))
print('평균 검증 정확도: ', np.round(np.mean(scores), 4))



교차 검증별 정확도:  [0.98 0.94 0.98]
평균 검증 정확도:  0.9667


In [7]:
import pandas as pd

# Hold out 20% of the iris samples as a test set; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    iris_data.data,
    iris_data.target,
    test_size=0.2,
    random_state=121,
)

# Candidate hyperparameters: 3 depths x 2 split thresholds = 6 combinations.
parameters = {'max_depth':[1,2,3], 'min_samples_split':[2,3]}

# Search every combination with 3-fold cross-validation on the training set.
# refit=True asks GridSearchCV to retrain the best combination afterwards,
# which is what makes best_estimator_ / predict available on grid_dtree.
dtree = DecisionTreeClassifier()
grid_dtree = GridSearchCV(dtree, param_grid=parameters, cv=3, refit=True)
grid_dtree.fit(X_train, y_train)

# Tabulate the search results and display only the key score columns.
scores_df = pd.DataFrame(grid_dtree.cv_results_)
scores_df[[
    'params',
    'mean_test_score',
    'rank_test_score',
    'split0_test_score',
    'split1_test_score',
    'split2_test_score',
]]


Unnamed: 0,params,mean_test_score,rank_test_score,split0_test_score,split1_test_score,split2_test_score
0,"{'max_depth': 1, 'min_samples_split': 2}",0.7,5,0.7,0.7,0.7
1,"{'max_depth': 1, 'min_samples_split': 3}",0.7,5,0.7,0.7,0.7
2,"{'max_depth': 2, 'min_samples_split': 2}",0.958333,3,0.925,1.0,0.95
3,"{'max_depth': 2, 'min_samples_split': 3}",0.958333,3,0.925,1.0,0.95
4,"{'max_depth': 3, 'min_samples_split': 2}",0.975,1,0.975,1.0,0.95
5,"{'max_depth': 3, 'min_samples_split': 3}",0.975,1,0.975,1.0,0.95


In [9]:
# Display the estimator that GridSearchCV refit with the best-scoring
# parameter combination (available because the search used refit=True).
grid_dtree.best_estimator_

max_depth=3일 때 성능이 가장 좋다

In [10]:
# Display the parameter combination that achieved the best mean CV score.
grid_dtree.best_params_

{'max_depth': 3, 'min_samples_split': 2}

In [11]:
# Predict class labels for the held-out test samples; GridSearchCV delegates
# to the refit best estimator.
grid_dtree.predict(X_test)

array([1, 2, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 0, 0, 2, 1, 0, 2, 0, 2, 2,
       1, 1, 1, 1, 0, 0, 2, 2])

In [12]:
# Per-class probability estimates for each held-out test sample
# (one row per sample, one column per class).
grid_dtree.predict_proba(X_test)

array([[0.        , 0.97142857, 0.02857143],
       [0.        , 0.        , 1.        ],
       [0.        , 0.97142857, 0.02857143],
       [1.        , 0.        , 0.        ],
       [1.        , 0.        , 0.        ],
       [0.        , 0.97142857, 0.02857143],
       [0.        , 0.97142857, 0.02857143],
       [0.        , 0.97142857, 0.02857143],
       [0.        , 0.97142857, 0.02857143],
       [0.        , 0.        , 1.        ],
       [0.        , 0.        , 1.        ],
       [0.        , 0.97142857, 0.02857143],
       [0.        , 0.97142857, 0.02857143],
       [1.        , 0.        , 0.        ],
       [1.        , 0.        , 0.        ],
       [0.        , 0.        , 1.        ],
       [0.        , 0.97142857, 0.02857143],
       [1.        , 0.        , 0.        ],
       [0.        , 0.        , 1.        ],
       [1.        , 0.        , 0.        ],
       [0.        , 0.        , 1.        ],
       [0.        , 0.        , 1.        ],
       [0.

In [13]:
# Report the best hyperparameter combination found by the grid search.
print('GridSearchCV 최적 파라미터:', grid_dtree.best_params_)
# Interpolate best_score_ directly into the f-string. The previous `{0:.4f}`
# formatted the literal integer 0, so the line printed "0.0000" followed by
# the unformatted score instead of the score rounded to 4 decimals.
print(f'GridSearchCV 최고 정확도: {grid_dtree.best_score_:.4f}')

GridSearchCV 최적 파라미터: {'max_depth': 3, 'min_samples_split': 2}
GridSearchCV 최고 정확도: 0.0000 0.975
