이 노트북의 코드에 대한 설명은 [다중 평가 지표: cross_validate()](https://tensorflow.blog/2018/03/13/%EB%8B%A4%EC%A4%91-%ED%8F%89%EA%B0%80-%EC%A7%80%ED%91%9C-cross_validate/) 글을 참고하세요.

In [1]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split, cross_val_score

In [2]:
# Load the digits data set and recast it as a binary problem:
# the target is True where the label equals 9 and False otherwise.
digits = load_digits()
# random_state is fixed so the train/test split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    digits.data, digits.target == 9, random_state=42)

In [3]:
from sklearn.svm import SVC

In [4]:
# Cross-validate a default SVC without naming a metric; the per-fold scores
# shown below are identical to the explicit scoring='accuracy' call that follows.
cross_val_score(estimator=SVC(), X=X_train, y=y_train)

array([0.90200445, 0.90200445, 0.90200445])

In [5]:
# Same cross-validation, this time naming the metric explicitly.
cross_val_score(
    estimator=SVC(),
    X=X_train,
    y=y_train,
    scoring='accuracy',
)

array([0.90200445, 0.90200445, 0.90200445])

In [6]:
from sklearn.model_selection import cross_validate

In [7]:
# cross_validate accepts several metrics at once. With a list of scorer names
# the result dict holds 'test_<name>' entries, plus 'train_<name>' entries
# because return_train_score=True, alongside fit and score times.
cross_validate(
    estimator=SVC(),
    X=X_train,
    y=y_train,
    scoring=['accuracy', 'roc_auc'],
    return_train_score=True,
)

{'fit_time': array([0.07734418, 0.07731342, 0.07697606]),
 'score_time': array([0.06752324, 0.06801224, 0.06795096]),
 'test_accuracy': array([0.90200445, 0.90200445, 0.90200445]),
 'test_roc_auc': array([0.99657688, 0.99814815, 0.99943883]),
 'train_accuracy': array([1., 1., 1.]),
 'train_roc_auc': array([1., 1., 1.])}

In [8]:
# A single-metric cross_validate call; picking out 'test_accuracy' yields the
# same per-fold array that cross_val_score produced above.
cross_validate(
    estimator=SVC(),
    X=X_train,
    y=y_train,
    scoring=['accuracy'],
    return_train_score=False,
)['test_accuracy']

array([0.90200445, 0.90200445, 0.90200445])

In [9]:
# Passing scoring as a dict lets us choose the label for each metric:
# the result keys become 'test_acc' and 'test_ra' instead of the scorer names.
cross_validate(
    estimator=SVC(),
    X=X_train,
    y=y_train,
    scoring={'acc': 'accuracy', 'ra': 'roc_auc'},
    return_train_score=False,
)

{'fit_time': array([0.07807112, 0.07732844, 0.07694435]),
 'score_time': array([0.06790733, 0.06801248, 0.0679276 ]),
 'test_acc': array([0.90200445, 0.90200445, 0.90200445]),
 'test_ra': array([0.99657688, 0.99814815, 0.99943883])}

In [10]:
from sklearn.model_selection import GridSearchCV

In [11]:
# Candidate values for the SVC `gamma` parameter explored by the grid searches below.
param_grid = dict(gamma=[0.0001, 0.01, 0.1, 1, 10])

In [12]:
# Grid search over gamma, scored on accuracy only. Because scoring is given as
# a list, refit names the metric used to choose the best candidate.
grid = GridSearchCV(
    estimator=SVC(),
    param_grid=param_grid,
    scoring=['accuracy'],
    refit='accuracy',
    return_train_score=True,
)
grid.fit(X_train, y_train)

GridSearchCV(cv=None, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'gamma': [0.0001, 0.01, 0.1, 1, 10]},
       pre_dispatch='2*n_jobs', refit='accuracy', return_train_score=True,
       scoring=['accuracy'], verbose=0)

In [13]:
# Parameter setting chosen by the search fitted above (refit='accuracy').
grid.best_params_

{'gamma': 0.0001}

In [14]:
# Score of the chosen candidate; it matches the mean_test_accuracy entry
# for that candidate in cv_results_.
grid.best_score_

0.9651076466221232

In [15]:
# Show cv_results_ as a table, transposed so that each row is one result field
# and each column is one gamma candidate.
pd.DataFrame(grid.cv_results_).T

Unnamed: 0,0,1,2,3,4
mean_fit_time,0.0168132,0.0757732,0.0780415,0.0616728,0.0608571
mean_score_time,0.00678404,0.033596,0.0335807,0.0269036,0.0265659
mean_test_accuracy,0.965108,0.902004,0.902004,0.902004,0.902004
mean_train_accuracy,0.970676,1,1,1,1
param_gamma,0.0001,0.01,0.1,1,10
params,{'gamma': 0.0001},{'gamma': 0.01},{'gamma': 0.1},{'gamma': 1},{'gamma': 10}
rank_test_accuracy,1,2,2,2,2
split0_test_accuracy,0.966592,0.902004,0.902004,0.902004,0.902004
split0_train_accuracy,0.975501,1,1,1,1
split1_test_accuracy,0.96882,0.902004,0.902004,0.902004,0.902004


In [16]:
# Multi-metric grid search: both accuracy ('acc') and ROC AUC ('ra') are
# recorded for every candidate, and refit='ra' makes ROC AUC the metric that
# decides the best candidate.
grid = GridSearchCV(
    estimator=SVC(),
    param_grid=param_grid,
    scoring={'acc': 'accuracy', 'ra': 'roc_auc'},
    refit='ra',
    return_train_score=True,
)
grid.fit(X_train, y_train)

GridSearchCV(cv=None, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'gamma': [0.0001, 0.01, 0.1, 1, 10]},
       pre_dispatch='2*n_jobs', refit='ra', return_train_score=True,
       scoring={'acc': 'accuracy', 'ra': 'roc_auc'}, verbose=0)

In [17]:
# Parameter setting chosen by the multi-metric search (refit='ra'); it differs
# from the setting chosen by the accuracy-only search earlier in the notebook.
grid.best_params_

{'gamma': 0.01}

In [18]:
# Score of the chosen candidate on the refit metric; it matches the
# mean_test_ra entry for that candidate in cv_results_.
grid.best_score_

0.9983352038907595

In [19]:
# Show the multi-metric cv_results_ transposed: each metric gets its own
# mean/rank rows (suffixes _acc and _ra), and each column is one gamma candidate.
pd.DataFrame(grid.cv_results_).T

Unnamed: 0,0,1,2,3,4
mean_fit_time,0.0155746,0.0764983,0.0781859,0.0618318,0.0608933
mean_score_time,0.0142375,0.0683261,0.0679193,0.0541253,0.0534937
mean_test_acc,0.965108,0.902004,0.902004,0.902004,0.902004
mean_test_ra,0.988403,0.998335,0.918986,0.5,0.5
mean_train_acc,0.970676,1,1,1,1
mean_train_ra,0.992134,1,1,1,1
param_gamma,0.0001,0.01,0.1,1,10
params,{'gamma': 0.0001},{'gamma': 0.01},{'gamma': 0.1},{'gamma': 1},{'gamma': 10}
rank_test_acc,1,2,2,2,2
rank_test_ra,2,1,3,4,4


In [20]:
# The estimator refitted with the chosen parameters; its repr below shows the
# gamma value reported by best_params_.
grid.best_estimator_

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.01, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)