In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's "whitegrid" style to subsequent plots.
sns.set_style("whitegrid")
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
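# We often want to use a metric such as AUC for model selection with GridSearchCV or cross_val_score.
# Both GridSearchCV and cross_val_score accept a `scoring` argument; simply pass the name of the desired metric as a string.
# To switch from the default metric (accuracy) to AUC, pass "roc_auc" as the scoring parameter.
# The most important scoring values for classification are:
# accuracy (the default), roc_auc (area under the ROC curve: AUC), average_precision (area under the precision-recall curve), f1, f1_macro, f1_micro, f1_weighted
# Commonly used for regression: r2 (coefficient of determination), neg_mean_squared_error (mean squared error) and neg_mean_absolute_error (mean absolute error); the error metrics are negated so that higher is always better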
from sklearn.datasets import load_digits
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC

# Load the digits data and frame it as a binary task: "is this digit a 9?".
digits = load_digits()
is_nine = digits.target == 9

# Without a scoring argument, the classifier is scored by accuracy (the default).
default_scores = cross_val_score(SVC(), digits.data, is_nine)
print(f"Default scoring: {default_scores}")

# Naming the default metric explicitly should reproduce the scores above.
explicit_accuracy = cross_val_score(SVC(), digits.data, is_nine, scoring="accuracy")
print(f"Explicit accuracy scoring: {explicit_accuracy}")

# Switching the metric to AUC only requires a different scoring string.
roc_auc = cross_val_score(SVC(), digits.data, is_nine, scoring="roc_auc")
print(f"AUC scoring: {roc_auc}")

Default scoring: [0.975      0.99166667 1.         0.99442897 0.98050139]
Explicit accuracy scoring: [0.975      0.99166667 1.         0.99442897 0.98050139]
AUC scoring: [0.99717078 0.99854252 1.         0.999828   0.98400413]


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_auc_score

# Hold out a test split for the binary "digit is 9" task; the seed is fixed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    digits.data,
    digits.target == 9,
    random_state=0,
)

# Candidate values for SVC's gamma parameter.
param_grid = {"gamma": [0.0001, 0.01, 0.1, 1, 10]}

# No scoring argument is given, so the search selects by the default metric (accuracy).
grid = GridSearchCV(SVC(), param_grid=param_grid)
grid.fit(X_train, y_train)

# AUC needs continuous scores, so it is computed from the decision function
# rather than from hard class predictions.
test_auc = roc_auc_score(y_test, grid.decision_function(X_test))
test_accuracy = grid.score(X_test, y_test)

print("Grid-Search with accuracy")
print(f"Best parameters: {grid.best_params_}")
print(f"Best cross-validation score(accuracy): {grid.best_score_:.3f}")
print(f"Test set AUC: {test_auc:.3f}")
print(f"Test set accuracy: {test_accuracy:.3f}")

Grid-Search with accuracy
Best parameters: {'gamma': 0.0001}
Best cross-validation score(accuracy): 0.976
Test set AUC: 0.992
Test set accuracy: 0.973


In [4]:
# Repeat the grid search, this time using AUC as the model-selection score.
grid = GridSearchCV(SVC(), param_grid=param_grid, scoring="roc_auc")
grid.fit(X_train, y_train)
print("Grid-Search with AUC")
# Interpolate the value into the f-string, matching how the accuracy search
# reports its best parameters; the printed text is unchanged.
print(f"Best parameters: {grid.best_params_}")
print(f"Best cross-validation score (AUC): {grid.best_score_:.3f}")
# With scoring="roc_auc", grid.score() reports AUC instead of accuracy.
print(f"Test set AUC: {grid.score(X_test, y_test):.3f}")

Grid-Search with AUC
Best parameters: {'gamma': 0.01}
Best cross-validation score (AUC): 0.998
Test set AUC: 1.000
