Grid search

In [0]:
from sklearn import datasets  # サンプル用のデータ・セット
from sklearn.svm import SVC  # SVM の実行関数
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split  # 訓練データとテストデータを分ける関数
from sklearn.metrics import classification_report, confusion_matrix  # 学習結果要約用関数

# Load the sample digits dataset.
digits = datasets.load_digits()
n_samples = digits.images.shape[0]
print(n_samples)

# Flatten every image into a single feature row (one row per sample).
X = digits.images.reshape(n_samples, -1)
y = digits.target

# Hold out half of the samples for the final evaluation; the fixed seed
# keeps the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=0,
)
print(len(y_test))

# Hyper-parameter candidates to search: one sub-grid per kernel, so that
# gamma is only varied for the RBF kernel.
param_grid = [
    {
        'C': [1, 10, 100, 1000],
        'kernel': ['linear'],
    },
    {
        'C': [1, 10, 100, 1000],
        'gamma': [0.001, 0.0001],
        'kernel': ['rbf'],
    },
]

# Scoring metrics; a separate grid search is run for each of them.
# NOTE(review): for single-label multiclass targets, micro-averaged
# precision/recall/F1 should coincide with accuracy, so these four searches
# likely rank the candidates identically -- confirm before relying on any
# difference between them.
scores = [
    'accuracy',
    'precision_micro',
    'recall_micro',
    'f1_micro',
]

# Run one grid search per scoring metric and report its results.
for score in scores:
    print("evaluation metrics: " + score)

    # n_jobs=1 runs the search serially; n_jobs=-1 would use all available
    # processors instead.
    clf = GridSearchCV(
        SVC(C=1),
        param_grid,
        cv=5,
        scoring=score,
        n_jobs=1,
    )
    clf.fit(X_train, y_train)

    # Per-candidate summary of the 5-fold cross-validation scores.
    results = clf.cv_results_
    means = results['mean_test_score']
    stds = results['std_test_score']
    for mean, std, params in zip(means, stds, results['params']):
        # NOTE(review): the "+/-" figure printed here is half the standard
        # deviation; the scikit-learn example prints std * 2 -- confirm
        # which spread is intended.
        print("%0.3f (+/-%0.03f) for %r" % (mean, std / 2, params))

    # Estimator carrying the best parameter combination found by the search.
    print(clf.best_estimator_)

    # Classify the held-out test set with the best model and summarise the
    # classification quality.
    y_true = y_test
    y_pred = clf.predict(X_test)
    print(classification_report(y_true, y_pred))
    print("            pred")
    print(confusion_matrix(y_true, y_pred))


1797
899
evaluation metrics: accuracy
0.973 (+/-0.004) for {'C': 1, 'kernel': 'linear'}
0.973 (+/-0.004) for {'C': 10, 'kernel': 'linear'}
0.973 (+/-0.004) for {'C': 100, 'kernel': 'linear'}
0.973 (+/-0.004) for {'C': 1000, 'kernel': 'linear'}
0.986 (+/-0.005) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.958 (+/-0.007) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.987 (+/-0.005) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.981 (+/-0.007) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.987 (+/-0.005) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.981 (+/-0.007) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.987 (+/-0.005) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.981 (+/-0.007) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
SVC(C=10, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
             