<a href="https://colab.research.google.com/github/ymuto0302/RW2024/blob/main/nested_cross_validation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.svm import SVC
from sklearn.datasets import make_classification
import numpy as np

# Generate the sample dataset (1000 samples, 20 features).
X, y = make_classification(n_samples=1000, n_features=20, random_state=42)

# Hyperparameter grid explored by the inner-loop grid search.
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['rbf', 'linear'],
}

# Splitters for the two loops: the inner one (3 folds) is handed to
# GridSearchCV for model selection, the outer one (5 folds) yields the
# held-out folds on which each selected model is scored.
inner_cv = KFold(n_splits=3, shuffle=True, random_state=42)
outer_cv = KFold(n_splits=5, shuffle=True, random_state=42)

# Run nested cross-validation: one grid search per outer fold.
outer_scores = []

for train_idx, test_idx in outer_cv.split(X):
    # Slice out this outer fold's training and held-out portions.
    X_train, y_train = X[train_idx], y[train_idx]
    X_test, y_test = X[test_idx], y[test_idx]

    # Inner loop: grid search over param_grid using only the training portion.
    clf = GridSearchCV(SVC(), param_grid, cv=inner_cv).fit(X_train, y_train)

    # Evaluate the best model found by the search on the held-out portion.
    outer_scores.append(clf.score(X_test, y_test))

# Report the per-fold scores, then their mean and standard deviation.
print(f"Nested CV scores: {outer_scores}")
print(f"Average score: {np.mean(outer_scores):.3f} (+/- {np.std(outer_scores):.3f})")


Nested CV scores: [0.88, 0.845, 0.885, 0.865, 0.88]
Average score: 0.871 (+/- 0.015)
