In [12]:
from sklearn.model_selection import train_test_split, GridSearchCV
from mlxtend.evaluate import PredefinedHoldoutSplit
from sklearn.pipeline import make_pipeline
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
import numpy as np

In [None]:
# Load the Iris dataset and pull out the feature matrix and class labels.
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples as the final test set, stratified by class so
# the label proportions are preserved in both parts.
X_train_valid, X_test, y_train_valid, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=123,
    shuffle=True,
    stratify=y,
)

# Split the row positions (0 .. n-1) of the remaining samples into training
# and validation index arrays, again stratified by class.
train_ind, valid_ind = train_test_split(
    np.arange(len(X_train_valid)),
    test_size=0.2,
    random_state=123,
    shuffle=True,
    stratify=y_train_valid,
)

In [14]:
# Standardize the features, then classify with k-nearest neighbors.
pipe = make_pipeline(StandardScaler(), KNeighborsClassifier())

# Hyperparameter grid. Keys are "<pipeline step name>__<parameter>", where the
# step name is the lowercased class name assigned by make_pipeline.
params = {'kneighborsclassifier__n_neighbors': [1, 3, 5],
          'kneighborsclassifier__p': [1, 2]}

# PredefinedHoldoutSplit expects the *indices* of the validation rows; every
# other row is used for training. It does NOT take a -1/0 fold mask like
# sklearn's PredefinedSplit -- a mask would be read as the indices -1 and 0,
# leaving a two-sample validation set.
split = PredefinedHoldoutSplit(valid_ind)

# Evaluate every parameter combination on the single train/validation split.
grid = GridSearchCV(pipe, param_grid=params, cv=split)
grid.fit(X_train_valid, y_train_valid)

print("Best parameters:", grid.best_params_)
print("Best cross-validation score:", grid.best_score_)

Best parameters: {'kneighborsclassifier__n_neighbors': 1, 'kneighborsclassifier__p': 1}
Best cross-validation score: 1.0


In [15]:
# GridSearchCV uses refit=True by default, so best_estimator_ has already been
# refit on all of X_train_valid with the winning parameters; fitting it again
# here would only repeat that work.
clf = grid.best_estimator_

# Report accuracy on the held-out test set, which played no part in tuning.
print('Test Accuracy: %.2f%%' % (clf.score(X_test, y_test) * 100))


Test Accuracy: 90.00%
