# Machine Learning: Hyperparameter Tuning
---
- 모델에 집중하기 위해, 전처리가 필요 없는 데이터셋으로 진행하겠습니다!
- https://scikit-learn.org/stable/modules/generated/sklearn.datasets.fetch_openml.html

## 1. 데이터 로딩

In [None]:
from sklearn.datasets import fetch_openml
# Fetch version 1 of the "mnist_784" dataset from OpenML; the result is
# indexed below by its "data" and "target" entries.
mnist = fetch_openml(name='mnist_784', version=1)

In [None]:
# Work on only the first `sample` rows so the experiments below stay fast.
sample = 10000
X = mnist["data"][:sample]
y = mnist["target"][:sample]
# Last expression of the cell: shows the dimensions of the feature subset.
X.shape


## 2. 모델 적용 / 검증

In [None]:
import matplotlib.pyplot as plt

# Import datasets, classifiers and performance metrics
from sklearn import datasets,metrics
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split



# Baseline model: k-nearest neighbours with scikit-learn's default settings.
classifier = KNeighborsClassifier()

# Hold out 30% of the samples for evaluation. shuffle=False disables
# shuffling, so the split is identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, shuffle=False)

classifier.fit(X_train, y_train)

# Predict once and reuse the result for both the report and the matrix.
predicted = classifier.predict(X_test)

report = metrics.classification_report(y_test, predicted, digits=4)
print(f"Classification report for classifier {classifier}:\n{report}\n")

# metrics.plot_confusion_matrix was deprecated in scikit-learn 1.0 and removed
# in 1.2. ConfusionMatrixDisplay.from_predictions is its replacement and works
# from the predictions computed above instead of calling predict() again.
disp = metrics.ConfusionMatrixDisplay.from_predictions(y_test, predicted)
disp.figure_.suptitle("Confusion Matrix")
print(f"Confusion matrix:\n{disp.confusion_matrix}")

plt.show()

## 3. 하이퍼 파라미터 튜닝: GridSearch

In [None]:
from sklearn.model_selection import GridSearchCV

# Candidate neighbour counts to evaluate: 2 through 7 inclusive.
param_grid = {"n_neighbors": list(range(2, 8))}

# Cross-validated search (cv=10) over the grid with a fresh KNN estimator.
# verbose=3 and n_jobs=-1 are forwarded to GridSearchCV unchanged.
grid_search = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=param_grid,
    cv=10,
    verbose=3,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

In [None]:
# Display the score recorded for the best parameter setting found by the search.
grid_search.best_score_

In [None]:
# Display the parameter setting the search selected as best.
grid_search.best_params_

In [None]:
# Build the tuned classifier from the parameters GridSearchCV selected instead
# of a hard-coded n_neighbors value, so this cell stays in sync with the search
# whenever the data subset or the parameter grid changes.
classifier = KNeighborsClassifier(**grid_search.best_params_)
classifier.fit(X_train, y_train)

In [None]:
# Evaluate the tuned classifier on the held-out split; predict once and reuse
# the result for both the report and the confusion matrix.
predicted = classifier.predict(X_test)

report = metrics.classification_report(y_test, predicted, digits=4)
print(f"Classification report for classifier {classifier}:\n{report}\n")

# metrics.plot_confusion_matrix was deprecated in scikit-learn 1.0 and removed
# in 1.2. ConfusionMatrixDisplay.from_predictions is its replacement and works
# from the predictions computed above instead of calling predict() again.
disp = metrics.ConfusionMatrixDisplay.from_predictions(y_test, predicted)
disp.figure_.suptitle("Confusion Matrix")
print(f"Confusion matrix:\n{disp.confusion_matrix}")

plt.show()