## 超参数

In [2]:
import numpy as np
from sklearn import datasets

In [3]:
# Load the digits dataset bundled with scikit-learn; the returned object
# exposes the features as `.data` and the labels as `.target` (used below).
digits = datasets.load_digits()

In [4]:
# Split the loaded dataset object into the feature matrix (X) and the
# label vector (y) that the rest of the notebook works with.
X, y = digits.data, digits.target

In [5]:
from sklearn.model_selection import train_test_split

In [7]:
# Hold out 20% of the samples as a test set. random_state pins the shuffle
# so that the split is reproducible from run to run.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=666)

In [8]:
from sklearn.neighbors import KNeighborsClassifier

In [9]:
# Baseline: a 3-nearest-neighbours classifier with otherwise default settings.
# fit() returns the estimator itself, so construction and training are chained.
knn_clf = KNeighborsClassifier(n_neighbors=3).fit(x_train, y_train)
# Last expression of the cell -> displayed as the accuracy on the held-out split.
knn_clf.score(x_test, y_test)

0.9888888888888889

## 寻找最好的K

In [13]:
%%time
best_score = 0.0
best_k = -1
for k in range(1, 11):
    knn_clf = KNeighborsClassifier(n_neighbors=k)
    knn_clf.fit(x_train, y_train)
    score = knn_clf.score(x_test, y_test)
    if score > best_score:
        best_score = score
        best_k = k
print("best K:", best_k)
print("best score:", best_score)

best K: 4
best score: 0.9916666666666667
Wall time: 974 ms


### 考虑距离权重？不考虑距离权重？（weights 参数）

In [16]:
%%time
best_method = ""
best_score = 0.0
best_k = -1
for method in ["uniform", "distance"]:
    print("method:", method)   
    for k in range(1, 11):
        knn_clf = KNeighborsClassifier(n_neighbors=k, weights=method)
        knn_clf.fit(x_train, y_train)
        score = knn_clf.score(x_test, y_test)
        if score > best_score:
            best_score = score
            best_k = k
            best_method = method  
        print("K:", k)
        print("score:", score)

print("best method:", best_method)           
print("best K:", best_k)
print("best score:", best_score)

method: uniform
K: 1
score: 0.9833333333333333
K: 2
score: 0.9888888888888889
K: 3
score: 0.9888888888888889
K: 4
score: 0.9916666666666667
K: 5
score: 0.9888888888888889
K: 6
score: 0.9888888888888889
K: 7
score: 0.9861111111111112
K: 8
score: 0.9861111111111112
K: 9
score: 0.9833333333333333
K: 10
score: 0.9833333333333333
method: distance
K: 1
score: 0.9833333333333333
K: 2
score: 0.9861111111111112
K: 3
score: 0.9888888888888889
K: 4
score: 0.9888888888888889
K: 5
score: 0.9888888888888889
K: 6
score: 0.9888888888888889
K: 7
score: 0.9888888888888889
K: 8
score: 0.9888888888888889
K: 9
score: 0.9861111111111112
K: 10
score: 0.9861111111111112
best method: uniform
best K: 4
best score: 0.9916666666666667
Wall time: 2.07 s


### 探索闵可夫斯基距离（Minkowski distance）对应的超参数 p

In [18]:
%%time
best_p = -1
best_score = 0.0
best_k = -1 

for k in range(1, 11):
    print("K:", k)
    for p in range(1, 6):
        knn_clf = KNeighborsClassifier(n_neighbors=k, weights="distance", p=p)
        knn_clf.fit(x_train, y_train)
        score = knn_clf.score(x_test, y_test)
        if score > best_score:
            best_score = score
            best_k = k
            best_p = p  
        print("\tp:", p)
        print("\tscore:", score)

print("best p:", best_p)           
print("best K:", best_k)
print("best score:", best_score)

K: 1
	p: 1
	score: 0.9861111111111112
	p: 2
	score: 0.9833333333333333
	p: 3
	score: 0.9861111111111112
	p: 4
	score: 0.9861111111111112
	p: 5
	score: 0.9861111111111112
K: 2
	p: 1
	score: 0.9861111111111112
	p: 2
	score: 0.9861111111111112
	p: 3
	score: 0.9861111111111112
	p: 4
	score: 0.9861111111111112
	p: 5
	score: 0.9861111111111112
K: 3
	p: 1
	score: 0.9833333333333333
	p: 2
	score: 0.9888888888888889
	p: 3
	score: 0.9833333333333333
	p: 4
	score: 0.9833333333333333
	p: 5
	score: 0.9833333333333333
K: 4
	p: 1
	score: 0.9833333333333333
	p: 2
	score: 0.9888888888888889
	p: 3
	score: 0.9861111111111112
	p: 4
	score: 0.9833333333333333
	p: 5
	score: 0.9833333333333333
K: 5
	p: 1
	score: 0.9888888888888889
	p: 2
	score: 0.9888888888888889
	p: 3
	score: 0.9861111111111112
	p: 4
	score: 0.9861111111111112
	p: 5
	score: 0.9805555555555555
K: 6
	p: 1
	score: 0.9833333333333333
	p: 2
	score: 0.9888888888888889
	p: 3
	score: 0.9888888888888889
	p: 4
	score: 0.9833333333333333
	p: 5
	score: