In [30]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import neighbors, datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [15]:
iris = datasets.load_iris()

print(type(iris))
iris_X = iris.data
iris_y = iris.target

print(iris_X.shape)
print(iris_y.shape)
print(f'Number of classes: ${len(np.unique(iris_y))}')
print(f'Number of data points:: ${len(np.unique(iris_X))}')

<class 'sklearn.utils._bunch.Bunch'>
(150, 4)
(150,)
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]
Number of classes: $3
Number of data points:: $74


In [19]:
X0 = iris_X[iris_y == 0,:]
print(f"Samples from class 0: \n{X0[:5,:]}")

X1 = iris_X[iris_y == 1,:]
print(f"Samples from class 1: \n{X1[:5,:]}")

X2 = iris_X[iris_y == 2,:]
print(f"Samples from class 2: \n{X2[:5,:]}")


Samples from class 0: 
[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]]
Samples from class 1: 
[[7.  3.2 4.7 1.4]
 [6.4 3.2 4.5 1.5]
 [6.9 3.1 4.9 1.5]
 [5.5 2.3 4.  1.3]
 [6.5 2.8 4.6 1.5]]
Samples from class 2: 
[[6.3 3.3 6.  2.5]
 [5.8 2.7 5.1 1.9]
 [7.1 3.  5.9 2.1]
 [6.3 2.9 5.6 1.8]
 [6.5 3.  5.8 2.2]]


In [25]:
X_train, X_test, y_train, y_test = train_test_split(
    iris_X, iris_y, test_size=50
)

print(f"Training size: {len(y_train)}" )
print(f"Test size    : {len(y_test)}" )

Training size: 100
Test size    : 50


In [27]:
clf = neighbors.KNeighborsClassifier(n_neighbors = 1, p = 2)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print('Print results for 20 test data points:')
print("Predicted labels: ", y_pred[20:40])
print("Ground truth    : ", y_test[20:40])

Print results for 20 test data points:
Predicted labels:  [0 2 1 2 1 1 0 2 1 2 0 1 0 1 0 1 2 1 2 1]
Ground truth    :  [0 2 1 1 1 1 0 2 1 2 0 1 0 1 0 1 2 1 2 1]


In [32]:
print(f"Accuracy of 1NN: {100*accuracy_score(y_test, y_pred)}%")

Accuracy of 1NN: 96.0%


In [33]:
clf = neighbors.KNeighborsClassifier(n_neighbors = 10, p = 2)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

print(f"Accuracy of 10NN with major voting: {100*accuracy_score(y_test, y_pred)}" )

Accuracy of 10NN with major voting: 98.0


In [36]:
clf = neighbors.KNeighborsClassifier(n_neighbors = 10, p = 2, weights = 'distance')
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

print(f"Accuracy of 10NN (1/distance weights): {100*accuracy_score(y_test, y_pred)}")

Accuracy of 10NN (1/distance weights): 96.0


In [37]:
def myweight(distances):
    sigma2 = .5 # we can change this number
    return np.exp(-distances**2/sigma2)

clf = neighbors.KNeighborsClassifier(n_neighbors = 10, p = 2, weights = myweight)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

print(f"Accuracy of 10NN (customized weights): {100*accuracy_score(y_test, y_pred)}")

Accuracy of 10NN (customized weights): 96.0
