# KNN classification
Predict the label of a data point based on the labels of its K nearest neighbours. The distance used here is the Euclidean distance. <br><br>
Euclidean distance: $ d\left( p,q\right)   = \sqrt {\sum _{i=1}^{n}  \left( q_{i}-p_{i}\right)^2 } $
![](pics/knn.pic1.png)
![](pics/knn.pic2.png)

In [46]:
import numpy as np
from collections import Counter


class KNN:
    """K-nearest-neighbours classifier based on Euclidean distance.

    A sample is labelled with the most common label among the ``k``
    training samples closest to it.
    """

    def __init__(self, k=3):
        """Create the classifier.

        Args:
            k: Number of neighbours that vote on a prediction (at least 1).

        Raises:
            ValueError: If ``k`` is smaller than 1.
        """
        # A zero k leaves no voters and a negative k would silently drop
        # neighbours through slicing, so reject both up front.
        if k < 1:
            raise ValueError(f"k must be at least 1, got {k!r}")
        self.k = k

    def fit(self, X, y):
        """Store the training samples and their labels.

        Args:
            X: Training samples, one per row (any array-like).
            y: Labels, one per training sample (any array-like).

        Raises:
            ValueError: If ``X`` and ``y`` hold a different number of samples.
        """
        # Converting to arrays makes row iteration and positional label lookup
        # work for lists and pandas objects as well as for ndarrays.
        samples = np.asarray(X, dtype=float)
        labels = np.asarray(y)
        if len(samples) != len(labels):
            raise ValueError(
                f"X and y must have the same length, "
                f"got {len(samples)} and {len(labels)}"
            )
        self.X_train = samples
        self.y_train = labels

    def ucli_distance(self, x1, x2):
        """Return the Euclidean distance between two points."""
        delta = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
        return np.sqrt(np.sum(delta ** 2))

    def predict_one(self, x):
        """Return the majority label among the ``k`` nearest training samples.

        When several labels share the highest count, the one met first while
        walking the neighbours from nearest to farthest is returned.
        """
        x = np.asarray(x, dtype=float)
        # Going through ucli_distance keeps the metric overridable in subclasses.
        distances = [self.ucli_distance(x, x_train) for x_train in self.X_train]
        top_k = np.argsort(distances)[:self.k]
        top_k_labels = self.y_train[top_k]
        predicted_labels = Counter(top_k_labels).most_common(1)
        return predicted_labels[0][0]

    def predict(self, X):
        """Return an array holding the predicted label of every sample in ``X``."""
        return np.array([self.predict_one(x) for x in X])

    def score(self, X, y):
        """Return the accuracy on ``X``/``y`` as a string with two decimals.

        Raises:
            ValueError: If ``y`` is empty or its length differs from the
                number of samples in ``X``.
        """
        y_true = np.asarray(y)
        y_pred = self.predict(X)
        if len(y_true) == 0:
            raise ValueError("score() needs at least one sample")
        if len(y_pred) != len(y_true):
            raise ValueError(
                f"X and y must have the same length, "
                f"got {len(y_pred)} and {len(y_true)}"
            )
        n_correct = int(np.count_nonzero(y_pred == y_true))
        return f'{n_correct / len(y_true):.2f}'
        
        
        
        

# Time for the Test

In [89]:
from sklearn import datasets
from sklearn.model_selection import train_test_split

# Load the Iris data set that ships with scikit-learn.
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Hold out half of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=1234
)

# Fit the KNN classifier on the training half.
clf = KNN(k=3)
clf.fit(X_train, y_train)

# Show the true labels, the predicted labels, and the resulting accuracy.
print(f'y_test: \n{y_test}')
y_hat = clf.predict(X_test)
print(f'y_hat: \n{y_hat}')
accuracy = clf.score(X_test, y_test)
print(f'Accuracy Score: {accuracy}')

y_test: 
[1 1 2 0 1 0 0 0 1 2 1 0 2 1 0 1 2 0 2 1 1 1 1 1 2 0 2 1 2 0 1 2 0 2 1 0 0
 0 0 1 0 1 0 2 2 0 2 2 2 2 0 2 2 1 1 1 1 1 1 0 0 2 2 2 0 0 0 2 1 2 2 1 0 2
 0]
y_hat: 
[1 2 2 0 1 0 0 0 1 2 1 0 2 1 0 1 2 0 2 1 1 1 1 1 2 0 1 1 2 0 1 2 0 1 1 0 0
 0 0 1 0 1 0 2 2 0 2 2 2 2 0 2 2 1 1 1 1 1 1 0 0 2 2 2 0 0 0 1 1 2 2 1 0 2
 0]
Accuracy Score: 0.95
