# K-Nearest Neighbours

## Import Libraries

In [40]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

print("libraries imported")

libraries imported


In [41]:
# Demo of np.bincount: position i of the result holds the number of times
# the non-negative integer i occurs in the input (0 occurs zero times here,
# 4 occurs three times), so argmax of the result is the most frequent value.
arr = np.array([1, 2, 2, 3, 3, 4, 4, 4, 5])
np.bincount(arr)

array([0, 1, 2, 2, 3, 1])

## Model Architecture

In [42]:

class KNN:
    """Brute-force k-nearest-neighbours classifier.

    Each sample is labelled with the most common label among its ``k``
    closest training points.

    Parameters
    ----------
    k : int
        Number of neighbours that vote on each prediction (must be >= 1).
    distance_metric : str
        One of ``'euclidean'``, ``'manhattan'`` or ``'cosine'``.
    """

    def __init__(self, k=5, distance_metric='euclidean') -> None:
        self.k = k
        self.distance_metric = distance_metric

    def fit(self, X, y):
        """Store the training data; no computation happens until predict.

        Parameters
        ----------
        X : array-like, one row per training sample.
        y : array-like, one label per row of ``X``.

        Raises
        ------
        ValueError
            If ``X`` and ``y`` do not contain the same number of entries.
        """
        # Convert once so plain lists / DataFrames support the vectorised
        # arithmetic and the fancy indexing used at prediction time.
        self.X = np.asarray(X)
        self.y = np.asarray(y)
        if len(self.X) != len(self.y):
            raise ValueError("X and y must contain the same number of samples")

    def _calculate_distances(self, x):
        """Return the distance from ``x`` to every stored training row.

        Raises
        ------
        ValueError
            If ``self.distance_metric`` is not a supported metric name.
        """
        x = np.asarray(x)

        # euclidean distance: sqrt(sum_i (a_i - b_i)^2)
        if self.distance_metric == 'euclidean':
            distances = np.sqrt(np.sum((self.X - x)**2, axis=1))

        # manhattan distance: sum_i |a_i - b_i|
        elif self.distance_metric == 'manhattan':
            distances = np.sum(np.abs(self.X - x), axis=1)

        # cosine distance: 1 - (a . b) / (|a| * |b|)
        elif self.distance_metric == 'cosine':
            dots = np.dot(self.X, x)
            # Per-row norms: the norm of the whole matrix would scale every
            # row by the same constant instead of normalising each sample.
            norms = np.linalg.norm(self.X, axis=1) * np.linalg.norm(x)
            # A zero vector has no direction; treat its similarity as 0
            # rather than dividing by zero and producing NaN.
            similarity = np.divide(
                dots,
                norms,
                out=np.zeros_like(norms, dtype=float),
                where=norms > 0,
            )
            # Turn similarity into a distance so that smaller means closer,
            # like the other metrics (predict sorts in ascending order).
            distances = 1.0 - similarity
        else:
            raise ValueError("distance_metric must be 'euclidean', 'manhattan' or 'cosine'")

        return distances

    def predict(self, X):
        """Predict a label for every row of ``X``.

        Parameters
        ----------
        X : array-like, one row per sample to classify.

        Returns
        -------
        numpy.ndarray
            Predicted labels, one per row of ``X``. When several labels tie,
            the smallest label (in sorted order) wins.

        Raises
        ------
        ValueError
            If ``k`` is smaller than 1 or the distance metric is unknown.
        """
        # A negative k would silently slice off the *farthest* points only,
        # and k == 0 leaves nothing to vote on.
        if self.k < 1:
            raise ValueError("k must be at least 1")

        y_pred = []
        # asarray makes iteration go row by row for any array-like input
        for x in np.asarray(X):
            # calculate distances between x and all points in X train dataset
            distances = self._calculate_distances(x)

            # sort distances and get the k nearest neighbours
            k_nearest = np.argsort(distances)[:self.k]

            # get the labels of the k nearest neighbours
            k_nearest_labels = self.y[k_nearest]

            # Majority vote. np.unique also handles negative or non-integer
            # labels; it returns sorted labels, so argmax breaks ties toward
            # the smallest label.
            labels, counts = np.unique(k_nearest_labels, return_counts=True)
            y_pred.append(labels[counts.argmax()])

        return np.array(y_pred)
            

## Dataset

In [43]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load the iris dataset that ships with scikit-learn.
iris = load_iris()

# Split features (iris.data) and labels (iris.target) into train and test
# partitions; random_state=42 seeds the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, random_state=42)

## Train model

In [44]:
# Build a classifier that votes among the 5 nearest training points,
# measured with Euclidean distance.
knn = KNN(k=5, distance_metric='euclidean')
# fit() stores the training data; distances are computed later, at predict time.
knn.fit(X_train, y_train)

## Evaluate model

In [45]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Predict a label for every held-out test sample.
y_pred = knn.predict(X_test)

# Score the predictions against the true test labels. average='macro'
# computes each metric per class and takes the unweighted mean, so every
# class counts equally regardless of how many samples it has.
acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred, average='macro')
rec = recall_score(y_test, y_pred, average='macro')
f1 = f1_score(y_test, y_pred, average='macro')

# Report the metrics.
print(f"Accuracy: {acc}")
print(f"Precision: {prec}")
print(f"Recall: {rec}")
print(f"F1: {f1}")

Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1: 1.0
