<a href="https://colab.research.google.com/github/whistle-hikhi/K-nearest-neighbors/blob/main/knn_from_scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import numpy as np
from collections import Counter
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

class KNN:
    """k-nearest-neighbours classifier using Euclidean distance and majority vote.

    Parameters
    ----------
    k : int, default 3
        Number of nearest training samples that vote on each prediction.
        If ``k`` exceeds the number of training samples, every training
        sample votes.

    Raises
    ------
    ValueError
        If ``k`` is not a positive integer.
    """

    def __init__(self, k=3):
        # bool is a subclass of int, so it has to be rejected explicitly.
        if isinstance(k, bool) or not isinstance(k, (int, np.integer)) or k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = int(k)

    def fit(self, X, y):
        """Store the training samples and their labels.

        Parameters
        ----------
        X : array-like
            Training samples, one per entry along the first axis.
        y : array-like
            Labels; ``y[i]`` is the label of ``X[i]``.

        Raises
        ------
        ValueError
            If ``X`` is empty or ``X`` and ``y`` differ in length.
        """
        # Converting to arrays guarantees positional indexing and row-wise
        # iteration regardless of the container type passed in.
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim == 0 or y.ndim == 0:
            raise ValueError("X and y must be sequences of samples and labels")
        if len(X) == 0:
            raise ValueError("X must contain at least one training sample")
        if len(X) != len(y):
            raise ValueError(
                f"X has {len(X)} samples but y has {len(y)} labels"
            )
        self.X_train = X
        self.y_train = y

    def euclidean_distance(self, x1, x2):
        """Return the Euclidean distance between ``x1`` and ``x2``.

        The inputs are broadcast against each other and the squared
        differences are summed over the last axis, so two 1-D vectors give a
        scalar while a 2-D array against a vector gives one distance per row.
        """
        diff = np.atleast_1d(
            np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
        )
        return np.sqrt(np.sum(diff ** 2, axis=-1))

    def predict(self, X):
        """Predict a label for every sample in ``X``.

        Parameters
        ----------
        X : array-like
            Query samples, one per entry along the first axis.

        Returns
        -------
        numpy.ndarray
            One predicted label per sample, in the same order as ``X``.

        Raises
        ------
        AttributeError
            If called before ``fit``.
        ValueError
            If a sample's feature count differs from the training data.
        """
        if getattr(self, "X_train", None) is None:
            raise AttributeError(
                "KNN instance is not fitted yet; call fit() before predict()"
            )
        pred = [self._predict(x) for x in np.asarray(X)]
        return np.array(pred)

    def _predict(self, x):
        """Return the majority label among the ``k`` training samples nearest to ``x``."""
        train_labels = np.asarray(self.y_train)
        train = np.asarray(self.X_train)
        # Flatten each sample so scalar, vector and multi-dimensional samples
        # are all handled by one vectorised distance computation.
        train = train.reshape(len(train), -1)
        query = np.asarray(x, dtype=float).reshape(-1)
        if query.size != train.shape[1]:
            raise ValueError(
                f"sample has {query.size} features but the training data "
                f"has {train.shape[1]}"
            )

        distances = self.euclidean_distance(train, query)
        # A stable sort keeps equidistant neighbours in training order, which
        # makes the selected neighbours deterministic.
        k_indices = np.argsort(distances, kind="stable")[: self.k]

        # Labels are counted nearest-first; on a tied vote the label that was
        # encountered first (i.e. the nearer one) is returned.
        votes = Counter(train_labels[k_indices])
        return votes.most_common(1)[0][0]

In [6]:
# Load the Iris dataset and separate the feature matrix from the class labels.
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Build a 3-neighbour classifier; fit() stores the training data on the
# instance for use at prediction time.
knn = KNN(k=3)
knn.fit(X_train, y_train)

In [8]:
# Classify every held-out sample; predict() returns a NumPy array of predicted labels.
predictions = knn.predict(X_test)

In [9]:
# Accuracy is the fraction of test samples whose predicted label equals the true label.
is_correct = predictions == y_test
accuracy = is_correct.mean()

print(f"Prediction from scratch: {predictions}")
print(f"Actual labels: {y_test}")
print(f"Accuracy: {accuracy}")

Prediction from scratch: [1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0]
Actual labels: [1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0]
Accuracy: 1.0


In [10]:
# Translate the first few predicted class indices (at most five) into species names.
class_names = iris.target_names
print("Samples predictions")
for position, label in enumerate(predictions[:5], start=1):
  print(f"{position}. {class_names[label]}")

Samples predictions
1. versicolor
2. setosa
3. virginica
4. versicolor
5. versicolor
