In [None]:
from numpy import sqrt, abs

In [81]:
# KNN class
class KNNClassifier:
    """k-nearest-neighbours classifier based on a brute-force distance scan.

    ``fit`` only stores the training data; ``predict`` compares each query
    point with every stored sample using the configured metric and lets the
    ``k`` closest samples vote on the label.
    """

    # Initializing the main attributes of the KNN Classifier
    def __init__(self, k=5, distance='euclidean'):
        """Store the hyper-parameters.

        Args:
            k: Number of nearest neighbours that vote on each prediction.
            distance: Metric name: 'euclidean', 'manhattan' or 'cosine'.
                The name is validated when ``predict`` is called.
        """
        self.k = k
        self.distance = distance

    # Euclidean Distance Function
    def _euclidean_distance(self, X1, X2):
        """Return the Euclidean (L2) distance between ``X1`` and ``X2``.

        Only the first ``len(X1)`` components of ``X2`` are read.
        """
        squared = sum((a - X2[i]) ** 2 for i, a in enumerate(X1))
        return sqrt(squared)

    # Manhattan Distance Function
    def _manhattan_distance(self, X1, X2):
        """Return the Manhattan (L1) distance between ``X1`` and ``X2``.

        Only the first ``len(X1)`` components of ``X2`` are read.
        """
        return sum(abs(a - X2[i]) for i, a in enumerate(X1))

    # Cosine Distance Function
    def _cosine_distance(self, X1, X2):
        """Return the cosine distance ``1 - cos(X1, X2)``.

        If either vector has zero magnitude the cosine is undefined; the
        distance is then reported as 1.0 (similarity 0) instead of dividing
        by zero, so the result can still be ordered against other distances.
        """
        dot = 0
        x1_sq = 0
        x2_sq = 0

        for i, a in enumerate(X1):
            b = X2[i]
            dot += a * b
            x1_sq += a ** 2
            x2_sq += b ** 2

        denominator = sqrt(x1_sq) * sqrt(x2_sq)
        if denominator == 0:
            # A NaN here would make the neighbour sort order unreliable.
            return 1.0

        return 1 - dot / denominator

    # Metric lookup
    def _distance_function(self):
        """Return the distance method selected by ``self.distance``.

        Raises:
            ValueError: If ``self.distance`` is not a supported metric name.
        """
        metrics = {
            'euclidean': self._euclidean_distance,
            'manhattan': self._manhattan_distance,
            'cosine': self._cosine_distance,
        }
        try:
            return metrics[self.distance]
        except (KeyError, TypeError):
            # TypeError covers unhashable values, which are just as invalid.
            raise ValueError('Invalid distance argument') from None

    # Voting among the selected neighbours
    def _majority_label(self, neighbours):
        """Pick the winning label from ``(distance, label)`` pairs.

        The most frequent label wins. Ties on frequency go to the label with
        the smaller summed distance, and remaining ties to the label that
        appears first in ``neighbours``. Returns -1 when ``neighbours`` is
        empty.
        """
        counts = {}
        distance_sums = {}
        for dist, label in neighbours:
            counts[label] = counts.get(label, 0) + 1
            distance_sums[label] = distance_sums.get(label, 0) + dist

        if not counts:
            return -1

        # min() keeps the first of equally ranked labels, and dicts iterate
        # in insertion order, so the closest neighbour's label wins full ties.
        return min(
            counts,
            key=lambda label: (-counts[label], distance_sums[label]),
        )

    # Storing training data
    def fit(self, X_train, y_train):
        """Store the training samples and their labels for later lookups.

        Args:
            X_train: Indexable collection of feature vectors.
            y_train: Labels, where ``y_train[i]`` belongs to ``X_train[i]``.
        """
        self.X_train = X_train
        self.y_train = y_train

    # Prediction Function
    def predict(self, X_test):
        """Predict a label for every point in ``X_test``.

        Args:
            X_test: Iterable of feature vectors to classify.

        Returns:
            A list with one predicted label per query point. A query with no
            available neighbours (for example empty training data or
            ``k == 0``) gets the label -1.

        Raises:
            ValueError: If ``self.distance`` is not a supported metric name.
        """
        # Resolve the metric once instead of once per train/test pair; this
        # also reports an invalid metric before any distance is computed.
        metric = self._distance_function()

        y_predict = []
        for query in X_test:
            # Distance from the query to every stored training sample.
            scored = [
                (metric(sample, query), self.y_train[i])
                for i, sample in enumerate(self.X_train)
            ]

            # Stable sort: equally distant samples keep their training order.
            scored.sort(key=lambda pair: pair[0])

            y_predict.append(self._majority_label(scored[:self.k]))

        return y_predict

In [82]:
def accuracy_score(y_test, y_predict):
    """Return the fraction of predictions that equal the true labels.

    Args:
        y_test: True labels, indexable by position.
        y_predict: Predicted labels; its length determines how many
            positions are compared and is the denominator of the result.

    Returns:
        Number of matching positions divided by ``len(y_predict)``.
    """
    total = len(y_predict)
    # Count the positions where the prediction matches the true label.
    hits = sum(
        1 for idx, predicted in enumerate(y_predict)
        if predicted == y_test[idx]
    )
    return hits / total