## **Our own KNN**

In [None]:
import numpy as np
from collections import Counter

class Knn:
    """k-nearest-neighbours classifier using Euclidean distance and majority vote.

    Parameters
    ----------
    neighbors : int
        Number of nearest training points that vote on each prediction (>= 1).

    Attributes
    ----------
    n_neighbors : int
        The value passed as ``neighbors``.
    X_train : numpy.ndarray or None
        Training samples stored by ``fit`` (``None`` until then).
    y_train : numpy.ndarray or None
        Training labels stored by ``fit`` (``None`` until then).
    """

    def __init__(self, neighbors):
        if not isinstance(neighbors, (int, np.integer)) or neighbors < 1:
            raise ValueError(
                f"neighbors must be a positive integer, got {neighbors!r}"
            )
        self.n_neighbors = neighbors
        self.X_train = None
        self.y_train = None

    def fit(self, X_train, y_train):
        """Memorise the training data.

        Both inputs are converted to NumPy arrays so that later lookups are
        purely positional. A pandas Series indexed with ``series[i]`` is
        label-based, which breaks (KeyError or wrong label) whenever the
        index is not 0..n-1, e.g. straight after ``train_test_split``.

        Raises
        ------
        ValueError
            If ``X_train`` and ``y_train`` have different lengths.
        """
        features = np.asarray(X_train)
        labels = np.asarray(y_train)
        if len(features) != len(labels):
            raise ValueError(
                f"X_train and y_train must have the same length, "
                f"got {len(features)} and {len(labels)}"
            )
        self.X_train = features
        self.y_train = labels

    def predict(self, X_test):
        """Predict a label for every row of ``X_test``.

        Returns
        -------
        numpy.ndarray
            One predicted label per query row.

        Raises
        ------
        ValueError
            If the model has not been fitted or the training set is empty.
        """
        if self.X_train is None or self.y_train is None:
            raise ValueError("Knn is not fitted yet; call fit() before predict().")

        train_points = np.asarray(self.X_train)
        if len(train_points) == 0:
            raise ValueError("Cannot predict from an empty training set.")

        # Iterating a DataFrame yields column names, so always iterate an array.
        queries = np.asarray(X_test)

        y_pred = []
        for query in queries:
            distances = [
                self.calculate_distance(query, point) for point in train_points
            ]
            # sorted() is stable: equally distant points keep training order.
            nearest = sorted(
                enumerate(distances), key=lambda pair: pair[1]
            )[:self.n_neighbors]
            y_pred.append(self.majority_count(nearest))
        return np.array(y_pred)

    def calculate_distance(self, point_A, point_B):
        """Return the Euclidean distance between two points."""
        return np.linalg.norm(np.asarray(point_A) - np.asarray(point_B))

    def majority_count(self, sorted_dist):
        """Return the most frequent label among the given neighbours.

        Parameters
        ----------
        sorted_dist : iterable of (int, float)
            ``(training_position, distance)`` pairs, nearest first.

        Ties are resolved in favour of the label that appears first in
        ``sorted_dist``.
        """
        # np.asarray keeps the lookup positional even if y_train was set
        # directly to a pandas Series instead of going through fit().
        labels = np.asarray(self.y_train)
        votes = Counter(labels[position] for position, _ in sorted_dist)
        return votes.most_common(1)[0][0]

In [None]:
# Quick smoke test: build a classifier with k = 5.
my_knn = Knn(neighbors=5)

In [None]:
# The constructor stores its `neighbors` argument as `n_neighbors`.
my_knn.n_neighbors

5

## **Applying on Dataset**

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [None]:
# Read the Social_Network_Ads CSV directly from its GitHub raw URL.
df = pd.read_csv(
    'https://raw.githubusercontent.com/campusx-official/knn-from-scratch/refs/heads/master/Social_Network_Ads.csv'
)
df.head()  # preview the first five rows

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [None]:
# (rows, columns) of the raw dataset.
df.shape

(400, 5)

In [None]:
# 'User ID' is an identifier column and is not used as a feature below.
# `columns=` already selects the axis, so no separate `axis` argument is needed.
df = df.drop(columns=['User ID'])
df.head()

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,Male,19,19000,0
1,Male,35,20000,0
2,Female,26,43000,0
3,Female,27,57000,0
4,Male,19,76000,0


In [None]:
# Features are every column except the last; the last column is the target.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Preview the training features; the rows keep their original (shuffled) index labels.
X_train.head()

Unnamed: 0,Gender,Age,EstimatedSalary
336,Male,58,144000
64,Female,59,83000
55,Female,24,55000
106,Female,26,35000
300,Female,58,38000


In [None]:
# Sanity check: features and labels must have the same number of training rows.
X_train.shape , y_train.shape

((320, 3), (320,))

In [None]:
# Encode 'Gender' as integers. The encoder is fitted on the training split only
# and then reused on the test split so both share the same mapping.
# NOTE(review): scikit-learn documents LabelEncoder as a target (y) encoder;
# OrdinalEncoder is the feature-side equivalent if this is ever extended.
encoder = LabelEncoder()

# .assign builds new frames instead of writing into the objects returned by
# train_test_split, which can emit SettingWithCopyWarning on pandas versions
# without copy-on-write.
X_train = X_train.assign(Gender=encoder.fit_transform(X_train['Gender']))
X_test = X_test.assign(Gender=encoder.transform(X_test['Gender']))

In [None]:
# 'Gender' should now hold integer codes instead of strings.
X_train.head()

Unnamed: 0,Gender,Age,EstimatedSalary
336,1,58,144000
64,0,59,83000
55,0,24,55000
106,0,26,35000
300,0,58,38000


In [None]:
# The labels still carry the original row index from the split (not 0..n-1).
y_train.head()

Unnamed: 0,Purchased
336,1
64,0
55,0
106,0
300,1


In [None]:
# Standardise the features. The scaler is fitted on the training split only and
# the same fitted scaler is then applied to the test split.
scaler = StandardScaler()
X_train, X_test = scaler.fit_transform(X_train), scaler.transform(X_test)

In [None]:
# First five rows of the scaled training features.
X_train[:5]

array([[ 1.02532046,  1.92295008,  2.14601566],
       [-0.97530483,  2.02016082,  0.3787193 ],
       [-0.97530483, -1.3822153 , -0.4324987 ],
       [-0.97530483, -1.18779381, -1.01194013],
       [-0.97530483,  1.92295008, -0.92502392]])

## **Sklearn's KNN**

In [None]:
# Baseline: scikit-learn's classifier with the same k used for the custom Knn.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)  # fit() returns the estimator
y_pred = knn.predict(X_test)

# Accuracy on the held-out test split, as a percentage.
print("Sklearn's KNN Accuracy :", accuracy_score(y_test, y_pred) * 100)

Sklearn's KNN Accuracy : 95.0


## **Evaluating Our Own KNN**

In [None]:
# The split left y_train with the original DataFrame row labels as its index.
# Knn looks up neighbour labels by integer position, so re-index to 0..n-1
# to make positions and index labels coincide.
y_train = y_train.reset_index(drop=True)

In [None]:
# Same k and same data as the scikit-learn baseline, so the two accuracies are comparable.
my_knn = Knn(neighbors=5)
my_knn.fit(X_train, y_train)

y_pred1 = my_knn.predict(X_test)

# Accuracy on the held-out test split, as a percentage.
print("Our Own KNN Accuracy :", accuracy_score(y_test, y_pred1) * 100)

Our Own KNN Accuracy : 95.0
