In [3]:
import numpy as np 
from collections import Counter
import pandas as pd

In [4]:
class KNN:
    def __init__(self,k,type="Classify"):
        self.k = k
        self.type = type
    
    def fit(self,x,y):
        self.X_train = x
        self.Y_train = y
    
    def predict(self,x_test):
        pred = [self.single_prediction(i) for i in x_test]
        return pred
    
    def single_prediction(self,x):
        distance = [self.Euclid_dist(x,i) for i in self.X_train]
        idx = np.argsort(distance)[:self.k]

        k_nearest_label = [self.Y_train[i] for i in idx]
        if self.type == "Regression":
            return np.mean(k_nearest_label)
        else:
            most_common = Counter(k_nearest_label).most_common()
            return most_common[0][0] 
    
    def Euclid_dist(self,x1,x2):
        return np.sqrt(np.sum((x1-x2)**2))
    
    def score(self,y_test,y_pred):
        if self.type=="Regression":
            return np.mean((y_test-y_pred)**2)
        else:
            return sum(1 for i in range(len(y_test)) if y_test[i] == y_pred[i]) / len(y_pred)

---
## Validating the algorithm on Classification

In [6]:
from sklearn.datasets import load_iris
x,y = load_iris().data, load_iris().target
from sklearn.model_selection import train_test_split
x_train,x_test, y_train,y_test = train_test_split(x,y,random_state=42)
model = KNN(4)
model.fit(x_train,y_train)

In [7]:
pred = model.predict(x_test)
pred

[1,
 0,
 2,
 1,
 1,
 0,
 1,
 2,
 1,
 1,
 2,
 0,
 0,
 0,
 0,
 1,
 2,
 1,
 1,
 2,
 0,
 2,
 0,
 2,
 2,
 2,
 2,
 2,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 2,
 1,
 0]

In [68]:
model.score(y_test,pred)

1.0

---
## Regression with KNN

In [1]:
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
from sklearn.datasets import fetch_california_housing
dataset = fetch_california_housing()
dataset.feature_names

['MedInc',
 'HouseAge',
 'AveRooms',
 'AveBedrms',
 'Population',
 'AveOccup',
 'Latitude',
 'Longitude']

In [5]:
X,Y = pd.DataFrame(dataset.data,columns=dataset.feature_names), dataset.target
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X = sc.fit_transform(X)
from sklearn.model_selection import train_test_split
X_train,X_test, Y_train,Y_test = train_test_split(X,Y,random_state=42)

In [6]:
X_train

array([[ 0.18087872,  0.66431034, -0.05689491, ..., -0.0521138 ,
        -0.85297106,  0.72359203],
       [ 0.77700173,  1.06160074,  0.37804328, ..., -0.06413879,
         0.71545395, -1.20305254],
       [ 0.02549196,  0.58485227,  0.27527566, ..., -0.08037958,
         1.31941463, -1.55743535],
       ...,
       [-0.49283239,  0.58485227, -0.58294927, ...,  0.02517025,
        -0.74997001,  0.59381804],
       [ 0.97302487, -1.08376738,  0.39058403, ...,  0.01042151,
         0.91209231, -1.19306993],
       [-0.68174943,  1.85618152, -0.81905034, ..., -0.09262259,
         1.00104776, -1.42267006]])

In [7]:
testing_model = KNN(3,type="Regression")
testing_model.fit(X_train,Y_train)
predictions_whole_data = testing_model.predict(X_test)
score = testing_model.score(Y_test,predictions_whole_data)
score

0.45580732183186895