# K-Nearest Neighbors

## Dependencies

In [29]:
import pandas as pd
from math import sqrt
from collections import Counter
from typing import List

## Parameters

In [10]:
# Relative path of the Iris data file.
data_01 = '../data/iris.data'

# Column labels for the data set: four measurements followed by the class.
COLUMNS = [
    'sepal_length',
    'sepal_width',
    'petal_length',
    'petal_width',
    'class',
]

## Functions

In [34]:
def euclidean_distance(row1: List[float], row2: List[float]) -> float:
    """Return the Euclidean distance between two rows.

    Only the first ``len(row1)`` positions take part in the calculation, so
    ``row2`` may carry extra trailing items, which are ignored. ``row2`` must
    be at least as long as ``row1``; otherwise indexing it raises
    ``IndexError``.
    """
    squared_total = 0
    for position, left in enumerate(row1):
        # row2 is indexed by position so that a too-short row2 still fails
        # loudly instead of being silently truncated.
        squared_total += (left - row2[position]) ** 2
    return sqrt(squared_total)


def get_neighbors(train: List[List[float]], test: List[float], k: int) -> List[List[float]]:
    """Return the ``k`` training rows closest to ``test``.

    Distances are computed with ``euclidean_distance(test, row)``, which
    compares only the first ``len(test)`` positions; trailing items of a
    training row (such as a class label) do not affect the distance.

    Args:
        train: Training rows.
        test: The query row.
        k: Number of neighbors wanted.

    Returns:
        Up to ``k`` rows of ``train`` in ascending order of distance. Rows at
        the same distance keep their relative order from ``train``. When
        ``k`` exceeds the number of training rows, all rows are returned;
        when ``k <= 0`` the result is empty.
    """
    # Sort on the distance alone. Sorting [distance, row] pairs would fall
    # back to comparing the rows themselves whenever two distances tie, which
    # makes the ranking depend on row contents and raises TypeError for rows
    # whose items cannot be ordered.
    ranked = sorted(train, key=lambda row: euclidean_distance(test, row))

    # Slicing tolerates k > len(train) (indexing 0..k-1 would raise
    # IndexError); max() keeps a negative k from being interpreted as
    # "everything except the last |k| rows".
    return ranked[:max(k, 0)]


def classify_by_knn(train: List[List[float]], test: List[float], k):
    """Predict the class of ``test`` by majority vote of its ``k`` nearest rows.

    The class of a training row is read from its last element. The neighbors
    come from ``get_neighbors(train, test, k)``.
    """
    votes = Counter(neighbor[-1] for neighbor in get_neighbors(train, test, k))
    # most_common(1) gives a one-item list holding the (label, count) pair
    # with the highest count; only the label is returned.
    winner, _ = votes.most_common(1)[0]
    return winner

## Read data

In [11]:
# Load the data file into a DataFrame, labelling the columns with COLUMNS.
# header=None spells out what pandas already does when names= is given:
# the first line of the file is treated as data, not as a header row.
df = pd.read_csv(data_01, header=None, names=COLUMNS)

In [13]:
# Show the frame's shape, then the dtype of each column, one per line.
print(df.shape, df.dtypes, sep='\n')

(150, 5)
sepal_length    float64
sepal_width     float64
petal_length    float64
petal_width     float64
class            object
dtype: object


In [14]:
# Preview the first five rows.
df.head(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [15]:
# Preview the last five rows.
df.tail(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica
149,5.9,3.0,5.1,1.8,Iris-virginica
