#### Importing Libraries

In [1]:
import pandas as pd
import numpy as np

#### Read CSV Files

In [2]:
# The CSV has no header row, so pandas numbers the columns 0..4; map those
# positions onto readable names.
train_column_names = ['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width', 'Class']
df_train = pd.read_csv('train.csv', header=None).rename(
    columns=dict(enumerate(train_column_names))
)
df_train.head()

Unnamed: 0,Sepal Length,Sepal Width,Petal Length,Petal Width,Class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [3]:
# The CSV has no header row, so pandas numbers the columns 0..4; map those
# positions onto readable names.
test_column_names = ['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width', 'Class']
df_test = pd.read_csv('test.csv', header=None).rename(
    columns=dict(enumerate(test_column_names))
)
df_test.head()

Unnamed: 0,Sepal Length,Sepal Width,Petal Length,Petal Width,Class
0,4.8,3.0,1.4,0.3,Iris-setosa
1,5.1,3.8,1.6,0.2,Iris-setosa
2,4.6,3.2,1.4,0.2,Iris-setosa
3,5.3,3.7,1.5,0.2,Iris-setosa
4,5.0,3.3,1.4,0.2,Iris-setosa


#### Quick Select Function

In [4]:
def partition(arr, l, r):
    """Partition arr[l..r] in place around the pivot arr[r].

    Elements less than or equal to the pivot are moved to its left and
    larger elements to its right.

    Parameters:
        arr: mutable sequence of mutually comparable values (modified in place).
        l:   first index of the range to partition (inclusive).
        r:   last index of the range to partition (inclusive); arr[r] is the pivot.

    Returns:
        The index at which the pivot ends up.
    """
    pivot = arr[r]
    boundary = l  # next slot for an element that is <= pivot
    for j in range(l, r):
        if arr[j] <= pivot:
            arr[boundary], arr[j] = arr[j], arr[boundary]
            boundary += 1
    # Drop the pivot between the "<= pivot" and "> pivot" groups.
    arr[boundary], arr[r] = arr[r], arr[boundary]
    return boundary


def kthSmallest(arr, l, r, k):
    """Return the k-th smallest value of arr[l..r] using quickselect.

    The search is iterative: each step partitions the current window and then
    narrows it to the side that contains the requested rank. A recursive
    formulation needs one stack frame per step, which on already-sorted input
    (where the last-element pivot is always the maximum) reaches a depth equal
    to the window size and raises RecursionError for large arrays.

    Parameters:
        arr: mutable sequence of mutually comparable values. It is reordered
             in place, so pass a copy if the original order matters.
        l:   first index of the search window (inclusive).
        r:   last index of the search window (inclusive).
        k:   1-based rank of the wanted value within arr[l..r].

    Returns:
        The k-th smallest value, or the string "Error: K Not In Bounds" when
        k is not between 1 and the window size.
    """
    while k > 0 and k <= r - l + 1:
        index = partition(arr, l, r)
        rank = index - l  # 0-based rank of the pivot inside the window

        if rank == k - 1:
            return arr[index]

        if rank > k - 1:
            # The wanted value lies to the left of the pivot.
            r = index - 1
        else:
            # Skip the pivot and everything left of it; adjust the rank.
            k = k - index + l - 1
            l = index + 1
    return "Error: K Not In Bounds"

#### Calculating The Euclidean Distance

In [5]:
def euclidean_distance(a,b):
    """Return the Euclidean (L2) distance between a and b.

    The operands must support subtraction with each other (e.g. numpy arrays
    of the same shape); the norm of their difference is returned.
    """
    difference = a - b
    return np.linalg.norm(difference)

#### Finding The Mode

In [6]:
def get_mode(x_list):
    """Return the most frequent value in a sequence of non-negative integers.

    Ties are resolved in favour of the smallest value, because argmax reports
    the first maximum of the per-value counts.
    """
    occurrences = np.bincount(x_list)
    return occurrences.argmax()

#### Dictionary Of Class Against Label

In [7]:
# Integer code -> class name, numbered in order of first appearance in the
# training set, and the inverse lookup (class name -> integer code).
dict_class = dict(enumerate(df_train['Class'].unique()))
class_dict = {name: code for code, name in dict_class.items()}
class_dict

{'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}

#### KNN-Algorithm

In [8]:
def KNNAlgorithm(x, k):
    """Predict the class of sample x by majority vote of its k nearest training samples.

    Distances are Euclidean and are measured against every row of df_train,
    using all columns except the last one as features. The k-th smallest
    distance is found with kthSmallest; every training sample strictly closer
    than that distance votes, and samples tied at exactly that distance are
    added (in training-set order) until exactly k samples vote.

    Parameters:
        x: feature values of the sample to classify, one per feature column
           of df_train, convertible to a float array.
        k: number of neighbours that vote (1 <= k <= len(df_train)).

    Returns:
        The class name (a value of dict_class) that is most common among the
        k nearest training samples; get_mode resolves ties between classes in
        favour of the lowest class code.

    Raises:
        ValueError: if k is not between 1 and the number of training samples.
    """
    n_train = len(df_train)
    if not 1 <= k <= n_train:
        raise ValueError(
            'k must be between 1 and the number of training samples '
            '(%d), got %r' % (n_train, k)
        )

    # 1. Compute distance between x and every sample in the training set
    # (the feature matrix is extracted once instead of row by row).
    train_features = df_train.iloc[:, :-1].to_numpy(dtype=float)
    query = np.asarray(x, dtype=float)
    distances = [euclidean_distance(query, row) for row in train_features]

    # 2. Choose the K training samples that are nearest to x.
    # kthSmallest reorders its input, so it works on a copy.
    k_value = kthSmallest(distances.copy(), 0, n_train - 1, k)
    closer = [i for i, d in enumerate(distances) if d < k_value]
    tied = [i for i, d in enumerate(distances) if d == k_value]
    # Fewer than k samples are strictly closer than the k-th distance; fill
    # the remaining votes from the tied samples so that exactly k vote.
    neighbours = closer + tied[:k - len(closer)]
    train_classes = df_train['Class'].to_numpy()
    labels = [class_dict[train_classes[i]] for i in neighbours]

    # 3. Assign label of the majority class
    return dict_class[get_mode(labels)]

#### Finding The Predicted Class For The Test Set For Each K

In [9]:
# Classify every test row once per neighbourhood size and store the result
# in a 'Predicted_<k>' column next to the true class.
K = [1,3,5]
for k in K:
    Predicted_Class = [
        # Only the first four columns are features; later columns hold the
        # true class and any predictions added by earlier iterations.
        KNNAlgorithm(np.asarray(df_test.iloc[row][:4]), k)
        for row in range(len(df_test))
    ]
    df_test['Predicted_'+str(k)] = pd.Series(Predicted_Class)
df_test

Unnamed: 0,Sepal Length,Sepal Width,Petal Length,Petal Width,Class,Predicted_1,Predicted_3,Predicted_5
0,4.8,3.0,1.4,0.3,Iris-setosa,Iris-setosa,Iris-setosa,Iris-setosa
1,5.1,3.8,1.6,0.2,Iris-setosa,Iris-setosa,Iris-setosa,Iris-setosa
2,4.6,3.2,1.4,0.2,Iris-setosa,Iris-setosa,Iris-setosa,Iris-setosa
3,5.3,3.7,1.5,0.2,Iris-setosa,Iris-setosa,Iris-setosa,Iris-setosa
4,5.0,3.3,1.4,0.2,Iris-setosa,Iris-setosa,Iris-setosa,Iris-setosa
5,5.7,3.0,4.2,1.2,Iris-versicolor,Iris-versicolor,Iris-versicolor,Iris-versicolor
6,5.7,2.9,4.2,1.3,Iris-versicolor,Iris-versicolor,Iris-versicolor,Iris-versicolor
7,6.2,2.9,4.3,1.3,Iris-versicolor,Iris-versicolor,Iris-versicolor,Iris-versicolor
8,5.1,2.5,3.0,1.1,Iris-versicolor,Iris-versicolor,Iris-versicolor,Iris-versicolor
9,5.7,2.8,4.1,1.3,Iris-versicolor,Iris-versicolor,Iris-versicolor,Iris-versicolor


#### Creating A Confusion Matrix For Each K

In [10]:
def confusionMatrix(Y_Pred, Y_Test):
    """Build, display and return the confusion matrix of predictions vs. truth.

    Rows are the predicted classes and columns the true classes; an extra
    'All' row and column hold the marginal totals (margins=True).
    """
    table = pd.crosstab(index=Y_Pred, columns=Y_Test, margins=True)
    display(table)
    return table
# One confusion matrix per neighbourhood size; the generator is consumed in
# order, so the tables are displayed for k = 1, 3, 5 in that sequence.
df_conf1, df_conf3, df_conf5 = (
    confusionMatrix(df_test['Predicted_' + str(k)], df_test['Class'])
    for k in (1, 3, 5)
)

Class,Iris-setosa,Iris-versicolor,Iris-virginica,All
Predicted_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Iris-setosa,5,0,0,5
Iris-versicolor,0,5,0,5
Iris-virginica,0,0,5,5
All,5,5,5,15


Class,Iris-setosa,Iris-versicolor,Iris-virginica,All
Predicted_3,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Iris-setosa,5,0,0,5
Iris-versicolor,0,5,0,5
Iris-virginica,0,0,5,5
All,5,5,5,15


Class,Iris-setosa,Iris-versicolor,Iris-virginica,All
Predicted_5,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Iris-setosa,5,0,0,5
Iris-versicolor,0,5,0,5
Iris-virginica,0,0,5,5
All,5,5,5,15


#### Evaluation Report

In [11]:
# Distinct class names, in order of first appearance in the training set.
classes = [label for label in df_train['Class'].unique()]
classes

['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']

In [12]:
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


def evaluationReport(df_conf):
    """Print macro- and micro-averaged accuracy, precision, recall and F1-score.

    Parameters:
        df_conf: confusion matrix with predicted classes as rows and true
                 classes as columns, whose last row and last column hold the
                 marginal totals (as produced by pd.crosstab(..., margins=True)).

    The number of classes is taken from the matrix itself. A class that is
    missing from the rows (never predicted) or from the columns (never
    observed) is added with zero counts, so that position i refers to the
    same class on both axes. Ratios with a zero denominator (e.g. precision
    of a class that was never predicted) are reported as 0.0.

    Returns:
        None; the report is written to standard output.
    """
    print('EVALUATION REPORT \n')
    print('-----------------')
    # Drop the 'All' margins, then align rows and columns on the same labels.
    df_c = df_conf.iloc[:-1,:-1]
    labels = df_c.index.union(df_c.columns)
    df_c = df_c.reindex(index=labels, columns=labels, fill_value=0)
    counts = df_c.to_numpy(dtype=float)
    n_classes = len(labels)
    total = float(counts.sum())

    TP, FP, FN, TN, P, R, F, A = [], [], [], [], [], [], [], []
    for i in range(n_classes):
        TP.append(float(counts[i, i]))
        # Row i: everything predicted as class i; column i: everything truly class i.
        FP.append(float(counts[i, :].sum()) - TP[-1])
        FN.append(float(counts[:, i].sum()) - TP[-1])
        TN.append(total - TP[-1] - FP[-1] - FN[-1])
        A.append(_safe_ratio(TP[-1] + TN[-1], total))
        P.append(_safe_ratio(TP[-1], TP[-1] + FP[-1]))
        R.append(_safe_ratio(TP[-1], TP[-1] + FN[-1]))
        F.append(_safe_ratio(2*P[-1]*R[-1], P[-1] + R[-1]))
    print('Macro Average (Accuracy) : ', _safe_ratio(sum(A), n_classes))
    print('Macro Average (Precision): ', _safe_ratio(sum(P), n_classes))
    print('Macro Average (Recall)   : ', _safe_ratio(sum(R), n_classes))
    print('Macro Average (F1-Score) : ', _safe_ratio(sum(F), n_classes))
    print('\n')
    # Micro averages pool the per-class counts before taking the ratios.
    p = _safe_ratio(sum(TP), sum(TP) + sum(FP))
    r = _safe_ratio(sum(TP), sum(TP) + sum(FN))
    print('Micro Average (Accuracy) : ', _safe_ratio(sum(TP) + sum(TN), sum(TP) + sum(FN) + sum(FP) + sum(TN)))
    print('Micro Average (Precision): ', p)
    print('Micro Average (Recall)   : ', r)
    print('Micro Average (F1-Score) : ', _safe_ratio(2*p*r, p + r))

#### For K = 1

In [13]:
# Print the macro- and micro-averaged metrics for the K = 1 confusion matrix.
evaluationReport(df_conf1)

EVALUATION REPORT 

-----------------
Macro Average (Accuracy) :  1.0
Macro Average (Precision):  1.0
Macro Average (Recall)   :  1.0
Macro Average (F1-Score) :  1.0


Micro Average (Accuracy) :  1.0
Micro Average (Precision):  1.0
Micro Average (Recall)   :  1.0
Micro Average (F1-Score) :  1.0


#### For K = 3

In [14]:
# Print the macro- and micro-averaged metrics for the K = 3 confusion matrix.
evaluationReport(df_conf3)

EVALUATION REPORT 

-----------------
Macro Average (Accuracy) :  1.0
Macro Average (Precision):  1.0
Macro Average (Recall)   :  1.0
Macro Average (F1-Score) :  1.0


Micro Average (Accuracy) :  1.0
Micro Average (Precision):  1.0
Micro Average (Recall)   :  1.0
Micro Average (F1-Score) :  1.0


#### For K = 5

In [15]:
# Print the macro- and micro-averaged metrics for the K = 5 confusion matrix.
evaluationReport(df_conf5)

EVALUATION REPORT 

-----------------
Macro Average (Accuracy) :  1.0
Macro Average (Precision):  1.0
Macro Average (Recall)   :  1.0
Macro Average (F1-Score) :  1.0


Micro Average (Accuracy) :  1.0
Micro Average (Precision):  1.0
Micro Average (Recall)   :  1.0
Micro Average (F1-Score) :  1.0
