## Importing Modules

In [31]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
from sklearn.model_selection import train_test_split
import operator

## Importing data from sklearn Iris dataset

In [13]:
data = pd.read_csv("Iris.csv")

In [61]:
data.head(5)
data = data.sample(frac = 1)
print(data.head())

      Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm  \
37    38            4.9           3.1            1.5           0.1   
46    47            5.1           3.8            1.6           0.2   
123  124            6.3           2.7            4.9           1.8   
9     10            4.9           3.1            1.5           0.1   
109  110            7.2           3.6            6.1           2.5   

            Species  
37      Iris-setosa  
46      Iris-setosa  
123  Iris-virginica  
9       Iris-setosa  
109  Iris-virginica  


## Creating Training and Testing Datasets

In [62]:
train = np.array (data[ : 97])
test = np.array (data[97: ])

In [63]:
print(len(train), len(test))

97 53


## Writing KNN algorithm

In [64]:
def euclidean_distance(instance1, instance2, length):
    distance = 0
    for x in range(length):
        distance += pow(instance1[x] - instance2[x], 2)
    return math.sqrt(distance)

In [65]:
data1 = [2,2,2,'a']
data2 = [3,3,3,'b']

euclidean_distance(data1, data2, 3)

1.7320508075688772

In [66]:
def get_neighbors(train, test, k):
    distance = []
    length = len(test) -1
    for x in range(len(train)):
        dist = euclidean_distance(test, train[x], length)
        distance.append((train[x], dist))
    distance.sort(key = operator.itemgetter(1))
    neighbors = []
    for x in range(k):
        neighbors.append(distance[x][0])
    return neighbors

In [67]:
data1 = [[2,2,2,'a'],[3,3,3,'b']]
data2 = [4,4,4]
k = 1
get_neighbors(data1, data2, k)

[[3, 3, 3, 'b']]

In [68]:
def get_response(neighbors):
    class_votes = {}
    for x in range(len(neighbors)):
        response = neighbors[x][-1]
        if(response in class_votes):
            class_votes[response] += 1
        else:
            class_votes[response] = 1
    sorted_votes = sorted(class_votes.items(), key = operator.itemgetter(1), reverse = True)
    return sorted_votes[0][0]

In [69]:
data1 = [[2,2,2,'a'],[3,3,3,'a'],[4,4,4,'b']]
get_response(data1)

'a'

In [70]:
def get_accuracy(test, prediction):
    correct = 0
    for x in range(len(test)):
        if(test[x][-1] == prediction[x]):
            correct += 1
    return (correct / float(len(test)))*100

In [71]:
data1 = [[2,2,2,'a'],[3,3,3,'a'],[4,4,4,'b']]
prediction = ["a","a","a"]
get_accuracy(data1, prediction)

66.66666666666666

# Testing KNN Algorithm

In [72]:
if (__name__ == "__main__"):
    prediction = []
    for x in range(len(test)):
        neighbors = get_neighbors(train, test[x], k)
        result = get_response(neighbors)
        prediction.append(result)
        print(">Predicted"+str(result)+"   >actual"+str(test[x][-1]))
    print(get_accuracy(test, prediction))
        

>PredictedIris-versicolor   >actualIris-versicolor
>PredictedIris-virginica   >actualIris-virginica
>PredictedIris-setosa   >actualIris-setosa
>PredictedIris-setosa   >actualIris-setosa
>PredictedIris-virginica   >actualIris-virginica
>PredictedIris-virginica   >actualIris-virginica
>PredictedIris-virginica   >actualIris-virginica
>PredictedIris-versicolor   >actualIris-versicolor
>PredictedIris-setosa   >actualIris-setosa
>PredictedIris-setosa   >actualIris-setosa
>PredictedIris-virginica   >actualIris-virginica
>PredictedIris-setosa   >actualIris-setosa
>PredictedIris-versicolor   >actualIris-versicolor
>PredictedIris-virginica   >actualIris-virginica
>PredictedIris-virginica   >actualIris-virginica
>PredictedIris-setosa   >actualIris-setosa
>PredictedIris-setosa   >actualIris-setosa
>PredictedIris-virginica   >actualIris-virginica
>PredictedIris-virginica   >actualIris-virginica
>PredictedIris-virginica   >actualIris-virginica
>PredictedIris-virginica   >actualIris-virginica
>Predic

# Conclusion

Our KNN algorithm gave a accuracy of 100% on Iris Dataset, which is kind of unbeliveable though!!!