In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Extracting Data

In [2]:
data = pd.read_csv('sobar-72.csv')
data.tail()
train_knn = data.iloc[:-15, :]
test_knn = data.iloc[-15:, :]

dataset = np.array(data)
train = dataset[:-15]
test = dataset[-15:]
train = np.array(train)
test = np.array(test)

print(train)

[[10 13 12 ... 11  8  1]
 [10 11 11 ...  4  4  1]
 [10 15  3 ...  3 15  1]
 ...
 [10 15 15 ... 13 11  0]
 [10  9  8 ... 13 10  0]
 [10 10  5 ... 15 15  0]]


# Important Functions

In [3]:
from math import sqrt

def euclidean_dis(v1, v2):
    sum = 0
    for i in range(len(v1)-1):
        sum += (v1[i] - v2[i])**2
    return sqrt(sum)

def find_neighbours(point, training_set, k):
    distances = []
    for i in range(len(training_set)):
        dis = euclidean_dis(point, training_set[i])
        distances.append([dis, i])
    distances.sort()
    #sorted(distances, key=lambda x:x[0])
    
    knn = []
    for i in range(k):
        knn.append(training_set[distances[i][1]])
    
    knn = np.array(knn)
    return knn

from statistics import mode
def predict_label(neighbours):
    labels = [neighbours[i][-1] for i in range(len(neighbours))]
    return mode(labels)

#print(find_neighbours(test[0], train, 5))

# Initializing Condensed Set

In [4]:
number_of_rows = len(train)
random_index = np.random.randint(0, number_of_rows)
condensed_set = np.array([train[random_index]])
train = np.delete(train, random_index, 0)
number_of_rows -= 1

random_index = np.random.randint(0, number_of_rows)
random_data = train[random_index, :]
train = np.delete(train, random_index, 0)
number_of_rows -= 1

train.shape

(55, 20)

# Iteratively preparing Condensed Set

In [5]:
while number_of_rows:
    neigh = find_neighbours(random_data, condensed_set, 1)
    print(neigh[0][-1], random_data[-1])
    if random_data[-1] != neigh[0][-1]:
        condensed_set = np.vstack((condensed_set, random_data))
    random_index = np.random.randint(0, number_of_rows)
    random_data = train[random_index, :]
    train = np.delete(train, random_index, 0)
    number_of_rows -= 1


1 0
0 0
0 1
0 0
0 1
0 0
0 1
1 0
1 1
1 1
0 0
0 0
1 1
0 0
0 0
0 0
0 1
1 0
0 0
1 0
0 0
1 1
0 0
1 1
0 0
0 0
0 0
0 0
1 1
0 1
1 0
0 0
0 0
1 1
0 0
1 1
0 0
0 1
1 1
0 0
0 0
0 0
1 0
1 1
0 0
0 0
1 1
0 0
0 1
0 0
0 0
1 1
0 1
0 0
0 0


# Printing Condensed Set

In [6]:
print(condensed_set)

[[10 12  7  5 10  8  8  1  8 10  4  6  3  3  2  4  4  3  5  1]
 [10 14 10  9 15  4  5  2  5  7  3 10  7  4  6  7  5  9 12  0]
 [10 15  4  6 14  6 10  5  3  7  2  7 13  3  3 15  3  3  5  1]
 [ 8 12  9 10 10  5 10  5  5  5  2 10  9 13  2  9  8  7 12  1]
 [ 8 11  7  8 10  7  8  1  5  3  2 15  5  3  6 12  5  4  7  1]
 [10 11 11 10 14  5  8  1  4  3  4 15 11 13  9 13 13 12 13  0]
 [10 15  7  2 15  6 10  1  3  5  2  9 15 13 10 15 13 15 15  1]
 [10 15  9  3 15  8 10  1  3  5  6 10 15 13 10 15 15 15 15  0]
 [10 13 12  2 15  7 10  5 15 10  2 15 12 11  7  6 10  9 12  0]
 [ 9 12 14  9 15 10  9  3  6  3  2 15 15  3 10 15 11  3 11  1]
 [10  3  5  2  9  6 10  1  3  9  6 11 10  9  9 14  6 10 10  0]
 [10 15 12 10 15  6 10  1  3  3  2  4  3  3  2 15 13  6 11  1]
 [10 10 12  5  7  6  6  4  5 10  4 11  9 11  8 11 11 10 11  0]
 [10 15  3  2 14  8 10  1  4  7  2  7  3  3  6 11  3  3 15  1]
 [10 12 12  8 10  8  6  2  7  6  2 12 11  9  8 12 10 10  9  1]]


# Using Condensed set to Classify Test Set

In [7]:
predictions = []
actual_labels = [test[i][-1] for i in range(len(test))]
for i in range(len(test)):
    nn5 = find_neighbours(test[i], condensed_set, 5)
    predictions.append(predict_label(nn5))

print(predictions)
print(actual_labels)
correctly_classified = 0
for i in range(len(test)):
    if predictions[i] == actual_labels[i]:
        correctly_classified += 1
print('Accuracy from CNN: ', (correctly_classified/float(len(test)))*100, '%')

[1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Accuracy from CNN:  60.0 %


# Implementing k Nearest Neighbours

In [8]:
from sklearn.neighbors import KNeighborsClassifier
neigh = KNeighborsClassifier(n_neighbors=5)

train_knn = np.array(train_knn)
x=train_knn[:,:-1]
y=train_knn[:,-1]
neigh.fit(x, y)

KNeighborsClassifier()

In [9]:
prediction_knn = neigh.predict(test[:, :-1])
print(prediction_knn)
correctly_classified_knn = 0
for i in range(len(test)):
    if predictions[i] == actual_labels[i]:
        correctly_classified_knn += 1
print('Accuracy from k-NN: ', (correctly_classified_knn/float(len(test)))*100, '%')

[1 1 1 1 0 1 0 0 0 0 0 0 0 0 0]
Accuracy from k-NN:  60.0 %
