In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

# Open the dataset archive and pull out the training images and their labels
q1_digits = np.load('q1_data/q1_digits.npz')
digits_train, labels_train = q1_digits['digits_train'], q1_digits['labels_train']

# Show the first training example as an image
plt.figure()
plt.imshow(digits_train[0])
#plt.savefig('digit.png')

# Report which class that first example belongs to
print('The class label of the first example:', labels_train[0])

# Show the first three training examples side by side (1 row, 3 columns)
fig, ax = plt.subplots(nrows=1, ncols=3)
for col in range(3):
    ax[col].imshow(digits_train[col])
#plt.savefig('many_digit.png')

First, implement the distance functions:

In [None]:
def euclidean(x1, x2):
    """Return the Euclidean (L2) distance between two arrays.

    Both inputs are converted to float64 before subtracting. Without the
    conversion, unsigned integer inputs (e.g. uint8 pixel intensities) would
    wrap around on subtraction and produce a wrong distance.

    Parameters
    ----------
    x1, x2 : array_like
        Arrays of the same (or broadcastable) shape. For 2-D inputs the
        result is the Frobenius norm of the difference, i.e. the L2 norm over
        all entries.

    Returns
    -------
    numpy.float64
        The L2 norm of ``x1 - x2``.
    """
    diff = np.asarray(x1, dtype=np.float64) - np.asarray(x2, dtype=np.float64)
    return np.linalg.norm(diff)

# Manhattan distance implementation:
def manhattan(x1, x2):
    """Return the Manhattan (L1) distance between two arrays.

    The distance is the sum of absolute differences over *all* entries,
    whatever the dimensionality of the inputs. ``np.linalg.norm(..., ord=1)``
    is deliberately not used: for a 2-D input it computes the matrix 1-norm
    (the maximum absolute column sum), which is not the Manhattan distance.

    Both inputs are converted to float64 before subtracting so that unsigned
    integer inputs (e.g. uint8 pixel intensities) cannot wrap around.

    Parameters
    ----------
    x1, x2 : array_like
        Arrays of the same (or broadcastable) shape.

    Returns
    -------
    numpy.float64
        Sum of ``|x1 - x2|`` over every entry.
    """
    diff = np.asarray(x1, dtype=np.float64) - np.asarray(x2, dtype=np.float64)
    return np.sum(np.abs(diff))

# Cosine distance (1 - cosine similarity) implementation:
def cosine(x1, x2):
    """Return the cosine distance ``1 - cos(x1, x2)`` between two arrays.

    Both inputs are converted to float64 first, so the element-wise product
    cannot overflow for small integer dtypes (e.g. uint8 pixel intensities).

    Parameters
    ----------
    x1, x2 : array_like
        Arrays of the same (or broadcastable) shape; multi-dimensional inputs
        are treated entry-wise (sum of products over all entries, L2 norm
        over all entries).

    Returns
    -------
    float
        ``1 - <x1, x2> / (||x1|| * ||x2||)``. If either input has zero norm
        the cosine is undefined; ``1.0`` (no similarity) is returned instead
        of NaN so that results remain sortable.
    """
    a = np.asarray(x1, dtype=np.float64)
    b = np.asarray(x2, dtype=np.float64)
    numerator = np.sum(a * b)
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        # Zero vector: avoid 0/0 -> NaN, which would make distance ordering unreliable.
        return 1.0
    return 1 - numerator / denominator

# Chebyshev distance implementation:
def chebyshev(x1, x2):
    """Return the Chebyshev (L-infinity) distance between two arrays.

    Both inputs are converted to float64 before subtracting so that unsigned
    integer inputs (e.g. uint8 pixel intensities) cannot wrap around and
    report a spuriously large difference.

    Parameters
    ----------
    x1, x2 : array_like
        Arrays of the same (or broadcastable) shape.

    Returns
    -------
    numpy.float64
        The largest absolute entry-wise difference between the inputs.
    """
    diff = np.asarray(x1, dtype=np.float64) - np.asarray(x2, dtype=np.float64)
    return np.max(np.abs(diff))

Next, implement the function that finds the nearest neighbors:

In [None]:
# Pull the test images and their labels out of the loaded archive:
digits_test, labels_test = q1_digits['digits_test'], q1_digits['labels_test']

def indirect(distance, X1, X2):
    """Compute the distance between X1 and X2 using the metric named `distance`.

    Only the requested metric is evaluated: the lookup table maps names to the
    distance *functions* rather than to pre-computed results, so the three
    metrics that were not asked for are never run.

    Parameters
    ----------
    distance : str
        One of "euclidean", "manhattan", "cosine" or "chebyshev".
    X1, X2 : array_like
        The two samples to compare; passed unchanged to the metric.

    Returns
    -------
    The value returned by the selected distance function.

    Raises
    ------
    KeyError
        If `distance` is not one of the supported metric names.
    """
    metrics = {
        "euclidean": euclidean,
        "manhattan": manhattan,
        "cosine": cosine,
        "chebyshev": chebyshev,
    }
    return metrics[distance](X1, X2)


def get_neighbor(k, X_train, x_test, dist):
    """Find the k training samples closest to `x_test`.

    Parameters
    ----------
    k : int
        Number of neighbors to keep.
    X_train : sequence
        Training samples; each one is compared against `x_test`.
    x_test : array_like
        The query sample.
    dist : str
        Name of the metric, forwarded to `indirect`.

    Returns
    -------
    list of (int, distance) tuples
        Pairs of (training index, distance to `x_test`), ordered from nearest
        to farthest and cut to the first k entries. Equal distances keep
        their training-set order because the sort is stable.
    """
    scored = [
        (idx, indirect(dist, sample, x_test))
        for idx, sample in enumerate(X_train)
    ]
    scored.sort(key=lambda pair: pair[1])
    return scored[:k]

# get_neighbor(8, digits_train, digits_test[19], "euclidean")

Now do question (a):

In [None]:
def q_a(k, X_train, y_train, X_test):
    """Show each test image together with its k nearest training images.

    For every test image this prints and saves the image, looks up its k
    nearest neighbors under the Euclidean distance, prints the neighbors'
    training indices and class labels, and saves a one-row figure of the
    neighbor images. All figures are written to ``submit_image_q1a/``, which
    is created if it does not exist yet.

    Parameters
    ----------
    k : int
        Number of nearest neighbors to display per test image.
    X_train : numpy.ndarray
        Training images, indexed as ``X_train[i, :, :]``.
    y_train : array_like
        Training labels, aligned with `X_train`.
    X_test : numpy.ndarray
        Test images, indexed as ``X_test[j, :, :]``.

    Returns
    -------
    None
    """
    # savefig does not create missing directories, so make sure it is there.
    os.makedirs('submit_image_q1a', exist_ok=True)
    train_labels = np.asarray(y_train)

    for j in range(len(X_test)):
        # Plot test images:
        print("The image of the #{} test data: ".format(j + 1))
        plt.figure()
        plt.imshow(X_test[j,:,:])
        plt.savefig('submit_image_q1a/test-image-{}.png'.format(j))

        neighbors = get_neighbor(k, X_train, X_test[j], "euclidean")
        indices = [d[0] for d in neighbors]
        # .item() replaces np.asscalar, which was removed from NumPy.
        classes = [np.asarray(label).item() for label in train_labels[indices]]

        print("The indices for {} nearest neighbors of test image {} are: {}".format(k, j, indices))
        print("The classes for {} nearest neighbors of test image {} are: {}".format(k, j, classes))
        # Print nearest neighbors images:
        print("The image of the {} nearest neighbors are: ".format(k))

        # squeeze=False always yields a 2-D axes array, so k == 1 works too.
        fig, ax = plt.subplots(nrows=1, ncols=k, squeeze=False)

        for m, train_idx in enumerate(indices):
            # Use the X_train argument (not a notebook-level global) so the
            # images shown are the ones the neighbors were searched in.
            ax[0, m].imshow(X_train[train_idx,:,:])
        fig.savefig('submit_image_q1a/neighbors-under-test-image-{}.png'.format(j))


# Question (a): 8 nearest neighbors for each of the first five test images
q_a(
    k=8,
    X_train=digits_train,
    y_train=labels_train,
    X_test=digits_test[:5],
)
        

Implement the classification and accuracy functions:

In [None]:
def classify(indices, labels):
    """Return the most frequent label among the neighbors (majority vote).

    Parameters
    ----------
    indices : sequence
        Neighbor indices. Only used to pair up with `labels`; if it is
        shorter than `labels`, the extra labels are ignored.
    labels : sequence
        Class label of each neighbor; labels must be hashable.

    Returns
    -------
    The label with the highest count. When several labels tie, the one that
    appears first in `labels` wins.

    Raises
    ------
    IndexError
        If there are no (index, label) pairs to vote on.
    """
    tally = {}
    for _, label in zip(indices, labels):
        tally[label] = tally.get(label, 0) + 1
    # A stable descending sort keeps first-seen order among tied labels.
    ranked = sorted(tally, key=tally.get, reverse=True)
    return ranked[0]


def get_accuracy(true_y, predicted_y):
    """Print and return the fraction of predictions that match the truth.

    Parameters
    ----------
    true_y : sequence
        Ground-truth labels; its length defines how many positions are
        compared.
    predicted_y : sequence
        Predicted labels, compared position by position with `true_y`.

    Returns
    -------
    float
        Number of matching positions divided by ``len(true_y)``.
    """
    total = len(true_y)
    hits = sum(1 for pos in range(total) if true_y[pos] == predicted_y[pos])
    accuracy = hits / total
    print("The classification accuracy is {}".format(accuracy))
    return accuracy
        

Now do question (b):

In [None]:
def q_b(k, X_train, y_train, X_test, y_test, dist = "euclidean"):
    """Classify every test sample with k-NN and report the accuracy.

    Each test sample is assigned the majority label of its k nearest training
    samples under the metric `dist`; the predictions are then scored against
    `y_test` with `get_accuracy` (which also prints the result).

    Parameters
    ----------
    k : int
        Number of neighbors that vote on each prediction.
    X_train : sequence
        Training samples.
    y_train : array_like
        Training labels, aligned with `X_train`.
    X_test : sequence
        Test samples.
    y_test : iterable
        True labels of the test samples.
    dist : str, optional
        Metric name understood by `get_neighbor` (default "euclidean").

    Returns
    -------
    float
        The classification accuracy returned by `get_accuracy`.
    """
    # .item() replaces np.asscalar, which was removed from NumPy; wrapping in
    # np.asarray lets plain Python scalars through as well.
    true_y_list = [np.asarray(label).item() for label in y_test]
    train_labels = np.asarray(y_train)
    predicted_y_list = []

    for j in range(len(X_test)):
        neighbors = get_neighbor(k, X_train, X_test[j], dist)
        indices = [d[0] for d in neighbors]
        classes = [np.asarray(label).item() for label in train_labels[indices]]
        predicted_y_list.append(classify(indices, classes))

    return get_accuracy(true_y_list, predicted_y_list)
    


In [None]:
# Question (b): 10-NN with Euclidean distance on flattened images.
# -1 lets NumPy infer the number of pixels per image instead of hard-coding 784.
q_b(
    10,
    digits_train.reshape(len(digits_train), -1),
    labels_train,
    digits_test.reshape(len(digits_test), -1),
    labels_test,
)

Now do question (c) coding part:

In [None]:
# Evaluate k-NN accuracy (Euclidean distance) for several choices of k
k_list = [1,2,5,8,10,20,50]
accuracy_list = []
for k in k_list:
    print("For k = {}:".format(k))
    accuracy_list.append(q_b(k, digits_train, labels_train, digits_test, labels_test))
# Plot the graph related the choice of k with the accuracies:
plt.figure()
plt.plot(k_list, accuracy_list, '--o', c = "red")
plt.title("Accuracy Changes as k Changes")
plt.xlabel("k")
plt.ylabel("Accuracy")
# Save before showing: once plt.show() has displayed the figure it is typically
# closed, and a later savefig would write an empty image.
plt.savefig("q1c.png")
plt.show()

In [None]:
# 10-NN with the Manhattan distance.
# Images are flattened to 1-D feature vectors (as in the Euclidean run on
# flattened data) so the L1 distance is a vector norm over all pixels; NumPy
# treats a 2-D input to norm(..., ord=1) as a matrix, which is a different quantity.
q_b(
    10,
    digits_train.reshape(len(digits_train), -1),
    labels_train,
    digits_test.reshape(len(digits_test), -1),
    labels_test,
    "manhattan",
)

In [None]:
# 10-NN with the cosine distance
q_b(
    k=10,
    X_train=digits_train,
    y_train=labels_train,
    X_test=digits_test,
    y_test=labels_test,
    dist="cosine",
)

In [None]:
# 10-NN with the Chebyshev distance
q_b(
    k=10,
    X_train=digits_train,
    y_train=labels_train,
    X_test=digits_test,
    y_test=labels_test,
    dist="chebyshev",
)