#### Sources
- https://cs231n.github.io/classification/
- https://www.youtube.com/watch?v=t2IwlUtbCFE&list=PL16j5WbGpaM0_Tj8CRmurZ8Kk1gEBc7fg&index=2
- https://www.cs.toronto.edu/~kriz/cifar.html

In [54]:
import numpy as np
import pickle
import matplotlib.pyplot as plt

In [3]:
def unpickle(file):
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

In [44]:

cifar_data_1 = unpickle('cifar-10-python/data_batch_1')
cifar_data_2 = unpickle('cifar-10-python/data_batch_2')
cifar_data_3 = unpickle('cifar-10-python/data_batch_3')
cifar_data_4 = unpickle('cifar-10-python/data_batch_4')
cifar_data_5 = unpickle('cifar-10-python/data_batch_5')

In [46]:
Xtr = np.concatenate([cifar_data_1[b'data'], cifar_data_2[b'data'], cifar_data_3[b'data'], cifar_data_4[b'data']])
ytr = np.concatenate([cifar_data_1[b'labels'], cifar_data_2[b'labels'], cifar_data_3[b'labels'], cifar_data_4[b'labels']])
Xtst = cifar_data_5[b'data']
ytst = cifar_data_5[b'labels']

In [50]:
def eval(ypred, ytst):
    return np.sum(np.equal(ypred, ytst)) / ypred.shape[0]

### Nearest Neighbor Classification using L1 Distance

distance metric: d(I1, I2) = $\Sigma$|I$_{1}$$^{p}$ - I$_{2}$$^{p}$|

In [71]:
class L1NN(object):
    def __init__(self):
        pass

    def train(self, X, y):
        self.Xtr = X
        self.ytr = np.array(y)

    def predict(self, X):
        num_test = X.shape[0]
        Ypred = np.zeros(num_test, dtype = self.ytr.dtype)

        for i in range(num_test):
            distances = np.sum(np.abs(self.Xtr - X[i, :]), axis=1)
            min_index = np.argmin(distances) 
            Ypred[i] = self.ytr[min_index]

        return Ypred

In [76]:
class L2NN(object):
    def __init__(self):
        pass

    def train(self, X, y):
        self.Xtr = X
        self.ytr = np.array(y)

    def predict(self, X):
        num_test = X.shape[0]
        Ypred = np.zeros(num_test, dtype = self.ytr.dtype)

        for i in range(num_test):
            distances = np.sum(np.square(self.Xtr - X[i, :]), axis=1)
            min_index = np.argmin(distances) 
            Ypred[i] = self.ytr[min_index]

        return Ypred

In [99]:
l1nn = L1NN()
l2nn = L2NN()

In [100]:
l1nn.train(Xtr, ytr)
l2nn.train(Xtr, ytr)

In [101]:
#l1nn accuracy
sample_idxs = np.random.choice(np.arange(Xtst.shape[0]), size=10000)
Xtst_sample = Xtst[sample_idxs, :]
ytst_sample = np.array(ytst)[sample_idxs]
l1nn_ypred_sample = l1nn.predict(Xtst_sample)
l2nn_ypred_sample = l2nn.predict(Xtst_sample)

print('l1nn accuracy:', eval(l1nn_ypred_sample, ytst_sample))
print('l2nn accuracy:', eval(l2nn_ypred_sample, ytst_sample))

l1nn accuracy: 0.22
l2nn accuracy: 0.2


#### K Nearest Neighbors

In [168]:
class KNN(object):
    def __init__(self):
        pass

    def train(self, X, y):
        self.Xtr = X
        self.ytr = np.array(y)

    def predict(self, X, k):
        num_test = X.shape[0]
        Ypred = np.zeros(num_test, dtype = self.ytr.dtype)

        for i in range(num_test):
            knn_idxs = np.argpartition(np.sum(np.abs(self.Xtr - X[i, :]), axis=1), kth=k)[:k]
            knn_labels = self.ytr[knn_idxs]
            values, counts = np.unique(knn_labels, return_counts=True)
            Ypred[i] = values[np.argmax(counts)]

        return Ypred

In [169]:
knn = KNN()

In [170]:
knn.train(Xtr, ytr)

In [172]:
#knn accuracy
sample_idxs = np.random.choice(np.arange(Xtst.shape[0]), size=1000)
Xtst_sample = Xtst[sample_idxs, :]
ytst_sample = np.array(ytst)[sample_idxs]
l1nn_ypred_sample = l1nn.predict(Xtst_sample)
l2nn_ypred_sample = l1nn.predict(Xtst_sample)
knn_ypred_sample = knn.predict(Xtst_sample, 5)

print('l1nn accuracy:', eval(l1nn_ypred_sample, ytst_sample))
print('l2nn accuracy:', eval(l2nn_ypred_sample, ytst_sample))
print('knn accuracy:', eval(knn_ypred_sample, ytst_sample))

l1nn accuracy: 0.23
l2nn accuracy: 0.23
knn accuracy: 0.21


In [160]:
k_neighbor_labels = ytr[k_neighbor_idxs]
k_neighbor_labels

array([5, 7, 7, 7, 5])

In [163]:
values, counts = np.unique(k_neighbor_labels, return_counts=True)

In [167]:
values, counts, values[np.argmax(counts)]

(array([5, 7]), array([2, 3]), np.int64(7))