In [46]:
from tqdm import tqdm_notebook

In [56]:
import numpy as np
from assignment1.cs231n.data_utils import load_CIFAR10

class NearestNeighbor(object):
    """1-nearest-neighbour classifier using the L1 (sum of absolute differences) distance.

    ``train`` only memorises the data; ``predict`` labels every query row with
    the label of the closest memorised row.
    """

    def __init__(self):
        pass

    @staticmethod
    def _progress(iterable, total):
        """Wrap ``iterable`` in a tqdm progress bar when tqdm is importable.

        ``tqdm.auto`` replaces the deprecated ``tqdm_notebook`` entry point.
        The bar is purely cosmetic, so when tqdm is not installed the plain
        iterable is returned instead of failing.
        """
        try:
            from tqdm.auto import tqdm
        except ImportError:
            return iterable
        return tqdm(iterable, total=total, mininterval=1)

    @staticmethod
    def _difference_dtype(*dtypes):
        """Return a signed dtype in which ``a - b`` cannot wrap, or None.

        For integer inputs narrower than 64 bits the next wider signed integer
        type is returned (e.g. uint8 -> int16). For every other dtype None is
        returned, meaning "subtract as usual"; 64-bit integers have no wider
        integer type, so they are left untouched.
        """
        common = np.result_type(*dtypes)
        if common.kind not in "ui" or common.itemsize >= 8:
            return None
        return np.dtype("int%d" % (common.itemsize * 16))

    def train(self, X, y):
        """ X is N x D where each row is an example. Y is 1-dimension of size N

        Raises:
            ValueError: if X and y do not contain the same number of examples.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if len(X) != len(y):
            raise ValueError(
                "X has %d rows but y has %d labels" % (len(X), len(y)))
        # the nearest neighbor classifier simply remembers all the training data
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        """ X is N x D where each row is an example we wish to predict label for

        Returns a 1-dimensional array of N labels with the dtype of the
        training labels.
        """
        X = np.asarray(X)
        num_test = X.shape[0]
        # lets make sure that the output type matches the input type
        y_pred = np.zeros(num_test, dtype=self.y_train.dtype)
        # Unsigned / narrow integer data (e.g. uint8 pixels) would wrap around
        # in `a - b`, so such inputs are subtracted in a wider signed type.
        wide = self._difference_dtype(self.X_train.dtype, X.dtype)
        # loop over all test rows
        for i in self._progress(range(num_test), num_test):
            # find the nearest training image to the i'th test image
            # using the L1 distance (sum of absolute value differences)
            if wide is None:
                diffs = self.X_train - X[i, :]
            else:
                diffs = np.subtract(self.X_train, X[i, :], dtype=wide)
            distances = np.sum(np.abs(diffs), axis=1)
            min_index = np.argmin(distances) # get the index with smallest distance
            y_pred[i] = self.y_train[min_index] # predict the label of the nearest example

        return y_pred

In [66]:
import numpy as np
from assignment1.cs231n.data_utils import load_CIFAR10

class KNearestNeighbor(object):
    """k-nearest-neighbour classifier using the L1 distance and majority voting.

    When two or more labels share the highest vote count among the k nearest
    training rows, the prediction is ``TIE_LABEL`` (-1) instead of a real label.
    """

    # Sentinel written to the prediction when the vote is tied.
    TIE_LABEL = -1

    def __init__(self, k):
        """k is the number of neighbours that vote; it must be a positive integer.

        Raises:
            ValueError: if k is not an integer >= 1.
        """
        if isinstance(k, bool) or not isinstance(k, (int, np.integer)) or k < 1:
            raise ValueError("k must be a positive integer, got %r" % (k,))
        self.k = int(k)

    @staticmethod
    def _progress(iterable, total):
        """Wrap ``iterable`` in a tqdm progress bar when tqdm is importable.

        ``tqdm.auto`` replaces the deprecated ``tqdm_notebook`` entry point.
        The bar is purely cosmetic, so when tqdm is not installed the plain
        iterable is returned instead of failing.
        """
        try:
            from tqdm.auto import tqdm
        except ImportError:
            return iterable
        return tqdm(iterable, total=total, mininterval=1)

    @staticmethod
    def _difference_dtype(*dtypes):
        """Return a signed dtype in which ``a - b`` cannot wrap, or None.

        For integer inputs narrower than 64 bits the next wider signed integer
        type is returned (e.g. uint8 -> int16). For every other dtype None is
        returned, meaning "subtract as usual"; 64-bit integers have no wider
        integer type, so they are left untouched.
        """
        common = np.result_type(*dtypes)
        if common.kind not in "ui" or common.itemsize >= 8:
            return None
        return np.dtype("int%d" % (common.itemsize * 16))

    def train(self, X, y):
        """ X is N x D where each row is an example. Y is 1-dimension of size N

        Raises:
            ValueError: if X and y do not contain the same number of examples.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if len(X) != len(y):
            raise ValueError(
                "X has %d rows but y has %d labels" % (len(X), len(y)))
        # the nearest neighbor classifier simply remembers all the training data
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        """ X is N x D where each row is an example we wish to predict label for

        Returns a 1-dimensional array of N labels. Its dtype is the dtype of
        the training labels, except that unsigned label dtypes are widened so
        that the tie sentinel -1 can be stored.
        """
        X = np.asarray(X)
        num_test = X.shape[0]
        # lets make sure that the output type matches the input type;
        # unsigned labels cannot represent TIE_LABEL, so widen only in that case
        label_dtype = self.y_train.dtype
        if label_dtype.kind == "u":
            label_dtype = np.result_type(label_dtype, np.int8)
        y_pred = np.zeros(num_test, dtype=label_dtype)
        # Unsigned / narrow integer data (e.g. uint8 pixels) would wrap around
        # in `a - b`, so such inputs are subtracted in a wider signed type.
        wide = self._difference_dtype(self.X_train.dtype, X.dtype)
        # loop over all test rows
        for i in self._progress(range(num_test), num_test):
            # L1 distance (sum of absolute value differences) from the i'th
            # test row to every training row
            if wide is None:
                diffs = self.X_train - X[i, :]
            else:
                diffs = np.subtract(self.X_train, X[i, :], dtype=wide)
            distances = np.sum(np.abs(diffs), axis=1)
            min_indices = np.argsort(distances)[:self.k] # get k indices with smallest distances
            # predict the label by voting among the k nearest examples
            labels, counts = np.unique(self.y_train[min_indices], return_counts=True)
            if np.count_nonzero(counts == counts.max()) > 1:
                # more than one label has the top vote count: no clear winner
                y_pred[i] = self.TIE_LABEL
            else:
                y_pred[i] = labels[np.argmax(counts)]
        return y_pred

In [60]:
X_train, y_train, X_test, y_test = load_CIFAR10('data/cifar-10/') # a magic function we provide
# flatten out all images to be one-dimensional
# (use the arrays loaded above; the old names Xtr / Xte are not defined in this notebook)
X_train_rows = X_train.reshape(X_train.shape[0], 32 * 32 * 3) # X_train_rows becomes 50000 x 3072
X_test_rows = X_test.reshape(X_test.shape[0], 32 * 32 * 3) # X_test_rows becomes 10000 x 3072

In [70]:
%%time
# Build the 1-nearest-neighbour classifier and give it the flattened training set.
nn = NearestNeighbor() # create a Nearest Neighbor classifier instance
nn.train(X_train_rows, y_train) # train() only stores the arrays on the instance; no distances are computed here

CPU times: user 177 µs, sys: 3.11 ms, total: 3.29 ms
Wall time: 3.24 ms


In [54]:
# predict() loops over the test rows and, for each one, computes the L1 distance
# to every stored training row before taking the closest one.
y_test_predict = nn.predict(X_test_rows) # predict labels on the test images


(50000, 3072)
(3072,)


HBox(children=(IntProgress(value=0, max=10000), HTML(value='')))




In [58]:
# and now print the classification accuracy, which is the fraction
# of test examples that are correctly predicted (i.e. label matches)
print('accuracy: %f' % ( np.mean(y_test_predict == y_test) ))

accuracy: 0.385900


In [71]:
%%time
# k = 1: only the single nearest training row votes for each prediction.
knn = KNearestNeighbor(1) # create a k-Nearest Neighbor classifier instance with k=1
knn.train(X_train_rows[:5000], y_train[:5000]) # train on the first 5000 training images and labels only

CPU times: user 7 µs, sys: 2 µs, total: 9 µs
Wall time: 11.9 µs


In [72]:
# Only the first 500 test rows are classified here.
# NOTE: this rebinds y_test_predict, replacing the NearestNeighbor predictions stored under the same name.
y_test_predict = knn.predict(X_test_rows[:500]) # predict labels on the test images


HBox(children=(IntProgress(value=0, max=500), HTML(value='')))




In [73]:
# and now print the classification accuracy, which is the fraction
# of the 500 evaluated test examples that are correctly predicted (i.e. label matches)
print('accuracy: %f' % ( np.mean(y_test_predict == y_test[:500]) ))

accuracy: 0.290000


In [13]:
def my_generator(step=4, path='file.txt'):
    """Lazily yield the lines of a text file, one at a time.

    Args:
        step: Currently unused; kept so existing calls keep working.
        path: File to read. Defaults to 'file.txt', the name that was
            previously hard-coded.

    Yields:
        Each line of the file as read, including its trailing newline
        when the file has one.
    """
    # The file stays open only while the generator is being consumed;
    # the `with` block closes it once iteration finishes.
    with open(path, 'r') as f:
        for line in f:
            yield line

# Drive the generator: lines are pulled from the file one at a time as the loop advances.
# Each yielded line still carries its own newline and print() appends another,
# which is why the output below is double-spaced.
myiterator = my_generator() 
for item in myiterator: 
    print(item) 


1hjkkfoovxcv

2sdfsdf

3svsdxdvsd

4safasfooqwqqwr

5fsfsafsf

6fasfasfoo
