In [1]:
import numpy as np
from sortedcontainers import SortedList
from util import get_data
from datetime import datetime

In [2]:
class KNN(object):
    def __init__(self, k):
        self.k = k
        
    def fit(self, X, y):
        self.X = X
        self.y = y
    
    def predict(self, X):
        y = np.zeros(len(X))
        for i, x in enumerate(X):
            sl = SortedList(load=self.k)
            for j, xt in enumerate(self.X):
                diff = x - xt
                d = diff.dot(diff)
                if len(sl) < self.k:
                    sl.add((d, self.y[j]))
                else:
                    if d < sl[-1][0]:
                        del sl[-1]
                        sl.add((d, self.y[j]))
            votes = {}
            for _, v in sl:
                votes[v] = votes.get(v, 0) + 1
            max_votes = 0
            max_votes_class = -1
            for v, count in votes.iteritems():
                if count > max_votes:
                    max_votes_class = v
                    max_votes = count
            y[i] = max_votes_class
        return y
    
    def score(self, X, Y):
        pred = self.predict(X)
        return np.mean(pred == Y)

In [3]:
X, Y = get_data(2000)
print len(X), len(Y)
Ntrain = 1000
Xtrain, Ytrain = X[:Ntrain], Y[:Ntrain]
Xtest, Ytest = X[Ntrain:], Y[Ntrain:]

for k in (1,2,3,4,5):
    knn = KNN(k)
    t0 = datetime.now()
    knn.fit(Xtrain, Ytrain)
    print 'Training time:', (datetime.now() - t0)
    t0 = datetime.now()
    print 'Train accuracy:', knn.score(Xtrain, Ytrain)
    print 'Time to compute train accuracy:', (datetime.now()-t0), "Train size:", len(Ytrain)
    
    t0 = datetime.now()
    print 'Test accuracy:', knn.score(Xtest, Ytest)
    print 'Time to compute test accuracy:', (datetime.now()-t0), "Test size:", len(Ytest)

Reading in and transforming data...
2000 2000
Training time: 0:00:00
Train accuracy: 1.0
Time to compute train accuracy: 0:00:10.880000 Train size: 1000
Test accuracy: 0.884
Time to compute test accuracy: 0:00:15.019000 Test size: 1000
Training time: 0:00:00
Train accuracy: 0.95
Time to compute train accuracy: 0:00:11.046000 Train size: 1000
Test accuracy: 0.874
Time to compute test accuracy: 0:00:15.569000 Test size: 1000
Training time: 0:00:00
Train accuracy: 0.949
Time to compute train accuracy: 0:00:12.115000 Train size: 1000
Test accuracy: 0.89
Time to compute test accuracy: 0:00:17.516000 Test size: 1000
Training time: 0:00:00
Train accuracy: 0.936
Time to compute train accuracy: 0:00:14.235000 Train size: 1000
Test accuracy: 0.883
Time to compute test accuracy: 0:00:17.375000 Test size: 1000
Training time: 0:00:00
Train accuracy: 0.925
Time to compute train accuracy: 0:00:12.640000 Train size: 1000
Test accuracy: 0.885
Time to compute test accuracy: 0:00:17.229000 Test size: 100