In [1]:
import torch 
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable
from torch.nn.parameter import Parameter
import bottleneck as bn
import os
import time

In [2]:
import numpy as np
from scipy.spatial import KDTree
import torch.multiprocessing as mp
from sklearn.neighbors import KNeighborsClassifier
from sklearn import preprocessing

In [3]:
os.system("taskset -p 0xff %d" % os.getpid())

0

In [3]:
class RBFkernel():
    def __init__(self,var_s,length_scale, var_n=0.):
        self.var_s = var_s
        self.var_n = var_n
        self.inv_lscale = -0.5/length_scale
    
    def cov(self, tX, X):
        k1 = np.sum(tX**2,1).reshape(-1,1)
        k2 = np.repeat(np.sum(X**2,1).reshape(1,-1),tX.shape[0],axis=0)
        k = k1+k2-2*np.matmul(tX,X.transpose())
        k = self.var_s*np.exp(k*self.inv_lscale)
        return k

In [33]:
class KNN():
    def __init__(self,knl,K,num_job=8):
        #super(KNN, self).__init__()
        self.knl = knl
        self.K = K
        self.num_job = num_job
    def set_affinity_on_worker(self):
        """When a new worker process is created, the affinity is set to all CPUs"""
        #print("I'm the process %d, setting affinity to all CPUs." % os.getpid())
        os.system("taskset -p 0xff %d" % os.getpid())

    def Klargest(self, a):
        return np.argpartition(a, -self.K)[-self.K:]
        
    def Nnbrs(self, cor):
        pool = mp.Pool(self.num_job)
        nbr_ids=[None]*cor.shape[0]
        for i in range(len(nbr_ids)):
            nbr_ids[i]=pool.apply_async(self.Klargest, args=(cor[i],))
        pool.close()
        pool.join()
        nbr_ids = [ni.get() for ni in nbr_ids]
        return np.vstack(nbr_ids)
    
    def assignLbl(self, nbr_lbs):
        C = nbr_lbs.max()+1
        camp = np.tile(np.array(range(0,C)),self.K).reshape(self.K,-1)
        return np.argsort((np.tile(nbr_lbs.reshape(-1,1),C)==camp).sum(axis=0))[-1]
    
    def predict(self,nbr_ids,nbr_lbs):
        pool = mp.Pool(self.num_job)
        y = [None]*nbr_ids.shape[0]
        for i in range(nbr_ids.shape[0]):
            y[i] = pool.apply_async(self.assignLbl, args=(nbr_lbs[i],))
        pool.close()
        pool.join()
        y = [ni.get() for ni in y]
        return y
    
    
    def fit_predict(self, testX, X, Y, batch_size=100):
        
        #C = Y.max()+1
        #self.camp = np.tile(np.array(range(0,C)),self.K).reshape(self.K,-1)
        
        B = testX.shape[0]/batch_size
        if B*batch_size < testX.shape[0]:
            B+=1
        nbrs = np.zeros((testX.shape[0],self.K),dtype=int)
        py = np.zeros(testX.shape[0],dtype=int)
        for b in range(B):
            uper = min((b+1)*batch_size, testX.shape[0])
            print b,uper
            tX = testX[b*batch_size:uper]
            cor = self.knl.cov(tX,X)
            nbrs[b*batch_size:uper] = self.Nnbrs(cor)
            py[b*batch_size:uper] = self.predict(nbrs[b*batch_size:uper], Y[nbrs[b*batch_size:uper]])
        #py = Variable(torch.FloatTensor(py))
        return py
            
    

In [9]:
batch_size = 100
train_dataset = dsets.MNIST(root='/home/yu/gits/pytorch-tutorial/tutorials/data/',
                            train=True, 
                            transform=transforms.ToTensor(),
                            download=True)

test_dataset = dsets.MNIST(root='/home/yu/gits/pytorch-tutorial/tutorials/data/',
                           train=False, 
                           transform=transforms.ToTensor())

# Data Loader (Input Pipeline)
#train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
#                                           batch_size=batch_size, 
#                                           shuffle=True)

#test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
#                                         batch_size=batch_size, 
#                                          shuffle=False)

In [10]:
X = train_dataset.train_data.numpy()

In [11]:
X = X.reshape(X.shape[0],-1)

In [12]:
X = preprocessing.scale(X)
X = preprocessing.normalize(X)



In [13]:
Y = train_dataset.train_labels.numpy()

In [14]:
testX = test_dataset.test_data.numpy()
testX = testX.reshape(testX.shape[0],-1)
testX = preprocessing.scale(testX)
testX = preprocessing.normalize(testX)

In [15]:
var_s = Variable(torch.FloatTensor([[1.]]),requires_grad=True)
var_n = Variable(torch.FloatTensor([[1.]]),requires_grad=True)
length_scale = Variable(torch.FloatTensor([[1.]]),requires_grad=True)

In [16]:
#kernel = RBFkernel(var_s, var_n, length_scale)
kernel = RBFkernel(1.,1.)

In [35]:
model = KNN(kernel,10)

In [36]:
start = time.time()
tmp = model.fit_predict(testX, X,Y,batch_size=1000)
stop = time.time()
stop-start

0 1000
1 2000
2 3000
3 4000
4 5000
5 6000
6 7000
7 8000
8 9000
9 10000


49.831063985824585

In [22]:
testY = test_dataset.test_labels.numpy()

In [37]:
(tmp==testY).sum()/10000.

0.94059999999999999