In [1]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

# Load the stored feature array and the matching label array.
# NOTE(review): both paths are absolute and machine-specific; adjust them when
# running outside the original environment.
allFeat = np.load('/tf/notebooks/stl_feat/feats_2048.npy')
# Labels are shifted down by one on load -- presumably from 1-based to 0-based
# class ids; TODO confirm against the file contents.
gtAll = np.load('/tf/notebooks/stl_feat/labels_2048.npy')-1



In [2]:
from clusteringX2 import normIt
from sklearn.cluster import KMeans

def clusteringKmeans(feat, numClass=2):
    """Normalise ``feat`` with ``normIt`` and fit a K-means model on the result.

    Parameters
    ----------
    feat : array-like
        Samples to cluster; passed unchanged to ``normIt``.
    numClass : int, optional
        Number of clusters to fit (default 2).

    Returns
    -------
    kmeans : sklearn.cluster.KMeans
        Model fitted on the normalised features, seeded with
        ``random_state=0`` so repeated runs give the same clustering.
    m
        Second value returned by ``normIt``. It has to be handed back to
        ``normIt`` when new data is normalised for this model (presumably
        the normalisation statistics -- confirm in ``clusteringX2``).
    """
    feat, m = normIt(feat)

    # The fitted model is all callers need: training labels are available as
    # ``kmeans.labels_`` and the centroids as ``kmeans.cluster_centers_``,
    # so nothing else is computed here.
    kmeans = KMeans(n_clusters=numClass, random_state=0).fit(feat)

    return kmeans, m


In [3]:
class NodeK:
    """One node of the cluster tree.

    The constructor only stores its arguments:

    * ``mask``       -- selector for the samples that belong to this node
    * ``classifier`` -- the clustering model passed in as ``kmeans``
    * ``m``          -- normalisation value that accompanies the model
    * ``numClus``    -- number of clusters of the model
    """

    def __init__(self, mask=None, kmeans = None, m=None, numClus=None):
        # Note the rename: the ``kmeans`` argument is kept as ``classifier``.
        self.numClus = numClus
        self.m = m
        self.classifier = kmeans
        self.mask = mask
        

    
class Layer:
    """One level of the tree: a list of entries appended by the tree builders."""

    def __init__(self):
        # Every layer starts out empty and owns its own list.
        self.nodeList = list()
        
class Tree:
    """Container holding the nodes and layers of the cluster tree.

    ``listOfNodes`` / ``listOfLayers`` keep the objects in insertion order;
    ``numNodes`` / ``numLayers`` count how many have been added through
    ``addNode`` / ``addLayer``.
    """

    def __init__(self):
        self.listOfNodes = []
        self.numNodes = 0
        self.listOfLayers = []
        self.numLayers = 0

    def addNode(self, c):
        """Store node ``c`` at the end of the node list and count it."""
        self.listOfNodes.append(c)
        self.numNodes += 1

    def addLayer(self, c):
        """Store layer ``c`` at the end of the layer list and count it."""
        self.listOfLayers.append(c)
        self.numLayers += 1

    
def startTree(allFeat, numClass=10):
    """Create a tree whose single root node clusters every row of ``allFeat``.

    Parameters
    ----------
    allFeat : numpy.ndarray
        Feature array; its first dimension indexes the samples.
    numClass : int, optional
        Number of clusters for the root's K-means model (default 10).

    Returns
    -------
    Tree
        A tree with one node (the root) and one layer that refers to it.
    """
    rootModel, rootNorm = clusteringKmeans(allFeat, numClass)
    # The root owns every sample, so its mask is True everywhere.
    everySample = np.ones(allFeat.shape[0], dtype=bool)
    root = NodeK(everySample, rootModel, rootNorm, numClass)

    tree = Tree()
    tree.addNode(root)

    # A layer refers to its nodes by position in tree.listOfNodes; the node
    # that was just added sits at the last position.
    firstLayer = Layer()
    firstLayer.nodeList.append(tree.numNodes - 1)
    tree.addLayer(firstLayer)

    return tree

    
def node2LabelK(feat, node):
    """Return the cluster label that ``node``'s model predicts for each sample.

    ``feat`` is first normalised by ``normIt`` using the value stored on the
    node (``node.m``); the normalised samples are then passed to
    ``node.classifier.predict``.
    """
    normalised, _unused = normIt(feat, node.m)
    labels = node.classifier.predict(normalised)
    return labels

    

    

def extendTree(tree, allFeat, numClass=10, clusteringThreshold=100):
    """Grow ``tree`` by one layer by sub-clustering the nodes of its last layer.

    For every node of the last layer, the node's samples are labelled with the
    node's own clustering model. Each cluster that holds more than
    ``clusteringThreshold`` samples becomes a child node with a freshly fitted
    K-means model. The children of all parent nodes are gathered in one new
    layer, which is appended to the tree exactly once per call (the layer may
    be empty when no cluster is large enough).

    Parameters
    ----------
    tree : Tree
        Tree to extend; modified in place.
    allFeat : numpy.ndarray
        The same feature array the tree was built on; node masks index its
        first dimension.
    numClass : int, optional
        Number of clusters fitted for every new child node (default 10). Also
        used as the parent's cluster count when ``node.numClus`` is ``None``.
    clusteringThreshold : int, optional
        A cluster is split further only if it has strictly more samples than
        this value (default 100).

    Returns
    -------
    None
    """
    newLayer = Layer()
    lastLayer = tree.listOfLayers[-1]
    numSamples = allFeat.shape[0]

    for nodeIndex in lastLayer.nodeList:
        node = tree.listOfNodes[nodeIndex]
        curFeat = allFeat[node.mask, :]
        clusterInd = node2LabelK(curFeat, node)
        # Row positions in allFeat of this node's samples, aligned with clusterInd.
        curInd = np.where(node.mask)[0]

        # The labels come from the parent's model, so iterate over the number
        # of clusters that model has rather than the count for new children.
        parentClusters = node.numClus if node.numClus is not None else numClass

        for i in range(parentClusters):
            subMask = clusterInd == i
            if np.count_nonzero(subMask) <= clusteringThreshold:
                # Too few samples to be worth splitting again.
                continue

            # Express the child's membership as a mask over all samples.
            newMask = np.zeros(numSamples, dtype=bool)
            newMask[curInd[subMask]] = True

            kmeans, m = clusteringKmeans(allFeat[newMask, :], numClass)
            tree.addNode(NodeK(newMask, kmeans, m, numClass))
            newLayer.nodeList.append(tree.numNodes - 1)

    # Register the layer once, after every parent node has been processed;
    # doing this inside the loop would add the same layer once per parent.
    tree.addLayer(newLayer)





    
    
# Keep only the samples whose (shifted) label is below 4.
feat = allFeat[gtAll<4,:]

# Build the root of the cluster tree on that subset (startTree default: 10 clusters).
tree = startTree(feat)


In [22]:
# Sub-cluster the nodes of the tree's last layer. extendTree modifies `tree`
# in place, so every execution of this cell grows the tree again.
extendTree(tree, feat)    

In [23]:
from sklearn.metrics import pairwise_distances

# Normalise the selected features and compute all pairwise distances between
# samples (pairwise_distances is called with its default metric).
feat1_, _ = normIt(feat)
d_ori = pairwise_distances(feat1_, feat1_)
# Row i holds the sample indices ordered by increasing distance to sample i.
nnOri = np.argsort(d_ori, axis =1)

# Node-membership matrix: projectX[s, k] is 1 when the mask of node k selects
# sample s, and 0 otherwise.
projectX = np.zeros([feat.shape[0], tree.numNodes])
for i, n in enumerate(tree.listOfNodes):
    m = n.mask
    projectX[m,i] = 1


In [24]:

# clusNeigbours[i, j] is True when samples i and j belong to exactly the same
# set of tree nodes (identical rows of projectX). A sample is never counted as
# its own neighbour.
clusNeigbours = np.zeros([feat.shape[0],feat.shape[0]], dtype=bool )

for i in range(feat.shape[0]):
    # Sum of absolute differences is 0 only for rows identical to row i.
    sameClus = np.sum(np.absolute(projectX - projectX[i,:]), axis =1)
    mask = sameClus == 0
    clusNeigbours[i,:] = mask
    clusNeigbours[i,i] = False


# Each row sorted ascending: column 0 is the smallest distance in the row
# (expected to be the sample's distance to itself), column 1 the next one,
# i.e. the distance to the true nearest neighbour.
sorted_dist = np.sort(d_ori, axis =1)

err_store = np.zeros(clusNeigbours.shape[0])

err = 0
num_neigbours = 0
for i in range(nnOri.shape[0]):
    # Distance from sample i to the closest sample that shares its tree nodes.
    # NOTE: np.min raises ValueError when sample i has no such neighbour.
    val1 = np.min(d_ori[clusNeigbours[i,:], i])
    val2 = sorted_dist[i, 1]
    # Relative excess of the in-cluster nearest distance over the true
    # nearest distance; 0 means the true nearest neighbour is in the cluster.
    rel_err = (val1-val2)/val2
    err = err + rel_err
    err_store[i] = rel_err
    # count_nonzero counts the True entries in C instead of iterating the row
    # element by element with the builtin sum.
    num_neigbours = num_neigbours + np.count_nonzero(clusNeigbours[i,:])
# Mean relative error and mean number of cluster neighbours per sample.
print(err/nnOri.shape[0])
print(num_neigbours/nnOri.shape[0])

0.01839507125009748
358.69576923076926


In [26]:
# Median of the per-sample relative errors.
print(np.median(err_store))
# Sort ascending and print everything from position 5000 onward, i.e. the
# largest errors.
# NOTE(review): 5000 is a hard-coded offset; it presumably assumes a fixed
# number of samples -- confirm before reusing with other data.
e = np.sort(err_store)
print(e[5000:])

0.0
[0.1134485  0.11392804 0.1140201  0.11463634 0.11464337 0.11500245
 0.11532362 0.1161875  0.11621554 0.11716925 0.1172725  0.11734523
 0.117943   0.11816794 0.12001792 0.12158482 0.12197601 0.12205692
 0.12232045 0.12260818 0.12279965 0.12341097 0.12358131 0.12380792
 0.12518215 0.12532035 0.12657154 0.12661635 0.12728546 0.1289207
 0.12931718 0.12950775 0.13057428 0.13127667 0.13175269 0.13217687
 0.13223057 0.13247108 0.13318749 0.1335642  0.13433533 0.13593933
 0.13636381 0.1388089  0.13910582 0.140051   0.14061699 0.14228071
 0.14272073 0.14443116 0.1445864  0.1446738  0.14601649 0.14627635
 0.14786993 0.14927065 0.14966266 0.15142649 0.15164909 0.15190402
 0.15233411 0.15270719 0.15297382 0.15334911 0.15472458 0.15551219
 0.15663497 0.15665734 0.15758798 0.16016011 0.16050797 0.16146605
 0.16181917 0.16216364 0.16246819 0.16256037 0.16285555 0.16399347
 0.16407495 0.16452247 0.1665861  0.16737806 0.16824313 0.16863601
 0.16883426 0.16898956 0.16923423 0.16936286 0.16991703 0.1