In [1]:
from keras.preprocessing import image
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input
from keras import backend as K
import numpy as np
from sklearn.svm import OneClassSVM
from sklearn.metrics import roc_curve, auc

import dirUtil 
from highDimLearning import vgg2feat

import tensorflow as tf

from sklearn.metrics.pairwise import euclidean_distances

import matplotlib.pyplot as plt


import os
import random 
    

def feat2labels(feat):
    """Stack per-class feature arrays and build the matching class labels.

    Parameters
    ----------
    feat : sequence of 2-D arrays
        ``feat[k]`` holds the samples of class ``k`` as rows. The column
        count is taken from ``feat[0]``.

    Returns
    -------
    allFeat : ndarray, shape (total_rows, n_columns), float64
        All rows of every entry of ``feat``, in order.
    allLabels : ndarray, shape (total_rows,), int
        For each row of ``allFeat``, the index in ``feat`` it came from.
    """
    rows_per_class = [block.shape[0] for block in feat]
    width = feat[0].shape[1]
    total_rows = sum(rows_per_class)

    allFeat = np.zeros([total_rows, width])
    allLabels = np.zeros(total_rows, dtype=int)

    start = 0
    for label, (block, n_rows) in enumerate(zip(feat, rows_per_class)):
        stop = start + n_rows
        allFeat[start:stop, :] = block
        allLabels[start:stop] = label
        start = stop
    return allFeat, allLabels
        
    
def readNetVlad(folder, numDim=4096):
    """Load a flat binary descriptor file from ``folder`` as a 2-D array.

    The file name is derived from the last path component of ``folder``:
    ``<folder>/vd16_pitts30k_conv5_3_vlad_preL2_intra_white_<name>_db.bin``.
    That component is also printed.

    Parameters
    ----------
    folder : str
        Directory containing the ``.bin`` file.
    numDim : int, optional
        Number of values per row (default 4096).

    Returns
    -------
    ndarray, shape (len(data) / numDim, numDim)
        The file contents read as little-endian float32 values.
    """
    folderName = os.path.basename(os.path.normpath(folder))
    print(folderName)
    descriptor_path = (
        folder
        + '/vd16_pitts30k_conv5_3_vlad_preL2_intra_white_'
        + folderName
        + '_db.bin'
    )
    raw = np.fromfile(descriptor_path, '<f4')
    row_count = int(len(raw) / numDim)
    return raw.reshape([row_count, numDim])

from scipy.optimize import linear_sum_assignment

def cluster_acc(Y_pred, Y):
    """Accuracy of a labelling under the best one-to-one label matching.

    Parameters
    ----------
    Y_pred, Y : integer arrays of equal size
        Predicted and reference labels, element by element.

    Returns
    -------
    accuracy : float
        Fraction of elements that agree under the chosen matching.
    w : ndarray, shape (D, D), int64
        Co-occurrence counts, ``w[p, t]`` = number of elements with
        predicted label ``p`` and reference label ``t``; ``D`` is the
        largest label seen plus one.

    The matched counts are also printed.
    """
    assert Y_pred.size == Y.size
    n_labels = max(Y_pred.max(), Y.max()) + 1
    w = np.zeros((n_labels, n_labels), dtype=np.int64)
    for pred_label, true_label in zip(Y_pred, Y):
        w[pred_label, true_label] += 1

    # linear_sum_assignment minimises cost, so flip the counts to pick the
    # pairing with the largest total overlap.
    rows, cols = linear_sum_assignment(w.max() - w)
    matched = w[rows, cols]

    print(matched)
    return matched.sum() / Y_pred.size, w



Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:

# Directory holding the precomputed STL-10 arrays. It defaults to the original
# machine-specific location; set the STL10_DATA_DIR environment variable to
# run the notebook elsewhere without editing this cell.
STL10_DATA_DIR = os.environ.get('STL10_DATA_DIR', '/media/daniel/D/Data/STL-10')

# Indexed below as feat_list[k] -> 2-D array of samples for class k.
feat_list = np.load(os.path.join(STL10_DATA_DIR, 'image_net_long.npy'))
# Labels are shifted down by one (presumably 1-based on disk -- TODO confirm).
# NOTE: `gt` is rebound later by `s, gt = score_samples_list(...)`.
gt = np.load(os.path.join(STL10_DATA_DIR, 'labels_2048.npy')) - 1



In [29]:
import copy

def get_train_feat(feat_list, target_class, num_train):
    """Draw a random training subset from each requested class.

    Parameters
    ----------
    feat_list : sequence of 2-D arrays
        ``feat_list[k]`` holds the samples of class ``k`` as rows.
    target_class : iterable of int
        Class indices to sample from.
    num_train : int
        Number of rows drawn (without replacement) from each class.

    Returns
    -------
    train_feat : ndarray, shape (len(target_class) * num_train, n_columns)
        The sampled rows of every requested class, stacked row-wise in the
        order given by ``target_class``.
    training_index : list of ndarray
        For each requested class, the row indices that were drawn.

    Raises
    ------
    ValueError
        If ``num_train`` exceeds the number of rows of a requested class,
        or ``target_class`` is empty.
    """
    train_feat = []
    training_index = []
    for i in target_class:
        ind = np.random.choice(feat_list[i].shape[0], num_train, replace=False)
        # Indexing with an index array already yields a fresh copy, so the
        # source features are never aliased.
        train_feat.append(feat_list[i][ind, :])
        training_index.append(ind)

    # Stack samples as rows (axis 0). Joining along axis 1 would glue the
    # classes together feature-wise and break the (samples, features)
    # layout as soon as more than one class is requested.
    train_feat = np.concatenate(train_feat, axis=0)

    return train_feat, training_index



In [25]:
def get_means_list(feat):
    """Return the per-class mean feature vector.

    Parameters
    ----------
    feat : sequence of 2-D arrays
        ``feat[k]`` holds the samples of class ``k`` as rows.

    Returns
    -------
    ndarray, shape (len(feat), n_columns), float64
        Row ``k`` is the column-wise mean of ``feat[k]``.
    """
    centroids = np.zeros([len(feat), feat[0].shape[1]])
    for row, block in enumerate(feat):
        centroids[row] = np.mean(block, axis=0)
    return centroids


def sorted_neighbors_of_i(m_all, i):
    """Euclidean distances from row ``i`` to every row, plus their ranking.

    Parameters
    ----------
    m_all : 2-D array
        One vector per row.
    i : int
        Index of the reference row.

    Returns
    -------
    neighbors : ndarray, shape (n_rows,), float64
        ``neighbors[j]`` is the L2 distance between rows ``i`` and ``j``.
    order : ndarray, shape (n_rows,)
        Row indices sorted by increasing distance.
    """
    neighbors = np.array(
        [np.linalg.norm(m_all[i, :] - m_all[j, :]) for j in range(m_all.shape[0])],
        dtype=float,
    )
    order = np.argsort(neighbors)
    return neighbors, order



In [None]:
def stacked_classifer(train_feat, target, m_all):
    """Fit one OneClassSVM per growing neighbourhood of the target class.

    Class means are ranked by distance to the mean of ``target``. For each
    prefix of that ranking with at least two entries, the training features
    are centred on the average of the prefix's class means, scaled to unit
    L2 norm per row, and used to fit a ``OneClassSVM(gamma='auto')``.

    Parameters
    ----------
    train_feat : 2-D array
        Training samples as rows.
    target : int
        Row of ``m_all`` that the ranking is measured from.
    m_all : 2-D array
        One mean vector per class.

    Returns
    -------
    list of dict
        One entry per prefix, in order of growing prefix size, with keys
        ``'mean'`` (the centring vector, shape (1, n_columns)) and
        ``'classifer'`` (the fitted model).
    """
    _, neighs = sorted_neighbors_of_i(m_all, target)
    classifers = []
    # Prefixes of size 1 are skipped; the first fitted shell has two members.
    for shell_size in range(2, len(neighs) + 1):
        shell_mean = np.mean(m_all[neighs[:shell_size], :], axis=0, keepdims=True)
        centered = train_feat - shell_mean
        unit_rows = centered / np.linalg.norm(centered, axis=1, keepdims=True)
        fitted = OneClassSVM(gamma='auto').fit(unit_rows)
        classifers.append({'mean': shell_mean, 'classifer': fitted})
    return classifers

# def stacked_classifer2(train_feat, gt, target, m_all):
#     _, neighs = sorted_neighbors_of_i(m_all, target)
#     classifers = []
#     current_shell = []
#     for i in neighs:
#         current_shell.append(i)
#         if len(current_shell)> 1:
#             m1 = np.mean(m_all[[i, target],:], axis =0, keepdims=True)
#             tf = train_feat-m1
#             tf = tf/np.linalg.norm(tf, axis =1, keepdims=True)
#             clf1 = OneClassSVM(gamma='auto').fit(tf)
#             classifers.append({'mean': m1, 'classifer': clf1})
            
#             m1 = np.mean(m_all[current_shell,:], axis =0, keepdims=True)
#             tf = train_feat-m1
#             tf = tf/np.linalg.norm(tf, axis =1, keepdims=True)
#             clf1 = OneClassSVM(gamma='auto').fit(tf)
#             classifers.append({'mean': m1, 'classifer': clf1})
#     return classifers

def score_samples_list(classifers, test_feat):
    """Score every test sample with every classifier in the stack.

    Each block of ``test_feat`` is centred on a classifier's stored mean,
    scaled to unit L2 norm per row, and passed to that classifier's
    ``score_samples``.

    Parameters
    ----------
    classifers : sequence of dict
        Entries with keys ``'mean'`` (centring vector) and ``'classifer'``
        (object exposing ``score_samples``).
    test_feat : sequence of 2-D arrays
        ``test_feat[k]`` holds the samples of class ``k`` as rows.

    Returns
    -------
    score : ndarray, shape (total_rows, len(classifers))
        ``score[r, j]`` is classifier ``j``'s score for sample ``r``.
    gt : ndarray, shape (total_rows,), int
        Index in ``test_feat`` that each sample came from.
    """
    # Size the outputs from the argument itself, not from a notebook-level
    # global, so the function works for any test set that is passed in.
    num_feat = sum(block.shape[0] for block in test_feat)
    gt = np.zeros(num_feat, dtype=int)
    score = np.zeros([num_feat, len(classifers)])

    runner = 0
    for i, block in enumerate(test_feat):
        last = runner + block.shape[0]
        gt[runner:last] = i
        for j, entry in enumerate(classifers):
            tsf = block - entry['mean']
            tsf = tsf / np.linalg.norm(tsf, keepdims=True, axis=1)
            score[runner:last, j] = entry['classifer'].score_samples(tsf)
        runner = last
    return score, gt

# Experiment driver: treat one class as the inlier class, fit the stacked
# one-class models on a random sample of it, then score the full data set.
target = 3
num_train = 300

m_all = get_means_list(feat_list)

train_feat, training_index = get_train_feat(feat_list, [target], num_train)
classifers = stacked_classifer(train_feat, target, m_all)
s, gt = score_samples_list(classifers, feat_list)

# ROC AUC for: the first classifier, the last classifier, and the mean
# score across all classifiers -- printed in that order.
for score_column in (s[:, 0], s[:, -1], np.mean(s, axis=1)):
    fpr, tpr, _ = roc_curve(gt == target, score_column)
    roc_auc = auc(fpr, tpr)
    print(roc_auc)


In [50]:
# Re-evaluate ROC AUC from the existing `s`, `gt` and `target`: first
# classifier, last classifier, then the mean score across classifiers.
for score_column in (s[:, 0], s[:, -1], np.mean(s, axis=1)):
    fpr, tpr, _ = roc_curve(gt == target, score_column)
    roc_auc = auc(fpr, tpr)
    print(roc_auc)


0.4656401709401709
0.8833028270874425
0.8567676528599606
