In [1]:
from platform import python_version
print(python_version())

3.6.7


In [1]:
# Utils

from tqdm import tqdm_notebook # Progress bar

import numpy as np
import matplotlib.pyplot as plt

import json
from scipy.io import loadmat

In [2]:
with open( "PR_data/feature_data.json", "r" ) as file:
    features = json.load( file )
    
data = np.asarray( features )

print( 'Data shape: {}'.format( data.shape ) )

Data shape: (14096, 2048)


In [3]:
# Load matfile
mat = loadmat( 'PR_data/cuhk03_new_protocol_config_labeled.mat' )

# Load labels
labels = mat[ 'labels' ].flatten()

# Load camId
camIds = mat[ 'camId' ].flatten()

# Load indexes
train_idxs   = mat[ 'train_idx' ].flatten()
query_idxs    = mat[ 'query_idx' ].flatten()
gallery_idxs = mat[ 'gallery_idx' ].flatten()

# Load training indexes
print( "Loading Training indexes : {}".format( train_idxs.shape ) )
print( "Loading Query indexes : {}".format( query_idxs.shape ) )
print( "Loading Gallery indexes : {}".format( gallery_idxs.shape ) )

Loading Training indexes : (7368,)
Loading Query indexes : (1400,)
Loading Gallery indexes : (5328,)


In [65]:
# Create Train Set
train_set   = []
train_label = []

for i in train_idxs:
    train_set.append( data[ i - 1 ] )
    train_label.append( labels[ i - 1 ] )
    
train_set   = np.asarray( train_set )
train_label = np.asarray( train_label )

print( 'Train Set : {}'.format( train_set.shape ) )
print( 'Train Label : {}'.format( train_label.shape ) )


# Create Query Set
query_set   = []
query_label = []
query_camId = []

for i in query_idxs:
    query_set.append( data[ i - 1] )
    query_label.append( labels[ i - 1 ] )
    query_camId.append( camIds[ i - 1 ] )
    
query_set   = np.asarray( query_set )
query_label = np.asarray( query_label )
query_camId = np.asarray( query_camId )

print( '\nQuery Set : {}'.format( query_set.shape ) )
print( 'Query Label : {}'.format( query_label.shape ) )
print( 'Query CamId : {}'.format( query_camId.shape ) )


# Create Gallery Set
gallery_set   = []
gallery_label = []
gallery_camId = []

for i in gallery_idxs:
    gallery_set.append( data[ i - 1] )
    gallery_label.append( labels[ i - 1 ] )
    gallery_camId.append( camIds[ i - 1 ] )
    
gallery_set   = np.asarray( gallery_set )
gallery_label = np.asarray( gallery_label )
gallery_camId = np.asarray( gallery_camId )

print( '\nGallery Set : {}'.format( gallery_set.shape ) )
print( 'Gallery Label : {}'.format( gallery_label.shape ) )
print( 'Gallery CamId : {}'.format( gallery_camId.shape ) )

Train Set : (7368, 2048)
Train Label : (7368,)

Query Set : (1400, 2048)
Query Label : (1400,)
Query CamId : (1400,)

Gallery Set : (5328, 2048)
Gallery Label : (5328,)
Gallery CamId : (5328,)


In [66]:
np.unique( query_label ).shape

(700,)

# PCA

In [68]:
from sklearn.decomposition import PCA

pca = PCA( n_components = 110 )
pca.fit( train_set )

PCA(copy=True, iterated_power='auto', n_components=110, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)

In [69]:
pca_query_set   = pca.transform( query_set )
pca_gallery_set = pca.transform( gallery_set )
pca_train_set   = pca.transform( train_set )

In [70]:
# Query Augmented
qs = pca_query_set.T

query_augmented = np.vstack( ( qs, query_camId, query_label ) )
query_augmented = query_augmented.T

# Gallery Augmented
gs = pca_gallery_set.T

gallery_augmented = np.vstack( ( gs, gallery_camId, gallery_label ) )
gallery_augmented = gallery_augmented.T

print( 'Query Augmented: {}'.format( query_augmented.shape ) )
print( 'Gallery Augmented: {}'.format( gallery_augmented.shape ) )

Query Augmented: (1400, 112)
Gallery Augmented: (5328, 112)


In [71]:
%%time

from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import NearestNeighbors

# KNN Parameters
knn_n_neighbors = 20
knn_metric = 'euclidean'


query_rank_list = []

# for i in range( 2,4 ):
for i in tqdm_notebook( range( query_augmented.shape[ 0 ] ) ):

    
    query_label = query_augmented[ i, -1 ].astype( int )

    # Remove rows which have the same camId AND label
    gallery_reduced = gallery_augmented[ ~np.logical_and( ( gallery_augmented[ :, -1 ] == query_augmented[ i ][ -1 ] ),
                                                          ( gallery_augmented[ :, -2 ] == query_augmented[ i ][ -2 ] )
                                      ) ]
    
    # Train KNN
    X = gallery_reduced[ :, : - 2 ] # All rows, but in each row, remove last 2 columns ( camId and label )
    Y = gallery_reduced[ :, - 1 ] # All rows in the last column ( the labels )
    
    KNN = NearestNeighbors( n_neighbors = knn_n_neighbors, metric = knn_metric )
    KNN.fit( X, Y )    
    
    # Test query point
    X_test = query_augmented[ i ][ : -2 ].reshape( 1, -1 ) # Remove last 2 columns ( camId and label )
    
    distances, indices = KNN.kneighbors( X_test ) # Neighbours are ordered closest to furthest
    
    # Compare
    distances = distances.flatten()
    indices   = indices.flatten()
    
    rank_list = [ gallery_reduced[ ind, -1 ].astype( int ) == query_label for ind in indices ]
    query_rank_list.append( rank_list )
    
    #count += ( gallery_reduced[ indices[ 0 ], -1 ].astype( int ) == query_augmented[ i, -1 ].astype( int ) )  
    
query_rank_list = np.asarray( query_rank_list )

HBox(children=(IntProgress(value=0, max=1400), HTML(value='')))


CPU times: user 30.9 s, sys: 77 ms, total: 31 s
Wall time: 30.8 s


## CMC

In [72]:
rankAt1  = query_rank_list.T[ 0 ]
rankAt5  = query_rank_list.T[ : 5 ].T
rankAt10 = query_rank_list.T[ : 10 ].T

cmc1  = rankAt1
cmc5  = np.sum( rankAt5, axis = 1 ) > 0 # Dirty python trick: sum across rows, if sum > 0, set to 1
cmc10 = np.sum( rankAt10, axis = 1 ) > 0

print( 'rank@1: {}%'.format( np.sum( cmc1 ) / cmc1.shape[ 0 ] * 100 ) )
print( 'rank@5: {}%'.format( np.sum( cmc5 ) / cmc5.shape[ 0 ] * 100 ) )
print( 'rank@10: {}%'.format( np.sum( cmc10 ) / cmc10.shape[ 0 ] * 100 ) )

rank@1: 46.64285714285714%
rank@5: 67.64285714285714%
rank@10: 74.85714285714286%


# LMNN

In [6]:
! pip install --user pylmnn

[31mtwisted 18.7.0 requires PyHamcrest>=1.9.0, which is not installed.[0m
[33mYou are using pip version 10.0.1, however version 18.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [75]:
from pylmnn import LargeMarginNearestNeighbor as LMNN

In [76]:
lmnn = LMNN( n_neighbors = 4 )

lmnn.fit( pca_train_set, train_label )

LargeMarginNearestNeighbor(callback=None, impostor_store='auto', init='pca',
              max_impostors=500000, max_iter=50, n_components=None,
              n_jobs=1, n_neighbors=4, neighbors_params=None,
              random_state=None, store_opt_result=False, tol=1e-05,
              verbose=0, warm_start=False, weight_push_loss=0.5)

In [77]:
%%time

from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import NearestNeighbors

# KNN Parameters
knn_n_neighbors = 20
knn_metric = 'euclidean'


query_rank_list = []

# for i in range( 2,4 ):
for i in tqdm_notebook( range( query_augmented.shape[ 0 ] ) ):

    
    query_label = query_augmented[ i, -1 ].astype( int )

    # Remove rows which have the same camId AND label
    gallery_reduced = gallery_augmented[ ~np.logical_and( ( gallery_augmented[ :, -1 ] == query_augmented[ i ][ -1 ] ),
                                                          ( gallery_augmented[ :, -2 ] == query_augmented[ i ][ -2 ] )
                                      ) ]
    
    # Train KNN
    X = gallery_reduced[ :, : - 2 ] # All rows, but in each row, remove last 2 columns ( camId and label )
    Y = gallery_reduced[ :, - 1 ] # All rows in the last column ( the labels )
    
    
    KNN = NearestNeighbors( n_neighbors = knn_n_neighbors, metric = knn_metric )
    KNN.fit( lmnn.transform( X ) , Y )    
    
    # Test query point
    X_test = query_augmented[ i ][ : -2 ].reshape( 1, -1 ) # Remove last 2 columns ( camId and label )
    
    distances, indices = KNN.kneighbors( X_test ) # Neighbours are ordered closest to furthest
    
    # Compare
    distances = distances.flatten()
    indices   = indices.flatten()
    
    rank_list = [ gallery_reduced[ ind, -1 ].astype( int ) == query_label for ind in indices ]
    query_rank_list.append( rank_list )
    
    #count += ( gallery_reduced[ indices[ 0 ], -1 ].astype( int ) == query_augmented[ i, -1 ].astype( int ) )  
    
query_rank_list = np.asarray( query_rank_list )

HBox(children=(IntProgress(value=0, max=1400), HTML(value='')))


CPU times: user 1min 16s, sys: 2.2 s, total: 1min 18s
Wall time: 39.4 s


In [78]:
for i in range( 100 ):
    print( query_rank_list[ i ] )

[False False False False False False False False False False False False
 False False False False False False False False]
[False False False False False False False False False False False False
 False False False False False False False False]
[False False False False False False False False False False False False
 False False False False False False False False]
[False False False False False False False False False False False False
 False False False False False False False False]
[False False False False False False False False False False False False
 False False False False False False False False]
[False False False False False False False False False False False False
 False False False False False False False False]
[False False False False False False False False False False False False
 False False False False False False False False]
[False False False False False False False False False False False False
 False False False False False False False False]
[False False Fal

In [79]:
rankAt1  = query_rank_list.T[ 0 ]
rankAt5  = query_rank_list.T[ : 5 ].T
rankAt10 = query_rank_list.T[ : 10 ].T

cmc1  = rankAt1
cmc5  = np.sum( rankAt5, axis = 1 ) > 0 # Dirty python trick: sum across rows, if sum > 0, set to 1
cmc10 = np.sum( rankAt10, axis = 1 ) > 0

print( 'rank@1: {}%'.format( np.sum( cmc1 ) / cmc1.shape[ 0 ] * 100 ) )
print( 'rank@5: {}%'.format( np.sum( cmc5 ) / cmc5.shape[ 0 ] * 100 ) )
print( 'rank@10: {}%'.format( np.sum( cmc10 ) / cmc10.shape[ 0 ] * 100 ) )

rank@1: 5.5%
rank@5: 14.071428571428571%
rank@10: 20.0%
