In [1]:
import numpy as np
import sys
import caffe

In [2]:
# Initialise the network: dataset/model paths, GPU selection and model loading.
DATA_DIR= '../../dataset/market/Market-1501/'
MODEL_FILE = 'test.prototxt'
PRETRAINED = 'model_market.caffemodel'
# Tag used when printing results: drop any directory prefix and the trailing
# 11 characters (the length of '.caffemodel') from the weights path.
model_name=PRETRAINED[PRETRAINED.rfind('/')+1:-11]

# Select GPU index 1 and switch Caffe to GPU mode before the net is built.
caffe.set_device(1)
caffe.set_mode_gpu()
# NOTE(review): caffe.Classifier's third positional parameter looks like it is
# image_dims rather than the phase, so caffe.TEST may be landing in the wrong
# slot -- confirm against the pycaffe version in use.
net = caffe.Classifier(MODEL_FILE, PRETRAINED,caffe.TEST)

In [3]:
# Take the input geometry from the network's 'data' blob.
N,C,H,W=net.blobs['data'].data.shape
# Border (in pixels) cut off each side by readImages; 0 means no cropping.
crop_h=crop_w=0
# The transformer is configured for an input enlarged by the crop border on
# every side, so that an HxW window can be sliced back out afterwards.
transformer = caffe.io.Transformer({'data': (N,C,H+2*crop_h,W+2*crop_w)})
# presumably moves the channel axis first (HxWxC -> CxHxW) -- confirm against caffe.io
transformer.set_transpose('data', (2,0,1))
# NOTE(review): the mean is presumably expected in the swapped (BGR) channel order -- confirm
transformer.set_mean('data', np.array([ 104,  117,  123])) # mean pixel
transformer.set_raw_scale('data', 255)  # the reference model operates on images in [0,255] range instead of [0,1]
transformer.set_channel_swap('data', (2,1,0))  # the reference model has channels in BGR order instead of RGB


def readImages(images):
    """Load and preprocess a sequence of image files into one array.

    Each path is read with ``caffe.io.load_image``, run through the
    module-level ``transformer`` for the 'data' input, and then cut down to
    the window ``[crop_h:H+crop_h, crop_w:W+crop_w]`` on its last two axes.

    Args:
        images: Sequence of image file paths.

    Returns:
        ``np.asarray`` of the per-image results, in input order (presumably
        shaped (len(images), C, H, W) -- depends on the transformer output).
        An empty input yields an empty array.
    """
    cropped_batch = [
        transformer.preprocess('data', caffe.io.load_image(image_path))[
            :, crop_h:H + crop_h, crop_w:W + crop_w]  # center crop
        for image_path in images
    ]
    return np.asarray(cropped_batch)

def readDir(list_dir):
    """Return the paths of the usable ``.jpg`` images directly inside ``list_dir``.

    Files whose name starts with ``'-'`` are skipped, as is anything that does
    not end in ``.jpg`` (the match is case-sensitive).

    Args:
        list_dir: Directory to scan. A trailing separator is optional; a '/'
            is added when it is missing so the returned paths are well formed.

    Returns:
        A list of ``list_dir`` + file name strings, sorted by file name.
        ``os.listdir`` gives no ordering guarantee, so sorting keeps every
        index derived from this list stable between runs and machines.
    """
    import os
    # Plain '/' concatenation (not os.path.join) is deliberate: other code in
    # this notebook locates the file name with rfind('/').
    if list_dir.endswith(('/', os.sep)):
        prefix = list_dir
    else:
        prefix = list_dir + '/'
    return [prefix + filename
            for filename in sorted(os.listdir(list_dir))
            if not filename.startswith('-') and filename.endswith('.jpg')]

In [4]:
def extract_features(file_list, batch_size=200):
    """Run the images through the network and return L2-normalised features.

    The files are processed in consecutive batches of at most ``batch_size``
    images. For every batch the 'data' blob is reshaped to the batch length,
    filled with the output of ``readImages`` and forwarded; the contents of
    the 'concat_features' blob are then copied out, squeezed per image and
    divided by their Euclidean norm.

    Args:
        file_list: Sequence of image paths.
        batch_size: Maximum number of images per forward pass (default 200).

    Returns:
        A list with one normalised feature array per input path, in input
        order. An empty ``file_list`` returns an empty list without touching
        the network.
    """
    features = []
    # Stepping by batch_size never produces a zero-length batch, including
    # when len(file_list) is an exact multiple of batch_size or is zero.
    for start in range(0, len(file_list), batch_size):
        cur_list = file_list[start:start + batch_size]
        cur_len = len(cur_list)
        image_data = readImages(cur_list)
        # The final batch may be shorter, so the input blob is resized each time.
        net.blobs['data'].reshape(cur_len, C, H, W)
        net.blobs['data'].data[:] = image_data
        net.forward()
        # Copy: the blob's buffer is reused by the next forward pass.
        batch_features = net.blobs['concat_features'].data.copy()
        for idx in range(cur_len):
            cur_feature = np.squeeze(batch_features[idx, :])
            features.append(cur_feature / np.linalg.norm(cur_feature))
    return features
def get_gt_dict(gallery_list):
    """Group gallery indices by person ID.

    The person ID is taken to be the first four characters of the file name,
    i.e. of the part of each path that follows the last '/'.

    Args:
        gallery_list: Sequence of gallery image paths.

    Returns:
        A dict mapping each person-ID string to the list of positions in
        ``gallery_list`` that carry that ID, in ascending order.
    """
    gt_dict = {}
    for idx, gallery_name in enumerate(gallery_list):
        name_start = gallery_name.rfind('/') + 1
        gallery_person_id = gallery_name[name_start:name_start + 4]
        # setdefault replaces the Python-2-only dict.has_key() branch.
        gt_dict.setdefault(gallery_person_id, []).append(idx)
    return gt_dict

In [5]:
# Collect the query and gallery image paths, then index the gallery by person ID.
query_list=readDir(DATA_DIR+'query/')
gallery_list=readDir(DATA_DIR+'bounding_box_test/')
gt_dict=get_gt_dict(gallery_list)
# Report: number of queries, gallery images, distinct gallery IDs, and gallery
# images filed under ID '0000'.
print len(query_list),len(gallery_list),len(gt_dict),len(gt_dict['0000'])

3368 15913 751 2798


In [6]:
# Extract features for both image sets and record the wall-clock time taken.
import time
tic=time.time()
query_features=extract_features(query_list)
gallery_features=extract_features(gallery_list)
toc=time.time()

In [7]:
# Total extraction time in seconds, followed by the average seconds per image.
print (toc-tic),(toc-tic)/(len(query_list)+len(gallery_list))

60.6539011002 0.00314578606401


In [8]:
def rank_for_queries(query_features,gallery_features):
    """Rank every gallery feature against each query feature.

    For each query the Euclidean distance to every gallery feature is turned
    into a similarity ``1 / (1 + distance)`` and the gallery indices are
    returned from most to least similar.

    The distances for one query are computed in a single vectorised step
    instead of one Python iteration per gallery item; the ranking is the same
    as the element-by-element computation up to floating-point rounding.

    Args:
        query_features: Sequence of query feature arrays.
        gallery_features: Sequence of gallery feature arrays, each with the
            same number of elements as a query feature.

    Returns:
        A list with one integer index array per query, holding gallery
        positions ordered by decreasing similarity. With an empty gallery
        every entry is an empty index array.
    """
    # Local import kept from the original; presumably so the function stays
    # self-contained when shipped to ipyparallel engines (see the
    # commented-out parallel code in this notebook).
    import numpy as np

    num_gallery = len(gallery_features)
    if num_gallery == 0:
        return [np.array([], dtype=np.intp) for _ in query_features]

    # One row per gallery item; flattening mirrors summing over all elements.
    gallery = np.asarray(gallery_features).reshape(num_gallery, -1)

    all_rank_list = []
    for query_feature in query_features:
        diff = gallery - np.asarray(query_feature).reshape(1, -1)
        dist = np.sqrt(np.sum(diff ** 2, axis=1))
        # Scores are formed in float64 so that close distances are not
        # collapsed into ties when the features are float32.
        similar_score = 1.0 / (1.0 + dist.astype(np.float64))
        all_rank_list.append(np.argsort(similar_score)[::-1])
    return all_rank_list

In [9]:
# Rank the whole gallery for every query (single process).
all_rank_list=rank_for_queries(query_features,gallery_features)

######################################################
##
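## The commented-out code below runs the queries in parallel with ipyparallel,
## split into batch_num batches (batch_num=10 in the author's run).
## That way, one query against the ~10k ("1w") gallery images takes about 0.027s.
## Multi-processing does not work for IPython notebooks on Windows.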
##
######################################################

# import ipyparallel as ipp
# client = ipp.Client()
# view = client.load_balanced_view()
# batch_num=2*len(client.ids)
# print batch_num
# batch_size_queries=len(query_features)/batch_num+1

# tic=time.time()
# task_results=[]
# for task_idx in range(batch_num):
#     batch_query_features=query_features[task_idx*batch_size_queries:(task_idx+1)*batch_size_queries]
#     task_results.append(view.apply(rank_for_queries,batch_query_features,gallery_features))    
    
# all_rank_list=[]
# for task_idx in range(batch_num):
#     all_rank_list.extend(task_results[task_idx].result())
# toc=time.time()
# print len(all_rank_list),(toc-tic),(toc-tic)/len(query_list)

In [10]:
histogram=np.zeros(len(gallery_list))
meanAP=0.0
len_queries=len(query_list)
for query_idx in range(len_queries):#
    ranked_idx_list=all_rank_list[query_idx]
    #good or junk
    query_name=query_list[query_idx]
    query_person_id=query_name[query_name.rfind('/')+1:query_name.rfind('/')+5]
    query_cam_id=query_name[query_name.rfind('/')+7:query_name.rfind('/')+8]
    relevant_idx_list=gt_dict[query_person_id]
    good_relevant=[]
    junk_relevant=[]
    for relevant_idx in relevant_idx_list:
        gallery_name=gallery_list[relevant_idx]
        gallery_cam_id=gallery_name[gallery_name.rfind('/')+7:gallery_name.rfind('/')+8]
        if gallery_cam_id==query_cam_id:
            junk_relevant.append(relevant_idx)
        else:
            good_relevant.append(relevant_idx)
    #cmc and meanAP
    matched_num=0.0
    sum_precision=0.0
    rank_idx=0
    for perdicted_idx in ranked_idx_list:
        if perdicted_idx in junk_relevant:
            continue
        elif perdicted_idx in good_relevant:
            matched_num+=1.0
            sum_precision+=matched_num/(rank_idx+1)
            histogram[rank_idx]+= 1 if matched_num<=1 else 0 #multiple results
        rank_idx+=1
        if matched_num>=len(good_relevant): #recall=1
            break
    meanAP+=sum_precision/len(good_relevant)
        
cmc=np.cumsum(histogram)/len_queries
meanAP/=len_queries
print cmc[:50],meanAP

[ 0.80967933  0.86846793  0.89608076  0.91003563  0.91983373  0.92814727
  0.93408551  0.93853919  0.94388361  0.94714964  0.95071259  0.95368171
  0.95635392  0.95872922  0.9608076   0.96229216  0.96437055  0.96644893
  0.96704276  0.96733967  0.9682304   0.97001188  0.97179335  0.97238717
  0.97327791  0.97357482  0.97387173  0.97416865  0.97416865  0.97476247
  0.97505938  0.97565321  0.97624703  0.97684086  0.97713777  0.97773159
  0.97832542  0.97862233  0.97862233  0.97980998  0.97980998  0.9804038
  0.98070071  0.98070071  0.98099762  0.98159145  0.98188836  0.98188836
  0.98218527  0.98218527] 0.634278192554


In [11]:
# One-line summary: model tag, input size HxW, crop border, rank-1 accuracy and mAP.
print model_name+","+"%dx%d,crop=%d"%(H,W,crop_h)+":",cmc[0],meanAP

model_market,160x80,crop=0: 0.809679334917 0.634278192554
