In [2]:
import numpy as np
import sys
import caffe

In [64]:
# Network initialisation: locate the experiment files and load the trained
# model on the GPU.
EXP_DIR='./viper_origin_with_cuhk/'
MODEL_FILE = EXP_DIR+'test.prototxt'
PRETRAINED = EXP_DIR+'snapshot/model_iter_8000.caffemodel'
# Other snapshots that were used with this notebook:
#   market_512_sigmoid_iter_26843.caffemodel
#   ../cuhk03/base_networks/googlenet/snapshot/model_iter_20000.caffemodel
# Snapshot file name without its directory and without the '.caffemodel'
# extension; only used to label the printed results.
model_name=PRETRAINED[PRETRAINED.rfind('/')+1:-len('.caffemodel')]

caffe.set_device(1)
caffe.set_mode_gpu()
# The third positional parameter of caffe.Classifier is `image_dims`, not the
# phase, so passing caffe.TEST there silently stored the phase constant as the
# image size. Classifier builds its net in TEST phase on its own.
net = caffe.Classifier(MODEL_FILE, PRETRAINED)

In [65]:
# Preprocessing pipeline sized from the network's 'data' input blob.
N, C, H, W = net.blobs['data'].data.shape
# Margin (in pixels) cut from each side after preprocessing; 0 means the
# preprocessed image already has the network input size.
crop_h = 0
crop_w = 0
transformer = caffe.io.Transformer(
    {'data': (N, C, H + 2 * crop_h, W + 2 * crop_w)}
)
# Move the last axis of the loaded image to the front.
transformer.set_transpose('data', (2, 0, 1))
# Mean pixel subtracted from every image.
transformer.set_mean('data', np.array([104, 117, 123]))
# The reference model operates on images in [0,255] range instead of [0,1].
transformer.set_raw_scale('data', 255)
# The reference model has channels in BGR order instead of RGB.
transformer.set_channel_swap('data', (2, 1, 0))


def readImages(images):
    """Load and preprocess image files into a single array.

    Every file is read with ``caffe.io.load_image``, passed through the
    module-level ``transformer`` and then cropped to ``H`` x ``W`` starting
    at offset (``crop_h``, ``crop_w``).

    images: iterable of image file paths.
    Returns ``np.asarray`` of the per-image arrays, in input order.
    """
    imageDataList=[]
    for imageName in images:
        imageImage=transformer.preprocess('data', caffe.io.load_image(imageName))
        # Keep the H x W window that starts crop_h / crop_w pixels in.
        imageDataList.append(imageImage[:,crop_h:H+crop_h,crop_w:W+crop_w])
    return np.asarray(imageDataList)

def extract_features(file_list, batch_size=300):
    """Run the network over image files and collect one feature per image.

    The files are processed in consecutive batches of at most ``batch_size``
    images: each batch is loaded with ``readImages``, copied into the 'data'
    blob (reshaped to the batch length) and forwarded through ``net``. The
    'normed_feature' blob is then copied out row by row.

    file_list: sequence of image file paths (must support ``len`` and slicing).
    batch_size: maximum number of images per forward pass (default 300).
    Returns a list with one squeezed feature array per input file, in input
    order; an empty list when ``file_list`` is empty.
    Raises ValueError if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be >= 1, got %r' % (batch_size,))
    features=[]
    # Stepping by batch_size never yields an empty batch, so the network is
    # not run on a zero-sized blob when len(file_list) is 0 or an exact
    # multiple of batch_size.
    for start in range(0, len(file_list), batch_size):
        cur_list=file_list[start:start+batch_size]
        cur_len=len(cur_list)
        image_data=readImages(cur_list)
        net.blobs['data'].reshape(cur_len,C,H,W)
        net.blobs['data'].data[:] = image_data
        net.forward()
        # Copy, because the blob memory is overwritten by the next forward pass.
        normed_features=net.blobs['normed_feature'].data.copy()
        for idx in range(cur_len):
            features.append(np.squeeze(normed_features[idx,:]))
    return features

In [66]:
def get_pid(name):
    """Extract the integer person id from an image path.

    The id is the text between the last path separator (backslash or
    forward slash) and the last underscore, e.g. ``cam_a/012_45.bmp`` -> 12.
    Accepting both separators keeps the parser working for paths produced
    on Windows as well as on POSIX systems.

    name: image path string.
    Returns the person id as an int, or the sentinel -1234 when ``name`` is
    not a string or no integer can be parsed from it.
    """
    try:
        start=max(name.rfind('\\'),name.rfind('/'))+1
        return int(name[start:name.rfind('_')])
    except (AttributeError,TypeError,ValueError):
        # Not a string, or the slice is not an integer literal.
        return -1234

def split_viper(data_dir,rand_seed=0):
    """Split the VIPeR identities into a train half and a test half.

    All ``cam_a/*.bmp`` and ``cam_b/*.bmp`` files under ``data_dir`` are
    grouped by the person id returned by ``get_pid``; each entry is a list
    holding the cam_a file followed by the cam_b file(s) of that person.
    Half of the ids (rounded down) are drawn without replacement as the test
    set, the rest form the train set.

    data_dir: dataset root; it is concatenated directly with 'cam_a/*.bmp',
        so it must end with a path separator.
    rand_seed: seed passed to ``np.random.seed`` before sampling (this
        reseeds numpy's global random state).
    Returns ``(train_dict, test_dict)``, both mapping person id -> file list.
    Raises KeyError if a cam_b image has an id with no cam_a image.
    """
    import glob
    #all images
    cam_a_files=glob.glob(data_dir+'cam_a/*.bmp')
    cam_b_files=glob.glob(data_dir+'cam_b/*.bmp')
    all_dict={}
    for a_file in cam_a_files:
        all_dict[get_pid(a_file)]=[a_file]
    for b_file in cam_b_files:
        all_dict[get_pid(b_file)].append(b_file)
    np.random.seed(rand_seed)
    # Sample from the sorted ids so the split depends only on the seed and
    # the set of ids, not on dict iteration order.
    person_ids=sorted(all_dict)
    chosen=np.random.choice(person_ids,len(person_ids)//2,replace=False)
    # A set of plain ints gives constant-time membership tests below.
    test_ids=set(chosen.tolist())
    #split
    train_dict={}
    test_dict={}
    for person_id in all_dict:
        if person_id in test_ids:
            test_dict[person_id]=all_dict[person_id]
        else:
            train_dict[person_id]=all_dict[person_id]
    return train_dict,test_dict

def get_gt_dict(gallery_list):
    """Map each person id to the gallery positions showing that person.

    The id of an entry is the integer between the last path separator
    (backslash or forward slash) and the last underscore of its file name.

    gallery_list: sequence of gallery image paths.
    Returns a dict ``{person_id: [gallery indices]}`` with the indices in
    ascending order.
    Raises ValueError if an id cannot be parsed from a path.
    """
    gt_dict={}
    for idx,gallery_name in enumerate(gallery_list):
        start=max(gallery_name.rfind('\\'),gallery_name.rfind('/'))+1
        gallery_person_id=int(gallery_name[start:gallery_name.rfind('_')])
        # setdefault creates the list on first sight of an id.
        gt_dict.setdefault(gallery_person_id,[]).append(idx)
    return gt_dict

In [67]:
# Dataset setup: build the query and gallery lists from the VIPeR test split.
DATA_DIR=r'D:/v-limz/dataset/viper/VIPeR/'
_,test_dict=split_viper(DATA_DIR)
# split_viper stores the cam_a file first and the cam_b file second, so the
# first entry of every identity is the query and the second its gallery image.
# Both lists walk test_dict in the same order.
query_list=[test_dict[pid][0] for pid in test_dict]
gallery_list=[test_dict[pid][1] for pid in test_dict]
gt_dict=get_gt_dict(gallery_list)
# Sanity check: number of queries, gallery images and distinct gallery ids.
print('%d %d %d' % (len(query_list),len(gallery_list),len(gt_dict)))

316 316 316


In [68]:
import time
# Extract features for both image sets; tic/toc bracket the wall-clock time
# spent in the two extraction calls.
tic=time.time()
query_features=extract_features(query_list)
gallery_features=extract_features(gallery_list)
toc=time.time()
# Number of feature vectors obtained for the query and the gallery set.
print('%d %d' % (len(query_features),len(gallery_features)))

316 316


In [69]:
def rank_for_queries(query_features,gallery_features):
    """Rank the whole gallery for every query by feature similarity.

    For each (query, gallery) pair the Euclidean distance ``d`` between the
    two feature vectors is turned into a similarity ``1 / (1 + d)``; the
    gallery indices are then ordered from most to least similar.

    query_features: sequence of query feature arrays.
    gallery_features: sequence of gallery feature arrays.
    Returns a list with one array of gallery indices per query.
    """
    # numpy is imported inside the function because parallel_rank passes this
    # function to view.apply, so it cannot rely on the notebook's globals.
    import numpy as np
    rankings=[]
    for query_feature in query_features:
        scores=[
            1.0/(1.0+np.sqrt(np.sum((query_feature-gallery_feature)**2)))
            for gallery_feature in gallery_features
        ]
        # argsort is ascending; reverse it to put the best match first.
        order=np.argsort(scores)
        rankings.append(order[::-1])
    return rankings

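######################################################
##
## Queries are ranked in parallel with ipyparallel: they are split into
## batch_num batches (two per engine) and each batch is ranked as its own task.
## With batch_num=10, one query against a gallery of 10,000 ("1w") images
## took about 0.027s.
## ipyparallel is used because multiprocessing does not work inside an
## IPython notebook on Windows.
##
######################################################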
def parallel_rank(query_features,gallery_features):
    """Rank the gallery for all queries using the ipyparallel engines.

    The queries are cut into consecutive batches (at most two per engine);
    each batch is submitted to a load-balanced view as one
    ``rank_for_queries`` task and the per-batch results are concatenated in
    submission order. A line with the number of rankings, the elapsed
    seconds and the seconds per query is printed.

    query_features: sequence of query feature arrays (must support ``len``
        and slicing).
    gallery_features: sequence of gallery feature arrays.
    Returns a list with one ranked array of gallery indices per query.
    Raises RuntimeError if no ipyparallel engine is connected.
    """
    # Imported here so the function does not depend on another cell having
    # imported `time` first.
    import time
    import ipyparallel as ipp
    client = ipp.Client()
    try:
        view = client.load_balanced_view()
        batch_num=2*len(client.ids)
        if batch_num==0:
            raise RuntimeError('no ipyparallel engines are connected')
        num_queries=len(query_features)
        batch_size_queries=num_queries//batch_num+1

        tic=time.time()
        task_results=[]
        # Stepping by the batch size never produces an empty slice, so no
        # task is submitted without queries.
        for start in range(0,num_queries,batch_size_queries):
            batch_query_features=query_features[start:start+batch_size_queries]
            task_results.append(view.apply(rank_for_queries,batch_query_features,gallery_features))

        all_rank_list=[]
        for task_result in task_results:
            all_rank_list.extend(task_result.result())
        toc=time.time()
    finally:
        # Release the client's connections even when a task fails.
        client.close()
    elapsed=toc-tic
    # Report the per-query time for the queries actually passed in.
    per_query=elapsed/num_queries if num_queries else 0.0
    print('%d %s %s' % (len(all_rank_list),elapsed,per_query))
    return all_rank_list

In [70]:
# Rank the gallery for every query. The commented line is the serial
# alternative, which needs no running ipyparallel cluster:
# all_rank_list=rank_for_queries(query_features,gallery_features)
all_rank_list=parallel_rank(query_features,gallery_features)

316 0.599999904633 0.00189873387542


In [71]:
# Evaluation: CMC curve and mean average precision over all queries.
# histogram[r] counts the queries whose first correct match sits at rank r.
histogram=np.zeros(len(gallery_list))
meanAP=0.0
len_queries=len(query_list)
for query_idx,query_name in enumerate(query_list):
    ranked_idx_list=all_rank_list[query_idx]
    # Gallery positions that show the same person as this query.
    relevant_idx_list=gt_dict[get_pid(query_name)]
    num_relevant=len(relevant_idx_list)
    matched_num=0.0
    sum_precision=0.0
    for rank_idx,predicted_idx in enumerate(ranked_idx_list):
        if predicted_idx in relevant_idx_list:
            matched_num+=1.0
            # Precision at this rank, accumulated for the average precision.
            sum_precision+=matched_num/(rank_idx+1)
            # Only the first hit of a query contributes to the CMC histogram.
            if matched_num<=1:
                histogram[rank_idx]+=1
        # Stop once every relevant gallery image has been found (recall = 1).
        if matched_num>=num_relevant:
            break
    meanAP+=sum_precision/num_relevant

cmc=np.cumsum(histogram)/len_queries
meanAP/=len_queries
print('%s %s' % (cmc[:50],meanAP))

[ 0.40506329  0.50949367  0.57278481  0.61708861  0.64556962  0.67088608
  0.69620253  0.71202532  0.7278481   0.74683544  0.75949367  0.76582278
  0.78164557  0.78481013  0.79113924  0.80696203  0.81329114  0.81962025
  0.83227848  0.83227848  0.84177215  0.84493671  0.85126582  0.86075949
  0.86392405  0.86392405  0.87974684  0.87974684  0.88291139  0.88924051
  0.89556962  0.89873418  0.90189873  0.90506329  0.90506329  0.91139241
  0.91772152  0.92088608  0.92088608  0.92405063  0.92721519  0.92721519
  0.93037975  0.93670886  0.93670886  0.93670886  0.93670886  0.93987342
  0.94620253  0.94620253] 0.51900619095


In [63]:
# Summary line: experiment directory + model name, input size (HxW),
# rank-1 accuracy and mean average precision.
print('%s%s,%dx%d: %s %s' % (EXP_DIR,model_name,H,W,cmc[0],meanAP))

./viper_origin_with_cuhk/model_iter_6000,160x80: 0.408227848101 0.538970073989
