## 模型评估

1. 从验证集中挑选出分类正确的pair，按照置信度（预测概率）从高到低排序展示这些图片pair
2. 从验证集中挑选出分类错误的pair，按照置信度（预测概率）从高到低排序展示这些图片pair
3. 观察在不同的分类阈值下，验证集上的分类准确率和召回率（能给出判断结果的样本占比），找到合适的分类阈值
4. 编辑模型：输入一张图片，输出和100张基准图片进行比较后的得分

In [None]:
# define the model
from keras.models import Model, Sequential
from keras.layers import Input, Dense, concatenate
from keras.layers.normalization import BatchNormalization
from keras.layers.core import Dropout
from keras import optimizers
from keras.utils import to_categorical

# One input per image of the pair; each input is a 2048-element feature vector.
img_feature_a = Input(shape=(2048,))
img_feature_b = Input(shape=(2048,))

# Encoder stack (2048 -> 1024 -> 256). The same Sequential instance is applied
# to both inputs below, so both branches use the same layer weights.
shared_fc_layer = Sequential([
    Dense(1024, activation='relu', input_shape=(2048, )),
    BatchNormalization(),
    Dropout(0.5),
    Dense(256, activation='relu'),
    BatchNormalization(), 
    Dropout(0.5),
])

encoded_a = shared_fc_layer(img_feature_a)
encoded_b = shared_fc_layer(img_feature_b)

# Join the two encodings into a single vector for the comparison head.
merged_vector = concatenate([encoded_a, encoded_b])

# Comparison head: one hidden layer followed by a 2-way softmax.
# (Disabled alternative that feeds the merged vector straight to the softmax:)
#x = merged_vector
x = Dense(256, activation='relu')(merged_vector)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
output = Dense(2, activation='softmax')(x)

model = Model(inputs=[img_feature_a, img_feature_b], outputs=output)

# Restore weights from disk.
# NOTE(review): presumably saved by a training run using this exact
# architecture -- the layer definitions above must stay in sync with it.
model.load_weights("./data/rank_model.h5")

In [None]:
# prepare the validation data
import cPickle as pickle
import numpy as np
import lmdb
import random

# Open the LMDB feature store read-only: this notebook only reads features.
# With the default (read-write, create) mode a wrong path silently creates an
# empty database, every lookup then returns None and get_XY drops every pair;
# readonly=True makes a missing database fail here instead.
env = lmdb.open("./data/features", readonly=True)
# Read transaction used for all feature lookups below.
txn = env.begin()

def get_XY(dataset, feature_store=None):
    """Build model inputs and labels from a list of labelled image pairs.

    Every record of ``dataset`` is a 5-tuple
    ``(img_a, s_a, img_b, s_b, cmpret)``. The stored feature bytes of both
    images are looked up and decoded as float32 vectors. Records for which
    either lookup returns ``None`` are skipped.

    Args:
        dataset: iterable of ``(img_a, s_a, img_b, s_b, cmpret)`` records.
        feature_store: object whose ``get(key)`` returns the raw float32
            bytes for an image key, or ``None`` when the key is absent.
            Defaults to the module-level LMDB transaction ``txn``.

    Returns:
        A tuple ``(img_pairs, X1, X2, Y)``: the kept
        ``(img_a, s_a, img_b, s_b)`` records, the stacked features of the
        first and second images, and the ``cmpret`` labels, all in the same
        order.
    """
    if feature_store is None:
        feature_store = txn

    img_pairs = []
    X1 = []
    X2 = []
    Y = []
    for img_a, s_a, img_b, s_b, cmpret in dataset:
        feature_a = feature_store.get(img_a)
        feature_b = feature_store.get(img_b)
        if feature_a is None or feature_b is None:
            continue
        # np.frombuffer replaces the deprecated binary mode of np.fromstring.
        # It returns read-only views, which is fine here because np.array()
        # below copies them into fresh arrays.
        X1.append(np.frombuffer(feature_a, dtype=np.float32))
        X2.append(np.frombuffer(feature_b, dtype=np.float32))
        img_pairs.append((img_a, s_a, img_b, s_b))
        Y.append(cmpret)
    return img_pairs, np.array(X1), np.array(X2), np.array(Y)

# Batch size used later when running model.predict on the validation pairs.
batch_size = 128

# Load the validation pair list. The file is opened in a `with` block so the
# handle is closed deterministically instead of being left to the GC.
# NOTE: pickle must only be used on trusted, locally produced files.
with open("./data/valid.list", 'rb') as valid_file:
    valid_list = pickle.load(valid_file)
img_pairs, X1_valid, X2_valid, Y_valid = get_XY(valid_list)

# Number of validation pairs for which both feature vectors were found.
print(len(img_pairs))
# All features are now copied into numpy arrays; the store is no longer needed.
env.close()

In [None]:
# Run the pair model on all validation pairs; each output row is the 2-way
# softmax produced by the model's final layer.
Y_predict = model.predict([X1_valid, X2_valid], batch_size=batch_size)
# Boolean predicted class: True where column 1 of the softmax exceeds 0.5.
# NOTE(review): presumably column 1 is the probability of cmpret == 1 -- confirm
# against how the labels were encoded for training.
Y_predict_class = Y_predict[:, 1] > 0.5

In [None]:
%matplotlib inline
from PIL import Image
import matplotlib.pyplot as plt
import random
import os

img_path = "../data/img/"
def cmpPlot(imgid_a, s_a, imgid_b, s_b):
    """Show two images side by side in one figure.

    Each panel is titled "<image id>_<s>" and displays the file
    "<img_path><image id>.jpg" with the axes hidden.

    Args:
        imgid_a: id of the image drawn in the left panel.
        s_a: value appended to the left title (presumably the image's score).
        imgid_b: id of the image drawn in the right panel.
        s_b: value appended to the right title (presumably the image's score).
    """
    plt.figure()

    panels = ((imgid_a, s_a), (imgid_b, s_b))
    # Subplot positions are 1-based: 1 is the left panel, 2 the right one.
    for column, (image_id, shown_value) in enumerate(panels, 1):
        plt.subplot(1, 2, column)
        plt.title("%s_%s" % (image_id, shown_value))
        plt.imshow(Image.open(img_path + "%s.jpg" % image_id))
        plt.axis('off')

    plt.show()
    
# Walk every validation pair. `acc` is incremented whenever the predicted class
# differs from the label, so despite its name it counts misclassified pairs.
# Pairs whose first image id is "55046" are printed together with the predicted
# class and the column-1 probability, for manual inspection.
acc = 0
for i, (img_a, s_a, img_b, s_b) in enumerate(img_pairs):
    if Y_predict_class[i] != Y_valid[i]:
        # Disabled debug output for each misclassified pair:
        #print img_a, s_a, img_b, s_b, Y_predict_class[i], Y_predict[i, 1]
        acc += 1
    if img_a == "55046":
        print img_a, s_a, img_b, s_b, Y_predict_class[i], Y_predict[i, 1]
# Disabled: fraction of misclassified pairs (error rate, since acc counts errors).
#print acc * 1.0 / len(img_pairs)

In [None]:
# Display one specific pair side by side: image "55046" (titled with 8.0)
# next to image "55092" (titled with 9.0).
cmpPlot("55046", 8.0, "55092", 9.0)

In [None]:
img_path = "../data/img/"
def Plot(imgid, s):
    """Show a single image in its own figure.

    The figure is titled "<imgid>_<s>" and displays the file
    "<img_path><imgid>.jpg" with the axes hidden.

    Args:
        imgid: id of the image to display.
        s: value appended to the title (presumably the image's score).
    """
    caption = "%s_%s" % (imgid, s)
    file_name = img_path + "%s.jpg" % imgid

    plt.figure()
    plt.title(caption)
    plt.imshow(Image.open(file_name))
    plt.axis('off')
    plt.show()
    
# Show every entry of p_score, one figure each, ordered by ascending value.
# NOTE(review): p_score is not defined in the cells visible here; presumably a
# dict mapping image id -> score computed elsewhere -- confirm before running.
for img, s in sorted(p_score.items(), key=lambda x: x[1]):
    Plot(img, s)

In [None]:
# Element-wise comparison of labels and predicted classes; as the last
# expression of the cell its result is displayed by the notebook.
Y_valid == Y_predict_class