In [1]:
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 30 12:46:51 2020
@author: x.liang@greenwich.ac.uk
Image Similarity using ResNet50
"""
import os
import numpy as np
from resnet50 import ResNet50
#from keras.models import Model
from keras.layers import Input
from keras.preprocessing import image
from keras.applications.imagenet_utils import preprocess_input
from sklearn.metrics.pairwise import cosine_similarity
#from scipy.spatial import distance

'''
def get_feature_vector(img):
 img1 = cv2.resize(img, (224, 224))
 feature_vector = feature_model.predict(img1.reshape(1, 224, 224, 3))
 return feature_vector
'''
# avg_pool (AveragePooling2D) output shape: (None, 1, 1, 2048)
# Latest Keras version causing no 'flatten_1' issue; output shape:(None,2048) 
def get_feature_vector_fromPIL(img):
    """Run the feature extractor on ``img`` and return one flat row per sample.

    ``img`` is forwarded unchanged to the module-level ``feature_model.predict``
    (presumably a preprocessed image batch as produced by ``getimg`` -- verify
    against the caller).

    Returns a 2-D array of shape ``(batch, features)``. The leading axis of the
    model output is kept and every remaining axis is flattened, so the function
    works both when the model ends in a pooled 4-D tensor such as
    ``(batch, 1, 1, 2048)`` and when it already returns ``(batch, 2048)``.
    """
    feature_vector = feature_model.predict(img)
    # The previous 4-way unpack of ``shape`` failed on 2-D outputs and used the
    # height axis as the row count; reshape on the batch axis instead.
    return feature_vector.reshape(feature_vector.shape[0], -1)

def calculate_similarity_cosine(vector1, vector2):
    """Return the cosine similarity between two feature arrays.

    Thin wrapper: both arguments are handed unchanged to scikit-learn's
    ``cosine_similarity`` and its result is returned as is.
    """
    similarity = cosine_similarity(vector1, vector2)
    return similarity

def calculate_similarity_euclidean(vector1, vector2):
    """Turn the Euclidean distance between two vectors into a similarity score.

    The distance ``d = ||vector1 - vector2||`` lies in [0, inf); the returned
    value ``1 / (1 + d)`` therefore lies in (0, 1], and equals 1 when both
    inputs are identical.
    """
    distance = np.linalg.norm(vector1 - vector2)
    return 1 / (1 + distance)

# Use ResNet-50 model as an image feature extractor
image_input = Input(shape=(224, 224, 3))
feature_model = ResNet50(input_tensor=image_input, include_top=False,weights='imagenet')

# Folder that holds the images to compare.
# Set the IMAGE_DATA_PATH environment variable to point the notebook at another
# folder without editing this cell; when it is unset the original location is
# used, so existing setups keep working.
data_path = os.environ.get(
    'IMAGE_DATA_PATH',
    '/home/vakidzaci/projects/SIFTImageSimilarity/data/dataset',
)

# os.listdir returns entries in arbitrary order; sort right away so every
# later cell sees the same, deterministic file order.
data_dir_list = sorted(os.listdir(data_path))

In [2]:
def getimg(img_path):
    """Load one dataset image as a preprocessed single-image batch.

    ``img_path`` is a file name relative to the module-level ``data_path``.
    The image is loaded at 224x224, converted to an array, given a leading
    axis of length 1 and passed through ``preprocess_input``.
    """
    full_path = f"{data_path}/{img_path}"
    loaded = image.load_img(full_path, target_size=(224, 224))
    batch = np.expand_dims(image.img_to_array(loaded), axis=0)
    return preprocess_input(batch)

In [3]:
# Order the file names so that names sharing a prefix sit next to each other;
# the pair-building cell below compares adjacent entries.
data_dir_list = sorted(data_dir_list)

In [4]:
import random

# Fixed seed so the randomly drawn negative pairs -- and therefore the metrics
# computed from them -- are the same on every run of the notebook.
PAIR_SEED = 42


def _object_code(filename):
    """Return the object identifier: the part of the file name before the first '_'."""
    return filename.split("_")[0]


pairs = []
idx = 0

# Positive pairs: same object on different images.
# Adjacent entries of data_dir_list are compared, so the list is expected to be
# sorted (names with the same prefix next to each other).
while idx < len(data_dir_list) - 1:
    if _object_code(data_dir_list[idx]) == _object_code(data_dir_list[idx + 1]):
        pairs.append([data_dir_list[idx], data_dir_list[idx + 1], 1])
        # Skip the partner as well: each image is used in at most one positive pair.
        idx += 1
    idx += 1

# Negative pairs: every image is matched with one randomly drawn image and the
# pair is kept only when the two object codes differ. Both sides must use the
# same token of the file name; comparing token 0 with token 1 let same-object
# (and even identical-file) pairs through with label 0.
rng = random.Random(PAIR_SEED)
for filepath1 in data_dir_list:
    filepath2 = rng.choice(data_dir_list)
    if _object_code(filepath1) != _object_code(filepath2):
        pairs.append([filepath1, filepath2, 0])

In [5]:
def getScore(file1,file2):
    """Return the cosine similarity score for two image files.

    Both files are loaded with ``getimg``, turned into feature vectors with
    ``get_feature_vector_fromPIL`` and compared with
    ``calculate_similarity_cosine``; the element at ``[0][0]`` of that result
    is returned.
    """
    batch_a = getimg(file1)
    batch_b = getimg(file2)
    features_a = get_feature_vector_fromPIL(batch_a)
    features_b = get_feature_vector_fromPIL(batch_b)
    similarity_matrix = calculate_similarity_cosine(features_a, features_b)
    return similarity_matrix[0][0]

In [None]:
# Turn the pair list into an array so its columns can be sliced later on.
pairs = np.array(pairs)

# One similarity score per (file, file, label) row; the label is not needed here.
predictions = [
    getScore(first_file, second_file)
    for first_file, second_file, _label in pairs
]

In [None]:
# Ground-truth labels are the third column of ``pairs``; cast them to float.
# The builtin ``float`` is used because the ``np.float`` alias was deprecated
# in NumPy 1.20 and removed in 1.24, where it raises AttributeError.
y_true = pairs[:, 2].astype(float)

In [None]:
# Sanity check: the number of labels and the number of scores should match.
len(y_true), len(predictions)

In [None]:
# Scores strictly above the threshold are predicted as "same object" (1),
# everything else as "different objects" (0).
threshold = 0.5
y_pred = [1 if score > threshold else 0 for score in predictions]

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Overall accuracy first, then precision/recall with each class in turn
# treated as the positive label.
print(f"Accuracy {accuracy_score(y_true, y_pred)}")
for heading, positive_label in (("For same objects", 1), ("For different objects", 0)):
    print()
    print(heading)
    print(f"Precision {precision_score(y_true, y_pred,pos_label=positive_label)}")
    print(f"Recall {recall_score(y_true, y_pred,pos_label=positive_label)}")