In [1]:
from __future__ import division
from __future__ import print_function

from keras.layers import Lambda, Dense, TimeDistributed, Input
from keras.preprocessing.image import load_img, img_to_array
from keras.models import Model
from keras.applications.vgg16 import VGG16
from keras.engine.topology import Layer
import keras.backend as K

import scipy.io
import numpy as np

IMG_SIZE = 1024

from keras_rmac.get_regions import get_size_vgg_feat_map, rmac_regions
from keras_rmac.RoiPooling import RoiPooling
from keras_rmac.utils import preprocess_image


def generate_model(input_shape, num_rois, model_summary=False):

    # Load VGG16
    vgg16_model = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)
    # The output size of VGG16 is 7x7x512
    
    # freeze the layers
    for layer in vgg16_model.layers[:-4]:
        layer.trainable = False

    print("Number of Region of Interests: ", num_rois)
    # Generate input layer for Region of Interests
    in_roi = Input(shape=(num_rois, 4), name='input_roi')
    
    # Use the RoiPooling layer which picks the regions and generate max over the regions
    x = RoiPooling([1], num_rois)([vgg16_model.layers[-5].output, in_roi])
    model = Model([vgg16_model.input, in_roi], x)
    if model_summary:
        model.summary()

    return model

Using TensorFlow backend.


In [2]:
def generate_rep(img_path, img_size=224, model_summary=False):
    img = load_img(img_path, target_size=(img_size, img_size))

    x = img_to_array(img)
    x = np.expand_dims(x, axis=0)

    # Mean substraction
    x = preprocess_image(x)

    Wmap, Hmap = get_size_vgg_feat_map(x.shape[2], x.shape[1])
    regions = rmac_regions(Wmap, Hmap, 3)
    print('Loading Model...')
    model = generate_model((x.shape[1], x.shape[2], x.shape[3]),
                           len(regions), model_summary)

    # Compute vector
    print('Generating Vector...')
    RMAC = model.predict([x, np.expand_dims(regions, axis=0)])
    return np.sum(RMAC, axis=1)

In [3]:
vec = generate_rep("images/100000.jpg", model_summary=True)

  b = (W - wl) / (l + Wd - 1)
  b = (H-wl)/(l+Hd-1)


Loading Model...
Number of Region of Interests:  14
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 224, 224, 64) 1792        input_1[0][0]                    
__________________________________________________________________________________________________
block1_conv2 (Conv2D)           (None, 224, 224, 64) 36928       block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_pool (MaxPooling2D)      (None, 112, 112, 64) 0           block1_conv2[0][0]               
_________________________________________________________

In [4]:
from glob import glob
import json
import time

def generate_rmac_vectors(image_dir, output_path):
    rmac_vec = {}

    img_files = glob(image_dir + "*")
    print("Number of images to generate for: ", len(img_files))
    with open(output_path, "w") as f:
        for idx, im in enumerate(img_files):
            imfile = im.split("/")[-1]
            print("Generating for im: {} file: {}".format(idx, imfile))
            rmac_vec[imfile] = generate_rep(im, 224).tolist()

        json.dump(rmac_vec, f)
        
    return rmac_vec

In [5]:
t = time.time()
file = "rmac_vectors_{}.txt".format(t)
image_dir = "subset_image/100"

rmac_vectors = generate_rmac_vectors(image_dir, file)

Number of images to generate for:  24
Generating for im: 0 file: 100600.jpg
Loading Model...
Number of Region of Interests:  14
Generating Vector...
Generating for im: 1 file: 100301.jpg
Loading Model...
Number of Region of Interests:  14
Generating Vector...
Generating for im: 2 file: 100100.jpg
Loading Model...
Number of Region of Interests:  14
Generating Vector...
Generating for im: 3 file: 100900.jpg
Loading Model...
Number of Region of Interests:  14
Generating Vector...
Generating for im: 4 file: 100503.jpg
Loading Model...
Number of Region of Interests:  14
Generating Vector...
Generating for im: 5 file: 100700.jpg
Loading Model...
Number of Region of Interests:  14
Generating Vector...
Generating for im: 6 file: 100001.jpg
Loading Model...
Number of Region of Interests:  14
Generating Vector...
Generating for im: 7 file: 100701.jpg
Loading Model...
Number of Region of Interests:  14
Generating Vector...
Generating for im: 8 file: 100101.jpg
Loading Model...
Number of Region of

In [6]:
from sklearn.metrics.pairwise import cosine_similarity

def get_similar_images(imgpath, image_dict):
    imfile = imgpath.split("/")[-1]
    # check if the image has pre-generated vector else generate it
    if imfile in image_dict.keys():
        img_vec = image_dict[imfile]
    else:
        img_vec = generate_rep(imgpath)

    similarities = {}
    for k, v in image_dict.items():
        similarities[k] = cosine_similarity(
            np.array(img_vec),
            np.array(v))

    # Return the top n similar images
    most_sim = dict(sorted(similarities.items(),
                key=lambda x:x[1],
                reverse=True)[:5]).keys()
    
    return most_sim

In [7]:
get_similar_images('100300.jpg', rmac_vectors)

dict_keys(['100300.jpg', '100400.jpg', '100301.jpg', '100302.jpg', '100503.jpg'])

In [8]:
get_similar_images('subset_image/101300.jpg', rmac_vectors)

  b = (W - wl) / (l + Wd - 1)
  b = (H-wl)/(l+Hd-1)


Loading Model...
Number of Region of Interests:  14
Generating Vector...


dict_keys(['100300.jpg', '100000.jpg', '100502.jpg', '100400.jpg', '100503.jpg'])