In [1]:
import sys
import os
import numpy as np
from gensim.models import KeyedVectors
from text.encoder import TextEncoder
from text.summarizer import NewsSummarizer
from common.space import Space
from config import w2v_file, vocab_limit
from config import get_logger

# Make image/captioning importable (it provides models.py). Guarded so that
# re-running this cell does not keep appending duplicate entries to sys.path.
_captioning_dir = os.path.join(sys.path[0], 'image', 'captioning')
if _captioning_dir not in sys.path:
    sys.path.append(_captioning_dir)

# Notebook-wide logger named 'recommender'; Recommender.compute_similarities
# below logs through this module-level name.
logger = get_logger('recommender')

In [2]:
# Load the pretrained word2vec vectors from w2v_file (binary format, reading at
# most vocab_limit vectors) and wrap them in a Space, which is handed to the
# recommender and the image encoder further down.
keyed_vectors = KeyedVectors.load_word2vec_format(
    w2v_file,
    binary=True,
    limit=vocab_limit,
)
space = Space(keyed_vectors)

2019-12-22 15:54:15,540 gensim.models.utils_any2vec: INFO loading projection weights from ~/Downloads/GoogleNews-vectors-negative300.bin.gz
2019-12-22 15:54:58,037 gensim.models.utils_any2vec: INFO loaded (1000000, 300) matrix from ~/Downloads/GoogleNews-vectors-negative300.bin.gz


In [3]:
class Recommender:
    """Recommend images for a piece of text by comparing word subspaces.

    The text is turned into a subspace by ``TextEncoder``; every precomputed
    image subspace stored in ``image_subspaces_loc`` is then scored against it
    with ``Space.subspaces_similarity``.
    """

    def __init__(self, space: "Space"):
        """Create a recommender bound to ``space``.

        Args:
            space: the Space used both to build the text encoder and to
                compare subspaces.
        """
        self.space = space
        self.text_encoder = TextEncoder(space=self.space)
        # Directory of stored image subspaces; None until set_image_subspaces().
        self.image_subspaces_loc = None

    def set_image_subspaces(self, path: str):
        """Set the directory that holds the stored image subspace files."""
        self.image_subspaces_loc = path

    def predict(self, text: str, count: int = 5):
        """Return the ``count`` best matching images for ``text``.

        Args:
            text: the text to find images for.
            count: maximum number of results to return.

        Returns:
            A list of ``(image_name, similarity)`` tuples ordered by
            decreasing similarity, at most ``count`` long.
        """
        sims = self.compute_similarities(text)
        return sorted(sims, key=lambda x: x[1], reverse=True)[:count]

    def compute_similarities(self, text: str):
        """Score ``text`` against every stored image subspace.

        Args:
            text: the text to encode and compare.

        Returns:
            A list of ``(image_name, similarity)`` tuples, one per subspace
            file, where ``image_name`` is the file name without its final
            extension. The list is empty when no subspace directory has been
            set. Entries follow sorted file-name order, not similarity order.
        """
        sub_txt = self.text_encoder.create_subspace(full_text=text)
        logger.info(sub_txt.shape)
        if self.image_subspaces_loc is None:
            return []

        sims = []
        # Sort the listing: os.listdir() order is arbitrary, and a stable order
        # keeps results (and tie-breaking in predict) reproducible.
        for im in sorted(os.listdir(self.image_subspaces_loc)):
            path = os.path.join(self.image_subspaces_loc, im)
            # Skip hidden files (e.g. .DS_Store) and sub-directories, which
            # np.load cannot read as subspaces.
            if im.startswith('.') or not os.path.isfile(path):
                continue
            sub_img = np.load(path)
            sim = self.space.subspaces_similarity(sub_txt, sub_img)
            # splitext drops only the final extension, so names that contain
            # dots are not truncated at the first one.
            sims.append((os.path.splitext(im)[0], sim))
        return sims

    def __str__(self):
        return "Image Recommender Model"

In [4]:
# Directory of stored image subspaces that the recommender will scan
# (path is relative to the notebook's working directory).
# NOTE(review): "goi" presumably stands for Google Open Images — confirm.
goi_subspaces = '../data/subspaces/goi'

In [5]:
# Build the recommender on the shared Space and point it at the stored
# image subspaces defined above.
recommender = Recommender(space=space)
recommender.set_image_subspaces(goi_subspaces)

In [37]:
# Sample query: a news paragraph. Defined here but not used by the call below.
text = """
Shocking CCTV footage released by Manchester police shows the moment the man wielding 
a large-bladed knife is tackled to the ground by armed officers. At about 11 pm on Tuesday, 
CCTV operators spotted a man waving the butcher’s knife around the Piccadilly Garden’s 
area of Manchester and informed the police. The man can be seen struggling to stand and 
interacts with terrified members of the public, as he continues to wave the knife around.
A 55-year-old man has been arrested on suspicion of affray and remains in police custody 
for questioning."""
# Sample query: a short caption-style sentence, repeated twice.
text2 = """
a close up of a green head on a table. a close up of a green head on a table.
"""

In [38]:
# Score text2 against every stored image subspace; the result is the unranked
# list of (image_name, similarity) pairs (use predict() for a ranked top-N).
recommender.compute_similarities(text=text2)

2019-12-22 16:03:02,475 recommender: INFO (300, 5)


[('3361086680_baf1ebe647_o', 0.35934952)]

In [8]:
from image.encoder import ImageEncoder
from config import google_open_images_folder
# Image-side encoder built on the same Space as the recommender; used by the
# debug helper below.
image_encoder = ImageEncoder(space=space)

In [9]:
def test_subspace_from_image(image_filename: str):
    captions: list = image_encoder._get_captions(image_filename)
    print(captions)
    keywords: Counter = image_encoder._get_keywords(captions)
    print(keywords)
    word_subspace = image_encoder.create_subspace(image_filename)
    return word_subspace

In [10]:
# Run the debug helper on one image from the Open Images folder; the captions
# and keywords it prints appear in the cell output.
image_filename = os.path.join(google_open_images_folder, '3361086680_baf1ebe647_o.jpg')
ws = test_subspace_from_image(image_filename)

['a close up of a broccoli head on a table', 'a close up of a broccoli head on a table', 'a close up of a piece of broccoli', 'a close up of a piece of broccoli', 'a close up of a piece of broccoli']
Counter({'piece': 1, 'head': 1, 'table': 1, 'broccoli': 1})
