In [0]:
from matplotlib.pyplot import imshow
%matplotlib inline

In [0]:
import json
import os

import numpy as np
import torch
import torch.multiprocessing as multiprocessing

import torchvision.models as models
import torchvision.transforms as transforms
from annoy import AnnoyIndex
from PIL import Image
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader

# True when torch reports a usable CUDA device; read further down to decide
# whether work is moved onto the GPU.
cuda = torch.cuda.is_available()

In [0]:
# Input locations and the artifacts derived from them.
# NOTE(review): these are absolute, machine-specific paths - adjust them before
# running the notebook on another machine.

# Directory holding one tags file per image (read by get_tags).
tags_directory = "C:/ML/mirflickr25/meta/tags"
# Directory holding the .jpg image files.
images_directory = "C:/ML/mirflickr25"
# JSON file mapping image index -> list of tags (written by build_tags_json).
tags_json_filename = "C:/ML/mirflickr25/tags.json"
# Tensor file with the image vectors (written with torch.save).
vectors_file = "C:/ML/mirflickr25/vectors.pt"
# Saved Annoy index built from the image vectors.
annoy_index_file = "C:/ML/mirflickr25/image_vectors.ann"

In [0]:
def get_index_from_image_file(image_filename):
  """Return the integer embedded in an image file name.

  The first two characters and the last four characters of the name are
  discarded and the remainder is parsed with int(), e.g. "im123.jpg" -> 123.

  Raises:
    ValueError: if the remaining characters are not a valid integer.
  """
  prefix_length = 2
  suffix_length = 4
  digits = image_filename[prefix_length:-suffix_length]
  return int(digits)


def get_tags(tags_file):
  """Read one tags file and return its lines with surrounding whitespace removed.

  Args:
    tags_file: file name, resolved against the module-level tags_directory.

  Returns:
    A list with one stripped string per line of the file.
  """
  tags_path = "{}/{}".format(tags_directory, tags_file)
  with open(tags_path, encoding="utf8") as handle:
    return [line.strip() for line in handle]

  
def get_index_from_tags_file(tags_file):
  """Return the integer embedded in a tags file name.

  The first four and the last four characters of the name are discarded and
  the remainder is parsed with int(), e.g. "tags123.txt" -> 123.

  Raises:
    ValueError: if the remaining characters are not a valid integer.
  """
  prefix_length = 4
  suffix_length = 4
  digits = tags_file[prefix_length:-suffix_length]
  return int(digits)


def is_image_file(file):
  """Return True when the file name ends with the exact suffix ".jpg".

  The comparison is case-sensitive, so "x.JPG" is not treated as an image.
  """
  return file.endswith(".jpg")


def build_tags_json(tag_files, tags_json_filename):
  """Collect the tags of every tags file and save them as one JSON document.

  Args:
    tag_files: iterable of tags file names (read through get_tags).
    tags_json_filename: path of the JSON file to (over)write.

  Returns:
    Dict mapping the integer index parsed from each file name to that file's
    list of tags. The returned dict has int keys; the JSON written to disk
    has string keys because JSON object keys are always strings.
  """
  tags_by_index = {}
  for name in tag_files:
    tags = get_tags(name)
    tags_by_index[get_index_from_tags_file(name)] = tags

  with open(tags_json_filename, "w", encoding="utf8") as out:
    json.dump(tags_by_index, out)

  return tags_by_index


def load_tags_json(tags_json_filename):
  """Load the image -> tags mapping previously saved as JSON.

  Args:
    tags_json_filename: path of the JSON file to read.

  Returns:
    The decoded JSON object. Keys are strings (JSON object keys always are),
    values are lists of tag strings.
  """
  # Read with the same explicit encoding build_tags_json writes with, instead
  # of depending on the platform's default text encoding.
  with open(tags_json_filename, "r", encoding="utf8") as tags_json_file:
    return json.load(tags_json_file)


def get_tag_scores(image_tags):
  """Count how many times each tag occurs across all images, ignoring case.

  Args:
    image_tags: dict whose values are iterables of tag strings.

  Returns:
    Dict mapping each lowercased tag to its number of occurrences.
  """
  counts = {}
  lowered_tags = (tag.lower() for tags in image_tags.values() for tag in tags)
  for key in lowered_tags:
    if key in counts:
      counts[key] += 1
    else:
      counts[key] = 1

  return counts


def get_filtered_tags(image_tags, min_occurrences=100):
  """Keep, for every image, only the tags that are frequent in the collection.

  Tag frequencies come from get_tag_scores, which counts tags
  case-insensitively. Each tag is therefore looked up by its lowercased form
  (previously the raw tag was used, so any tag containing an uppercase letter
  never matched a score and was always dropped) and is stored lowercased so
  that the surviving tags use the same vocabulary as the counts.

  Args:
    image_tags: dict mapping an image index (int, or a string holding an int
      as produced by loading the tags JSON) to a list of tag strings.
    min_occurrences: minimum number of occurrences a tag needs to be kept.

  Returns:
    Dict mapping int image index -> list of kept tags. Images left without
    any tag are omitted.
  """
  tag_scores = get_tag_scores(image_tags)

  filtered_tags = {}
  for image, tags in image_tags.items():
    kept_tags = []
    for tag in tags:
      normalized = tag.lower()
      if tag_scores.get(normalized, 0) >= min_occurrences:
        kept_tags.append(normalized)

    if kept_tags:
      # Keys may be strings when the mapping was loaded from JSON.
      filtered_tags[int(image)] = kept_tags

  return filtered_tags

        
def get_image_score(image_tags, image):
  """Return how many tags are recorded for an image file.

  Args:
    image_tags: dict mapping int image index -> list of tags.
    image: image file name; its index is parsed with
      get_index_from_image_file.

  Returns:
    The number of tags for that image, or 0 when the image has no entry.
  """
  tags_for_image = image_tags.get(get_index_from_image_file(image), [])
  return len(tags_for_image)


def get_filtered_images(images_directory, tags_directory, min_score=3):
  """List the image files that carry at least min_score frequent tags.

  Side effect: the tags JSON at the module-level tags_json_filename is
  rebuilt and overwritten on every call (via build_tags_json).

  NOTE(review): the tags_directory argument only selects which file names are
  listed; the files themselves are opened by get_tags, which reads the
  module-level tags_directory.

  Args:
    images_directory: directory scanned for image files.
    tags_directory: directory scanned for tags files.
    min_score: minimum number of frequent tags an image must have.

  Returns:
    List of image file names, in the order produced by list_image_files.
  """
  tag_files = list_tag_files(tags_directory)
  all_tags = build_tags_json(tag_files, tags_json_filename)
  frequent_tags = get_filtered_tags(all_tags)

  selected = []
  for candidate in list_image_files(images_directory):
    if get_image_score(frequent_tags, candidate) >= min_score:
      selected.append(candidate)
  return selected


def list_image_files(images_directory):
  """Return the image file names in a directory, ordered by numeric index.

  Only names accepted by is_image_file are kept; they are sorted by the
  integer that get_index_from_image_file parses from each name.
  """
  image_names = [name for name in os.listdir(images_directory)
                 if is_image_file(name)]
  image_names.sort(key=get_index_from_image_file)
  return image_names


def show_image(filtered_images, image_index):
  """Display one image of the filtered list with matplotlib's imshow.

  Args:
    filtered_images: sequence of image file names, resolved against the
      module-level images_directory.
    image_index: position in filtered_images of the image to display.
  """
  file_name = filtered_images[image_index]
  image_filepath = "{}/{}".format(images_directory, file_name)
  pixels = np.asarray(Image.open(image_filepath))
  imshow(pixels)


def list_tag_files(tags_directory):
  """Return every entry of the tags directory, ordered by numeric index.

  All directory entries are returned (nothing is filtered out); they are
  sorted by the integer that get_index_from_tags_file parses from each name,
  so an entry whose name does not follow that pattern raises ValueError.
  """
  names = list(os.listdir(tags_directory))
  names.sort(key=get_index_from_tags_file)
  return names


def get_model():
  """Create the pre-trained ResNet-50 used as a fixed feature extractor.

  Every parameter has requires_grad switched off and the network is put in
  eval mode. When the module-level cuda flag is set the model is moved to
  the "cuda" device.

  Returns:
    The configured torchvision ResNet-50 model.
  """
  model = models.resnet50(pretrained=True)

  # The network is only used for inference, so no parameter needs gradients.
  for weight in model.parameters():
    weight.requires_grad = False

  model.eval()

  if not cuda:
    return model

  model.to(torch.device("cuda"))
  return model


def img2vec(resnet, image_tensors):
    """Return the output of the model's "avgpool" layer for a batch of images.

    A forward hook on the "avgpool" sub-module copies that layer's output
    while the batch is pushed through the network; the network's own output
    is discarded.

    Args:
        resnet: model exposing an "avgpool" sub-module whose output has shape
            (batch, 2048, 1, 1).
        image_tensors: input batch; its first dimension is the batch size.

    Returns:
        A CPU float tensor of shape (batch, 2048, 1, 1).
    """
    last_layer = resnet._modules.get("avgpool")

    # Run the batch on whatever device the model's weights live on, rather
    # than relying on a module-level flag that may not match the model passed
    # in. A model without parameters leaves the input where it is.
    first_param = next(resnet.parameters(), None)
    if first_param is not None:
        image_tensors = image_tensors.to(first_param.device)

    embedding = torch.zeros(image_tensors.shape[0], 2048, 1, 1)

    def copy_output(m, i, o):
        # copy_ also transfers the data back to the CPU when needed.
        embedding.copy_(o.data)

    h = last_layer.register_forward_hook(copy_output)
    try:
        # Inference only: make sure no autograd graph is recorded.
        with torch.no_grad():
            resnet(image_tensors)
    finally:
        # Always detach the hook, even if the forward pass raises; otherwise
        # it would stay registered on the layer and fire on later calls.
        h.remove()

    return embedding


# Preprocessing values expected by the pre-trained torchvision model: the
# input image size and the per-channel mean / standard deviation used for
# normalization.
# See https://pytorch.org/docs/stable/torchvision/models.html#torchvision-models
input_image_size = (224, 224)
expected_mean = [0.485, 0.456, 0.406]
expected_std = [0.229, 0.224, 0.225]

# Reusable transform objects; get_tensor_for_image applies them in the order
# scaler -> to_tensor -> normalize.
scaler = transforms.Resize(input_image_size)
normalize = transforms.Normalize(mean=expected_mean, std=expected_std)
to_tensor = transforms.ToTensor()


def get_tensor_for_image(file_path):
  """Load an image file and turn it into a normalized input tensor.

  The image is converted to RGB, resized with scaler, converted with
  to_tensor and normalized with normalize.

  Args:
    file_path: path of the image file to open.

  Returns:
    The normalized image tensor (wrapped in Variable, as before).
  """
  # normalize is configured with three per-channel values, so force three
  # channels: grayscale, palette, RGBA or CMYK files would otherwise yield a
  # tensor with a different channel count. The context manager also closes
  # the underlying file once the pixels have been decoded.
  with Image.open(file_path) as img:
    rgb_img = img.convert("RGB")

  return Variable(normalize(to_tensor(scaler(rgb_img))))


class MirflickrImagesDataset(Dataset):
  """Dataset that yields one preprocessed image tensor per listed file."""

  def __init__(self, images_directory, image_files):
    """Remember the directory and the file names to serve.

    Args:
      images_directory: directory that contains the image files.
      image_files: sequence of image file names inside that directory.
    """
    self.images_directory = images_directory
    self.image_files = image_files

  def get_image_path(self, file):
    """Return the path of a file inside the dataset's image directory."""
    return "{}/{}".format(self.images_directory, file)

  def __getitem__(self, idx):
    """Return the tensor built by get_tensor_for_image for the idx-th file."""
    file_name = self.image_files[idx]
    image_path = self.get_image_path(file_name)
    return get_tensor_for_image(image_path)

  def __len__(self):
    """Return the number of image files in the dataset."""
    return len(self.image_files)


def _build_image_vectors(dataset, resnet, images, batch_size=64, num_workers=4):
  """Compute the image vector of every item in the dataset, batch by batch.

  Args:
    dataset: dataset yielding one image tensor per item.
    resnet: model handed to img2vec.
    images: sequence whose length gives the number of vectors to allocate.
    batch_size: number of images per forward pass.
    num_workers: number of DataLoader worker processes.

  Returns:
    A tensor of shape (len(images), 2048, 1, 1); row i holds the vector of
    the i-th dataset item (the DataLoader does not shuffle).
  """
  dataloader = DataLoader(dataset, batch_size=batch_size, num_workers=num_workers)

  img_vectors = torch.zeros(len(images), 2048, 1, 1)

  # img2vec sizes its result to the batch it receives, so a short final batch
  # needs no zero-padding; padding only ran the network on extra all-zero
  # images. A running offset keeps the rows aligned whatever the batch size.
  offset = 0
  for batch_index, batch in enumerate(dataloader):
    n_items = batch.shape[0]
    img_vectors[offset:offset + n_items] = img2vec(resnet, batch)
    offset += n_items
    print("computed batch {}".format(batch_index))

  return img_vectors


def build_and_save_image_vectors():
  """Compute the vectors of all filtered images and save them to vectors_file.

  Uses the module-level images_directory, tags_directory and vectors_file.

  Returns:
    The tensor of image vectors that was written with torch.save.
  """
  selected_images = get_filtered_images(images_directory, tags_directory)
  image_dataset = MirflickrImagesDataset(images_directory, selected_images)
  model = get_model()

  vectors = _build_image_vectors(image_dataset, model, selected_images)
  torch.save(vectors, vectors_file)
  return vectors


def build_annoy_index(img_vectors, n_trees=10):
  """Build an Annoy index holding one 2048-dimensional vector per image.

  Args:
    img_vectors: sequence of image vectors, each with 2048 elements in any
      shape, e.g. a tensor of shape (N, 2048, 1, 1). Item i of the index is
      img_vectors[i].
    n_trees: number of trees passed to AnnoyIndex.build.

  Returns:
    The built AnnoyIndex.
  """
  # "angular" is what Annoy uses when no metric is given; naming it keeps the
  # behavior and avoids relying on the deprecated implicit default.
  annoy_index = AnnoyIndex(2048, metric="angular")
  for i in range(len(img_vectors)):
    # Annoy expects a flat sequence of 2048 floats, not a (2048, 1, 1) tensor.
    flat_vector = torch.as_tensor(img_vectors[i]).reshape(-1).tolist()
    annoy_index.add_item(i, flat_vector)

  annoy_index.build(n_trees)
  return annoy_index


def load_annoy_index(file):
  """Load a saved Annoy index of 2048-dimensional vectors from disk.

  Args:
    file: path of the index file to load.

  Returns:
    The loaded AnnoyIndex.
  """
  # The metric must match the one the index was built with. "angular" is what
  # Annoy uses when no metric is given, so naming it keeps the behavior and
  # avoids relying on the deprecated implicit default.
  annoy_index = AnnoyIndex(2048, metric="angular")
  annoy_index.load(file)
  return annoy_index


def main():
  """Compute the image vectors, index them with Annoy and save both to disk.

  The vectors are written to vectors_file (by build_and_save_image_vectors)
  and the index to annoy_index_file.
  """
  # To reuse vectors from an earlier run, load them with
  # torch.load(vectors_file) instead of recomputing them.
  vectors = build_and_save_image_vectors()

  # To reuse an index from an earlier run, load it with
  # load_annoy_index(annoy_index_file) instead of rebuilding it.
  index = build_annoy_index(vectors)
  index.save(annoy_index_file)

In [0]:
def get_images_by_tag(filtered_tags):
  """Invert an image -> tags mapping into a tag -> set of images mapping.

  Args:
    filtered_tags: dict mapping an image key to an iterable of tags.

  Returns:
    Dict mapping each tag to the set of image keys that carry it.
  """
  images_by_tag = {}
  for image in filtered_tags:
    for tag in filtered_tags[image]:
      if tag not in images_by_tag:
        images_by_tag[tag] = set()
      images_by_tag[tag].add(image)

  return images_by_tag

In [0]:
def get_filtered_image_to_index(filtered_images):
  """Map each image's numeric index to its position in filtered_images.

  Args:
    filtered_images: sequence of image file names.

  Returns:
    Dict mapping the integer parsed from each file name (via
    get_index_from_image_file) to the position of that file in the sequence.
  """
  return {
      get_index_from_image_file(image): position
      for position, image in enumerate(filtered_images)
  }

In [0]:
def get_concreteness(images_by_tag, filtered_image_to_index, word, annoy_index, n, k=5):
  """Score how tightly the images tagged with a word cluster in the index.

  For every indexed image tagged with `word`, its k nearest neighbours are
  fetched from the index and the neighbours that are also tagged with `word`
  are counted. The mean count is divided by k * |images of word| / n.

  Args:
    images_by_tag: dict mapping a tag to the set of image ids carrying it.
    filtered_image_to_index: dict mapping an image id to its item id in
      annoy_index.
    word: the tag to score; must be a key of images_by_tag.
    annoy_index: object exposing get_nns_by_item(item, k).
    n: population size used in the normalization.
    k: number of neighbours fetched per image.

  Returns:
    The concreteness score as a float.
  """
  associated_images = images_by_tag[word]

  # The index returns item ids (positions given by filtered_image_to_index),
  # not image ids. Translate the tagged images into item ids first so both
  # sides of the intersection live in the same id space; comparing neighbours
  # against raw image ids matched only by coincidence.
  associated_items = {
      filtered_image_to_index[image]
      for image in associated_images
      if image in filtered_image_to_index
  }

  hits = 0
  for item in associated_items:
    # NOTE(review): the neighbour list may contain the query item itself -
    # confirm whether it should be counted.
    neighbors = annoy_index.get_nns_by_item(item, k)
    hits += len(associated_items.intersection(neighbors))

  # NOTE(review): images missing from the index still count in both
  # normalizations below, exactly as before - confirm this is intended.
  mni = (1.0 * hits) / len(associated_images)
  denominator = (1.0 * k * len(associated_images)) / n
  return mni / denominator

In [0]:
# The image vectors and the Annoy index were already computed and saved, so
# they are loaded from disk here instead of being rebuilt.
img_vectors = torch.load(vectors_file)
annoy_index = load_annoy_index(annoy_index_file)

# Tags are read back from the saved JSON. n is the number of images that keep
# at least one tag after the frequency filter.
image_tags = load_tags_json(tags_json_filename)
filtered_tags = get_filtered_tags(image_tags)
images_by_tag = get_images_by_tag(filtered_tags)
n = len(filtered_tags)

# NOTE(review): get_filtered_images re-reads every tags file and rewrites
# tags_json_filename as a side effect, so this cell is not load-only.
filtered_images = get_filtered_images(images_directory, tags_directory)
filtered_image_to_index = get_filtered_image_to_index(filtered_images)

In [0]:
# Compute the concreteness score of every tag that survived filtering, using
# get_concreteness with its default k.
concreteness = {}
for word in images_by_tag:
  concreteness[word] = get_concreteness(images_by_tag, filtered_image_to_index, word,
                                        annoy_index, n)

In [0]:
# List of (word, score) pairs, highest concreteness score first.
sorted_concreteness = sorted(concreteness.items(), key=lambda x: x[1], reverse=True)

In [0]:
len(sorted_concreteness)

208