In [None]:
import logging
import os
import sys

import numpy as np
from matplotlib.pyplot import imshow
%matplotlib inline

import torch

import concreteness, mirflickr

In [None]:
# Configure logging for the notebook. DEBUG is the default verbosity; pick a
# less chatty level here if the output gets too noisy.
logging_format = '%(asctime)s [%(levelname)s] %(message)s'
logging_level = logging.DEBUG
logging.basicConfig(stream=sys.stdout, format=logging_format, level=logging_level)

# Root logger; the cells below use it for progress messages.
log = logging.getLogger()

In [None]:
# Edit these to point to your dataset and cache directories
DATASET_DIR = "/home/victor/data/mirflickr/"
CACHE_DIR = "/home/victor/ml/concreteness/data/cache"

if not os.path.exists(CACHE_DIR):
    os.makedirs(CACHE_DIR)

# Based on the values above, we'll figure out other specific paths
images_directory = os.path.join(DATASET_DIR, "images")
tags_directory = os.path.join(DATASET_DIR, "tags")
vectors_file = os.path.join(CACHE_DIR, "vectors.pt")
annoy_index_file = os.path.join(CACHE_DIR, "index.ann")

In [None]:
# Number of nearest neighbors (NNS) used during search; this value is passed
# to both concreteness.build_nns and concreteness.get_concreteness below.
K = 50

In [None]:
# Load the Mirflickr dataset from the image and tag directories configured
# above; concreteness.get_tensor_for_image is supplied as the transform.
dataset = mirflickr.MirflickrImagesDataset(
    images_directory,
    tags_directory,
    transform=concreteness.get_tensor_for_image,
)

In [None]:
# Image vectors for the dataset (per the original note, produced with a
# ResNet50 — the model itself lives in concreteness.build_image_vectors).
# They are cached on disk: build and save them only when no cache file exists
# yet, otherwise load the saved copy.
if not os.path.isfile(vectors_file):
    log.info("Building image vectors.")
    img_vectors = concreteness.build_image_vectors(dataset)
    log.info("Built image vectors.")
    torch.save(img_vectors, vectors_file)
    log.info("Saved image vectors to %s", vectors_file)
else:
    img_vectors = torch.load(vectors_file)
    log.info("Loaded image vectors.")

In [None]:
# Build the nearest-neighbor structure (NNS) from the image vectors, using K
# neighbors and the Annoy index file located in the cache directory.
nns = concreteness.build_nns(
    img_vectors,
    K,
    annoy_index_file=annoy_index_file,
)

In [None]:
# Helper for rendering a dataset image; used below to eyeball the computed NNS
def show_image(dataset, image_index):
    """Display the image stored at ``image_index`` in ``dataset``.

    The image is obtained through ``dataset.get_pil_image`` and passed to
    matplotlib's ``imshow`` as a NumPy array. Nothing is returned.
    """
    pixels = np.asarray(dataset.get_pil_image(image_index))
    imshow(pixels)

In [None]:
# We'll pick an image and display it along with two of its nearest neighbors.
# Feel free to change the image_index to see other examples.
image_index = 1

# Take the first three entries of this image's neighbor list. The slice starts
# at 0, not at image_index: image_index selects *which* neighbor list to read,
# it is not an offset into that list. Slicing from image_index skipped the
# leading entries and, for larger indices, left fewer than three items so the
# unpacking raised ValueError.
# NOTE(review): assumes nns[image_index] is ordered nearest-first with the
# query image itself as the first entry — confirm against concreteness.build_nns.
original_image, neighbor, another_neighbor = list(nns[image_index])[:3]

In [None]:
# Show the image we picked above.
show_image(dataset=dataset, image_index=image_index)

In [None]:
# Show the entry unpacked as `neighbor` from the picked image's neighbor list.
show_image(dataset=dataset, image_index=neighbor)

In [None]:
# Show the entry unpacked as `another_neighbor` from the same neighbor list.
show_image(dataset=dataset, image_index=another_neighbor)

In [None]:
# Compute concreteness over the dataset using the neighbor structure and K.
log.info("Computing concreteness.")
concreteness_dict = concreteness.get_concreteness(dataset, nns, K)

# Order the (key, value) pairs by value, largest first.
# NOTE(review): presumably keys are tags and values their concreteness scores —
# confirm against concreteness.get_concreteness.
sorted_concreteness = list(concreteness_dict.items())
sorted_concreteness.sort(key=lambda entry: entry[1], reverse=True)
log.info("Done!")

In [None]:
# Display the (key, value) pairs computed above, ordered by value from highest
# to lowest. This prints the entire list.
sorted_concreteness