In [None]:
import numpy as np
import pandas as pd
from skimage.color import rgb2lab, lab2rgb
import os
from tqdm import tqdm_notebook as tqdm
from PIL import Image
import pickle
from sklearn.cluster import KMeans
import itertools

# select some images

In [None]:
# Upper bound on how many images are sampled for the palette comparison.
n_images = 10000

# NOTE(review): machine-specific absolute path -- point this at the local copy
# of the image folder before running the notebook.
path_to_images = '/Users/pimh/datasets/small_images/'

# Fixed seed so that restarting the kernel and re-running samples the same
# set of images.
random_seed = 42

# os.listdir returns entries in arbitrary order; sorting first makes the
# seeded draw independent of the file system's listing order.
available_ids = sorted(os.listdir(path_to_images))

# Sampling without replacement raises ValueError when more items are requested
# than exist, so cap the sample size at the number of directory entries.
sample_size = min(n_images, len(available_ids))

# A private RandomState is used so the global numpy random state is untouched.
random_ids = np.sort(
    np.random.RandomState(random_seed).choice(available_ids,
                                              sample_size,
                                              replace=False)
)

# get their palettes

In [None]:
def get_palette(image, palette_size=5, image_size=75, random_state=None):
    """Extract a colour palette from an image by clustering its pixels.

    The image is converted to RGB, resized to ``image_size`` x ``image_size``
    pixels with bilinear resampling, mapped to Lab colour space with
    ``rgb2lab`` and clustered with k-means. The cluster centres form the
    palette.

    Parameters
    ----------
    image : PIL.Image.Image
        Source image in any mode that Pillow can convert to RGB.
    palette_size : int, optional
        Number of k-means clusters, i.e. colours in the palette.
    image_size : int, optional
        Edge length in pixels of the square the image is resized to before
        clustering.
    random_state : int, RandomState instance or None, optional
        Passed straight to ``KMeans``. The default ``None`` keeps the
        previous unseeded behaviour; pass an int for repeatable palettes.

    Returns
    -------
    numpy.ndarray
        ``cluster_centers_`` of the fitted ``KMeans`` model: one row per
        palette colour, three Lab components per row. The row order is
        whatever k-means produces and carries no meaning.
    """
    # rgb2lab expects exactly three channels; without this conversion images
    # with an alpha channel (or CMYK, greyscale+alpha, ...) make it raise.
    rgb_image = image.convert('RGB')
    small_image = rgb_image.resize((image_size, image_size),
                                   resample=Image.BILINEAR)
    # Flatten the pixel grid into a list of Lab colours for clustering.
    lab_pixels = rgb2lab(np.array(small_image)).reshape(-1, 3)
    clusters = KMeans(n_clusters=palette_size,
                      random_state=random_state).fit(lab_pixels)
    return clusters.cluster_centers_

In [None]:
# image id -> palette (cluster centres returned by get_palette)
palette_dict = {}
# image id -> repr of the exception that caused the image to be skipped
failed_images = {}

for image_id in tqdm(random_ids):
    try:
        # The context manager closes the underlying file even when decoding
        # or palette extraction fails.
        with Image.open(os.path.join(path_to_images, image_id)) as image:
            # convert('RGB') turns greyscale, palette-indexed, RGBA, ...
            # images into three-channel RGB, which is what get_palette's
            # colour conversion needs. Stacking the raw array three times
            # would misread palette indices as grey levels.
            palette_dict[image_id] = get_palette(image.convert('RGB'))
    except Exception as error:
        # Best effort: unreadable or unsupported files are skipped, but they
        # are recorded instead of silently discarded. Catching Exception
        # rather than using a bare except keeps the loop interruptible
        # (KeyboardInterrupt is not swallowed).
        failed_images[image_id] = repr(error)

if failed_images:
    print('skipped {} of {} images; see failed_images for details'
          .format(len(failed_images), len(random_ids)))

# Sorted ids and their palettes in matching order, for the distance step.
image_ids = np.sort(list(palette_dict.keys()))
palettes = [palette_dict[image_id] for image_id in image_ids]

# compute distances

In [None]:
# One array holding every palette: axis 0 = image, axis 1 = colour slot,
# axis 2 = colour components.
palette_array = np.stack(palettes)

# Every ordering of the colour slots. The slot count is read from the data
# rather than hard-coded, so it stays in step with the palette size used when
# the palettes were extracted. itertools.permutations yields orderings of
# positions in the same order as it would yield orderings of the colours.
slot_orderings = np.array(
    list(itertools.permutations(range(palette_array.shape[1])))
)

# Index axis 1 with the (n_orderings, n_slots) index array to get, for every
# image, every reordering of its palette:
# shape (n_images, n_orderings, n_slots, n_components).
all_possible_palettes = palette_array[:, slot_orderings]

In [None]:
# Columns follow palette_dict's iteration order; rows follow image_ids, the
# order in which all_possible_palettes was built.
column_ids = list(palette_dict.keys())

# Preallocate the full distance table. Collecting the values in nested dicts
# first would create one Python object per (row, column) pair before pandas
# ever sees the data.
distance_matrix = np.empty((len(image_ids), len(column_ids)))

for column, image_id in enumerate(tqdm(column_ids)):
    palette = palette_dict[image_id]
    # For every other image: per-colour Euclidean distance to this palette
    # (norm over the component axis), summed over the colour slots, then
    # minimised over all orderings of the other image's palette.
    distance_matrix[:, column] = (
        np.linalg.norm(all_possible_palettes - palette, axis=3)
        .sum(axis=2)
        .min(axis=1)
    )

palette_distances = pd.DataFrame(distance_matrix,
                                 index=image_ids,
                                 columns=column_ids)

# save the data

In [None]:
# Persist the pairwise distance table in the palette_api app's data folder.
distances_path = '../../../apps/palette_api/data/palette_distances.pkl'
palette_distances.to_pickle(distances_path)

In [None]:
# Persist the image id -> palette mapping next to the distance table.
palettes_path = '../../../apps/palette_api/data/palettes.pkl'
with open(palettes_path, mode='wb') as palettes_file:
    pickle.dump(palette_dict, palettes_file)