In [None]:
from sklearn import datasets
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from io import BytesIO
from PIL import Image
import base64
import numpy as np
import umap
import cv2
import os
import hdbscan
from matplotlib import cm
from skimage import io, filters, measure, morphology, img_as_ubyte

from bokeh.plotting import figure, show, output_notebook
from bokeh.models import HoverTool, ColumnDataSource, CategoricalColorMapper
from bokeh.palettes import Spectral10

# Configure Bokeh so that `show(...)` renders plots inline in the notebook
# output cells.
output_notebook()

%matplotlib inline


In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Specify the path to your images
image_path = 'data'

# Percent of the original size; lower it if you need to downsample further.
scale_percent = 50

instances = []    # resized grayscale images, one array per readable file
image_paths = []  # path of each loaded image, index-aligned with `instances`

# os.listdir gives no ordering guarantee, so sort the names to keep the sample
# order (and everything derived from it) stable between runs.
for filename in sorted(os.listdir(image_path)):
    # os.path.join picks the separator for the current platform; a hard-coded
    # backslash only resolves on Windows.
    full_path = os.path.join(image_path, filename)

    img = cv2.imread(full_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread returns None instead of raising when the file cannot be
        # decoded (sub-directories, non-image files, ...), so skip those.
        continue

    # Target size in pixels; clamp to 1 so very small images cannot round
    # down to a zero-sized dimension.
    width = max(1, int(img.shape[1] * scale_percent / 100))
    height = max(1, int(img.shape[0] * scale_percent / 100))

    # resize image
    resized = cv2.resize(img, (width, height), interpolation=cv2.INTER_AREA)
    instances.append(resized)
    image_paths.append(full_path)


In [None]:
# Turn every image into a flat 1-D pixel vector: one row per image.
data = [image.flatten() for image in instances]

# Shape of the resulting sample matrix.
print(np.shape(data))

In [None]:
# Global plot styling: white seaborn theme and a 10 x 8 default figure size.
sns.set(
    style='white',
    rc={'figure.figsize': (10, 8)},
)

In [None]:
# Reduce the flattened images to a 2-D UMAP embedding. random_state is pinned
# so the projection is repeatable; this embedding is the input to clustering.
clusterable_embedding = umap.UMAP(
    n_components=2,
    n_neighbors=50,
    min_dist=0.0,
    random_state=42,
).fit_transform(data)

In [None]:
# Fit a UMAP model with the same settings again, this time keeping the fitted
# estimator so that `reducer.transform` / `reducer.embedding_` can be used later.
reducer = umap.UMAP(
    n_components=2,
    n_neighbors=50,
    min_dist=0.0,
    random_state=42,
)
reducer.fit(data)

In [None]:
# Quick look at the raw 2-D embedding before clustering. No per-point values
# are supplied via `c`, so no colormap is passed either (matplotlib would
# ignore it and emit a warning).
plt.scatter(clusterable_embedding[:, 0], clusterable_embedding[:, 1],
            s=0.1);

In [None]:
# Cluster the 2-D embedding with HDBSCAN; `labels` holds one integer label per
# embedded point (negative labels are treated as "unclustered" when plotting).
labels = hdbscan.HDBSCAN(
    min_cluster_size=50,
    min_samples=50,
).fit_predict(clusterable_embedding)

In [None]:
# Boolean mask: True for points assigned to a cluster (label >= 0).
clustered = (labels >= 0)

# Unclustered points in uniform grey. Use `color=` rather than `c=` for a
# single RGB triple: a bare 3-tuple passed as `c` is interpreted as values to
# colormap whenever exactly three points are plotted.
plt.scatter(clusterable_embedding[~clustered, 0],
            clusterable_embedding[~clustered, 1],
            color=(0.5, 0.5, 0.5),
            s=0.1,
            alpha=0.5)

# Clustered points, coloured by their cluster label through the Spectral map.
plt.scatter(clusterable_embedding[clustered, 0],
            clusterable_embedding[clustered, 1],
            c=labels[clustered],
            s=0.1,
            cmap='Spectral');

## Interactive Plot

In [None]:
# Push the training data back through the fitted reducer.
embedding = reducer.transform(data)

# Sanity check: transforming the training data must reproduce, element for
# element, the embedding stored on the estimator as `embedding_`.
assert np.all(embedding == reducer.embedding_)

embedding.shape

In [None]:
def to_png(arr):
    """Encode an image array as PNG and return the raw bytes.

    `arr` is passed to `PIL.Image.fromarray`; the PNG is written to an
    in-memory buffer, so nothing is written to disk.
    """
    with BytesIO() as buffer:
        Image.fromarray(arr).save(buffer, format='png')
        return buffer.getvalue()

In [None]:
def b64_image_files(im):
    """Return a base64 `data:image/png` URL for a single image array.

    The array is run through matplotlib's 'gray' colormap, converted to
    8-bit with `img_as_ubyte`, PNG-encoded with `to_png`, and wrapped in a
    data URL that can be placed directly in an HTML `<img src=...>`.

    NOTE(review): presumably `im` is a 2-D grayscale array as produced by the
    image-loading cell — confirm before feeding other dtypes/shapes.
    """
    # plt.get_cmap is the lookup that remains available across matplotlib
    # releases; matplotlib.cm.get_cmap was removed in matplotlib 3.9.
    cmap = plt.get_cmap('gray')
    png = to_png(img_as_ubyte(cmap(im)))
    return 'data:image/png;base64,' + base64.b64encode(png).decode('utf-8')

In [None]:
# Number of distinct label values minus two, floored at zero.
colors = max(np.size(np.unique(labels)) - 2, 0)

In [None]:
# One row per image: 2-D coordinates, cluster label (as a string, because the
# colour mapper is categorical) and the thumbnail as a base64 data URL.
df = pd.DataFrame(embedding, columns=('x', 'y'))
df['class'] = [str(x) for x in labels]
df['image'] = list(map(b64_image_files, instances))

datasource = ColumnDataSource(df)

# Build the factors from the cluster ids that actually occur, so every real
# cluster is guaranteed a factor whether or not noise points exist. Negative
# (unclustered) labels are left out on purpose: values without a factor are
# drawn in the mapper's NaN colour. Ids are listed in descending order.
# NOTE(review): Spectral10 has ten colours; with more than ten clusters the
# surplus factors have no palette entry — confirm the desired handling.
cluster_ids = [x for x in np.unique(labels) if x >= 0]
color_mapping = CategoricalColorMapper(factors=[str(x) for x in reversed(cluster_ids)],
                                       palette=Spectral10)

# `width`/`height` replace `plot_width`/`plot_height`, which Bokeh 3.0 removed.
plot_figure = figure(
    title='UMAP projection',
    width=600,
    height=600,
    tools=('pan, wheel_zoom, reset')
)

# Hover tooltip: thumbnail of the image plus its cluster label.
plot_figure.add_tools(HoverTool(tooltips="""
<div>
    <div>
        <img src='@image'  style='float:left; width:100px;height:100px; margin: 5px 5px 5px 5px'/>
    </div>
    <div>
        <span style='font-size: 16px; color: #224499'>Class:</span>
        <span style='font-size: 18px'>@class</span>
    </div>
</div>
"""))

# One marker per image, coloured by cluster through the categorical mapper.
plot_figure.circle(
    'x',
    'y',
    source=datasource,
    color=dict(field='class', transform=color_mapping),
    line_alpha=0.6,
    fill_alpha=0.6,
    size=4
)
show(plot_figure)
