In [16]:
pip install umap-learn keras bokeh

Note: you may need to restart the kernel to use updated packages.


In [10]:
# Import necessary libraries
import numpy as np
import os
from keras.applications.resnet50 import ResNet50, preprocess_input
from keras.preprocessing import image
import umap.umap_ as umap
from sklearn.cluster import DBSCAN
n_neighbors = 5
min_dist =0.1
# Load and preprocess images
def load_and_preprocess_image(image_path, target_size=(224, 224)):
    img = image.load_img(image_path, target_size=target_size)
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    return preprocess_input(img_array)

image_dir = "images"
image_paths = [os.path.join(image_dir, img) for img in os.listdir(image_dir) if img.endswith('.jpeg')]
images = np.vstack([load_and_preprocess_image(img_path) for img_path in image_paths])

# Extract features using ResNet50
model = ResNet50(weights='imagenet', include_top=False, pooling='avg')
features = model.predict(images)

# Reduce dimensionality with UMAP
n_neighbors_value = min(5, len(images) - 1)  # ensure it's less than the number of images
reducer = umap.UMAP(n_neighbors=n_neighbors_value, n_components=2, metric='cosine', min_dist=min_dist)
reduced_features = reducer.fit_transform(features)



In [11]:
reduced_features

array([[ 2.770464 , 22.770649 ],
       [ 3.448341 , 23.614315 ],
       [ 6.61226  , 23.881523 ],
       [ 4.2323546, 22.913202 ],
       [ 4.5508957, 23.509552 ],
       [ 5.351562 , 25.417938 ],
       [ 3.12506  , 22.412535 ],
       [ 5.2834   , 23.954283 ],
       [ 6.9301167, 24.124784 ],
       [ 6.3035316, 24.540722 ],
       [ 3.977628 , 23.579678 ],
       [ 6.956659 , 24.604525 ],
       [ 5.8981695, 25.651417 ],
       [ 4.7678328, 25.11481  ],
       [ 5.6875987, 24.915102 ],
       [ 3.7031102, 23.195509 ],
       [ 6.5624027, 25.152428 ],
       [ 3.9045005, 22.494768 ]], dtype=float32)

In [32]:
# Cluster using DBSCAN
dbscan = DBSCAN(eps=0.9, min_samples=2, metric='euclidean')
clusters = dbscan.fit_predict(reduced_features)
n_clusters_ = len(set(clusters)) - (1 if -1 in clusters else 0)

In [33]:
n_clusters_

2

In [50]:
import random as rd
from bokeh.plotting import figure, show
from bokeh.models import HoverTool, ColumnDataSource, ColorBar
from bokeh.transform import linear_cmap
from bokeh.palettes import all_palettes
from bokeh.palettes import inferno
color_map = rd.choices(inferno(256), k=n_clusters_+1)
color_map

['#FBAC10', '#9E2963', '#D94D3D']

In [51]:
clusters

array([0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0])

In [52]:
import pandas as pd

In [53]:
df=pd.DataFrame({
    'x': reduced_features[:, 0],
    'y': reduced_features[:, 1],
    'path': image_paths,
    'cluster': clusters,
    'color': [color_map[i] for i in clusters]
})
df

Unnamed: 0,x,y,path,cluster,color
0,2.770464,22.770649,images/cat9.jpeg,0,#FBAC10
1,3.448341,23.614315,images/cat4.jpeg,0,#FBAC10
2,6.61226,23.881523,images/dog2.jpeg,1,#9E2963
3,4.232355,22.913202,images/cat2.jpeg,0,#FBAC10
4,4.550896,23.509552,images/cat5.jpeg,0,#FBAC10
5,5.351562,25.417938,images/dog9.jpeg,1,#9E2963
6,3.12506,22.412535,images/cat1.jpeg,0,#FBAC10
7,5.2834,23.954283,images/cat6.jpeg,0,#FBAC10
8,6.930117,24.124784,images/dog4.jpeg,1,#9E2963
9,6.303532,24.540722,images/dog3.jpeg,1,#9E2963


In [54]:
source = ColumnDataSource(data=df)
hover = HoverTool(tooltips=[
    ("index", "$index"),
    ("(x,y)", "(@x, @y)"),
    ('path', '@path'),
    ('cluster', '@cluster')
])

plot = figure(width=800, height=800, tools=[hover], title="Movie Poster Clusters with Bokeh")

plot.circle('x', 'y', size=10, source=source, fill_color={"field":"color"})
show(plot)

In [16]:
n_clusters_

2