In [2]:
import colorsys
import pandas as pd
import numpy as np
import ast

import pyjnius
from sklearn.cluster import DBSCAN

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns

**1. Color conversion: HEX -> HSB**

HSB (hue, saturation, brightness) is a re-parameterization of RGB that makes choosing colors more intuitive for artists. It was created by Alvy Ray Smith in 1974.

**1.1. HEX to HSB (values between 0 and 1):**

In [3]:
def hex_to_hsb(hex_color):
    """Convert a HEX color string to an (H, S, B) tuple with components in [0, 1].

    Parameters
    ----------
    hex_color : str
        Color such as "#1a2b3c" or "1a2b3c". Surrounding whitespace is
        ignored and the 3-digit shorthand ("#abc" == "#aabbcc") is accepted.
        Digits after the sixth (e.g. a trailing alpha channel) are ignored.

    Returns
    -------
    tuple of float
        (hue, saturation, brightness) as produced by colorsys.rgb_to_hsv.

    Raises
    ------
    ValueError
        If fewer than six digits are given (and the value is not 3-digit
        shorthand) or if a channel is not valid hexadecimal.
    """
    digits = hex_color.strip().lstrip("#")
    if len(digits) == 3:
        # Shorthand notation: every digit stands for a doubled pair.
        digits = "".join(ch * 2 for ch in digits)
    if len(digits) < 6:
        # Without this guard a 5-digit string would be parsed with a
        # one-digit blue channel and silently yield a wrong color.
        raise ValueError(f"Invalid HEX color: {hex_color!r}")
    r, g, b = (int(digits[i:i + 2], 16) for i in (0, 2, 4))
    return colorsys.rgb_to_hsv(r / 255.0, g / 255.0, b / 255.0)

**1.2. Convert color palette into H S B components:**

In [4]:
def extract_hsb_components(colors, top_n=10):
    """Split a palette of HEX colors into parallel hue/saturation/brightness lists.

    Parameters
    ----------
    colors : iterable of str
        HEX color strings. Any non-string iterable (list, tuple, ...) is
        accepted; only the first ``top_n`` entries are used.
    top_n : int, default 10
        Number of palette slots to return. Shorter palettes are padded with
        black ("#000000").

    Returns
    -------
    tuple of (list, list, list)
        ``(h, s, b)`` where position ``i`` of each list describes color ``i``.

    Raises
    ------
    TypeError
        If ``colors`` is a single string rather than a collection of strings.
    """
    if isinstance(colors, str):
        # A bare string would otherwise be iterated character by character.
        raise TypeError("colors must be a sequence of HEX strings, not a single string")

    # Copy into a list so tuples and other iterables can be sliced and padded.
    palette = list(colors)[:top_n]
    palette += ["#000000"] * (top_n - len(palette))  # pad with black if needed

    h, s, b = [], [], []
    for color in palette:
        hue, sat, bri = hex_to_hsb(color)
        h.append(hue)
        s.append(sat)
        b.append(bri)
    return h, s, b

**2. Add decomposed colors to dataframe**

In [5]:
# Load the paintings table (the file name suggests a filtered/cleaned OmniArt export);
# the path is resolved relative to the notebook's working directory.
df = pd.read_csv("omniart-paintings-filtered-clean.csv")

In [6]:
# Keep only rows where both palette fields are present
palette_fields = ['color_pallete', 'palette_count']
df = df.dropna(subset=palette_fields)

In [7]:
def _parse_palette(cell):
    """Evaluate string cells as Python literals; return any other value unchanged."""
    if isinstance(cell, str):
        return ast.literal_eval(cell)
    return cell


# Palettes are stored as text in the CSV; turn them back into Python objects
df['color_pallete'] = df['color_pallete'].apply(_parse_palette)


In [8]:
# Number of palette slots kept per painting (shorter palettes get padded)
top_n = 10
# One (h_list, s_list, b_list) tuple per row; Series.apply forwards the
# extra keyword argument to extract_hsb_components
hsb = df['color_pallete'].apply(extract_hsb_components, top_n=top_n)

In [9]:
# Lay the per-row (h_list, s_list, b_list) tuples out as a three-column frame
# aligned on df's index, then attach those columns to df as H, S and B
hsb_columns = ['H', 'S', 'B']
hsb_frame = pd.DataFrame(hsb.tolist(), index=df.index)
df[hsb_columns] = hsb_frame

In [10]:
# One scalar column per palette slot and component, created in the order
# H_1, S_1, B_1, H_2, S_2, B_2, ..., B_10
for slot in range(top_n):
    for component in ('H', 'S', 'B'):
        # The lambda is evaluated immediately by apply, so capturing `slot` is safe
        df[f'{component}_{slot + 1}'] = df[component].apply(lambda values: values[slot])

**3. Cluster colors into human comprehensible groups**

**3.1. Get features:**

**3.1.1. Get list of all colors:**

In [11]:
# Flatten every painting's palette into one array with a row per color: [H, S, B].
# Zipping the three list-valued columns directly yields the same rows, in the
# same order, as walking df.iterrows(), without constructing a Series per row.
all_colors = np.array(
    [
        [h, s, b]
        for h_list, s_list, b_list in zip(df['H'], df['S'], df['B'])
        for h, s, b in zip(h_list, s_list, b_list)
    ],
    dtype=float,
).reshape(-1, 3)  # shape (num_colors, 3); stays (0, 3) when df has no rows

**3.1.2. Get unique colors:**

In [12]:
# Collapse duplicate [H, S, B] rows: with axis=0, np.unique compares whole rows
# and returns the distinct ones in sorted order
unique_colors = np.unique(all_colors, axis=0)

**3.1.3. Convert hue (H) to sin and cos (circular distance)**

In [13]:
# Columns of unique_colors hold hue, saturation and brightness, in that order
H, S, B = (unique_colors[:, column] for column in range(3))

# Hue is cyclic (0 and 1 are the same color), so express it as a point on the
# unit circle instead of a raw number
hue_angle = H * 2 * np.pi
H_sin, H_cos = np.sin(hue_angle), np.cos(hue_angle)

**3.2. Feature set**

In [14]:
# Feature matrix with one row per unique color and columns (sin H, cos H, S, B)
features = np.stack([H_sin, H_cos, S, B], axis=1)

**3.3. Cluster with DBSCAN**

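Target: about 569 clusters, matching the number of named colors listed in the Color Index reference below: 126 yellow (Y) + 60 orange (O) + 161 red (R) + 38 violet (V) + 47 blue (B) + 35 green (G) + 34 brown (Br) + 35 black (Bk) + 33 white (W) = 569.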
https://www.artiscreation.com/Color_index_names.html

eps: float, default=0.5
The maximum distance between two samples for one to be considered as in the neighborhood of the other. This is not a maximum bound on the distances of points within a cluster. This is the most important DBSCAN parameter to choose appropriately for your data set and distance function.

min_samples: int, default=5
The number of samples (or total weight) in a neighborhood for a point to be considered as a core point. This includes the point itself. If min_samples is set to a higher value, DBSCAN will find denser clusters, whereas if it is set to a lower value, the found clusters will be more sparse.

In [None]:
# DBSCAN hyper-parameters.
#
# Each feature row is (sin H, cos H, S, B). The hue pair lies on the unit circle,
# so two hues are at most 2 apart, and S and B each span [0, 1]. No two points can
# therefore be further apart than sqrt(2**2 + 1**2 + 1**2) = sqrt(6) ~= 2.45.
# An eps of 5 would exceed that bound: every point would be a neighbour of every
# other point and DBSCAN could only return one all-inclusive cluster. eps has to
# stay well below sqrt(6) for colors to separate.
epsilon = 0.1  # starting value -- TODO(review): tune towards the ~569 target clusters

# Roughly the average group size len(unique_colors) / 569 (about 2104 for this
# data set). Note that the "2 * number of features" rule of thumb would give 8.
min_elements = 2000

In [None]:
# Configure the estimator with the eps / min_samples values chosen above (nothing is fitted yet)
dbscan = DBSCAN(eps=epsilon, min_samples=min_elements)

In [None]:
# Echo the estimator so its configuration is shown as the cell output
dbscan

In [None]:
# Run the clustering on the (sin H, cos H, S, B) feature matrix
dbscan.fit(features)

In [15]:
# Average number of unique colors per group if they were spread evenly over the 569 target groups
len(unique_colors)/569

2104.1511423550087