In [17]:
import colorsys
import pandas as pd
import numpy as np
import ast

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
from sklearn.preprocessing import normalize
from sklearn.cluster import KMeans
from sklearn.manifold import TSNE
from sklearn.metrics.pairwise import euclidean_distances


**1. Color conversion: HEX -> HSB**

HSB (hue, saturation, brightness; also known as HSV) is a color transform designed to make choosing colors more intuitive for artists. It was created by Alvy Ray Smith in 1974.

In [18]:
# HEX to RGB (0-255)
def hex_to_rgb(hex_color):
    """Convert a hex color string into a list of ``[r, g, b]`` integers (0-255).

    Args:
        hex_color: Color such as ``"#ff8000"`` or ``"ff8000"``. Surrounding
            whitespace and leading ``#`` characters are ignored. Three-digit
            shorthand (``"#f80"``) is expanded to its six-digit form. When
            more than six digits are given, only the first six are read.

    Returns:
        A list of three ints: red, green and blue.

    Raises:
        ValueError: If fewer than six hex digits remain after normalisation,
            or if the digits are not valid hexadecimal.
    """
    digits = hex_color.strip().lstrip("#")

    # Shorthand form: every digit stands for a doubled pair ("f80" -> "ff8800").
    if len(digits) == 3:
        digits = "".join(ch * 2 for ch in digits)

    # Fail with a readable message instead of the opaque int('') error.
    if len(digits) < 6:
        raise ValueError(f"Invalid hex color: {hex_color!r}")

    return [int(digits[i:i+2], 16) for i in (0, 2, 4)]

In [5]:
# HEX to HSB (values between 0-1)
def hex_to_hsb(hex_color):
    """Return the ``(hue, saturation, brightness)`` tuple for a hex color.

    The 0-255 channels produced by ``hex_to_rgb`` are rescaled to 0-1 before
    being passed to ``colorsys.rgb_to_hsv``.
    """
    unit_rgb = [channel / 255.0 for channel in hex_to_rgb(hex_color)]
    return colorsys.rgb_to_hsv(*unit_rgb)

In [6]:
# Converts color palette into separate H, S, B components
def extract_hsb_components(colors, top_n=10):
    """Split a palette of hex colors into parallel H, S and B lists.

    Args:
        colors: Iterable of hex color strings (list, tuple, ...).
        top_n: Number of colors to keep. Longer palettes are truncated to the
            first ``top_n`` entries; shorter ones are padded with black
            (``"#000000"``).

    Returns:
        A tuple ``(h, s, b)`` of three lists holding the hue, saturation and
        brightness of each kept color, in palette order. For a non-negative
        ``top_n`` each list has exactly ``top_n`` entries.
    """
    # list() lets tuples and other iterables through; concatenating a
    # non-list sequence with the padding list would raise TypeError.
    palette = list(colors)[:top_n]
    palette += ["#000000"] * (top_n - len(palette))  # pad with black if needed

    h, s, b = [], [], []
    for hue, sat, bri in map(hex_to_hsb, palette):
        h.append(hue)
        s.append(sat)
        b.append(bri)
    return h, s, b

**2. Add decomposed colors to dataframe**

In [7]:
# Load the paintings table; the path is relative to the notebook's working directory
df = pd.read_csv(filepath_or_buffer="omniart-paintings-filtered-clean.csv")

In [8]:
# Keep only the rows in which both palette columns are present
df = df.dropna(axis="index", how="any", subset=["color_pallete", "palette_count"])

In [9]:
# Parse strings as lists
def _literal_or_passthrough(value):
    """Evaluate string cells with ``ast.literal_eval``; return anything else unchanged."""
    if isinstance(value, str):
        return ast.literal_eval(value)
    return value


df['color_pallete'] = df['color_pallete'].apply(_literal_or_passthrough)

# Counts are additionally coerced element by element to float
df['palette_count'] = df['palette_count'].apply(
    lambda raw: [float(count) for count in _literal_or_passthrough(raw)]
)


In [10]:
# Apply and expand into new columns
top_n = 10  # number of palette colors kept per row

# Series.apply forwards extra keyword arguments to the function, so every
# element of `hsb` is the (h, s, b) tuple of lists it returns for that palette
hsb = df['color_pallete'].apply(extract_hsb_components, top_n=top_n)

In [11]:
# Split tuples into separate lists
# One row per df row, one column per channel; reusing df.index keeps the rows aligned
hsb_frame = pd.DataFrame(hsb.tolist(), index=df.index)
df[['H', 'S', 'B']] = hsb_frame

In [12]:
# Explode into columns H_1..H_10, S_1..S_10, B_1..B_10
# Columns are created slot by slot (H_1, S_1, B_1, H_2, ...)
for idx in range(top_n):
    for channel in ('H', 'S', 'B'):
        # pos=idx pins the current slot for this lambda
        df[f'{channel}_{idx+1}'] = df[channel].apply(
            lambda values, pos=idx: values[pos]
        )

**3. Cluster colors into human-comprehensible groups**

**1. Get a list of all colors:**

In [20]:
# Flatten every palette into a single array with one [H, S, B] row per color.
# The three list-columns are zipped directly instead of going through
# DataFrame.iterrows(), which builds a full Series object for every row.
all_colors = np.array(
    [
        [h, s, b]
        for h_list, s_list, b_list in zip(df['H'], df['S'], df['B'])
        # Zip to get list of colors as (H,S,B) tuples
        for h, s, b in zip(h_list, s_list, b_list)
    ],
    dtype=float,
).reshape(-1, 3)  # shape (num_colors, 3); stays 2-D (0, 3) even when df is empty

**2. Encode hue (H) as sine and cosine so that distances respect its circular nature**

In [15]:
# Column views of the (num_colors, 3) color array
H, S, B = all_colors[:, 0], all_colors[:, 1], all_colors[:, 2]

# Hue is stored in 0-1, so scaling by 2*pi maps it onto a full turn in radians;
# the sin/cos pair places hue 0 and hue 1 at the same point
hue_angle = H * 2 * np.pi
H_sin, H_cos = np.sin(hue_angle), np.cos(hue_angle)

features = np.column_stack((H_sin, H_cos, S, B))  # shape (num_colors, 4)

In [16]:
# 3. Cluster with KMeans
k = 250  # Number of clusters you want

# A fixed random_state makes the centroid initialisation repeatable
kmeans = KMeans(n_clusters=k, random_state=42)
# fit() returns the estimator itself; labels_ holds the cluster index of each sample
labels = kmeans.fit(features).labels_

In [19]:
# 4. Cluster centers (convert back to HSB)
centers = kmeans.cluster_centers_

# Columns 0 and 1 are the sin/cos hue encoding: arctan2(sin, cos) recovers the
# angle in (-pi, pi], which is rescaled to turns and wrapped into [0, 1)
hue_angles = np.arctan2(centers[:, 0], centers[:, 1])
hue_centers = np.mod(hue_angles / (2 * np.pi), 1)
saturation_centers, brightness_centers = centers[:, 2], centers[:, 3]

cluster_centers_hsb = np.column_stack(
    (hue_centers, saturation_centers, brightness_centers)
)