In [1]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from collections import Counter
import os
import pandas as pd

In [3]:
def extract_colors(image, num_colors=10):
    # Reshape the image to be a list of pixels
    pixels = image.reshape((-1, 3))

    # Use KMeans to cluster pixels
    kmeans = KMeans(n_clusters=num_colors)
    kmeans.fit(pixels)
    colors = kmeans.cluster_centers_
    labels = kmeans.labels_

    # Count each label to find most popular colors
    label_counts = Counter(labels)

    # Get colors and sort by popularity
    sorted_colors = [(colors[label], count) for label, count in label_counts.items()]
    sorted_colors.sort(key=lambda x: x[1], reverse=True)
    
    return sorted_colors

def plot_colors(colors, save_path):
    # Create a square figure
    square = np.zeros((100, 100 * len(colors), 3), dtype='uint8')

    start = 0
    for idx, (color, count) in enumerate(colors):
        end = start + 100
        square[:, start:end, :] = color
        start = end

    plt.figure(figsize=(12, 6))
    plt.axis('off')
    plt.imshow(square)
    plt.savefig(save_path)
    plt.close()

In [4]:
def process_images(folder_path, num_colors=10):
    color_data = []

    for filename in os.listdir(folder_path):
        if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            image_path = os.path.join(folder_path, filename)
            image = cv2.imread(image_path)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # Extract and sort colors
            colors = extract_colors(image, num_colors)
            
            # Create and save colorbar
            colorbar_path = os.path.join(folder_path, f"colorbar_{filename}")
            plot_colors(colors, colorbar_path)
            
            # Collect data for the dataframe
            color_info = {"Image": filename}
            total_count = sum(count for _, count in colors)
            for i, (color, count) in enumerate(colors):
                percentage = (count / total_count) * 100
                color_info[f"Color_{i+1}"] = color
                color_info[f"Percentage_{i+1}"] = percentage
            color_data.append(color_info)

    df = pd.DataFrame(color_data)
    return df

In [5]:
if __name__ == "__main__":
    folder_path = "photos"
    num_colors = 10
    df = process_images(folder_path, num_colors)
    df.to_csv(os.path.join(folder_path, "color_data.csv"), index=False)
    print("Color data saved to color_data.csv")
    print(df)

Color data saved to color_data.csv
                                             Image  \
0             9164be3778c76111132e4c202cf76991.jpg   
1    colorbar_9164be3778c76111132e4c202cf76991.jpg   
2                           colorbar_IMG_1070.JPEG   
3                           colorbar_IMG_1071.JPEG   
4                           colorbar_IMG_1072.JPEG   
..                                             ...   
101                                  IMG_1226.JPEG   
102                                   IMG_1237.JPG   
103                                  IMG_9099.JPEG   
104                                  IMG_9307.JPEG   
105                                  IMG_9313.JPEG   

                                               Color_1  Percentage_1  \
0    [178.56404061083686, 199.28379053835357, 198.3...     27.427759   
1    [254.97288474873454, 254.9934934745255, 254.97...     88.116667   
2    [254.99065150981718, 254.98985867718048, 254.9...     88.115694   
3    [254.99496401513454, 25