# In [None]:
import pandas as pd
import numpy as np
import cv2
import os
from tqdm import tqdm

# ====== Settings ======
# Input CSV; change this to your actual path.
csv_path = r"C:\Users\Public\projects\601\cleaned_final_data.csv"
# Output CSV: the input table plus the computed color-count column.
output_csv_path = r"C:\Users\Public\projects\601\final_data.csv"
# Target width in pixels. Any image whose width differs is resized to this
# width before counting; a smaller value (e.g. 100) reduces computation.
resize_width = 500
# Number of quantization levels per RGB channel.
n_bins = 32

# ====== Load data ======
df = pd.read_csv(csv_path)
if "image_path" not in df:
    raise ValueError("CSV 文件中未找到 'image_path' 列，请检查列名是否正确")

# Keep only the rows that actually carry a path value.
image_paths = df.loc[df["image_path"].notna(), "image_path"].tolist()

# ====== 颜色处理函数 ======
def quantize_rgb(img, n_bins):
    """Quantize 8-bit channel values into ``n_bins`` levels per channel.

    Each value ``v`` in ``0..255`` is mapped to ``v * n_bins // 256``, so the
    result always lies in ``0..n_bins - 1``. When ``n_bins`` divides 256 this
    equals ``v // (256 // n_bins)``; unlike that form it stays in range for
    any other ``n_bins`` as well (e.g. 3, 10 or 100).

    Args:
        img: Array-like of 8-bit channel values (any shape).
        n_bins: Number of quantization levels per channel; must be >= 1.

    Returns:
        Integer array with the same shape as ``img`` holding the level index
        of every value.

    Raises:
        ValueError: If ``n_bins`` is smaller than 1.
    """
    if n_bins < 1:
        raise ValueError(f"n_bins must be >= 1, got {n_bins}")
    # Widen before multiplying: uint8 * n_bins would wrap around at 256.
    scaled = np.asarray(img).astype(int) * n_bins
    # Final cast keeps an integer dtype even if n_bins was passed as a float.
    return (scaled // 256).astype(int)

def encode_colors(quantized_img, n_bins):
    """Collapse each quantized pixel into a single integer color code.

    The three channels at indices 0, 1 and 2 of the last axis are combined as
    base-``n_bins`` digits: ``c0 * n_bins**2 + c1 * n_bins + c2``.

    Args:
        quantized_img: 3-D array indexed as ``[row, col, channel]`` with at
            least three channels.
        n_bins: Number of quantization levels per channel (the digit base).

    Returns:
        2-D array with one color code per pixel.
    """
    first = quantized_img[:, :, 0]
    second = quantized_img[:, :, 1]
    third = quantized_img[:, :, 2]
    return first * n_bins * n_bins + second * n_bins + third

def count_distinct_colors(path, n_bins, resize_width):
    """Count the distinct quantized RGB colors of the image stored at ``path``.

    The image is read with OpenCV, converted from BGR to RGB, optionally
    resized to ``resize_width`` pixels wide (the height is scaled by the same
    factor), quantized with ``quantize_rgb`` and turned into one integer code
    per pixel with ``encode_colors``. The number of unique codes is returned.

    Args:
        path: Path of the image file.
        n_bins: Number of quantization levels per channel.
        resize_width: Target width in pixels. ``None`` or ``0`` disables
            resizing and the image is processed at its original size.

    Returns:
        The number of distinct color codes as an ``int``, or ``np.nan`` when
        the file does not exist, ``cv2.imread`` returns ``None``, or any
        processing step raises (the error is printed).
    """
    try:
        if not os.path.exists(path):
            return np.nan
        img = cv2.imread(path)
        if img is None:
            return np.nan
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        h, w = img.shape[:2]
        # A falsy resize_width means "keep the original size".
        if resize_width and w != resize_width:
            scale = resize_width / w
            # Clamp to 1 so extremely wide images do not produce a zero
            # height, which cv2.resize rejects.
            new_height = max(1, int(h * scale))
            img = cv2.resize(img, (resize_width, new_height))
        encoded = encode_colors(quantize_rgb(img, n_bins), n_bins)
        return int(np.unique(encoded).size)
    except Exception as e:
        # Best effort per image: report the problem and return NaN instead of
        # propagating, so the caller gets a value for every path.
        print(f"Error processing {path}: {e}")
        return np.nan

# ====== Batch-process images ======
# Walk the "image_path" column itself rather than a NaN-filtered copy, so
# exactly one result is produced per DataFrame row. A filtered list is shorter
# than df whenever a path is missing, and the column assignment below would
# then fail with a length mismatch after all images had been processed.
# Rows without a path simply get NaN.
color_counts = [
    np.nan if pd.isna(path) else count_distinct_colors(path, n_bins, resize_width)
    for path in tqdm(df["image_path"], desc=f"Processing images with N={n_bins}")
]

# ====== Save results ======
df["color_counter"] = color_counts
df.to_csv(output_csv_path, index=False)
print(f"\n✅ 已生成带 color_counter 列的文件：{output_csv_path}")
