In [14]:
from ultralytics import YOLO
from glob import glob
import cv2
import numpy as np
from typing import TypedDict, Tuple, Set, Dict, List
import json
from collections import Counter

In [15]:
# --- Notebook configuration: every path a reader may need to change ---

# JSON file with the named CSS colours used for nearest-colour matching.
CSS_COLORS_PATH = "colourrgbs.json"
# Folder that is globbed for "*.jpg"; the pattern is appended by plain string
# concatenation, so the trailing slash is required.
IMAGES_FOLDER_PATH = "../holden_scrape/wallpapers/full/"
# Weights file for the YOLO detector.
MODEL_PATH = "yolo11x.pt"
# Destination of the generated tags (JSON).
TAGS_OUTPUT_FILE = "tags.json"
# Original (misspelled) name, kept as an alias so existing cells keep working.
TAGS_OUPUT_FILE = TAGS_OUTPUT_FILE

In [16]:
# Load the colour-name -> RGB lookup table from the path set in the config cell.
with open(CSS_COLORS_PATH, 'r') as f:
    # taken from https://www.rapidtables.com/web/color/RGB_Color.html
    # JSON arrays decode to Python lists; convert each one to a tuple so the
    # runtime values agree with the declared Dict[str, Tuple[int, int, int]].
    CSS_COLORS: Dict[str, Tuple[int, int, int]] = {
        name: tuple(rgb) for name, rgb in json.load(f).items()
    }

In [17]:
class DetectedColors(TypedDict):
    """Dominant-colour summary of one image, as produced by detect_colors."""
    # Name of the CSS colour closest to the most prevalent colour cluster.
    main_color_name: str
    # Rounded channel values of that cluster centre. Declared as a list because
    # detect_colors builds a list and the record is later written with json.dump.
    main_color_rgb: List[int]
    # CSS names of the remaining clusters, most prevalent first. A list (not a
    # set): detect_colors returns a list, and sets are not JSON-serialisable.
    other_colors_names: List[str]

class ImageTags(TypedDict):
    """All tags collected for a single image file."""
    # Path of the image as returned by glob.
    image_path: str
    # Class names of the detected objects (one entry per detection).
    objects: List[str]
    # Result of has_repeating_pattern for this image.
    repeating_pattern: bool
    # Dominant colours of the image.
    colors: DetectedColors


In [5]:
# Load the detector from the weights file named in the config cell, so the
# model can be swapped by editing MODEL_PATH only.
model = YOLO(MODEL_PATH)

In [6]:
def detect_objects(model, image_path, confidence_threshold=0.25, device='cuda'):
    """
    Detect objects in an image and return tags with confidence scores

    Args:
        model: Detector that is called as ``model(image_path, device=...)``.
            It must expose a ``names`` mapping from class id to class name,
            and each returned result must have a ``boxes`` attribute whose
            items carry ``conf`` and ``cls``.
        image_path: Path to the image file
        confidence_threshold: Minimum confidence score to consider a detection
        device: Device identifier forwarded unchanged to the model call.
            Defaults to 'cuda' (the previously hard-coded value); pass e.g.
            'cpu' on machines without a GPU.

    Returns:
        List of tuples containing (class_name, confidence_score)
    """
    # Run inference on the requested device
    results = model(image_path, device=device)

    detections = []
    for result in results:
        boxes = result.boxes
        if boxes is None:
            # Nothing to iterate for this result; skip instead of raising.
            continue
        for box in boxes:
            confidence = float(box.conf[0])
            if confidence < confidence_threshold:
                continue
            class_id = int(box.cls[0])
            detections.append((model.names[class_id], confidence))

    return detections

In [7]:
def has_repeating_pattern(img_path, fft_thresh=5.0, autocorr_thresh=0.3):
    """
    Detects repeating patterns using FFT spectrum analysis and autocorrelation
    Args:
        img_path: Path to input image
        fft_thresh: Minimum ratio std/mean of the FFT magnitude spectrum
            (empirical)
        autocorr_thresh: Minimum value (0-1) of the 99.9th percentile of the
            min-max normalised autocorrelation inside the inspected window
    Returns:
        bool: True if repeating pattern detected
    Raises:
        ValueError: If the image cannot be read (cv2.imread returned None)
    """
    # Read and preprocess image
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # imread signals a missing/undecodable file by returning None; fail
        # here with the offending path instead of inside cv2.resize.
        raise ValueError(f"Could not read image: {img_path}")
    img = cv2.resize(img, (256, 256))  # Standardize size

    # FFT analysis: spread of the magnitude spectrum relative to its mean
    fft = np.fft.fft2(img)
    magnitude = np.abs(np.fft.fftshift(fft))
    mean_magnitude = np.mean(magnitude)
    fft_score = np.std(magnitude) / (mean_magnitude + 1e-6)  # epsilon avoids /0

    # Circular autocorrelation via the inverse FFT of the power spectrum,
    # rescaled to [0, 1]
    autocorr = np.fft.ifft2(np.abs(fft)**2).real
    autocorr = cv2.normalize(autocorr, None, 0, 1, cv2.NORM_MINMAX)

    # Inspect rows/cols 64..191 of the (un-shifted) autocorrelation.
    # NOTE(review): without an fftshift the zero-lag term sits at [0, 0], so
    # this window is not centred on it - confirm that this is intended.
    center = autocorr[64:192, 64:192]
    peak_value = np.percentile(center, 99.9)
    peak_count = np.sum(center > 0.8*peak_value)  # Count significant peaks

    # Decision logic; wrapped in bool() so callers get a plain Python bool
    # rather than numpy.bool_.
    return bool(
        (fft_score > fft_thresh) and
        (peak_value > autocorr_thresh) and
        (peak_count > 1) and  # Require multiple peaks
        (mean_magnitude > np.min(magnitude))  # Reject flat spectra
    )

In [8]:
# Modified color matching section
def closest_color(rgb_array: np.ndarray) -> str:
    """
    Return the name of the CSS_COLORS entry nearest to ``rgb_array``.

    Nearness is the Euclidean distance between the given RGB value and each
    palette entry; the first entry with the smallest distance wins.
    """
    names = [name for name in CSS_COLORS]
    palette = np.array([CSS_COLORS[name] for name in names])
    # One row per palette colour: offset from the query colour.
    offsets = palette - rgb_array
    nearest = np.argmin(np.linalg.norm(offsets, axis=1))
    return names[nearest]

def detect_colors(image_path: str, num_colors: int = 15) -> DetectedColors:
    """
    Detect dominant colors in an image using K-Means clustering
    Args:
        image_path: Path to input image
        num_colors: Number of dominant colors to detect
    Returns:
        DetectedColors dictionary with main color and other prominent colors
        (other colors are ordered from most to least prevalent)
    Raises:
        ValueError: If the image cannot be read (cv2.imread returned None)
    """
    # Load image and convert to RGB
    img = cv2.imread(image_path)
    if img is None:
        # imread returns None for a missing/undecodable file; report the path
        # instead of failing later inside cvtColor.
        raise ValueError(f"Could not read image: {image_path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Resize for faster processing and convert to float32
    img = cv2.resize(img, (200, 200), interpolation=cv2.INTER_AREA)
    pixels = img.reshape(-1, 3).astype(np.float32)

    # K-Means clustering to find dominant colors. Initial centres are random
    # (KMEANS_RANDOM_CENTERS), so results may differ slightly between runs.
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 200, 0.1)
    _, labels, centers = cv2.kmeans(
        pixels, num_colors, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS
    )

    # Count the pixels assigned to each cluster and order clusters by size.
    # Ranking by cluster index avoids locating a centre through an element-wise
    # `centers == c` comparison, which can pick the wrong row when two centres
    # share a single channel value. The stable sort keeps ties in index order.
    cluster_sizes = np.bincount(labels.ravel(), minlength=len(centers))
    by_prevalence = np.argsort(-cluster_sizes, kind="stable")
    sorted_colors = centers[by_prevalence]

    main_rgb = sorted_colors[0].tolist()
    other_rgbs = sorted_colors[1:]

    return {
        'main_color_name': closest_color(main_rgb),
        'main_color_rgb': [round(channel) for channel in main_rgb],
        'other_colors_names': [closest_color(c) for c in other_rgbs]
    }

In [None]:
# Tag every JPEG in IMAGES_FOLDER_PATH and persist the results as JSON.
images_tags: List[ImageTags] = []
# glob() returns paths in arbitrary order; sort them so the output file has a
# reproducible ordering from run to run.
for idx, img_path in enumerate(sorted(glob(IMAGES_FOLDER_PATH+"*.jpg"))):
    if idx % 20 == 0:
        print(f"=>Processing image {idx}")
    tags: ImageTags = {
        "image_path": img_path,
        # Keep only the class names; confidence scores are dropped.
        "objects": [class_name for class_name, _score in detect_objects(model, img_path)],
        # bool() yields a plain Python bool, which json.dump can serialise.
        "repeating_pattern": bool(has_repeating_pattern(img_path)),
        "colors": detect_colors(img_path)
    }
    images_tags.append(tags)
    # The whole list is rewritten after every image - presumably so partial
    # results survive an interrupted run.
    with open(TAGS_OUPUT_FILE, "w") as f:
        json.dump(images_tags, f)

