In [8]:
"""
Background Brightness Normalization Script - Updated Version
Uses TOP HALF of background only to avoid shadow influence
"""

import cv2
import numpy as np
from pathlib import Path
import re

def analyze_background_brightness(image_path, use_top_half=True):
    """
    Measure the mean brightness (HSV V channel) of the gray background.

    The background is taken to be every pixel whose HSV value falls inside
    a fixed "gray" window (any hue, S <= 80, 80 <= V <= 230). The result is
    meant to be used as a lighting reference, not as a segmentation.

    Parameters:
    -----------
    image_path : str or Path
        Path to the image file
    use_top_half : bool
        If True, background pixels in the bottom half of the image are
        ignored so that shadows there do not lower the mean (default: True)

    Returns:
    --------
    tuple: (brightness, img, hsv, mask)
        brightness is None when no background pixel was selected; all four
        entries are None when the image could not be read.
    """
    bgr = cv2.imread(str(image_path))
    if bgr is None:
        print(f"Warning: Unable to read image {image_path}.")
        return None, None, None, None

    hsv_img = cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV)

    # Select the gray backdrop: hue unrestricted, low saturation, mid-to-high value
    gray_floor = np.array([0, 0, 80])
    gray_ceiling = np.array([180, 80, 230])
    gray_mask = cv2.inRange(hsv_img, gray_floor, gray_ceiling)

    if use_top_half:
        # Discard everything from the middle row downwards
        row_count = bgr.shape[0]
        gray_mask[row_count // 2:, :] = 0

    # V values of the pixels that survived the mask
    selected_v = hsv_img[:, :, 2][gray_mask > 0]

    if selected_v.size == 0:
        print(f"Warning: No background found in {image_path}.")
        return None, bgr, hsv_img, gray_mask

    return np.mean(selected_v), bgr, hsv_img, gray_mask

def normalize_entire_image(image_path, target_brightness, output_path, use_top_half=True, verbose=True):
    """
    Scale the brightness of an ENTIRE image so its background matches a target.

    The background brightness is measured with analyze_background_brightness;
    the ratio target/current is then applied to the V channel of every pixel
    (plants and background alike) and the result is written to output_path.

    Parameters:
    -----------
    image_path : str or Path
        Image to adjust
    target_brightness : float
        Background brightness (V channel mean) the image should end up with
    output_path : str or Path
        Where the adjusted image is written
    use_top_half : bool
        Forwarded to analyze_background_brightness (default: True)
    verbose : bool
        If True, print per-image progress (default: True)

    Returns:
    --------
    float or None
        The background brightness measured BEFORE adjustment, or None when
        the image could not be analyzed or the output could not be written.
    """
    current_brightness, _, hsv, _ = analyze_background_brightness(image_path, use_top_half)

    if current_brightness is None:
        return None

    # Single scaling factor derived from the background reference
    brightness_ratio = target_brightness / current_brightness

    if verbose:
        print(f"Processing: {Path(image_path).name}")
        print(f"  Background brightness (top half): {current_brightness:.1f}")
        print(f"  Target brightness: {target_brightness:.1f}")
        print(f"  Adjustment ratio: {brightness_ratio:.2f}x")

    # Work in float so the multiplication cannot wrap around in uint8,
    # then clip back into the valid 0..255 range before storing.
    v_channel = hsv[:, :, 2].astype(float)
    v_adjusted = np.clip(v_channel * brightness_ratio, 0, 255)
    hsv[:, :, 2] = v_adjusted.astype(np.uint8)

    result = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

    # cv2.imwrite signals failure through its return value rather than by
    # raising, so check it instead of reporting a save that never happened.
    if not cv2.imwrite(str(output_path), result):
        print(f"Warning: Unable to write image {output_path}.")
        return None

    if verbose:
        print(f"  ✓ Entire image adjusted and saved: {output_path}\n")

    return current_brightness

def should_process_file(filename, pattern_range, exclude_pattern='top'):
    """
    Decide whether a file name qualifies for processing.

    A file qualifies when both conditions hold:
    1. Its name ends with -N.jpeg or -N.JPEG for some N in pattern_range
    2. Its name does NOT contain exclude_pattern (case-insensitive)

    Parameters:
    -----------
    filename : str
        File name (not a full path)
    pattern_range : iterable of int
        Accepted values of N
    exclude_pattern : str or None
        Substring that disqualifies a file. None or '' disables the
        exclusion check (default: 'top')

    Returns:
    --------
    bool
    """
    # An empty pattern is a substring of every name, so it must be treated
    # as "no exclusion" rather than being passed to the `in` test.
    if exclude_pattern and exclude_pattern.lower() in filename.lower():
        return False

    # str.endswith accepts a tuple, so all accepted suffixes are tested at once
    accepted_suffixes = tuple(
        f"-{num}{ext}" for num in pattern_range for ext in (".jpeg", ".JPEG")
    )
    return filename.endswith(accepted_suffixes)

def process_multiple_folders(base_path, folder_names, output_folder_name='normalized',
                             include_pattern_range=range(1, 17), exclude_pattern='top',
                             target_brightness=None, use_top_half=True,
                             file_extensions=('.jpg', '.jpeg', '.png')):
    """
    Normalize the brightness of the images in several folders.

    Images are collected from base_path/<folder> for every folder name,
    filtered with should_process_file, analyzed for background brightness,
    and then rewritten to base_path/<output_folder_name>/<folder>/ with the
    same file name, scaled so all backgrounds share one target brightness.

    Parameters:
    -----------
    base_path : str or Path
        Folder containing the input folders
    folder_names : iterable of str
        Names of the input folders (e.g., ['day0', 'day1', ...])
    output_folder_name : str
        Name of the output folder created inside base_path
    include_pattern_range : iterable of int
        Accepted N for the -N.jpeg file name suffix
    exclude_pattern : str or None
        Files whose name contains this substring are skipped
    target_brightness : float or None
        Target background brightness; None selects the brightest
        background found among the analyzed images
    use_top_half : bool
        Measure the background in the top half of each image only
    file_extensions : iterable of str
        Extensions to look for; each one is also tried upper-cased

    Returns:
    --------
    None
    """

    base_path = Path(base_path)
    output_base = base_path / output_folder_name

    # Create normalized folder
    output_base.mkdir(exist_ok=True)

    # Suffixes accepted by the -N.jpeg rule; built once for the reporting below
    pattern_suffixes = tuple(
        f"-{num}{ext}" for num in include_pattern_range for ext in ('.jpeg', '.JPEG')
    )

    # Collect all image files from all folders
    all_image_files = []

    print("=" * 60)
    print("Collecting images from folders...")
    print("=" * 60)

    for folder_name in folder_names:
        folder_path = base_path / folder_name

        if not folder_path.exists():
            print(f"Warning: Folder '{folder_name}' not found. Skipping...")
            continue

        # Create corresponding output folder
        output_folder = output_base / folder_name
        output_folder.mkdir(exist_ok=True)

        # Find image files. The same file can be matched by more than one
        # pattern (e.g. when file_extensions already lists '.JPEG', or on a
        # case-insensitive match), so keep only the first occurrence.
        folder_images = []
        seen = set()
        for ext in file_extensions:
            for pattern in (f'*{ext}', f'*{ext.upper()}'):
                for candidate in folder_path.glob(pattern):
                    if candidate not in seen:
                        seen.add(candidate)
                        folder_images.append(candidate)

        # Filter: must match -N.jpeg pattern AND not contain the exclude pattern
        filtered_images = [img for img in folder_images
                           if should_process_file(img.name, include_pattern_range, exclude_pattern)]

        # Count excluded by each criterion for reporting
        if exclude_pattern:
            needle = exclude_pattern.lower()
            excluded_by_top = sum(1 for img in folder_images if needle in img.name.lower())
        else:
            # No exclusion pattern configured, so nothing is excluded by it
            excluded_by_top = 0
        excluded_by_pattern = sum(1 for img in folder_images
                                  if not img.name.endswith(pattern_suffixes))

        print(f"\n{folder_name}:")
        print(f"  Total images: {len(folder_images)}")
        print(f"  Excluded (contains '{exclude_pattern}'): {excluded_by_top}")
        print(f"  Excluded (not matching -1 to -16.jpeg): {excluded_by_pattern}")
        print(f"  To process: {len(filtered_images)}")

        all_image_files.extend(filtered_images)

    if not all_image_files:
        print("\nError: No images found to process.")
        return

    print(f"\n{'=' * 60}")
    print(f"Total images to process: {len(all_image_files)}")
    print(f"Output folder: {output_base}")
    print("Method: Entire image adjustment based on background reference")
    print("Background region: Top half only (avoids shadows) ⭐")
    print("=" * 60)

    # Step 1: Analyze background brightness of all images
    print("\nStep 1: Background Brightness Analysis (Top Half Only)")
    print("-" * 60)

    brightness_values = {}
    for img_file in all_image_files:
        brightness, _, _, _ = analyze_background_brightness(img_file, use_top_half)
        if brightness is not None:
            brightness_values[img_file] = brightness
            print(f"{img_file.parent.name}/{img_file.name}: {brightness:.1f}")

    if not brightness_values:
        print("Error: No processable images found.")
        return

    # Set target brightness
    if target_brightness is None:
        # Brightening towards the brightest background is the default policy
        target_brightness = max(brightness_values.values())
        print(f"\n✓ Target brightness (auto-selected - brightest background): {target_brightness:.1f}")
    else:
        print(f"\n✓ Target brightness (manually set): {target_brightness:.1f}")

    # Step 2: Adjust all images to target brightness
    print(f"\n{'=' * 60}")
    print("Step 2: Entire Image Brightness Adjustment")
    print("(Plants and background both adjusted)")
    print("=" * 60)

    success_count = 0
    for img_file in brightness_values:
        # Output path mirrors the input layout: normalized/day1/filename.jpg
        output_file = output_base / img_file.parent.name / img_file.name

        result = normalize_entire_image(img_file, target_brightness, output_file, use_top_half)
        if result is not None:
            success_count += 1

    print("=" * 60)
    print(f"✓ Complete! {success_count} images processed")
    print(f"Results saved in: {output_base}/")
    print("  Structure: normalized/day1/, normalized/day2/, ...")
    print("\nNow all images simulate the same lighting conditions!")
    print("You can accurately compare plant colors across images.")
    print("=" * 60)

# ========================================
# Settings for the brightness normalization run
# (edit the values below, then run this cell)
# ========================================

# Input base path: the folder that contains the per-day folders listed in
# `folders`. The output folder is created inside this same path.
cwd = "/Users/jiwoohan/Documents/git/604-project-3/pictures"
folders = ['day0', 'day1', 'day2', 'day3', 'day4', 'day5']

# Name of the output folder created under `cwd`
OUTPUT_FOLDER = 'normalized'

# Include pattern: only process files ending with -1.jpeg to -16.jpeg
INCLUDE_PATTERN_RANGE = range(1, 17)  # 1 to 16 (upper bound is exclusive)

# Exclude pattern: skip files whose name contains this word (case-insensitive)
EXCLUDE_PATTERN = 'top'

# Target brightness (None = auto-select brightest background)
TARGET_BRIGHTNESS = None

# Use top half of background only (recommended to avoid shadows)
USE_TOP_HALF = True

# File extensions to search for.
# NOTE: process_multiple_folders also globs the upper-cased form of every
# entry, so the upper-case entries here are redundant.
FILE_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.JPG', '.JPEG', '.PNG']

# ========================================
# Start processing
# ========================================

# Only run when executed as a script / notebook cell, not on import
if __name__ == "__main__":
    process_multiple_folders(
        base_path=cwd,
        folder_names=folders,
        output_folder_name=OUTPUT_FOLDER,
        include_pattern_range=INCLUDE_PATTERN_RANGE,
        exclude_pattern=EXCLUDE_PATTERN,
        target_brightness=TARGET_BRIGHTNESS,
        use_top_half=USE_TOP_HALF,
        file_extensions=FILE_EXTENSIONS
    )

Collecting images from folders...

day0:
  Total images: 36
  Excluded (contains 'top'): 0
  Excluded (not matching -1 to -16.jpeg): 20
  To process: 16

day1:
  Total images: 38
  Excluded (contains 'top'): 16
  Excluded (not matching -1 to -16.jpeg): 6
  To process: 16

day2:
  Total images: 38
  Excluded (contains 'top'): 16
  Excluded (not matching -1 to -16.jpeg): 6
  To process: 16

day3:
  Total images: 38
  Excluded (contains 'top'): 16
  Excluded (not matching -1 to -16.jpeg): 6
  To process: 16

day4:
  Total images: 38
  Excluded (contains 'top'): 16
  Excluded (not matching -1 to -16.jpeg): 6
  To process: 16

day5:
  Total images: 38
  Excluded (contains 'top'): 16
  Excluded (not matching -1 to -16.jpeg): 6
  To process: 16

Total images to process: 96
Output folder: /Users/jiwoohan/Documents/git/604-project-3/pictures/normalized
Method: Entire image adjustment based on background reference
Background region: Top half only (avoids shadows) ⭐

Step 1: Background Brightness

In [1]:
"""
Flower Color Analysis to CSV - SIMPLIFIED VERSION
Focus on detecting yellow flowers while excluding green stems
Based on the methodology: Yellow detection + Green exclusion
Saves 9 CSV files: RGB (R, G, B), HSV (H, S, V), LAB (L, a, b)
"""

import cv2
import numpy as np
from pathlib import Path
import csv

def detect_flowers_simple(image_path):
    """
    Build a mask of yellow flower pixels, with green stem pixels removed.

    Parameters:
    -----------
    image_path : str or Path
        Path to the image file

    Returns:
    --------
    tuple: (img, mask)
        img is the image as loaded by cv2.imread and mask is the flower
        mask (non-zero where a flower pixel was detected). Both are None
        when the image could not be read.
    """
    img = cv2.imread(str(image_path))
    if img is None:
        return None, None

    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

    # Step 1: Detect yellow flowers (broader range)
    # Hue: 15-35 (yellow to orange)
    # Saturation: 30+ (has color, not gray)
    # Value: 50+ (not too dark)
    lower_flower = np.array([15, 30, 50])
    upper_flower = np.array([35, 255, 255])
    mask_flower = cv2.inRange(hsv, lower_flower, upper_flower)

    # Step 2: Exclude green stems
    # Hue: 35-85 (green range)
    # Saturation: 40+ (definitely green, not brown/gray)
    # Value: 20+ (not completely black)
    # NOTE: the flower and green hue windows share only the boundary
    # value 35, so this exclusion can only remove pixels with H == 35.
    lower_green = np.array([35, 40, 20])
    upper_green = np.array([85, 255, 255])
    mask_green = cv2.inRange(hsv, lower_green, upper_green)

    # Apply exclusion: keep flower pixels that are not also green
    mask = cv2.bitwise_and(mask_flower, cv2.bitwise_not(mask_green))

    # Light cleanup with morphological operations:
    # close first to fill small holes, then open to drop isolated specks
    kernel = np.ones((3, 3), np.uint8)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)

    return img, mask

def analyze_flower_color(image_path, mask=None):
    """
    Compute the mean flower color in RGB, HSV, and LAB color spaces.

    Parameters:
    -----------
    image_path : str or Path
        Path to the image file
    mask : numpy array or None
        Flower mask for the image (non-zero = flower pixel). When None,
        the mask is computed with detect_flowers_simple.

    Returns:
    --------
    dict or None
        Keys 'RGB_mean_R/G/B', 'HSV_mean_H/S/V', 'LAB_mean_L/a/b' (channel
        means over the masked pixels) and 'pixel_count' (number of masked
        pixels). None when the image cannot be read or the mask selects no
        pixels.
        NOTE(review): the means are taken on the arrays cv2.cvtColor
        returns, so they use OpenCV's own channel scaling for HSV and LAB;
        check the OpenCV color conversion docs before comparing with
        values from other tools.
    """
    if mask is None:
        img, mask = detect_flowers_simple(image_path)
    else:
        img = cv2.imread(str(image_path))

    # Both paths can fail to load the image; report that the same way
    if img is None:
        return None

    # Boolean selector shared by all three color spaces
    is_flower = mask > 0

    # Convert to different color spaces and extract flower pixels
    flower_pixels_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)[is_flower]
    flower_pixels_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)[is_flower]
    flower_pixels_lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)[is_flower]

    if len(flower_pixels_rgb) == 0:
        return None

    # Per-channel means over the selected pixels
    rgb_mean = np.mean(flower_pixels_rgb, axis=0)
    hsv_mean = np.mean(flower_pixels_hsv, axis=0)
    lab_mean = np.mean(flower_pixels_lab, axis=0)

    return {
        'RGB_mean_R': rgb_mean[0],
        'RGB_mean_G': rgb_mean[1],
        'RGB_mean_B': rgb_mean[2],
        'HSV_mean_H': hsv_mean[0],
        'HSV_mean_S': hsv_mean[1],
        'HSV_mean_V': hsv_mean[2],
        'LAB_mean_L': lab_mean[0],
        'LAB_mean_a': lab_mean[1],
        'LAB_mean_b': lab_mean[2],
        'pixel_count': len(flower_pixels_rgb),
    }

def process_folders_to_csv(base_path, folder_names, output_folder, 
                           image_range=range(1, 17), exclude_pattern='top'):
    """
    Process multiple folders and create 9 CSV files

    One CSV is written per color channel (RGB_R, RGB_G, RGB_B, HSV_H,
    HSV_S, HSV_V, LAB_L, LAB_a, LAB_b). Each CSV has one row per image
    number and one column per folder; a cell is left empty when no image
    was found or no flower was detected.

    Parameters:
    -----------
    base_path : str
        Base path containing day folders
    folder_names : list
        List of folder names (e.g., ['day0', 'day1', 'day2', ...]);
        any iterable of names is accepted
    output_folder : str
        Where to save CSV files (created if missing)
    image_range : range
        Range of image numbers to process (default: 1-16)
    exclude_pattern : str
        Pattern to exclude from filenames (default: 'top');
        None or '' disables the exclusion
    """

    base_path = Path(base_path)
    output_folder = Path(output_folder)
    output_folder.mkdir(parents=True, exist_ok=True)

    # Both inputs are iterated several times below, so materialize them;
    # this also lets callers pass tuples or other iterables.
    folder_names = list(folder_names)
    image_numbers = list(image_range)

    # CSV name -> key of the value in the dict from analyze_flower_color
    metric_to_stat = {
        'RGB_R': 'RGB_mean_R',
        'RGB_G': 'RGB_mean_G',
        'RGB_B': 'RGB_mean_B',
        'HSV_H': 'HSV_mean_H',
        'HSV_S': 'HSV_mean_S',
        'HSV_V': 'HSV_mean_V',
        'LAB_L': 'LAB_mean_L',
        'LAB_a': 'LAB_mean_a',
        'LAB_b': 'LAB_mean_b',
    }

    # data[metric][image number][folder name] -> mean value
    data = {
        metric: {img_num: {} for img_num in image_numbers}
        for metric in metric_to_stat
    }

    print("=" * 70)
    print("SIMPLIFIED Flower Color Analysis")
    print("=" * 70)
    print("Detection method:")
    print("  ✓ Yellow flower detection (H: 15-35, S: 30+, V: 50+)")
    print("  ✓ Green stem exclusion (H: 35-85, S: 40+, V: 20+)")
    print("  ✓ Minimal post-processing")
    print("=" * 70)

    # Process each folder
    for folder_name in folder_names:
        folder_path = base_path / folder_name

        if not folder_path.exists():
            print(f"\nWarning: {folder_name} not found, skipping...")
            continue

        print(f"\nProcessing {folder_name}...")

        # Process each image number
        for img_num in image_numbers:
            # Find files matching pattern. Extensions are tried in priority
            # order and each glob result is sorted so that the "first match"
            # below does not depend on directory listing order.
            matching_files = []
            for ext in ['.jpeg', '.jpg', '.png', '.JPEG', '.JPG', '.PNG']:
                matching_files.extend(sorted(folder_path.glob(f"*-{img_num}{ext}")))

            # Filter out files with exclude pattern
            if exclude_pattern:
                matching_files = [f for f in matching_files
                                  if exclude_pattern.lower() not in f.name.lower()]

            if not matching_files:
                print(f"  Image {img_num}: Not found")
                continue

            # Use first matching file
            img_path = matching_files[0]

            # Analyze color
            stats = analyze_flower_color(img_path)

            if stats:
                # Store one value per metric for this (image, folder) cell
                for metric, stat_key in metric_to_stat.items():
                    data[metric][img_num][folder_name] = stats[stat_key]

                print(f"  Image {img_num}: ✓")
            else:
                print(f"  Image {img_num}: No flowers detected")

    # Save to CSV files
    print("\n" + "=" * 70)
    print("Saving to CSV files...")
    print("=" * 70)

    for metric_name, metric_data in data.items():
        csv_path = output_folder / f"{metric_name}.csv"

        with open(csv_path, 'w', newline='') as f:
            writer = csv.writer(f)

            # Header row
            writer.writerow(['image_number'] + folder_names)

            # Data rows
            for img_num in sorted(metric_data):
                per_folder = metric_data[img_num]
                row = [img_num]
                for folder in folder_names:
                    # Missing measurements become empty cells
                    if folder in per_folder:
                        row.append(f"{per_folder[folder]:.2f}")
                    else:
                        row.append('')
                writer.writerow(row)

        print(f"✓ {csv_path.name}")

    print("=" * 70)
    print(f"✓ Complete! {len(data)} CSV files saved to {output_folder}")
    print("\nFiles created:")
    print("  - RGB_R.csv, RGB_G.csv, RGB_B.csv")
    print("  - HSV_H.csv, HSV_S.csv, HSV_V.csv")
    print("  - LAB_L.csv, LAB_a.csv, LAB_b.csv")
    print("=" * 70)

# ========================================
# Configuration for the flower color analysis run
# ========================================

# Base path (folder containing day0, day1, etc.).
# Points at the "normalized" folder, i.e. brightness-normalized images.
BASE_PATH = "/Users/jiwoohan/Documents/git/604-project-3/pictures/normalized"

# Folder names (days); each becomes one column in every CSV
FOLDERS = ['day0', 'day1', 'day2', 'day3', 'day4', 'day5']

# Output folder for CSV files (created if it does not exist)
OUTPUT_FOLDER = "/Users/jiwoohan/Documents/git/604-project-3/pictures/flower_color_data"

# Image number range (1-16); the upper bound of range() is exclusive
IMAGE_RANGE = range(1, 17)

# Pattern to exclude (set to None to include all files)
EXCLUDE_PATTERN = 'top'

# ========================================
# Run
# ========================================

# Only run when executed as a script / notebook cell, not on import
if __name__ == "__main__":
    process_folders_to_csv(
        base_path=BASE_PATH,
        folder_names=FOLDERS,
        output_folder=OUTPUT_FOLDER,
        image_range=IMAGE_RANGE,
        exclude_pattern=EXCLUDE_PATTERN
    )

SIMPLIFIED Flower Color Analysis
Detection method:
  ✓ Yellow flower detection (H: 15-35, S: 30+, V: 50+)
  ✓ Green stem exclusion (H: 35-85, S: 40+, V: 20+)
  ✓ Minimal post-processing

Processing day0...
  Image 1: ✓
  Image 2: ✓
  Image 3: ✓
  Image 4: ✓
  Image 5: ✓
  Image 6: ✓
  Image 7: ✓
  Image 8: ✓
  Image 9: ✓
  Image 10: ✓
  Image 11: ✓
  Image 12: ✓
  Image 13: ✓
  Image 14: ✓
  Image 15: ✓
  Image 16: ✓

Processing day1...
  Image 1: ✓
  Image 2: ✓
  Image 3: ✓
  Image 4: ✓
  Image 5: ✓
  Image 6: ✓
  Image 7: ✓
  Image 8: ✓
  Image 9: ✓
  Image 10: ✓
  Image 11: ✓
  Image 12: ✓
  Image 13: ✓
  Image 14: ✓
  Image 15: ✓
  Image 16: ✓

Processing day2...
  Image 1: ✓
  Image 2: ✓
  Image 3: ✓
  Image 4: ✓
  Image 5: ✓
  Image 6: ✓
  Image 7: ✓
  Image 8: ✓
  Image 9: ✓
  Image 10: ✓
  Image 11: ✓
  Image 12: ✓
  Image 13: ✓
  Image 14: ✓
  Image 15: ✓
  Image 16: ✓

Processing day3...
  Image 1: ✓
  Image 2: ✓
  Image 3: ✓
  Image 4: ✓
  Image 5: ✓
  Image 6: ✓
  Image 7: 