In [None]:
import os
import cv2
import numpy as np
from glob import glob

def get_batches(image_folder):
    """Group the ``.jpg`` files in *image_folder* by filename prefix.

    The batch key is the first nine characters of each file's base name
    (the whole name when it is shorter than that).

    Args:
        image_folder: Directory searched (non-recursively) for ``*.jpg``.

    Returns:
        A dict mapping each prefix to the list of matching file paths.
    """
    grouped = {}
    for path in glob(os.path.join(image_folder, '*.jpg')):
        key = os.path.basename(path)[:9]
        grouped.setdefault(key, []).append(path)
    return grouped

def calculate_batch_avg_and_std_rgb(batch_images):
    """Summarise the per-image mean colour of a batch.

    Every readable image contributes one mean value per channel. The
    function returns the average of those per-image means and their
    standard deviation, i.e. the spread *between images*, not between
    pixels.

    Args:
        batch_images: Iterable of image file paths.

    Returns:
        ``((r_avg, g_avg, b_avg), (r_std, g_std, b_std))``. A standard
        deviation of exactly zero is reported as ``1`` so that it can be
        used as a divisor. If no image could be read, both tuples contain
        three NaNs.
    """
    per_image_rgb = []
    for path in batch_images:
        image = cv2.imread(path)
        if image is None:
            print(f"Warning: Could not read image {path}")
            continue
        # OpenCV stores channels as B, G, R; record them as R, G, B.
        blue, green, red = cv2.split(image)
        per_image_rgb.append((np.mean(red), np.mean(green), np.mean(blue)))

    if len(per_image_rgb) == 0:
        return (np.nan, np.nan, np.nan), (np.nan, np.nan, np.nan)

    # Transpose the list of (r, g, b) triples into one sequence per channel.
    channels = [list(values) for values in zip(*per_image_rgb)]
    averages = tuple(np.mean(values) for values in channels)
    spreads = tuple(np.std(values) for values in channels)
    # A zero spread would make later division blow up; substitute 1.
    spreads = tuple(1 if spread == 0 else spread for spread in spreads)

    return averages, spreads

def calculate_overall_avg_rgb(batches):
    """Average the mean colour of every usable batch.

    Each batch counts once, regardless of how many images it holds.
    Batches whose mean contains NaN are ignored.

    Args:
        batches: Mapping of batch prefix to a list of image paths.

    Returns:
        A length-3 NumPy array holding the overall (R, G, B) average.

    Raises:
        ValueError: If no batch produced a NaN-free mean.
    """
    per_batch_means = [
        calculate_batch_avg_and_std_rgb(paths)[0] for paths in batches.values()
    ]
    usable_means = [mean for mean in per_batch_means if not np.isnan(mean).any()]

    if len(usable_means) == 0:
        raise ValueError("No valid batches found.")

    return np.mean(usable_means, axis=0)

def adjust_image_color(img, batch_avg_rgb, batch_std_rgb, overall_avg_rgb,
                       overall_std_rgb=None):
    """Re-centre an image's colour from its batch mean onto the overall mean.

    For every channel the batch mean is subtracted and the overall mean is
    added, so that a batch ends up with the same average colour as the
    whole data set. By default the contrast of the image is left untouched.
    When ``overall_std_rgb`` is supplied, deviations from the mean are also
    rescaled by ``overall_std / batch_std`` per channel.

    Args:
        img: Image array whose last axis holds the channels in B, G, R
            order (OpenCV layout).
        batch_avg_rgb: (R, G, B) mean of the batch the image belongs to.
        batch_std_rgb: (R, G, B) standard deviation of the batch. Only
            consulted when ``overall_std_rgb`` is given; zeros are treated
            as 1 to avoid division by zero.
        overall_avg_rgb: (R, G, B) mean the image should be aligned to.
        overall_std_rgb: Optional (R, G, B) target standard deviation.

    Returns:
        A ``uint8`` array with the same shape as ``img``; values are
        rounded and clipped to ``[0, 255]`` so the result can be written
        directly as an 8-bit image.
    """
    # Statistics are given as R, G, B but the pixel data is B, G, R.
    batch_mean_bgr = np.asarray(batch_avg_rgb, dtype=np.float64)[::-1]
    target_mean_bgr = np.asarray(overall_avg_rgb, dtype=np.float64)[::-1]

    if overall_std_rgb is None:
        scale_bgr = np.ones(3, dtype=np.float64)
    else:
        batch_std_bgr = np.asarray(batch_std_rgb, dtype=np.float64)[::-1]
        batch_std_bgr = np.where(batch_std_bgr == 0, 1.0, batch_std_bgr)
        target_std_bgr = np.asarray(overall_std_rgb, dtype=np.float64)[::-1]
        scale_bgr = target_std_bgr / batch_std_bgr

    # Work in float so the subtraction cannot wrap around in uint8.
    pixels = np.asarray(img, dtype=np.float64)
    adjusted = (pixels - batch_mean_bgr) * scale_bgr + target_mean_bgr

    # Image writers expect 8-bit data: round, then saturate to the valid range.
    return np.clip(np.rint(adjusted), 0, 255).astype(np.uint8)

def process_and_save_images(batches, overall_avg_rgb, output_folder):
    """Colour-adjust every image of every batch and write it to disk.

    For each batch the batch statistics are computed, then every readable
    image is passed through ``adjust_image_color`` and saved under its
    original file name inside ``output_folder``. Unreadable images and
    failed writes are reported with a warning and skipped.

    Args:
        batches: Mapping of batch prefix to a list of image paths.
        overall_avg_rgb: (R, G, B) mean forwarded to ``adjust_image_color``.
        output_folder: Destination directory; created if it is missing.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder, exist_ok=True)

    for batch_images in batches.values():
        batch_avg_rgb, batch_std_rgb = calculate_batch_avg_and_std_rgb(batch_images)
        for img_path in batch_images:
            img = cv2.imread(img_path)
            if img is None:
                print(f"Warning: Could not read image {img_path}")
                continue
            adjusted_img = adjust_image_color(img, batch_avg_rgb, batch_std_rgb, overall_avg_rgb)
            output_path = os.path.join(output_folder, os.path.basename(img_path))
            # imwrite signals failure through its return value, not an exception.
            if not cv2.imwrite(output_path, adjusted_img):
                print(f"Warning: Could not write image {output_path}")

def main(image_folder, output_folder):
    """Run the full pipeline: group, compute the overall mean, adjust, save.

    Args:
        image_folder: Directory containing the source ``.jpg`` images.
        output_folder: Directory that receives the adjusted images.
    """
    grouped = get_batches(image_folder)
    target_rgb = calculate_overall_avg_rgb(grouped)
    process_and_save_images(grouped, target_rgb, output_folder)

# Example usage: run the pipeline on a local data set.
# Raw strings keep the Windows backslashes literal; the escaped equivalents
# are 'D:\\Second Dataset\\uncleaned' and 'D:\\Second Dataset\\cleaned6'.
image_folder = r'D:\Second Dataset\uncleaned'
output_folder = r'D:\Second Dataset\cleaned6'
main(image_folder=image_folder, output_folder=output_folder)
