In [2]:
import os
import cv2
import numpy as np
from glob import glob
import random

def get_batches(image_folder, prefix_length=9):
    """Group the ``.jpg`` files of a folder into batches keyed by name prefix.

    Parameters
    ----------
    image_folder : str
        Directory searched (non-recursively) for files matching ``*.jpg``.
    prefix_length : int, optional
        Number of leading characters of each file's base name used as the
        batch key. Defaults to 9, the width that was previously hard-coded.

    Returns
    -------
    dict
        Maps each prefix to the list of file paths whose base name starts
        with it. Paths inside a batch, and the batches themselves, appear in
        sorted path order. An empty dict is returned when nothing matches.
    """
    batches = {}
    # glob() yields entries in arbitrary, filesystem-dependent order; sorting
    # makes batch contents and processing order reproducible between runs.
    for img_path in sorted(glob(os.path.join(image_folder, '*.jpg'))):
        prefix = os.path.basename(img_path)[:prefix_length]
        batches.setdefault(prefix, []).append(img_path)
    return batches

def calculate_batch_avg_and_std_rgb(batch_images):
    """Summarise a batch of images by the mean and spread of their channel means.

    Each path is loaded with ``cv2.imread``; unreadable files trigger a
    warning on stdout and are skipped. For every readable image the mean of
    each colour plane is taken, and the batch statistics are the mean and
    standard deviation of those per-image means.

    Returns
    -------
    tuple
        ``((r_avg, g_avg, b_avg), (r_std, g_std, b_std))``. Both triples are
        filled with NaN when no image could be read. A standard deviation of
        exactly 0 is reported as 1 so that callers can divide by it.

    NOTE(review): the standard deviation is computed over per-image channel
    means, not over individual pixels -- confirm this is the intended spread
    before relying on it for pixel-level standardisation.
    """
    per_image_means = []  # one (r, g, b) triple per readable image
    for path in batch_images:
        image = cv2.imread(path)
        if image is None:
            print(f"Warning: Could not read image {path}")
            continue
        # OpenCV delivers the planes in blue, green, red order.
        blue, green, red = cv2.split(image)
        per_image_means.append((np.mean(red), np.mean(green), np.mean(blue)))

    if len(per_image_means) == 0:
        return (np.nan, np.nan, np.nan), (np.nan, np.nan, np.nan)

    averages = []
    spreads = []
    for channel in range(3):  # 0 = red, 1 = green, 2 = blue
        values = [triple[channel] for triple in per_image_means]
        averages.append(np.mean(values))
        spread = np.std(values)
        # Guard against division by zero downstream.
        spreads.append(1 if spread == 0 else spread)

    return tuple(averages), tuple(spreads)

def calculate_overall_avg_rgb(batches):
    """Average the per-batch mean colours into one dataset-wide (r, g, b) mean.

    Parameters
    ----------
    batches : dict
        Maps a batch key to the list of image paths in that batch.

    Returns
    -------
    numpy.ndarray
        Element-wise mean of the batch averages, in (r, g, b) order. Every
        batch carries equal weight regardless of how many images it holds.

    Raises
    ------
    ValueError
        If no batch produced a NaN-free average.
    """
    usable_means = []
    for image_paths in batches.values():
        mean_rgb = calculate_batch_avg_and_std_rgb(image_paths)[0]
        # Batches without a readable image report NaN and are left out.
        if np.isnan(mean_rgb).any():
            continue
        usable_means.append(mean_rgb)

    if len(usable_means) == 0:
        raise ValueError("No valid batches found.")

    return np.mean(usable_means, axis=0)

def adjust_image_color(img, batch_avg_rgb, batch_std_rgb, overall_avg_rgb):
    """Standardise each colour plane of an image with its batch statistics.

    Parameters
    ----------
    img : numpy.ndarray
        Three-channel image whose planes are split in blue, green, red order.
    batch_avg_rgb, batch_std_rgb : sequence
        Batch mean and standard deviation, indexed 0 = red, 1 = green,
        2 = blue.
    overall_avg_rgb : sequence
        Accepted but not used by this function.
        NOTE(review): possibly meant to re-centre the result on the
        dataset-wide mean -- confirm the intended behaviour.

    Returns
    -------
    numpy.ndarray
        ``(plane - mean) / std`` for every plane, merged back in blue, green,
        red order. Values are not clipped or converted back to 8-bit.
    """
    blue, green, red = cv2.split(img)

    # Statistics are stored red-first while OpenCV planes are blue-first, so
    # line the planes up in red, green, blue order before indexing.
    standardized = []
    for index, plane in enumerate((red, green, blue)):
        standardized.append((plane - batch_avg_rgb[index]) / batch_std_rgb[index])
    red_out, green_out, blue_out = standardized

    return cv2.merge([blue_out, green_out, red_out])

def process_images_to_npz(batches, overall_avg_rgb, output_folder):
    """Standardise every image batch by batch and store each result as ``.npz``.

    Parameters
    ----------
    batches : dict
        Maps a batch key to the list of image paths in that batch.
    overall_avg_rgb : sequence
        Dataset-wide mean colour; passed through to ``adjust_image_color``.
    output_folder : str
        Destination directory. Created, including parents, when missing.

    Side effects
    ------------
    * Writes one compressed archive per readable image, named after the
      image's base name with its extension replaced by ``.npz``; the adjusted
      array is stored under the key ``image``.
    * Prints a warning for each unreadable image and for each batch without
      usable statistics, plus one randomly sampled pixel per processed image
      as a sanity check.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_folder, exist_ok=True)

    for batch_prefix, batch_images in batches.items():
        batch_avg_rgb, batch_std_rgb = calculate_batch_avg_and_std_rgb(batch_images)

        # NaN statistics mean no image of this batch could be read; adjusting
        # with them would only ever produce all-NaN arrays, so skip the batch.
        if np.isnan(batch_avg_rgb).any():
            print(f"Warning: No readable images in batch {batch_prefix}; skipping")
            continue

        for img_path in batch_images:
            img = cv2.imread(img_path)
            if img is None:
                print(f"Warning: Could not read image {img_path}")
                continue
            adjusted_img = adjust_image_color(img, batch_avg_rgb, batch_std_rgb, overall_avg_rgb)

            # Pick a random pixel to check its adjusted channel values
            height, width, _ = adjusted_img.shape
            rand_x = random.randint(0, width - 1)
            rand_y = random.randint(0, height - 1)
            random_pixel_rgb = adjusted_img[rand_y, rand_x]
            print(f"Random pixel at ({rand_x}, {rand_y}) in {os.path.basename(img_path)}: {random_pixel_rgb}")

            # Swap only the real extension: str.replace('.jpg', ...) would
            # also rewrite '.jpg' occurring earlier in the name and would
            # miss extensions written in a different case.
            stem, _ext = os.path.splitext(os.path.basename(img_path))
            npz_file_path = os.path.join(output_folder, stem + '.npz')

            # Save as .npz using compression to reduce file size
            np.savez_compressed(npz_file_path, image=adjusted_img)

def main(image_folder, output_folder):
    """Run the full pipeline: group images, compute the global mean, export.

    Parameters
    ----------
    image_folder : str
        Directory holding the source ``.jpg`` images.
    output_folder : str
        Directory that receives the generated ``.npz`` files.

    Raises
    ------
    ValueError
        Propagated from ``calculate_overall_avg_rgb`` when no batch yields a
        usable average (for example when the folder has no readable images).
    """
    grouped_images = get_batches(image_folder)
    dataset_mean_rgb = calculate_overall_avg_rgb(grouped_images)
    process_images_to_npz(grouped_images, dataset_mean_rgb, output_folder)

# Example run: standardise every image found in the input folder and write
# the results as .npz files into the output folder.
# NOTE(review): both paths are absolute and machine-specific; adjust before
# running elsewhere.
image_folder = r'D:\Second Dataset\rgb'  # input images; alternative source: 'D:\\Second Dataset\\uncleaned'
output_folder = r'D:\Second Dataset\cleaned'  # destination, i.e. 'D:\\Second Dataset\\cleaned'
main(image_folder=image_folder, output_folder=output_folder)


Random pixel at (144, 226) in 11IE4DKTR_11556-9586-12068-10098.jpg: [22.56248711  2.53535438  3.26765996]
Random pixel at (412, 417) in 11IE4DKTR_6121-684-6633-1196.jpg: [47.31105546  4.20939539  5.38476297]
Random pixel at (149, 282) in 11TAQJ6ET_1124-8510-1636-9022.jpg: [-2.86887016 -0.69248859 -1.01113223]
Random pixel at (149, 508) in 11TAQJ6ET_1148-8089-1660-8601.jpg: [-1.99000399 -1.55749969 -1.45715617]
Random pixel at (457, 193) in 11TAQJ6ET_1288-961-1800-1473.jpg: [0.55870789 2.14116847 2.04731764]
Random pixel at (88, 147) in 11TAQJ6ET_1422-8427-1934-8939.jpg: [-1.99000399 -1.19956406 -1.26600306]
Random pixel at (439, 386) in 11TAQJ6ET_1567-8177-2079-8689.jpg: [-3.30830324 -1.49784375 -1.64830929]
Random pixel at (116, 215) in 11TAQJ6ET_1749-948-2261-1460.jpg: [0.55870789 1.18667346 1.34642288]
Random pixel at (292, 93) in 11TAQJ6ET_1995-8426-2507-8938.jpg: [-2.86887016 -1.28904797 -1.39343847]
Random pixel at (283, 6) in 11TAQJ6ET_2034-926-2546-1438.jpg: [0.38293465 1.27615