In [4]:
import os
import cv2
import numpy as np
from glob import glob
import random

def get_batches(image_folder, id_field=6):
    """Group the ``*.tif`` files of a folder by one underscore-separated filename field.

    Args:
        image_folder: Directory that is searched (non-recursively) for ``*.tif`` files.
        id_field: Index of the ``'_'``-separated field of the file's basename that
            is used as the batch key. The default of 6 keeps the original grouping.

    Returns:
        dict mapping each identifier string to the list of matching file paths.
        Paths are visited in sorted order, so the lists are sorted too.
    """
    batches = {}
    # glob makes no ordering promise; sorting keeps batch contents reproducible.
    for img in sorted(glob(os.path.join(image_folder, '*.tif'))):
        fields = os.path.basename(img).split('_')
        # For "SN6_Train_AOI_11_Rotterdam_PS-RGB_20190804111224_20190804111453_tile_8679.tif"
        # field 6 is "20190804111224" (field 7 would be "20190804111453").
        try:
            identifier = fields[id_field]
        except IndexError:
            # A stray file that does not follow the naming scheme should not abort the run.
            print(f"Warning: Skipping {img}: filename has no '_'-separated field {id_field}")
            continue
        batches.setdefault(identifier, []).append(img)
    return batches

def calculate_batch_avg_and_std_rgb(batch_images):
    """Summarise a batch by the mean and spread of its per-image channel means.

    Each path is read with ``cv2.imread``; paths for which it returns ``None``
    are reported and skipped. For every readable image the mean of each colour
    channel is taken, and the batch statistics are computed over those
    per-image means.

    NOTE(review): the standard deviation is therefore the spread of image means
    across the batch, not a pixel-level standard deviation - confirm that this
    is the intended divisor for the later normalisation.

    Args:
        batch_images: Iterable of image file paths.

    Returns:
        ``((r_avg, g_avg, b_avg), (r_std, g_std, b_std))``. A standard deviation
        of exactly 0 is replaced by 1. If no image could be read, both tuples
        contain three NaNs.
    """
    per_image_means = []  # one (r, g, b) tuple per readable image
    for path in batch_images:
        image = cv2.imread(path)
        if image is None:
            print(f"Warning: Could not read image {path}")
            continue
        # The split is unpacked in blue, green, red order.
        blue, green, red = cv2.split(image)
        per_image_means.append((np.mean(red), np.mean(green), np.mean(blue)))

    if len(per_image_means) == 0:
        nan_triplet = (np.nan, np.nan, np.nan)
        return nan_triplet, nan_triplet

    averages, spreads = [], []
    # zip(*...) regroups the per-image tuples into one sequence per channel (r, g, b).
    for channel_means in zip(*per_image_means):
        averages.append(np.mean(channel_means))
        spread = np.std(channel_means)
        # Swap a zero spread for 1 so it can be used as a divisor.
        spreads.append(1 if spread == 0 else spread)

    return tuple(averages), tuple(spreads)

def calculate_overall_avg_rgb(batches):
    """Average the per-batch channel means over all batches that produced one.

    Args:
        batches: Mapping of batch identifier to a list of image paths.

    Returns:
        Array with the element-wise mean of the batch averages, in the order
        returned by ``calculate_batch_avg_and_std_rgb``.

    Raises:
        ValueError: If no batch yields an average free of NaN values.
    """
    # Lazily compute each batch's average; it is consumed once by the filter below.
    batch_means = (
        calculate_batch_avg_and_std_rgb(image_paths)[0]
        for image_paths in batches.values()
    )
    # Batches whose average contains NaN are left out.
    usable_means = [mean for mean in batch_means if not np.isnan(mean).any()]

    if len(usable_means) == 0:
        raise ValueError("No valid batches found.")

    return np.mean(usable_means, axis=0)

def adjust_image_color(img, batch_avg_rgb, batch_std_rgb, overall_avg_rgb):
    """Standardise an image per channel: ``(pixel - batch mean) / batch std``.

    Args:
        img: H x W x 3 array whose last axis is ordered blue, green, red.
        batch_avg_rgb: Channel means in (r, g, b) order.
        batch_std_rgb: Channel divisors in (r, g, b) order.
        overall_avg_rgb: Accepted so existing callers keep working; it does not
            take part in the computation.

    Returns:
        float32 array of the same H x W x 3 shape, channels still ordered
        blue, green, red. Values may be negative.

    Raises:
        ValueError: If ``img`` is not a 3-channel, 3-dimensional array.
    """
    pixels = np.asarray(img)
    if pixels.ndim != 3 or pixels.shape[2] != 3:
        raise ValueError(f"Expected an H x W x 3 image, got shape {pixels.shape}")

    # The statistics are given as (r, g, b); reverse them to line up with the
    # image's (b, g, r) channel axis. Casting them to float32 keeps the result
    # float32 no matter how NumPy promotes float64 scalars against float32 arrays.
    mean_bgr = np.asarray(batch_avg_rgb, dtype=np.float32)[::-1]
    std_bgr = np.asarray(batch_std_rgb, dtype=np.float32)[::-1]

    # Broadcasting applies each channel's mean/std along the last axis.
    return (pixels.astype(np.float32) - mean_bgr) / std_bgr

def process_images_to_tif(batches, overall_avg_rgb, output_folder):
    """Standardise every image of every batch and save it as a float32 TIFF.

    For each batch the statistics are computed with
    ``calculate_batch_avg_and_std_rgb``, each readable image is passed through
    ``adjust_image_color`` and the result is written to ``output_folder`` under
    the source file's basename. One randomly chosen pixel per image is printed
    as a quick sanity check.

    Note: because the output keeps the source basename, pointing
    ``output_folder`` at the input folder overwrites the source images.

    Args:
        batches: Mapping of batch identifier to a list of image paths.
        overall_avg_rgb: Forwarded unchanged to ``adjust_image_color``.
        output_folder: Destination directory; created if it does not exist.
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(output_folder, exist_ok=True)

    for batch_images in batches.values():
        batch_avg_rgb, batch_std_rgb = calculate_batch_avg_and_std_rgb(batch_images)

        for img_path in batch_images:
            img = cv2.imread(img_path)
            if img is None:
                print(f"Warning: Could not read image {img_path}")
                continue
            adjusted_img = adjust_image_color(img, batch_avg_rgb, batch_std_rgb, overall_avg_rgb)

            # Pick a random pixel to check its values (printed in the array's channel order)
            height, width, _ = adjusted_img.shape
            rand_x = random.randint(0, width - 1)
            rand_y = random.randint(0, height - 1)
            random_pixel_rgb = adjusted_img[rand_y, rand_x]
            print(f"Random pixel at ({rand_x}, {rand_y}) in {os.path.basename(img_path)}: {random_pixel_rgb}")

            # Save under the source basename, as float32 to keep fractional and negative values
            tif_file_path = os.path.join(output_folder, os.path.basename(img_path))

            # NOTE(review): compression value 1 is presumably TIFF's "no compression" code - confirm.
            written = cv2.imwrite(
                tif_file_path,
                adjusted_img.astype(np.float32),
                [cv2.IMWRITE_TIFF_COMPRESSION, 1],
            )
            # imwrite signals failure through its return value; surface it instead of losing it.
            if not written:
                print(f"Warning: Could not write image {tif_file_path}")

def main(image_folder, output_folder):
    """Batch the TIFFs in ``image_folder`` and write adjusted TIFFs to ``output_folder``.

    The overall average is computed before processing starts, so a folder with
    no valid batch raises from ``calculate_overall_avg_rgb`` before anything is
    written.
    """
    grouped = get_batches(image_folder)
    process_images_to_tif(grouped, calculate_overall_avg_rgb(grouped), output_folder)

# Example usage: runs the whole TIFF pipeline as soon as this cell executes.
# Both folders are absolute paths on the author's machine - adjust before re-running.
image_folder = r'D:\City Dataset\SpaceNet6_city_dataset_cleaned' 
output_folder = r'D:\City Dataset\cleaned_tif'
main(image_folder, output_folder)


Random pixel at (276, 28) in SN6_Train_AOI_11_Rotterdam_PS-RGB_20190804111224_20190804111453_tile_8679.tif: [-1.6737188 -1.7519639 -1.9959457]
Random pixel at (256, 146) in SN6_Train_AOI_11_Rotterdam_PS-RGB_20190804111224_20190804111453_tile_8681.tif: [7.2032013 6.7878666 5.4377418]
Random pixel at (14, 95) in SN6_Train_AOI_11_Rotterdam_PS-RGB_20190804111224_20190804111453_tile_8683.tif: [-0.9018128  -0.02366477 -0.42267844]
Random pixel at (49, 226) in SN6_Train_AOI_11_Rotterdam_PS-RGB_20190804111224_20190804111453_tile_8685.tif: [-2.0596719 -2.0061255 -2.074609 ]
Random pixel at (262, 388) in SN6_Train_AOI_11_Rotterdam_PS-RGB_20190804111224_20190804111453_tile_8687.tif: [-1.8666954 -1.8536284 -1.8779507]
Random pixel at (217, 28) in SN6_Train_AOI_11_Rotterdam_PS-RGB_20190804111224_20190804111453_tile_8689.tif: [-1.8184513 -1.5994668 -1.091317 ]
Random pixel at (246, 150) in SN6_Train_AOI_11_Rotterdam_PS-RGB_20190804111224_20190804111453_tile_8691.tif: [4.74275   4.5512443 4.2577915]


In [20]:
import os
import cv2
import numpy as np
from glob import glob
import random

def get_batches(image_folder, id_field=6):
    """Group the ``*.tif`` files of a folder by one underscore-separated filename field.

    Args:
        image_folder: Directory that is searched (non-recursively) for ``*.tif`` files.
        id_field: Index of the ``'_'``-separated field of the file's basename that
            is used as the batch key. The default of 6 keeps the original grouping.

    Returns:
        dict mapping each identifier string to the list of matching file paths.
        Paths are visited in sorted order, so the lists are sorted too.
    """
    batches = {}
    # glob makes no ordering promise; sorting keeps batch contents reproducible.
    for img in sorted(glob(os.path.join(image_folder, '*.tif'))):
        fields = os.path.basename(img).split('_')
        # For "SN6_Train_AOI_11_Rotterdam_PS-RGB_20190804111224_20190804111453_tile_8679.tif"
        # field 6 is "20190804111224" (field 7 would be "20190804111453").
        try:
            identifier = fields[id_field]
        except IndexError:
            # A stray file that does not follow the naming scheme should not abort the run.
            print(f"Warning: Skipping {img}: filename has no '_'-separated field {id_field}")
            continue
        batches.setdefault(identifier, []).append(img)
    return batches

def calculate_batch_avg_and_std_rgb(batch_images):
    """Summarise a batch by the mean and spread of its per-image channel means.

    Each path is read with ``cv2.imread``; paths for which it returns ``None``
    are reported and skipped. For every readable image the mean of each colour
    channel is taken, and the batch statistics are computed over those
    per-image means.

    NOTE(review): the standard deviation is therefore the spread of image means
    across the batch, not a pixel-level standard deviation - confirm that this
    is the intended divisor for the later normalisation.

    Args:
        batch_images: Iterable of image file paths.

    Returns:
        ``((r_avg, g_avg, b_avg), (r_std, g_std, b_std))``. A standard deviation
        of exactly 0 is replaced by 1. If no image could be read, both tuples
        contain three NaNs.
    """
    per_image_means = []  # one (r, g, b) tuple per readable image
    for path in batch_images:
        image = cv2.imread(path)
        if image is None:
            print(f"Warning: Could not read image {path}")
            continue
        # The split is unpacked in blue, green, red order.
        blue, green, red = cv2.split(image)
        per_image_means.append((np.mean(red), np.mean(green), np.mean(blue)))

    if len(per_image_means) == 0:
        nan_triplet = (np.nan, np.nan, np.nan)
        return nan_triplet, nan_triplet

    averages, spreads = [], []
    # zip(*...) regroups the per-image tuples into one sequence per channel (r, g, b).
    for channel_means in zip(*per_image_means):
        averages.append(np.mean(channel_means))
        spread = np.std(channel_means)
        # Swap a zero spread for 1 so it can be used as a divisor.
        spreads.append(1 if spread == 0 else spread)

    return tuple(averages), tuple(spreads)

def calculate_overall_avg_rgb(batches):
    """Average the per-batch channel means over all batches that produced one.

    Args:
        batches: Mapping of batch identifier to a list of image paths.

    Returns:
        Array with the element-wise mean of the batch averages, in the order
        returned by ``calculate_batch_avg_and_std_rgb``.

    Raises:
        ValueError: If no batch yields an average free of NaN values.
    """
    # Lazily compute each batch's average; it is consumed once by the filter below.
    batch_means = (
        calculate_batch_avg_and_std_rgb(image_paths)[0]
        for image_paths in batches.values()
    )
    # Batches whose average contains NaN are left out.
    usable_means = [mean for mean in batch_means if not np.isnan(mean).any()]

    if len(usable_means) == 0:
        raise ValueError("No valid batches found.")

    return np.mean(usable_means, axis=0)

def adjust_image_color(img, batch_avg_rgb, batch_std_rgb, overall_avg_rgb):
    """Standardise an image per channel: ``(pixel - batch mean) / batch std``.

    Args:
        img: H x W x 3 array whose last axis is ordered blue, green, red.
        batch_avg_rgb: Channel means in (r, g, b) order.
        batch_std_rgb: Channel divisors in (r, g, b) order.
        overall_avg_rgb: Accepted so existing callers keep working; it does not
            take part in the computation.

    Returns:
        float32 array of the same H x W x 3 shape, channels still ordered
        blue, green, red. Values may be negative.

    Raises:
        ValueError: If ``img`` is not a 3-channel, 3-dimensional array.
    """
    pixels = np.asarray(img)
    if pixels.ndim != 3 or pixels.shape[2] != 3:
        raise ValueError(f"Expected an H x W x 3 image, got shape {pixels.shape}")

    # The statistics are given as (r, g, b); reverse them to line up with the
    # image's (b, g, r) channel axis. Casting them to float32 keeps the result
    # float32 no matter how NumPy promotes float64 scalars against float32 arrays,
    # which matters when the array is stored without a later explicit cast.
    mean_bgr = np.asarray(batch_avg_rgb, dtype=np.float32)[::-1]
    std_bgr = np.asarray(batch_std_rgb, dtype=np.float32)[::-1]

    # Broadcasting applies each channel's mean/std along the last axis.
    return (pixels.astype(np.float32) - mean_bgr) / std_bgr

def process_images_to_npz(batches, overall_avg_rgb, output_folder):
    """Standardise every image of every batch and save each as an ``.npz`` archive.

    For each batch the statistics are computed with
    ``calculate_batch_avg_and_std_rgb``, each readable image is passed through
    ``adjust_image_color`` and the result is stored under the key ``image`` in
    ``<source stem>.npz`` inside ``output_folder``. One randomly chosen pixel
    per image is printed as a quick sanity check.

    Args:
        batches: Mapping of batch identifier to a list of image paths.
        overall_avg_rgb: Forwarded unchanged to ``adjust_image_color``.
        output_folder: Destination directory; created if it does not exist.
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(output_folder, exist_ok=True)

    for batch_images in batches.values():
        batch_avg_rgb, batch_std_rgb = calculate_batch_avg_and_std_rgb(batch_images)

        for img_path in batch_images:
            img = cv2.imread(img_path)
            if img is None:
                print(f"Warning: Could not read image {img_path}")
                continue
            adjusted_img = adjust_image_color(img, batch_avg_rgb, batch_std_rgb, overall_avg_rgb)

            # Pick a random pixel to check its values (printed in the array's channel order)
            height, width, _ = adjusted_img.shape
            rand_x = random.randint(0, width - 1)
            rand_y = random.randint(0, height - 1)
            random_pixel_rgb = adjusted_img[rand_y, rand_x]
            print(f"Random pixel at ({rand_x}, {rand_y}) in {os.path.basename(img_path)}: {random_pixel_rgb}")

            # Swap only the final extension. str.replace('.tif', ...) would also
            # rewrite a '.tif' in the middle of the name and would miss '.TIF'.
            stem, _ext = os.path.splitext(os.path.basename(img_path))
            npz_file_path = os.path.join(output_folder, stem + '.npz')

            # Save as .npz preserving the floating point values
            np.savez(npz_file_path, image=adjusted_img)

def main(image_folder, output_folder):
    """Batch the TIFFs in ``image_folder`` and write adjusted ``.npz`` files to ``output_folder``.

    The overall average is computed before processing starts, so a folder with
    no valid batch raises from ``calculate_overall_avg_rgb`` before anything is
    written.
    """
    grouped = get_batches(image_folder)
    process_images_to_npz(grouped, calculate_overall_avg_rgb(grouped), output_folder)

# Example usage: runs the whole .npz pipeline as soon as this cell executes.
# Both folders are absolute paths on the author's machine - adjust before re-running.
image_folder = r'D:\City Dataset\SpaceNet6_city_dataset_cleaned' 
output_folder = r'D:\City Dataset\cleaned_npz'
main(image_folder, output_folder)


Random pixel at (338, 61) in SN6_Train_AOI_11_Rotterdam_PS-RGB_20190804111224_20190804111453_tile_8679.tif: [-1.3842541 -1.5486345 -1.8779507]
Random pixel at (114, 326) in SN6_Train_AOI_11_Rotterdam_PS-RGB_20190804111224_20190804111453_tile_8681.tif: [2.6682527 2.4162867 2.1338809]
Random pixel at (310, 313) in SN6_Train_AOI_11_Rotterdam_PS-RGB_20190804111224_20190804111453_tile_8683.tif: [-0.41937152 -0.22699407 -0.34401506]
Random pixel at (146, 223) in SN6_Train_AOI_11_Rotterdam_PS-RGB_20190804111224_20190804111453_tile_8685.tif: [-1.7702072 -1.7519639 -1.8779507]
Random pixel at (205, 62) in SN6_Train_AOI_11_Rotterdam_PS-RGB_20190804111224_20190804111453_tile_8687.tif: [-2.2044044 -1.7519639 -1.4846338]
Random pixel at (31, 239) in SN6_Train_AOI_11_Rotterdam_PS-RGB_20190804111224_20190804111453_tile_8689.tif: [-1.1430335 -1.7011315 -1.1699804]
Random pixel at (324, 85) in SN6_Train_AOI_11_Rotterdam_PS-RGB_20190804111224_20190804111453_tile_8691.tif: [-2.7833338  -2.6161134   0.206

In [19]:
import tifffile as tiff
import numpy as np

def read_image_rgb_values_with_tifffile(image_path):
    """Load an image with tifffile, print a short summary and return the array.

    The summary contains the array shape and, for anything that is not a 2-D
    array, the value at index ``[0, 0]`` plus the value at one random position.

    Args:
        image_path: Path of the file to read.

    Returns:
        The loaded array, or ``None`` when any step raised; the exception is
        printed instead of propagated. Note that this also covers failures
        after a successful read (e.g. an array that is not exactly 3-D).
    """
    try:
        pixels = tiff.imread(image_path)

        print(f"Image loaded successfully with tifffile: {image_path}")
        print(f"Image dimensions: {pixels.shape}")

        if len(pixels.shape) != 2:
            # Multi-channel case: show the first pixel and one random pixel.
            print(f"Sample RGB value at (0,0): {pixels[0, 0]}")

            n_rows, n_cols, _ = pixels.shape
            # Draw the column first, then the row.
            col = np.random.randint(0, n_cols)
            row = np.random.randint(0, n_rows)
            print(f"Sample RGB value at random pixel ({col},{row}): {pixels[row, col]}")
        else:
            print("The image is a single-channel grayscale image.")

        return pixels
    except Exception as e:
        print(f"Error: Could not read image {image_path}. Error: {e}")
        return None

# Example usage: inspect one tile. The path is absolute and specific to the
# author's machine - adjust before re-running.
image_path = r'D:\Research\hierarchical-bayesian-model-validation\dataset-preparation\SpaceNet6_city_dataset\SN6_Train_AOI_11_Rotterdam_PS-RGB_20190804111224_20190804111453_tile_8679.tif'
# rgb_values holds the returned array, or None if the helper reported an error.
rgb_values = read_image_rgb_values_with_tifffile(image_path)


Image loaded successfully with tifffile: D:\Research\hierarchical-bayesian-model-validation\dataset-preparation\SpaceNet6_city_dataset\SN6_Train_AOI_11_Rotterdam_PS-RGB_20190804111224_20190804111453_tile_8679.tif
Image dimensions: (400, 400, 3)
Sample RGB value at (0,0): [3.077841  3.382101  3.2954264]
Sample RGB value at random pixel (245,65): [-2.1139407 -1.8536284 -1.7702072]


In [13]:
# Relies on `os` and `image_path` already being defined in the kernel by earlier cells.
print(os.path.exists(image_path))  # Prints True if the path exists, False otherwise


True
