In [1]:
import os

# Switch the working directory to the parent folder; the relative paths used
# by later cells are resolved from there.
# NOTE: not idempotent - every re-run of this cell climbs one more level.
os.chdir(os.pardir)
print(f"Current Directory: {os.getcwd()}")

Current Directory: d:\workspace\iscat


In [2]:
def get_nd2_paths(base_path, option):
    """
    Collect the .nd2 files stored in the ``option`` subfolder of Metasurface directories.

    The tree below ``base_path`` is walked recursively. For every directory whose
    name contains 'Metasurface', its direct child folder named ``option`` is
    listed (not recursed into) and the regular files ending in '.nd2' are kept.

    Args:
        base_path (str): The base directory to search.
        option (str): The folder to consider ('Brightfield' or 'Laser').

    Returns:
        list: Paths to the .nd2 files, sorted so the result is the same on
            every run regardless of the directory listing order.

    Raises:
        ValueError: If ``option`` is neither 'Brightfield' nor 'Laser'.
    """
    if option not in {'Brightfield', 'Laser'}:
        raise ValueError("Option must be 'Brightfield' or 'Laser'")

    nd2_paths = []

    for root, _dirs, _files in os.walk(base_path):
        # Only directories whose own name mentions 'Metasurface' are of interest.
        if 'Metasurface' not in os.path.basename(root):
            continue

        target_folder = os.path.join(root, option)
        if not os.path.isdir(target_folder):
            continue

        for entry in os.listdir(target_folder):
            candidate = os.path.join(target_folder, entry)
            # os.listdir also returns sub-directories; keep regular files only.
            if entry.endswith('.nd2') and os.path.isfile(candidate):
                nd2_paths.append(candidate)

    # os.walk / os.listdir yield entries in arbitrary order; sort so that
    # index-based access by callers is reproducible.
    nd2_paths.sort()
    return nd2_paths
# os.path.join keeps the literal free of backslash escapes and makes the
# path independent of the platform's separator.
a = get_nd2_paths(os.path.join("dataset", "2024_11_12", "Metasurface", "Chip_01"), "Brightfield")

In [20]:
import os
import h5py
import numpy as np
from nd2 import ND2File

def nd2_to_hdf5(nd2_paths, output_hdf5_path, patch_size=(256, 256), overlap=0):
    """
    Load ND2 files, extract image and mask patches, and save them into an HDF5 file with metadata.

    For every ND2 file the three mask files "Captured Cy5_mask.npy",
    "Captured FITC_mask.npy" and "Captured TRITC_mask.npy" are loaded from the
    directory that contains the ND2 file. Image and masks are tiled on the same
    grid; tiles that would extend past the image border are dropped.

    The output file contains two resizable datasets:
        * "image_patches" with shape (N, Z, patch_height, patch_width)
        * "mask_patches" with shape (N, 3, patch_height, patch_width), dtype uint8,
          where the mask channels follow the order listed above.

    Args:
        nd2_paths (list of str): Paths to ND2 files.
        output_hdf5_path (str): Path to the output HDF5 file (overwritten if it exists).
        patch_size (tuple): Size of the patches (height, width).
        overlap (int): Overlap between patches in pixels. Must be smaller than
            both patch dimensions; the tiling stride is ``patch dimension - overlap``.

    Returns:
        None

    Raises:
        ValueError: If ``patch_size``/``overlap`` describe an invalid tiling, if an
            ND2 image is not 3D, if its slice count differs from previously
            processed files, or if a mask shape does not match the image.
        FileNotFoundError: If one of the expected mask files is missing.
    """
    patch_height, patch_width = patch_size

    # Validate the tiling geometry BEFORE the output file is opened in 'w'
    # mode, so an invalid call cannot truncate an existing file.
    if patch_height <= 0 or patch_width <= 0:
        raise ValueError(f"patch_size must contain positive values, got {patch_size}")
    if overlap >= min(patch_height, patch_width):
        # A stride of zero makes range() fail with an unrelated message and a
        # negative stride silently yields no patches at all.
        raise ValueError(
            f"overlap ({overlap}) must be smaller than both patch dimensions {patch_size}"
        )
    step_y = patch_height - overlap
    step_x = patch_width - overlap

    # Metadata for masks; the dict order defines the channel order in "mask_patches".
    mask_metadata = {
        "Captured Cy5_mask.npy": "Cy5: 80nm",
        "Captured FITC_mask.npy": "FITC: 300nm",
        "Captured TRITC_mask.npy": "TRITC: 1300nm",
    }
    mask_names = list(mask_metadata)

    with h5py.File(output_hdf5_path, 'w') as hdf5_file:
        # Both datasets are created lazily on the first patch because the
        # slice count and dtype are only known once an image has been read.
        image_dataset = None
        mask_dataset = None

        # Add general metadata to the HDF5 file
        hdf5_file.attrs["description"] = "Image and mask patches with nanometer scale metadata."
        hdf5_file.attrs["mask_info"] = ", ".join(
            f"class:{idx} ({label})" for idx, label in enumerate(mask_metadata.values())
        )

        # Process each ND2 file
        for nd2_path in nd2_paths:
            print(f"Processing {nd2_path}...")

            # Load the whole ND2 stack into memory; the array stays valid
            # after the file handle is closed.
            with ND2File(nd2_path) as nd2_file:
                image = nd2_file.asarray()

            # Ensure the image dimensions are correct
            if image.ndim != 3:  # Expecting (Z, H, W)
                raise ValueError(f"Expected 3D data (Z, H, W), got shape {image.shape}")

            num_slices, height, width = image.shape

            # All files share one dataset, so the slice axis must agree.
            if image_dataset is not None and image_dataset.shape[1] != num_slices:
                raise ValueError(
                    f"{nd2_path} has {num_slices} slices but previously processed "
                    f"files have {image_dataset.shape[1]}"
                )

            # The masks are expected next to the ND2 file.
            nd2_dir = os.path.dirname(nd2_path)
            mask_paths = {name: os.path.join(nd2_dir, name) for name in mask_names}

            # Validate that all mask files exist
            for mask_path in mask_paths.values():
                if not os.path.exists(mask_path):
                    raise FileNotFoundError(f"Mask file {mask_path} not found.")

            # Load masks
            masks = {name: np.load(path) for name, path in mask_paths.items()}

            # Ensure masks have the same spatial dimensions as the image
            for mask_name, mask_array in masks.items():
                if mask_array.shape != (height, width):
                    raise ValueError(f"Mask {mask_name} shape {mask_array.shape} does not match image shape {(height, width)}")

            # Iterate over the image and extract patches
            for y in range(0, height - patch_height + 1, step_y):
                for x in range(0, width - patch_width + 1, step_x):
                    image_patch = image[:, y:y + patch_height, x:x + patch_width]

                    # Stack the mask tiles channel-first, in mask_metadata order.
                    combined_mask_patch = np.stack([
                        masks[name][y:y + patch_height, x:x + patch_width]
                        for name in mask_names
                    ])

                    # Save image patches into HDF5 file
                    if image_dataset is None:
                        image_dataset = hdf5_file.create_dataset(
                            "image_patches",
                            shape=(0, num_slices, patch_height, patch_width),
                            maxshape=(None, num_slices, patch_height, patch_width),
                            chunks=(1, num_slices, patch_height, patch_width),
                            dtype=image_patch.dtype
                        )

                    image_dataset.resize(image_dataset.shape[0] + 1, axis=0)
                    image_dataset[-1] = image_patch

                    # Save mask patches into HDF5 file
                    if mask_dataset is None:
                        mask_dataset = hdf5_file.create_dataset(
                            "mask_patches",
                            shape=(0, len(mask_names), patch_height, patch_width),
                            maxshape=(None, len(mask_names), patch_height, patch_width),
                            chunks=(1, len(mask_names), patch_height, patch_width),
                            dtype=np.uint8  # Assuming masks are binary or class-labeled
                        )

                        # Add metadata for masks
                        for idx, label in enumerate(mask_metadata.values()):
                            mask_dataset.attrs[f"mask_{idx}"] = label

                    mask_dataset.resize(mask_dataset.shape[0] + 1, axis=0)
                    mask_dataset[-1] = combined_mask_patch

    print(f"Image and mask patches saved to {output_hdf5_path}")

In [21]:
import nd2

# Convert only the first discovered ND2 file.
if not a:
    raise FileNotFoundError("No .nd2 files were found; check the dataset path.")
nd2_files = [a[0]]  # List of ND2 file paths
output_file = os.path.join("dataset", "all_patches.h5")  # Output HDF5 file
# Call the converter under the name it is defined with in this notebook.
nd2_to_hdf5(nd2_files, output_file)

Processing dataset\2024_11_12\Metasurface\Chip_01\Metasurface 01\Brightfield\01_01_BF.nd2...
Image and mask patches saved to dataset\all_patches.h5


In [22]:
import h5py

# Re-open the generated file read-only and report its datasets and metadata.
with h5py.File(output_file, "r") as f:
    print("Keys:", list(f.keys()))
    print("Image patches shape:", f["image_patches"].shape)
    print("Mask patches shape:", f["mask_patches"].shape)
    # Iterating the attribute manager yields the attribute names.
    print("Mask metadata:", list(f["mask_patches"].attrs))
    print("Description:", f.attrs["description"])
    print("Mask info:", f.attrs["mask_info"])

Keys: ['image_patches', 'mask_patches']
Image patches shape: (80, 201, 256, 256)
Mask patches shape: (80, 3, 256, 256)
Mask metadata: ['mask_0', 'mask_1', 'mask_2']
Description: Image and mask patches with nanometer scale metadata.
Mask info: class:0 (Cy5: 80nm), class:1 (FITC: 300nm), class:2 (TRITC: 1300nm)
