In [2]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from skimage import io
import SimpleITK as sitk
from tqdm import tqdm

In [5]:
def export_yolo_data(
    patient_id,
    image_export_folder,
    annotation_export_folder,
    mask_export_folder,
    root_folder,
):
    subfolder = f"patient0{patient_id:03d}"

    original_files = [
        "2CH_ED",
        "2CH_ES",
        "4CH_ED",
        "4CH_ES",
    ]

    for ori_file in original_files:
        path1 = os.path.join(root_folder, subfolder, f"{subfolder}_{ori_file}.nii.gz")
        if not os.path.isfile(path1):
            print(f"Original file not found: {path1}")
            continue

        img1 = sitk.GetArrayFromImage(sitk.ReadImage(path1))
        img1 = (
            img1.astype(np.float32) / img1.max()
        )  # Normalize the original image to the range [0, 1]

        gt_file = ori_file + "_gt"
        path2 = os.path.join(root_folder, subfolder, f"{subfolder}_{gt_file}.nii.gz")

        if os.path.isfile(path2):
            img2 = sitk.GetArrayFromImage(sitk.ReadImage(path2))

            class_labels = np.unique(img2)

            for class_id, class_label in enumerate(class_labels):
                if class_label == 0:  # Ignore the background class
                    continue

                binary_mask = (img2 == class_label).astype(np.uint8)

                # Save binary mask
                mask_export_path = os.path.join(
                    mask_export_folder, f"{subfolder}_{ori_file}_{class_id}.png"
                )
                io.imsave(
                    mask_export_path, binary_mask * 255
                )  # Convert the mask to 8 bit before saving

                # Save the image as a JPEG file with a suffix corresponding to the class ID
                img_export_path = os.path.join(
                    image_export_folder, f"{subfolder}_{ori_file}_{class_id}.jpeg"
                )
                io.imsave(
                    img_export_path, (img1 * 255).astype(np.uint8)
                )  # Convert the image to 8 bit before saving

                contours, _ = cv2.findContours(
                    binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
                )

                # Save the bounding box annotations in a text file
                bbox_export_path = os.path.join(
                    annotation_export_folder, f"{subfolder}_{ori_file}_{class_id}.txt"
                )

                with open(bbox_export_path, "w") as f:
                    for contour in contours:
                        x, y, w, h = cv2.boundingRect(contour)
                        f.write(
                            f"{1} {(x + w / 2) / img1.shape[1]} {(y + h / 2) / img1.shape[0]} {w / img1.shape[1]} {h / img1.shape[0]}\n"
                        )
        else:
            print(f"Corresponding segmentation file not found: {path2}")


In [7]:
image_export_folder = "./out_SAM/train/images"
annotation_export_folder = "./out_SAM/train/labels"
mask_export_folder = "./out_SAM/train/masks"
root_folder = "/home/aistudent/camus/CAMUS_public/database_nifti"

os.makedirs(image_export_folder, exist_ok=True)
os.makedirs(annotation_export_folder, exist_ok=True)
os.makedirs(mask_export_folder, exist_ok=True)

n_patients = 450

for patient_id in tqdm(range(1, n_patients + 1)):
    export_yolo_data(
        patient_id,
        image_export_folder,
        annotation_export_folder,
        mask_export_folder,
        root_folder,
    )


  io.imsave(mask_export_path, binary_mask * 255)  # Convert the mask to 8 bit before saving
  io.imsave(mask_export_path, binary_mask * 255)  # Convert the mask to 8 bit before saving
  io.imsave(mask_export_path, binary_mask * 255)  # Convert the mask to 8 bit before saving
  io.imsave(mask_export_path, binary_mask * 255)  # Convert the mask to 8 bit before saving
  io.imsave(mask_export_path, binary_mask * 255)  # Convert the mask to 8 bit before saving
  io.imsave(mask_export_path, binary_mask * 255)  # Convert the mask to 8 bit before saving
  io.imsave(mask_export_path, binary_mask * 255)  # Convert the mask to 8 bit before saving
  io.imsave(mask_export_path, binary_mask * 255)  # Convert the mask to 8 bit before saving
  io.imsave(mask_export_path, binary_mask * 255)  # Convert the mask to 8 bit before saving
100%|██████████| 450/450 [01:35<00:00,  4.71it/s]


In [8]:
image_export_folder = "./out_SAM/val/images"
annotation_export_folder = "./out_SAM/val/labels"
mask_export_folder = "./out_SAM/val/masks"
root_folder = "/home/aistudent/camus/CAMUS_public/database_nifti"

os.makedirs(image_export_folder, exist_ok=True)
os.makedirs(annotation_export_folder, exist_ok=True)
os.makedirs(mask_export_folder, exist_ok=True)

n_patients = 500

for patient_id in tqdm(range(451, n_patients + 1)):
    export_yolo_data(
        patient_id,
        image_export_folder,
        annotation_export_folder,
        mask_export_folder,
        root_folder,
    )


  io.imsave(mask_export_path, binary_mask * 255)  # Convert the mask to 8 bit before saving
100%|██████████| 50/50 [00:09<00:00,  5.19it/s]


In [3]:
from datasets import Dataset, DatasetDict
from PIL import Image
import numpy as np
import os


def load_images(folder: str):
    image_files = os.listdir(folder)
    return [Image.open(os.path.join(folder, file)) for file in image_files]


def load_dataset() -> DatasetDict:
    # Load the image and mask from the folders
    train_images = load_images("out_SAM/train/images")
    train_masks = load_images("out_SAM/train/masks")
    val_images = load_images("out_SAM/val/images")
    val_masks = load_images("out_SAM/val/masks")

    # Create dict datasets
    train_dataset = Dataset.from_dict({"image": train_images, "label": train_masks})
    val_dataset = Dataset.from_dict({"image": val_images, "label": val_masks})

    # Combine them into a dataset split dictionary
    dataset = DatasetDict({"train": train_dataset, "validation": val_dataset})
    return dataset


dataset = load_dataset()


In [4]:
dataset

DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 5400
    })
    validation: Dataset({
        features: ['image', 'label'],
        num_rows: 600
    })
})

In [5]:
dataset['train'][0]

{'image': <PIL.JpegImagePlugin.JpegImageFile image mode=L size=748x616>,
 'label': <PIL.PngImagePlugin.PngImageFile image mode=L size=708x584>}

In [6]:
# Save the dataset
dataset.save_to_disk("camus_sam_ds")

Saving the dataset (0/1 shards):   0%|          | 0/5400 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/600 [00:00<?, ? examples/s]