In [1]:
import os
import random
import shutil

In [2]:
# Image folders for each variant: the originals first, then the augmented copies
source_dirs = dict(
    with_contrast=["dataset/images_with_contrast", "dataset/augmented_images_with_contrast"],
    no_contrast=["dataset/images_no_contrast", "dataset/augmented_images_no_contrast"],
)

# Both variants look their labels up in the same two folders
# (each variant gets its own list object)
label_dirs = dict(
    with_contrast=["dataset/labels", "dataset/augmented_labels"],
    no_contrast=["dataset/labels", "dataset/augmented_labels"],
)

# Root of the split dataset; create images_<variant>/<split> and
# labels_<variant>/<split> for every variant/split combination up front
output_base_dir = 'dataset_split'
split_names = ("train", "val", "test")
for variant in source_dirs:  # dict order: with_contrast, then no_contrast
    for split in split_names:
        images_out = os.path.join(output_base_dir, f'images_{variant}/{split}')
        labels_out = os.path.join(output_base_dir, f'labels_{variant}/{split}')
        os.makedirs(images_out, exist_ok=True)
        os.makedirs(labels_out, exist_ok=True)

In [3]:
# Split ratios
train_ratio = 0.7
val_ratio = 0.2
test_ratio = 0.1  # informational: split_files gives the test split whatever remains after train and val

# The three fractions are meant to describe the whole dataset; fail early if they do not
assert abs(train_ratio + val_ratio + test_ratio - 1.0) < 1e-9, "split ratios must sum to 1"

# Seed the global RNG used by the shuffle so Restart & Run All reproduces the same split
SEED = 42
random.seed(SEED)

In [4]:
def collect_files(image_dirs):
    """Collect image file paths from the given directories.

    Args:
        image_dirs: Iterable of directory paths to scan (top level only,
            sub-directories are not descended into).

    Returns:
        List of paths (directory joined with entry name) for every entry whose
        name ends in ``.png``, ``.jpg`` or ``.jpeg``, compared
        case-insensitively. Directories are visited in the order given and the
        entries of each directory are sorted by name.

    Raises:
        OSError: Propagated from ``os.listdir`` (e.g. a directory is missing).
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    image_files = []
    for image_dir in image_dirs:
        # os.listdir returns entries in arbitrary order; sorting makes the
        # result (and any seeded shuffle applied to it) reproducible.
        for entry in sorted(os.listdir(image_dir)):
            if entry.lower().endswith(image_extensions):
                image_files.append(os.path.join(image_dir, entry))
    return image_files

def split_files(image_files, train_frac=None, val_frac=None, seed=None):
    """Randomly partition files into train, validation and test lists.

    Args:
        image_files: Sequence of file paths. It is not modified; a copy is
            shuffled instead.
        train_frac: Fraction of files for the train list. Defaults to the
            module-level ``train_ratio``.
        val_frac: Fraction of files for the validation list. Defaults to the
            module-level ``val_ratio``.
        seed: Optional seed. When given, a private ``random.Random(seed)`` is
            used so the result is repeatable; when ``None`` the global
            ``random`` state is used.

    Returns:
        Tuple ``(train_files, val_files, test_files)``. Train and validation
        sizes are ``int(fraction * len(image_files))`` (truncated); the test
        list receives every remaining file.

    Raises:
        ValueError: If a fraction is negative or the two fractions exceed 1.
    """
    if train_frac is None:
        train_frac = train_ratio
    if val_frac is None:
        val_frac = val_ratio
    # Small tolerance so that float sums such as 0.7 + 0.3 are not rejected
    if train_frac < 0 or val_frac < 0 or train_frac + val_frac > 1 + 1e-9:
        raise ValueError(
            f"invalid split fractions: train={train_frac}, val={val_frac}"
        )

    # Shuffle a copy so the caller's list keeps its original order
    shuffled = list(image_files)
    rng = random.Random(seed) if seed is not None else random
    rng.shuffle(shuffled)

    num_images = len(shuffled)
    train_end = int(train_frac * num_images)
    val_end = train_end + int(val_frac * num_images)

    train_files = shuffled[:train_end]
    val_files = shuffled[train_end:val_end]
    test_files = shuffled[val_end:]
    return train_files, val_files, test_files

def move_files(file_list, image_output_dir, label_output_dir, label_dirs):
    """Copy images and their label files into the given output directories.

    Despite the name, nothing is moved: files are copied with ``shutil.copy``
    and the sources stay in place. Files are written under their base name, so
    two inputs with the same base name overwrite each other at the destination.

    Args:
        file_list: Iterable of image file paths to copy.
        image_output_dir: Destination directory for images (created if absent).
        label_output_dir: Destination directory for labels (created if absent).
        label_dirs: Directories searched, in order, for ``<image stem>.txt``;
            the first existing match is copied.

    Returns:
        List of image paths from ``file_list`` for which no label file was
        found in any of ``label_dirs``. Those images are still copied.
    """
    import warnings  # local import: only needed for the missing-label notice

    os.makedirs(image_output_dir, exist_ok=True)
    os.makedirs(label_output_dir, exist_ok=True)

    unlabeled = []
    for image_file in file_list:
        # Copy the image under its base name
        image_filename = os.path.basename(image_file)
        shutil.copy(image_file, os.path.join(image_output_dir, image_filename))

        # Copy the first label file that matches the image stem
        label_filename = os.path.splitext(image_filename)[0] + '.txt'
        candidates = (os.path.join(label_dir, label_filename) for label_dir in label_dirs)
        src_label_path = next((path for path in candidates if os.path.exists(path)), None)
        if src_label_path is None:
            unlabeled.append(image_file)
            continue
        shutil.copy(src_label_path, os.path.join(label_output_dir, label_filename))

    if unlabeled:
        # Report instead of skipping silently; the images themselves were copied
        warnings.warn(
            f"{len(unlabeled)} image(s) copied to {image_output_dir} without a label file"
        )
    return unlabeled

def split_dataset(image_dirs, label_dirs, output_variant):
    """Split one dataset variant and copy it into train/val/test folders.

    Images from all ``image_dirs`` are pooled by ``collect_files``, partitioned
    by ``split_files``, and each partition is handed to ``move_files`` together
    with ``label_dirs``. Output goes under ``output_base_dir`` in
    ``images_<output_variant>/<split>`` and ``labels_<output_variant>/<split>``.
    A one-line summary with the three partition sizes is printed at the end.

    NOTE(review): every directory in ``image_dirs`` is pooled before the
    split, so files from different source directories can land in different
    splits. Nothing is deleted from the output folders, so files from an
    earlier run remain there.
    """
    all_images = collect_files(image_dirs)
    train_files, val_files, test_files = split_files(all_images)

    # Same order as before: train first, then val, then test
    partitions = (("train", train_files), ("val", val_files), ("test", test_files))
    for split_name, members in partitions:
        images_out = os.path.join(output_base_dir, f'images_{output_variant}/{split_name}')
        labels_out = os.path.join(output_base_dir, f'labels_{output_variant}/{split_name}')
        move_files(members, images_out, labels_out, label_dirs)

    print(f"Split complete for {output_variant}: {len(train_files)} train, {len(val_files)} val, {len(test_files)} test images.")

In [5]:
# Run the split once per variant, contrast images first
for variant in ("with_contrast", "no_contrast"):
    split_dataset(source_dirs[variant], label_dirs[variant], variant)


Split complete for with_contrast: 225 train, 64 val, 33 test images.
Split complete for no_contrast: 225 train, 64 val, 33 test images.
