In [1]:
import os
import random
import shutil

# Image folders for each dataset variant. Every variant maps to two folders;
# by name, the second one of each pair holds the augmented images.
source_dirs = {
    "cropped_with_contrast": [
        "dataset/cropped_with_contrast",
        "dataset/augmented_cropped_images_with_contrast",
    ],
    "cropped_no_contrast": [
        "dataset/cropped_no_contrast",
        "dataset/augmented_cropped_images_no_contrast",
    ],
    "full_with_contrast": [
        "dataset/full_images/images_with_contrast",
        "dataset/augmented_images_with_contrast",
    ],
    "full_no_contrast": [
        "dataset/full_images/images_no_contrast",
        "dataset/augmented_images_no_contrast",
    ],
}

# Label folders for each dataset variant, keyed exactly like source_dirs.
# The with-contrast and no-contrast variants of the same framing (cropped or
# full) point at the same pair of label folders.
label_dirs = {
    "cropped_with_contrast": [
        "dataset/labels_cropped",
        "dataset/augmented_cropped_labels",
    ],
    "cropped_no_contrast": [
        "dataset/labels_cropped",
        "dataset/augmented_cropped_labels",
    ],
    "full_with_contrast": [
        "dataset/labels_full_images",
        "dataset/augmented_full_images_labels",
    ],
    "full_no_contrast": [
        "dataset/labels_full_images",
        "dataset/augmented_full_images_labels",
    ],
}

# Root folder of the split dataset. For every variant it receives an
# images_<variant>/ and a labels_<variant>/ folder, each containing
# train/, validation/ and test/ subfolders.
output_base_dir = 'dataset_split'
for variant in source_dirs:
    for split in ("train", "validation", "test"):
        for prefix in ("images", "labels"):
            # exist_ok=True lets the cell be re-run without failing on
            # folders created by an earlier run.
            os.makedirs(os.path.join(output_base_dir, f'{prefix}_{variant}/{split}'), exist_ok=True)

# Split ratios (train, validation, test). In the code shown here only the
# train and validation ratios are read when splitting; the test split simply
# receives every file left over, so test_ratio is informational.
train_ratio, validation_ratio, test_ratio = 0.7, 0.2, 0.1

def collect_files(image_dirs, extensions=('.png', '.jpg')):
    """Collect the paths of all image files found directly in the given directories.

    Args:
        image_dirs: Iterable of directory paths to scan (not recursive).
        extensions: File-name suffixes that count as images. Matching is
            case-insensitive, so ``photo.PNG`` is picked up as well.

    Returns:
        A list of ``os.path.join(directory, name)`` paths. Directories are
        visited in the order given and names within each directory are
        sorted, so the result is deterministic.

    Raises:
        OSError: If one of the directories cannot be listed (for example
            ``FileNotFoundError`` when it does not exist).
    """
    suffixes = tuple(ext.lower() for ext in extensions)
    image_files = []
    for image_dir in image_dirs:
        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # collected list (and any seeded shuffle applied to it) reproducible.
        image_files.extend(
            os.path.join(image_dir, name)
            for name in sorted(os.listdir(image_dir))
            if name.lower().endswith(suffixes)
        )
    return image_files

def split_files(image_files, train_fraction=None, validation_fraction=None, seed=None):
    """Shuffle the files and split them into train, validation, and test lists.

    Args:
        image_files: Sequence of image paths. It is left unmodified; a copy
            is shuffled instead.
        train_fraction: Share of the files used for training. Defaults to
            the module-level ``train_ratio``.
        validation_fraction: Share of the files used for validation.
            Defaults to the module-level ``validation_ratio``.
        seed: Optional seed for a reproducible shuffle. When ``None`` the
            shared ``random`` module state is used.

    Returns:
        A ``(train_files, validation_files, test_files)`` tuple of lists.
        The train and validation sizes are truncated toward zero and the
        test list receives every file that is left over.

    Raises:
        ValueError: If a fraction is negative or the two fractions add up
            to more than 1.
    """
    if train_fraction is None:
        train_fraction = train_ratio
    if validation_fraction is None:
        validation_fraction = validation_ratio
    # Small tolerance so that float sums such as 0.6 + 0.4 are not rejected.
    if train_fraction < 0 or validation_fraction < 0 or train_fraction + validation_fraction > 1 + 1e-9:
        raise ValueError(
            "train and validation fractions must be non-negative and sum to at most 1, "
            f"got {train_fraction} and {validation_fraction}"
        )

    # Shuffle a copy so the caller's list keeps its original order.
    shuffled = list(image_files)
    shuffler = random if seed is None else random.Random(seed)
    shuffler.shuffle(shuffled)

    num_images = len(shuffled)
    train_end = int(train_fraction * num_images)
    validation_end = train_end + int(validation_fraction * num_images)

    train_files = shuffled[:train_end]
    validation_files = shuffled[train_end:validation_end]
    test_files = shuffled[validation_end:]
    return train_files, validation_files, test_files

def move_files(file_list, image_output_dir, label_output_dir, label_dirs):
    """Copy images and their label files into the given output directories.

    Despite the function name, files are copied, not moved: the sources
    stay where they are.

    Args:
        file_list: Paths of the image files to copy.
        image_output_dir: Destination directory for the images. Created if
            it does not exist.
        label_output_dir: Destination directory for the labels. Created if
            it does not exist.
        label_dirs: Directories searched, in order, for a label file named
            ``<image stem>.txt``. The first match is copied.

    Returns:
        The image paths from ``file_list`` for which no label file was found
        in any of ``label_dirs``. Those images are still copied.
    """
    os.makedirs(image_output_dir, exist_ok=True)
    os.makedirs(label_output_dir, exist_ok=True)

    unlabeled = []
    for image_file in file_list:
        # Copy the image under its own base name.
        image_filename = os.path.basename(image_file)
        shutil.copy(image_file, os.path.join(image_output_dir, image_filename))

        # Copy the first label file whose stem matches the image.
        label_filename = os.path.splitext(image_filename)[0] + '.txt'
        for label_dir in label_dirs:
            src_label_path = os.path.join(label_dir, label_filename)
            if os.path.isfile(src_label_path):
                shutil.copy(src_label_path, os.path.join(label_output_dir, label_filename))
                break
        else:
            # No directory had a matching label; record it instead of
            # silently producing an image without annotations.
            unlabeled.append(image_file)

    if unlabeled:
        print(f"Warning: {len(unlabeled)} image(s) copied to {image_output_dir} have no label file.")
    return unlabeled

def split_dataset(image_dirs, label_dirs, output_variant):
    """Split one dataset variant and fill its train, validation, and test folders.

    Images are gathered from every directory in ``image_dirs``, divided by
    ``split_files`` and handed to ``move_files`` together with ``label_dirs``
    so each split's images and labels land under ``output_base_dir`` in
    ``images_<output_variant>/<split>`` and ``labels_<output_variant>/<split>``.
    A one-line summary with the size of each split is printed at the end.
    """
    # NOTE(review): all directories in image_dirs are pooled before the
    # split; if some of them hold augmented copies of images in the others,
    # a copy can land in a different split than its source - confirm this
    # is intended.
    train_files, validation_files, test_files = split_files(collect_files(image_dirs))

    partitions = (
        ("train", train_files),
        ("validation", validation_files),
        ("test", test_files),
    )
    for split_name, members in partitions:
        images_target = os.path.join(output_base_dir, f'images_{output_variant}/{split_name}')
        labels_target = os.path.join(output_base_dir, f'labels_{output_variant}/{split_name}')
        move_files(members, images_target, labels_target, label_dirs)

    print(f"Split complete for {output_variant}: {len(train_files)} train, {len(validation_files)} validation, {len(test_files)} test images.")

# Run the split once per variant, pairing each variant's image folders with
# the label folders registered under the same key.
for variant, variant_image_dirs in source_dirs.items():
    split_dataset(variant_image_dirs, label_dirs[variant], variant)


Split complete for cropped_with_contrast: 179 train, 51 validation, 27 test images.
Split complete for cropped_no_contrast: 179 train, 51 validation, 27 test images.
Split complete for full_with_contrast: 170 train, 48 validation, 26 test images.
Split complete for full_no_contrast: 170 train, 48 validation, 26 test images.
