In [4]:
import os
import random
import shutil

# Paths
source_dir = r'H:\Optima\Annotations'
output_dir = r'H:\Optima\Annotations_Split'

# Define split sizes (number of patients assigned to each split)
num_train = 17
num_val = 3
num_test = 4

# Define the maximum number of patches per class per patient
max_patches_per_class = 500

# Fixed seed for the patient shuffle. Copies into output_dir are additive, so
# an unseeded re-run would pick a different split and could leave the same
# patient in more than one split directory.
split_seed = 42

# Get a list of patients (subfolders in source directory).
# os.listdir returns entries in arbitrary order; sorting first makes the
# seeded shuffle below reproducible across runs and machines.
patients = sorted(
    d for d in os.listdir(source_dir)
    if os.path.isdir(os.path.join(source_dir, d))
)

# Check for the correct number of patients. The expected count is derived from
# the split sizes so the two cannot drift apart when the sizes are edited.
expected_patients = num_train + num_val + num_test
if len(patients) != expected_patients:
    raise ValueError(
        "Expected {} patients but found {}".format(expected_patients, len(patients))
    )

# Shuffle and split the list of patients. A dedicated Random instance is used
# so the module-level random state is left untouched.
random.Random(split_seed).shuffle(patients)
train_patients = patients[:num_train]
val_patients = patients[num_train:num_train + num_val]
test_patients = patients[num_train + num_val:]

# Function to copy patches from each patient to the appropriate split/class folder
def copy_patches(patient_list, split_name, *, source_root=None,
                 output_root=None, max_patches=None):
    """Copy each patient's patches into ``<output_root>/<split_name>/<class>``.

    For every patient in ``patient_list``, each sub-directory of the patient
    folder is treated as a class folder. Up to ``max_patches`` regular files
    from it are copied into the destination class folder, with the patient
    name prefixed to the file name.

    Args:
        patient_list: Names of patient folders located under ``source_root``.
        split_name: Name of the split sub-directory (e.g. ``"train"``).
        source_root: Directory containing the patient folders. Defaults to
            the module-level ``source_dir``.
        output_root: Directory under which the split folders are created.
            Defaults to the module-level ``output_dir``.
        max_patches: Maximum number of patches copied per class per patient.
            Defaults to the module-level ``max_patches_per_class``.

    Raises:
        OSError: If a patient folder cannot be listed or a file cannot be
            copied (propagated from ``os.listdir`` / ``shutil.copy``).
    """
    # Fall back to the notebook-level configuration when no override is given.
    if source_root is None:
        source_root = source_dir
    if output_root is None:
        output_root = output_dir
    if max_patches is None:
        max_patches = max_patches_per_class

    for patient in patient_list:
        source_patient_dir = os.path.join(source_root, patient)

        # Iterate over each class folder within the patient folder. Sorted
        # because os.listdir order is arbitrary.
        for class_folder in sorted(os.listdir(source_patient_dir)):
            source_class_dir = os.path.join(source_patient_dir, class_folder)

            # The entry exists (it came from listdir) but may be a stray file
            # rather than a class folder.
            if not os.path.isdir(source_class_dir):
                print(f"Skipping {class_folder} for patient {patient}: not a class folder.")
                continue

            # Create destination class folder if it doesn't exist
            dest_class_dir = os.path.join(output_root, split_name, class_folder)
            os.makedirs(dest_class_dir, exist_ok=True)

            # Keep regular files only: shutil.copy fails on directories, and
            # they must not count against the per-class cap. Sorting makes the
            # selected subset deterministic.
            patch_files = [
                name for name in sorted(os.listdir(source_class_dir))
                if os.path.isfile(os.path.join(source_class_dir, name))
            ]
            patches = patch_files[:max_patches]

            for patch in patches:
                src_patch_path = os.path.join(source_class_dir, patch)
                # Prefix with the patient name so patches from different
                # patients cannot overwrite each other in the shared folder.
                dest_patch_path = os.path.join(dest_class_dir, f"{patient}_{patch}")
                shutil.copy(src_patch_path, dest_patch_path)
            print(f"Copied {len(patches)} patches from {class_folder} of {patient} to {split_name}/{class_folder}")

# Populate the train, val and test folders in that order, one split at a time
for _split_label, _split_patients in (
    ("train", train_patients),
    ("val", val_patients),
    ("test", test_patients),
):
    copy_patches(_split_patients, _split_label)

# Tell the user where the finished split was written
print("Data split complete. Check output directory:", output_dir)

Copied 96 patches from Adipose of TCGA-A2-A0ST-01Z-00-DX1 to train/Adipose
Copied 500 patches from Background of TCGA-A2-A0ST-01Z-00-DX1 to train/Background
Copied 33 patches from Immune cells of TCGA-A2-A0ST-01Z-00-DX1 to train/Immune cells
Copied 500 patches from Stroma of TCGA-A2-A0ST-01Z-00-DX1 to train/Stroma
Copied 42 patches from Tumor of TCGA-A2-A0ST-01Z-00-DX1 to train/Tumor
Copied 132 patches from Adipose of TCGA-BH-A0B5-01Z-00-DX1 to train/Adipose
Copied 10 patches from Immune cells of TCGA-BH-A0B5-01Z-00-DX1 to train/Immune cells
Copied 76 patches from Normal of TCGA-BH-A0B5-01Z-00-DX1 to train/Normal
Copied 132 patches from Stroma of TCGA-BH-A0B5-01Z-00-DX1 to train/Stroma
Copied 234 patches from Tumor of TCGA-BH-A0B5-01Z-00-DX1 to train/Tumor
Copied 495 patches from Background of TCGA-AN-A0FZ-01Z-00-DX1 to train/Background
Copied 15 patches from Immune cells of TCGA-AN-A0FZ-01Z-00-DX1 to train/Immune cells
Copied 58 patches from Stroma of TCGA-AN-A0FZ-01Z-00-DX1 to train/