In [1]:
# Cell 1: import necessary libraries
import os
import shutil
import random
from pathlib import Path


In [2]:
# ---- Input locations (resolved against the current working directory) ----
base_dir    = Path.cwd()                  # project root
source_dir  = base_dir.joinpath('train')  # original images
mask_source = base_dir.joinpath('masks')  # original mask PNGs

# ---- Split sizes ----
n_primary   = 1_000   # size of the train/validation pool
train_ratio = 0.8     # share of that pool used for training
n_test      = 200     # number of images taken after the pool for the test set

# ---- Output folders: images ----
train_dir, val_dir, test_dir = (
    base_dir / folder for folder in ('train_set', 'val_set', 'test_set')
)

# ---- Output folders: masks ----
train_mask_dir, val_mask_dir, test_mask_dir = (
    base_dir / folder for folder in ('train_masks', 'val_masks', 'test_masks')
)


In [3]:
# List all .png files in the source directory.
# Path.iterdir() yields entries in arbitrary (filesystem-dependent) order, so
# the list is sorted by file name first; without this the seeded shuffle below
# would produce a different split on a different machine or filesystem.
image_files = sorted(
    (
        f for f in source_dir.iterdir()
        if f.is_file() and f.suffix.lower() == '.png'
    ),
    key=lambda f: f.name,
)

# Fail loudly instead of silently producing short (or empty) splits when the
# source folder holds fewer images than the configuration asks for.
n_required = n_primary + n_test
if len(image_files) < n_required:
    raise ValueError(
        f"Need at least {n_required} .png images in {source_dir}, "
        f"found {len(image_files)}"
    )

# Shuffle with a fixed seed for reproducibility
random.seed(42)
random.shuffle(image_files)

# Split into primary (first n_primary) and test (next n_test) subsets
primary_imgs = image_files[:n_primary]
test_images  = image_files[n_primary : n_primary + n_test]

# Further split primary into train and validation
split_idx    = int(len(primary_imgs) * train_ratio)
train_images = primary_imgs[:split_idx]   # first train_ratio share of the pool
val_images   = primary_imgs[split_idx:]   # remainder of the pool


In [4]:
# Ensure the three image split folders are present. Folders that already
# exist are reused as-is (exist_ok=True); nothing inside them is removed.
for directory in [
    train_dir,
    val_dir,
    test_dir,
]:
    directory.mkdir(exist_ok=True)

def copy_images(image_list, dest_dir):
    """Copy every image in ``image_list`` into ``dest_dir``.

    Args:
        image_list: Iterable of ``Path`` objects pointing at the source files.
        dest_dir: ``Path`` of the destination folder. It is not created here.

    Each file keeps its original name; a file already present under that
    name in ``dest_dir`` is replaced by ``shutil.copy``.
    """
    for src_path in image_list:
        target_path = dest_dir / src_path.name
        shutil.copy(src_path, target_path)

# Fill each image split folder from its list of selected files
for split_images, split_dir in (
    (train_images, train_dir),
    (val_images,   val_dir),
    (test_images,  test_dir),
):
    copy_images(split_images, split_dir)


In [5]:
# Ensure the three mask split folders are present. Folders that already
# exist are reused as-is (exist_ok=True); nothing inside them is removed.
for directory in [
    train_mask_dir,
    val_mask_dir,
    test_mask_dir,
]:
    directory.mkdir(exist_ok=True)

def copy_masks(image_list, mask_dest, mask_dir=None):
    """Copy the mask belonging to each image in ``image_list`` into ``mask_dest``.

    The mask for an image is looked up as ``<image stem>.png`` inside
    ``mask_dir``. Images whose mask does not exist are skipped, and a single
    warning naming them is printed so that a misaligned image/mask split does
    not go unnoticed.

    Args:
        image_list: Iterable of ``Path`` objects for the images of one split.
        mask_dest: ``Path`` of the folder that receives the masks. It is not
            created here.
        mask_dir: ``Path`` of the folder holding the original masks. Defaults
            to the notebook-level ``mask_source``.

    Returns:
        None.
    """
    if mask_dir is None:
        mask_dir = mask_source

    missing = []
    for img in image_list:
        mask_file = mask_dir / (img.stem + '.png')
        if mask_file.exists():
            shutil.copy(mask_file, mask_dest / mask_file.name)
        else:
            missing.append(img.name)

    if missing:
        # Keep the message short even when many masks are absent.
        preview = ', '.join(missing[:5])
        if len(missing) > 5:
            preview += ', ...'
        print(
            f"Warning: {len(missing)} image(s) have no matching mask "
            f"in {mask_dir}: {preview}"
        )

# Fill each mask split folder with the masks of that split's images
for split_images, split_mask_dir in (
    (train_images, train_mask_dir),
    (val_images,   val_mask_dir),
    (test_images,  test_mask_dir),
):
    copy_masks(split_images, split_mask_dir)


In [6]:
def _count_split_masks(images, mask_dir):
    """Return how many images in ``images`` have ``<stem>.png`` in ``mask_dir``."""
    return sum((mask_dir / (img.stem + '.png')).is_file() for img in images)


print(f"Total images found:       {len(image_files)}")
print(f"  • Training images:      {len(train_images)}")
print(f"  • Validation images:    {len(val_images)}")
print(f"  • Test images:          {len(test_images)}\n")

# Count only the masks that belong to this run's split. Listing the mask
# folders instead would also count leftovers from earlier runs and any
# unrelated entries in those folders, overstating the number of masks.
print("Masks copied:")
print(f"  • Training masks:       {_count_split_masks(train_images, train_mask_dir)}")
print(f"  • Validation masks:     {_count_split_masks(val_images, val_mask_dir)}")
print(f"  • Test masks:           {_count_split_masks(test_images, test_mask_dir)}")


Total images found:       9580
  • Training images:      800
  • Validation images:    200
  • Test images:          200

Masks copied:
  • Training masks:       800
  • Validation masks:     200
  • Test masks:           200
