In [21]:
# Initialization
import os
import shutil
import random
from sklearn.model_selection import train_test_split

# Source locations, relative to the current working directory.
root = 'MaSTr1325'
img_dir = 'MaSTr1325_images_512x384'
mask_dir = 'MaSTr1325_masks_512x384'

# Target Path: one image folder and one mask folder per split, all under `root`.
(
    train_img_dir, train_mask_dir,
    val_img_dir, val_mask_dir,
    test_img_dir, test_mask_dir,
) = (
    os.path.join(root, sub)
    for sub in ('train', 'train_mask', 'val', 'val_mask', 'test', 'test_mask')
)

# Create the target folders; `exist_ok=True` makes this step safe to re-run.
for d in (train_img_dir, train_mask_dir, val_img_dir, val_mask_dir, test_img_dir, test_mask_dir):
    os.makedirs(d, exist_ok=True)

# Collect the file names and sort both lists so that position i in `images`
# is meant to correspond to position i in `masks`.
# NOTE(review): this pairing relies on image and mask names sorting in the
# same order; only the counts are checked below — confirm the naming scheme.
images = sorted(f for f in os.listdir(img_dir) if f.endswith('.jpg'))
masks = sorted(f for f in os.listdir(mask_dir) if f.endswith('.png'))
assert len(images) == len(masks), "Number of images and masks do not match"

# Dataset Splitting
|Dataset|\%|Number|
|:-:|:-:|:-:|
|Training|70|927|
|Validation|20|266|
|Test|10|132|

In [23]:
# Split Original Dataset
# Stage 1: hold out 30% of the (image, mask) pairs; the rest is the training set.
# Passing both lists to one call keeps the pairing intact.
train_imgs, temp_imgs, train_masks, temp_masks = train_test_split(
    images,
    masks,
    test_size=0.3,
    random_state=42,
)
# Stage 2: 33% of the held-out pairs become the test set, the remainder the
# validation set. The fixed `random_state` makes both cuts reproducible.
val_imgs, test_imgs, val_masks, test_masks = train_test_split(
    temp_imgs,
    temp_masks,
    test_size=0.33,
    random_state=42,
)

# Move File Function
def move_files(file_list, src_dir, dest_dir):
    """Move each named file from ``src_dir`` into ``dest_dir``, keeping its name.

    Args:
        file_list: Iterable of file names (not paths) located in ``src_dir``.
        src_dir: Directory the files are currently in.
        dest_dir: Directory the files are moved to.

    Files are moved, not copied, so ``src_dir`` no longer contains them
    afterwards.
    """
    for name in file_list:
        source_path = os.path.join(src_dir, name)
        target_path = os.path.join(dest_dir, name)
        shutil.move(source_path, target_path)

# Move every split out of the source folders. Order is training, validation,
# testing; within each split the images are moved first, then the masks.
for _names, _src_dir, _dest_dir in (
    (train_imgs, img_dir, train_img_dir),      # Training images
    (train_masks, mask_dir, train_mask_dir),   # Training masks
    (val_imgs, img_dir, val_img_dir),          # Validation images
    (val_masks, mask_dir, val_mask_dir),       # Validation masks
    (test_imgs, img_dir, test_img_dir),        # Testing images
    (test_masks, mask_dir, test_mask_dir),     # Testing masks
):
    move_files(_names, _src_dir, _dest_dir)


In [24]:
# Report how many entries each split's image folder now contains.
for _label, _split_dir in (
    ("Training set size:", train_img_dir),
    ("Validation set size:", val_img_dir),
    ("Test set size:", test_img_dir),
):
    print(_label, len(os.listdir(_split_dir)))

Training set size: 927
Validation set size: 266
Test set size: 132


# Generate the file lists for each dataset split

In [26]:
import os

def generate_file_list(directory, output_file):
    """Write the extension-less names of all ``.jpg`` files in a folder to a text file.

    Args:
        directory: Folder whose entries are listed.
        output_file: Path of the text file to create (overwritten if present).

    The names are written in sorted order, one per line. Entries that do not
    end in ``.jpg`` (case-sensitive) are skipped.
    """
    with open(output_file, 'w') as out:
        # The directory is listed only after the output file has been opened.
        stems = (
            os.path.splitext(entry)[0]
            for entry in sorted(os.listdir(directory))
            if entry.endswith('.jpg')
        )
        out.writelines(f"{stem}\n" for stem in stems)

# Define `root`
root = 'MaSTr1325'

# For each split, write the image stems found in <root>/<split> to
# <root>/<split>_images.txt. Only the image folders are listed here; the
# mask folders are not.
for split in ('train', 'val', 'test'):
    generate_file_list(
        os.path.join(root, split),
        os.path.join(root, f'{split}_images.txt'),
    )

print("File lists generated successfully.")

File lists generated successfully.


# Sort & Rename images and masks in Datasets

In [7]:
import os
import yaml



def rename_files(images_folder, masks_folder):
    """Renumber the images and masks of one split in place.

    Every entry of each folder is listed and sorted by name. The i-th image
    and the i-th mask (counting from 1) are renamed to ``{i:03d}.jpg`` and
    ``{i:03d}m.png`` respectively, e.g. ``001.jpg`` / ``001m.png``. Pairing
    is purely positional: the function does not check that an image and the
    mask at the same position belong together.

    The renaming is done in two passes. All files are first moved to unique
    staging names and only then to their final names. Renaming directly can
    silently overwrite a file that is still waiting for its turn (for
    example ``0001.jpg`` -> ``001.jpg`` while another ``001.jpg`` exists),
    because ``os.rename`` replaces an existing destination file on POSIX.

    Args:
        images_folder: Folder containing the image files.
        masks_folder: Folder containing the mask files.

    Raises:
        ValueError: If the two folders do not contain the same number of
            entries.
        OSError: If a rename fails. Files already processed may then be left
            under their staging names (prefixed with ``.renaming-``).
    """
    import uuid  # local import: only used to build collision-free staging names

    image_files = sorted(os.listdir(images_folder))
    mask_files = sorted(os.listdir(masks_folder))

    # Positional pairing only makes sense when both folders have equal size.
    if len(image_files) != len(mask_files):
        raise ValueError("The number of images and masks do not match.")

    staging_tag = f".renaming-{uuid.uuid4().hex}-"

    # Each step is (folder, current name, staging name, final name).
    steps = []
    for index, (img, mask) in enumerate(zip(image_files, mask_files), start=1):
        steps.append((images_folder, img, f"{staging_tag}img-{index}", f"{index:03d}.jpg"))
        steps.append((masks_folder, mask, f"{staging_tag}mask-{index}", f"{index:03d}m.png"))

    # Pass 1: move everything out of the way so no final name is occupied.
    for folder, current_name, staged_name, _ in steps:
        os.rename(os.path.join(folder, current_name), os.path.join(folder, staged_name))

    # Pass 2: give every staged file its final sequential name.
    for folder, _, staged_name, final_name in steps:
        os.rename(os.path.join(folder, staged_name), os.path.join(folder, final_name))

# Read config.yaml (looked up in the current working directory).
with open('config.yaml', 'r') as file:
    config = yaml.safe_load(file)

# The renames run from the parent directory.
# NOTE(review): presumably the folder paths in config.yaml are relative to
# the parent directory — confirm.
# The starting directory is recorded and restored in `finally`, so a failure
# inside `rename_files` cannot leave the kernel in the parent directory,
# where re-running this cell would step up one level too many.
original_cwd = os.getcwd()
os.chdir('..')
try:
    rename_files(config['train_img_dir'], config['train_mask_dir'])
    rename_files(config['val_img_dir'], config['val_mask_dir'])
    rename_files(config['test_img_dir'], config['test_mask_dir'])
finally:
    os.chdir(original_cwd)

# NOTE(review): if these are the folders that the *_images.txt lists were
# generated from, those lists still hold the pre-rename names and need to be
# regenerated — confirm.
print('Rename Successfully.')

Rename Successfully.
