In [1]:
import os  # To interact with the operating system
import re  #  For regular expressions, useful for pattern matching in strings
from pathlib import Path  # For easy path manipulations and handling
import numpy as np  # For numerical operations and array handling
from patchify import patchify  # For splitting images into smaller patches
from PIL import Image  #  For image manipulation


In [3]:
# Create the training, validation and testing folders, each of which will contain 'images' and 'masks' dataset subfolders
def create_folders(main_folder):
    """
    Create the train/val/test folder tree that holds the dataset.

    For each of 'train', 'val' and 'test', an 'images' and a 'masks'
    subfolder is created under main_folder. Folders that already exist are
    left untouched, so the function can safely be called more than once.

    Parameters:
    - main_folder: Path to the folder that will contain the split folders
    """
    FOLDERS = ['train', 'val', 'test']
    SUBFOLDERS = ['images', 'masks']
    for folder in FOLDERS:
        for subfolder in SUBFOLDERS:
            # exist_ok=True creates missing parents and also fills in a
            # subfolder that is missing from an already existing split folder
            os.makedirs(os.path.join(main_folder, folder, subfolder), exist_ok=True)

c:\course22\NF1data_json
c:\course22\NF1data_json


In [None]:
import os  # To interact with the operating system
from pathlib import Path  # For easy path manipulations and handling
import numpy as np  # For numerical operations and array handling
from patchify import patchify  # For splitting images into smaller patches
from PIL import Image  # For image manipulation

def create_patches(src, dest_path, patch_size=256, step=236):
    """
    Split an image into square patches and save each patch as a PNG file.

    Images with more than two dimensions are cut into
    patch_size x patch_size x 3 patches; two-dimensional (grayscale) images
    are cut into patch_size x patch_size patches. Each patch is written to
    dest_path as '<image stem>_in_<folder name>_patch_<index>.png', where
    <folder name> is the directory two levels above the source file and
    <index> numbers the patches row by row.

    Parameters:
    - src: Path to the source image
    - dest_path: Path to the destination folder where patches will be saved
      (this function does not create it)
    - patch_size: Height and width of each patch in pixels (default 256)
    - step: Stride in pixels between consecutive patches (default 236, i.e.
      neighbouring 256-pixel patches overlap by 20 pixels)

    NOTE(review): patchify is expected to raise for images smaller than the
    patch size; that case is not handled here -- confirm against the data.
    """

    # The folder two levels above the file (<folder>/<subfolder>/<file>) tags the patch names
    folder_name = os.path.basename(os.path.dirname(os.path.dirname(src)))
    file_name_wo_ext = Path(src).stem  # Get the file name without extension

    # Read the pixels inside a context manager so the file handle is closed
    # as soon as the data is in memory instead of leaking one handle per image
    with Image.open(src) as img:
        image = np.asarray(img)

    # Images with a channel axis need a 3-D window; grayscale images a 2-D one
    has_channels = image.ndim > 2
    window = (patch_size, patch_size, 3) if has_channels else (patch_size, patch_size)
    patches = patchify(image, window, step=step)

    n_rows, n_cols = patches.shape[:2]
    for i in range(n_rows):
        for j in range(n_cols):
            # With a 3-D window the patch grid has an extra third axis, indexed at 0
            patch = patches[i, j, 0] if has_channels else patches[i, j]
            num = i * n_cols + j  # Row-major index, unique within this image
            out_name = f"{file_name_wo_ext}_in_{folder_name}_patch_{num}.png"
            Image.fromarray(patch).save(os.path.join(dest_path, out_name))  # Save the patch


In [None]:
import os  # Importing the os module to interact with the operating system
from pathlib import Path  # Importing Path from pathlib for easy path manipulations and handling

# Get the current working directory
root_path = os.getcwd()
print(f"Root path: {root_path}")

# Set the database path: source folders are read from here and the
# train/val/test patch folders are written below it
database_path = os.path.join(root_path, 'NF1data_json')
print(f"Database path: {database_path}")

# Dataset split assigned to each source folder name.
# Folders that are not listed here are skipped.
SPLIT_FOLDERS = {
    'test': ['NoTestFolder'],  # Due to the limited dataset, no folder is assigned to testing
    'val': ['NF100000003', 'NF100000006', 'NF100000012', 'NF100000009'],
    'train': [
        'NF100000002', 'NF100000005', 'NF100000008', 'NF100000011',
        'NF100000013', 'NF100000014', 'NF100000016', 'NF100000018', 'NF100000022'
    ],
}
split_of_folder = {
    name: split for split, names in SPLIT_FOLDERS.items() for name in names
}

# Walk through the database directory (same location the patches are written under)
for path_name, _, file_names in os.walk(database_path):
    print(f"Current path: {path_name}")

    # These values depend only on the directory, so compute them once per
    # directory instead of once per file
    f_type = os.path.basename(path_name)  # Type of file (e.g., 'masks' or 'images')
    path_folder = os.path.dirname(path_name)  # Parent folder path
    f_name = os.path.basename(path_folder)  # Name of the folder (e.g., 'NF100000002')

    # Only 'images'/'masks' subfolders of a folder with an assigned split are processed
    target_folder = split_of_folder.get(f_name)
    if target_folder is None or f_type not in ('images', 'masks'):
        continue

    # Make sure the destination exists before any patch is saved into it
    dest = os.path.join(database_path, target_folder, f_type)
    os.makedirs(dest, exist_ok=True)

    for f in file_names:
        src = os.path.join(path_name, f)  # Full path to the current file
        file_name_wo_ext = Path(src).stem  # File name without extension

        # Process the file only if both the image and its mask exist
        img_file = os.path.join(path_folder, 'images', f"{file_name_wo_ext}.png")
        mask_file = os.path.join(path_folder, 'masks', f"{file_name_wo_ext}.jpg")
        if os.path.exists(img_file) and os.path.exists(mask_file):
            print(f"Target folder for {f_type}: {target_folder}")
            create_patches(src=src, dest_path=dest)
