In [1]:
# Import the os module for interacting with the operating system, 
# including functions to work with file paths and directories.
import os

# Import the shutil module to facilitate high-level file operations, 
# such as copying and moving files and directories.
import shutil

# Import the random module to generate random numbers and make random selections, 
# useful for tasks like shuffling datasets or selecting random samples.
import random


In [None]:
# Directory holding the labeled images; the split step below looks for one
# subdirectory per sign name inside it (SOURCE_DIR/<label name>/).
SOURCE_DIR = "images/labeled"

# Base directory under which the 'train' and 'validation' folders are created.
# NOTE: "..." is a placeholder and must be replaced before running this notebook,
# e.g. with ../Bounding Box/data (bounding box approach)
# or ../Key Points/data (keypoints approach).
TARGET_BASE_DIR = "..."     # placeholder: set to one of the paths listed above


In [3]:
# Label table for the hand signs handled by this notebook.
# Every entry is a dict with the sign's 'name' and a numeric 'id'; the ids are
# assigned consecutively starting at 1, following the order of the names below
# (hello=1, thanks=2, yes=3, no=4, iloveyou=5).
label_info = [
    {'name': sign_name, 'id': sign_id}
    for sign_id, sign_name in enumerate(
        ('hello', 'thanks', 'yes', 'no', 'iloveyou'),
        start=1,
    )
]


In [4]:
# Fraction of each label's images that is assigned to the training set;
# whatever is left over goes to the validation set.
train_ratio = 0.8  # 80% of the dataset will be used for training,
                    # while the remaining 20% will be used for validation.


In [5]:
# Output locations for the two subsets, both placed directly under TARGET_BASE_DIR.
train_dir = os.path.join(TARGET_BASE_DIR, 'train')
validation_dir = os.path.join(TARGET_BASE_DIR, 'validation')

# Make sure both folders exist; exist_ok=True turns this into a no-op
# (instead of an error) when a folder is already present.
for split_dir in (train_dir, validation_dir):
    os.makedirs(split_dir, exist_ok=True)


In [6]:
# Split the files of every label into a training and a validation subset and
# MOVE them (the source folder is emptied) into
#   <train_dir>/<label name>/   and   <validation_dir>/<label name>/.

# Fixed seed for the shuffle. Combined with the sorted directory listing below,
# this makes the split reproducible: the same set of source files always ends
# up in the same train/validation partition.
SPLIT_SEED = 42

# Dedicated generator so the global `random` state used elsewhere is not touched.
split_rng = random.Random(SPLIT_SEED)

for label in label_info:
    label_name = label['name']

    # Per-label output folders; created even when the label has no source
    # folder, so every label always has a train and a validation directory.
    train_label_dir = os.path.join(train_dir, label_name)
    validation_label_dir = os.path.join(validation_dir, label_name)
    os.makedirs(train_label_dir, exist_ok=True)
    os.makedirs(validation_label_dir, exist_ok=True)

    # Folder expected to hold the labeled files of the current sign.
    label_dir = os.path.join(SOURCE_DIR, label_name)

    # isdir (rather than exists) also rejects a plain file with the label's
    # name, which would otherwise make os.listdir raise.
    if not os.path.isdir(label_dir):
        print(f"Directory {label_dir} not found, skipping.")
        continue

    # Keep regular files only, so stray subdirectories are neither counted in
    # the ratio nor moved. os.listdir returns entries in arbitrary order, so
    # sort first to give the seeded shuffle a stable starting point.
    images = sorted(
        entry
        for entry in os.listdir(label_dir)
        if os.path.isfile(os.path.join(label_dir, entry))
    )
    split_rng.shuffle(images)

    # int() truncates, so any rounding remainder goes to the validation subset
    # (e.g. a single file lands in validation, not training).
    split_index = int(len(images) * train_ratio)

    # One pass per subset: first slice -> training, remaining slice -> validation.
    subsets = (
        (images[:split_index], train_label_dir),
        (images[split_index:], validation_label_dir),
    )
    for subset_images, dst_dir in subsets:
        for img in subset_images:
            src_path = os.path.join(label_dir, img)
            dst_path = os.path.join(dst_dir, img)
            shutil.move(src_path, dst_path)
            print(f"Moved {img} to {dst_dir}")

# Log completion of the data splitting process.
print("Data split into training and validation sets complete.")


Moved hello_8a0a0c2b-9c59-11ef-ba11-28d0433fc667.jpg to ../data/train/hello
Moved hello_96a8fa65-9c59-11ef-b403-28d0433fc667.jpg to ../data/train/hello
Moved hello_635f5fbb-9c59-11ef-91b6-28d0433fc667.jpg to ../data/train/hello
Moved hello_b1c981b8-9c59-11ef-b5aa-28d0433fc667.jpg to ../data/train/hello
Moved hello_4554e3fb-9c59-11ef-8502-28d0433fc667.jpg to ../data/train/hello
Moved hello_a0eaf115-9c59-11ef-8488-28d0433fc667.jpg to ../data/train/hello
Moved hello_b7dca1d8-9c59-11ef-a259-28d0433fc667.jpg to ../data/train/hello
Moved hello_500d6d6d-9c59-11ef-8835-28d0433fc667.jpg to ../data/train/hello
Moved hello_8120cb5d-9c59-11ef-9c77-28d0433fc667.jpg to ../data/train/hello
Moved hello_5281f9bc-9c59-11ef-9114-28d0433fc667.jpg to ../data/train/hello
Moved hello_8540148a-9c59-11ef-b57d-28d0433fc667.jpg to ../data/train/hello
Moved hello_5ccc7bad-9c59-11ef-8c96-28d0433fc667.jpg to ../data/train/hello
Moved hello_a4f74f4d-9c59-11ef-a3a6-28d0433fc667.jpg to ../data/train/hello
Moved hello_