# Step 1: Importing Necessary Libraries

In [1]:
# Import necessary libraries
# - os: for interacting with the file system
# - shutil: to copy directories
# - random: for random sampling
# - tqdm: for progress bars to track file copying
import os
import shutil
import random
from tqdm import tqdm


# Step 2: Define Helper Functions

In [2]:
# Function to ensure the destination directory exists or create it
def create_directory_if_not_exists(directory_path):
    """
    Make sure ``directory_path`` is present on disk.

    Missing parent directories are created along the way, and a directory
    that already exists is left untouched without raising an error.

    :param directory_path: Path of the directory to check or create.
    """
    os.makedirs(name=directory_path, exist_ok=True)

# Function to get directories that start with a specified letter
def get_directories_by_letter(source_directory, letter):
    """
    Fetches directories from the source that start with the given letter.

    The match is case-insensitive on both sides, so ``'A'`` and ``'a'`` select
    the same directories. The result is sorted because ``os.listdir`` returns
    entries in arbitrary order; a stable order keeps any seeded random
    sampling of the result reproducible.

    :param source_directory: The source directory to search within.
    :param letter: The letter (or longer prefix) directories should start with.
    :return: Sorted list of directory names starting with the letter.
    :raises OSError: If ``source_directory`` cannot be listed (e.g. it is missing).
    """
    prefix = letter.lower()
    return sorted(
        entry
        for entry in os.listdir(source_directory)
        # Cheap string test first so non-matching entries are never stat'ed.
        if entry.lower().startswith(prefix)
        and os.path.isdir(os.path.join(source_directory, entry))
    )

# Function to copy selected directories to a destination
def copy_selected_directories(directories, source_directory, destination_directory):
    """
    Copy each named directory tree from the source into the destination.

    The copy is best-effort: a failure for one directory is reported on
    standard output and the remaining directories are still processed.
    Destination directories that already exist are merged into rather than
    rejected (``dirs_exist_ok=True``).

    :param directories: List of directory names to copy.
    :param source_directory: Directory from where to copy.
    :param destination_directory: Directory where to copy.
    """
    progress = tqdm(directories, desc="Copying directories", unit="directory")
    for name in progress:
        origin = os.path.join(source_directory, name)
        target = os.path.join(destination_directory, name)
        try:
            shutil.copytree(origin, target, dirs_exist_ok=True)
        except Exception as exc:
            # Report and move on so one bad directory does not abort the batch.
            print(f"Error copying {name}: {exc}")


# Step 3: Define Source and Destination Paths

In [3]:
# Input and output locations, resolved relative to the current working directory
source_directory = "dataset"  # directories are copied from here
destination_directory = "small_dataset_72"  # reduced copy is written here

# Create the output location up front so the copy steps can write into it
create_directory_if_not_exists(destination_directory)

# Step 4: Process Directories by Alphabet and Copy Them

In [4]:
# Number of directories to sample for each starting letter
SAMPLES_PER_LETTER = 3

# A dedicated, seeded generator makes the selection reproducible. Without it,
# every re-run of this cell picks a different random set, and because the copy
# merges into the existing destination the dataset keeps growing.
# NOTE: directories copied by earlier unseeded runs stay in the destination;
# clear it first if an exact reproduction is required.
SAMPLING_SEED = 42
rng = random.Random(SAMPLING_SEED)

# Iterate through letters in the alphabet to process directories
for letter in tqdm('abcdefghijklmnopqrstuvwxyz', desc="Processing letters", unit="letter"):
    # Sort the candidates: the directory listing order is OS-dependent, and a
    # seed only yields the same sample when the population order is the same.
    directories_starting_with_letter = sorted(get_directories_by_letter(source_directory, letter))

    # Skip letters that do not have enough directories to sample from
    if len(directories_starting_with_letter) < SAMPLES_PER_LETTER:
        print(f"Not enough directories start with '{letter}' to select {SAMPLES_PER_LETTER}. Found: {len(directories_starting_with_letter)}")
        continue

    # Reproducibly select the directories for this letter
    selected_directories = rng.sample(directories_starting_with_letter, SAMPLES_PER_LETTER)

    # Copy the selected directories to the destination
    copy_selected_directories(selected_directories, source_directory, destination_directory)

print("Alphabet-based directory copying completed successfully.")


Processing letters:   0%|                                                                                                                                                                                         | 0/26 [00:00<?, ?letter/s]
Copying directories:   0%|                                                                                                                                                                                      | 0/3 [00:00<?, ?directory/s][A
Copying directories:  33%|██████████████████████████████████████████████████████████                                                                                                                    | 1/3 [00:07<00:14,  7.27s/directory][A
Copying directories:  67%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                          | 2/3 [00:23<00:12, 12.44s/directory][A
Copying directories: 100%|█████████████

Not enough directories start with 'x' to select 3. Found: 0



Copying directories:   0%|                                                                                                                                                                                      | 0/3 [00:00<?, ?directory/s][A
Copying directories:  33%|██████████████████████████████████████████████████████████                                                                                                                    | 1/3 [00:08<00:17,  8.54s/directory][A
Copying directories: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:15<00:00,  5.11s/directory][A
Processing letters: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 26/26 [09:06<00:00, 21.02s/letter]

Not enough directories start with 'z' to select 3. Found: 1
Alphabet-based directory copying completed successfully.





# Step 5: Process Specific Directories

In [5]:
# Handle directories selected by a fixed name prefix rather than by sampling.
# The match is case-sensitive: only names beginning with lower-case 'none'.
specific_directories = [entry for entry in os.listdir(source_directory) 
                        if os.path.isdir(os.path.join(source_directory, entry)) and entry.startswith('none')]

# Copy these directories if found; only claim success when something was
# actually handed to the copy step.
if specific_directories:
    copy_selected_directories(specific_directories, source_directory, destination_directory)
    print("Specific directories starting with 'none' have been copied successfully.")
else:
    print("No directories starting with 'none' were found; nothing was copied.")


Copying directories: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 74.34directory/s]

Specific directories starting with 'none' have been copied successfully.





# Step 6: Dataset Processing and Timing (currently disabled)

In [6]:
# NOTE: This whole cell is commented out and does nothing when executed.
# NOTE(review): `gesture_recognizer` is not imported in any cell shown in this
# notebook; it must be imported before this cell is re-enabled (presumably
# from mediapipe_model_maker — confirm).
# # Timing and Dataset Processing using gesture_recognizer
# import time

# # Measure the time it takes to process the dataset
# start_time = time.time()

# dataset_path = 'small_dataset_72'

# data = gesture_recognizer.Dataset.from_folder(
#     dirname=dataset_path,
#     hparams=gesture_recognizer.HandDataPreprocessingParams(
#         shuffle=True, min_detection_confidence=0.66
#     ),
# )

# end_time = time.time()
# print(f"Total time for dataset processing: {end_time - start_time:.2f} seconds")

# # Split Dataset into Training, Validation, and Test Sets
# train_data, rest_data = data.split(0.8)
# validation_data, test_data = rest_data.split(0.5)


# Step 7: Extract and Display Labels

In [8]:
# Extract and display labels (directory names in the dataset path)
dataset_path = 'small_dataset_72'
# os.listdir returns entries in arbitrary order, so sort the names to get the
# same label list on every run and every filesystem.
labels = sorted(
    entry for entry in os.listdir(dataset_path)
    if os.path.isdir(os.path.join(dataset_path, entry))
)

# Display the labels and the number of labels
# NOTE: directories copied by the separate 'none' step are counted as well, so
# the total may differ from the 72 suggested by the folder name.
print("Labels (Directory names):", labels)
print(f"Number of labels: {len(labels)}")


Labels (Directory names): ['amazing', 'april', 'assist', 'borrow', 'bowling', 'brother', 'challenge', 'comma', 'crazy', 'deposit', 'design', 'devil', 'each', 'enjoy', 'exact', 'famous', 'few', 'forbid', 'great', 'grow up', 'guitar', 'haircut', 'hit', 'hockey', 'impossible', 'influence', 'interest', 'japan', 'join', 'joy', 'kid', 'kneel', 'knock', 'last year', 'lazy', 'license', 'melt', 'mix', 'motor', 'name', 'never', 'next', 'none', 'obtain', 'organize', 'over', 'photographer', 'pray', 'pretty', 'queen', 'question', 'quick', 'radio', 'really', 'respect', 'sad', 'stubborn', 'sunrise', 'toast', 'today', 'type', 'ugly', 'upset', 'use', 'valley', 'verb', 'vote', 'while', 'white', 'write', 'yes', 'your', 'yourself']
Number of labels: 73
