In [1]:
import os
import shutil
import random

In [None]:
def split_data_to_train_test(data_dir, test_ratio=0.1, seed=None):
    """
    Copy a random share of each class folder into `test` and the rest into `train`.

    For every class folder (`drowsy`, `notdrowsy`) directly under `data_dir`,
    the regular files are shuffled, the first `int(n * test_ratio)` of them are
    copied to `<data_dir>/test/<class>` and the remainder to
    `<data_dir>/train/<class>`. The source files are left in place.

    The function is safe to re-run: if a file that is assigned to one split is
    already present in the other split's folder under the same name (left over
    from an earlier run with a different shuffle), that stale copy is removed
    so the same file never ends up in both `train` and `test`.

    Args:
    - data_dir (str): Root directory containing the class folders 'drowsy'
      and 'notdrowsy'.
    - test_ratio (float): Fraction of each class copied to `test`; must be
      between 0 and 1 inclusive.
    - seed (int | None): Seed for the shuffle. With a seed, the same directory
      contents always produce the same split. With None (default) the global
      `random` state is used, as before.

    Raises:
    - ValueError: if `test_ratio` is outside [0, 1].
    - FileNotFoundError: if a class folder is missing under `data_dir`
      (raised before anything is created or copied).
    """
    if not 0 <= test_ratio <= 1:
        raise ValueError(f"test_ratio must be between 0 and 1, got {test_ratio!r}")

    # Define class folders
    classes = ['drowsy', 'notdrowsy']

    # Fail early, before creating any output folder, if the input layout is wrong
    missing = [c for c in classes if not os.path.isdir(os.path.join(data_dir, c))]
    if missing:
        raise FileNotFoundError(
            f"Missing class folder(s) {missing} in {data_dir!r}"
        )

    # Define paths for train and test directories
    train_dir = os.path.join(data_dir, 'train')
    test_dir = os.path.join(data_dir, 'test')

    # A private generator keeps a seeded run independent of the global state;
    # without a seed, fall back to the module-level generator (original behavior).
    shuffler = random.Random(seed) if seed is not None else random

    # Iterate over each class
    for class_name in classes:
        source_dir = os.path.join(data_dir, class_name)
        split_dirs = {
            'train': os.path.join(train_dir, class_name),
            'test': os.path.join(test_dir, class_name),
        }

        # Create class folders in train and test directories
        for split_dir in split_dirs.values():
            os.makedirs(split_dir, exist_ok=True)

        # os.listdir returns entries in arbitrary order, so sort first to make
        # a seeded shuffle reproducible across machines and runs.
        all_files = sorted(
            f for f in os.listdir(source_dir)
            if os.path.isfile(os.path.join(source_dir, f))
        )

        # Shuffle and split files
        shuffler.shuffle(all_files)
        split_idx = int(len(all_files) * test_ratio)
        assignment = {
            'test': all_files[:split_idx],
            'train': all_files[split_idx:],
        }

        # Copy each file to its split and drop any stale copy in the other one
        for split, other in (('test', 'train'), ('train', 'test')):
            for file_name in assignment[split]:
                src_path = os.path.join(source_dir, file_name)
                dst_path = os.path.join(split_dirs[split], file_name)
                shutil.copy(src_path, dst_path)

                stale_path = os.path.join(split_dirs[other], file_name)
                if os.path.isfile(stale_path):
                    os.remove(stale_path)

    # Print class distribution for train and test folders
    print("\nDistribusi Kelas:")
    for split in ['train', 'test']:
        print(f"\n{split.capitalize()} Data:")
        for class_name in classes:
            class_dir = os.path.join(data_dir, split, class_name)
            file_count = len(os.listdir(class_dir))
            print(f"  {class_name.capitalize()}: {file_count} files")


In [12]:
# Example usage: split the NTHU-DDD class folders into train/ and test/
# subfolders created inside the same directory. The path is relative to the
# notebook's working directory.
split_data_to_train_test("../Datasets/NTHU-DDD")


Distribusi Kelas:

Train Data:
  Drowsy: 32427 files
  Notdrowsy: 27442 files

Test Data:
  Drowsy: 3603 files
  Notdrowsy: 3049 files
