In [1]:
import os
import shutil
import random

# Input directory holding the images and their text (label) files, and the
# output directory that receives the train/val/test split.
source_folder = r"C:\Users\trant\OneDrive - lmu.edu\Capstone\data\OneDrive_2024-05-21\input"
destination_folder = r"C:\Users\trant\OneDrive - lmu.edu\Capstone\data\OneDrive_2024-05-21\output"

# exist_ok=True makes this a no-op when the folder is already there, without
# the gap between a separate os.path.exists() check and the create call.
os.makedirs(destination_folder, exist_ok=True)

# Class id (as a string) -> species name. The names are listed in id order,
# so the keys run from '0' through '17' (18 classes in total).
class_mapping = {
    str(class_id): species
    for class_id, species in enumerate([
        'bobcat',            # 0
        'coyote',            # 1
        'gray fox',          # 2
        'mountain lion',     # 3
        'mule deer',         # 4
        'turkey vulture',    # 5
        'striped skunk',     # 6
        'raccoon',           # 7
        'opossum',           # 8
        'bird sp',           # 9
        'domestic dog',      # 10
        'rabbit',            # 11
        'great horned owl',  # 12
        'raven',             # 13
        'barn owl',          # 14
        'ground squirrel',   # 15
        'raptor sp',         # 16
        'horses',            # 17
    ])
}

# Pre-create destination_folder/<subset>/<class name>/ for every subset and
# every class in class_mapping. exist_ok=True lets the script be re-run over
# an existing tree without separate os.path.exists() checks, and raises
# right here if a non-directory already occupies one of the paths instead of
# silently skipping it.
for subset in ['train', 'val', 'test']:
    subset_folder = os.path.join(destination_folder, subset)
    os.makedirs(subset_folder, exist_ok=True)
    for class_name in class_mapping.values():
        class_folder = os.path.join(subset_folder, class_name)
        os.makedirs(class_folder, exist_ok=True)

# Fraction of the images sent to each subset: 70% train, 15% val, 15% test.
# The copy loop below only reads train_ratio and val_ratio; 'test' is chosen
# by its else branch, so test_ratio is informational there.
train_ratio, val_ratio, test_ratio = 0.7, 0.15, 0.15

# Copy every labelled .jpg in source_folder into
# destination_folder/<subset>/<class name>/, picking the subset at random
# according to train_ratio and val_ratio (everything else goes to 'test').
#
# The class comes from the image's same-named .txt file: the first
# whitespace-separated token on its first line is looked up in class_mapping.
# Images whose label file is missing or empty, or whose class number is not
# in class_mapping, are reported with a warning and skipped.
#
# The draw uses a dedicated fixed-seed generator and the directory listing is
# sorted (os.listdir returns names in arbitrary order), so re-running over
# the same files reproduces the same split. With the unseeded global
# generator a second run could copy an image into a different subset than
# the first run did, leaving the same image in, say, both train and test.
SPLIT_SEED = 42
split_rng = random.Random(SPLIT_SEED)

for filename in sorted(os.listdir(source_folder)):
    if not filename.lower().endswith('.jpg'):
        continue

    # The label file shares the image's base name (extension swapped).
    base_name = os.path.splitext(filename)[0]
    txt_file = f"{base_name}.txt"
    txt_file_path = os.path.join(source_folder, txt_file)

    # Open directly and handle the missing-file case, rather than checking
    # for existence first and opening afterwards.
    try:
        with open(txt_file_path, 'r') as file:
            first_line = file.readline()
    except FileNotFoundError:
        print(f"Warning: {txt_file_path} does not exist.")
        continue

    content = first_line.strip().split()
    if not content:
        print(f"Warning: {txt_file_path} is empty or has an incorrect format.")
        continue
    class_number = content[0]

    class_name = class_mapping.get(class_number)
    if class_name is None:
        print(f"Warning: Class number {class_number} is not in the class_mapping.")
        continue

    # Map one uniform draw from [0, 1) onto the cumulative ratio bands.
    rand_num = split_rng.random()
    if rand_num < train_ratio:
        subset = 'train'
    elif rand_num < train_ratio + val_ratio:
        subset = 'val'
    else:
        subset = 'test'

    # Make sure the target folder exists, so this loop does not depend on
    # the directory tree having been created beforehand.
    subset_class_folder = os.path.join(destination_folder, subset, class_name)
    os.makedirs(subset_class_folder, exist_ok=True)

    # Copy (not move) the image; the file in source_folder is left in place.
    shutil.copy(os.path.join(source_folder, filename), os.path.join(subset_class_folder, filename))



