Please set the desired variables in the script below before you run it.

In [3]:
import os
import random
from PIL import Image

# --- User configuration -------------------------------------------------------
# source_folder: directory holding the .jpg images and their same-named .txt
# annotation files.
# destination_folder: directory under which the train/test class folders and
# the cropped images are written.
source_folder = r"C:\Users\trant\OneDrive - lmu.edu\Capstone\data\training\training_set"
destination_folder = r"C:\Users\trant\OneDrive - lmu.edu\Capstone\data\training\output2"

# Side length, in pixels, of the square crops that are saved.
IMAGE_SIZE = 300

# Train/test split. In this script only train_ratio is consulted when a crop is
# assigned to a subset: each crop goes to 'train' with probability train_ratio
# and to 'test' otherwise. test_ratio is kept for reference and should equal
# 1 - train_ratio.
train_ratio = 0.7
test_ratio = 0.3

##################################################################################################################################################################

# Create the destination folder (and any missing parent folders).
# exist_ok=True makes this safe to re-run and avoids the race between checking
# for the folder and creating it.
os.makedirs(destination_folder, exist_ok=True)

# Class mapping with 18 classes (IDs '0' through '17').
# Keys are the class IDs as they appear in the annotation files (strings);
# values are the class names, which are also used as output folder names.
# The ID of each class is its position in the tuple below.
class_mapping = {
    str(class_id): species
    for class_id, species in enumerate((
        'bobcat',
        'coyote',
        'gray fox',
        'mountain lion',
        'mule deer',
        'turkey vulture',
        'striped skunk',
        'raccoon',
        'opossum',
        'bird sp',
        'domestic dog',
        'rabbit',
        'great horned owl',
        'raven',
        'barn owl',
        'ground squirrel',
        'raptor sp',
        'horses',
    ))
}

# Create the output tree <destination>/<subset>/<class name>/ for both subsets.
# exist_ok=True makes each call a no-op when the folder is already there, so
# the script can be re-run without a separate existence check.
for subset in ['train', 'test']:
    subset_folder = os.path.join(destination_folder, subset)
    # Created explicitly so the subset folder exists even if class_mapping is empty.
    os.makedirs(subset_folder, exist_ok=True)
    for class_name in class_mapping.values():
        class_folder = os.path.join(subset_folder, class_name)
        os.makedirs(class_folder, exist_ok=True)

def _square_crop_box(x_center, y_center, width, height, img_width, img_height):
    """Return a square (left, top, right, bottom) pixel box around an annotation.

    The four box values are fractions of the image size (they are multiplied
    by img_width / img_height here). The shorter side of the box is grown
    symmetrically to match the longer one, then the box is shifted back
    towards the image if the growth pushed it past an edge.

    NOTE(review): when the square's side is larger than the image dimension
    being grown into, the shift leaves the box extending past the opposite
    edge (negative top/left). The box is still square; confirm that the
    resulting out-of-image area in the crop is acceptable for training.
    """
    # Convert the centre/size annotation to pixel edges.
    left = (x_center - width / 2) * img_width
    top = (y_center - height / 2) * img_height
    right = (x_center + width / 2) * img_width
    bottom = (y_center + height / 2) * img_height

    box_width = right - left
    box_height = bottom - top

    if box_width > box_height:
        # Wider than tall: grow vertically.
        difference = box_width - box_height
        top -= difference / 2
        bottom += difference / 2
        if top < 0:
            bottom -= top  # top is negative, so this moves bottom down
            top = 0
        if bottom > img_height:
            top -= (bottom - img_height)  # shift up by the bottom overflow
            bottom = img_height
    else:
        # Taller than wide (or already square): grow horizontally.
        difference = box_height - box_width
        left -= difference / 2
        right += difference / 2
        if left < 0:
            right -= left  # left is negative, so this moves right further right
            left = 0
        if right > img_width:
            left -= (right - img_width)  # shift left by the right overflow
            right = img_width

    return left, top, right, bottom


# Process each .jpg in the source folder: read its same-named .txt annotation
# file, and for every valid annotation line save a square, resized crop into
# <destination>/<train|test>/<class name>/.
for filename in os.listdir(source_folder):
    if not filename.lower().endswith('.jpg'):
        continue

    # The annotation file shares the image's base name.
    base_name = os.path.splitext(filename)[0]
    txt_file = f"{base_name}.txt"
    txt_file_path = os.path.join(source_folder, txt_file)

    if not os.path.exists(txt_file_path):
        print(f"Warning: {txt_file_path} does not exist.")
        continue

    with open(txt_file_path, 'r') as file:
        lines = file.readlines()

    # First pass: validate every annotation line so the image is opened only
    # when there is at least one crop to produce.
    annotations = []
    for line_num, line in enumerate(lines):
        content = line.strip().split()
        if len(content) < 5:
            print(f"Warning: {txt_file_path} has an incorrect format on line {line_num+1}.")
            continue
        class_number = content[0]
        try:
            x_center, y_center, width, height = map(float, content[1:5])
        except ValueError:
            # Non-numeric coordinates: warn and skip the line instead of
            # aborting the whole run, like other malformed lines.
            print(f"Warning: {txt_file_path} has an incorrect format on line {line_num+1}.")
            continue

        class_name = class_mapping.get(class_number)
        if not class_name:
            print(f"Warning: Class number {class_number} is not in the class_mapping.")
            continue

        annotations.append((line_num, class_name, (x_center, y_center, width, height)))

    if not annotations:
        continue

    # Second pass: open the image once for all of its annotations; the
    # context manager closes the underlying file when the crops are done.
    img_path = os.path.join(source_folder, filename)
    with Image.open(img_path) as img:
        img_width, img_height = img.size

        for line_num, class_name, bbox in annotations:
            # Each crop is assigned to a subset independently, so crops from
            # the same image can end up in different subsets.
            subset = 'train' if random.random() < train_ratio else 'test'
            subset_class_folder = os.path.join(destination_folder, subset, class_name)

            crop_box = _square_crop_box(*bbox, img_width, img_height)
            cropped_img = img.crop(crop_box)

            # Resize the crop to IMAGE_SIZE x IMAGE_SIZE pixels.
            resized_img = cropped_img.resize((IMAGE_SIZE, IMAGE_SIZE), Image.LANCZOS)

            # The 1-based annotation line number keeps filenames unique when
            # one image has several annotations.
            cropped_filename = f"{base_name}_{line_num+1}.jpg"
            resized_img.save(os.path.join(subset_class_folder, cropped_filename))



