In [1]:
import os
import xml.etree.ElementTree as ET

# Where the XML annotations are read from, and where the converted
# YOLO label files (.txt) are written to.
xml_folder, output_folder = (
    "APS360_Project_Dataset/dataset2/Annotations",
    "APS360_Project_Dataset/dataset2/labels",
)

# The label directory must exist before any file is written into it;
# exist_ok makes re-running this cell harmless.
os.makedirs(output_folder, exist_ok=True)

# Function to convert XML to YOLO format
def convert_to_yolo(xml_file, output_file, class_id=0):
    """Convert one Pascal VOC-style XML annotation into a YOLO label file.

    Each ``<object>`` in the annotation becomes one line of the form
    ``class_id x_center y_center box_width box_height``, where the four box
    values are divided by the image size read from ``size/width`` and
    ``size/height``.

    Args:
        xml_file: Path of the XML annotation to read.
        output_file: Path of the text file to write (overwritten if present).
        class_id: Class index written for every object. Defaults to 0, which
            matches a single-class dataset (card number region).

    Raises:
        AttributeError: If a required element (size or bndbox field) is missing.
        ValueError: If a size or coordinate field is not numeric.
        ZeroDivisionError: If an object is present and the image width or
            height is 0.
    """
    root = ET.parse(xml_file).getroot()

    # Parse as float rather than int: some annotation tools write values such
    # as "128.0" or "37.5", which int() rejects with a ValueError.
    width = float(root.find("size/width").text)
    height = float(root.find("size/height").text)

    # Build every line before touching the output file so that a malformed
    # annotation cannot leave a truncated or empty label file behind.
    lines = []
    for obj in root.findall("object"):
        # Get bounding box coordinates
        xmin = float(obj.find("bndbox/xmin").text)
        ymin = float(obj.find("bndbox/ymin").text)
        xmax = float(obj.find("bndbox/xmax").text)
        ymax = float(obj.find("bndbox/ymax").text)

        # Convert corner coordinates to a normalized center/size box
        x_center = (xmin + xmax) / 2 / width
        y_center = (ymin + ymax) / 2 / height
        box_width = (xmax - xmin) / width
        box_height = (ymax - ymin) / height

        lines.append(f"{class_id} {x_center} {y_center} {box_width} {box_height}\n")

    with open(output_file, "w") as f:
        f.writelines(lines)

# Convert each XML file.
# Sorted so the processing order is the same on every run (os.listdir order is
# arbitrary).
for xml_file in sorted(os.listdir(xml_folder)):
    # Split off only the final extension: str.replace(".xml", ".txt") would
    # also rewrite ".xml" appearing earlier in the file name.
    stem, extension = os.path.splitext(xml_file)
    if extension.lower() != ".xml":
        continue
    input_path = os.path.join(xml_folder, xml_file)
    output_path = os.path.join(output_folder, stem + ".txt")
    convert_to_yolo(input_path, output_path)


In [2]:
import os
import shutil
import random

# Inputs: the raw images and the flat folder of YOLO label files.
image_folder = "APS360_Project_Dataset/dataset2/JPEGImages"
label_folder = "APS360_Project_Dataset/dataset2/labels"

# Outputs: one image folder and one label folder per split.
train_image_folder = "APS360_Project_Dataset/dataset2/images/train"
val_image_folder = "APS360_Project_Dataset/dataset2/images/val"
test_image_folder = "APS360_Project_Dataset/dataset2/images/test"
train_label_folder = "APS360_Project_Dataset/dataset2/labels/train"
val_label_folder = "APS360_Project_Dataset/dataset2/labels/val"
test_label_folder = "APS360_Project_Dataset/dataset2/labels/test"

# Make sure every output folder exists; exist_ok keeps the cell re-runnable.
for split_folder in (
    train_image_folder,
    val_image_folder,
    test_image_folder,
    train_label_folder,
    val_label_folder,
    test_label_folder,
):
    os.makedirs(split_folder, exist_ok=True)

# Get a list of image files and shuffle.
# os.listdir returns entries in arbitrary order, so sort first and shuffle with
# a fixed seed. Re-running the cell then reproduces the same split; without
# this, a re-run could copy an image into a different split folder than the
# previous run and leak it between train/val/test.
# NOTE: copies left in the split folders by earlier, unseeded runs are not
# removed here; clear those folders once before relying on the split.
split_seed = 42
image_files = sorted(f for f in os.listdir(image_folder) if f.endswith(".jpg"))
random.Random(split_seed).shuffle(image_files)

# Set the split ratios
train_ratio = 0.7
val_ratio = 0.15
test_ratio = 0.15
assert abs(train_ratio + val_ratio + test_ratio - 1.0) < 1e-9, "split ratios must sum to 1"

# Drop images without a label file *before* sizing the splits, so skipped
# images do not eat into a split's share and skew the ratios.
labeled_files = []
for image_file in image_files:
    label_file = os.path.splitext(image_file)[0] + ".txt"
    if os.path.exists(os.path.join(label_folder, label_file)):
        labeled_files.append((image_file, label_file))
    else:
        print(f"Warning: Label file {label_file} not found for image {image_file}. Skipping.")

# Calculate the number of images for each set (the test set takes the remainder)
train_count = int(len(labeled_files) * train_ratio)
val_count = int(len(labeled_files) * val_ratio)

# Split and copy files
split_sizes = {"train": 0, "val": 0, "test": 0}
for i, (image_file, label_file) in enumerate(labeled_files):
    # Determine the target folders based on the position in the shuffled list
    if i < train_count:
        split_name = "train"
        target_image_folder = train_image_folder
        target_label_folder = train_label_folder
    elif i < train_count + val_count:
        split_name = "val"
        target_image_folder = val_image_folder
        target_label_folder = val_label_folder
    else:
        split_name = "test"
        target_image_folder = test_image_folder
        target_label_folder = test_label_folder

    # Copy image and label files to their respective folders
    shutil.copy(os.path.join(image_folder, image_file), os.path.join(target_image_folder, image_file))
    shutil.copy(os.path.join(label_folder, label_file), os.path.join(target_label_folder, label_file))
    split_sizes[split_name] += 1

# Every labeled image must land in exactly one split.
assert sum(split_sizes.values()) == len(labeled_files)
print(f"Copied {len(labeled_files)} labeled images: {split_sizes}")


Copying 1758.jpg to APS360_Project_Dataset/dataset2/images/train
Copying 121.jpg to APS360_Project_Dataset/dataset2/images/train
Copying 1352.jpg to APS360_Project_Dataset/dataset2/images/train
Copying 412.jpg to APS360_Project_Dataset/dataset2/images/train
Copying 434.jpg to APS360_Project_Dataset/dataset2/images/train
Copying 18.jpg to APS360_Project_Dataset/dataset2/images/train
Copying 862.jpg to APS360_Project_Dataset/dataset2/images/train
Copying 35.jpg to APS360_Project_Dataset/dataset2/images/train
Copying 48.jpg to APS360_Project_Dataset/dataset2/images/train
Copying 840.jpg to APS360_Project_Dataset/dataset2/images/train
Copying 1261.jpg to APS360_Project_Dataset/dataset2/images/train
Copying 189.jpg to APS360_Project_Dataset/dataset2/images/train
Copying 1093.jpg to APS360_Project_Dataset/dataset2/images/train
Copying 398.jpg to APS360_Project_Dataset/dataset2/images/train
Copying 470.jpg to APS360_Project_Dataset/dataset2/images/train
Copying 200.jpg to APS360_Project_Datas