In [None]:
import os
import random
import shutil
import cv2
import numpy as np
import matplotlib.pyplot as plt
from ultralytics import YOLO


In [None]:
# Root of the working area and of the YOLO-formatted dataset built below.
base_path = "/content"
dataset_path = f"{base_path}/dataset"

# One images/ and one labels/ directory for each of the two splits.
folders = [
    f"{split_name}/{content_kind}"
    for split_name in ("train", "val")
    for content_kind in ("images", "labels")
]

# exist_ok=True keeps this cell safe to re-run.
for folder in folders:
    target_dir = f"{dataset_path}/{folder}"
    os.makedirs(target_dir, exist_ok=True)

print("YOLO dataset folders created!")


In [None]:
import xml.etree.ElementTree as ET
from sklearn.model_selection import train_test_split
from glob import glob


# Collect every annotation file below the dataset root.
# The list is sorted because glob returns matches in filesystem-dependent
# order; the seeded split below is only repeatable if its input order is.
xml_files = sorted(glob("/content/pklot/**/*.xml", recursive=True))

print("Total annotation files:", len(xml_files))

# train/val split (80% train, 20% val); random_state pins the shuffle
train_xml, val_xml = train_test_split(xml_files, test_size=0.2, random_state=42)

def convert_to_yolo(bb, img_w, img_h):
    """Convert a corner-format pixel box to normalized YOLO ``xywh``.

    Args:
        bb: Sequence ``(xmin, ymin, xmax, ymax)`` in pixels.
        img_w: Image width in pixels.
        img_h: Image height in pixels.

    Returns:
        Tuple ``(x_center, y_center, width, height)``, each divided by the
        matching image dimension.

    Raises:
        ZeroDivisionError: If ``img_w`` or ``img_h`` is zero.

    The box is first put in min/max order and clipped to the image, so the
    returned values always lie in ``[0, 1]`` and sizes are never negative.
    Boxes that already lie inside the image are returned unchanged.
    """
    xmin, ymin, xmax, ymax = bb

    # Accept swapped corners, then clip each coordinate to the image extent;
    # annotations that overshoot the border would otherwise give values
    # outside the normalized range that YOLO labels are expected to use.
    xmin, xmax = (min(max(v, 0), img_w) for v in sorted((xmin, xmax)))
    ymin, ymax = (min(max(v, 0), img_h) for v in sorted((ymin, ymax)))

    x_center = (xmin + xmax) / 2.0 / img_w
    y_center = (ymin + ymax) / 2.0 / img_h
    w = (xmax - xmin) / img_w
    h = (ymax - ymin) / img_h
    return x_center, y_center, w, h

def _find_image(xml_path):
    """Return the .jpg (preferred) or .png next to ``xml_path``, else None."""
    # splitext only strips the final extension; str.replace(".xml", ...)
    # would also rewrite ".xml" appearing in a directory name.
    stem = os.path.splitext(xml_path)[0]
    for ext in (".jpg", ".png"):
        candidate = stem + ext
        if os.path.exists(candidate):
            return candidate
    return None


def _yolo_label_lines(root, img_w, img_h):
    """Build one YOLO label line per "occupied"/"empty" <object> in ``root``."""
    lines = []
    for obj in root.findall("object"):
        # findtext returns None for a missing <name>; an empty <name/> gives
        # "" — both are treated as "no class" and skipped.
        name = (obj.findtext("name") or "").strip().lower()
        if name not in ("occupied", "empty"):
            continue

        cls = 1 if name == "occupied" else 0

        bbox = obj.find("bndbox")
        # float() accepts both "12" and "12.0"; int() rejects the latter.
        corners = [
            float(bbox.find(tag).text)
            for tag in ("xmin", "ymin", "xmax", "ymax")
        ]

        x, y, wbox, hbox = convert_to_yolo(corners, img_w, img_h)
        lines.append(f"{cls} {x} {y} {wbox} {hbox}")
    return lines


def process(files, split):
    """Convert XML annotations to YOLO label files and copy their images.

    For each annotation in ``files`` the image with the same path but a
    ``.jpg`` or ``.png`` extension is located, every ``<object>`` named
    "occupied" (class 1) or "empty" (class 0) is converted with
    ``convert_to_yolo``, the lines are written to
    ``{dataset_path}/{split}/labels/<name>.txt`` and the image is copied to
    ``{dataset_path}/{split}/images/``.

    Args:
        files: Iterable of paths to XML annotation files.
        split: Name of the dataset split sub-directory, e.g. "train" or "val".

    Annotations without an image are skipped silently; images that OpenCV
    cannot decode are skipped with a message. An annotation with no matching
    objects still produces an (empty) label file.

    NOTE: outputs are named by file basename only, so two annotations with
    the same file name in different source folders overwrite each other.
    """
    images_dir = f"{dataset_path}/{split}/images"
    labels_dir = f"{dataset_path}/{split}/labels"
    # Create the targets here so the function does not rely on another cell
    # having been run first.
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)

    for xml_path in files:
        root = ET.parse(xml_path).getroot()

        img_file = _find_image(xml_path)
        if img_file is None:
            continue

        # cv2.imread signals failure by returning None instead of raising.
        img = cv2.imread(img_file)
        if img is None:
            print(f"Skipping unreadable image: {img_file}")
            continue
        img_h, img_w = img.shape[:2]

        # YOLO label file path
        stem = os.path.splitext(os.path.basename(xml_path))[0]
        label_path = f"{labels_dir}/{stem}.txt"

        lines = _yolo_label_lines(root, img_w, img_h)
        with open(label_path, "w") as label_fh:
            label_fh.writelines(f"{line}\n" for line in lines)

        shutil.copy(img_file, os.path.join(images_dir, os.path.basename(img_file)))

# Convert each split in turn: training annotations first, then validation.
for split_files, split_name in ((train_xml, "train"), (val_xml, "val")):
    process(split_files, split_name)

print(" Annotations converted to YOLO format")
