In [None]:
# Input folders for the conversion: the images and their XML annotation files
# sit in sibling directories under one dataset root.
_DATASET_ROOT = "ATLAS_Dione_ObjectDetection"
IMAGES_DIRECTORY = _DATASET_ROOT + "/JPEGImages"
XML_DIRECTORY = _DATASET_ROOT + "/Annotations"

In [7]:
import json
import os
import xml.etree.ElementTree as ET
from datetime import datetime
from typing import Any, Dict, List, Tuple

class XMLtoCOCO:
    """Accumulate XML object-detection annotations into one COCO document.

    Each XML file is expected to contain ``filename``, ``size/width``,
    ``size/height`` and zero or more ``object`` elements, each with ``name``,
    ``pose`` and ``bndbox/{xmin,ymin,xmax,ymax}`` children.

    One instance owns one COCO document (``self.coco_format``). Every
    ``convert_*`` call appends to that document, and image, annotation and
    category IDs stay unique across calls on the same instance.
    """

    def __init__(self):
        """Create an empty COCO document and reset the ID bookkeeping."""
        now = datetime.now()
        self.coco_format = {
            "info": {
                "year": now.year,
                "version": "1.0",
                "description": "Converted from XML",
                "contributor": "",
                "url": "",
                "date_created": now.strftime("%Y-%m-%d")
            },
            "licenses": [{
                "id": 1,
                "name": "Unknown",
                "url": ""
            }],
            "images": [],
            "annotations": [],
            "categories": []
        }
        # Category name -> COCO category id, assigned in first-seen order.
        self.category_id_map = {}
        # Next annotation id to hand out; shared by all files converted.
        self.annotation_id = 1

    def add_category(self, name: str) -> int:
        """Register ``name`` as a category if it is new and return its ID.

        IDs are 1-based and assigned in the order categories are first seen.
        """
        if name not in self.category_id_map:
            category_id = len(self.category_id_map) + 1
            self.category_id_map[name] = category_id
            self.coco_format["categories"].append({
                "id": category_id,
                "name": name,
                "supercategory": "none"
            })
        return self.category_id_map[name]

    def parse_xml(
        self, xml_path: str, image_id: int
    ) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
        """Parse one XML file into a COCO image record and its annotations.

        Side effects: new categories are registered through ``add_category``
        and ``self.annotation_id`` advances by one per parsed object. The
        returned records are NOT appended to ``self.coco_format``; that is
        left to the caller.

        Args:
            xml_path: Path of the XML annotation file.
            image_id: COCO image ID to assign to this file.

        Returns:
            ``(image_info, annotations)`` where ``image_info`` is the COCO
            image dict and ``annotations`` is a list of COCO annotation dicts
            with ``bbox`` given as ``[x, y, width, height]``.

        Raises:
            xml.etree.ElementTree.ParseError: If the file is not valid XML.
            AttributeError: If a required element is missing.
        """
        root = ET.parse(xml_path).getroot()

        size = root.find('size')
        image_width = int(size.find('width').text)
        image_height = int(size.find('height').text)

        # The annotation stores the image name; add the extension only when
        # it is not already there, so "x.jpg" does not become "x.jpg.jpg".
        file_name = root.find('filename').text
        if not file_name.lower().endswith(".jpg"):
            file_name += ".jpg"

        image_info = {
            "id": image_id,
            "file_name": file_name,
            "width": image_width,
            "height": image_height,
            "license": 1,
            # Conversion time; the XML fields read here carry no capture time.
            "date_captured": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }

        annotations = []
        for obj in root.findall('object'):
            # Categories are keyed on pose + name, so the same object name
            # seen under different poses yields distinct categories.
            category_name = obj.find('pose').text + "_" + obj.find('name').text
            category_id = self.add_category(category_name)

            bndbox = obj.find('bndbox')
            xmin = float(bndbox.find('xmin').text)
            ymin = float(bndbox.find('ymin').text)
            xmax = float(bndbox.find('xmax').text)
            ymax = float(bndbox.find('ymax').text)

            # COCO boxes are [x, y, width, height] rather than corner pairs.
            box_width = xmax - xmin
            box_height = ymax - ymin

            annotations.append({
                "id": self.annotation_id,
                "image_id": image_id,
                "category_id": category_id,
                "bbox": [xmin, ymin, box_width, box_height],
                "area": box_width * box_height,
                "segmentation": [],  # Empty for bbox-only annotations
                "iscrowd": 0
            })
            self.annotation_id += 1

        return image_info, annotations

    def _next_image_id(self) -> int:
        """Return the first image ID not yet used in this document."""
        return len(self.coco_format["images"]) + 1

    def _save(self, output_path: str) -> None:
        """Write the accumulated COCO document to ``output_path`` as JSON."""
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(self.coco_format, f, indent=2)

    def convert_directory(self, xml_dir: str, output_path: str) -> None:
        """Convert every ``*.xml`` file directly inside ``xml_dir``.

        Files are processed in sorted name order so that image, annotation
        and category IDs are reproducible (``os.listdir`` order is arbitrary).
        Results are appended to this instance's document, which is then
        written to ``output_path``.

        Args:
            xml_dir: Directory containing the XML annotation files.
            output_path: Destination path of the COCO JSON file.
        """
        for xml_file in sorted(os.listdir(xml_dir)):
            if not xml_file.endswith('.xml'):
                continue
            xml_path = os.path.join(xml_dir, xml_file)
            image_info, annotations = self.parse_xml(
                xml_path, self._next_image_id()
            )
            self.coco_format["images"].append(image_info)
            self.coco_format["annotations"].extend(annotations)

        self._save(output_path)

    def convert_single_file(self, xml_path: str, output_path: str) -> None:
        """Convert one XML file and write the document to ``output_path``.

        The image receives the next free image ID (1 on a fresh converter),
        so it never collides with images converted earlier on this instance.

        Args:
            xml_path: Path of the XML annotation file.
            output_path: Destination path of the COCO JSON file.
        """
        image_info, annotations = self.parse_xml(
            xml_path, self._next_image_id()
        )
        self.coco_format["images"].append(image_info)
        self.coco_format["annotations"].extend(annotations)

        self._save(output_path)

In [11]:
from pathlib import Path


# Convert every XML annotation under XML_DIRECTORY into a single COCO JSON
# file, written into the images directory.
converter = XMLtoCOCO()
directory_path = Path(XML_DIRECTORY)
coco_json_path = Path(IMAGES_DIRECTORY) / 'dataset_coco_format.json'
converter.convert_directory(directory_path, coco_json_path)