In [44]:
import os
import xml.etree.ElementTree as ET

def _coerce_int(text):
    """Return ``text`` as an ``int`` if it is an optionally signed integer, else unchanged."""
    unsigned = text[1:] if text[:1] in ("+", "-") else text
    if unsigned.isdigit():
        try:
            return int(text)
        except ValueError:
            # str.isdigit() also accepts characters (e.g. superscript digits)
            # that int() cannot parse; keep such values as plain strings.
            return text
    return text


def xml_to_dict(element):
    """
    Recursively convert an XML element and its children into plain Python data.

    Args:
        element: An ``xml.etree.ElementTree.Element``.

    Returns:
        * For an element that has only text (no attributes, no children):
          the stripped text, converted to ``int`` when it is an integer.
        * Otherwise a dict mapping attribute names and child tags to their
          converted values. Repeated child tags are collected into a list.
          If such an element also carries non-blank text, the text is stored
          under the ``"#text"`` key so attributes/children are not lost.
        * An element with no attributes, children or text yields ``{}``.
    """
    node_dict = {}

    # Attributes first, converting integer-looking values to int.
    for attr, value in element.attrib.items():
        node_dict[attr] = _coerce_int(value)

    # Children next; a tag seen more than once is promoted to a list.
    for child in element:
        child_dict = xml_to_dict(child)
        if child.tag in node_dict:
            if not isinstance(node_dict[child.tag], list):
                node_dict[child.tag] = [node_dict[child.tag]]
            node_dict[child.tag].append(child_dict)
        else:
            node_dict[child.tag] = child_dict

    # Text content: a pure-text leaf collapses to a scalar; otherwise the text
    # is kept alongside the attributes/children instead of replacing them.
    text_content = element.text.strip() if element.text else ""
    if text_content:
        text_value = _coerce_int(text_content)
        if not node_dict:
            return text_value
        node_dict["#text"] = text_value

    return node_dict


def parse_xml_files_from_directory_to_dict(directory_path):
    """
    Parse every ``.xml`` file in a directory into a dictionary.

    Args:
        directory_path: Directory to scan (non-recursive).

    Returns:
        A dict mapping each XML file name to the result of ``xml_to_dict`` on
        that file's root element. Files are processed in sorted name order so
        the resulting dict ordering is reproducible between runs. Files that
        fail to parse, or that disappear before being read, are reported with
        ``print`` and skipped.
    """
    data_dict = {}

    # os.listdir() returns entries in arbitrary order; sort for determinism.
    for filename in sorted(os.listdir(directory_path)):
        if not filename.endswith('.xml'):
            continue

        file_path = os.path.join(directory_path, filename)
        try:
            root = ET.parse(file_path).getroot()
            data_dict[filename] = xml_to_dict(root)
        except ET.ParseError as e:
            print(f"Error parsing {filename}: {e}")
        except FileNotFoundError:
            print(f"File not found: {file_path}")

    return data_dict

In [45]:
from pathlib import Path


# Directory holding the annotation XML files. Set the
# ATLAS_DIONE_ANNOTATIONS_DIR environment variable to point at your own copy;
# the fallback below is the machine-specific path this notebook was written with.
directory_path = Path(os.environ.get(
    "ATLAS_DIONE_ANNOTATIONS_DIR",
    'C:/Users/nicki/Downloads/atlas_dione_objectdetection.tar/atlas_dione_objectdetection/ATLAS_Dione_ObjectDetection/ATLAS_Dione_ObjectDetection.tar/ATLAS_Dione_ObjectDetection/Annotations',
))
xml_data_dict = parse_xml_files_from_directory_to_dict(directory_path)

In [46]:
# Spot-check one parsed annotation to see the structure produced by xml_to_dict.
xml_data_dict["set00V000I00000.xml"]

{'folder': 'VOCAtlas',
 'filename': 'set00V000I00000',
 'source': {'database': 'The VOCAtlas Database',
  'annotation': 'PASCAL VOCAtlas',
  'image': 'atlas',
  'flickrid': 'n/a'},
 'owner': {'flickrid': 'n/a', 'name': 'n/a'},
 'size': {'width': 480, 'height': 854, 'depth': 3},
 'segmented': 0,
 'object': [{'name': 'tool',
   'pose': 'Left',
   'truncated': 0,
   'occluded': 0,
   'bndbox': {'xmin': 274, 'ymin': 189, 'xmax': 374, 'ymax': 244},
   'difficult': 0},
  {'name': 'tool',
   'pose': 'Right',
   'truncated': 0,
   'occluded': 0,
   'bndbox': {'xmin': 464, 'ymin': 121, 'xmax': 573, 'ymax': 197},
   'difficult': 0}]}

In [47]:
# Keep only the fields needed downstream from each parsed annotation.
KEPT_FIELDS = ("filename", "size", "object")
data = {
    xml_name: {field: value for field, value in record.items() if field in KEPT_FIELDS}
    for xml_name, record in xml_data_dict.items()
}
# Preview a few entries rather than echoing the whole dict, which floods the
# notebook output when the directory holds many annotation files.
dict(list(data.items())[:3])

{'set00V000I00000.xml': {'filename': 'set00V000I00000',
  'size': {'width': 480, 'height': 854, 'depth': 3},
  'object': [{'name': 'tool',
    'pose': 'Left',
    'truncated': 0,
    'occluded': 0,
    'bndbox': {'xmin': 274, 'ymin': 189, 'xmax': 374, 'ymax': 244},
    'difficult': 0},
   {'name': 'tool',
    'pose': 'Right',
    'truncated': 0,
    'occluded': 0,
    'bndbox': {'xmin': 464, 'ymin': 121, 'xmax': 573, 'ymax': 197},
    'difficult': 0}]},
 'set00V000I00001.xml': {'filename': 'set00V000I00001',
  'size': {'width': 480, 'height': 854, 'depth': 3},
  'object': [{'name': 'tool',
    'pose': 'Left',
    'truncated': 0,
    'occluded': 0,
    'bndbox': {'xmin': 270, 'ymin': 189, 'xmax': 370, 'ymax': 244},
    'difficult': 0},
   {'name': 'tool',
    'pose': 'Right',
    'truncated': 0,
    'occluded': 0,
    'bndbox': {'xmin': 464, 'ymin': 121, 'xmax': 573, 'ymax': 194},
    'difficult': 0}]},
 'set00V000I00002.xml': {'filename': 'set00V000I00002',
  'size': {'width': 480, 'he

In [48]:
import pickle

# Persist the filtered annotations to disk as a pickle.
with open("data.pkl", "wb") as pickle_out:
    pickle.Pickler(pickle_out).dump(data)


In [49]:
import os
import json
import xml.etree.ElementTree as ET
from typing import Dict, List, Any
from datetime import datetime

class XMLtoCOCO:
    """Accumulate annotation XML files into a COCO-format dictionary.

    Each XML file is expected to contain ``<filename>``, ``<size>`` (with
    ``<width>``/``<height>``) and zero or more ``<object>`` elements, each with
    ``<name>``, ``<pose>`` and a ``<bndbox>`` (``xmin``/``ymin``/``xmax``/``ymax``).

    Attributes:
        coco_format: The COCO dictionary being built (``info``, ``licenses``,
            ``images``, ``annotations``, ``categories``).
        category_id_map: Maps category name -> category ID.
        annotation_id: ID that will be given to the next annotation.
        image_id: ID that will be given to the next image added through
            ``convert_directory`` / ``convert_single_file``.
    """

    def __init__(self):
        self.coco_format = {
            "info": {
                "year": datetime.now().year,
                "version": "1.0",
                "description": "Converted from XML",
                "contributor": "",
                "url": "",
                "date_created": datetime.now().strftime("%Y-%m-%d")
            },
            "licenses": [{
                "id": 1,
                "name": "Unknown",
                "url": ""
            }],
            "images": [],
            "annotations": [],
            "categories": []
        }
        self.category_id_map = {}
        self.annotation_id = 1
        # Tracked on the instance (like annotation_id) so that image IDs stay
        # unique when the same converter processes several inputs.
        self.image_id = 1

    def add_category(self, name: str) -> int:
        """Register ``name`` as a category if it is new and return its ID.

        IDs are assigned sequentially starting at 1 in order of first use.
        """
        if name not in self.category_id_map:
            category_id = len(self.category_id_map) + 1
            self.category_id_map[name] = category_id
            self.coco_format["categories"].append({
                "id": category_id,
                "name": name,
                "supercategory": "none"
            })
        return self.category_id_map[name]

    def parse_xml(self, xml_path: str, image_id: int) -> tuple:
        """Parse a single XML file.

        Args:
            xml_path: Path of the XML annotation file.
            image_id: ID to assign to the image described by the file.

        Returns:
            A ``(image_info, annotations)`` tuple: ``image_info`` is the COCO
            image dict and ``annotations`` is a list of COCO annotation dicts
            (one per ``<object>``). New categories are registered and
            ``self.annotation_id`` is advanced as a side effect; nothing is
            appended to ``self.coco_format["images"]``/``["annotations"]``.
        """
        tree = ET.parse(xml_path)
        root = tree.getroot()

        # Image-level information.
        size = root.find('size')
        image_width = int(size.find('width').text)
        image_height = int(size.find('height').text)
        filename = root.find('filename').text

        image_info = {
            "id": image_id,
            # The XML stores the name without extension; ".jpg" is appended.
            "file_name": filename + ".jpg",
            "width": image_width,
            "height": image_height,
            "license": 1,
            # NOTE: this is the conversion time, not the actual capture time.
            "date_captured": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }

        annotations = []
        for obj in root.findall('object'):
            # Categories combine pose and name, e.g. "Left_tool".
            category_name = obj.find('pose').text + "_" + obj.find('name').text
            category_id = self.add_category(category_name)

            bbox = obj.find('bndbox')
            xmin = float(bbox.find('xmin').text)
            ymin = float(bbox.find('ymin').text)
            xmax = float(bbox.find('xmax').text)
            ymax = float(bbox.find('ymax').text)

            # COCO bbox format is [x, y, width, height].
            box_width = xmax - xmin
            box_height = ymax - ymin

            annotations.append({
                "id": self.annotation_id,
                "image_id": image_id,
                "category_id": category_id,
                "bbox": [xmin, ymin, box_width, box_height],
                "area": box_width * box_height,
                "segmentation": [],  # Empty for bbox-only annotations
                "iscrowd": 0
            })
            self.annotation_id += 1

        return image_info, annotations

    def _add_parsed_file(self, xml_path: str) -> None:
        """Parse one XML file under the next free image ID and record the result."""
        image_info, annotations = self.parse_xml(xml_path, self.image_id)
        self.coco_format["images"].append(image_info)
        self.coco_format["annotations"].extend(annotations)
        self.image_id += 1

    def _save(self, output_path: str) -> None:
        """Write the accumulated COCO dictionary to ``output_path`` as JSON."""
        with open(output_path, 'w') as f:
            json.dump(self.coco_format, f, indent=2)

    def convert_directory(self, xml_dir: str, output_path: str) -> None:
        """Convert all ``.xml`` files in ``xml_dir`` and write the COCO JSON.

        Files are processed in sorted name order so that image, annotation and
        category IDs are reproducible (``os.listdir`` order is arbitrary).

        Args:
            xml_dir: Directory containing the XML files (non-recursive).
            output_path: Destination JSON file.
        """
        xml_files = sorted(
            name for name in os.listdir(xml_dir) if name.endswith('.xml')
        )
        for xml_file in xml_files:
            self._add_parsed_file(os.path.join(xml_dir, xml_file))

        self._save(output_path)

    def convert_single_file(self, xml_path: str, output_path: str) -> None:
        """Convert a single XML file and write the COCO JSON.

        The image receives the next free image ID (1 on a fresh converter), so
        calling this on a converter that already holds images does not create
        duplicate IDs.

        Args:
            xml_path: Path of the XML annotation file.
            output_path: Destination JSON file.
        """
        self._add_parsed_file(xml_path)
        self._save(output_path)

In [50]:
# Convert every annotation XML in `directory_path` into one COCO JSON file.
converter = XMLtoCOCO()
converter.convert_directory(xml_dir=directory_path, output_path='output.json')