In [10]:
import json
import os
from PIL import Image
import xml.etree.ElementTree as ET

# Function to convert coordinates from polygon to bounding box
def polygon_to_bbox(polygon):
    """Return the axis-aligned bounding box enclosing a polygon.

    Args:
        polygon: Mapping with 'all_points_x' and 'all_points_y' entries, each
            an iterable of mutually comparable coordinates.

    Returns:
        Tuple ``(xmin, ymin, xmax, ymax)``.

    Raises:
        KeyError: If either coordinate entry is missing.
        ValueError: If a coordinate iterable is empty.
    """
    xs = polygon['all_points_x']
    ys = polygon['all_points_y']
    left, right = min(xs), max(xs)
    top, bottom = min(ys), max(ys)
    return left, top, right, bottom

# Function to create Pascal VOC XML
def create_pascal_voc_xml(image_file, width, height, objects, output_folder, image_folder):
    """Write a Pascal VOC style annotation XML file for one image.

    The file is written to ``<output_folder>/<image base name without
    extension>.xml``. Only the base name of ``image_file`` is used for the
    output file name (the same value stored in ``<filename>``), so an
    ``image_file`` that carries directory components does not point the XML
    at a sub-folder of ``output_folder`` that does not exist.

    Args:
        image_file: Image file name; stored (base name only) in ``<filename>``
            and joined with ``image_folder`` for ``<path>``.
        width: Image width, written as text to ``<size>/<width>``.
        height: Image height, written as text to ``<size>/<height>``.
        objects: Iterable of dicts with a 'label' entry and a 'bbox' entry
            indexable as ``(xmin, ymin, xmax, ymax)``.
        output_folder: Directory that receives the XML file.
        image_folder: Image directory; its base name goes to ``<folder>``.

    Returns:
        None. The annotation is written to disk.
    """
    image_name = os.path.basename(image_file)

    root = ET.Element("annotation")

    ET.SubElement(root, "folder").text = os.path.basename(image_folder)
    ET.SubElement(root, "filename").text = image_name
    ET.SubElement(root, "path").text = os.path.join(image_folder, image_file)

    size = ET.SubElement(root, "size")
    ET.SubElement(size, "width").text = str(width)
    ET.SubElement(size, "height").text = str(height)
    ET.SubElement(size, "depth").text = "3"  # Assuming RGB images

    ET.SubElement(root, "segmented").text = "0"

    for obj in objects:
        obj_elem = ET.SubElement(root, "object")
        # Use 'plate' if label is empty, otherwise use 'text'
        obj_name = 'plate' if obj['label'] == '' else 'text'
        ET.SubElement(obj_elem, "name").text = obj_name
        ET.SubElement(obj_elem, "pose").text = "Unspecified"
        ET.SubElement(obj_elem, "truncated").text = "0"
        ET.SubElement(obj_elem, "difficult").text = "0"

        bndbox = ET.SubElement(obj_elem, "bndbox")
        # bbox is ordered (xmin, ymin, xmax, ymax); emit the tags in that order.
        for tag, value in zip(("xmin", "ymin", "xmax", "ymax"), obj['bbox']):
            ET.SubElement(bndbox, tag).text = str(value)

    # Name the XML after the image's base name so it always lands directly
    # inside output_folder, matching the <filename> element above.
    xml_file = os.path.join(output_folder, os.path.splitext(image_name)[0] + ".xml")
    ET.ElementTree(root).write(xml_file)

# Directory paths
json_folder = '/home/hqanhh/Data_plate/Label'
image_folder = '/home/hqanhh/Data_plate/Image'
output_folder = '/home/hqanhh/Data_plate/xml_label'

# Create output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# Process each JSON file in the JSON folder. The conversion is best-effort:
# anything that cannot be converted is reported on stdout and skipped so one
# bad entry does not abort the whole run.
for json_file in os.listdir(json_folder):
    if not json_file.endswith('.json'):
        continue

    # Parse the annotation file and release the handle immediately; the
    # per-image work below does not need the JSON file to stay open.
    with open(os.path.join(json_folder, json_file), 'r', encoding='utf-8') as f:
        data = json.load(f)
    img_metadata = data.get('_via_img_metadata', {})

    for img_id, img_data in img_metadata.items():
        image_file = img_data['filename']
        img_path = os.path.join(image_folder, image_file)

        # Get image dimensions
        try:
            with Image.open(img_path) as img:
                width, height = img.size
        except FileNotFoundError:
            print(f"Image file {image_file} not found in {image_folder}.")
            continue
        except OSError as exc:
            # Covers files that exist but cannot be opened or decoded.
            print(f"Image file {image_file} could not be read: {exc}")
            continue

        objects = []
        for region in img_data['regions']:
            shape = region['shape_attributes']
            # polygon_to_bbox needs both coordinate lists; skip any region
            # that does not provide them instead of failing with KeyError.
            if 'all_points_x' not in shape or 'all_points_y' not in shape:
                print(f"Skipping region without polygon points in {image_file}.")
                continue
            # A region with no 'label' attribute is handled like an empty
            # label (which create_pascal_voc_xml maps to 'plate').
            label = region.get('region_attributes', {}).get('label', '')
            bbox = polygon_to_bbox(shape)
            objects.append({
                'label': label,
                'bbox': bbox
            })

        create_pascal_voc_xml(image_file, width, height, objects, output_folder, image_folder)

print("Conversion completed.")


Image file 13_5_1181.jpg not found in /home/hqanhh/Data_plate/Image.
Image file 13_5_00000000079000000_7580.jpg not found in /home/hqanhh/Data_plate/Image.
Image file 13_5_00000000072000100_4253.jpg not found in /home/hqanhh/Data_plate/Image.
Image file 13_5_00000000072000000_341.jpg not found in /home/hqanhh/Data_plate/Image.
Image file 13_5_00000000072000000_103.jpg not found in /home/hqanhh/Data_plate/Image.
Image file 13_5_00000000074000000_7534.jpg not found in /home/hqanhh/Data_plate/Image.
Image file r.jpg not found in /home/hqanhh/Data_plate/Image.
Image file 13_5_1194.jpg not found in /home/hqanhh/Data_plate/Image.
Image file 13_5_1187.jpg not found in /home/hqanhh/Data_plate/Image.
Image file 13_5_00000000072000000_99.jpg not found in /home/hqanhh/Data_plate/Image.
Image file 13_5_00000000072000100_507.jpg not found in /home/hqanhh/Data_plate/Image.
Image file 13_5_00000000072000000_1722.jpg not found in /home/hqanhh/Data_plate/Image.
Image file 13_5_00000000072000100_4251.jp

In [11]:
import os
import shutil
import random
from sklearn.model_selection import train_test_split

# Directory paths
labels_folder = '/home/hqanhh/Data_plate/xml_label'
images_folder = '/home/hqanhh/Data_plate/Image'
train_labels_folder = '/home/hqanhh/Data_plate/mobilenet_split/train/label'
test_labels_folder = '/home/hqanhh/Data_plate/mobilenet_split/test/label'
train_images_folder = '/home/hqanhh/Data_plate/mobilenet_split/train/images'
test_images_folder = '/home/hqanhh/Data_plate/mobilenet_split/test/images'

# Create output directories if they do not exist
os.makedirs(train_labels_folder, exist_ok=True)
os.makedirs(test_labels_folder, exist_ok=True)
os.makedirs(train_images_folder, exist_ok=True)
os.makedirs(test_images_folder, exist_ok=True)

# Get list of all label files. os.listdir() returns names in arbitrary order,
# so sort them: a fixed random_state only gives a reproducible split when the
# input order is itself stable across runs and machines.
label_files = sorted(f for f in os.listdir(labels_folder) if f.endswith('.xml'))

# Create train-test split (5% of the label files go to the test set)
train_files, test_files = train_test_split(label_files, test_size=0.05, random_state=42)

# Function to copy files
def copy_files(file_list, src_labels_folder, src_images_folder, dst_labels_folder, dst_images_folder,
               image_ext='.jpg'):
    """Copy label files and their same-named images into destination folders.

    For every name in ``file_list`` the matching image name is built by
    replacing the label file's extension with ``image_ext``. The pair is
    copied only when both source files exist; otherwise a message is printed
    and nothing is copied for that entry.

    Args:
        file_list: Iterable of label file names (e.g. ``'a.xml'``).
        src_labels_folder: Directory containing the label files.
        src_images_folder: Directory containing the image files.
        dst_labels_folder: Existing directory that receives the label copies.
        dst_images_folder: Existing directory that receives the image copies.
        image_ext: Extension (including the dot) of the image files.
            Defaults to ``'.jpg'``, the previously hard-coded value.

    Returns:
        None.
    """
    for file_name in file_list:
        label_src_path = os.path.join(src_labels_folder, file_name)
        image_file_name = os.path.splitext(file_name)[0] + image_ext
        image_src_path = os.path.join(src_images_folder, image_file_name)

        # Define destination paths
        label_dst_path = os.path.join(dst_labels_folder, file_name)
        image_dst_path = os.path.join(dst_images_folder, image_file_name)

        # Copy label file and image file only as a pair, so the destination
        # never holds a label without its image or vice versa.
        if os.path.exists(label_src_path) and os.path.exists(image_src_path):
            shutil.copy(label_src_path, label_dst_path)
            shutil.copy(image_src_path, image_dst_path)
        else:
            print(f"Missing file: {file_name} or corresponding image {image_file_name}")

# Populate the train split first, then the test split; both read from the
# same source label/image folders.
copy_files(
    file_list=train_files,
    src_labels_folder=labels_folder,
    src_images_folder=images_folder,
    dst_labels_folder=train_labels_folder,
    dst_images_folder=train_images_folder,
)
copy_files(
    file_list=test_files,
    src_labels_folder=labels_folder,
    src_images_folder=images_folder,
    dst_labels_folder=test_labels_folder,
    dst_images_folder=test_images_folder,
)

print("Train-test split and file copying completed.")


Train-test split and file copying completed.
