### Labelme格式数据集转换为COCO格式

In [21]:
import json
import os
import shutil

import argparse
from tqdm import tqdm

In [22]:
# 标注格式转换相关函数
def get_category_id(label, categories):
    """Return the id of the category named *label*, registering it if needed.

    ``categories`` is the COCO-style list of category dicts and is mutated in
    place: when no entry has ``name == label``, a new entry is appended whose
    id is ``len(categories) + 1`` and whose supercategory is ``"object"``.
    """
    # Lazily look for the first entry that already carries this label.
    known = next((entry for entry in categories if entry['name'] == label), None)
    if known is not None:
        return known['id']

    # Unknown label: register it under the next sequential id.
    next_id = len(categories) + 1
    categories.append({"id": next_id, "name": label, "supercategory": "object"})
    return next_id

def _points_to_bbox(points):
    """Return the COCO ``[x, y, width, height]`` box enclosing *points*."""
    xs = [point[0] for point in points]
    ys = [point[1] for point in points]
    xmin, ymin = min(xs), min(ys)
    return [xmin, ymin, max(xs) - xmin, max(ys) - ymin]


def labelme_to_coco(labelme_dir, images_dir, output_dir):
    """Convert a directory of Labelme JSON files into a COCO detection dataset.

    For every ``*.json`` file in ``labelme_dir`` the referenced image is copied
    into ``<output_dir>/images`` and one COCO image record plus one annotation
    per shape is produced. The result is written to
    ``<output_dir>/annotations/instances.json``.

    Args:
        labelme_dir: Directory containing the Labelme ``.json`` files.
        images_dir: Directory against which each file's ``imagePath`` is
            resolved.
        output_dir: Root of the COCO dataset to create (created if missing).

    Raises:
        KeyError: If a Labelme file lacks ``imagePath``, ``imageHeight``,
            ``imageWidth`` or ``shapes``, or a shape lacks ``label``/``points``.
        FileNotFoundError: If a referenced image does not exist.

    Notes:
        Only bounding boxes are produced (``segmentation`` is left empty).
        Each box is the axis-aligned extent of all the shape's points, so the
        order in which rectangle corners were drawn does not matter. Shapes
        with fewer than two points cannot define a box and are skipped.
    """
    # Create the COCO dataset directory layout.
    os.makedirs(output_dir, exist_ok=True)
    out_images_dir = os.path.join(output_dir, 'images')
    out_annotations_dir = os.path.join(output_dir, 'annotations')
    os.makedirs(out_images_dir, exist_ok=True)
    os.makedirs(out_annotations_dir, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sorting keeps the
    # generated image/annotation/category ids reproducible across runs.
    label_files = sorted(
        name for name in os.listdir(labelme_dir) if name.endswith('.json')
    )

    annotation_id = 1
    coco_data = {
        "info": {},
        "licenses": [],
        "images": [],
        "annotations": [],
        "categories": []
    }

    for image_id, label_file in enumerate(tqdm(label_files), start=1):
        label_file_path = os.path.join(labelme_dir, label_file)
        # Explicit UTF-8 so non-ASCII labels load regardless of the locale.
        with open(label_file_path, 'r', encoding='utf-8') as f:
            label_data = json.load(f)

        # Copy the image into the COCO dataset's images directory.
        image_file = label_data['imagePath']
        image_file_path = os.path.join(images_dir, image_file)
        shutil.copy(image_file_path, out_images_dir)

        # Build the entry for the COCO "images" section. The copy above lands
        # directly inside out_images_dir, so record only the base name even
        # when imagePath contains directory components (e.g. "../images/a.jpg").
        coco_data['images'].append({
            "id": image_id,
            "file_name": os.path.basename(image_file),
            "height": label_data['imageHeight'],
            "width": label_data['imageWidth']
        })

        # Build the entries for the COCO "annotations" section.
        for shape in label_data['shapes']:
            label = shape['label']
            points = shape['points']
            if len(points) < 2:
                # A lone point (or an empty shape) has no extent to box.
                continue
            category_id = get_category_id(label, coco_data['categories'])

            # Detection only: adapt this part for other annotation types.
            bbox = _points_to_bbox(points)
            coco_data['annotations'].append({
                "id": annotation_id,
                "image_id": image_id,
                "category_id": category_id,
                "segmentation": [],
                "bbox": bbox,
                "area": bbox[2] * bbox[3],
                "iscrowd": 0
            })
            annotation_id += 1

    # Save the COCO annotation file.
    coco_annotations_file = os.path.join(out_annotations_dir, 'instances.json')
    with open(coco_annotations_file, 'w', encoding='utf-8') as f:
        json.dump(coco_data, f)

In [23]:
# Dataset locations: the converted COCO dataset goes to output_dir; the
# Labelme JSON annotations and their source images are read from the others.
output_dir = '/Users/xiaoqiang/Mlearning/dataset/Drink_coco'
labelme_dir = '/Users/xiaoqiang/Mlearning/dataset/Drink_284_Detection_Labelme/labelme_jsons'
image_dir = '/Users/xiaoqiang/Mlearning/dataset/Drink_284_Detection_Labelme/images'

# Run the conversion.
labelme_to_coco(
    labelme_dir=labelme_dir,
    images_dir=image_dir,
    output_dir=output_dir,
)

100%|████████████████████████████████████████| 284/284 [00:00<00:00, 497.99it/s]
