In [1]:
import json
import os
import shutil
from pathlib import Path

import numpy as np
from collections import defaultdict
from tqdm import tqdm


In [2]:
# Dataset roots: COCO-format input and YOLO-format output.
BASE_PATH = "/mnt/folcon/02_model_input_coco/"
OUTPUT_PATH = "/mnt/folcon/02_model_input_yolo/"

# Per-split COCO annotation files, expressed relative to BASE_PATH.
PATH_TRAIN_COCO_JSON, PATH_VAL_COCO_JSON, PATH_TEST_COCO_JSON = (
    f"data/annotations/instances_{split}2017.json"
    for split in ("train", "val", "test")
)

In [3]:
# Parse each split's COCO annotation file into an in-memory dict.
train_coco = json.loads(Path(BASE_PATH, PATH_TRAIN_COCO_JSON).read_text())

val_coco = json.loads(Path(BASE_PATH, PATH_VAL_COCO_JSON).read_text())

test_coco = json.loads(Path(BASE_PATH, PATH_TEST_COCO_JSON).read_text())

# Link data (images are symlinked into the YOLO folder layout, not copied)

In [4]:
# Symlink every training image listed in the COCO annotations into
# OUTPUT_PATH/images/train, so the YOLO layout exists without duplicating files.
train_link_dir = os.path.join(OUTPUT_PATH, "images", 'train')
os.makedirs(train_link_dir, exist_ok=True)  # os.symlink does not create parent directories

for img_info in tqdm(train_coco['images']):
    img_path = os.path.join(BASE_PATH, 'data/train2017', img_info['file_name'])
    img_name = os.path.basename(img_path)
    link_path = os.path.join(train_link_dir, img_name)
    # Drop a link left behind by a previous run so re-executing this cell does
    # not raise FileExistsError. A regular file at this path is left untouched
    # (os.symlink will still raise for it, which is the safe outcome).
    if os.path.islink(link_path):
        os.unlink(link_path)
    os.symlink(img_path, link_path)

100%|██████████| 16245/16245 [00:00<00:00, 31754.02it/s]


In [5]:
# Symlink every validation image listed in the COCO annotations into
# OUTPUT_PATH/images/val, so the YOLO layout exists without duplicating files.
val_link_dir = os.path.join(OUTPUT_PATH, "images", 'val')
os.makedirs(val_link_dir, exist_ok=True)  # os.symlink does not create parent directories

for img_info in tqdm(val_coco['images']):
    img_path = os.path.join(BASE_PATH, 'data/val2017', img_info['file_name'])
    img_name = os.path.basename(img_path)
    link_path = os.path.join(val_link_dir, img_name)
    # Drop a link left behind by a previous run so re-executing this cell does
    # not raise FileExistsError. A regular file at this path is left untouched
    # (os.symlink will still raise for it, which is the safe outcome).
    if os.path.islink(link_path):
        os.unlink(link_path)
    os.symlink(img_path, link_path)

100%|██████████| 2689/2689 [00:00<00:00, 21226.25it/s]


# Create labels

In [16]:
def _split_name_from_stem(stem):
    """Return the split name for an annotation file stem, e.g. "instances_train2017" -> "train"."""
    name = stem.split("_")[-1]
    # Strip a trailing 4-digit year only when one is actually present; cutting
    # four characters unconditionally would mangle a stem such as "instances_val".
    if len(name) > 4 and name[-4:].isdigit():
        return name[:-4]
    return name


def _yolo_rows(anns, img_w, img_h):
    """Turn one image's COCO annotations into unique ``[cls, xc, yc, w, h]`` rows normalised by image size."""
    rows = []
    for ann in anns:
        if ann.get("iscrowd", 0):
            continue
        # The COCO box format is [top left x, top left y, width, height]
        box = np.array(ann["bbox"], dtype=np.float64)
        box[:2] += box[2:] / 2  # xy top-left corner to center
        box[[0, 2]] /= img_w  # normalize x
        box[[1, 3]] /= img_h  # normalize y
        if box[2] <= 0 or box[3] <= 0:  # skip boxes with non-positive width or height
            continue
        # The category id is written unchanged.
        # NOTE(review): assumes category ids are already the class indices the
        # trainer expects (YOLO tooling usually wants 0-based) — confirm.
        row = [ann["category_id"]] + box.tolist()
        if row not in rows:  # drop exact duplicate boxes
            rows.append(row)
    return rows


def convert_coco_json(json_dir="../coco/annotations/", use_segments=False, cls91to80=False, output_dir=None):
    """Convert every COCO ``*.json`` annotation file in ``json_dir`` to YOLO bounding-box label files.

    For each annotation file, one ``<image stem>.txt`` file per annotated image is
    written to ``<output root>/labels/<split>/``, where ``<split>`` is taken from the
    file name (``instances_train2017.json`` -> ``train``). Each line is
    ``cls x_center y_center width height`` with coordinates normalised by the image
    size. Crowd annotations, degenerate boxes and exact duplicates are skipped. For
    the ``test`` split, images without any annotation additionally get an empty
    label file.

    Re-running the conversion rewrites the label files instead of appending to them.

    Args:
        json_dir: Directory containing the COCO annotation JSON files.
        use_segments: Accepted for signature compatibility; not used (only boxes are written).
        cls91to80: Accepted for signature compatibility; not used (no class remapping is done).
        output_dir: Root directory for the ``labels`` folder. Defaults to the
            notebook-level ``OUTPUT_PATH`` when ``None``.

    Raises:
        KeyError: If an annotation refers to an image id missing from ``images``.
    """
    labels_root = os.path.join(OUTPUT_PATH if output_dir is None else output_dir, "labels")
    # Label files already written during THIS call. The first write truncates
    # ("w") so a re-run does not duplicate rows; later writes to the same path
    # within the call append, as before.
    written = set()

    for json_file in sorted(Path(json_dir).resolve().glob("*.json")):
        with open(json_file) as json_fh:
            data = json.load(json_fh)
        set_name = _split_name_from_stem(json_file.stem)
        split_dir = os.path.join(labels_root, set_name)
        os.makedirs(split_dir, exist_ok=True)

        # Key by the raw id: "%g" formatting keeps only 6 significant digits, so
        # distinct ids >= 1e6 would collapse onto the same key.
        images = {img["id"]: img for img in data["images"]}

        # Group annotations per image.
        img_to_anns = defaultdict(list)
        for ann in data.get("annotations", []):
            img_to_anns[ann["image_id"]].append(ann)

        # Write one label file per annotated image.
        for img_id, anns in tqdm(img_to_anns.items(), desc=f"Annotations {json_file}"):
            img = images[img_id]
            rows = _yolo_rows(anns, img["width"], img["height"])
            img_name = img["file_name"].split("/")[-1]
            label_path = Path(os.path.join(split_dir, img_name)).with_suffix(".txt")
            mode = "a" if label_path in written else "w"
            written.add(label_path)
            with open(label_path, mode) as label_fh:
                for row in rows:
                    line = tuple(row)  # cls, xc, yc, w, h
                    label_fh.write(("%g " * len(line)).rstrip() % line + "\n")

        # Test split only: give images without annotations an empty label file.
        if set_name == "test":
            for img_info in data["images"]:
                img_name = img_info["file_name"].split("/")[-1]
                label_path = Path(os.path.join(split_dir, img_name)).with_suffix(".txt")
                if label_path not in written:
                    with open(label_path, "w") as label_fh:
                        label_fh.write("")
                

In [15]:
# Generate YOLO label files from every annotation JSON under BASE_PATH/data/annotations/.
coco_annotations_dir = os.path.join(BASE_PATH, 'data/annotations/')
convert_coco_json(json_dir=coco_annotations_dir)

Annotations /mnt/folcon/02_model_input_coco/data/annotations/instances_test2017.json: 100%|██████████| 1315/1315 [00:00<00:00, 8897.94it/s]
Annotations /mnt/folcon/02_model_input_coco/data/annotations/instances_train2017.json: 100%|██████████| 16245/16245 [00:01<00:00, 12376.76it/s]
Annotations /mnt/folcon/02_model_input_coco/data/annotations/instances_val2017.json: 100%|██████████| 2689/2689 [00:00<00:00, 12338.97it/s]


In [30]:
# NOTE(review): `test` is not assigned in any cell visible in this notebook, so this
# cell presumably relies on leftover kernel state and would raise NameError after
# Restart & Run All — define it explicitly above or remove this cell.
test

[['Primary', 0.888, 0.363, 0.3, 0.3]]