### Set-up for preprocessing stage 

In [None]:
# Import some common libraries
import torch
import numpy as np
import os, json, random, cv2

# Setup detectron2 
import detectron2
from detectron2.structures import BoxMode

In [None]:
# Unpack zipped dataset folder
# !unzip ~/share/datasets/robot_arm_box_minecraft++.zip -d ~/share

### Parse the custom synthetic dataset into detectron2's standard dataset format

In [None]:
# Folder name of the synthetic dataset and its location below ./datasets
dataset_folder = "robot_arm_box_small_minecraft++"
PATH_DATASET = f"./datasets/{dataset_folder}"

In [None]:
# Collect json files into a list, as there can be multiple captures files
def collect_json_captures(dataset_path):
    """Return the paths of all captures files of a dataset split.

    Every entry of ``<dataset_path>/annotations`` whose name contains
    ``"captures"`` is collected, since a split can hold several such files.

    Args:
        dataset_path: Directory that contains an ``annotations`` sub-directory.

    Returns:
        A sorted list of paths, each joined onto the ``annotations`` directory.
        The list is empty when no entry matches.
    """
    json_captures_path = os.path.join(dataset_path, "annotations")
    # os.listdir yields entries in arbitrary order; sorting keeps the order of
    # the returned files (and anything derived from it) reproducible.
    return sorted(
        os.path.join(json_captures_path, name)
        for name in os.listdir(json_captures_path)
        if "captures" in name
    )

In [None]:
# List the captures files found for the training split
collect_json_captures(f"{PATH_DATASET}/train")

['./datasets/robot_arm_box_small_minecraft++/train/annotations/captures_000.json',
 './datasets/robot_arm_box_small_minecraft++/train/annotations/captures_001.json',
 './datasets/robot_arm_box_small_minecraft++/train/annotations/captures_002.json',
 './datasets/robot_arm_box_small_minecraft++/train/annotations/captures_003.json',
 './datasets/robot_arm_box_small_minecraft++/train/annotations/captures_004.json',
 './datasets/robot_arm_box_small_minecraft++/train/annotations/captures_005.json']

In [None]:
def parse_dataset(dataset_path):
    """Parse a synthetic dataset split into detectron2's list-of-dicts format.

    Every captures file returned by ``collect_json_captures(dataset_path)`` is
    read, and one record is produced per entry of its ``"captures"`` list.
    Images are looked up in ``<dataset_path>/images`` and opened with OpenCV
    to obtain their height and width.

    Within a capture, annotation values with ``label_id`` 2 describe the robot
    arm (``category_id`` 0) and values with ``label_id`` 1 describe the box
    (``category_id`` 1). A value carrying a ``"label_name"`` key is read as a
    bounding box (x, y, width, height); any other value is read as a keypoint
    entry, of which the first eight keypoints are flattened to
    ``[x, y, state, ...]``.

    Args:
        dataset_path: Directory of one split, containing ``images`` and
            ``annotations`` sub-directories.

    Returns:
        A list of dicts with the keys ``file_name``, ``image_id``, ``height``,
        ``width`` and ``annotations``. ``image_id`` is unique across all
        captures files of the split. An object is emitted only when the
        capture provides both its bounding box and its keypoint entry; the
        robot arm (if present) precedes the box.

    Raises:
        FileNotFoundError: If an image referenced by a capture cannot be read.
    """
    # Dataset label_id -> detectron2 category_id. The insertion order (robot
    # arm, then box) also fixes the order of the objects inside each record.
    label_to_category = {2: 0, 1: 1}
    max_keypoints = 8

    img_path = os.path.join(dataset_path, "images")
    dataset_dicts = []

    for json_path in collect_json_captures(dataset_path):
        # Only keep the file open while it is being decoded.
        with open(json_path, encoding="utf-8") as f:
            captures = json.load(f)["captures"]

        for capture in captures:
            # The capture stores "<folder>/<image name>"; only the image name
            # is needed because all images live in img_path.
            image_file = img_path + "/" + capture["filename"].rsplit("/", 1)[-1]
            image = cv2.imread(image_file)
            if image is None:
                # cv2.imread signals failure by returning None, not by raising.
                raise FileNotFoundError(
                    f"Image could not be read: {image_file}"
                )
            height, width = image.shape[:2]

            # Collected per capture so nothing leaks in from a previous image.
            bboxes = {}
            keypoints = {}
            for anno in capture["annotations"]:
                for val in anno["values"]:
                    label_id = val["label_id"]
                    if label_id not in label_to_category:
                        continue
                    if "label_name" in val:
                        bboxes[label_id] = [
                            val["x"], val["y"], val["width"], val["height"],
                        ]
                    else:
                        flat = keypoints.setdefault(label_id, [])
                        for kp in val["keypoints"][:max_keypoints]:
                            flat.extend((kp["x"], kp["y"], kp["state"]))

            # Objects are assembled once per capture, after all annotation
            # entries were seen, so their order in the file does not matter
            # and no object can be added twice.
            objs = [
                {
                    "bbox": bboxes[label_id],
                    "bbox_mode": BoxMode.XYWH_ABS,
                    "category_id": category_id,
                    "keypoints": keypoints[label_id],
                }
                for label_id, category_id in label_to_category.items()
                if label_id in bboxes and label_id in keypoints
            ]

            dataset_dicts.append({
                "file_name": image_file,
                # Running index over all captures files keeps the id unique.
                "image_id": len(dataset_dicts),
                "height": height,
                "width": width,
                "annotations": objs,
            })

    return dataset_dicts

In [None]:
# Show the first record parsed from the synthetic validation split
parse_dataset(f"{PATH_DATASET}/val")[0]

{'file_name': './datasets/robot_arm_box_small_minecraft++/val/images/rgb_2.png',
 'image_id': 0,
 'height': 400,
 'width': 650,
 'annotations': [{'bbox': [370.0, 120.0, 130.0, 179.0],
   'bbox_mode': <BoxMode.XYWH_ABS: 1>,
   'category_id': 0,
   'keypoints': [410.0599060058594,
    200.3362579345703,
    2,
    438.17596435546875,
    149.97398376464844,
    2,
    470.6190490722656,
    235.2589874267578,
    2,
    452.725830078125,
    277.7138366699219,
    2,
    0.0,
    0.0,
    0,
    0.0,
    0.0,
    0,
    0.0,
    0.0,
    0,
    0.0,
    0.0,
    0]},
  {'bbox': [112.0, 146.0, 73.0, 75.0],
   'bbox_mode': <BoxMode.XYWH_ABS: 1>,
   'category_id': 1,
   'keypoints': [0.0,
    0.0,
    0,
    0.0,
    0.0,
    0,
    0.0,
    0.0,
    0,
    0.0,
    0.0,
    0,
    181.7122344970703,
    149.6772003173828,
    2,
    130.86744689941406,
    146.66986083984375,
    2,
    165.5909881591797,
    171.12167358398438,
    2,
    112.10213470458984,
    167.77565002441406,
    2]}]}

In [None]:
# Write the parsed records of every split to its own JSON file
for split in ("train", "test", "val"):
    out_name = f"dataset_dict_{split}.json"
    with open(out_name, "w", encoding="utf-8") as out_file:
        records = parse_dataset(f"{PATH_DATASET}/{split}")
        json.dump(records, out_file, ensure_ascii=False, indent=4)

### Parse the custom real-image dataset into detectron2's standard dataset format

In [None]:
def parse_real_dataset(dataset_path, coco_json):
    """Parse a COCO-style annotation file into detectron2's list-of-dicts format.

    One record is produced per entry of the file's ``"images"`` list. Images
    are looked up in ``<dataset_path>/resized`` by the last component of the
    entry's ``"path"`` and opened with OpenCV to obtain height and width.

    The ``"annotations"`` list is read positionally: entries ``2*i`` and
    ``2*i + 1`` are taken as the robot arm (``category_id`` 0) and the box
    (``category_id`` 1) of image ``i``.
    NOTE(review): this relies on the file listing exactly one robot arm and
    one box per image, in image order -- confirm for new annotation files.

    Args:
        dataset_path: Directory containing the ``resized`` image folder and
            the annotation file.
        coco_json: File name of the annotation file inside ``dataset_path``.

    Returns:
        A list of dicts with the keys ``file_name``, ``image_id``, ``height``,
        ``width`` and ``annotations`` (robot arm first, then box). A missing
        ``"bbox"`` or ``"keypoints"`` entry yields an empty list for that field.

    Raises:
        FileNotFoundError: If an image cannot be read.
        ValueError: If there are fewer than two annotations per image.
    """
    img_path = os.path.join(dataset_path, "resized")

    # Only keep the file open while it is being decoded.
    with open(os.path.join(dataset_path, coco_json), encoding="utf-8") as f:
        coco_anns = json.load(f)

    images = coco_anns["images"]
    annotations = coco_anns["annotations"]
    if len(annotations) < 2 * len(images):
        raise ValueError(
            f"Expected two annotations per image, got {len(annotations)} "
            f"annotations for {len(images)} images"
        )

    dataset_dicts = []
    for idx, image_info in enumerate(images):
        # Only the file name is used; the directory part of the stored path
        # is replaced by img_path.
        image_file = img_path + "/" + image_info["path"].rsplit("/", 1)[-1]
        image = cv2.imread(image_file)
        if image is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise FileNotFoundError(f"Image could not be read: {image_file}")
        height, width = image.shape[:2]

        # category_id 0 = robot arm (even index), 1 = box (odd index)
        objs = [
            {
                "bbox": annotations[idx * 2 + category_id].get("bbox", []),
                "bbox_mode": BoxMode.XYWH_ABS,
                "category_id": category_id,
                "keypoints": annotations[idx * 2 + category_id].get(
                    "keypoints", []
                ),
            }
            for category_id in (0, 1)
        ]

        dataset_dicts.append({
            "file_name": image_file,
            "image_id": idx,
            "height": height,
            "width": width,
            "annotations": objs,
        })

    return dataset_dicts

In [None]:
# Show the first record parsed from the real dataset
PATH_REAL = "./datasets/real_test"
coco_json = "coco-annotations_150.json"
parse_real_dataset(dataset_path=PATH_REAL, coco_json=coco_json)[0]

{'file_name': './datasets/real_test/resized/resized_img_real_1.jpg',
 'image_id': 0,
 'height': 400,
 'width': 650,
 'annotations': [{'bbox': [195, 79, 276, 247],
   'bbox_mode': <BoxMode.XYWH_ABS: 1>,
   'category_id': 0,
   'keypoints': [386,
    263,
    2,
    320,
    153,
    2,
    216,
    114,
    2,
    231,
    195,
    2,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0]},
  {'bbox': [382, 172, 79, 93],
   'bbox_mode': <BoxMode.XYWH_ABS: 1>,
   'category_id': 1,
   'keypoints': [0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    440,
    217,
    2,
    458,
    188,
    2,
    385,
    201,
    2,
    408,
    174,
    2]}]}

In [None]:
# Write the parsed records of the real dataset to a JSON file
with open("dataset_dict_real.json", mode="w", encoding="utf-8") as out_file:
    real_records = parse_real_dataset(PATH_REAL, coco_json)
    json.dump(real_records, out_file, ensure_ascii=False, indent=4)