modanet: https://github.com/eBay/modanet

아래 코드를 실행하면 허깅페이스(Hugging Face) 포맷의 데이터셋이 생성된다.


In [None]:
import json
import io
import os
from tqdm import tqdm
from PIL import Image
from collections import defaultdict
from datasets import Dataset, Value, Sequence, ClassLabel, Features, concatenate_datasets
from datasets import Image as DImage

In [None]:
# Ensure the download directory exists. exist_ok=True makes the call
# idempotent and avoids the check-then-create race of a separate isdir() test.
os.makedirs('./modanet', exist_ok=True)

In [None]:
# Shell cell: run the `maskrcnn-modanet datasets download` command with
# ./modanet/ as its target. "-N" is piped to the command's stdin
# (presumably the answer to an interactive prompt -- TODO confirm).
!printf "-N" | maskrcnn-modanet datasets download ./modanet/

In [None]:
annotation_path = './modanet/datasets/coco/annotations/instances_all.json'
# JSON text is UTF-8; pass the encoding explicitly so parsing does not depend
# on the platform's default locale encoding.
with open(annotation_path, 'r', encoding='utf-8') as f:
    attribute_dict = json.load(f)

In [None]:
# Display the top-level keys of the loaded annotation JSON.
attribute_dict.keys()

In [None]:
def create_annotation_dict(attribute_dict):
    """Group the entries of ``attribute_dict['annotations']`` by image id.

    Args:
        attribute_dict: Mapping with an ``'annotations'`` list. Every entry
            must provide ``image_id``, ``category_id``, ``bbox``, ``area``
            and ``iscrowd``.

    Returns:
        A ``defaultdict(list)`` mapping each image id to a list of dicts that
        hold only ``category_id``, ``bbox``, ``area`` and ``iscrowd``, in the
        order the annotations were encountered.
    """
    kept_fields = ('category_id', 'bbox', 'area', 'iscrowd')
    grouped = defaultdict(list)

    for entry in attribute_dict['annotations']:
        owner_id = entry['image_id']
        # Copy just the fields needed downstream; anything else is dropped.
        record = {field: entry[field] for field in kept_fields}
        grouped[owner_id].append(record)

    return grouped

def create_image_dict(attribute_dict):
    """Index the entries of ``attribute_dict['images']`` by their ``id``.

    Args:
        attribute_dict: Mapping with an ``'images'`` list. Every entry must
            provide ``id``, ``width``, ``height`` and ``file_name``.

    Returns:
        A dict mapping each image id to
        ``{'width': ..., 'height': ..., 'image_fname': ...}``, where
        ``image_fname`` carries the entry's ``file_name``. If an id occurs
        more than once, the last entry wins.
    """
    return {
        entry['id']: {
            'width': entry['width'],
            'height': entry['height'],
            'image_fname': entry['file_name'],
        }
        for entry in attribute_dict['images']
    }

In [None]:
# Display the category definitions stored in the annotation JSON.
attribute_dict['categories']

In [None]:
# Maps a raw category name from the annotation file to the coarser label used
# in the generated dataset. The table is written grouped by target label so
# that merged categories (e.g. boots + footwear -> shoes) are easy to spot.
category_norm_dict = {
    raw_name: normalized_name
    for normalized_name, raw_names in (
        ('bag', ('bag',)),
        ('shoes', ('boots', 'footwear')),
        ('outer', ('outer',)),
        ('dress', ('dress',)),
        ('bottom', ('pants', 'shorts', 'skirt')),
        ('top', ('top',)),
        ('hat', ('headwear',)),
        ('belt', ('belt',)),
        ('eyewear', ('sunglasses',)),
    )
    for raw_name in raw_names
}

In [None]:
# Reduced 7-class label list, kept for reference only (not active):
# ['bag', 'bottom', 'dress', 'hat', 'outer', 'shoes', 'top']
labels = [
    'bag', 'belt', 'bottom', 'dress', 'eyewear', 'glove',
    'hat', 'jumpsuit', 'outer', 'shoes', 'top', 'watch',
]

print(labels)

# Integer id -> label name, following the order of `labels`.
id2label = dict(enumerate(labels))

# Label name -> integer id (the inverse of id2label).
label2id = {name: index for index, name in id2label.items()}

print(id2label)
print(label2id)

In [None]:
def _load_image_as_jpeg(image_path):
    """Return the image at ``image_path`` as an RGB, JPEG-encoded PIL image.

    The source file is opened in a ``with`` block so its handle is released
    as soon as the pixels have been converted. The returned image is opened
    from an in-memory JPEG buffer, which must stay alive with the image.
    """
    with Image.open(image_path) as source_image:
        rgb_image = source_image.convert('RGB')

    jpeg_buffer = io.BytesIO()
    rgb_image.save(jpeg_buffer, format='JPEG')
    jpeg_buffer.seek(0)
    return Image.open(jpeg_buffer)


def create_dataset(
    dataset_root='./modanet/datasets/coco',
    chunk_size=500,
    test_size=0.1,
    seed=None,
):
    """Build a train/test dataset from the annotation file and images.

    Reads ``<dataset_root>/annotations/instances_all.json``, maps every
    annotation category through the module-level ``category_norm_dict`` /
    ``label2id`` tables and keeps an image only when

    * none of its mapped objects has a negative origin or a non-positive
      width/height in its bounding box,
    * it has at least one mapped object, and
    * it has at most two objects labelled ``"shoes"``.

    Annotations whose category has no entry in ``category_norm_dict`` are
    skipped. ``bbox_id`` is a running counter over all accepted objects,
    including those of images that are discarded afterwards.

    Args:
        dataset_root: Directory containing ``annotations/`` and ``images/``.
        chunk_size: Number of records converted per ``Dataset.from_list``
            call.
        test_size: Passed to ``Dataset.train_test_split``.
        seed: Passed to ``Dataset.train_test_split``; ``None`` keeps the
            split non-deterministic as before.

    Returns:
        The train/test split produced by ``Dataset.train_test_split``.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1 or no image survives
            the filtering.
    """
    if chunk_size < 1:
        raise ValueError(f'chunk_size must be a positive integer, got {chunk_size!r}')

    annotation_file = f'{dataset_root}/annotations/instances_all.json'
    with open(annotation_file, 'r', encoding='utf-8') as f:
        attribute_dict = json.load(f)

    annotation_dict = create_annotation_dict(attribute_dict)
    image_dict = create_image_dict(attribute_dict)
    category_id_to_name = {
        category['id']: category['name'] for category in attribute_dict['categories']
    }

    bbox_id = 0

    data_list = []
    for image_id, image_obj in tqdm(image_dict.items()):
        objects = []
        shoes_count = 0
        is_valid = True
        for obj in annotation_dict[image_id]:
            norm_category = category_norm_dict.get(category_id_to_name[obj['category_id']])
            if not norm_category:
                # Category is not part of the normalized label set.
                continue

            norm_category_id = label2id[norm_category]
            bbox = obj['bbox']

            # Distinct names so the image's own width/height are not shadowed.
            x1, y1, box_width, box_height = bbox
            if not (x1 >= 0 and y1 >= 0 and box_width > 0 and box_height > 0):
                # A single malformed box discards the whole image.
                is_valid = False
                break

            objects.append(
                {
                    'category': norm_category_id,
                    'bbox_id': bbox_id,
                    'bbox': bbox,
                    'area': obj['area'],
                    'iscrowd': obj['iscrowd'],
                }
            )

            bbox_id += 1

            if norm_category == "shoes":
                shoes_count += 1

        if not is_valid or not objects or shoes_count > 2:
            continue

        # Decode and re-encode the image only for records that are kept, so
        # no work is spent on images that would be discarded anyway.
        image = _load_image_as_jpeg(f"{dataset_root}/images/{image_obj['image_fname']}")

        data_list.append(
            {
                'image_id': image_id,
                'width': image_obj['width'],
                'height': image_obj['height'],
                'image': image,
                'objects': objects,
            }
        )

    if not data_list:
        raise ValueError(f'No usable images found under {dataset_root!r}')

    class_label = ClassLabel(names=labels)
    # NOTE(review): each object dict also carries 'iscrowd', which is not
    # declared below -- confirm whether it should be part of the schema.
    features = Features({
        'image_id': Value('int64'),
        'width': Value('int64'),
        'height': Value('int64'),
        'image': DImage(decode=True),
        'objects': Sequence({
            'bbox_id': Value('int64'),
            'category': class_label,
            'bbox': Sequence(Value('float64'), length=4),
            'area': Value('int64')
        })
    })

    # Convert in chunks: building everything at once ran out of memory.
    sub_datasets = []
    for start in tqdm(range(0, len(data_list), chunk_size)):
        sub_data = data_list[start: start + chunk_size]
        sub_datasets.append(Dataset.from_list(sub_data, features=features))

    dataset = concatenate_datasets(sub_datasets)
    dataset = dataset.train_test_split(test_size=test_size, seed=seed)

    return dataset

In [None]:
# Build the dataset; create_dataset() returns it already split into train/test.
dataset = create_dataset()

In [None]:
# Write the dataset to the ./modanet_hf_dataset directory.
dataset.save_to_disk('./modanet_hf_dataset')