modanet: https://github.com/eBay/modanet

아래 코드를 실행하면 허깅페이스(Hugging Face) `datasets` 포맷의 데이터셋이 `./modanet_hf_dataset` 경로에 생성된다.


In [1]:
import json
import io
import os
from tqdm import tqdm
from PIL import Image
from collections import defaultdict
from datasets import Dataset, Value, Sequence, ClassLabel, Features, concatenate_datasets
from datasets import Image as DImage

In [2]:
# Make sure the download target directory exists before fetching ModaNet.
os.makedirs('./modanet', exist_ok=True)

In [3]:
# Pipe an answer into the downloader's interactive Y/N prompt so the cell runs
# unattended (Y = the whole ~1M image set, 40 GB; N = only the 50k images
# annotated by ModaNet).
# NOTE(review): the piped text is "-N" rather than a bare "N" -- confirm the
# tool interprets it as "N".
!printf "-N" | maskrcnn-modanet datasets download ./modanet/

/Users/vegeta/workspace/fashion-visual-search/object_detection/modanet/
/Users/vegeta/.pyenv/versions/3.12.0/envs/fashion_image_retrieval/lib/python3.12/site-packages/maskrcnn_modanet/
Do you want to download the whole 1 million images (what I had to do) or to just download the 50k annotated with ModaNet?
Y for 1 million (40 GB), N for 50k: downloading paperdoll dataset
			taken from here:
			https://github.com/kyamagu/paperdoll/tree/master/data/chictopia
			
Updated Git hooks.
Git LFS initialized.
/Users/vegeta/.maskrcnn-modanet
saving your path location
/Users/vegeta/workspace/fashion-visual-search/object_detection/modanet/
fast download:
True
Skipping downloading PaperDoll
/Users/vegeta/workspace/fashion-visual-search/object_detection/modanet/datasets
\n now downloading modanet annotations\n \t\t\ttaken from here:\n \t\t\thttps://github.com/eBay/modanet
Cloning into 'modanet'...
remote: Enumerating objects: 70, done.[K
remote: Total 70 (delta 0), reused 0 (delta 0), pack-reused 70 

In [4]:
# Load the combined annotation file produced by the download step.
annotation_path = './modanet/datasets/coco/annotations/instances_all.json'
# JSON is UTF-8 by specification; be explicit so the platform's default
# encoding (e.g. a legacy code page on Windows) cannot break parsing.
with open(annotation_path, 'r', encoding='utf-8') as annotation_file:
    attribute_dict = json.load(annotation_file)

In [5]:
# Inspect the top-level sections of the loaded annotation file.
attribute_dict.keys()

dict_keys(['info', 'images', 'year', 'licenses', 'type', 'annotations', 'categories'])

In [6]:
def create_annotation_dict(attribute_dict):
    """Group the annotation records by the image they belong to.

    Args:
        attribute_dict: Parsed annotation file; its ``'annotations'`` entry is
            an iterable of dicts carrying at least ``image_id``,
            ``category_id``, ``bbox``, ``area`` and ``iscrowd``.

    Returns:
        A ``defaultdict(list)`` mapping each ``image_id`` to the list of its
        annotations (in file order), each reduced to the four fields
        ``category_id``, ``bbox``, ``area`` and ``iscrowd``.
    """
    kept_fields = ('category_id', 'bbox', 'area', 'iscrowd')
    per_image = defaultdict(list)
    for record in attribute_dict['annotations']:
        per_image[record['image_id']].append(
            {field: record[field] for field in kept_fields}
        )
    return per_image

def create_image_dict(attribute_dict):
    """Index the image records by their id.

    Args:
        attribute_dict: Parsed annotation file; its ``'images'`` entry is an
            iterable of dicts carrying at least ``id``, ``width``, ``height``
            and ``file_name``.

    Returns:
        A dict mapping each image id to ``{'width', 'height', 'image_fname'}``.
        If an id occurs more than once, the last record wins.
    """
    return {
        entry['id']: {
            'width': entry['width'],
            'height': entry['height'],
            'image_fname': entry['file_name'],
        }
        for entry in attribute_dict['images']
    }

In [7]:
# List the original category definitions (supercategory, id, name) from the annotation file.
attribute_dict['categories']

[{'supercategory': 'fashion', 'id': 1, 'name': 'bag'},
 {'supercategory': 'fashion', 'id': 2, 'name': 'belt'},
 {'supercategory': 'fashion', 'id': 3, 'name': 'boots'},
 {'supercategory': 'fashion', 'id': 4, 'name': 'footwear'},
 {'supercategory': 'fashion', 'id': 5, 'name': 'outer'},
 {'supercategory': 'fashion', 'id': 6, 'name': 'dress'},
 {'supercategory': 'fashion', 'id': 7, 'name': 'sunglasses'},
 {'supercategory': 'fashion', 'id': 8, 'name': 'pants'},
 {'supercategory': 'fashion', 'id': 9, 'name': 'top'},
 {'supercategory': 'fashion', 'id': 10, 'name': 'shorts'},
 {'supercategory': 'fashion', 'id': 11, 'name': 'skirt'},
 {'supercategory': 'fashion', 'id': 12, 'name': 'headwear'},
 {'supercategory': 'fashion', 'id': 13, 'name': 'scarf/tie'}]

In [8]:
# Maps an original ModaNet category name to the coarser class name used in the
# generated dataset. Several source categories collapse into one target class
# ('boots'/'footwear' -> 'shoes'; 'pants'/'shorts'/'skirt' -> 'bottom').
# Source categories that have no entry here ('belt', 'sunglasses',
# 'scarf/tie') are skipped when the dataset is built.
category_norm_dict = {
    'bag': 'bag',
    'boots': 'shoes',
    'footwear': 'shoes',
    'outer': 'outer',
    'dress': 'dress',
    'pants': 'bottom',
    'top': 'top',
    'shorts': 'bottom',
    'skirt': 'bottom',
    'headwear': 'hat',
}

In [9]:
# Target class names, de-duplicated and in alphabetical order; a class's
# position in this list is its integer label.
norm_categories = sorted(set(category_norm_dict.values()))
print(norm_categories)

# Forward (id -> name) and reverse (name -> id) lookup tables.
id2label = dict(enumerate(norm_categories))
label2id = {name: index for index, name in id2label.items()}

print(id2label)
print(label2id)

['bag', 'bottom', 'dress', 'hat', 'outer', 'shoes', 'top']
{0: 'bag', 1: 'bottom', 2: 'dress', 3: 'hat', 4: 'outer', 5: 'shoes', 6: 'top'}
{'bag': 0, 'bottom': 1, 'dress': 2, 'hat': 3, 'outer': 4, 'shoes': 5, 'top': 6}


In [10]:
def _load_as_jpeg(image_path):
    """Return the image at *image_path* as an RGB, JPEG-encoded in-memory PIL image.

    The source file is opened in a ``with`` block so its handle is closed as
    soon as the pixels have been re-encoded, instead of staying open until the
    image object is garbage collected.
    """
    jpeg_buffer = io.BytesIO()
    with Image.open(image_path) as source:
        source.convert('RGB').save(jpeg_buffer, format='JPEG')
    jpeg_buffer.seek(0)
    return Image.open(jpeg_buffer)


def create_dataset(dataset_root='./modanet/datasets/coco', test_size=0.1, seed=None, chunk_size=500):
    """Build a train/test Hugging Face dataset from the ModaNet COCO annotations.

    Relies on the module-level ``category_norm_dict``, ``label2id`` and
    ``norm_categories`` tables and on ``create_annotation_dict`` /
    ``create_image_dict``.

    Filtering rules:
      * annotations whose category has no entry in ``category_norm_dict`` are
        ignored;
      * an image is dropped entirely if any of its remaining boxes has a
        negative origin or a non-positive width/height;
      * an image is dropped if it has no remaining boxes or more than two
        'shoes' boxes.

    Args:
        dataset_root: Directory containing ``annotations/instances_all.json``
            and the ``images/`` folder.
        test_size: Passed to ``Dataset.train_test_split``.
        seed: Optional seed for the split; ``None`` keeps the original
            non-deterministic shuffling.
        chunk_size: Number of examples converted per intermediate ``Dataset``.

    Returns:
        The ``DatasetDict`` returned by ``train_test_split`` ('train'/'test').

    Raises:
        ValueError: If ``chunk_size`` is not a positive integer.
    """
    if chunk_size <= 0:
        raise ValueError(f'chunk_size must be positive, got {chunk_size}')

    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(f'{dataset_root}/annotations/instances_all.json', 'r', encoding='utf-8') as f:
        attribute_dict = json.load(f)

    annotation_dict = create_annotation_dict(attribute_dict)
    image_dict = create_image_dict(attribute_dict)
    category_id_to_name = {obj['id']: obj['name'] for obj in attribute_dict['categories']}

    # Running id across the whole dataset. It also advances for boxes of images
    # that end up being discarded, so the ids that are kept are unique but not
    # necessarily contiguous.
    bbox_id = 0

    data_list = []
    for image_id, image_obj in tqdm(image_dict.items()):
        objects = []
        shoes_count = 0
        has_invalid_bbox = False
        for obj in annotation_dict[image_id]:
            norm_category = category_norm_dict.get(category_id_to_name[obj['category_id']])
            if not norm_category:
                # Category intentionally left out of the normalized label set.
                continue

            bbox = obj['bbox']
            # Distinct names so the image's own width/height are not shadowed.
            x1, y1, box_width, box_height = bbox
            if not (x1 >= 0 and y1 >= 0 and box_width > 0 and box_height > 0):
                # One malformed box invalidates the whole image.
                has_invalid_bbox = True
                break

            objects.append(
                {
                    'category': label2id[norm_category],
                    'bbox_id': bbox_id,
                    'bbox': bbox,
                    'area': obj['area'],
                    # NOTE(review): 'iscrowd' is not declared in `features`
                    # below, so it presumably does not reach the final
                    # dataset -- confirm whether it should be added there.
                    'iscrowd': obj['iscrowd'],
                }
            )
            bbox_id += 1

            if norm_category == "shoes":
                shoes_count += 1

        if has_invalid_bbox or not objects or shoes_count > 2:
            continue

        # Decode/re-encode the image only once the example is known to be kept.
        data_list.append(
            {
                'image_id': image_id,
                'width': image_obj['width'],
                'height': image_obj['height'],
                'image': _load_as_jpeg(f"{dataset_root}/images/{image_obj['image_fname']}"),
                'objects': objects,
            }
        )

    class_label = ClassLabel(names=norm_categories)
    features = Features({
        'image_id': Value('int64'),
        'width': Value('int64'),
        'height': Value('int64'),
        'image': DImage(decode=True),
        'objects': Sequence({
            'bbox_id': Value('int64'),
            'category': class_label,
            'bbox': Sequence(Value('float64'), length=4),
            'area': Value('int64')
        })
    })

    # Convert in chunks: building a single Dataset from the full list ran out
    # of memory.
    sub_datasets = []
    for start in tqdm(range(0, len(data_list), chunk_size)):
        sub_data = data_list[start: start + chunk_size]
        sub_datasets.append(Dataset.from_list(sub_data, features=features))

    dataset = concatenate_datasets(sub_datasets)
    dataset = dataset.train_test_split(test_size=test_size, seed=seed)

    return dataset

In [11]:
# Build the train/test dataset from the downloaded ModaNet files.
dataset = create_dataset()

100%|██████████| 52254/52254 [01:09<00:00, 747.87it/s]
100%|██████████| 88/88 [01:16<00:00,  1.15it/s]


In [12]:
# Write the dataset splits to ./modanet_hf_dataset (reload later with `datasets.load_from_disk`).
dataset.save_to_disk('./modanet_hf_dataset')

Saving the dataset (0/4 shards):   0%|          | 0/39499 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/4389 [00:00<?, ? examples/s]