In [91]:
import os
import json
import numpy as np
import torch
from PIL import Image, ImageDraw, ImageColor, ImageFont

## Size of dataset

In [14]:
# Names of the dataset splits; the number of lines in ``splits/<split>.txt``
# is taken as the size of that split.
dataset_keys = ('train', 'val', 'test')
dataset_sizes = {}
for split_name in dataset_keys:
    with open(f'splits/{split_name}.txt') as split_file:
        # Stream the file and count lines rather than materialising them.
        dataset_sizes[split_name] = sum(1 for _ in split_file)

print(f'total size: {sum(dataset_sizes.values())}')
print(dataset_sizes)

total size: 52453
{'train': 36589, 'val': 5320, 'test': 10544}


In [63]:
class MapillaryDataset(torch.utils.data.Dataset):
    """Bounding-box detection dataset over one split of the image collection.

    Images are read from ``<root>/<dataset_key>/images``; the annotation of an
    image ``<name>.<ext>`` is read from ``<annotations_dir>/<name>.json``.
    Indexing returns an ``(img, target)`` pair where ``img`` is an RGB PIL
    image (or whatever ``transforms`` turns it into) and ``target`` is a dict
    with the keys ``boxes``, ``labels``, ``image_id``, ``area`` and ``iscrowd``.
    """

    def __init__(self, root, transforms, dataset_key, num_images,
                 annotations_dir='annotations', label_map=None):
        """
        root (string): directory that contains the ``<dataset_key>/images`` folders
        transforms (callable or None): called as ``transforms(img, target)`` and
            expected to return the transformed ``(img, target)`` pair
        dataset_key (string): "train", "test" or "val"
        num_images (int): in range 1 to len(dataset); only the first
            ``num_images`` file names (in sorted order) are used
        annotations_dir (string): directory holding the per-image JSON files
        label_map (dict or None): maps annotation label strings to integer
            class ids. When None every object gets class id 1 (single-class
            detection). A label missing from the map raises KeyError on access.

        Relies on the module-level ``dataset_keys`` and ``dataset_sizes``
        defined earlier in the notebook for argument validation.
        """
        assert dataset_key in dataset_keys, f'unknown dataset_key: {dataset_key!r}'
        assert 0 < num_images <= dataset_sizes[dataset_key], \
            f'num_images must be in 1..{dataset_sizes[dataset_key]}'

        self.root = root
        self.transforms = transforms
        self.dataset_key = dataset_key
        self.annotations_dir = annotations_dir
        self.label_map = label_map
        # Sorted so that the selected subset is deterministic across runs.
        image_dir = os.path.join(root, dataset_key, "images")
        self.dataset = sorted(os.listdir(image_dir))[:num_images]

    def __getitem__(self, idx):
        """Load image ``idx`` and its annotation; return ``(img, target)``."""
        image_key = self.dataset[idx]
        img_path = os.path.join(self.root, self.dataset_key, "images", image_key)

        # The annotation file shares the image's base name; splitext copes
        # with extensions of any length (".jpg", ".jpeg", ...).
        anno_name = f'{os.path.splitext(image_key)[0]}.json'
        with open(os.path.join(self.annotations_dir, anno_name), 'r') as fid:
            anno = json.load(fid)

        # convert() yields an independent in-memory image, so the file handle
        # can be released as soon as the block exits.
        with Image.open(img_path) as raw_img:
            img = raw_img.convert("RGB")

        target = self._build_target(anno, idx)

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def _build_target(self, anno, idx):
        """Turn one parsed annotation dict into the tensor target dict.

        ``anno['objects']`` is a list of objects, each with a ``label`` string
        and a ``bbox`` dict holding ``xmin``, ``ymin``, ``xmax``, ``ymax``.
        """
        objects = anno['objects']
        num_objs = len(objects)

        boxes = [
            [obj['bbox']['xmin'], obj['bbox']['ymin'],
             obj['bbox']['xmax'], obj['bbox']['ymax']]
            for obj in objects
        ]
        # reshape keeps the (N, 4) layout even when the image has no objects,
        # so the column indexing below does not fail on an empty list.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)

        if self.label_map is None:
            # single foreground class
            labels = torch.ones((num_objs,), dtype=torch.int64)
        else:
            labels = torch.as_tensor(
                [self.label_map[obj['label']] for obj in objects],
                dtype=torch.int64,
            )

        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        return {
            "boxes": boxes,
            "labels": labels,
            "image_id": torch.tensor([idx]),
            "area": area,
            # suppose all instances are not crowd
            "iscrowd": torch.zeros((num_objs,), dtype=torch.int64),
        }

    def __len__(self):
        """Number of images selected for this dataset."""
        return len(self.dataset)

In [64]:
# Use the first 1000 training images (by sorted file name); with root=""
# the image folders are resolved relative to the working directory.
myDataset = MapillaryDataset(
    root="",
    transforms=None,
    dataset_key="train",
    num_images=1000,
)

In [100]:
# Load one annotation explicitly so this cell also runs on a fresh kernel:
# no earlier cell defines ``anno`` at notebook scope.
# NOTE(review): which sample produced the output recorded below is not
# known; the first dataset entry is used here -- adjust if another is meant.
sample_key = myDataset.dataset[0]
with open(os.path.join('annotations', f'{os.path.splitext(sample_key)[0]}.json'), 'r') as fid:
    anno = json.load(fid)
anno

{'width': 4160,
 'height': 3120,
 'ispano': False,
 'objects': [{'key': '489lm30wc63tjtupe1e2q1',
   'label': 'other-sign',
   'bbox': {'xmin': 2657.890625,
    'ymin': 1013.0859375,
    'xmax': 2695.46875,
    'ymax': 1051.171875},
   'properties': {'barrier': False,
    'occluded': True,
    'out-of-frame': False,
    'exterior': False,
    'ambiguous': True,
    'included': False,
    'direction-or-information': False,
    'highway': False,
    'dummy': False}},
  {'key': 'jwoggy96u43g4ad6o09ee2',
   'label': 'other-sign',
   'bbox': {'xmin': 2641.640625,
    'ymin': 1028.3203125,
    'xmax': 2683.28125,
    'ymax': 1064.12109375},
   'properties': {'barrier': False,
    'occluded': False,
    'out-of-frame': False,
    'exterior': False,
    'ambiguous': True,
    'included': False,
    'direction-or-information': False,
    'highway': False,
    'dummy': False}}]}