In [1]:
from nuimages import NuImages

nuim = NuImages(dataroot='D:/nulimages_data/nuimages-v1.0-all-metadata', version='v1.0-train', verbose=True, lazy=True)

Loading nuImages tables for version v1.0-train...
Done loading in 0.001 seconds (lazy=True).


In [29]:

from typing import Any, List, Dict, Optional, Tuple, Callable

# Functien that recieves a bbox and returns if in the middle 512x512
def get_in_cropped(bbox: List[float]) -> bool:
    x1, y1, x2, y2 = bbox
    return x1 > 544 and x2 < 1056 and y1 > 194 and y2 < 706


def list_anns_jap( sample_token: str, nuim : NuImages, verbose: bool = True, in_cropped = True) -> Tuple[List[str], List[str]]:
    anns = []
    sample = nuim.get('sample', sample_token)
    key_camera_token = sample['key_camera_token']
    object_anns = [o for o in nuim.object_ann if o['sample_data_token'] == key_camera_token]
    for object_ann in object_anns:


        # check in in cropped
        if in_cropped:
            bbox = object_ann['bbox']
            is_in_cropped = get_in_cropped(bbox)
            if not is_in_cropped:
                continue

        category = nuim.get('category', object_ann['category_token'])
        attribute_names = [nuim.get('attribute', at)['name'] for at in object_ann['attribute_tokens']]
        inner_dict = {
            "category_name" : category['name'],
            "attribute_name" : attribute_names[0] if len(attribute_names) > 0 else ""
        }
        anns.append(inner_dict)
    return anns


Loaded 67279 sample(s) in 0.092s,
Loaded 557715 object_ann(s) in 4.372s,
Loaded 25 category(s) in 0.017s,
Loaded 12 attribute(s) in 0.000s,


[{'category_name': 'movable_object.trafficcone', 'attribute_name': ''},
 {'category_name': 'vehicle.car', 'attribute_name': 'vehicle.parked'},
 {'category_name': 'vehicle.car', 'attribute_name': 'vehicle.parked'},
 {'category_name': 'movable_object.trafficcone', 'attribute_name': ''},
 {'category_name': 'vehicle.car', 'attribute_name': 'vehicle.parked'},
 {'category_name': 'movable_object.trafficcone', 'attribute_name': ''},
 {'category_name': 'movable_object.trafficcone', 'attribute_name': ''},
 {'category_name': 'vehicle.car', 'attribute_name': 'vehicle.parked'}]

In [16]:
from pycocotools import mask as maskUtils


# Decoding RLE
# Retrieve the mask value of object_anns and surface_anns

from typing import Any, List, Dict, Optional, Tuple, Callable

def get_mask_of_anns(sample_token: str, nuim : NuImages, verbose: bool = True):
    anns = []
    sample = nuim.get('sample', sample_token)
    key_camera_token = sample['key_camera_token']
    object_anns = [o for o in nuim.object_ann if o['sample_data_token'] == key_camera_token]
    surface_anns = [o for o in nuim.surface_ann if o['sample_data_token'] == key_camera_token]

    for object_ann in object_anns:
        # get the category name
        category = nuim.get('category', object_ann['category_token'])
        category_name = category['name']

        # print category name
        if verbose:
            print(category_name)
        # print the mask
        mask = 'mask', object_ann['mask']
        mask = maskUtils.decode(mask)
        if verbose:
            print(mask)
    
    for surface_ann in surface_anns:
        # get the category name
        category = nuim.get('category', surface_ann['category_token'])
        category_name = category['name']

        # print category name
        if verbose:
            print(category_name)
        # print the mask
        mask =  surface_ann['mask']
        mask['counts'] = mask['counts'].encode('utf-8')
        print(mask)
        mask = maskUtils.decode(mask)
        if verbose:
            print(mask)


In [17]:
# Retrieve the mask value of object_anns and surface_anns for 10 samples 

for i in range(10):
    sample_token = nuim.sample[i]['token']
    print("sample_token: ", sample_token)
    get_mask_of_anns(sample_token, nuim, verbose=True)
    print("=========================================")

sample_token:  0000ad4e5f8440649a605e59b140bbe6
flat.driveable_surface
{'size': [900, 1600], 'counts': b'amIwVTNvaDAwMDAwMDAwMDAwME8xME8xMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDBPMTAwME8xMDAwMDAwMDAwMDAwMDAwMDAwME8xMDAwMDAwME8xMDAwMDAwMDAwMDAwMDAwTzEwMDAwMDAwMDAwME8xMDAwMDAwMDAwMDAwME8xMDAwMDAwMDAwMDAwME8xMDAwMDAwMDAwTzEwMDAwMDAwMDAwMDAwMDAwME8xMDAwMDAwME8xMDAwMDAwMDAwMDAwMDAwMDAwME8xMDAwTzEwMDAwMDAwMDAwMDAwMDAwMDAwMDAwME8xME8xMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwTzAxMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDBPMTAwMDAwMDAwMDAwMDAwMFFOVFdPZjBsaDBbT1RXT2QwbGgwXU9UV09iMGxoMF9PVFdPYDBsaDBAVVdPP2toMEJWV088aWgwRldXTzlpaDBIV1dPN2loMElYV082aGgwS1lXTzNnaDBOWVdPMWdoMDBZV09PZ2gwMVpXT05maDAzWldPTGZoMDVbV09JZWgwOFtXT29OSzJqaDBQMVtXT25OTE9qaDBTMVtXT21OTE9paDBVMVtXT2tOTk5maDBZMV1XT2dOTk9laDBbMV1XT2ZOT01kaDBdMV5XT2VOMExiaDBgMV5XT2NOMUxhaDBiMV9XT2BOMkxfaDBlMV9XT15ONEtdaDBnMWBXT15OMkteaDBoMWBXT1xOMkxeaDBpMWBXT1pOMk1eaDBqMWFXT1dOMDBfaDBqMWFXT1ZOTzBgaDBqMWJXT1VOTjFgaDBrMWFXT1ROTzFgaDBsMWFXT1JOTzJfaDBuMWFXT1FOTzFgaDBuMWJXT1BOTTNhaDB

ValueError: Invalid RLE mask representation

In [34]:
scene_captions = []
for sample in nuim.sample:

    frame_captions_dict = {}

    
    sample_token = nuim.get('sample_data', sample['key_camera_token'])
    filename = sample_token['filename'].split("/")[-1]

    # Check if there is file name contains CAM_FRONT and len(.split(_)) ==2
    condition = "FRONT" in filename and "LEFT" not in filename and "RIGHT" not in filename
    if not condition:
        continue

        
    annotations = list_anns_jap(sample['token'], nuim, True)
    for annotation in annotations:
        
        # Get the category name for this annotation.
        category_name = annotation['category_name']

        # Check if the anootation is in the middle cropped frame op 512x512
        

        # Get the right part of the category name and the attribute name for this annotation.
        if len(category_name.split(".")) > 1:
            category_name = category_name.split('.')[1]

        if category_name == 'construction':
            category_name = 'construction vehicle'

            
        
        # Get the attribute name for this annotation.
        if annotation["attribute_name"] != "":
            attribute_name = annotation["attribute_name"].split('.')[1]
            # Combine all the information into a single string.
            caption = 'a {} which is {}'.format(category_name, attribute_name)
        else:
            caption = 'a {}'.format(category_name)

        # Add this caption to the frame captions.
        if caption in frame_captions_dict:
            frame_captions_dict[caption] += 1
        else:
            frame_captions_dict[caption] = 1
    
    
    # Combine all the frame captions into a single string.
    frame_captions_list = []

    for caption, count in frame_captions_dict.items():
        if count > 1:
            frame_captions_list.append('{} times {}'.format(count, caption))
        else:
            frame_captions_list.append(caption)
    
    frame_caption = ', '.join(frame_captions_list)

    # Add this frame caption to the scene captions.
    scene_caption = '{}& In this driving scene the following objects can be observed: {}'.format(filename, frame_caption)
    scene_captions.append(scene_caption)


print("hoi")
for i, scene_caption in enumerate(scene_captions):
    filename = scene_caption.split('&')[0].split('.')[0]
    filename = f'{filename}.txt'
    path = "D:/nulimages_data/crop/caption/" + filename

    with open(path, 'w') as f:
        f.write(scene_caption.split('&')[1])


n010-2018-07-10-10-24-36+0800__CAM_FRONT__1531189590512488.jpg& In this driving scene the following objects can be observed: 3 times a pedestrian which is moving, a motorcycle which is with_rider, a pedestrian which is sitting_lying_down, a truck which is moving
n016-2018-07-05-17-39-27+0800__CAM_FRONT__1530783609012515.jpg& In this driving scene the following objects can be observed: a bicycle which is without_rider
n013-2018-08-20-10-52-08+0800__CAM_FRONT__1534733892262407.jpg& In this driving scene the following objects can be observed: 
n004-2018-01-03-16-11-21+0800__CAM_FRONT__1514967427247656.jpg& In this driving scene the following objects can be observed: 2 times a car which is moving
n010-2018-09-04-15-18-55+0800__CAM_FRONT__1536045926112488.jpg& In this driving scene the following objects can be observed: 2 times a car which is moving, 3 times a pedestrian which is moving, a car which is parked, a pedestrian which is standing
n013-2018-07-31-11-08-29+0800__CAM_FRONT__15330070

KeyboardInterrupt: 