### Create the CODA dataset (runtime: ~ 4-5min)

You should have downloaded all datasets by now and arranged them like this in your "dataset directory":

##### ONCE (~1300GB): https://once-for-auto-driving.github.io/download.html

1. Download all unlabeled splits with "annotations", all "lidar-data", and "camera data p3"
2. Arrange them like:

In [25]:
# datasets_root
# |_ONCE
#   |_data_root
#     |_data
#       |-000000
#       | |-cam03
#       | | |-frame_timestamp_1.jpg
#       | | |-frame_timestamp_2.jpg
#       | | |-...
#       | | |_frame_timestamp_n.jpg
#       | |
#       | |-lidar_roof
#       | | |-frame_timestamp_1.bin
#       | | |-frame_timestamp_2.bin
#       | | |-...
#       | | |_frame_timestamp_n.bin
#       | |
#       | |_000000.json
#       |
#       |-000001
#       |-000002
#       |_...

##### KITTI object (~41GB): https://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d

1. Download "left color images", "velodyne point clouds", "camera calibration matrices", and "object development kit"
2. Arrange them like:

In [26]:
# datasets_root
# |_KITTI
#   |_object
#     |-data
#     | |_training
#     |   |-calib
#     |   | |-000000.txt
#     |   | |-000001.txt
#     |   | |-...
#     |   | |_nnnnnn.txt
#     |   |
#     |   |-image_2
#     |   | |-000000.png
#     |   | |-000001.png
#     |   | |-...
#     |   | |_nnnnnn.png
#     |   |
#     |   |_velodyne
#     |     |-000000.bin
#     |     |-000001.bin
#     |     |-...
#     |     |_nnnnnn.bin
#     |
#     |_devkit
#       |_mapping
#         |-train_mapping.txt
#         |_train_rand.txt   

##### nuScenes (~293GB): https://www.nuscenes.org/nuscenes#download

1. Download "Trainval" from "Full dataset (v1.0)", this includes all 10 parts and the metadata
2. Arrange them like:

In [27]:
# datasets_root
# |_nuScenes
#   |_samples
#   | |-CAM_FRONT
#   | | |-n000-time_CAM_FRONT_timestamp.jpg
#   | | |-n000-time_CAM_FRONT_timestamp.jpg
#   | | |-...
#   | | |_n000-time_CAM_FRONT_timestamp.jpg
#   | | 
#   | |_LIDAR_TOP
#   |   |-n000-time_LIDAR_TOP_timestamp.pcd.bin
#   |   |-n000-time_LIDAR_TOP_timestamp.pcd.bin
#   |   |-...
#   |   |_n000-time_LIDAR_TOP_timestamp.pcd.bin
#   |
#   |_v1.0-trainval
#     |-attribute.json
#     |-calibrated_sensor.json
#     |-category.json
#     |-ego_pose.json
#     |-instance.json
#     |-log.json
#     |-map.json
#     |-sample_annotation.json
#     |-sample_data.json
#     |-sample.json
#     |-scene.json
#     |-sensor.json
#     |_visibility.json

##### CODA (~1GB): https://coda-dataset.github.io/download.html (Google drive not working 01.08.2023)

1. Download "CODA base val set"
2. Arrange like:

In [28]:
# datasets_root
# |_CODA
#   |_base-val-1500
#     |-images
#     | |-frame_timestamp_1.jpg
#     | |-frame_timestamp_2.jpg
#     | |-...
#     | |_frame_timestamp_n.jpg
#     |
#     |-corner_case.json
#     |-kitti_indices.json
#     |_nuscenes_sample_tokens.json

In [5]:
import json
import shutil
import os
import re
from nuscenes.nuscenes import NuScenes

#### Set paths
<div class="alert alert-block alert-warning">
<h4>ToDo:</h4>
<ol>
    <li> Set "datasets_root" to the root of your <b>dataset directory</b>
    <li> Set "coda_root" to the root of your new <b>CODA directory</b>
</ol>
</div>

In [6]:
# Root folder that contains the downloaded ONCE / KITTI / nuScenes / CODA datasets
datasets_root = '/disk/ml/datasets/'
# Folder in which the merged CODA dataset (images + lidar) is assembled
coda_root = '/disk/ml/own_datasets/CODA/'

In [7]:
# --- Target folders: all source datasets are merged into these two ---
image_folder = os.path.join(coda_root, 'image')
lidar_folder = os.path.join(coda_root, 'lidar')

# --- Roots of the downloaded source datasets ---
coda_root_original = os.path.join(datasets_root, 'CODA/base-val-1500')
nuscenes_root = os.path.join(datasets_root, 'nuScenes/samples')
kitti_root = os.path.join(datasets_root, 'KITTI/object/data/training')
once_root = os.path.join(datasets_root, 'ONCE/data_root/data')

# --- Metadata files inside the original CODA download ---
# NOTE(review): the setup instructions list 'nuscenes_sample_tokens.json' for
# the download, while this cell reads 'nuscenes_indices.json' -- confirm which
# name your copy of the dataset uses.
json_cornercases_original, json_kitti_indices_original, json_nuscenes_indices_original = (
    os.path.join(coda_root_original, json_name)
    for json_name in ('corner_case.json', 'kitti_indices.json', 'nuscenes_indices.json')
)

# --- Metadata files inside the new CODA directory ---
# The first three are copies of the originals; nuscenes_image.json and
# nuscenes_lidar.json are written by a later cell.
json_cornercases = os.path.join(coda_root, 'corner_case.json')
json_kitti_indices = os.path.join(coda_root, 'kitti_indices.json')
json_nuscenes_indices = os.path.join(coda_root, 'nuscenes_indices.json')
json_nuscenes_image = os.path.join(coda_root, 'nuscenes_image.json')
json_nuscenes_lidar = os.path.join(coda_root, 'nuscenes_lidar.json')

# --- Per-dataset source folders ---
kitti_image_original = os.path.join(kitti_root, 'image_2')
kitti_lidar_original = os.path.join(kitti_root, 'velodyne')
nuscenes_image_original = os.path.join(nuscenes_root, 'CAM_FRONT')
nuscenes_lidar_original = os.path.join(nuscenes_root, 'LIDAR_TOP')
# ONCE images are taken from the CODA download itself, the lidar from ONCE
once_image_original = os.path.join(coda_root_original, 'images')
once_lidar_original = once_root

# --- Per-dataset destination folders (identical to the shared target folders) ---
kitti_image = nuscenes_image = once_image = image_folder
kitti_lidar = nuscenes_lidar = once_lidar = lidar_folder

In [8]:
# nuScenes sensor channels used to look up the camera and lidar recordings of a sample
sensor_nuscenes_image, sensor_nuscenes_lidar = 'CAM_FRONT', 'LIDAR_TOP'

Create and copy files into new CODA directory

In [29]:
# Create the target directory tree. exist_ok=True makes the cell safe to
# re-run and avoids the check-then-create race of a separate os.path.exists().
for folder in (coda_root, image_folder, lidar_folder):
    os.makedirs(folder, exist_ok=True)

# Copy the CODA metadata files into the new CODA directory (file names are kept)
shutil.copy(json_cornercases_original, coda_root)
shutil.copy(json_kitti_indices_original, coda_root)
shutil.copy(json_nuscenes_indices_original, coda_root)

'/disk/ml/own_datasets/CODA/nuscenes_indices.json'

Open all json files

In [10]:
def _read_json(path):
    """Return the parsed content of the JSON file at ``path``."""
    with open(path, 'r') as handle:
        return json.load(handle)


data_cornercases_json = _read_json(json_cornercases)
kitti_indices_json = _read_json(json_kitti_indices)

# The nuScenes index file is parsed three times on purpose: each call yields an
# independent object, and the image/lidar variants are overwritten entry by
# entry in a later cell without touching nuscenes_indices_json.
nuscenes_indices_json = _read_json(json_nuscenes_indices)
nuscenes_image_json = _read_json(json_nuscenes_indices)
nuscenes_lidar_json = _read_json(json_nuscenes_indices)

# Image records of the corner-case annotation file
images = data_cornercases_json['images']

#### Load nuScenes and get image and lidar tokens

Load nuScenes (~1min)

In [11]:
# Build the dataroot with os.path.join so it is correct whether or not
# datasets_root ends with a path separator.
nusc_trainval = NuScenes(version='v1.0-trainval', dataroot=os.path.join(datasets_root, 'nuScenes'), verbose=True)

Loading NuScenes tables for version v1.0-trainval...


23 category,
8 attribute,
4 visibility,
64386 instance,
12 sensor,
10200 calibrated_sensor,
2631083 ego_pose,
68 log,
850 scene,
34149 sample,
2631083 sample_data,
1166187 sample_annotation,
4 map,
Done loading in 52.859 seconds.
Reverse indexing ...
Done reverse indexing in 7.3 seconds.


Get nuScenes image and lidar tokens and save them in coda_root

In [12]:
# For every nuScenes-based CODA image, look up the sample and store the
# 'filename' of its camera and lidar sample_data records.
for image in images:
    file_name = image['file_name']

    # Only entries that are part of nuScenes are resolved
    if 'nuscenes_' not in file_name:
        continue

    # Sample token, e.g. 'nuscenes_033402.jpg': '1a41ba0751d5497ebd32df7c86950671'
    token_nuscenes = nuscenes_indices_json[file_name]
    my_sample = nusc_trainval.get('sample', token_nuscenes)

    # Front camera record
    cam_front_data = nusc_trainval.get('sample_data', my_sample['data'][sensor_nuscenes_image])
    nuscenes_image_json[file_name] = cam_front_data['filename']

    # Top lidar record
    lidar_top_data = nusc_trainval.get('sample_data', my_sample['data'][sensor_nuscenes_lidar])
    nuscenes_lidar_json[file_name] = lidar_top_data['filename']

# Save both mappings in coda_root
for json_path, mapping in (
    (json_nuscenes_image, nuscenes_image_json),
    (json_nuscenes_lidar, nuscenes_lidar_json),
):
    with open(json_path, 'w') as f:
        json.dump(mapping, f)

#### Copy KITTI

Copy KITTI image

In [15]:
# Group the CODA entries by the KITTI file name they refer to (the second
# '_'-separated field of each entry). This is done once so that every
# directory entry needs a single lookup instead of a scan over all entries.
coda_files_by_kitti_name = {}
for file in kitti_indices_json:
    coda_files_by_kitti_name.setdefault(file.split('_')[1], []).append(file)

for file_name in os.listdir(kitti_image_original):
    source = os.path.join(kitti_image_original, file_name)

    # Copy the KITTI image under the CODA name of every entry that refers to it
    for file in coda_files_by_kitti_name.get(file_name, ()):
        destination = os.path.join(kitti_image, file)
        # Files that were already copied are left untouched
        if not os.path.exists(destination):
            shutil.copy(source, destination)

Copy KITTI lidar

In [16]:
# Collect the ids of all CODA KITTI entries once (second '_'-separated field
# without its extension), so each directory entry needs only a set lookup.
coda_kitti_ids = {file.split('_')[1].split('.')[0] for file in kitti_indices_json}

for file_name in os.listdir(kitti_lidar_original):
    # Skip point clouds that do not belong to any CODA entry
    if file_name.split('.')[0] not in coda_kitti_ids:
        continue

    source = os.path.join(kitti_lidar_original, file_name)
    destination = os.path.join(kitti_lidar, 'kitti_' + file_name)
    # Files that were already copied are left untouched
    if not os.path.exists(destination):
        shutil.copy(source, destination)

#### Copy nuScenes

Copy nuScenes image

In [18]:
with open(json_nuscenes_image, 'r') as file:
    nuscenes_image_tokens_json = json.load(file)

# Group the CODA file names by the nuScenes file they map to (third
# '/'-separated component of the stored path). This is done once so that
# every directory entry needs a single lookup instead of a scan over all entries.
coda_files_by_nuscenes_name = {}
for file, nuscenes_path in nuscenes_image_tokens_json.items():
    coda_files_by_nuscenes_name.setdefault(nuscenes_path.split('/')[2], []).append(file)

for file_name in os.listdir(nuscenes_image_original):
    source = os.path.join(nuscenes_image_original, file_name)

    # Copy the nuScenes image under the CODA name of every entry that maps to it
    for file in coda_files_by_nuscenes_name.get(file_name, ()):
        destination = os.path.join(nuscenes_image, file)
        # Files that were already copied are left untouched
        if not os.path.exists(destination):
            shutil.copy(source, destination)

Copy nuScenes lidar

In [20]:
with open(json_nuscenes_lidar, 'r') as file:
    nuscenes_lidar_tokens_json = json.load(file)

# Group the CODA file names by the nuScenes file they map to (third
# '/'-separated component of the stored path). This is done once so that
# every directory entry needs a single lookup instead of a scan over all entries.
coda_files_by_nuscenes_name = {}
for file, nuscenes_path in nuscenes_lidar_tokens_json.items():
    coda_files_by_nuscenes_name.setdefault(nuscenes_path.split('/')[2], []).append(file)

for file_name in os.listdir(nuscenes_lidar_original):
    source = os.path.join(nuscenes_lidar_original, file_name)

    # Copy the point cloud as '<CODA name without extension>.bin'
    for file in coda_files_by_nuscenes_name.get(file_name, ()):
        destination = os.path.join(nuscenes_lidar, file.split('.')[0] + '.bin')
        # Files that were already copied are left untouched
        if not os.path.exists(destination):
            shutil.copy(source, destination)

#### Copy ONCE

Copy ONCE image

In [23]:
# ONCE entries are recognised by a run of six digits followed by '_' in the
# file name; their images are copied from the original CODA images folder.
for image in images:
    file_name = image['file_name']
    if re.search(r'[0-9]{6}_', file_name) is None:
        continue

    source = os.path.join(once_image_original, file_name)
    destination = os.path.join(once_image, file_name)
    # Files that were already copied are left untouched
    if os.path.exists(destination):
        continue
    shutil.copy(source, destination)

Copy ONCE lidar

In [22]:
# Copy the roof-lidar point cloud that belongs to every ONCE image.
for image in images:
    # Same name as the image, but with a '.bin' extension
    file_name = image['file_name'].split('.')[0] + '.bin'
    if re.search(r'[0-9]{6}_', file_name) is None:
        continue

    # The first two '_'-separated fields select '<field 0>/lidar_roof/<field 1>'
    name_fields = file_name.split('_')
    source = os.path.join(once_lidar_original, name_fields[0] + '/lidar_roof/' + name_fields[1])
    destination = os.path.join(once_lidar, file_name)
    # Files that were already copied are left untouched
    if os.path.exists(destination):
        continue
    shutil.copy(source, destination)