In [1]:
# from utils import get_3d_points

In [2]:
import json
from tqdm import tqdm
import tensorflow as tf
import os
import cv2
import math
import numpy as np
import itertools

# from waymo_open_dataset.utils import frame_utils
# from waymo_open_dataset import dataset_pb2
from pyson.utils import multi_thread
# tf.__version__
from glob import glob


In [25]:
import simple_waymo_open_dataset_reader  
from simple_waymo_open_dataset_reader import dataset_pb2, label_pb2
import simple_waymo_open_dataset_reader.utils as su

def get_3d_points_by_cam(frame, camera_name):
    """
        Project the 3D laser labels of a frame into the image of one camera.

        frame: parsed frame; its ``context.camera_calibrations`` and
               ``laser_labels`` are read
        camera_name: int specifying the camera id

        Returns a list with one entry per label: the projected box corners
        converted to nested Python lists. Labels for which the projection
        helper returns None are skipped.
    """
    calibration = su.get(frame.context.camera_calibrations, camera_name)
    to_image = su.get_image_transform(calibration)
    # `su` is the same module as simple_waymo_open_dataset_reader.utils.
    projected = (
        su.get_3d_box_projected_corners(to_image, label)
        for label in frame.laser_labels
    )
    return [corners.tolist() for corners in projected if corners is not None]

In [28]:

################################################
# Step 1 tf->json
###################################################

# data_paths_val = glob('/waymo/validation/*.tfrecord')
# data_paths_test = glob('/ssd6/waymo/test/*.tfrecord')
# glob() returns files in arbitrary order; sorting keeps image ids and
# `paths_records[0]` stable from one run to the next.
data_paths_train = sorted(glob('/ssd6/waymo/tfrecord_train/*.tfrecord'))

paths_records = data_paths_train
print("Len :", len(paths_records))

# Every output location is derived from this single root.
output_dir = '/ssd6/coco_style_1.2/'
path_output_annotation = os.path.join(output_dir, "annotations", "train.json")
# Existing annotation file that is loaded as a template for the new one.
path_sample_coco_annotation = os.path.join(output_dir, "annotations", "val.json")

# `laser_images` is written to by process_frame; cv2.imwrite does not create
# missing directories, so it has to exist up front like the other two.
for sub_dir in ("annotations", "images", "laser_images"):
    os.makedirs(os.path.join(output_dir, sub_dir), exist_ok=True)

def _load_top_lidar_pointcloud(frame):
    """Convert the TOP LIDAR range image of ``frame`` into a point cloud.

    Returns the ``(pcl, pcl_attr)`` pair produced by ``su.project_to_pointcloud``.
    """
    laser_name = dataset_pb2.LaserName.TOP
    laser = su.get(frame.lasers, laser_name)
    laser_calibration = su.get(frame.context.laser_calibrations, laser_name)
    ri, camera_projection, range_image_pose = su.parse_range_image_and_camera_projection(laser)
    return su.project_to_pointcloud(frame, ri, camera_projection, range_image_pose, laser_calibration)


def _draw_laser_points(canvas, pcl, pcl_attr, vehicle_to_image, max_range=30.0):
    """Draw the LIDAR points that fall inside ``canvas`` (modified in place).

    Points are coloured by ``pcl_attr[:, 0] / max_range`` through the JET colour map.
    # NOTE(review): assumes pcl is [N, 3] vehicle-frame xyz and that column 0 of
    # pcl_attr is the range, as in the reader library's example — confirm.
    """
    if len(pcl) == 0:
        return

    # Homogeneous coordinates, then projection into image space.
    pcl_h = np.concatenate((pcl, np.ones_like(pcl[:, 0:1])), axis=1)
    proj = np.einsum('ij,bj->bi', vehicle_to_image, pcl_h)

    # Drop points behind the camera before the perspective divide.
    in_front = proj[:, 2] > 0
    proj = proj[in_front]
    ranges = pcl_attr[in_front][:, 0]
    pixels = proj[:, :2] / proj[:, 2:3]

    # Keep only points that land inside the image bounds.
    height, width = canvas.shape[:2]
    inside = ((pixels[:, 0] >= 0) & (pixels[:, 0] < width) &
              (pixels[:, 1] >= 0) & (pixels[:, 1] < height))
    pixels = pixels[inside].astype(int)
    ranges = ranges[inside]
    if len(pixels) == 0:
        return

    scaled = (np.clip(ranges / max_range, 0.0, 1.0) * 255).astype(np.uint8)
    colours = cv2.applyColorMap(scaled.reshape(-1, 1), cv2.COLORMAP_JET)[:, 0, :]
    for (u, v), colour in zip(pixels, colours):
        cv2.circle(canvas, (int(u), int(v)), 1, tuple(int(c) for c in colour))


def process_frame(frame):
    """Export the camera images of one frame and collect their 2D / 3D labels.

    frame: ``(frame, frame_id, frame_name)`` tuple as produced by ``f_datapath``.

    Side effects: for every camera image, writes
    ``<output_dir>/images/<frame_name>_<frame_id>_<camera>.jpg`` and a same-named
    LIDAR projection under ``<output_dir>/laser_images/``; files that already
    exist are left untouched.

    Returns a dict keyed by the image output path. Each value holds
    ``with_camlabel``, ``bboxes`` ([x, y, w, h] lists), ``bboxes_3d``,
    ``bboxes_id``, ``labels``, ``timestamp_micros``,
    ``tracking_difficulty_level`` and ``detection_difficulty_level``.
    """
    frame, frame_id, frame_name = frame
    camera_labels = frame.camera_labels
    # An empty list means no 2D labels were provided for this frame.
    with_camlabel = len(camera_labels) > 0

    laser_dir = os.path.join(output_dir, 'laser_images')
    os.makedirs(laser_dir, exist_ok=True)
    # The point cloud does not depend on the camera: load it at most once per
    # frame, and only if some laser image still has to be rendered.
    pointcloud = None

    rt = dict()
    for camera in frame.images:
        image_id = camera.name
        image_name = dataset_pb2.CameraName.Name.Name(image_id)
        output_name = os.path.join(output_dir, 'images', f'{frame_name}_{frame_id}_{image_name}.jpg')
        image = tf.image.decode_jpeg(camera.image, channels=3).numpy()
        if not os.path.exists(output_name):
            print(output_name)
            # TensorFlow decodes to RGB while cv2.imwrite expects BGR.
            cv2.imwrite(output_name, cv2.cvtColor(image, cv2.COLOR_RGB2BGR))

        bboxes_coco = []
        class_ids = []
        bboxes_id = []
        detection_difficulty_levels = []
        tracking_difficulty_levels = []
        # Match the label group by camera name instead of by list position, so
        # a different ordering of images and camera_labels cannot mix cameras.
        matching = next((entry for entry in camera_labels if entry.name == image_id), None)
        if matching is not None:
            for box in matching.labels:
                # For camera labels `length` is the horizontal extent and
                # `width` the vertical one.
                cx, cy = box.box.center_x, box.box.center_y
                w, h = box.box.length, box.box.width
                np_box = np.clip([cx - w / 2, cy - h / 2, w, h], 0, 10000)
                bboxes_id.append(box.id)
                bboxes_coco.append(np_box.tolist())
                class_ids.append(box.type)
                tracking_difficulty_levels.append(box.tracking_difficulty_level)
                detection_difficulty_levels.append(box.detection_difficulty_level)

        # 3D boxes projected into this camera.
        bboxes_3d = get_3d_points_by_cam(frame, image_id)

        # LIDAR points rendered on a black canvas of the same size as the image.
        output_laser_name = os.path.join(laser_dir, os.path.basename(output_name))
        if not os.path.exists(output_laser_name):
            if pointcloud is None:
                pointcloud = _load_top_lidar_pointcloud(frame)
            pcl, pcl_attr = pointcloud
            camera_calibration = su.get(frame.context.camera_calibrations, image_id)
            vehicle_to_image = su.get_image_transform(camera_calibration)
            laser_as_img = np.zeros_like(image)
            _draw_laser_points(laser_as_img, pcl, pcl_attr, vehicle_to_image)
            print(output_laser_name)
            cv2.imwrite(output_laser_name, laser_as_img)

        rt[output_name] = dict(with_camlabel=with_camlabel,
                               bboxes=bboxes_coco,
                               bboxes_3d=bboxes_3d,
                               bboxes_id=bboxes_id,
                               labels=class_ids,
                               timestamp_micros=frame.timestamp_micros,
                               tracking_difficulty_level=tracking_difficulty_levels,
                               detection_difficulty_level=detection_difficulty_levels)
    return rt

def f_datapath(data_path):
    """Parse every frame stored in one TFRecord file.

    data_path: path of the ``.tfrecord`` file to read.

    Returns a list of ``(frame, frame_id, record_name)`` tuples, where
    ``frame_id`` counts the frames of the file starting at 1 and
    ``record_name`` is the base name of ``data_path``.
    """
    record_name = os.path.basename(data_path)
    records = tf.data.TFRecordDataset(data_path, compression_type='')
    parsed = []
    for position, raw in enumerate(records, start=1):
        frame = dataset_pb2.Frame()
        frame.ParseFromString(bytearray(raw.numpy()))
        parsed.append((frame, position, record_name))
    return parsed



Len : 797


# debug

In [None]:

# Debug run: parse the first record file and convert only its first frame.
# `frames` is a notebook-global name that the conversion loop rebinds later.
frames = f_datapath(paths_records[0])
process_frame(frames[0])



> [0;32m<ipython-input-28-c8ba54492c53>[0m(56)[0;36mprocess_frame[0;34m()[0m
[0;32m     55 [0;31m        [0;32mimport[0m [0mipdb[0m[0;34m;[0m [0mipdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 56 [0;31m        [0mbboxes_3d[0m [0;34m=[0m [0mget_3d_points_by_cam[0m[0;34m([0m[0mframe[0m[0;34m,[0m [0mimage[0m[0;34m.[0m[0mname[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     57 [0;31m[0;34m[0m[0m
[0m


ipdb>  image.name


*** AttributeError: 'numpy.ndarray' object has no attribute 'name'


ipdb>  image_id


1


ipdb>  bboxes_3d = get_3d_points_by_cam(frame, image_id)
ipdb>  bboxes_3d


[[[[[-8250, 633], [-8237, 440]], [[-8305, 633], [-8292, 440]]], [[[-8175, 634], [-8163, 441]], [[-8230, 633], [-8218, 442]]]], [[[[6256, 747], [6252, 603]], [[6302, 749], [6298, 599]]], [[[5259, 737], [5256, 615]], [[5263, 738], [5261, 612]]]], [[[[3702, 831], [3699, 648]], [[4049, 840], [4046, 647]]], [[[4088, 868], [4085, 637]], [[4558, 882], [4554, 635]]]], [[[[4648, 1094], [4640, 663]], [[3837, 1046], [3832, 663]]], [[[3894, 968], [3890, 671]], [[3376, 944], [3373, 671]]]], [[[[2023, 798], [2022, 650]], [[1955, 803], [1955, 648]]], [[[1683, 784], [1683, 653]], [[1608, 788], [1608, 651]]]], [[[[12410, 942], [12383, 540]], [[10647, 909], [10627, 555]]], [[[9222, 862], [9208, 587]], [[8241, 846], [8230, 594]]]], [[[[3164, 1131], [3161, 758]], [[2569, 1080], [2567, 748]]], [[[2907, 1001], [2905, 738]], [[2494, 974], [2492, 733]]]], [[[[2143, 774], [2143, 674]], [[2087, 776], [2087, 673]]], [[[1876, 764], [1876, 674]], [[1817, 766], [1817, 673]]]], [[[[4979, 797], [4975, 612]], [[5415, 

ipdb>  len(bboxes_3d)


79


In [None]:
# ----------------------
# Step 2: convert every record, then assemble the COCO-style annotation file.
data_dict = dict()
for data_path_cam in tqdm(paths_records):
    frames = f_datapath(data_path_cam)
    results = multi_thread(process_frame, frames, verbose=False, max_workers=None)
    for frame_result in results:
        data_dict.update(frame_result)

# The existing annotation file only serves as a template: every key set below
# replaces whatever it contained.
with open(path_sample_coco_annotation) as f:
    sample = json.load(f)

sample['info'] = {'description': '2D waymo',
 'url': 'waymo.com',
 'version': '1.0',
 'year': 2020,
 'contributor': 'Hai Anh',
 'date_created': '2019/04/06'}

sample['licenses'] = []

rt_images = []
rt_annotations = []
rt_with_cam_labels = []

# Each annotation is stored in its own JSON file; the main file only lists
# the relative paths.
anno_indi_dir = os.path.join(output_dir, "annotations", "anns")
os.makedirs(anno_indi_dir, exist_ok=True)

for image_id, (image_name, record) in enumerate(data_dict.items()):
    filename = os.path.basename(image_name)
    # NOTE(review): height/width are hard-coded for every camera — confirm that
    # all exported images (including the side cameras) really are 1920x1280.
    image = {'license': 4,
         'file_name': filename,
         'height': 1280,
         'width': 1920,
         'id': image_id,
         'timestamp_micros': record["timestamp_micros"]
    }
    rt_with_cam_labels.append(record['with_camlabel'])
    rt_images.append(image)

    # Read every field from the per-image record itself; rebinding the record
    # to its 'labels' list before reading 'bboxes_id' raises a TypeError.
    bboxes = np.array(record['bboxes']).astype('int')
    class_ids = record['labels']
    bboxes_id = record['bboxes_id']

    for box, box_id, lbl in zip(bboxes, bboxes_id, class_ids):
        # The id is unique across the whole dataset and doubles as the file
        # name, so annotations of different images never overwrite each other.
        anno_id = len(rt_annotations)
        annotation = dict(image_id=image_id, box_id=box_id, category_id=lbl,
                          bbox=box.tolist(), iscrowd=0, id=anno_id)
        anno_file = f"{anno_id}.json"
        with open(os.path.join(anno_indi_dir, anno_file), "w") as f:
            json.dump(annotation, f)
        rt_annotations.append(os.path.join("anns", anno_file))

# Category ids follow the Waymo label type enum; cyclist is 4, not a second 3.
# NOTE(review): the names 'perdestrian' and 'cyclis' look like typos; they are
# kept unchanged because downstream consumers may match on them — confirm.
cates =  [
 {'supercategory': 'vehicle','id': 1, 'name': 'vehicle'},
 {'supercategory': 'perdestrian','id': 2, 'name': 'perdestrian'},
 {'supercategory': 'sign','id': 3, 'name': 'sign'},
 {'supercategory': 'cyclis','id': 4, 'name': 'cyclis'}
]

sample['images'] = rt_images
sample['with_camlabel'] = rt_with_cam_labels
sample['annotations'] = rt_annotations
sample['categories'] = cates

with open(path_output_annotation, 'w') as f:
    json.dump(sample, f)
print('done!', path_output_annotation)