
## Installation

To run Jupyter notebook locally:

```
python3 -m pip install waymo-open-dataset-tf-2-12-0==1.6.4
python3 -m pip install "notebook>=5.3" "ipywidgets>=7.5"
python3 -m pip install --upgrade "jupyter_http_over_ws>=0.0.7" && \
jupyter serverextension enable --py jupyter_http_over_ws
jupyter notebook
```

In [0]:
# To run in a colab:
!pip3 install waymo-open-dataset-tf-2-12-0==1.6.4

## Imports

In [0]:
#@title Waymo Open Dataset imports
from waymo_open_dataset import dataset_pb2
from waymo_open_dataset import label_pb2
from waymo_open_dataset.metrics.python import keypoint_metrics
from waymo_open_dataset.protos import keypoint_pb2
from waymo_open_dataset.utils import box_utils
from waymo_open_dataset.utils import frame_utils
from waymo_open_dataset.utils import keypoint_data
from waymo_open_dataset.utils import keypoint_draw
from waymo_open_dataset.utils import range_image_utils
from waymo_open_dataset.utils import transform_utils

In [0]:
# File path to a tfrecord file with Frame protos with human keypoints.
frame_path = 'frame_with_keypoints.tfrecord'




In [0]:
#@title Load Frame proto
import tensorflow as tf

# Only the first record of the file is needed for this tutorial.
dataset = tf.data.TFRecordDataset(frame_path, compression_type='')
data = next(iter(dataset), None)
if data is None:
  # Fail here with a clear message instead of leaving `frame` undefined,
  # which would only surface as a NameError further down.
  raise ValueError(f'No records found in {frame_path!r}')
frame = dataset_pb2.Frame()
frame.ParseFromString(bytearray(data.numpy()))

# Group the labels stored in the frame; the result is indexed by object id
# in the cells below.
labels = keypoint_data.group_object_labels(frame)
print(f'Loaded {len(labels)} objects')

## Visualization

In [0]:
#@title Auxiliary imports and utils

import os
import math
import numpy as np
from matplotlib import pylab as plt
import plotly.graph_objects as go
import itertools
import PIL.Image
import io
import dataclasses


def _imdecode(buf: bytes) -> np.ndarray:
  """Decodes an encoded image into a numpy array.

  Args:
    buf: bytes of an encoded image which `PIL.Image.open` can read.

  Returns:
    The array created by `np.array` from the opened PIL image.
  """
  with io.BytesIO(buf) as stream:
    # The array is built inside the `with` block, i.e. before the in-memory
    # stream is closed.
    return np.array(PIL.Image.open(stream))


def _imshow(ax: plt.Axes, image_np: np.ndarray):
  """Shows an image on the given axes without axis decorations.

  Args:
    ax: matplotlib axes to draw the image on.
    image_np: image array passed to `ax.imshow`.
  """
  ax.imshow(image_np)
  # Autoscaling is switched off only after the image is drawn; presumably so
  # that overlays added later do not change the view limits.
  ax.set_autoscale_on(False)
  ax.axis('off')


def _draw_laser_points(fig: go.Figure,
                       points: np.ndarray,
                       color: str = 'gray',
                       size: int = 3):
  """Adds laser points to a plotly figure as a 3D scatter trace.

  Args:
    fig: figure to add the trace to.
    points: 2D array; columns 0, 1 and 2 are used as x, y and z coordinates.
    color: marker color.
    size: marker size.
  """
  xs, ys, zs = points[:, 0], points[:, 1], points[:, 2]
  marker_style = {'color': color, 'size': size}
  cloud = go.Scatter3d(mode='markers', x=xs, y=ys, z=zs, marker=marker_style)
  fig.add_trace(cloud)


def _create_plotly_figure() -> go.Figure:
  """Creates an empty 600x600 plotly figure for 3D visualization.

  Returns:
    A figure without a legend, with grid, lines, background and labels of all
    three scene axes switched off.
  """
  # The same settings are applied to the x, y and z axes of the scene.
  hidden_axis = {
      'showgrid': False,
      'zeroline': False,
      'showline': False,
      'showbackground': False,
      'showaxeslabels': False,
      'showticklabels': False,
  }
  scene = {'aspectmode': 'data'}  # force xyz has same scale
  for axis_name in ('xaxis', 'yaxis', 'zaxis'):
    scene[axis_name] = hidden_axis

  fig = go.Figure()
  fig.update_layout(width=600, height=600, showlegend=False, scene=scene)
  return fig

In [0]:
#@title Select object and camera
# The object and the camera view inspected by the cells below.
object_id = 'DQFLdFau_A8kTPOkDxfgJA'
camera_name = dataset_pb2.CameraName.Name.FRONT_RIGHT

# Image payloads of the frame keyed by camera name.
camera_image_by_name = {i.name: i.image for i in frame.images}
obj = labels[object_id]
laser_keypoints = obj.laser.keypoints.keypoint
num_laser_points = len(laser_keypoints)
camera_keypoints = obj.camera[camera_name].keypoints.keypoint
num_camera_points = len(camera_keypoints)

# Laser keypoints: one row per keypoint, sorted by keypoint type.
print(f'Object {object_id} has')
print(f'{num_laser_points} laser keypoints '
      '(short name | location | is_occluded):')
for k in sorted(laser_keypoints, key=lambda k: k.type):
  m = k.keypoint_3d.location_m
  location_str = f'({m.x:.2f}, {m.y:.2f}, {m.z:.2f})'
  short_name = keypoint_draw.point_name(k.type)
  occluded = k.keypoint_3d.visibility.is_occluded
  print(f'{short_name}\t| {location_str:25} | {occluded}')
print(f'\na LaserKeypoint proto example:\n\n{laser_keypoints[0]}')

# Camera keypoints of the selected camera, in the same tabular form.
print(f'{num_camera_points} camera keypoints '
      '(short name |  location | is_occluded):')
for k in sorted(camera_keypoints, key=lambda k: k.type):
  px = k.keypoint_2d.location_px
  location_str = f'({px.x:.0f}, {px.y:.0f})'
  short_name = keypoint_draw.point_name(k.type)
  occluded = k.keypoint_2d.visibility.is_occluded
  print(f'{short_name}\t| {location_str:13} | {occluded}')
print(f'\na CameraKeypoint proto example:\n\n{camera_keypoints[0]}')

In [0]:
#@title Show camera keypoints
# Decode the image of the selected camera into a numpy array.
image_np = _imdecode(camera_image_by_name[camera_name])
# Crop the image and the keypoints using the object's camera box.
# NOTE(review): `margin=0.3` is presumably a relative padding around the box -
# confirm against `keypoint_draw.crop_camera_keypoints`.
croped_image, cropped_camera_keypoints = keypoint_draw.crop_camera_keypoints(
    image_np,
    obj.camera[camera_name].keypoints.keypoint,
    obj.camera[camera_name].box,
    margin=0.3)
camera_wireframe = keypoint_draw.build_camera_wireframe(
    cropped_camera_keypoints)

# This overwrites a module-level attribute of `keypoint_draw`, so the new value
# stays in effect for any later drawing calls in this session as well.
keypoint_draw.OCCLUDED_BORDER_WIDTH = 3
_, ax = plt.subplots(frameon=False, figsize=(5, 7))
# Draw the cropped image first, then the wireframe on top of it.
_imshow(ax, croped_image)
keypoint_draw.draw_camera_wireframe(ax, camera_wireframe)

In [0]:
#@title Show laser keypoints

# Select laser points inside pedestrian's bounding box
# The third value returned by the parser is not needed here and is discarded.
(range_images, camera_projections, _, range_image_top_pose
) = frame_utils.parse_range_image_and_camera_projection(frame)
# `cp_points` is returned together with the points but is not used below.
points, cp_points = frame_utils.convert_range_image_to_point_cloud(
    frame, range_images, camera_projections, range_image_top_pose)
# `points` is a sequence of arrays (presumably one per lidar - TODO confirm),
# merged here into a single array along the first axis.
points_all = np.concatenate(points, axis=0)
# Add a leading axis so that the single box is passed as a batch of one box.
box = box_utils.box_to_tensor(obj.laser.box)[tf.newaxis, :]
# Column 0 of the result corresponds to the only box in the batch; it is used
# as a mask over `points_all`.
box_points = points_all[box_utils.is_within_box_3d(points_all, box)[:, 0]]
print(f'{box_points.shape[0]} laser points selected.')

# Visualize 3D scene
laser_wireframe = keypoint_draw.build_laser_wireframe(
    obj.laser.keypoints.keypoint)
fig = _create_plotly_figure()
# The wireframe is added first, then the laser points selected above.
keypoint_draw.draw_laser_wireframe(fig, laser_wireframe)
_draw_laser_points(fig, box_points)
fig.show()

## Metrics

`waymo_open_dataset` package supports the following metrics:

- Precision (P) and average precision (AP) for various thresholds of Object Keypoint Similarity (OKS).
- Percentage of Correct Keypoints (PCK)
- Mean Per Joint Position Error (MPJPE, aka MPJE)

Please refer to [`waymo_open_dataset/metrics/python/keypoint_metrics.py`](https://github.com/waymo-research/waymo-open-dataset/blob/master/waymo_open_dataset/metrics/python/keypoint_metrics.py) for implementation details.

NOTE: The OKS metric penalizes keypoint coordinates far outside the ground truth bounding box for samples without ground truth keypoints. For such cases we set the default coordinates to be in the middle of the object's box to avoid the penalty.

In [0]:
#@title Example how to compute metrics for camera keypoints
from typing import Tuple

def get_camera_data(
    frame: dataset_pb2.Frame
) -> Tuple[keypoint_data.KeypointsTensors, keypoint_data.KeypointsTensors]:
  """Collects camera keypoints and their boxes from a Frame proto.

  Labels from all entries of `frame.camera_labels` are scanned; only labels
  with the `camera_keypoints` field set are used.

  Args:
    frame: a Frame proto with camera labels.

  Returns:
    A tuple (keypoints, boxes): the output of `keypoint_data.stack_keypoints`
    for all selected labels and the output of `keypoint_data.stack_boxes` for
    their boxes, in the same order.
  """
  keypoints_per_object = []
  boxes_per_object = []
  for camera_labels in frame.camera_labels:
    for label in camera_labels.labels:
      if not label.HasField('camera_keypoints'):
        continue
      box = keypoint_data.create_camera_box_tensors(label.box, dtype=tf.float32)
      # The box center is the default keypoint location, which avoids the OKS
      # penalty for coordinates far outside of the ground truth box.
      keypoints = keypoint_data.create_camera_keypoints_tensors(
          label.camera_keypoints.keypoint,
          default_location=box.center,
          order=keypoint_data.CANONICAL_ORDER_CAMERA,
          dtype=tf.float32)
      keypoints_per_object.append(keypoints)
      boxes_per_object.append(box)
  stacked_keypoints = keypoint_data.stack_keypoints(keypoints_per_object)
  stacked_boxes = keypoint_data.stack_boxes(boxes_per_object)
  return stacked_keypoints, stacked_boxes


# Ground truth camera keypoints and boxes for all labeled objects in the frame.
gt_cam, gt_cam_box = get_camera_data(frame)

# Fake "predictions": ground truth locations perturbed with Gaussian noise,
# with the ground truth visibility reused as is.
noise_stddev = 5.0  # in pixels
location_noise = tf.random.normal(gt_cam.location.shape, stddev=noise_stddev)
pr_cam = keypoint_data.KeypointsTensors(
    location=gt_cam.location + location_noise, visibility=gt_cam.visibility)

# Compute all metrics from the default camera config in one go.
all_metrics = keypoint_metrics.create_combined_metric(
    keypoint_metrics.DEFAULT_CONFIG_CAMERA)
all_metrics.update_state([gt_cam, pr_cam, gt_cam_box])
result = all_metrics.result()

# Print the metrics sorted by name.
print('Camera keypoint metrics:')
for name in sorted(result):
  tensor = result[name]
  print(f'{name:20s}: {tensor.numpy():.3f}')

In [0]:
#@title Example how to compute metrics for laser keypoints


def get_laser_data(
    frame: dataset_pb2.Frame
) -> Tuple[keypoint_data.KeypointsTensors, keypoint_data.KeypointsTensors]:
  """Collects laser keypoints and their boxes from a Frame proto.

  Only entries of `frame.laser_labels` with the `laser_keypoints` field set
  are used.

  Args:
    frame: a Frame proto with laser labels.

  Returns:
    A tuple (keypoints, boxes): the output of `keypoint_data.stack_keypoints`
    for all selected labels and the output of `keypoint_data.stack_boxes` for
    their boxes, in the same order.
  """
  keypoints_per_object = []
  boxes_per_object = []
  for label in frame.laser_labels:
    if not label.HasField('laser_keypoints'):
      continue
    box = keypoint_data.create_laser_box_tensors(label.box, dtype=tf.float32)
    # The box center is the default keypoint location, which avoids the OKS
    # penalty for coordinates far outside of the ground truth box.
    keypoints = keypoint_data.create_laser_keypoints_tensors(
        label.laser_keypoints.keypoint,
        default_location=box.center,
        order=keypoint_data.CANONICAL_ORDER_LASER,
        dtype=tf.float32)
    keypoints_per_object.append(keypoints)
    boxes_per_object.append(box)
  stacked_keypoints = keypoint_data.stack_keypoints(keypoints_per_object)
  stacked_boxes = keypoint_data.stack_boxes(boxes_per_object)
  return stacked_keypoints, stacked_boxes


# Laser data gets its own names: the camera tensors (`gt_cam`, `pr_cam`,
# `gt_cam_box`) from the camera example must stay intact, because the
# "Use individual metrics" cell combines them with per-type scales ordered by
# `CANONICAL_ORDER_CAMERA`.
gt_laser, gt_laser_box = get_laser_data(frame)

# Fake "predictions": ground truth locations perturbed with Gaussian noise,
# with the ground truth visibility reused as is.
noise_stddev = 0.05  # in meters
pr_laser = keypoint_data.KeypointsTensors(
    location=gt_laser.location +
    tf.random.normal(gt_laser.location.shape, stddev=noise_stddev),
    visibility=gt_laser.visibility)

# Compute all metrics from the default laser config in one go.
all_metrics = keypoint_metrics.create_combined_metric(
    keypoint_metrics.DEFAULT_CONFIG_LASER)
all_metrics.update_state([gt_laser, pr_laser, gt_laser_box])
result = all_metrics.result()

# Print the metrics sorted by name.
print('Laser keypoint metrics:')
for name, tensor in sorted(result.items(), key=lambda e: e[0]):
  print(f'{name:20s}: {tensor.numpy():.3f}')

In [0]:
#@title Use individual metrics

# Per-type scales listed in the canonical order of camera keypoints, so
# `gt_cam`, `pr_cam` and `gt_cam_box` used below are expected to hold camera
# keypoints and boxes (as created by the camera metrics example).
per_type_scales = [
    keypoint_metrics.DEFAULT_PER_TYPE_SCALES[t]
    for t in keypoint_data.CANONICAL_ORDER_CAMERA
]
# A single metric (average precision at one OKS threshold) is updated with the
# same [ground truth, prediction, box] inputs as the combined metric.
oks = keypoint_metrics.AveragePrecisionAtOKS(per_type_scales, thresholds=[0.95])
oks.update_state([gt_cam, pr_cam, gt_cam_box])
# Last expression of the cell: its value is displayed as the cell output.
oks.result()

# Generate a submission
The Pose Estimation challenge 2023 expects binproto files with `PoseEstimationSubmission` protos.

This section shows an example of how to prepare a submission. Follow the instructions on the [challenge web page](https://waymo.com/open/challenges/2023/pose-estimation) to understand how to submit the resulting tar.gz file to our servers for evaluation.

*This section is self-sufficient, so you don't need to execute any cells above to use it.*

In [0]:
#@title Create PoseEstimationSubmission proto
import os
import tqdm
import tensorflow as tf
import dataclasses
from typing import Iterable
import numpy as np

from waymo_open_dataset import dataset_pb2
from waymo_open_dataset.protos import keypoint_pb2
from waymo_open_dataset.protos import box_pb2
from waymo_open_dataset import label_pb2
from waymo_open_dataset.utils import keypoint_data
from waymo_open_dataset.protos import keypoints_submission_pb2

# Input dataset: root folder and a glob pattern for the tfrecord files of the
# validation subset. Change DATASET_FOLDER to the location of your copy.
DATASET_FOLDER = '/waymo_open_dataset_'
VALIDATION_FILES = os.path.join(DATASET_FOLDER, 'validation', '*.tfrecord')

# Where results are going to be saved. The directory is created right away;
# an already existing directory is not an error.
OUTPUT_ROOT_DIRECTORY = '/tmp/waymo_pose_estimation/'
os.makedirs(OUTPUT_ROOT_DIRECTORY, exist_ok=True)

# List all files matching the pattern; they are read and processed further
# below.
filenames = tf.io.matching_files(VALIDATION_FILES)


def _create_keypoint_proto(
    loc: np.ndarray, kp_type: keypoint_pb2.KeypointType
) -> keypoint_pb2.LaserKeypoint:
  """Creates a LaserKeypoint proto for a single 3D location.

  Args:
    loc: array with x, y and z coordinates at indices 0, 1 and 2.
    kp_type: type of the keypoint.

  Returns:
    A LaserKeypoint with the given type and location, always marked as not
    occluded.
  """
  location_m = {'x': loc[0], 'y': loc[1], 'z': loc[2]}
  visibility = {'is_occluded': False}
  keypoint_3d = {'location_m': location_m, 'visibility': visibility}
  return keypoint_pb2.LaserKeypoint(type=kp_type, keypoint_3d=keypoint_3d)


def _create_all_keypoints_proto(
    all_loc: np.ndarray, all_vis: np.ndarray
) -> keypoint_pb2.LaserKeypoints:
  """Creates a LaserKeypoints proto with all visible keypoints of an object.

  Args:
    all_loc: keypoint locations, one row per keypoint type, listed in the
      order of `keypoint_data.CANONICAL_ORDER_LASER`.
    all_vis: visibility values in the same order; keypoints with the value 0
      are left out.

  Returns:
    A LaserKeypoints proto with one keypoint for every non-zero visibility.
  """
  visible_keypoints = (
      _create_keypoint_proto(loc, kp_type)
      for loc, vis, kp_type in zip(
          all_loc, all_vis, keypoint_data.CANONICAL_ORDER_LASER
      )
      if vis != 0
  )
  keypoints = keypoint_pb2.LaserKeypoints()
  keypoints.keypoint.extend(visible_keypoints)
  return keypoints


def _create_box_proto(
    center: np.ndarray, size: np.ndarray, heading: float
) -> box_pb2.Box3d:
  """Creates a Box3d proto.

  Args:
    center: array with x, y and z of the box center at indices 0, 1 and 2.
    size: array with x, y and z sizes of the box at indices 0, 1 and 2.
    heading: heading of the box.

  Returns:
    A Box3d proto populated with the given values.
  """
  center_xyz = {'x': center[0], 'y': center[1], 'z': center[2]}
  size_xyz = {'x': size[0], 'y': size[1], 'z': size[2]}
  return box_pb2.Box3d(center=center_xyz, size=size_xyz, heading=heading)


# A "detector" which returns a random number of objects with random keypoints.
def fake_pose_estimation(
    frame: dataset_pb2.Frame,
) -> Iterable[keypoints_submission_pb2.PoseEstimation]:
  """Yields randomly generated pose estimations for a frame.

  Only the context name and the timestamp of the frame are used (to build the
  key of every estimation); boxes and keypoints are sampled with
  `tf.random.uniform`.

  Args:
    frame: a Frame proto to generate the estimations for.

  Yields:
    PoseEstimation protos, one per generated object (fewer than 200 per call).
  """
  # An actual detector would use sensor data stored in the frame (e.g. camera
  # images from `frame.images`); for a fake detector using `tf.random` is good
  # enough.
  num_objects = tf.random.uniform(shape=(), maxval=200, dtype=tf.int32)
  max_num_keypoints = len(keypoint_data.CANONICAL_ORDER_LASER)
  locations = tf.random.uniform(
      shape=[num_objects, max_num_keypoints, 3], dtype=tf.float32
  )
  # Values are 0 or 1; keypoints with 0 are left out of the output.
  is_visible = tf.random.uniform(
      shape=[num_objects, max_num_keypoints], maxval=2, dtype=tf.int32
  )
  box_center = tf.random.uniform(shape=[num_objects, 3], dtype=tf.float32)
  box_size = tf.random.uniform(
      shape=[num_objects, 3], maxval=[1, 1, 2], dtype=tf.float32
  )
  box_heading = tf.random.uniform(shape=[num_objects], dtype=tf.float32)

  # Convert every tensor to numpy once and walk over the objects in lockstep.
  per_object_values = zip(
      locations.numpy(),
      is_visible.numpy(),
      box_center.numpy(),
      box_size.numpy(),
      box_heading.numpy(),
  )
  for all_loc, all_vis, center, size, heading in per_object_values:
    key = {
        'context_name': frame.context.name,
        'frame_timestamp_micros': frame.timestamp_micros,
    }
    box = _create_box_proto(center, size, heading)
    laser_keypoints = _create_all_keypoints_proto(all_loc, all_vis)
    yield keypoints_submission_pb2.PoseEstimation(
        key=key, box=box, laser_keypoints=laser_keypoints
    )


# Metadata of the submission; replace the placeholder values with real ones.
submission_metadata = dict(
    account_name='user@example.com',
    unique_method_name='Random Object Generator',
    authors=['First Author', 'Second Author'],
    affiliation='A Real Organization',
    description='Uses tf.random.uniform',
    method_link='http://example.com/project/page.html',
)
submission = keypoints_submission_pb2.PoseEstimationSubmission(
    **submission_metadata
)
# To make this faster as part of the tutorial, only the first 2 segment files
# are processed. To create a valid submission, all frames from the
# corresponding subset of the dataset need to be processed.
# NOTE: Frames with labeled keypoints that are missing in the submissions
# will be considered as false negatives and impact the resulting metric.
some_filesnames = filenames[:2]
print(f'Start processing {len(some_filesnames)} segments:')
records = tf.data.TFRecordDataset(some_filesnames, num_parallel_reads=8)
num_frames = 0  # Stays 0 if there is nothing to read.
for num_frames, buf in enumerate(tqdm.tqdm(records), start=1):
  frame = dataset_pb2.Frame.FromString(buf.numpy())
  submission.pose_estimations.extend(fake_pose_estimation(frame))

# Total number of keypoints over all generated objects.
keypoint_count = sum(
    len(e.laser_keypoints.keypoint) for e in submission.pose_estimations
)
print(
    f'Generated a submission with {len(submission.pose_estimations)} objects'
    f' and {keypoint_count} keypoints for {num_frames} frames.'
)

In [0]:
#@title Store compressed submission proto
import io
import tarfile

def _add_tar_file(tar: tarfile.TarFile, filename: str, content: bytes) -> None:
  """Adds an in-memory file to an open tar archive.

  Args:
    tar: archive opened for writing.
    filename: name of the new member inside the archive.
    content: bytes to store as the content of the member.
  """
  member = tarfile.TarInfo(name=filename)
  # The size has to be set explicitly: a new TarInfo has size 0 and `addfile`
  # copies exactly `member.size` bytes from the file object.
  member.size = len(content)
  with io.BytesIO(content) as payload:
    tar.addfile(member, fileobj=payload)


# Serialize the submission and store it as the only member of a gzip
# compressed tar archive.
submission_buf = submission.SerializeToString()
submission_path = os.path.join(OUTPUT_ROOT_DIRECTORY, 'submission.tar.gz')
with tarfile.open(submission_path, 'w:gz') as tar:
  _add_tar_file(tar, 'submission.binproto', submission_buf)

# Report the size of the archive next to the size of the serialized proto.
bytes_per_mb = 1 << 20
size_mb = os.path.getsize(submission_path) / bytes_per_mb
uncompressed_size_mb = len(submission_buf) / bytes_per_mb
print(
    f'Stored {size_mb:.2f} Mb (uncompressed {uncompressed_size_mb:.2f} Mb)'
    f' submission into {submission_path}'
)