## Development-Kit Tutorial for Zenseact Open Dataset
This notebook aims to introduce the ZodFrames & ZodSequences classes, which are helper classes to interact with the Frames and Sequences subsets of the Zenseact Open Dataset (ZOD) respectively. It will highlight some basic functionality that can later be used to build dataloaders in, for example, PyTorch.

This notebook also aims to give a brief introduction to which annotations exist and how to visualize them.

#### The dataset includes data from 3 sensor modalities and calibrations for each sensor:  
1. **Camera** - Anonymized (license plates and faces) front camera images. Available anonymization methods are:
    - blur (Blur)
    - dnat (Deep Fake)


2. **LiDAR** - The LiDAR point cloud is the closest LiDAR scan to the camera timestamp of the core frame. Zenseact Open Dataset also provides a range of LiDAR point clouds captured in [-1s, +1s] at 10Hz around the core frame for the sequences.


3. **OXTS** - High-precision GPS. OXTS data is provided in [-1s, ~10s] around the core frames for each sequence.

#### There are 4 types of annotated objects:  
1. **dynamic_objects** - objects that can move (vehicles, pedestrians etc.) - annotated with 2D/3D bounding boxes
2. **static_objects** - non-movable objects (light poles, traffic signs etc.) - annotated with 2D/3D bounding boxes
3. **lane_markings** - lane markings and road paintings - annotated with polygons
4. **ego_road** (Doesn't exist for all frames) - polygons that show the road where the ego vehicle can drive - annotated with polygons

# Initialization


In [None]:
import sys

sys.path.append("../")

# imports for plotting
from matplotlib import pyplot as plt

%matplotlib inline
plt.rcParams["figure.figsize"] = [20, 10]

# import the ZOD DevKit
from zod import ZodFrames
from zod import ZodSequences

# import default constants
import zod.constants as constants
from zod.constants import Camera, Lidar, Anonymization, AnnotationProject

# import useful data classes
from zod.data_classes import LidarData

# NOTE! Point `dataset_root` at your local copy of the dataset and choose a version
dataset_root = ""
version = "mini"  # "mini" or "full"

# both DevKit entry points are configured with the same root and version
zod_kwargs = dict(dataset_root=dataset_root, version=version)

# helper for the Frames subset
zod_frames = ZodFrames(**zod_kwargs)

# helper for the Sequences subset
zod_sequences = ZodSequences(**zod_kwargs)

### Split into Training and Validation sets

In [None]:
# default training/validation splits of the frames
training_frames = zod_frames.get_split(constants.TRAIN)
validation_frames = zod_frames.get_split(constants.VAL)

# report how many frames ended up in each split
print(
    f"Number of training frames: {len(training_frames)}",
    f"Number of validation frames: {len(validation_frames)}",
    sep="\n",
)

# the sequences are split in the same way
training_sequences = zod_sequences.get_split(constants.TRAIN)
validation_sequences = zod_sequences.get_split(constants.VAL)
print(
    f"Number of training sequences: {len(training_sequences)}",
    f"Number of validation sequences: {len(validation_sequences)}",
    sep="\n",
)

# peek at the ids of the first 5 training frames
print("The 5 first training frames have the ids:", training_frames[:5])

# and at the id of the very first training sequence
print("The first training sequence has the id:", training_sequences[0])

# ZodFrames
### Fetch a ZodFrame
The ZodFrames class yields a `ZodFrame` which acts as a cache for the light-weight data (e.g., ego-motion, calibration, and metadata), but also holds an `info` attribute. This in turn holds all the paths to more heavy-weight data (e.g., images and point clouds).


In [None]:
# a frame can be looked up by its string id ...
frame_from_id = zod_frames["009158"]
# ... or by the matching integer index
frame_from_idx = zod_frames[9158]

# both lookups must describe the same frame
assert frame_from_id.info == frame_from_idx.info

### Look at some data within a ZodFrame

In [None]:
zod_frame = zod_frames[62592]

# each frame exposes its metadata record
metadata = zod_frame.metadata

# show a handful of the metadata fields
print(
    f"Frame id: {metadata.frame_id}",
    f"Country Code: {metadata.country_code}",
    f"Time of day: {metadata.time_of_day}",
    f"Number of vehicles in the frame: {metadata.num_vehicles}",
    sep="\n",
)

In [None]:
# the frame gives access to the OXTS data of the ego vehicle
oxts = zod_frame.oxts

# print the shape of each OXTS field
print(
    f"Acceleration: {oxts.accelerations.shape}",
    f"Velocities: {oxts.velocities.shape}",
    f"Poses: {oxts.poses.shape}",
    f"Timestamps: {oxts.timestamps.shape}",
    sep="\n",
)

In [None]:
# the frame also gives access to the ego-motion of the ego vehicle
# (note that the ego-motion is a lightweight version of the OXTS data)
ego_motion = zod_frame.ego_motion

# print the shape of each ego-motion field
print(
    f"Acceleration: {ego_motion.accelerations.shape}",
    f"Velocities: {ego_motion.velocities.shape}",
    f"Poses: {ego_motion.poses.shape}",
    f"Timestamps: {ego_motion.timestamps.shape}",
    sep="\n",
)

In [None]:
# take the first pose of the ego-motion
# NOTE(review): assumes `poses` is a numpy array so that poses[0][...] == poses[0, ...]
first_pose = zod_frame.ego_motion.poses[0]

# rotation matrix: the upper-left 3x3 part of the pose
rotation_matrix = first_pose[:3, :3]
print(rotation_matrix)

# translation: the first three entries of the last column
translation = first_pose[:3, -1]
print(translation)

In [None]:
# the sensor calibrations are available on the frame as well
calibrations = zod_frame.calibration

# print the calibration of the Velodyne lidar, followed by that of the front camera
print(
    calibrations.lidars[Lidar.VELODYNE],
    calibrations.cameras[Camera.FRONT],
    sep="\n",
)

#### Camera Data 

In [None]:
# the frame info holds the paths to the heavy-weight data
frame_info = zod_frame.info

# pick the camera core-frame (front camera) anonymized with dnat
camera_core_frame = frame_info.get_key_camera_frame(Anonymization.DNAT)
print(camera_core_frame)

In [None]:
# the image can be loaded through the camera frame ...
image = camera_core_frame.read()
# ... or through the helper on the frame itself (shown for reference; its result is not used below)
zod_frame.get_image(Anonymization.DNAT)

# display the image without axes
plt.imshow(image)
plt.axis("off")
plt.show()

#### Ego Position Data

In [None]:
from zod.visualization.oxts_on_image import visualize_oxts_on_image
import numpy as np
import cv2

zod_frame = zod_frames["082291"]

# inputs for the projection: OXTS data, sensor calibrations and the camera image
oxts = zod_frame.oxts
calibrations = zod_frame.calibration
image = zod_frame.get_image(Anonymization.DNAT)

# time of the keyframe (into which we want to project the oxts points)
key_timestamp = zod_frame.info.keyframe_time.timestamp()

# draw the oxts data on the front camera image
image = visualize_oxts_on_image(oxts, key_timestamp, calibrations, image, camera=Camera.FRONT)

plt.imshow(image)
plt.axis("off")
plt.show()

#### LiDAR Data
Lidar fields description:

| Name | Type | Units | Description |
| --- | --- | --- | --- |
| 'timestamp' | string |  seconds  | UTC timestamp of each point. |
| 'x' | double |  meters  | x coordinate of the point in lidar frame |
| 'y' | double |  meters  | y coordinate of the point in lidar frame |
| 'z' | double |  meters  | z coordinate of the point in lidar frame |
| 'intensity' | double |    | intensity level of each point in range [0..255] |
| 'diode_index' | integer |    | index of diode emitter which produced a point (1..128) |

In [None]:
zod_frame = zod_frames[62592]

# the frame info knows which lidar scan is the core-frame
frame_info = zod_frame.info
lidar_core_frame = frame_info.get_key_lidar_frame()
print(lidar_core_frame)

In [None]:
# read the point cloud of the lidar core-frame
pc = lidar_core_frame.read()

# the result is a LidarData dataclass, a wrapper around several numpy arrays
assert isinstance(pc, LidarData)

# the helper functions on the frame itself return the same point cloud
assert zod_frame.get_lidar()[0] == pc
assert zod_frame.get_lidar_frames()[0].read() == pc

# print the shape of each field of the point cloud
print(
    f"Points: {pc.points.shape}",  # x, y, z
    f"Timestamps: {pc.timestamps.shape}",
    f"Intensity: {pc.intensity.shape}",
    f"Diode: {pc.diode_idx.shape}",
    sep="\n",
)

# TODO: add visualization, e.g. 3d scatter plot with plotly

# Annotations


In [None]:
# switch to a new frame
zod_frame = zod_frames["082291"]

# load its object annotations
annotations = zod_frame.get_annotation(AnnotationProject.OBJECT_DETECTION)

# pick a single annotation object by index
idx = 31
selected_annotation = annotations[idx]
print(f"Annotation: {selected_annotation.name}")

# every object can carry both a 2d and a 3d box
annotation_2d = selected_annotation.box2d
annotation_3d = selected_annotation.box3d
print(annotation_2d, annotation_3d, sep="\n")

In [None]:
from zod.visualization.object_visualization import overlay_object_2d_box_on_image
from zod.visualization.object_visualization import overlay_object_3d_box_on_image

# load the front camera image that the annotations are drawn on
camera_core_frame = zod_frame.info.get_key_camera_frame(Anonymization.DNAT)
image = camera_core_frame.read()

# first figure: the 2d box drawn on the image
image = overlay_object_2d_box_on_image(image, annotation_2d, color=(255, 0, 0), line_thickness=10)

plt.figure()
plt.imshow(image)
plt.axis("off")

# second figure: the 3d box added on top; projecting it into the image
# additionally requires the calibrations of the sensor
calibrations = zod_frame.calibration
image = overlay_object_3d_box_on_image(
    image,
    annotation_3d,
    calibrations,
    color=(255, 0, 0),
    line_thickness=10,
)

plt.figure()
plt.imshow(image)
plt.axis("off")
plt.show()

In [None]:
from zod.utils.polygon_transformations import polygons_to_binary_mask

zod_frame = zod_frames[9158]

# load the ego road polygons of this frame
polygon_annotations = zod_frame.get_annotation(AnnotationProject.EGO_ROAD)

# rasterize the polygons into a binary mask (usable as ground truth
# for e.g. semantic segmentation)
mask = polygons_to_binary_mask(polygon_annotations)

# show the mask
plt.imshow(mask)
plt.axis("off")
plt.show()

In [None]:
# switch to another frame
zod_frame = zod_frames[23996]

# load the lane markings polygons of this frame
project = AnnotationProject.LANE_MARKINGS
polygon_annotations = zod_frame.get_annotation(project)

# rasterize the polygons into a binary mask
mask = polygons_to_binary_mask(polygon_annotations)

# show the mask
plt.imshow(mask)
plt.axis("off")
plt.show()

In [None]:
# We can overlay the ego road annotations on the image
from zod.visualization.polygon_utils import overlay_mask_on_image
from zod.utils.polygon_transformations import polygons_to_binary_mask

zod_frame = zod_frames[9158]

# read the front camera core-frame image (dnat anonymization)
camera_core_frame = zod_frame.info.get_key_camera_frame(Anonymization.DNAT)
image = camera_core_frame.read()

# rasterize the ego road polygons into a binary mask (usable as ground
# truth for e.g. semantic segmentation)
polygon_annotations = zod_frame.get_annotation(AnnotationProject.EGO_ROAD)
mask = polygons_to_binary_mask(polygon_annotations)

# blend the mask into the image
image = overlay_mask_on_image(mask, image, fill_color=(100, 0, 0), alpha=0.5)

# show the image with the overlaid mask
plt.imshow(image)
plt.axis("off")
plt.show()

In [None]:
# the lane markings annotations can be overlaid on the image in the same way
zod_frame = zod_frames[29229]

# read the front camera core-frame image (dnat anonymization)
camera_core_frame = zod_frame.info.get_key_camera_frame(Anonymization.DNAT)
image = camera_core_frame.read()

# rasterize the lane markings polygons into a binary mask (usable as
# ground truth for e.g. semantic segmentation)
polygon_annotations = zod_frame.get_annotation(AnnotationProject.LANE_MARKINGS)
mask = polygons_to_binary_mask(polygon_annotations)

# blend the mask into the image
image = overlay_mask_on_image(mask, image, fill_color=(0, 0, 100), alpha=0.75)

# show the image with the overlaid mask
plt.imshow(image)
plt.axis("off")
plt.show()

In [None]:
# Visualize LiDAR and objects in Bird's Eye View
from zod.visualization.lidar_bev import BEVBox

import numpy as np

zod_frame = zod_frames["009158"]

# LiDAR point cloud of the frame
pcd = zod_frame.get_lidar()[0]

# object annotations of the frame
object_annotations = zod_frame.get_annotation(AnnotationProject.OBJECT_DETECTION)

# only objects that carry a 3d box can be drawn in the bird's eye view
objects_with_box3d = [obj for obj in object_annotations if obj.box3d]

# per-point input: the point coordinates with the intensity appended as an extra column
points_with_intensity = np.hstack((pcd.points, pcd.intensity[:, None]))

# per-object input: class names, box centers, box sizes and box orientations
box_names = np.array([obj.name for obj in objects_with_box3d])
box_centers = np.concatenate([obj.box3d.center[None, :] for obj in objects_with_box3d], axis=0)
box_sizes = np.concatenate([obj.box3d.size[None, :] for obj in objects_with_box3d], axis=0)
box_orientations = np.array([obj.box3d.orientation for obj in objects_with_box3d])

bev = BEVBox()
bev_image = bev(
    points_with_intensity,
    (box_names, box_centers, box_sizes, box_orientations),
)

In [None]:
# we can also visualize the lidar point cloud in the image
from zod.visualization.lidar_on_image import visualize_lidar_on_image

zod_frame = zod_frames["087912"]

# the camera image and the timestamp of the keyframe
image = zod_frame.get_image()
image_timestamp = zod_frame.info.keyframe_time.timestamp()

# Get a single Lidar point cloud
core_lidar = zod_frame.get_lidar()[0]
# Motion-compensate it to the image timestamp (minorly improves alignment)
compensated_lidar = zod_frame.compensate_lidar(core_lidar, image_timestamp)
# Visualize by projecting the motion-compensated point cloud onto the image.
# (The uncompensated `core_lidar` used to be passed here, which left the
# compensation step above without any effect.)
lid_image = visualize_lidar_on_image(
    compensated_lidar,
    zod_frame.calibration,
    image,
)
plt.axis("off")
plt.title("Core LIDAR projected onto image")
plt.imshow(lid_image)
plt.show()

# Plot aggregated Lidar point cloud: the core scan together with the
# 10 scans before it, aggregated with respect to the image timestamp
aggregated_lidar = zod_frame.get_aggregated_lidar(
    num_before=10, num_after=0, timestamp=image_timestamp
)
lid_image = visualize_lidar_on_image(
    aggregated_lidar,
    zod_frame.calibration,
    image,
)
plt.axis("off")
plt.title("Aggregated LIDAR projected onto image")
plt.imshow(lid_image)
plt.show()

In [None]:
# everything can also be visualized together in a single image
zod_frame = zod_frames[9158]

# collect the inputs: image, calibrations, point cloud, boxes and ego road mask
image = zod_frame.get_image(Anonymization.DNAT)
calibrations = zod_frame.calibration
pcd = zod_frame.get_aggregated_lidar(num_before=3)
annotations = zod_frame.get_annotation(AnnotationProject.OBJECT_DETECTION)
polygon_annotations = zod_frame.get_annotation(AnnotationProject.EGO_ROAD)
mask = polygons_to_binary_mask(polygon_annotations)

# layer the point cloud, the mask and finally the 3d boxes onto the image
image = visualize_lidar_on_image(pcd, calibrations, image)
image = overlay_mask_on_image(mask, image, fill_color=(100, 0, 0), alpha=0.5)
for annotation in annotations:
    # objects without a 3d box cannot be projected
    if not annotation.box3d:
        continue
    image = overlay_object_3d_box_on_image(
        image,
        annotation.box3d,
        calibrations,
        color=(0, 100, 0),
        line_thickness=10,
    )

plt.imshow(image)
plt.axis("off")
plt.show()

# ZodSequence
Visualization functionality for ZodFrames also works on sequences. Let's take a quick look.

### Fetch a ZodSequence
The ZodSequences class yields a `ZodSequence` which acts as a cache for the light-weight data (e.g., ego-motion, calibration, and metadata), but also holds an `info` attribute. This in turn holds all the paths to more heavy-weight data (e.g., images and point clouds) for all timesteps in the sequence. Note that annotations are only provided for a single frame, namely the `key_frame`.



In [None]:
# sequences are fetched just like frames
seq = zod_sequences[training_sequences[0]]

# count the lidar frames of the sequence
num_lidar_frames = len(seq.info.get_lidar_frames(lidar=Lidar.VELODYNE))
print(f"Number of lidar frames: {num_lidar_frames}")

# count the original (non-anonymized) camera frames
num_camera_frames = len(seq.info.get_camera_frames(anonymization=Anonymization.ORIGINAL))
print(f"Number of camera frames: {num_camera_frames}")

# length of the sequence in seconds
timespan = (seq.info.end_time - seq.info.start_time).total_seconds()
print(f"Timespan: {timespan}")

In [None]:
from zod.visualization.lidar_on_image import visualize_lidar_on_image

# get the key frames
key_camera_frame = seq.info.get_key_camera_frame(Anonymization.ORIGINAL)
key_lidar_frame = seq.info.get_key_lidar_frame()

# get the annotations (when they are ready...)
# Annotations may be unavailable for a sequence, so loading stays best-effort.
# Only `Exception` is caught (a bare `except:` would also swallow
# KeyboardInterrupt/SystemExit) and the reason is reported instead of hidden.
try:
    annotations = seq.get_annotation(AnnotationProject.OBJECT_DETECTION)
except Exception as err:
    print(f"Could not load object annotations for this sequence: {err!r}")
    annotations = []

image = key_camera_frame.read()
pcd = key_lidar_frame.read()

# project the key lidar point cloud onto the key camera image
image = visualize_lidar_on_image(
    pcd,
    seq.calibration,
    image,
)

# draw the 3d boxes using the calibration of *this* sequence; the previous
# code reused the `calibrations` variable left over from an earlier frame cell
for annotation in annotations:
    if annotation.box3d:
        image = overlay_object_3d_box_on_image(
            image, annotation.box3d, seq.calibration, color=(0, 100, 0), line_thickness=10
        )

plt.axis("off")
plt.imshow(image)
plt.show()

In [None]:
# we can also visualize the entire sequence

draw_every_nth = 20  # (Only visualize every nth frame for speed...)

# get the mapping between camera and lidar frames
frames = seq.info.get_camera_lidar_map(Anonymization.ORIGINAL, Camera.FRONT, Lidar.VELODYNE)

images = []
# iterate over the (camera frame, lidar frame) pairs
for i, frame in enumerate(frames):
    # skip all but every nth pair
    if i % draw_every_nth != 0:
        continue

    camera_frame, lidar_frame = frame

    img = camera_frame.read()
    pcd = lidar_frame.read()

    lid_image = visualize_lidar_on_image(
        pcd,
        seq.calibration,
        img,
    )
    # collect the image WITH the projected point cloud; previously the raw
    # camera image `img` was appended and `lid_image` was thrown away
    images.append(lid_image)

In [None]:
# Create a gif with all the frames (requires imageio)
import imageio

# `IPython.display` is the public display API; `IPython.core.display` is internal
from IPython.display import Image

# write the collected images to disk as an animated gif ...
imageio.mimsave("test.gif", images, duration=0.2)
# ... and display it as the output of this cell
Image(filename="test.gif")