Basic Single Pedestrian Crossing dataset preview with pose projection overlays
==============================================================================

This notebook demonstrates how to load pose projection overlays and use various helper/rendering functions/classes. It requires the `pedestrians_scenarios` package to be installed.

In [None]:
# Root directory of the dataset: data.csv and the clips/ recordings are read from
# it, and the rendered overlay videos are written to its poses/ subdirectory.
DATASET_DIR = '/datasets/CARLA/WideCamera'
# Upper bound on the number of overlay videos rendered by this notebook.
MAX_RENDERED_VIDEOS = 24  # float('inf') # for all in dataset
# Upper bound on the number of rendered videos embedded in the notebook output.
MAX_DISPLAYED_VIDEOS = 8  # so that notebook doesn't crash

In [None]:
import pandas as pd
import ast
import os

from pedestrians_scenarios.karma.utils.conversions import convert_list_to_vector3d, convert_list_to_transform
from pedestrians_scenarios.karma.pose.pose_dict import convert_list_to_pose_dict, convert_list_to_pose_2d_dict

def convert_to_list(x):
    """Parse the textual form of a (nested) list stored in a CSV cell.

    Bare ``nan`` tokens are not valid Python literals, so they are quoted
    before parsing and come back as the string ``'nan'``.

    Args:
        x: raw cell text handed over by pandas.

    Returns:
        The parsed Python object, or ``str(x)`` unchanged when the text is
        not a valid Python literal.
    """
    try:
        return ast.literal_eval(x.replace('nan', '"nan"'))
    except (ValueError, SyntaxError):
        # for some reason pandas tries to convert the column name too...
        # Depending on the text, ast.literal_eval reports a non-literal either
        # as ValueError (e.g. a dotted column name) or as SyntaxError (e.g. an
        # empty cell or text containing spaces), so both fall back to the raw text.
        return str(x)

# hint: only enable converters for the columns that are really needed,
# parsing them takes a long time
column_converters = {
    # 'camera.transform': lambda x: convert_list_to_transform(convert_to_list(x)),
    'pedestrian.spawn_point': lambda raw: convert_list_to_transform(convert_to_list(raw)),
    'frame.pedestrian.transform': lambda raw: convert_list_to_transform(convert_to_list(raw)),
    # 'frame.pedestrian.velocity': lambda x: convert_list_to_vector3d(convert_to_list(x)),
    # 'frame.pedestrian.pose.world': lambda x: convert_list_to_pose_dict(convert_to_list(x)),
    # 'frame.pedestrian.pose.component': lambda x: convert_list_to_pose_dict(convert_to_list(x)),
    # 'frame.pedestrian.pose.relative': lambda x: convert_list_to_pose_dict(convert_to_list(x)),
    'frame.pedestrian.pose.camera': lambda raw: convert_list_to_pose_2d_dict(convert_to_list(raw)),
}

# rows are indexed by (clip id, camera, frame, pedestrian)
dataset = pd.read_csv(
    os.path.join(DATASET_DIR, 'data.csv'),
    converters=column_converters,
    index_col=['id', 'camera.idx', 'frame.idx', 'pedestrian.idx'],
)

In [None]:
# extract the correct frame images from the recordings
# for each frame, overlay the pose on the image
# and save as a new video

import numpy as np
from tqdm.auto import tqdm

from pedestrians_scenarios.karma.pose.skeleton import CARLA_SKELETON
from pedestrians_scenarios.karma.renderers.source_videos_renderer import SourceVideosRenderer

# the rendered overlays are written to <DATASET_DIR>/poses, create it up front
os.makedirs(os.path.join(DATASET_DIR, 'poses'), exist_ok=True)

# one group of rows per (clip, camera, pedestrian) combination
video_groups = dataset.groupby(level=['id', 'camera.idx', 'pedestrian.idx'])

# bookkeeping used to cap the number of rendered videos
current_rendered = 0
total_rendered = min(MAX_RENDERED_VIDEOS, len(video_groups))

# assume all videos have the same size, so the first row is used for everything
frame_width = dataset['camera.width'].iloc[0]
frame_height = dataset['camera.height'].iloc[0]

renderer = SourceVideosRenderer(
    data_dir=os.path.join(DATASET_DIR, 'clips'),
    overlay_skeletons=True,
    overlay_labels=True,
    image_size=(frame_width, frame_height),
)

def rounded_point(point):
    """Return ``point.location`` as an ``(x, y, z)`` tuple rounded to 2 decimals."""
    location = point.location
    return tuple(round(getattr(location, axis), 2) for axis in ('x', 'y', 'z'))

for (index_label, row_group) in tqdm(video_groups, desc='Rendering clip', total=total_rendered):
    # stop as soon as the configured number of videos has been rendered
    if current_rendered >= MAX_RENDERED_VIDEOS:
        break

    # the group key follows the groupby levels: (id, camera.idx, pedestrian.idx)
    clip_id, _, pedestrian_id = index_label

    # this is derived from camera.idx and other values
    recording = row_group['camera.recording'].iloc[0]
    video_id = recording.replace('clips/', '').replace('.mp4', '')

    # range of frames covered by the rows of this group
    frame_indices = row_group.index.get_level_values('frame.idx')
    start_frame = frame_indices.min()
    end_frame = frame_indices.max()

    # one (num_keypoints, 2) float32 array of 2D camera coordinates per row
    projections = [
        np.array([[joint.x, joint.y] for joint in pose.values()], dtype=np.float32)
        for pose in row_group['frame.pedestrian.pose.camera']
    ]

    skeleton_overlay = {
        'keypoints': projections,
        'color': None,
        'type': CARLA_SKELETON
    }
    frame_labels = {
        'is_crossing': row_group['frame.pedestrian.is_crossing'].values,
        # keep only the last '/'-separated component of the map name
        'map': row_group['world.map'].str.split('/').str[-1].values,
        'spawn': row_group['pedestrian.spawn_point'].apply(rounded_point).values,
        'current': row_group['frame.pedestrian.transform'].apply(rounded_point).values,
    }

    frames = renderer.render_clip(
        video_id=video_id,
        pedestrian_id=pedestrian_id,
        clip_id=clip_id,
        start_frame=start_frame,
        end_frame=end_frame,
        bboxes=None,
        skeletons=[skeleton_overlay],
        labels=frame_labels
    )
    renderer.frames_to_video(frames, video_id, os.path.join(DATASET_DIR, 'poses'))

    current_rendered += 1

    

In [None]:
import glob
import random
from IPython.display import HTML

# collect every rendered overlay video and put them in random order
rendered_videos_pattern = os.path.join(DATASET_DIR, 'poses', '*.mp4')
videos_list = glob.glob(rendered_videos_pattern)
random.shuffle(videos_list)

In [None]:
# only embed the first MAX_DISPLAYED_VIDEOS entries of the shuffled list
shown_videos_list = videos_list[:MAX_DISPLAYED_VIDEOS]

# one <video> tag per shown file, joined into a single HTML snippet
html_source = [
    '<video width="48%" controls src="{}" type="video/mp4"></video>'.format(video_path)
    for video_path in shown_videos_list
]

HTML(data='\n'.join(html_source))