In [22]:
import lidar

first converting the bagfile to a parquet file

In [40]:
bagfile = "/workspaces/lidar/tests/testdata/big.bag"
bagfile_bag = rosbag.Bag(bagfile)

In [43]:
#!/usr/bin/env python
import pandas as pd
import numpy as np
import itertools
from typing import Optional, List
import rosbag
from tqdm import tqdm
import sensor_msgs.point_cloud2 as pc2
from dask import delayed
import dask.dataframe as dd


PANDAS_TYPEMAPPING = {
    1: np.dtype("int8"),
    2: np.dtype("uint8"),
    3: np.dtype("int16"),
    4: np.dtype("uint16"),
    5: np.dtype("int32"),
    6: np.dtype("uint32"),
    7: np.dtype("float32"),
    8: np.dtype("float64"),
}



def read_bag(
    bag: rosbag.Bag,
    start_frame_number: Optional[int] = 0,
    end_frame_number: Optional[int] = None,
    keep_zeros: bool = False,
    topic: str = "/os1_cloud_node/points",
) -> List:
    messages = bag.read_messages(topics=[topic])
    sliced_messages = itertools.islice(messages, start_frame_number, None)
    result_list = []
    if end_frame_number is None:
        end_frame_number = 2  # TODO fix to lenght of messages
    for frame_number in tqdm(range(start_frame_number, end_frame_number, 1)):
        message = next(sliced_messages)
        frame = delayed(dataframe_from_message(message, keep_zeros))
        result_list.append(frame)
    return result_list


def dataframe_from_message(
    message: rosbag.bag.BagMessage, keep_zeros: bool = False
) -> pd.DataFrame:
    columnnames = [item.name for item in message.message.fields]
    type_dict = {
        item.name: PANDAS_TYPEMAPPING[item.datatype] for item in message.message.fields
    }
    frame_raw = pc2.read_points(message.message)
    frame_df = pd.DataFrame(np.array(list(frame_raw)), columns=columnnames)
    frame_df = frame_df.astype(type_dict)
    if not keep_zeros:
        frame_df = frame_df[
            (frame_df["x"] != 0.0) & (frame_df["y"] != 0.0) & (frame_df["z"] != 0.0)
        ]
        frame_df["original_id"] = frame_df.index
        frame_df = frame_df.astype({"original_id": "uint32"})
        frame_df = frame_df.reset_index(drop=True)
    return frame_df


  

In [44]:
lazy_dataframes = read_bag(bagfile_bag, 0, 20, False, "/os1_cloud_node/points")

100%|██████████| 20/20 [00:06<00:00,  3.06it/s]


In [45]:
test = dd.from_delayed(lazy_dataframes)

In [46]:
test.x.max().compute()

1.1572314500808716

In [47]:
test.tail()

Unnamed: 0,x,y,z,intensity,t,reflectivity,ring,noise,range,original_id
112033,0.822196,-0.013709,-0.133023,89.0,99945160,6,49,53,833,131057
112034,0.811021,0.016207,-0.138964,85.0,99945160,6,50,33,823,131058
112035,0.820294,0.046462,-0.148941,74.0,99945160,5,51,81,835,131059
112036,0.820693,-0.013927,-0.163821,93.0,99945160,6,53,34,837,131061
112037,0.797619,0.045275,-0.175723,91.0,99945160,6,55,32,818,131063


In [48]:
test.head()

Unnamed: 0,x,y,z,intensity,t,reflectivity,ring,noise,range,original_id
0,0.796484,-0.04322,0.236579,50.0,0,3,0,44,832,0
1,0.807739,0.046913,0.214302,134.0,0,9,3,42,837,3
2,0.808805,0.016405,0.19007,153.0,0,10,6,37,831,6
3,0.818806,-0.043872,0.177631,65.0,0,4,8,42,839,8
4,0.821585,-0.01381,0.169493,136.0,0,9,9,25,839,9


In [49]:
test

Unnamed: 0_level_0,x,y,z,intensity,t,reflectivity,ring,noise,range,original_id
npartitions=20,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
,float32,float32,float32,float32,uint32,uint16,uint8,uint16,uint32,uint32
,...,...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...,...,...
