In [1]:
import lidar
import rosbag
from dask.distributed import Client
client = Client()  # start distributed scheduler locally.  Launch dashboard

Failed to load Python extension for LZ4 support. LZ4 compression will not be available.


first converting the bagfile to a parquet file

In [2]:
bagfile = "/workspaces/lidar/tests/testdata/big.bag"
bagfile_bag = rosbag.Bag(bagfile)

In [3]:
#!/usr/bin/env python
import pandas as pd
import numpy as np
import itertools
from typing import Optional, List
import rosbag
from tqdm import tqdm
import sensor_msgs.point_cloud2 as pc2
from dask import delayed
import dask.dataframe as dd


PANDAS_TYPEMAPPING = {
    1: np.dtype("int8"),
    2: np.dtype("uint8"),
    3: np.dtype("int16"),
    4: np.dtype("uint16"),
    5: np.dtype("int32"),
    6: np.dtype("uint32"),
    7: np.dtype("float32"),
    8: np.dtype("float64"),
}



def read_bag(
    bag: rosbag.Bag,
    start_frame_number: Optional[int] = 0,
    end_frame_number: Optional[int] = None,
    keep_zeros: bool = False,
    topic: str = "/os1_cloud_node/points",
) -> List:
    messages = bag.read_messages(topics=[topic])
    sliced_messages = itertools.islice(messages, start_frame_number, None)
    result_list = []
    if end_frame_number is None:
        end_frame_number = 2  # TODO fix to lenght of messages
    for frame_number in tqdm(range(start_frame_number, end_frame_number, 1)):
        message = next(sliced_messages)
        frame = delayed(dataframe_from_message(message, keep_zeros))
        result_list.append(frame)
    return result_list


def dataframe_from_message(
    message: rosbag.bag.BagMessage, keep_zeros: bool = False
) -> pd.DataFrame:
    columnnames = [item.name for item in message.message.fields]
    type_dict = {
        item.name: PANDAS_TYPEMAPPING[item.datatype] for item in message.message.fields
    }
    frame_raw = pc2.read_points(message.message)
    frame_df = pd.DataFrame(np.array(list(frame_raw)), columns=columnnames)
    frame_df = frame_df.astype(type_dict)
    if not keep_zeros:
        frame_df = frame_df[
            (frame_df["x"] != 0.0) & (frame_df["y"] != 0.0) & (frame_df["z"] != 0.0)
        ]
        frame_df["original_id"] = frame_df.index
        frame_df = frame_df.astype({"original_id": "uint32"})
        frame_df = frame_df.reset_index(drop=True)
    return frame_df


  

In [104]:
lazy_dataframes = read_bag(bagfile_bag, 0, 10, False, "/os1_cloud_node/points")

100%|██████████| 10/10 [00:05<00:00,  1.99it/s]


In [63]:
test = dd.from_delayed(lazy_dataframes)

In [18]:
test.x.max().compute()

0.9937447905540466

In [11]:
test.tail()

Unnamed: 0,x,y,z,intensity,t,reflectivity,ring,noise,range,original_id
111817,0.805429,0.045549,-0.115905,79.0,99926580,5,47,24,815,131055
111818,0.79422,-0.013478,-0.158536,108.0,99926580,7,53,39,810,131061
111819,0.80339,0.015773,-0.168396,101.0,99926580,7,54,58,821,131062
111820,0.762607,0.014853,-0.189751,69.0,99926580,4,58,37,786,131066
111821,0.776013,0.043899,-0.201706,64.0,99926580,4,59,32,803,131067


In [12]:
test.head()

Unnamed: 0,x,y,z,intensity,t,reflectivity,ring,noise,range,original_id
0,0.796484,-0.04322,0.236579,50.0,0,3,0,44,832,0
1,0.807739,0.046913,0.214302,134.0,0,9,3,42,837,3
2,0.808805,0.016405,0.19007,153.0,0,10,6,37,831,6
3,0.818806,-0.043872,0.177631,65.0,0,4,8,42,839,8
4,0.821585,-0.01381,0.169493,136.0,0,9,9,25,839,9


In [13]:
test

Unnamed: 0_level_0,x,y,z,intensity,t,reflectivity,ring,noise,range,original_id
npartitions=200,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
,float32,float32,float32,float32,uint32,uint16,uint8,uint16,uint32,uint32
,...,...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...,...,...


In [14]:
lazy_dataframes[0].compute()

Unnamed: 0,x,y,z,intensity,t,reflectivity,ring,noise,range,original_id
0,0.796484,-0.043220,0.236579,50.0,0,3,0,44,832,0
1,0.807739,0.046913,0.214302,134.0,0,9,3,42,837,3
2,0.808805,0.016405,0.190070,153.0,0,10,6,37,831,6
3,0.818806,-0.043872,0.177631,65.0,0,4,8,42,839,8
4,0.821585,-0.013810,0.169493,136.0,0,9,9,25,839,9
...,...,...,...,...,...,...,...,...,...,...
112182,0.801167,0.016010,-0.137276,107.0,99939680,7,50,43,813,131058
112183,0.814400,0.046128,-0.147871,86.0,99939680,6,51,56,829,131059
112184,0.809908,-0.013744,-0.161668,107.0,99939680,7,53,39,826,131061
112185,0.795561,0.015620,-0.166755,86.0,99939680,6,54,57,813,131062


This is very promissing! I could just keep the tiny_lazy_dataframes and then convert it to a frame whenever needed!

What about the meta data, how can I use that?

In [17]:
test.to_parquet("/workspaces/export_test")

In [4]:
test2 = dd.read_parquet("/workspaces/export_test")

In [5]:
test2.x.max().compute()

1.252312421798706

Writing meta data as a json into the same folder

In [96]:
def pipeline(df: pd.DataFrame, partition_info=None):
    frame_number = partition_info["number"]
    df = df.reset_index(drop=True)
    df["frame_number"] = frame_number
    frame = lidar.Frame.from_instance("dataframe", df)
    frame = frame.limit("x", 0,1)
    df = frame.data
    return pd.DataFrame()

In [97]:
res = test.map_partitions(pipeline).compute()

In [98]:
res.x.max().compute()

AttributeError: 'DataFrame' object has no attribute 'x'

In [94]:
res

0    None
1    None
2    None
3    None
4    None
5    None
6    None
7    None
8    None
9    None
dtype: object

In [64]:
test3 = test.to_delayed()

In [90]:
res

Unnamed: 0,x,y,z,intensity,t,reflectivity,ring,noise,range,original_id,frame_number
0,0.796484,-0.043220,0.236579,50.0,0,3,0,44,832,0,0
1,0.807739,0.046913,0.214302,134.0,0,9,3,42,837,3,0
2,0.808805,0.016405,0.190070,153.0,0,10,6,37,831,6,0
3,0.818806,-0.043872,0.177631,65.0,0,4,8,42,839,8,0
4,0.821585,-0.013810,0.169493,136.0,0,9,9,25,839,9,0
...,...,...,...,...,...,...,...,...,...,...,...
53866,0.789690,0.015505,-0.165525,113.0,99925450,7,54,70,807,131062,9
53867,0.793718,0.045054,-0.174864,98.0,99925450,6,55,18,814,131063,9
53868,0.770258,-0.013596,-0.183800,51.0,99925450,3,57,29,792,131065,9
53869,0.755816,0.014721,-0.188061,81.0,99925450,5,58,40,779,131066,9


In [68]:
test4 = test3[0]

In [72]:
def pipeline2(df: pd.DataFrame, ):
    frame = lidar.Frame.from_instance("dataframe", df)
    frame.limit("x", 0,1)
    return frame.data

In [75]:
test4(len).compute()

TypeError: 'DataFrame' object is not callable

In [21]:
meta = {"oring_file": "sepp", "timestamps": [234234.234234, 234234.22234]}

In [22]:
import json

In [24]:
json.dump(meta)

TypeError: dump() missing 1 required positional argument: 'fp'

In [25]:
with open("/workspaces/export_test/data_file.json", "w") as write_file:
    json.dump(meta, write_file)


In [26]:
with open("/workspaces/export_test/data_file.json", "r") as read_file:
    data = json.load(read_file)


In [28]:
data["t"]

'sepp'

In [6]:
type(test2)

dask.dataframe.core.DataFrame

In [10]:
test2.describe().compute()

Unnamed: 0,x,y,z,intensity,t,reflectivity,ring,noise,range,original_id
count,22405810.0,22405810.0,22405810.0,22405810.0,22405810.0,22405810.0,22405810.0,22405810.0,22405810.0,22405810.0
mean,-1.395355,-0.1555028,0.1420729,109.8774,49397940.0,298.1485,31.5414,44.84404,2951.956,64785.94
std,3.188425,2.228653,0.7776557,132.3687,27723680.0,731.4155,18.37981,58.30621,3002.505,36345.0
min,-34.5218,-15.38729,-0.665405,8.0,0.0,1.0,0.0,0.0,665.0,0.0
25%,-1.821742,-1.5286,-0.2560951,47.0,25745800.0,7.0,16.0,30.0,1212.0,33750.75
50%,0.02228281,0.02162177,-0.004399885,71.0,50354340.0,21.0,32.0,39.0,1956.0,66066.0
75%,0.7839448,1.622289,0.2577102,129.0,74855910.0,115.0,48.0,51.0,3280.0,98234.5
max,1.252312,5.981112,6.153335,2566.0,99984610.0,19740.0,63.0,2071.0,35017.0,131070.0


In [24]:
test2.shape

(Delayed('int-3bde0fd9-9481-440e-8069-0e92b3cb828e'), 10)

In [25]:
len(test2)

22405807

In [28]:
test2

Unnamed: 0_level_0,x,y,z,intensity,t,reflectivity,ring,noise,range,original_id
npartitions=200,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
,float32,float32,float32,float32,uint32,uint16,uint8,uint16,uint32,uint32
,...,...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...,...,...


In [29]:
test2.npartitions

200

In [31]:
test3 = test2.get_partition(0)

In [32]:
type(test3)

dask.dataframe.core.DataFrame

In [37]:
test3.compute()

Unnamed: 0,x,y,z,intensity,t,reflectivity,ring,noise,range,original_id
0,0.796484,-0.043220,0.236579,50.0,0,3,0,44,832,0
1,0.807739,0.046913,0.214302,134.0,0,9,3,42,837,3
2,0.808805,0.016405,0.190070,153.0,0,10,6,37,831,6
3,0.818806,-0.043872,0.177631,65.0,0,4,8,42,839,8
4,0.821585,-0.013810,0.169493,136.0,0,9,9,25,839,9
...,...,...,...,...,...,...,...,...,...,...
112182,0.801167,0.016010,-0.137276,107.0,99939680,7,50,43,813,131058
112183,0.814400,0.046128,-0.147871,86.0,99939680,6,51,56,829,131059
112184,0.809908,-0.013744,-0.161668,107.0,99939680,7,53,39,826,131061
112185,0.795561,0.015620,-0.166755,86.0,99939680,6,54,57,813,131062


In [106]:
test_delayed = lazy_dataframes[0]

In [107]:
test_delayed.x.max().compute()

0.982897

In [109]:
test_delayed.visualize()

RuntimeError: Drawing dask graphs requires the `graphviz` python library and the `graphviz` system library to be installed.

In [111]:
delayed(lidar.Frame.from_instance("dataframe", test_delayed))

TypeError: Data argument must be a DataFrame

In [127]:
test = test_delayed.dask.


In [128]:
test.compute()

Unnamed: 0,x,y,z,intensity,t,reflectivity,ring,noise,range,original_id
0,0.796484,-0.043220,0.236579,50.0,0,3,0,44,832,0
1,0.807739,0.046913,0.214302,134.0,0,9,3,42,837,3
2,0.808805,0.016405,0.190070,153.0,0,10,6,37,831,6
3,0.818806,-0.043872,0.177631,65.0,0,4,8,42,839,8
4,0.821585,-0.013810,0.169493,136.0,0,9,9,25,839,9
...,...,...,...,...,...,...,...,...,...,...
112182,0.801167,0.016010,-0.137276,107.0,99939680,7,50,43,813,131058
112183,0.814400,0.046128,-0.147871,86.0,99939680,6,51,56,829,131059
112184,0.809908,-0.013744,-0.161668,107.0,99939680,7,53,39,826,131061
112185,0.795561,0.015620,-0.166755,86.0,99939680,6,54,57,813,131062


In [158]:
def pipeline(df: pd.DataFrame):
    df = df.reset_index(drop=True)
    frame = lidar.Frame.from_instance("dataframe", df)
    frame = frame.limit("x", -0.1,0)
    return frame.data

In [153]:
testagain = [ delayed(lidar.Frame.from_instance)("dataframe", frame) for frame in lazy_dataframes]

In [154]:
res = dd.compute(*testagain)

In [159]:
testagain2 = [ delayed(pipeline)(frame) for frame in lazy_dataframes]

In [162]:
test5=dd.from_delayed(testagain2)

In [165]:
test5.x.max().compute()

-1.482309107814217e-05

In [166]:
test5.x.min().compute()

-0.09998094290494919

In [168]:
test5.npartitions

10

In [170]:
test5.get_partition(0).compute()

Unnamed: 0,x,y,z,intensity,t,reflectivity,ring,noise,range,original_id
0,-0.000524,-1.980622,-0.532670,113.0,24076640,47,60,13,2051,31612
1,-0.000626,-1.998803,-0.230067,169.0,24125640,68,44,12,2012,31660
2,-0.001590,-1.983151,-0.301906,143.0,24125640,57,48,30,2006,31664
3,-0.002651,-2.000234,-0.379937,188.0,24125640,77,52,12,2036,31668
4,-0.004690,-2.006189,-0.458854,185.0,24125640,78,56,27,2058,31672
...,...,...,...,...,...,...,...,...,...,...
1788,-0.002364,1.862870,-0.267647,23.0,75815010,8,47,24,1882,99439
1789,-0.002519,1.856737,-0.336590,35.0,75815010,12,51,61,1887,99443
1790,-0.002702,1.827317,-0.401929,35.0,75815010,12,55,41,1871,99447
1791,-0.002362,1.836176,-0.476509,126.0,75815010,45,59,59,1897,99451


In [173]:
pipeline(lazy_dataframes[0].compute())

Unnamed: 0,x,y,z,intensity,t,reflectivity,ring,noise,range,original_id
0,-0.000524,-1.980622,-0.532670,113.0,24076640,47,60,13,2051,31612
1,-0.000626,-1.998803,-0.230067,169.0,24125640,68,44,12,2012,31660
2,-0.001590,-1.983151,-0.301906,143.0,24125640,57,48,30,2006,31664
3,-0.002651,-2.000234,-0.379937,188.0,24125640,77,52,12,2036,31668
4,-0.004690,-2.006189,-0.458854,185.0,24125640,78,56,27,2058,31672
...,...,...,...,...,...,...,...,...,...,...
1788,-0.002364,1.862870,-0.267647,23.0,75815010,8,47,24,1882,99439
1789,-0.002519,1.856737,-0.336590,35.0,75815010,12,51,61,1887,99443
1790,-0.002702,1.827317,-0.401929,35.0,75815010,12,55,41,1871,99447
1791,-0.002362,1.836176,-0.476509,126.0,75815010,45,59,59,1897,99451
