In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor

## Importing data

In [2]:
df = pd.read_csv('binned_mid360_lidar_25_first.csv')
print(df.head(5))

df_action = pd.read_csv("mid360_livox_imu.csv")
df_action.drop("Unnamed: 0", axis = 1, inplace = True)
df_action = df_action.loc[:, (df_action != 0).any(axis=0)]
print(df_action.head(5))

df_pose = pd.read_csv('vrpn_client_node-holybro-pose.csv')

   Unnamed: 0  group_id          Time  header.seq  header.stamp.secs   
0           0         0  1.690899e+09      264840         1690899086  \
1           1         1  1.690899e+09      264865         1690899086   
2           2         2  1.690899e+09      264890         1690899086   
3           3         3  1.690899e+09      264915         1690899087   
4           4         4  1.690899e+09      264940         1690899087   

   header.stamp.nsecs header.frame_id             timebase  point_num   
0           430251598     livox_frame  1690899086430251472       2016  \
1           680331469     livox_frame  1690899086680331472       2016   
2           930421352     livox_frame  1690899086930421144       1920   
3           180021286     livox_frame  1690899087180021144       2016   
4           430101156     livox_frame  1690899087430101144       2016   

   lidar_id             rsvd   
0       192  b'\x00\x00\x00'  \
1       192  b'\x00\x00\x00'   
2       192  b'\x00\x00\x00'   


# Trying point extraction, then interpolating. then grouping

In [3]:
def parse_points(raw_string, frame_id=None):
    raw_string = raw_string.strip("[]")
    entries = raw_string.split(", ")
    parsed = []
    for entry in entries:
        fields = entry.split(";")
        point = {}
        for field in fields:
            if ":" in field:
                key, value = field.split(":", 1)
                point[key.strip()] = value.strip()
        if frame_id is not None:
            point['frame_id'] = frame_id  # Track source frame
        parsed.append(point)
    return parsed

# Apply to all rows
all_points = []
for idx, row in df.iterrows():
    raw = row['points']
    frame_id = row.get('frame_index', idx)  # Use frame_index if available
    try:
        parsed = parse_points(raw, frame_id)
        for point in parsed:
            for col in df.columns:
                if col != 'points':
                    point[col] = row[col]
        all_points.extend(parsed)
    except Exception as e:
        print(f"Error parsing row {idx}: {e}")

# Convert to DataFrame
points_df = pd.DataFrame(all_points)
points_df = points_df.apply(pd.to_numeric, errors='ignore')

points_df.drop(["header.frame_id", "Unnamed: 0", "rsvd"], axis = 1, inplace = True)

In [4]:
points_df

Unnamed: 0,offset_time,x,y,z,reflectivity,tag,line,frame_id,group_id,Time,header.seq,header.stamp.secs,header.stamp.nsecs,timebase,point_num,lidar_id
0,0,-3.625,-0.643,3.022,49,0,0,0,0,1.690899e+09,264840,1690899086,430251598,1690899086430251472,2016,192
1,4947,-2.117,-0.433,1.925,4,0,1,0,0,1.690899e+09,264840,1690899086,430251598,1690899086430251472,2016,192
2,9894,-4.483,-1.061,4.473,101,0,2,0,0,1.690899e+09,264840,1690899086,430251598,1690899086430251472,2016,192
3,14841,-4.138,-1.122,4.512,83,0,3,0,0,1.690899e+09,264840,1690899086,430251598,1690899086430251472,2016,192
4,19788,-3.630,-0.733,3.044,38,0,0,0,0,1.690899e+09,264840,1690899086,430251598,1690899086430251472,2016,192
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
253915,10050177,-1.032,-0.579,0.166,9,0,3,126,126,1.690899e+09,267990,1690899117,930144310,1690899117930144304,2016,192
253916,10055124,-0.956,-0.577,0.058,12,0,0,126,126,1.690899e+09,267990,1690899117,930144310,1690899117930144304,2016,192
253917,10060071,-0.969,-0.578,0.093,11,0,1,126,126,1.690899e+09,267990,1690899117,930144310,1690899117930144304,2016,192
253918,10065018,-0.995,-0.589,0.132,8,0,2,126,126,1.690899e+09,267990,1690899117,930144310,1690899117930144304,2016,192


## Interpolating since timestamps do not perfectly line up

In [5]:
from scipy.interpolate import interp1d
imu_df = df_action.copy()
lidar_df = points_df.copy()


imu_time = imu_df['Time']  # nanoseconds since epoch
lidar_time = lidar_df['Time']


interpolated = {}
for col in ['angular_velocity.x', 'angular_velocity.y', 'angular_velocity.z',
            'linear_acceleration.x', 'linear_acceleration.y', 'linear_acceleration.z']:
    f_interp = interp1d(imu_time, imu_df[col].values, kind='linear', bounds_error=False, fill_value='extrapolate')
    interpolated[col] = f_interp(lidar_time)
for col in interpolated:
    lidar_df[col] = interpolated[col]


pose_time = df_pose['Time']  # nanoseconds since epoch
lidar_time = lidar_df['Time']

pose_cols = [
    'pose.position.x', 'pose.position.y', 'pose.position.z',
    'pose.orientation.x', 'pose.orientation.y', 'pose.orientation.z', 'pose.orientation.w'
]

pose_interp = {}
for col in pose_cols:
    f_interp = interp1d(pose_time, df_pose[col].values, kind='linear', bounds_error=False, fill_value='extrapolate')
    pose_interp[col] = f_interp(lidar_time)

for col in pose_interp:
    lidar_df[col] = pose_interp[col]


In [6]:
lidar_df.head()

Unnamed: 0,offset_time,x,y,z,reflectivity,tag,line,frame_id,group_id,Time,...,linear_acceleration.x,linear_acceleration.y,linear_acceleration.z,pose.position.x,pose.position.y,pose.position.z,pose.orientation.x,pose.orientation.y,pose.orientation.z,pose.orientation.w
0,0,-3.625,-0.643,3.022,49,0,0,0,0,1690899000.0,...,-0.099241,0.025477,0.985722,3.573874,3.724474,0.285453,-0.001514,-0.019598,0.022424,-0.999555
1,4947,-2.117,-0.433,1.925,4,0,1,0,0,1690899000.0,...,-0.099241,0.025477,0.985722,3.573874,3.724474,0.285453,-0.001514,-0.019598,0.022424,-0.999555
2,9894,-4.483,-1.061,4.473,101,0,2,0,0,1690899000.0,...,-0.099241,0.025477,0.985722,3.573874,3.724474,0.285453,-0.001514,-0.019598,0.022424,-0.999555
3,14841,-4.138,-1.122,4.512,83,0,3,0,0,1690899000.0,...,-0.099241,0.025477,0.985722,3.573874,3.724474,0.285453,-0.001514,-0.019598,0.022424,-0.999555
4,19788,-3.63,-0.733,3.044,38,0,0,0,0,1690899000.0,...,-0.099241,0.025477,0.985722,3.573874,3.724474,0.285453,-0.001514,-0.019598,0.022424,-0.999555


## Performing grouping

In [7]:
group_df = lidar_df.copy().sort_values("Time")
#group_df.drop(["offset_time", "timestamp", 'header.seq', 'header.stamp.secs', 'header.stamp.nsecs'], axis = 1)
#group_df['group_id'] = np.floor(np.arange(len(group_df)) / 500).astype(int)

grouped_df = group_df.groupby('frame_id').agg('first').reset_index(drop=True)
grouped_df.head(2)

Unnamed: 0,offset_time,x,y,z,reflectivity,tag,line,group_id,Time,header.seq,...,linear_acceleration.x,linear_acceleration.y,linear_acceleration.z,pose.position.x,pose.position.y,pose.position.z,pose.orientation.x,pose.orientation.y,pose.orientation.z,pose.orientation.w
0,0,-3.625,-0.643,3.022,49,0,0,0,1690899000.0,264840,...,-0.099241,0.025477,0.985722,3.573874,3.724474,0.285453,-0.001514,-0.019598,0.022424,-0.999555
1,6720000,6.894,-2.74,0.115,0,0,0,1,1690899000.0,264865,...,-0.102737,0.029373,1.007642,3.57395,3.724581,0.285471,-0.001519,-0.020128,0.022197,-0.999549


In [9]:
# import plotly.graph_objects as go

# # Sort by timestamp to ensure time order
# group_df = group_df.sort_values('timestamp')

# # Create the figure
# fig = go.Figure()

# # Add scatter points
# fig.add_trace(go.Scatter3d(
#     x=group_df['x'],
#     y=group_df['y'],
#     z=group_df['z'],
#     mode='markers+lines',  # Connect points in time order
#     marker=dict(size=3, color=group_df['timestamp'].astype(int), colorscale='Viridis'),
#     line=dict(color='red', width=1),
#     name='LiDAR Path'
# ))
# fig.show()

## Preprocessing & Model Fitting

In [8]:
# checking for missing values
total = 0
for i in range(len(grouped_df.columns)):
    total += sum(grouped_df.iloc[:,i].isna())
total

0

In [9]:
lidar_df.columns

Index(['offset_time', 'x', 'y', 'z', 'reflectivity', 'tag', 'line', 'frame_id',
       'group_id', 'Time', 'header.seq', 'header.stamp.secs',
       'header.stamp.nsecs', 'timebase', 'point_num', 'lidar_id',
       'angular_velocity.x', 'angular_velocity.y', 'angular_velocity.z',
       'linear_acceleration.x', 'linear_acceleration.y',
       'linear_acceleration.z', 'pose.position.x', 'pose.position.y',
       'pose.position.z', 'pose.orientation.x', 'pose.orientation.y',
       'pose.orientation.z', 'pose.orientation.w'],
      dtype='object')

In [10]:

continuous = ['x', 'y', 'z', 'pose.position.x', 'pose.position.y',
       'pose.position.z', 'pose.orientation.x', 'pose.orientation.y',
       'pose.orientation.z', 'pose.orientation.w']
response = [
    'angular_velocity.x', 'angular_velocity.y', 'angular_velocity.z',
    'linear_acceleration.x', 'linear_acceleration.y', 'linear_acceleration.z'
]
discrete = ['reflectivity', 'tag', 'line', 'point_num', 'lidar_id', 'Time']

# scale data
scaler = StandardScaler()
grouped_df[continuous] = scaler.fit_transform(grouped_df[continuous])

# combine
X_final = grouped_df[discrete + continuous]

grouped_df = grouped_df.sample(frac=1, random_state=42).reset_index(drop=True)

X_train, X_test, y_train, y_test = train_test_split(X_final, grouped_df[response], test_size=0.2, random_state=42)

model = MultiOutputRegressor(RandomForestRegressor(n_estimators=100, random_state=42))
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse:.4f}")

Mean Squared Error: 0.0001
