In [14]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor

## Importing data

In [2]:
df = pd.read_csv('subset_mid360_livox_lidar_01.csv')
print(df.head(5))

df_action = pd.read_csv("mid360_livox_imu.csv")
df_action.drop("Unnamed: 0", axis = 1, inplace = True)
df_action = df_action.loc[:, (df_action != 0).any(axis=0)]
print(df_action.head(5))

           Time  header.seq  header.stamp.secs  header.stamp.nsecs   
0  1.690899e+09      264840         1690899086           430251598  \
1  1.690899e+09      264841         1690899086           440331459   
2  1.690899e+09      264842         1690899086           450411558   
3  1.690899e+09      264843         1690899086           460011482   
4  1.690899e+09      264844         1690899086           470091581   

  header.frame_id             timebase  point_num  lidar_id             rsvd   
0     livox_frame  1690899086430251472       2016       192  b'\x00\x00\x00'  \
1     livox_frame  1690899086440331472       2016       192  b'\x00\x00\x00'   
2     livox_frame  1690899086450411472       1920       192  b'\x00\x00\x00'   
3     livox_frame  1690899086460011472       2016       192  b'\x00\x00\x00'   
4     livox_frame  1690899086470091472       2016       192  b'\x00\x00\x00'   

                                              points  
0  [offset_time: 0;x: -3.625;y: -0.64300000

# Trying point extraction, then interpolating. then grouping

In [3]:
def parse_points(raw_string, frame_id=None):
    raw_string = raw_string.strip("[]")
    entries = raw_string.split(", ")
    parsed = []
    for entry in entries:
        fields = entry.split(";")
        point = {}
        for field in fields:
            if ":" in field:
                key, value = field.split(":", 1)
                point[key.strip()] = value.strip()
        if frame_id is not None:
            point['frame_id'] = frame_id  # Track source frame
        parsed.append(point)
    return parsed

# Apply to all rows
all_points = []
for idx, row in df.iterrows():
    raw = row['points']
    frame_id = row.get('frame_index', idx)  # Use frame_index if available
    try:
        parsed = parse_points(raw, frame_id)
        for point in parsed:
            for col in df.columns:
                if col != 'points':
                    point[col] = row[col]
        all_points.extend(parsed)
    except Exception as e:
        print(f"Error parsing row {idx}: {e}")

# Convert to DataFrame
points_df = pd.DataFrame(all_points)
points_df = points_df.apply(pd.to_numeric, errors='ignore')

points_df.drop(["header.frame_id", "rsvd"], axis = 1)
points_df['Time'] = pd.to_datetime(points_df['Time'])
points_df['timestamp'] = points_df['Time'] + pd.to_timedelta(points_df['offset_time'], unit='ns')


In [4]:
df_action

Unnamed: 0,Time,header.seq,header.stamp.secs,header.stamp.nsecs,header.frame_id,angular_velocity.x,angular_velocity.y,angular_velocity.z,linear_acceleration.x,linear_acceleration.y,linear_acceleration.z
0,1.690899e+09,530005,1690899086,434588432,livox_frame,0.005998,-0.007360,-0.005033,-0.112818,0.030662,0.976075
1,1.690899e+09,530006,1690899086,440245867,livox_frame,0.012389,-0.009491,-0.010359,-0.099390,0.025535,0.985597
2,1.690899e+09,530007,1690899086,444532156,livox_frame,0.011324,-0.010556,-0.011425,-0.088648,0.021384,0.994630
3,1.690899e+09,530008,1690899086,450156450,livox_frame,0.011324,-0.003099,-0.008229,-0.099146,0.024802,0.997316
4,1.690899e+09,530009,1690899086,454552650,livox_frame,0.007063,0.004358,-0.003968,-0.112573,0.029685,1.003907
...,...,...,...,...,...,...,...,...,...,...,...
6318,1.690899e+09,536323,1690899118,25126696,livox_frame,0.002802,-0.009491,-0.007164,-0.117456,0.032615,0.974855
6319,1.690899e+09,536324,1690899118,30729771,livox_frame,0.009194,-0.010556,-0.007164,-0.099146,0.021872,0.968751
6320,1.690899e+09,536325,1690899118,35095215,livox_frame,0.015585,-0.007360,-0.007164,-0.088892,0.022849,0.986329
6321,1.690899e+09,536326,1690899118,40809631,livox_frame,0.012389,-0.009491,-0.009294,-0.094995,0.031150,1.005372


## Interpolating since timestamps do not perfectly line up

In [7]:
from scipy.interpolate import interp1d
imu_df = df_action.copy()
lidar_df = points_df.copy()


imu_time = imu_df['Time']  # nanoseconds since epoch
lidar_time = lidar_df['timestamp']


interpolated = {}
for col in ['angular_velocity.x', 'angular_velocity.y', 'angular_velocity.z',
            'linear_acceleration.x', 'linear_acceleration.y', 'linear_acceleration.z']:
    f_interp = interp1d(imu_time, imu_df[col].values, kind='linear', bounds_error=False, fill_value='extrapolate')
    interpolated[col] = f_interp(lidar_time)
for col in interpolated:
    lidar_df[col] = interpolated[col]

In [8]:
lidar_df.columns

Index(['offset_time', 'x', 'y', 'z', 'reflectivity', 'tag', 'line', 'frame_id',
       'Time', 'header.seq', 'header.stamp.secs', 'header.stamp.nsecs',
       'header.frame_id', 'timebase', 'point_num', 'lidar_id', 'rsvd',
       'timestamp', 'angular_velocity.x', 'angular_velocity.y',
       'angular_velocity.z', 'linear_acceleration.x', 'linear_acceleration.y',
       'linear_acceleration.z'],
      dtype='object')

## Performing grouping

In [18]:
group_df = lidar_df.copy()
group_df.drop(["offset_time", "timestamp", 'header.seq', 'header.stamp.secs', 'header.stamp.nsecs'], axis = 1)
#group_df['group_id'] = np.floor(np.arange(len(group_df)) / 150).astype(int)

agg_dict = {
    'x': 'mean',
    'y': 'mean',
    'z': 'mean',
    'linear_acceleration.x': 'mean',
    'linear_acceleration.y': 'mean',
    'linear_acceleration.z': 'mean',
    'angular_velocity.x': 'mean',
    'angular_velocity.y': 'mean',
    'angular_velocity.z': 'mean',
    'timestamp': 'first',  
    'header.frame_id': 'first' , 
    'tag': 'max',
    'line': 'max',
    'reflectivity': 'mean',
}

grouped_df = group_df.groupby('frame_id').agg(agg_dict).reset_index(drop=True)
grouped_df

Unnamed: 0,x,y,z,linear_acceleration.x,linear_acceleration.y,linear_acceleration.z,angular_velocity.x,angular_velocity.y,angular_velocity.z,timestamp,header.frame_id,tag,line,reflectivity
0,-3.347787,-2.429647,4.070207,-7.288951e+05,-3.001332e+05,-1.286275e+05,-6.547891e+05,9.354130e+04,9.354106e+04,1970-01-01 00:00:01.690899086,livox_frame,4,3,49.500000
1,-0.497073,-4.059253,4.205613,-2.203756e+06,-9.074289e+05,-3.888971e+05,-1.979703e+06,2.828147e+05,2.828140e+05,1970-01-01 00:00:01.691646224,livox_frame,4,3,14.800000
2,3.412627,-3.814640,5.351013,-3.680620e+06,-1.515550e+06,-6.495202e+05,-3.306416e+06,4.723452e+05,4.723439e+05,1970-01-01 00:00:01.692398450,livox_frame,4,3,33.800000
3,5.511620,-0.237033,5.713180,-5.156283e+06,-2.123175e+06,-9.099313e+05,-4.632050e+06,6.617214e+05,6.617197e+05,1970-01-01 00:00:01.693145588,livox_frame,4,3,49.700000
4,3.699807,3.644540,5.342113,-6.632346e+06,-2.730966e+06,-1.170413e+06,-5.958043e+06,8.511491e+05,8.511469e+05,1970-01-01 00:00:01.693897814,livox_frame,4,3,54.220000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1329,15.141653,1.929367,5.269127,-1.448481e+07,-5.964334e+06,-2.556142e+06,-1.301216e+07,1.858880e+06,1.858875e+06,1970-01-01 00:00:01.697886225,livox_frame,4,3,17.160000
1330,4.400753,6.415553,2.829353,-1.596168e+07,-6.572455e+06,-2.816765e+06,-1.433887e+07,2.048410e+06,2.048405e+06,1970-01-01 00:00:01.698638451,livox_frame,4,3,29.186667
1331,-0.714947,4.407620,1.652573,-1.743734e+07,-7.180081e+06,-3.077176e+06,-1.566450e+07,2.237787e+06,2.237781e+06,1970-01-01 00:00:01.699385589,livox_frame,16,3,47.640000
1332,-0.658060,0.326513,0.307927,-1.891340e+07,-7.787871e+06,-3.337658e+06,-1.699050e+07,2.427214e+06,2.427208e+06,1970-01-01 00:00:01.700137815,livox_frame,0,3,1.973333


In [None]:
import plotly.graph_objects as go

# Sort by timestamp to ensure time order
group_df = group_df.sort_values('timestamp')

# Create the figure
fig = go.Figure()

# Add scatter points
fig.add_trace(go.Scatter3d(
    x=group_df['x'],
    y=group_df['y'],
    z=group_df['z'],
    mode='markers+lines',  # Connect points in time order
    marker=dict(size=3, color=group_df['timestamp'].astype(int), colorscale='Viridis'),
    line=dict(color='red', width=1),
    name='LiDAR Path'
))
fig.show()

## Old code for reference

In [None]:
def parse_points(raw_string, frame_id=None, timestamp=None, imu_data=None):
    raw_string = raw_string.strip("[]")
    entries = raw_string.split(", ")
    parsed = []

    for entry in entries:
        fields = entry.split(";")
        point = {}
        for field in fields:
            if ":" in field:
                key, value = field.split(":", 1)
                point[key.strip()] = value.strip()
        # Attach metadata
        if frame_id is not None:
            point['frame_id'] = frame_id
        if timestamp is not None:
            point['Time'] = timestamp
        if imu_data is not None:
            for k, v in imu_data.items():
                point[k] = v
        parsed.append(point)

    return parsed

all_points = []

for idx, row in lidar_df.iterrows():
    raw = row['points']
    frame_id = row.get('frame_index', idx)
    timestamp = row.get('Time', None)
    imu_data = {
        'point_num' : row.get('point_num'),
        'lidar_id' : row.get('lidar_id'),
        'angular_velocity.x': row.get('angular_velocity.x'),
        'angular_velocity.y': row.get('angular_velocity.y'),
        'angular_velocity.z': row.get('angular_velocity.z'),
        'linear_acceleration.x': row.get('linear_acceleration.x'),
        'linear_acceleration.y': row.get('linear_acceleration.y'),
        'linear_acceleration.z': row.get('linear_acceleration.z')
       
    }

    try:
        parsed = parse_points(raw, frame_id=frame_id, timestamp=timestamp, imu_data=imu_data)
        all_points.extend(parsed)
    except Exception as e:
        print(f"Error parsing row {idx}: {e}")

points_df = pd.DataFrame(all_points)
points_df = points_df.apply(pd.to_numeric, errors='ignore')
print(len(points_df))
points_df.head()

## Preprocessing & Model Fitting

In [None]:
# checking for missing values
total = 0
for i in range(len(points_df.columns)):
    total += sum(points_df.iloc[:,i].isna())
total

In [None]:
continuous = ['offset_time', 'x', 'y', 'z']
response = [
    'angular_velocity.x', 'angular_velocity.y', 'angular_velocity.z',
    'linear_acceleration.x', 'linear_acceleration.y', 'linear_acceleration.z'
]
discrete = ['reflectivity', 'tag', 'line', 'point_num', 'lidar_id', 'frame_id']

# scale data
scaler = StandardScaler()
points_df[continuous] = scaler.fit_transform(points_df[continuous])

# combine
X_final = points_df[discrete + continuous]

points_df = points_df.sample(frac=1, random_state=42).reset_index(drop=True)

X_train, X_test, y_train, y_test = train_test_split(X_final, points_df[response], test_size=0.2, random_state=42)

model = MultiOutputRegressor(RandomForestRegressor(n_estimators=100, random_state=42))
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse:.4f}")

In [None]:
print(points_df[response].nunique())
