# 01 - NCLT Data Exploration

Explore the structure of the NCLT (North Campus Long-Term) dataset, and visualize its point clouds and trajectories.

In [None]:
import sys
sys.path.insert(0, '..')

import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

from src.utils.io_utils import load_config
from src.utils.point_cloud import load_velodyne_bin, voxel_downsample, remove_ground_plane

## 1. Load Configuration

In [None]:
config = load_config('../configs/dataset_config.yaml')
nclt_cfg = config['nclt']

# Print the full session list followed by the train/val/test split.
for label, key in (
    ('Sessions:', 'sessions'),
    ('Train:', 'train_sessions'),
    ('Val:', 'val_sessions'),
    ('Test:', 'test_sessions'),
):
    print(label, nclt_cfg[key])

## 2. Explore Dataset Structure

In [None]:
# Check available data: for every session directory, report how many
# Velodyne scans it holds and whether a track.csv file is present.
data_path = Path(config['nclt']['local_path'])
if not data_path.exists():
    print(f'Data not found at {data_path}')
    print('Run: python scripts/download_nclt_sample.py')
else:
    # Only directories count as sessions; stray files under sessions/
    # (e.g. .DS_Store) would otherwise be listed as empty sessions.
    session_dirs = sorted(p for p in data_path.glob('sessions/*') if p.is_dir())
    if not session_dirs:
        # Say so explicitly instead of printing nothing at all.
        print(f'No session directories found under {data_path / "sessions"}')
    for session_dir in session_dirs:
        velodyne_dir = session_dir / 'velodyne'
        # Count lazily rather than materializing the list of scan paths.
        velodyne_count = (
            sum(1 for _ in velodyne_dir.glob('*.bin')) if velodyne_dir.is_dir() else 0
        )
        has_track = (session_dir / 'track.csv').exists()
        print(f'{session_dir.name}: {velodyne_count} scans, track.csv={has_track}')

## 3. Visualize Point Cloud

In [None]:
# Load and visualize a sample point cloud.
# To use real data, adjust the path below and uncomment:
# sample_bin = data_path / 'sessions' / '2012-01-08' / 'velodyne' / 'XXXXX.bin'
# points = load_velodyne_bin(sample_bin)
# print(f'Point cloud shape: {points.shape}')
# print(f'XYZ range: x=[{points[:,0].min():.1f}, {points[:,0].max():.1f}], '
#       f'y=[{points[:,1].min():.1f}, {points[:,1].max():.1f}], '
#       f'z=[{points[:,2].min():.1f}, {points[:,2].max():.1f}]')

# Synthetic stand-in cloud so the plots render without the dataset.
# Columns 0-2 are plotted as x, y, z and column 3 as intensity.
np.random.seed(42)
points = np.random.randn(10000, 4).astype(np.float32)
points[:, 2] = 0.5 * points[:, 2] - 1.0  # flatten vertically

# Column views reused by both panels.
xs, ys, zs, intensity = points.T

fig, axes = plt.subplots(1, 2, figsize=(16, 6))
bev_ax, side_ax = axes

# Bird's eye view: X-Y plane, colored by the z column.
bev_ax.scatter(xs, ys, s=0.1, c=zs, cmap='viridis')
bev_ax.set(xlabel='X (m)', ylabel='Y (m)', title="Bird's Eye View")
bev_ax.set_aspect('equal')

# Side view: X-Z plane, colored by the intensity column.
side_ax.scatter(xs, zs, s=0.1, c=intensity, cmap='plasma')
side_ax.set(xlabel='X (m)', ylabel='Z (m)', title='Side View (colored by intensity)')

plt.tight_layout()
plt.show()

## 4. Visualize Trajectory

In [None]:
# Load and plot the ground truth trajectory of one session (top-down X-Y view).
# The code is left commented out because it needs that session's track.csv
# on disk; uncomment it when data is available.
# NOTE(review): the column names below assume track.csv is header-less with
# exactly these seven columns in this order — confirm against the real file.
# import pandas as pd
# track = pd.read_csv(data_path / 'sessions' / '2012-01-08' / 'track.csv', header=None)
# track.columns = ['timestamp', 'x', 'y', 'z', 'roll', 'pitch', 'yaw']
# 
# fig, ax = plt.subplots(1, 1, figsize=(10, 10))
# ax.plot(track['x'], track['y'], linewidth=0.5)
# ax.set_xlabel('X (m)')
# ax.set_ylabel('Y (m)')
# ax.set_title('Ground Truth Trajectory - 2012-01-08')
# ax.set_aspect('equal')
# ax.grid(True, alpha=0.3)
# plt.show()

# Placeholder output so the cell still produces something without data.
print('Uncomment the code above when NCLT data is available.')

## 5. Test Preprocessing Pipeline

In [None]:
from src.datasets.transforms import (
    Compose,
    RandomJitter,
    RandomRotation,
    RandomSubsample,
    RemoveGround,
    VoxelDownsample,
)

# Stages are listed in the order they are handed to Compose:
# ground removal and voxel downsampling first, then the random
# rotation/jitter augmentations, then subsampling.
pipeline_stages = [
    RemoveGround(threshold=-1.5),
    VoxelDownsample(voxel_size=0.2),
    RandomRotation(max_angle=180),
    RandomJitter(sigma=0.01),
    RandomSubsample(num_points=10000),
]
transform = Compose(pipeline_stages)

# Run the pipeline on the `points` array from the visualization cell and
# compare the array shape before and after.
print(f'Before: {points.shape}')
transformed = transform(points)
print(f'After: {transformed.shape}')
print(f'Transform pipeline:\n{transform}')