In [1]:
# Cell 1: Import Libraries
import h5py
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
from IPython.display import display, clear_output

# Cell 2: Define Helper Functions
def load_episode(file_path):
    """Read the image and joint-position datasets of one HDF5 episode file.

    Args:
        file_path: Path of the HDF5 file to open (read-only).

    Returns:
        A tuple ``(rgb, depth, qpos_arm1, camera_pose_qpos)`` of NumPy arrays.
        ``rgb`` and ``depth`` are ``None`` when the corresponding image
        dataset is not present in the file.

    Note:
        The two qpos datasets are read unconditionally, so a file lacking
        either of them makes the h5py lookup fail instead of returning None.
    """
    rgb_key = '/observations/images/rgb'
    depth_key = '/observations/images/depth'

    with h5py.File(file_path, 'r') as episode:
        # Image datasets are optional.
        rgb = None
        if rgb_key in episode:
            rgb = np.array(episode[rgb_key])

        depth = None
        if depth_key in episode:
            depth = np.array(episode[depth_key])

        # Joint positions are mandatory.
        qpos_arm1 = np.array(episode['/observations/qpos'])
        camera_pose_qpos = np.array(episode['/camera_pose/qpos'])

    return rgb, depth, qpos_arm1, camera_pose_qpos

def plot_image(image, title="Image", cmap=None):
    """Show one image in its own 8x6-inch matplotlib figure with axes hidden.

    Args:
        image: Array-like accepted by ``imshow``.
        title: Text placed above the image.
        cmap: Optional matplotlib colormap (e.g. ``'gray'``); ``None`` keeps
            matplotlib's default handling.
    """
    _, axes = plt.subplots(figsize=(8, 6))
    axes.imshow(image, cmap=cmap)
    axes.set_title(title)
    axes.axis('off')
    plt.show()

def plot_joint_positions(qpos, title="Joint Positions"):
    """Draw joint positions as a blue line graph with circle markers.

    Args:
        qpos: Sequence/array of joint positions; values are plotted against
            their index, which the x-axis labels as the joint index.
        title: Text placed above the plot.
    """
    _, axes = plt.subplots(figsize=(10, 4))
    axes.plot(qpos, marker='o', linestyle='-', color='b')
    axes.set_title(title)
    axes.set_xlabel("Joint Index")
    axes.set_ylabel("Position (rad)")
    axes.grid(True)
    plt.show()

# Cell 3: Create MP4 from RGB Images
def create_rgb_video(dataset_dir, folder_name, output_video_path="output_rgb_video.mp4", fps=20):
    """Create an MP4 video containing one frame per episode file.

    Episode files are the ``*.h5`` files inside ``dataset_dir/folder_name``;
    they are ordered by the integer that follows the first underscore in the
    file name (``episode_12.h5`` -> 12). The frame size is taken from the
    first episode, and every episode contributes its RGB image as one frame.

    Args:
        dataset_dir: Directory that contains the dataset folders.
        folder_name: Name of the folder (inside ``dataset_dir``) to process.
        output_video_path: Destination path of the MP4 file.
        fps: Frame rate passed to the video writer.

    Returns:
        None. Progress and problems are reported with ``print``.

    Raises:
        ValueError / IndexError: if an ``.h5`` file name does not follow the
            ``<prefix>_<number>.h5`` pattern, or if the first episode's image
            is not a 3-D ``(height, width, channels)`` array.
    """
    episode_dir = os.path.join(dataset_dir, folder_name)
    episode_files = sorted(
        (f for f in os.listdir(episode_dir) if f.endswith('.h5')),
        key=lambda x: int(x.split('_')[1].split('.')[0]),
    )

    if not episode_files:
        print("No HDF5 files found in the specified folder.")
        return

    # Load the first episode to get image dimensions
    first_file = os.path.join(episode_dir, episode_files[0])
    rgb, _, _, _ = load_episode(first_file)
    if rgb is None:
        print("No RGB data found in the first episode.")
        return

    height, width, _ = rgb.shape

    # Initialize video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for MP4
    video_writer = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

    # cv2.VideoWriter does not raise when it cannot open the target (missing
    # directory, unavailable codec, ...); without this check every write()
    # would be a silent no-op and the function would still claim success.
    if not video_writer.isOpened():
        video_writer.release()
        print(f"Could not open {output_video_path} for writing.")
        return

    try:
        # Process each episode
        for episode_file in episode_files:
            file_path = os.path.join(episode_dir, episode_file)
            rgb, _, _, _ = load_episode(file_path)

            if rgb is None:
                print(f"Skipping {episode_file} - No RGB data found.")
                continue

            # The writer was opened for a fixed frame size; frames of another
            # size are not encoded, so report them instead of pretending they
            # were added.
            if rgb.shape[:2] != (height, width):
                print(f"Skipping {episode_file} - frame size "
                      f"{rgb.shape[1]}x{rgb.shape[0]} does not match {width}x{height}.")
                continue

            # Swap the first and third channels. VideoWriter.write expects BGR
            # frames, so this is the right conversion if the stored images are
            # RGB. NOTE(review): confirm the channel order stored in the files.
            rgb_frame = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB)
            video_writer.write(rgb_frame)
            print(f"Added frame from {episode_file}")
    finally:
        # Always finalize the container, even if loading an episode fails,
        # so the file handle is not leaked.
        video_writer.release()

    print(f"Video saved as {output_video_path}")

# Specify dataset directory and folder name.
# These are notebook-level names: later cells reuse `dataset_dir` and
# `folder_name` (and one later cell rebinds them to a different folder).
dataset_dir = "datasets"
folder_name = "20250324_203250"  # Replace with your timestamp
# The video is written next to the episode files it was built from.
output_video_path = f"{dataset_dir}/{folder_name}/rgb_video.mp4"

# Create the video (fps=30 overrides the function's default of 20)
create_rgb_video(dataset_dir, folder_name, output_video_path, fps=30)

# Cell 4: Load and Visualize a Single Episode (Optional)
# Loads episode 0 of the folder selected above. `file_path`, `rgb` and `depth`
# stay bound at notebook level and are read again by later cells.
episode_idx = 0
file_path = os.path.join(dataset_dir, folder_name, f"episode_{episode_idx}.h5")
rgb, depth, qpos_arm1, camera_pose_qpos = load_episode(file_path)

# Optional visualizations, currently disabled. Uncomment to display the RGB
# image (after a BGR<->RGB channel swap), the depth image, and the joint plots.
# if rgb is not None:
#     rgb_display = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB)
#     plot_image(rgb_display, title=f"RGB Image - Episode {episode_idx}")

# if depth is not None:
#     plot_image(depth, title=f"Depth Image - Episode {episode_idx}", cmap='gray')

# plot_joint_positions(qpos_arm1, title=f"Arm 1 Joint Positions - Episode {episode_idx}")
# plot_joint_positions(camera_pose_qpos, title=f"Camera Pose (Arm 2) Joint Positions - Episode {episode_idx}")

# Cell 5: Summary Statistics (Optional)
def print_summary_stats(file_path):
    """Print shapes, value ranges and joint positions stored in one episode file.

    Args:
        file_path: Path of the HDF5 episode file to inspect.

    The RGB and depth lines are printed only when the corresponding dataset
    exists in the file; both qpos datasets are read unconditionally.
    """
    rgb_key = '/observations/images/rgb'
    depth_key = '/observations/images/depth'

    with h5py.File(file_path, 'r') as episode:
        print(f"File: {file_path}")

        # Optional image datasets: shape plus min/max of the raw values.
        has_rgb = rgb_key in episode
        if has_rgb:
            rgb_data = np.array(episode[rgb_key])
            print(f"RGB Shape: {rgb_data.shape}, Min: {rgb_data.min()}, Max: {rgb_data.max()}")

        has_depth = depth_key in episode
        if has_depth:
            depth_data = np.array(episode[depth_key])
            print(f"Depth Shape: {depth_data.shape}, Min: {depth_data.min()}, Max: {depth_data.max()}")

        # Joint positions: the values themselves and their first-axis length.
        arm1 = np.array(episode['/observations/qpos'])
        print(f"Arm 1 qpos: {arm1}, Length: {len(arm1)}")

        camera_pose = np.array(episode['/camera_pose/qpos'])
        print(f"Camera Pose qpos: {camera_pose}, Length: {len(camera_pose)}")

# Summarize the single episode selected in Cell 4 (`file_path` is set there).
print_summary_stats(file_path)



Added frame from episode_0.h5
Added frame from episode_1.h5
Added frame from episode_2.h5
Added frame from episode_3.h5
Added frame from episode_4.h5
Added frame from episode_5.h5
Added frame from episode_6.h5
Added frame from episode_7.h5
Added frame from episode_8.h5
Added frame from episode_9.h5
Added frame from episode_10.h5
Added frame from episode_11.h5
Added frame from episode_12.h5
Added frame from episode_13.h5
Added frame from episode_14.h5
Added frame from episode_15.h5
Added frame from episode_16.h5
Added frame from episode_17.h5
Added frame from episode_18.h5
Added frame from episode_19.h5
Added frame from episode_20.h5
Added frame from episode_21.h5
Added frame from episode_22.h5
Added frame from episode_23.h5
Added frame from episode_24.h5
Added frame from episode_25.h5
Added frame from episode_26.h5
Added frame from episode_27.h5
Added frame from episode_28.h5
Added frame from episode_29.h5
Added frame from episode_30.h5
Added frame from episode_31.h5
Added frame from e

In [2]:
from matplotlib.animation import FuncAnimation
from IPython.display import HTML
import os
from IPython.display import Video

import matplotlib.pyplot as plt
# Reload the episode chosen earlier; `file_path` comes from a previous cell.
rgb, depth, qpos_arm1, camera_pose_qpos = load_episode(file_path)

# load_episode returns None for rgb when the file has no RGB dataset; fail
# with a clear message instead of an AttributeError on `.reshape` below.
if rgb is None:
    raise ValueError(f"No RGB data found in {file_path}; cannot build the animation.")

# Force a (n_frames, height, width, channels) layout. The 480x848x3 frame
# geometry is hard-coded here, so data with another resolution will fail.
rgb = rgb.reshape([-1, 480, 848, 3])

# Create a figure and axis for the animation
fig, ax = plt.subplots()
im = ax.imshow(rgb[0])  # Display the first frame initially
ax.axis('off')  # Turn off the axis


# Update function for the animation
def update(frame):
    """Swap the displayed image for frame number `frame`; return the redrawn artists."""
    im.set_array(rgb[frame])
    return [im]


# Create the animation (interval=50 ms between frames for interactive playback)
ani = FuncAnimation(fig, update, frames=len(rgb), interval=50, blit=True)

# Save the animation as an MP4 file. The explicit fps=10 sets the frame rate
# of the saved file, so it does not follow the 50 ms interval above.
output_file = "output_video.mp4"
ani.save(output_file, writer="ffmpeg", fps=10)

# Close the figure to avoid displaying it twice
plt.close(fig)

# Display the video in the notebook
Video(output_file, embed=True)

In [6]:
from PIL import Image

# Disabled alternatives that would store the reference frame as PNG images:
# Image.fromarray(rgb[0]).save("tasks/mug/ref_rgb.png")
# Image.fromarray(depth[0]).save("tasks/mug/ref_depth.png")
# Store the first slice of `depth` as the reference depth array.
# NOTE(review): relies on `depth` left in the kernel by an earlier cell (it is
# None when the loaded file has no depth dataset) and on the "tasks/mug"
# directory already existing - confirm both before running on a fresh kernel.
np.save("tasks/mug/ref_depth.npy", depth[0])

In [None]:
# Bare name: this only evaluates to the function object (the notebook would
# display its repr); it does not call create_rgb_video.
create_rgb_video

In [None]:
# Cell 6: Visualize Joint Position Changes Over Episodes
def _plot_qpos_history(episode_nums, qpos_history, title):
    """Draw one line per joint (column of `qpos_history`) against the episode numbers."""
    plt.figure(figsize=(12, 6))
    for joint_idx in range(qpos_history.shape[1]):
        plt.plot(episode_nums, qpos_history[:, joint_idx], marker='o', linestyle='-',
                 label=f'Joint {joint_idx}')
    plt.title(title)
    plt.xlabel("Episode Number")
    plt.ylabel("Joint Position (rad)")
    plt.legend(loc='best')
    plt.grid(True)
    plt.show()


def plot_joint_changes(dataset_dir, folder_name):
    """Plot changes in joint positions for arm_1 and arm_2 across all episodes.

    Every ``*.h5`` file in ``dataset_dir/folder_name`` is treated as one
    episode. Episodes are ordered by the integer that follows the first
    underscore of the file name (``episode_12.h5`` -> 12), and two figures are
    shown: one for the arm-1 joint positions and one for the camera-pose
    (arm-2) joint positions.

    Args:
        dataset_dir: Directory that contains the dataset folders.
        folder_name: Name of the folder (inside ``dataset_dir``) to process.

    Returns:
        None. Prints a message and returns early when the folder holds no
        ``.h5`` files.

    Raises:
        ValueError / IndexError: if an ``.h5`` file name does not follow the
            ``<prefix>_<number>.h5`` pattern.
    """
    episode_dir = os.path.join(dataset_dir, folder_name)

    # Parse each episode number once and keep it paired with its file name.
    numbered_files = sorted(
        (int(f.split('_')[1].split('.')[0]), f)
        for f in os.listdir(episode_dir)
        if f.endswith('.h5')
    )

    if not numbered_files:
        print("No HDF5 files found in the specified folder.")
        return

    # Load joint positions from each episode
    arm1_qpos_history = []
    arm2_qpos_history = []
    episode_nums = []
    for episode_num, episode_file in numbered_files:
        file_path = os.path.join(episode_dir, episode_file)
        _, _, qpos_arm1, camera_pose_qpos = load_episode(file_path)
        arm1_qpos_history.append(qpos_arm1)
        arm2_qpos_history.append(camera_pose_qpos)
        episode_nums.append(episode_num)

    # Stack to 2-D arrays so each joint is a column. This assumes every
    # episode stores a 1-D qpos vector of the same length per arm.
    arm1_qpos_history = np.array(arm1_qpos_history)  # Shape: (n_episodes, n_joints_arm1)
    arm2_qpos_history = np.array(arm2_qpos_history)  # Shape: (n_episodes, n_joints_arm2)

    _plot_qpos_history(episode_nums, arm1_qpos_history,
                       "Arm 1 Joint Position Changes Over Episodes")
    _plot_qpos_history(episode_nums, arm2_qpos_history,
                       "Arm 2 (Camera Pose) Joint Position Changes Over Episodes")

# Specify dataset directory and folder name.
# NOTE: this rebinds the notebook-level `dataset_dir` / `folder_name`; the
# folder below is a different timestamp from the one used for the RGB video.
dataset_dir = "datasets"
folder_name = "20250324_204534"  # Replace with your timestamp

# Plot joint changes
plot_joint_changes(dataset_dir, folder_name)

In [None]:
import os
import h5py
import numpy as np
from pathlib import Path

# Define the base directory containing your datasets.
# "~" is expanded to the current user's home directory. Note that earlier
# cells use the relative "datasets" directory instead of this path.
base_dir = Path("~/ObAct_realworld/datasets").expanduser()

# Function to merge .h5 files from a folder into a single .h5 file
def merge_h5_files(folder_path, output_file):
    """Merge the per-episode .h5 files of one folder into a single .h5 file.

    Every dataset found (recursively) in the episode files is collected under
    its full HDF5 name. For each name the per-episode arrays are stacked along
    a new leading axis (``np.stack``), so the merged dataset has shape
    ``(n_episodes, *episode_shape)``.

    Episodes are processed in numeric order of the trailing ``_<number>`` in
    the file name (``episode_2.h5`` before ``episode_10.h5``); files without
    such a suffix come last, in alphabetical order.

    The output file is only created after every input has been read and
    stacked, and it is removed again if writing fails, so a failed run never
    leaves a partial file behind.

    Args:
        folder_path: Folder containing the episode ``.h5`` files
            (``str`` or ``pathlib.Path``).
        output_file: Path of the merged file to create (overwritten if it
            exists).

    Returns:
        None. Prints a message and returns early when the folder holds no
        ``.h5`` files.

    Raises:
        ValueError: if the arrays of a dataset have different shapes across
            episodes and therefore cannot be stacked.
    """
    folder_path = Path(folder_path)
    output_file = Path(output_file)

    def episode_order(file_name):
        # Numeric ordering on the trailing "_<number>"; anything else sorts last.
        suffix = os.path.splitext(file_name)[0].rsplit("_", 1)[-1]
        if suffix.isdecimal():
            return (0, int(suffix), file_name)
        return (1, 0, file_name)

    # List all .h5 files in the folder, in episode order
    h5_files = sorted(
        (f for f in os.listdir(folder_path) if f.endswith(".h5")),
        key=episode_order,
    )
    if not h5_files:
        print(f"No .h5 files found in {folder_path}")
        return

    # Dataset name -> list of per-episode arrays, in episode order
    data_dict = {}

    def collect_datasets(name, obj):
        # Must return None: any other value would stop the visititems traversal.
        if isinstance(obj, h5py.Dataset):
            data_dict.setdefault(name, []).append(obj[()])  # Data as a NumPy array

    # Read every episode before touching the output file
    for h5_file in h5_files:
        with h5py.File(folder_path / h5_file, "r") as in_f:
            in_f.visititems(collect_datasets)

    # Stack each dataset along a new first axis (one entry per episode)
    stacked = {}
    for name in list(data_dict):
        data_list = data_dict.pop(name)  # pop to release the per-episode list early
        if len(data_list) != len(h5_files):
            # Rows of this dataset will not line up with the other datasets.
            print(f"Warning: dataset '{name}' is present in only "
                  f"{len(data_list)} of {len(h5_files)} episodes.")
        try:
            stacked[name] = np.stack(data_list)
        except ValueError as exc:
            raise ValueError(f"Cannot stack dataset '{name}': {exc}") from exc

    # Write the merged file; drop it again if anything goes wrong so that a
    # later "skip if it already exists" check does not see a broken file.
    try:
        with h5py.File(output_file, "w") as out_f:
            for name, stacked_data in stacked.items():
                out_f.create_dataset(name, data=stacked_data)
    except BaseException:
        if output_file.exists():
            output_file.unlink()
        raise

    print(f"Created {output_file} with data from {len(h5_files)} episodes.")

# Walk the dataset directory in sorted order and merge every sub-folder into
# "<folder>_v2.h5", leaving folders whose merged file is already present alone.
for folder in sorted(base_dir.iterdir()):
    if not folder.is_dir():
        continue  # plain files (including earlier merged outputs) are ignored

    output_file = base_dir / f"{folder.name}_v2.h5"
    if not output_file.exists():
        print(f"Processing {folder.name}...")
        merge_h5_files(folder, output_file)
    else:
        print(f"Skipping {output_file} as it already exists.")

print("Merging complete!")