In [42]:
# Import necessary Libraries
import pandas as pd
import numpy as np
import os
import glob

In [43]:
def get_timestamp_from_filename(file_path):
    """Extract the time-of-day fields embedded in a file name.

    The base name of ``file_path`` is split on ``_`` and the fields at
    positions 3, 4 and 5 are parsed as integers, e.g.
    ``2025_06_14_13_23_45_ep_0.csv`` -> ``(13, 23, 45)``.

    Args:
        file_path: Path whose base name has at least six ``_``-separated
            fields, with integer text at positions 3-5.

    Returns:
        Tuple ``(hr, minute, second)`` of ints. Fields 0-2 are ignored.

    Raises:
        IndexError: If the base name has fewer than six fields.
        ValueError: If one of the three fields is not an integer.
    """
    fields = os.path.basename(file_path).split('_')
    # Parsed in order 3, 4, 5 so the first bad field is the one reported.
    hr, minute, second = (int(fields[position]) for position in (3, 4, 5))
    return (hr, minute, second)

def get_ep_order(file_path):
    """Return the integer that follows the last ``_`` in the base name.

    Used as a sort key for episode folders, e.g. ``.../images/ep_10`` -> ``10``.

    Args:
        file_path: Path whose base name ends in ``_<integer>``. No file
            extension is stripped, so the trailing field must be integer text.

    Returns:
        The trailing field as an int.

    Raises:
        ValueError: If the trailing field is not an integer.
    """
    trailing_field = os.path.basename(file_path).rsplit('_', 1)[-1]
    return int(trailing_field)

def get_frame_order(file_path):
    """Return the frame number encoded at the end of an image file name.

    Takes the text after the last ``_`` in the base name, keeps what precedes
    the first ``.`` and parses it as an integer, e.g. ``frame_12.png`` -> ``12``.

    Args:
        file_path: Path whose base name ends in ``_<integer>.<extension>``.

    Returns:
        The frame number as an int.

    Raises:
        ValueError: If the extracted text is not an integer.
    """
    trailing_field = os.path.basename(file_path).rsplit('_', 1)[-1]
    # Everything from the first dot on (the extension) is discarded.
    number_text = trailing_field.partition('.')[0]
    return int(number_text)

In [44]:
# Read the CSV files - build a dictionary with the data of every demonstration.
#
# Each file is parsed ONCE and the column groups are sliced out of that single
# frame by position (previously the same file was parsed five times).

csv_folder = "/fs/nexus-scratch/zahirmd/act/data/csv"
data = {}

# Highest column index used below is 12, so every file needs at least 13 columns.
min_csv_columns = 13

csv_files = glob.glob(os.path.join(csv_folder, '*.csv'))
# NOTE(review): the sort key is (hour, minute, second) only; the date fields of
# the file name are ignored, so recordings spanning several days would
# interleave. The image cell pairs folders with these demos purely by position.
sorted_file_paths = sorted(csv_files, key=get_timestamp_from_filename)

for n, csv_file in enumerate(sorted_file_paths):
    print(f"Reading CSV File: {csv_file}")
    csv_frame = pd.read_csv(csv_file, engine="python")

    # Fail fast with a clear message instead of silently slicing too few columns.
    if csv_frame.shape[1] < min_csv_columns:
        raise ValueError(
            f"{csv_file}: expected at least {min_csv_columns} columns, "
            f"found {csv_frame.shape[1]}"
        )

    # Column 0, kept two-dimensional (N, 1) and with its parsed dtype.
    timestamps = csv_frame.iloc[:, [0]]

    # Columns 7..12 as float64, divided by 4096.
    # NOTE(review): this range overlaps the gripper column (7) and the xyz
    # columns (8..10) read below - confirm the intended column layout.
    encoder_values = csv_frame.iloc[:, 7:13].astype(np.float64) / 4096

    # Column 7 as float64, shape (N, 1).
    gripper_pos = np.array(csv_frame.iloc[:, [7]].astype(np.float64))

    # Columns 8..10 as float64.
    ee_pos_xyz = csv_frame.iloc[:, 8:11].astype(np.float64)

    # Columns 11..13 (read as `ee_pos_rpy` before) are no longer loaded:
    # nothing stored them in `data`.

    # One nested dictionary per demonstration, keyed by its position in the
    # sorted file list.
    data[f'demo_{n}'] = {
        'timestamps': np.array(timestamps),
        'encoder_values': np.array(encoder_values),
        'gripper_pos': gripper_pos,
        'ee_pos_xyz': np.array(ee_pos_xyz),
    }
    print(f"length of demo: {len(data[f'demo_{n}']['gripper_pos'])}")

print("----------------------------")
print(f"Total number of demos: {len(data)}")
print("----------------------------")
print(f"data: {data.keys()}")

Reading CSV File: /fs/nexus-scratch/zahirmd/act/data/csv/2025_06_14_13_23_45_ep_0.csv
length of demo: 168
Reading CSV File: /fs/nexus-scratch/zahirmd/act/data/csv/2025_06_14_13_27_30_ep_1.csv
length of demo: 123
Reading CSV File: /fs/nexus-scratch/zahirmd/act/data/csv/2025_06_14_13_30_07_ep_2.csv
length of demo: 124
Reading CSV File: /fs/nexus-scratch/zahirmd/act/data/csv/2025_06_14_13_31_24_ep_3.csv
length of demo: 100
Reading CSV File: /fs/nexus-scratch/zahirmd/act/data/csv/2025_06_14_13_34_23_ep_4.csv
length of demo: 157
Reading CSV File: /fs/nexus-scratch/zahirmd/act/data/csv/2025_06_14_13_35_49_ep_5.csv
length of demo: 143
Reading CSV File: /fs/nexus-scratch/zahirmd/act/data/csv/2025_06_14_13_38_49_ep_6.csv
length of demo: 116
Reading CSV File: /fs/nexus-scratch/zahirmd/act/data/csv/2025_06_14_13_55_01_ep_7.csv
length of demo: 103
Reading CSV File: /fs/nexus-scratch/zahirmd/act/data/csv/2025_06_14_14_09_22_ep_8.csv
length of demo: 105
Reading CSV File: /fs/nexus-scratch/zahirmd/ac

In [45]:
# Read Images [cam0 - Env Camera] folder - attach the frames of every episode
# folder to the matching entry of `data` under the key 'cam0'.

import cv2

# Not read by any cell visible here; kept so that existing notebook state that
# may rely on the name is unaffected.
images = ""

image_folders = glob.glob(os.path.join("/fs/nexus-scratch/zahirmd/act/data/images", '*'))
# Episode folders are ordered by the integer after the last '_' (ep_0, ep_1, ...).
sorted_image_folders = sorted(image_folders, key=get_ep_order)

for n, image_folder in enumerate(sorted_image_folders):
    print(f"Reading Image Folder: {image_folder}")

    # Folders are paired with demos purely by position; detect a missing demo
    # before spending time on loading the images.
    demo_key = f'demo_{n}'
    if demo_key not in data:
        raise KeyError(
            f"{demo_key} not found in data: image folder {image_folder} has no "
            f"matching CSV demo ({len(data)} demos loaded)"
        )

    image_files = glob.glob(os.path.join(image_folder, '*.png'))

    # Sort the image files by their frame number
    sorted_image_files = sorted(image_files, key=get_frame_order)

    # Load every frame; cv2.imread signals an unreadable file by returning None
    # instead of raising, which would silently corrupt the stacked array.
    cv_images = []
    for image_file in sorted_image_files:
        cv_image = cv2.imread(image_file)
        if cv_image is None:
            raise OSError(f"Could not read image file: {image_file}")
        cv_images.append(cv_image)

    print(f"Number of images: {len(cv_images)}")

    # The frames are expected to line up one-to-one with the CSV rows.
    num_csv_rows = len(data[demo_key]['timestamps'])
    if len(cv_images) != num_csv_rows:
        print(
            f"WARNING: {demo_key} has {num_csv_rows} CSV rows but "
            f"{len(cv_images)} images in {image_folder}"
        )

    # Store the stacked frames next to the CSV data of the same demonstration
    data[demo_key]['cam0'] = np.array(cv_images)


print("----------------------------------------")
print(f"Total number of images folders: {len(sorted_image_folders)}")
print("----------------------------------------")

Reading Image Folder: /fs/nexus-scratch/zahirmd/act/data/images/ep_0
Number of images: 168
Reading Image Folder: /fs/nexus-scratch/zahirmd/act/data/images/ep_1
Number of images: 123
Reading Image Folder: /fs/nexus-scratch/zahirmd/act/data/images/ep_2
Number of images: 124
Reading Image Folder: /fs/nexus-scratch/zahirmd/act/data/images/ep_3
Number of images: 100
Reading Image Folder: /fs/nexus-scratch/zahirmd/act/data/images/ep_4
Number of images: 157
Reading Image Folder: /fs/nexus-scratch/zahirmd/act/data/images/ep_5
Number of images: 143
Reading Image Folder: /fs/nexus-scratch/zahirmd/act/data/images/ep_6
Number of images: 116
Reading Image Folder: /fs/nexus-scratch/zahirmd/act/data/images/ep_7
Number of images: 103
Reading Image Folder: /fs/nexus-scratch/zahirmd/act/data/images/ep_8
Number of images: 105
Reading Image Folder: /fs/nexus-scratch/zahirmd/act/data/images/ep_9
Number of images: 104
Reading Image Folder: /fs/nexus-scratch/zahirmd/act/data/images/ep_10
Number of images: 80

In [46]:
# Reduce 'cam0' and 'encoder_values' of every demo to the length of the
# shortest demo by repeatedly dropping every other sample.

def _subsample_indices(length, target):
    """Pick ``target`` ascending indices out of ``range(length)``.

    The index list is halved (every second entry kept) until it is no longer
    than ``target``. If the last halving undershoots, entries dropped in that
    halving are added back until exactly ``target`` indices remain.

    Args:
        length: Number of samples available.
        target: Number of samples wanted; must be at least 1.

    Returns:
        Sorted list of ints. It has ``target`` entries when
        ``length >= target``, otherwise all ``length`` indices.

    Raises:
        ValueError: If ``target`` is below 1 (halving a non-empty list never
            reaches zero entries, so the loop could not terminate).
    """
    if target < 1:
        raise ValueError(f"target must be >= 1, got {target}")

    indices = list(range(length))
    while len(indices) > target:
        kept = indices[::2]
        if len(kept) < target:
            # Entries removed by this halving, in ascending order.
            dropped = indices[1::2]
            needed = min(target - len(kept), len(dropped))
            step = max(len(dropped) // needed, 1)
            # NOTE(review): with step == 1 this re-adds the EARLIEST dropped
            # entries, so the result is denser at the start of the demo than
            # at the end. Confirm this uneven spacing is acceptable.
            indices = sorted(kept + dropped[::step][:needed])
            break
        indices = kept
    return indices


min_length = min(len(data[demo]['timestamps']) for demo in data.keys())
if min_length == 0:
    empty_demos = [demo for demo in data.keys() if len(data[demo]['timestamps']) == 0]
    raise ValueError(f"Cannot reduce to length 0; demos without samples: {empty_demos}")

for demo in data.keys():
    demo_data = data[demo]

    length = len(demo_data['timestamps'])
    indices = _subsample_indices(length, min_length)

    # NOTE(review): only these two keys are shortened; 'timestamps',
    # 'gripper_pos' and 'ee_pos_xyz' keep their original length.
    for key in ['cam0', "encoder_values"]:
        arr = demo_data[key]
        if len(arr) >= max(indices) + 1:  # every selected index exists in arr
            demo_data[key] = arr[indices]
        else:
            # Fallback: arr is shorter than the CSV-derived length, so take the
            # leading samples. This is also the path taken when the cell is
            # re-run, because 'timestamps' is never shortened while these
            # arrays already are.
            demo_data[key] = arr[:min_length]

    print(f"{demo}: Reduced to {len(indices)} steps (originally {length})")


    

demo_0: Reduced to 64 steps (originally 168)
demo_1: Reduced to 64 steps (originally 123)
demo_2: Reduced to 64 steps (originally 124)
demo_3: Reduced to 64 steps (originally 100)
demo_4: Reduced to 64 steps (originally 157)
demo_5: Reduced to 64 steps (originally 143)
demo_6: Reduced to 64 steps (originally 116)
demo_7: Reduced to 64 steps (originally 103)
demo_8: Reduced to 64 steps (originally 105)
demo_9: Reduced to 64 steps (originally 104)
demo_10: Reduced to 64 steps (originally 80)
demo_11: Reduced to 64 steps (originally 84)
demo_12: Reduced to 64 steps (originally 76)
demo_13: Reduced to 64 steps (originally 69)
demo_14: Reduced to 64 steps (originally 64)
demo_15: Reduced to 64 steps (originally 83)
demo_16: Reduced to 64 steps (originally 93)
demo_17: Reduced to 64 steps (originally 97)
demo_18: Reduced to 64 steps (originally 91)
demo_19: Reduced to 64 steps (originally 100)
demo_20: Reduced to 64 steps (originally 83)
demo_21: Reduced to 64 steps (originally 78)
demo_22: 

In [47]:
# Derive the action sequence of every demo from its encoder readings
for demo, demo_data in data.items():
    # Drop the first sample: row i of the actions is row i + 1 of the
    # encoder values.
    shifted_encoders = demo_data['encoder_values'][1:]

    # Stored as a float64 array under the "actions" key of the same demo
    demo_data["actions"] = shifted_encoders.astype(np.float64)

    print(f"Actions Added: Dimension: {demo_data['actions'].shape}")

Actions Added: Dimension: (63, 6)
Actions Added: Dimension: (63, 6)
Actions Added: Dimension: (63, 6)
Actions Added: Dimension: (63, 6)
Actions Added: Dimension: (63, 6)
Actions Added: Dimension: (63, 6)
Actions Added: Dimension: (63, 6)
Actions Added: Dimension: (63, 6)
Actions Added: Dimension: (63, 6)
Actions Added: Dimension: (63, 6)
Actions Added: Dimension: (63, 6)
Actions Added: Dimension: (63, 6)
Actions Added: Dimension: (63, 6)
Actions Added: Dimension: (63, 6)
Actions Added: Dimension: (63, 6)
Actions Added: Dimension: (63, 6)
Actions Added: Dimension: (63, 6)
Actions Added: Dimension: (63, 6)
Actions Added: Dimension: (63, 6)
Actions Added: Dimension: (63, 6)
Actions Added: Dimension: (63, 6)
Actions Added: Dimension: (63, 6)
Actions Added: Dimension: (63, 6)
Actions Added: Dimension: (63, 6)
Actions Added: Dimension: (63, 6)
Actions Added: Dimension: (63, 6)
Actions Added: Dimension: (63, 6)
Actions Added: Dimension: (63, 6)
Actions Added: Dimension: (63, 6)
Actions Added:

In [48]:
# Create hdf5 - one file per demonstration, named episode_<i>.hdf5 and written
# relative to the current working directory.
import h5py

for episode_idx, demo_record in enumerate(data.values()):

    with h5py.File(f'episode_{episode_idx}.hdf5', 'w') as f:

        # File-level attribute 'sim' is set to False for every episode
        f.attrs['sim'] = False

        # Top-level dataset holding the actions
        f.create_dataset("action", data=demo_record["actions"])

        # Group layout: observations/images/cam_image and observations/qpos
        obs_group = f.create_group("observations")
        img_group = obs_group.create_group("images")

        # The last frame and the last encoder row are left out ([:-1]), so both
        # have one entry fewer than the arrays they are taken from.
        img_group.create_dataset("cam_image", data=demo_record["cam0"][:-1])
        obs_group.create_dataset("qpos", data=demo_record["encoder_values"][:-1])

print("----------------------------")
print("Dataset - hdf5 Generated.. Wohoooo!")
print("----------------------------")

----------------------------
Dataset - hdf5 Generated.. Wohoooo!
----------------------------
