In [35]:
# Import necessary Libraries
import pandas as pd
import numpy as np
import os
import glob

In [36]:
#   Utility Functions
# 
# Helpers: parse the (hour, minute, second) timestamp from a file name, and convert between Euler angles and quaternions

from scipy.spatial.transform import Rotation as R

def get_timestamp_from_filename(file_path):
    """Return the (hour, minute, second) sort key encoded at the end of a file or folder name.

    The last three underscore-separated fields of the base name are read as
    hour, minute and second, e.g. ``2024_09_19_18_09_11.csv`` -> ``(18, 9, 11)``.
    Everything after the first ``.`` in the final field (such as ``.csv``) is ignored.
    Earlier fields (the date) are not part of the key.

    Raises:
        IndexError: if the base name has fewer than three underscore-separated fields.
        ValueError: if any of the three fields is not an integer.
    """
    fields = os.path.basename(file_path).split('_')
    hour_txt, minute_txt, second_txt = fields[-3], fields[-2], fields[-1]
    # Strip the extension (".csv", ".png", ...) from the trailing field.
    second_txt = second_txt.partition('.')[0]
    return tuple(int(txt) for txt in (hour_txt, minute_txt, second_txt))

def eul_2_quat(rpy):
    """Convert Euler angles (radians) to quaternions.

    The angles are interpreted with the same ``'xyz'`` convention that
    ``quat_2_eul`` uses, so the two functions are inverses of each other.
    (The input must be treated as Euler angles, not as a rotation vector:
    ``Rotation.from_rotvec`` would give a different rotation whenever more
    than one angle is non-zero.)

    Args:
        rpy: array-like of shape (3,) or (N, 3) holding the three angles in radians.
            NOTE(review): assumes the logged roll/pitch/yaw follow scipy's
            extrinsic 'xyz' convention - confirm against the robot driver.

    Returns:
        numpy array of shape (4,) or (N, 4) in scipy's scalar-last (x, y, z, w) order.
    """
    rotation = R.from_euler('xyz', rpy, degrees=False)
    return rotation.as_quat()

def quat_2_eul(quat):
    """Convert quaternions to 'xyz' Euler angles in radians.

    Args:
        quat: array-like of shape (4,) or (N, 4) in scipy's scalar-last
            (x, y, z, w) order.

    Returns:
        numpy array of shape (3,) or (N, 3) with the angles in radians.
    """
    return R.from_quat(quat).as_euler('xyz', degrees=False)

In [37]:
# Read the CSV files - build a dictionary with one entry per demonstration

csv_folder = "data/push_task/data"
data = {}

csv_files = glob.glob(os.path.join(csv_folder, '*.csv'))
# Order the demonstrations by the time encoded in each file name, so that
# demo_0 is the earliest recording.
sorted_file_paths = sorted(csv_files, key=get_timestamp_from_filename)
for n, csv_file in enumerate(sorted_file_paths):
    print(f"Reading CSV File: {csv_file}")

    # Parse the file once and slice the column groups out of the single frame
    # (columns are addressed by position: 0 | 1-6 | 7 | 8-10 | 11-13).
    log = pd.read_csv(csv_file, usecols=range(14), engine="python")
    timestamps = log.iloc[:, [0]]
    joint_angles = log.iloc[:, 1:7].astype(np.float64)
    gripper_pos = np.array(log.iloc[:, [7]].astype(np.float64))
    ee_pos_xyz = log.iloc[:, 8:11].astype(np.float64)
    # Orientation is loaded (and validated as numeric) but not stored; converting it
    # with eul_2_quat into an 'ee_pos_quat' entry is currently disabled.
    ee_pos_rpy = log.iloc[:, 11:14].astype(np.float64)

    # One nested dictionary per demonstration; every value is a numpy array with
    # one row per logged sample.
    data[f'demo_{n}'] = {
        'timestamps': np.array(timestamps),
        'joint_angles': np.array(joint_angles),
        'gripper_pos': gripper_pos,
        'ee_pos_xyz': np.array(ee_pos_xyz),
    }
    print(f"length of demo: {len(data[f'demo_{n}']['gripper_pos'])}")

print("----------------------------")
print(f"Total number of demos: {len(data)}")
print("----------------------------")
print(f"data: {data.keys()}")



Reading CSV File: data/push_task/data/2024_09_19_18_09_11.csv
length of demo: 110
Reading CSV File: data/push_task/data/2024_09_19_18_13_52.csv
length of demo: 108
Reading CSV File: data/push_task/data/2024_09_19_18_16_20.csv
length of demo: 74
Reading CSV File: data/push_task/data/2024_09_19_18_21_37.csv
length of demo: 80
Reading CSV File: data/push_task/data/2024_09_19_18_24_26.csv
length of demo: 65
Reading CSV File: data/push_task/data/2024_09_19_18_26_21.csv
length of demo: 71
Reading CSV File: data/push_task/data/2024_09_19_18_28_04.csv
length of demo: 61
Reading CSV File: data/push_task/data/2024_09_19_18_29_44.csv
length of demo: 69
Reading CSV File: data/push_task/data/2024_09_19_18_31_30.csv
length of demo: 76
Reading CSV File: data/push_task/data/2024_09_19_18_34_55.csv
length of demo: 66
----------------------------
Total number of demos: 10
----------------------------
data: dict_keys(['demo_0', 'demo_1', 'demo_2', 'demo_3', 'demo_4', 'demo_5', 'demo_6', 'demo_7', 'demo_8

In [38]:
# Read Images [cam0 - Env Camera] folder - attach the frames of each recording to its demonstration

import cv2

image_folders = glob.glob(os.path.join("data/push_task/cam0", '*'))
# Order the recording folders by the time encoded in their names so that
# folder n is paired with demo_n (the CSV files are ordered the same way).
sorted_image_folders = sorted(image_folders, key=get_timestamp_from_filename)
for n, image_folder in enumerate(sorted_image_folders):
    print(f"Reading Image Folder: {image_folder}")
    image_files = glob.glob(os.path.join(image_folder, '*.png'))

    # Order the frames by the time encoded in each file name.
    # NOTE(review): the key only has one-second resolution; if several frames share
    # the same second their relative order is whatever glob returned - confirm the
    # frame naming scheme.
    sorted_image_files = sorted(image_files, key=get_timestamp_from_filename)

    cv_images = []
    for image_file in sorted_image_files:
        cv_image = cv2.imread(image_file)
        if cv_image is None:
            # cv2.imread reports an unreadable or corrupt file by returning None
            # instead of raising; fail here with the offending path.
            raise IOError(f"Could not read image: {image_file}")
        cv_image = cv2.resize(cv_image, (96, 96))
        # Channels-first layout: (96, 96, 3) -> (3, 96, 96).
        # cv2.imread loads BGR by default; no conversion to RGB is done here.
        cv_image = np.transpose(cv_image, (2, 0, 1))
        cv_images.append(cv_image)

    print(f"Number of images: {len(cv_images)}")
    # Store the frames next to the robot log of the same demonstration
    data[f'demo_{n}']['cam0'] = np.array(cv_images)


print("----------------------------------------")
print(f"Total number of images folders: {len(sorted_image_folders)}")
print("----------------------------------------")


Reading Image Folder: data/push_task/cam0/2024_09_19_18_08_45
Number of images: 125
Reading Image Folder: data/push_task/cam0/2024_09_19_18_13_28
Number of images: 143
Reading Image Folder: data/push_task/cam0/2024_09_19_18_15_49
Number of images: 99
Reading Image Folder: data/push_task/cam0/2024_09_19_18_21_13
Number of images: 105
Reading Image Folder: data/push_task/cam0/2024_09_19_18_24_02
Number of images: 87
Reading Image Folder: data/push_task/cam0/2024_09_19_18_25_54
Number of images: 92
Reading Image Folder: data/push_task/cam0/2024_09_19_18_27_39
Number of images: 79
Reading Image Folder: data/push_task/cam0/2024_09_19_18_29_20
Number of images: 82
Reading Image Folder: data/push_task/cam0/2024_09_19_18_31_06
Number of images: 102
Reading Image Folder: data/push_task/cam0/2024_09_19_18_34_16
Number of images: 86
----------------------------------------
Total number of images folders: 10
----------------------------------------


In [39]:
# Read Images [cam1 - In-Hand Camera] folder - attach the frames of each recording to its demonstration

import cv2

image_folders = glob.glob(os.path.join("data/push_task/cam1", '*'))
# Order the recording folders by the time encoded in their names so that
# folder n is paired with demo_n (the CSV files are ordered the same way).
sorted_image_folders = sorted(image_folders, key=get_timestamp_from_filename)
for n, image_folder in enumerate(sorted_image_folders):
    print(f"Reading Image Folder: {image_folder}")
    image_files = glob.glob(os.path.join(image_folder, '*.png'))

    # Order the frames by the time encoded in each file name.
    # NOTE(review): the key only has one-second resolution; if several frames share
    # the same second their relative order is whatever glob returned - confirm the
    # frame naming scheme.
    sorted_image_files = sorted(image_files, key=get_timestamp_from_filename)

    cv_images = []
    for image_file in sorted_image_files:
        cv_image = cv2.imread(image_file)
        if cv_image is None:
            # cv2.imread reports an unreadable or corrupt file by returning None
            # instead of raising; fail here with the offending path.
            raise IOError(f"Could not read image: {image_file}")
        # cv_image = cv2.rotate(cv_image, cv2.ROTATE_90_CLOCKWISE)       # Temporary Edit
        cv_image = cv2.resize(cv_image, (96, 96))
        # Channels-first layout: (96, 96, 3) -> (3, 96, 96).
        # cv2.imread loads BGR by default; no conversion to RGB is done here.
        cv_image = np.transpose(cv_image, (2, 0, 1))
        cv_images.append(cv_image)

    print(f"Number of images: {len(cv_images)}")
    # Store the frames next to the robot log of the same demonstration
    data[f'demo_{n}']['cam1'] = np.array(cv_images)


print("----------------------------------------")
print(f"Total number of images folders: {len(sorted_image_folders)}")
print("----------------------------------------")


Reading Image Folder: data/push_task/cam1/2024_09_19_18_08_52
Number of images: 126
Reading Image Folder: data/push_task/cam1/2024_09_19_18_13_32
Number of images: 144
Reading Image Folder: data/push_task/cam1/2024_09_19_18_15_57
Number of images: 100
Reading Image Folder: data/push_task/cam1/2024_09_19_18_21_18
Number of images: 106
Reading Image Folder: data/push_task/cam1/2024_09_19_18_24_07
Number of images: 88
Reading Image Folder: data/push_task/cam1/2024_09_19_18_25_58
Number of images: 94
Reading Image Folder: data/push_task/cam1/2024_09_19_18_27_44
Number of images: 81
Reading Image Folder: data/push_task/cam1/2024_09_19_18_29_24
Number of images: 83
Reading Image Folder: data/push_task/cam1/2024_09_19_18_31_11
Number of images: 103
Reading Image Folder: data/push_task/cam1/2024_09_19_18_34_21
Number of images: 87
----------------------------------------
Total number of images folders: 10
----------------------------------------


In [40]:
# Compare the number of logged robot samples with the number of frames from each camera

for demo, streams in data.items():
    n_samples = len(streams['ee_pos_xyz'])
    n_cam0 = len(streams['cam0'])
    n_cam1 = len(streams['cam1'])
    print(f"Data {demo}: {n_samples} timestamps, cam0: {n_cam0}, cam1: {n_cam1}")
    

Data demo_0: 110 timestamps, cam0: 125, cam1: 126
Data demo_1: 108 timestamps, cam0: 143, cam1: 144
Data demo_2: 74 timestamps, cam0: 99, cam1: 100
Data demo_3: 80 timestamps, cam0: 105, cam1: 106
Data demo_4: 65 timestamps, cam0: 87, cam1: 88
Data demo_5: 71 timestamps, cam0: 92, cam1: 94
Data demo_6: 61 timestamps, cam0: 79, cam1: 81
Data demo_7: 69 timestamps, cam0: 82, cam1: 83
Data demo_8: 76 timestamps, cam0: 102, cam1: 103
Data demo_9: 66 timestamps, cam0: 86, cam1: 87


In [41]:
# Match the lengths of timestamps and images

def match_lengths(data):
    """Trim every stream of every demo, in place, to the length of its shortest stream.

    For each demo the target length is the minimum of the number of robot samples
    (length of ``'ee_pos_xyz'``), ``'cam0'`` frames and ``'cam1'`` frames. Surplus
    entries are dropped from the *beginning* of each stream, so the last entries of
    all streams stay paired.

    NOTE(review): dropping from the front assumes the streams end together and only
    differ in when they started; frames are not aligned by their own timestamps -
    confirm this matches how the recordings were made.

    Args:
        data: dict mapping demo name -> dict of per-step sequences. Each demo must
            contain ``'ee_pos_xyz'``, ``'cam0'`` and ``'cam1'``; every other entry is
            treated as a robot-log stream with the same length as ``'ee_pos_xyz'``.

    Returns:
        None. ``data`` is modified in place and a one-line summary is printed per demo.
    """
    camera_keys = ("cam0", "cam1")

    for demo in data:
        streams = data[demo]
        num_timestamps = len(streams['ee_pos_xyz'])
        target = min(num_timestamps, len(streams['cam0']), len(streams['cam1']))

        # Only existing keys are reassigned, so iterating the dict here is safe.
        for key in streams:
            if key in camera_keys:
                excess = len(streams[key]) - target
            else:
                # Robot-log streams all share the length of 'ee_pos_xyz'.
                excess = num_timestamps - target
            if excess:
                streams[key] = streams[key][excess:]

        print(f"After matching: Data {demo}: {len(data[demo]['ee_pos_xyz'])} timestamps, cam0: {len(data[demo]['cam0'])}, cam1: {len(data[demo]['cam1'])}")

# Trim all demos in `data` in place so the robot log, cam0 and cam1 have equal lengths
match_lengths(data)

0
After matching: Data demo_0: 110 timestamps, cam0: 110, cam1: 110
0
After matching: Data demo_1: 108 timestamps, cam0: 108, cam1: 108
0
After matching: Data demo_2: 74 timestamps, cam0: 74, cam1: 74
0
After matching: Data demo_3: 80 timestamps, cam0: 80, cam1: 80
0
After matching: Data demo_4: 65 timestamps, cam0: 65, cam1: 65
0
After matching: Data demo_5: 71 timestamps, cam0: 71, cam1: 71
0
After matching: Data demo_6: 61 timestamps, cam0: 61, cam1: 61
0
After matching: Data demo_7: 69 timestamps, cam0: 69, cam1: 69
0
After matching: Data demo_8: 76 timestamps, cam0: 76, cam1: 76
0
After matching: Data demo_9: 66 timestamps, cam0: 66, cam1: 66


In [42]:
# # # Preprocess the Gripper Position

# # indicies = np.array([44, 44, 43, 36, 44, 40, 34, 42, 48, 40])
# # print(indicies)

# # for n, i in enumerate(data.keys()):
# #     gripper_pos = data[i]['gripper_pos']

# #     # -------------- Change gripper pos at corresponding index --------------------
# #     gripper_pos[:indicies[n]] = 0.0
# #     gripper_pos[indicies[n]:] = 0.5

# #     data[i]['gripper_pos'] = gripper_pos

# # print("Gripper Poses Updated")


# ## Approximate gripper position - 0.6

# for demo in data.keys():
#     gripper_pos = data[demo]['gripper_pos']
#     gripper_pos_new = np.where(gripper_pos > 0.1, 0.6, 0.0)
#     data[demo]['gripper_pos'] = gripper_pos_new 
    
# print("--------------------------------------")
# print("Gripper Pos set to 0.0 or 0.6")
# print("--------------------------------------")

In [43]:
# Metaworld Dataset - HDF5
#
# Actions - per-step end-effector displacement (next - current) followed by the gripper position


for demo in data.keys():
    demo_data = data[demo]
    ee_pos_xyz = demo_data['ee_pos_xyz']
    gripper_pos = demo_data['gripper_pos']

    # T logged positions give T-1 displacements: row t is ee_pos_xyz[t+1] - ee_pos_xyz[t]
    actions = np.diff(ee_pos_xyz, axis=0)

    # Append the gripper position logged at the start of each step (row t, not t+1),
    # giving one [dx, dy, dz, gripper] row per step.
    actions = np.concatenate((actions, gripper_pos[:-1]), axis=1)
    data[demo]["actions"] = actions

    print(f"Actions Added: Dimension: {data[demo]['actions'].shape}")


Actions Added: Dimension: (109, 4)
Actions Added: Dimension: (107, 4)
Actions Added: Dimension: (73, 4)
Actions Added: Dimension: (79, 4)
Actions Added: Dimension: (64, 4)
Actions Added: Dimension: (70, 4)
Actions Added: Dimension: (60, 4)
Actions Added: Dimension: (68, 4)
Actions Added: Dimension: (75, 4)
Actions Added: Dimension: (65, 4)


In [44]:
# Check the actions: replay demo_0's actions from its first pose and print every step

# Start state = first end-effector position plus a placeholder gripper value of 0.
# np.append builds a new array, so the stored data is not modified by the replay.
first_pos = np.append(data['demo_0']['ee_pos_xyz'][0], 0)

print(f"first_pos: {first_pos}")

# Add actions
traj = []
check_actions = data["demo_0"]["actions"]
for step in check_actions:
    delta_xyz, gripper = step[:3], step[3:]
    first_pos[:3] += delta_xyz   # accumulate the displacement
    first_pos[3:] = gripper      # gripper value is absolute, not a delta
    print(first_pos)
# print(traj)

first_pos: [-0.13156664 -0.29868215  0.13892271  0.        ]
[-0.1315755  -0.29867592  0.13891415  0.97647059]
[-0.13157829 -0.29868213  0.13892212  0.97647059]
[-0.13157545 -0.2986816   0.13891919  0.97647059]
[-0.13156216 -0.29866601  0.13890684  0.97647059]
[-0.13159122 -0.29868046  0.1355753   0.97647059]
[-0.13157661 -0.29869771  0.13282828  0.97647059]
[-0.13158645 -0.29869324  0.13019808  0.97647059]
[-0.13159726 -0.29873024  0.12331053  0.97647059]
[-0.13157956 -0.29876661  0.11785576  0.97647059]
[-0.12900118 -0.29875361  0.11268324  0.97647059]
[-0.12529046 -0.29880881  0.1055784   0.97647059]
[-0.12259347 -0.29879958  0.10016354  0.97647059]
[-0.11891834 -0.29886077  0.09289456  0.97647059]
[-0.1161137  -0.29884411  0.08723928  0.97647059]
[-0.1135367  -0.2988539   0.08200964  0.97647059]
[-0.11316951 -0.29890079  0.07482324  0.97647059]
[-0.11316234 -0.29900134  0.06939508  0.97647059]
[-0.11314367 -0.29896604  0.06422421  0.97647059]
[-0.11314252 -0.2990449   0.05712804  0

In [45]:
# Build the Metaworld-style HDF5 dataset:
#   data/<demo>/{actions, dones, rewards, states} and data/<demo>/obs/{eye_in_hand_image, front_image, prop}


import h5py

# Which loaded camera stream is written under which observation key.
# NOTE(review): the loading cells label cam0 as the env camera and cam1 as the
# in-hand camera, yet cam0 is stored as "eye_in_hand_image" - confirm this mapping
# is intended before training on the file.
obs_key_to_camera = {"eye_in_hand_image": "cam0", "front_image": "cam1"}

with h5py.File('data.hdf5', 'w') as f:

    data_group = f.create_group("data")

    for demo, demo_data in data.items():

        demo_group = data_group.create_group(demo)
        num_steps = len(demo_data["actions"])

        # Actions: one row per step
        demo_group.create_dataset("actions", data=demo_data["actions"])

        # Dones: only the final step is flagged
        dones = np.zeros(num_steps)
        dones[-1] = 1
        demo_group.create_dataset("dones", data=dones)

        # Observations: the last sample of every stream is dropped because it has
        # no following action.
        obs_group = demo_group.create_group("obs")
        for obs_key, camera in obs_key_to_camera.items():
            obs_group.create_dataset(obs_key, data=demo_data[camera][:-1])
        # Proprioception: end-effector xyz followed by the gripper position
        prop = np.concatenate((demo_data["ee_pos_xyz"], demo_data["gripper_pos"]), axis=1)
        obs_group.create_dataset("prop", data=prop[:-1])

        # Rewards: sparse, 1 on the final step only
        rewards = np.zeros(num_steps)
        rewards[-1] = 1
        demo_group.create_dataset("rewards", data=rewards)

        # States: same content as obs/prop
        demo_group.create_dataset("states", data=prop[:-1])

print("----------------------------")
print("Dataset - hdf5 Generated.. Wohoooo!")
print("----------------------------")



----------------------------
Dataset - hdf5 Generated.. Wohoooo!
----------------------------


In [144]:
# # Lets build dataset - Robosuite Dataset
# # 
# # 1. -- Ignored attributes from default dataset --
# #       obs - [object, robot0_eef_vel_ang, robot0_eef_vel_lin, robot0_gripper_qvel, 
# #               robot0_joint_pos_cos, robot0_joint_pos_sin, robot0_joint_vel_cos, 
# #               robot0_joint_vel_sin, robot0_joint_vel]
# # 
# # 2. Skipped some columns in of states

# import h5py

# with h5py.File('data/dataset_2024_7_19/data_robosuite.hdf5', 'w') as f:

#     data_group_r = f.create_group("data")

#     for demo in data.keys():
        
#         demo_group_r = data_group_r.create_group(demo)

#         # Create actions
#         demo_group_r.create_dataset("actions", data=data[demo]["actions"])

#         # Create dones
#         dones_r = np.zeros(len(data[demo]["actions"]))
#         dones_r[-1] = 1
#         demo_group_r.create_dataset("dones", data=dones_r)

#         # Create obs group
#         obs_group_r = demo_group_r.create_group("obs")
#         # Create agentview_image
#         obs_group_r.create_dataset("agentview_image", data=data[demo]["images"][:-1])
#         # Create robot0_eef_pos
#         obs_group_r.create_dataset("robot0_eef_pos", data=data[demo]["ee_pos_rpy"][:, :3][:-1])
#         # Create robot0_eef_rpy
#         obs_group_r.create_dataset("robot0_eef_rpy", data=data[demo]["ee_pos_rpy"][:, 3:][:-1])
#         # Create robot0_gripper_qpos
#         obs_group_r.create_dataset("robot0_gripper_qpos", data=data[demo]["gripper_pos"][:-1])
#         # Create robot0_joint_pos
#         obs_group_r.create_dataset("robot0_joint_pos", data=data[demo]["joint_angles"][:-1])

#         # Create rewards
#         rewards_r = np.zeros(len(data[demo]["actions"]))
#         rewards_r[-1] = 1
#         demo_group_r.create_dataset("rewards", data=rewards_r)

#         # Create states
#         # [time, joint_angles, gripper_pos, ee_pos_rpy, first_obj_pose, second_obj_pose]
#         states = np.concatenate((data[demo]["joint_angles"], data[demo]["gripper_pos"], data[demo]["ee_pos_rpy"]), axis=1)
#         demo_group_r.create_dataset("states", data=states[:-1])



In [53]:
# # Dataset - Metaworld [Bad Data - demo1 and demo6]


# # def change_gripper_pos(gripper_pos_array):
# #     indices = [46, 49, 47, 47, 49, 43, 41, 50]
# #     gripper_pos = np.zeros()



# import h5py

# with h5py.File('data/dataset_2024_7_25_2/data_processed_new.hdf5', 'w') as f:


#     data_group_b = f.create_group("data")

#     indices = [46, 49, 47, 47, 49, 43, 41, 50]


#     n = 0
#     for demo in data.keys():

#         if demo == "demo_5":
#             continue

#         demo_group_b = data_group_b.create_group(f"demo_{n}")

#         # Create action
#         demo_group_b.create_dataset("actions", data=data[demo]["actions"])

#         # Create dones
#         dones_b = np.zeros(len(data[demo]["actions"]))
#         dones_b[-1] = 1
#         demo_group_b.create_dataset("dones", data=dones_b)


#         # Create obs group
#         obs_group_b = demo_group_b.create_group("obs")
#         # Create corner2_image
#         obs_group_b.create_dataset("eye_in_hand_image", data=data[demo]["cam0"][:-1])
#         obs_group_b.create_dataset("front_image", data=data[demo]["cam1"][:-1])
#         # obs_group_b.create_dataset("eye_in_hand_image", data=data[demo]["images"][:-1])
#         # Create prop
#         # ee_pos = np.concatenate((data[demo]["ee_pos_xyz"], data[demo]["ee_pos_quat"]), axis=1)
#         prop_b = np.concatenate((data[demo]["ee_pos_xyz"], data[demo]["gripper_pos"]), axis=1)
#         obs_group_b.create_dataset("prop", data=prop_b[:-1])


#         # Create rewards
#         rewards_b = np.zeros(len(data[demo]["actions"]))
#         rewards_b[-1] = 1
#         demo_group_b.create_dataset("rewards", data=rewards_b)


#         # Create states
#         states_b = np.concatenate((data[demo]["ee_pos_xyz"], data[demo]["gripper_pos"]), axis=1)
#         demo_group_b.create_dataset("states", data=states_b[:-1])

#         n+=1

# print("----------------------------")
# print("Processed Dataset - hdf5 Generated")
# print("----------------------------")

In [146]:
# ## --- remove some points in the dataset -----------

# import h5py
# import numpy as np

# with h5py.File('data/dataset_2024_07_25/data_removed_lift.hdf5', 'w') as f:
    
#     data_group_b = f.create_group("data")

#     indices = [46, 49, 47, 47, 49, 43, 41, 50]


#     n = 0
#     for demo in data.keys():

#         if demo == "demo_1":
#             continue

#         demo_group_b = data_group_b.create_group(f"demo_{n}")

#         # Create action
#         demo_group_b.create_dataset("actions", data=data[demo]["actions"][:indices[n]+1])

#         # Create dones
#         dones_b = np.zeros(len(data[demo]["actions"][:indices[n]+1]))
#         dones_b[-1] = 1
#         demo_group_b.create_dataset("dones", data=dones_b)


#         # Create obs group
#         obs_group_b = demo_group_b.create_group("obs")
#         # Create corner2_image
#         obs_group_b.create_dataset("corner2_image", data=data[demo]["cam0"][:indices[n]+1])
#         obs_group_b.create_dataset("front_image", data=data[demo]["cam1"][:indices[n]+1])
#         # obs_group_b.create_dataset("corner2_image", data=data[demo]["images"][:-1])
#         # Create prop
#         # ee_pos = np.concatenate((data[demo]["ee_pos_xyz"], data[demo]["ee_pos_quat"]), axis=1)
#         prop_b = np.concatenate((data[demo]["ee_pos_xyz"][:indices[n]+1], data[demo]["gripper_pos"][:indices[n]+1]), axis=1)
#         obs_group_b.create_dataset("prop", data=prop_b)


#         # Create rewards
#         rewards_b = np.zeros(len(data[demo]["actions"][:indices[n]+1]))
#         rewards_b[-1] = 1
#         demo_group_b.create_dataset("rewards", data=rewards_b)


#         # Create states
#         states_b = np.concatenate((data[demo]["ee_pos_xyz"][:indices[n]+1], data[demo]["gripper_pos"][:indices[n]+1]), axis=1)
#         demo_group_b.create_dataset("states", data=states_b)

#         n+=1

# print("----------------------------")
# print("Removed Dataset - hdf5 Generated")
# print("----------------------------")

In [None]:
## Get (min, max) of x, y, z over every demo stored in the dataset
# NOTE(review): this reads 'data/dataset_2024_07_25/datanew.hdf5', not the 'data.hdf5'
# written by the dataset-building cell - confirm this is the intended file.

import h5py
import numpy as np

with h5py.File('data/dataset_2024_07_25/datanew.hdf5', 'r') as f:

    data_group_b = f["data"]
    # The first three columns of each demo's "states" are taken as x, y, z.
    xyz_per_demo = [data_group_b[demo]["states"][:][:, :3] for demo in data_group_b]
    xyz = np.concatenate(xyz_per_demo)

    # Printed in the order: min x, min y, min z, max x, max y, max z
    for label, reduce_fn in (("min", np.min), ("max", np.max)):
        for col, axis_name in enumerate("xyz"):
            print(f"{label} {axis_name}: {np.round(reduce_fn(xyz[:, col]), 3)}")

In [51]:
# # Read HDF5 - HYRL Dataset Creation - Spase -> Dense

# import h5py
# import numpy as np


# with h5py.File('/home/amisha/HYBRID-RL/data/2S_2024_07_29/data_2S.hdf5', 'r') as f1:

#     with h5py.File('/home/amisha/HYBRID-RL/data/2S_2024_07_29/data_2S_hyrl.hdf5', 'w') as f2:

    
#         # indicies = [44, 44, 43, 36, 44, 40, 34, 42, 48, 40]
        
#         data_group = f1["data"]
        
#         n_data = f2.create_group("data")

#         for n, demo in enumerate(data_group):

            

#             # -------------- Change gripper pos at corresponding index --------------------
#             # actions = data_group[demo]["actions"][:]
#             # actions[:, 3][:indicies[n]] = 0.0
#             # actions[:, 3][indicies[n]:] = 1.0

#             # prop = data_group[demo]["obs"]["prop"][:]
#             # prop[:, 3][:indicies[n]] = 0.0
#             # prop[:, 3][indicies[n]:] = 1.0


#             # states = data_group[demo]["states"][:]
#             # states[:, 3][:indicies[n]] = 0.0
#             # states[:, 3][indicies[n]:] = 1.0


#             ## --------------------- Copy same data into new file ------------------------- ##

#             n_demo = n_data.create_group(demo)

#             # Create action
#             n_demo.create_dataset("actions", data=actions)


#             # Create dones
#             n_demo.create_dataset("dones", data=data_group[demo]["dones"])


#             # Create obs group
#             n_obs = n_demo.create_group("obs")
#             # Create corner2_image
#             n_obs.create_dataset("corner2_image", data=data_group[demo]["obs"]["corner2_image"])
#             n_obs.create_dataset("eye_in_hand_image", data=data_group[demo]["obs"]["eye_in_hand_image"])
#             # Create prop
#             n_obs.create_dataset("prop", data=prop)


#             # Create rewards
#             n_demo.create_dataset("rewards", data=data_group[demo]["rewards"])


#             # Create states
#             n_demo.create_dataset("states", data=data_group[demo]["states"])

#             ## ----------------------- New waypoint key ----------------------------------- ##
#             # position = indicies[n]-7
#             all_points = data_group[demo]["states"][:]
#             index = np.where(all_points[:, -1] == 0.5)[0][0]
#             position = int(index) - 7       # Adjust the value here
#             waypoint = np.array([all_points[position][:3]])
#             extended_waypoint = np.tile(waypoint, (len(all_points), 1))


#             n_demo.create_dataset("waypoint", data=extended_waypoint)

#             ## ----------------------- New mode key ----------------------------------- ##
#             mode = np.zeros((len(all_points), 1))
#             mode[position:] = 1.0

#             n_demo.create_dataset("mode", data=mode)


# print("New Dataset Created! Wohoooooo!!")


        

New Dataset Created! Wohoooooo!!


In [8]:
# Read HDF5 - HYRL Dataset Creation - Sparse -> Dense -> Sparse -> Dense [2S Dataset]
#
# Copies every demo from data_2S.hdf5 into data_2S_hyrl_SDSSD.hdf5 unchanged and adds
# two per-step datasets to each demo:
#   "waypoint" - (N, 3) array of target xyz positions; rows outside the three waypoint
#                segments stay zero
#   "mode"     - (N, 1) array that is 0 before the first waypoint index and 1 from it onwards
# where N is the number of rows in the demo's "states".
# NOTE(review): both file paths are absolute and machine-specific.

import h5py
import numpy as np


with h5py.File('/home/amisha/HYBRID-RL/data/2S_2024_07_29/data_2S.hdf5', 'r') as f1:

    with h5py.File('/home/amisha/HYBRID-RL/data/2S_2024_07_29/data_2S_hyrl_SDSSD.hdf5', 'w') as f2:

    
        # indicies = [44, 44, 43, 36, 44, 40, 34, 42, 48, 40]
        
        data_group = f1["data"]
        
        n_data = f2.create_group("data")

        for n, demo in enumerate(data_group):

            

            # -------------- Change gripper pos at corresponding index --------------------
            # (disabled) would binarise column 3 of actions / prop / states at a per-demo index
            # actions = data_group[demo]["actions"][:]
            # actions[:, 3][:indicies[n]] = 0.0
            # actions[:, 3][indicies[n]:] = 1.0

            # prop = data_group[demo]["obs"]["prop"][:]
            # prop[:, 3][:indicies[n]] = 0.0
            # prop[:, 3][indicies[n]:] = 1.0


            # states = data_group[demo]["states"][:]
            # states[:, 3][:indicies[n]] = 0.0
            # states[:, 3][indicies[n]:] = 1.0


            ## --------------------- Copy same data into new file ------------------------- ##

            n_demo = n_data.create_group(demo)

            # Create action
            n_demo.create_dataset("actions", data=data_group[demo]["actions"])


            # Create dones
            n_demo.create_dataset("dones", data=data_group[demo]["dones"])


            # Create obs group
            n_obs = n_demo.create_group("obs")
            # Create eye_in_hand_image
            n_obs.create_dataset("eye_in_hand_image", data=data_group[demo]["obs"]["eye_in_hand_image"])
            n_obs.create_dataset("front_image", data=data_group[demo]["obs"]["front_image"])
            # Create prop
            n_obs.create_dataset("prop", data=data_group[demo]["obs"]["prop"])


            # Create rewards
            n_demo.create_dataset("rewards", data=data_group[demo]["rewards"])


            # Create states
            n_demo.create_dataset("states", data=data_group[demo]["states"])

            ## ----------------------- New waypoint key - 1 ----------------------------------- ##
            # # Waypoint 1
            # Segment 1 covers rows 0..position1, where position1 is the first row with
            # column 2 <= 0.025 and last column == 0.0.
            # NOTE(review): presumably column 2 is z and the last column is the gripper
            # state (0.0 open / 0.5 closed) - confirm against how data_2S.hdf5 was built.
            # NOTE(review): index1[0] raises IndexError if no row matches; the gripper
            # test is an exact float comparison.
            all_points = data_group[demo]["states"][:]
            index1 = np.where((all_points[:, 2] <= 0.025) & (all_points[:, -1] == 0.0))[0]
            position1 = index1[0]       # Adjust the value here
            waypoint1 = np.array([all_points[position1][:3]])
            # print(f"Waypoint1: {waypoint1}")
            # The per-demo value computed above is discarded: the same hard-coded
            # waypoint is used for every demo.
            waypoint1 = np.array([-0.04714026, -0.29906368,  0.02476449])
            extended_waypoint1 = np.tile(waypoint1, (position1+1, 1))

            # Rows after position1 start as zeros and are partly overwritten below.
            temp_dense_points = np.zeros((len(all_points) - position1 - 1, extended_waypoint1.shape[1]))
            extended_waypoint = np.concatenate((extended_waypoint1, temp_dense_points), axis=0)
            
            # # Waypoint 2
            # Segment 2 covers rows start_index..position2: from the first row with
            # column 2 >= 0.04 and last column == 0.5, up to the first row with
            # column 2 >= 0.14.
            # NOTE(review): assumes start_index <= position2; otherwise np.tile gets a
            # non-positive repeat count.
            index2 = np.where(all_points[:, 2] >= 0.14)[0]
            position2 = index2[0]
            start_index = np.where((all_points[:, 2] >= 0.04) & (all_points[:, -1] == 0.5))[0][0]

            waypoint2 = np.array([all_points[position2][:3]])
            # print(f"Waypoint2: {waypoint2}")
            # Per-demo value discarded in favour of a hard-coded waypoint, as above.
            waypoint2 = np.array([-0.04694496, -0.29975165,  0.14389902])
            extended_waypoint2 = np.tile(waypoint2, (position2 - start_index + 1, 1))
            extended_waypoint[start_index:position2+1] = extended_waypoint2

            # # Waypoint 3
            # for i in range(len(index2)-1):
            #     if index2[i] + 1 == index2[i+1]:
            #         pass
            #     else:
            #         end_index = index2[i]
            # end_index = index2[-1]
            # Segment 3 covers rows position2+1..position3, where position3 is the first
            # row (over ALL rows) whose column 2 equals the maximum column-2 value found
            # among rows with last column == 0.5.
            # np.where with a single condition returns a 1-tuple, so [-1] selects its
            # only index array and [0] takes the first matching row.
            # NOTE(review): assumes position3 >= position2; otherwise np.tile gets a
            # negative repeat count.
            closed_pos = all_points[all_points[:, -1] == 0.5]
            max_height = np.max(closed_pos[:, 2])
            position3 = np.where(all_points[:, 2] == max_height)[-1][0]
            # position3 = np.where(all_points[:, 2] == 0.14)[0][-1]
            waypoint3 = all_points[position3][:3]
            # print(f"Waypoint3: {waypoint3}")
            # Per-demo value discarded in favour of a hard-coded waypoint, as above.
            waypoint3 = np.array([-0.21113563, -0.33498523,  0.14742944])
            extended_waypoint3 = np.tile(waypoint3, (position3 - position2, 1))
            extended_waypoint[position2+1:position3+1] = extended_waypoint3


            n_demo.create_dataset("waypoint", data=extended_waypoint)

            ## ----------------------- New mode key - 1 ----------------------------------- ##
            # NOTE(review): mode switches only once (0 -> 1 at position1), although the
            # cell title describes a Sparse -> Dense -> Sparse -> Dense sequence - confirm.
            mode = np.zeros((len(all_points), 1))
            mode[position1:] = 1.0

            n_demo.create_dataset("mode", data=mode)



print("New Dataset Created! Wohoooooo!!")


        

New Dataset Created! Wohoooooo!!
