In [9]:
import os
import h5py
import numpy as np
from natsort import natsorted
from collections import OrderedDict


class DataMerger:
    """Merge the datasets of every ``.hdf5`` file found in one directory.

    Files are visited in the order produced by ``natsorted``; datasets that
    share a group and a name are concatenated along their first axis.
    """

    def __init__(self):
        # group name -> OrderedDict(dataset name -> merged array)
        self._data = OrderedDict()
        # Bare file names (no directory part) used by the most recent merge.
        self.file_names = []

    def _clear_data(self):
        """Reset the merger before a new run.

        Fresh containers are created instead of emptying the old ones in
        place, so a result handed out by an earlier ``get_merged_data`` call
        is not wiped when the same merger is reused.
        """
        self._data = OrderedDict()
        self.file_names = []

    def get_merged_data(self, dir, verbose=False):
        """Read all ``.hdf5`` files in ``dir`` and merge them in memory.

        Selection rules:
          * the top-level ``'metadata'`` group is ignored;
          * inside the ``'data'`` group only ``'time'`` and datasets whose
            name contains ``'pose_'`` are kept;
          * datasets of length 0 are skipped (their group is still reported,
            possibly with no datasets in it).

        Args:
            dir: Directory holding the input files. May be absolute or
                relative to the current working directory; the working
                directory itself is left untouched.
            verbose: When true, print every file, group and dataset as it is
                processed.

        Returns:
            OrderedDict mapping group name to an OrderedDict that maps
            dataset name to the merged array.
        """
        self._clear_data()

        self.file_names = natsorted(
            [name for name in os.listdir(dir) if name.endswith('.hdf5')]
        )
        print('Number of Files ', len(self.file_names))

        # group -> dataset -> list of per-file arrays, in first-seen order.
        # Collecting the pieces and concatenating once avoids re-copying the
        # growing array for every additional file.
        pieces = OrderedDict()

        for idx, file_name in enumerate(self.file_names):
            if verbose:
                print(idx, 'Opening', file_name)
            # Build the full path instead of chdir-ing into ``dir``: this
            # works for relative directories and has no process-wide effect.
            # The ``with`` block closes the file even if reading fails.
            with h5py.File(os.path.join(dir, file_name), 'r') as file:
                for grp in file.keys():
                    if grp == 'metadata':
                        continue

                    grp_pieces = pieces.setdefault(grp, OrderedDict())
                    if verbose:
                        print('\t Processing Group ', grp)
                    for dset in file[grp].keys():
                        if grp == 'data' and dset != 'time' and 'pose_' not in dset:
                            continue

                        dataset = file[grp][dset]
                        if len(dataset) == 0:
                            continue

                        if verbose:
                            print('\t\t Processing Dataset ', dset)
                        # ``[()]`` reads the whole dataset into memory, so the
                        # data stays valid after the file is closed.
                        grp_pieces.setdefault(dset, []).append(dataset[()])

        for grp, grp_pieces in pieces.items():
            merged = self._data[grp] = OrderedDict()
            for dset, arrays in grp_pieces.items():
                if len(arrays) == 1:
                    merged[dset] = arrays[0]
                else:
                    merged[dset] = np.concatenate(arrays, axis=0)
        return self._data


def main(data_dir='/Users/wangyi/Downloads/Source_Data/cxoxe_P0_L1_color/2023-04-13 10:15:01',
         output_path='output.hdf5'):
    """Merge every recording in ``data_dir`` and write the result to one file.

    Args:
        data_dir: Directory containing the ``.hdf5`` files to merge. The
            default is the recording this notebook was written for.
        output_path: Destination HDF5 file; it is overwritten if it exists.
            A relative path is resolved against the working directory that is
            current when the file is opened, i.e. after the merge has run.
    """
    data = DataMerger().get_merged_data(data_dir)

    # The context manager flushes and closes the output file even when a
    # write fails, so a half-written file is never left open in the kernel.
    with h5py.File(output_path, 'w') as output_file:
        for grp, datasets in data.items():
            output_grp = output_file.create_group(grp)
            for dset, values in datasets.items():
                print('Writing Dataset', dset)
                output_grp.create_dataset(dset, data=values, compression='gzip')


# Run the merge when this code is executed directly (as a script or as a
# notebook cell, where __name__ is "__main__"), but not when it is imported.
if __name__ == "__main__":
    main()

Number of Files  3
Writing Dataset pose_main_camera
Writing Dataset pose_mastoidectomy_drill
Writing Dataset pose_mastoidectomy_volume
Writing Dataset time
Writing Dataset time_stamp
Writing Dataset wrench
Writing Dataset voxel_color
Writing Dataset voxel_removed
Writing Dataset voxel_time_stamp


In [10]:
import h5py as h5
import numpy as np

# Open the merged file read-only. The handle is not closed here, so ``f``
# stays usable after this block; call ``f.close()`` once it is no longer needed.
f = h5.File("output.hdf5", "r")

# Level 1: the top-level keys of the file.
print('# 1st layer keys: \n', f.keys())

# Level 2: the keys found under each top-level entry.
print('# 2nd layer keys: ')
for group_name, group in f.items():
    print(group_name, '\n layer contains', group.keys())

# Level 3: one summary line per stored object.
print('# 3nd layer values: ')
for group in f.values():
    for dataset in group.values():
        print(dataset)
    

# 1st layer keys: 
 <KeysViewHDF5 ['burr_change', 'data', 'drill_force_feedback', 'voxels_removed']>
# 2nd layer keys: 
burr_change 
 layer contains <KeysViewHDF5 []>
data 
 layer contains <KeysViewHDF5 ['pose_main_camera', 'pose_mastoidectomy_drill', 'pose_mastoidectomy_volume', 'time']>
drill_force_feedback 
 layer contains <KeysViewHDF5 ['time_stamp', 'wrench']>
voxels_removed 
 layer contains <KeysViewHDF5 ['voxel_color', 'voxel_removed', 'voxel_time_stamp']>
# 3nd layer values: 
<HDF5 dataset "pose_main_camera": shape (1338, 7), type "<f8">
<HDF5 dataset "pose_mastoidectomy_drill": shape (1338, 7), type "<f8">
<HDF5 dataset "pose_mastoidectomy_volume": shape (1338, 7), type "<f8">
<HDF5 dataset "time": shape (1338,), type "<f8">
<HDF5 dataset "time_stamp": shape (214647,), type "<f8">
<HDF5 dataset "wrench": shape (214579, 6), type "<f8">
<HDF5 dataset "voxel_color": shape (41690, 5), type "<f8">
<HDF5 dataset "voxel_removed": shape (41690, 4), type "<f8">
<HDF5 dataset "voxel_tim