In [2]:
import h5py
import numpy as np

# Use the path you already have defined
path = "/media/dan/Data/outputs/ubiquitous-spork/pyspi_combined_patient_hdf5s/014_20250414.h5"

def print_hdf5_structure(file_path):
    """Print the general structure of an HDF5 file"""
    try:
        with h5py.File(file_path, 'r') as f:
            print(f"File: {file_path}")
            print(f"File size: {f.id.get_filesize() / (1024*1024):.2f} MB")
            print("\nStructure:")
            print("=" * 50)
            
            def print_structure(name, obj):
                indent = "  " * name.count('/')
                if isinstance(obj, h5py.Dataset):
                    shape = obj.shape
                    dtype = obj.dtype
                    size_mb = obj.nbytes / (1024*1024)
                    print(f"{indent}ðŸ“Š Dataset: {name.split('/')[-1]}")
                    print(f"{indent}    Shape: {shape}")
                    print(f"{indent}    Dtype: {dtype}")
                    print(f"{indent}    Size: {size_mb:.2f} MB")
                    if len(shape) > 0:
                        print(f"{indent}    First few values: {obj[0] if shape[0] > 0 else 'Empty'}")
                elif isinstance(obj, h5py.Group):
                    print(f"{indent}ï¿½ï¿½ Group: {name.split('/')[-1]}")
                    print(f"{indent}    Attributes: {list(obj.attrs.keys())}")
                    if len(obj.attrs) > 0:
                        print(f"{indent}    Sample attr: {list(obj.attrs.items())[:2]}")
                print()
            
            f.visititems(print_structure)
            
            # Also show top-level info
            print("Top-level groups and datasets:")
            print("-" * 30)
            for key in f.keys():
                obj = f[key]
                if isinstance(obj, h5py.Dataset):
                    print(f"Dataset: {key} - Shape: {obj.shape}, Dtype: {obj.dtype}")
                else:
                    print(f"Group: {key} - Items: {len(obj)}")
                    
    except FileNotFoundError:
        print(f"Error: File not found at {file_path}")
    except Exception as e:
        print(f"Error reading file: {e}")



In [3]:
# Call the function
print_hdf5_structure(path)

File: /media/dan/Data/outputs/ubiquitous-spork/pyspi_combined_patient_hdf5s/014_20250414.h5
File size: 8187.67 MB

Structure:
ï¿½ï¿½ Group: metadata
    Attributes: []

  ï¿½ï¿½ Group: adjacency_matrices
      Attributes: []

    ðŸ“Š Dataset: bary-sq_euclidean_max
        Shape: (98, 98, 604)
        Dtype: float64
        Size: 44.26 MB
        First few values: [[        nan         nan         nan ...         nan         nan
          nan]
 [ 6.62356786  0.25258938  3.51004806 ... 18.07188912 11.30855661
  10.53713139]
 [ 7.03391919  0.11963641  3.93609368 ... 15.76860742  8.83128827
   8.16946287]
 ...
 [ 3.62403348  0.93018345  4.11335704 ...  4.94092897  2.53794027
   2.66017213]
 [ 0.84202982  0.26244219  6.7183266  ...  2.66282171  1.02641095
   2.06589699]
 [ 1.71126207  1.18092525  4.23389944 ...  2.02741022  2.03923832
   2.04458064]]

    ðŸ“Š Dataset: bary-sq_euclidean_mean
        Shape: (98, 98, 604)
        Dtype: float64
        Size: 44.26 MB
        First few values

In [5]:
with h5py.File(path, "r") as f:
    if "metadata" in f:
        metadata_group = f["metadata"]
        print("Keys in 'metadata' group:")
        for key in metadata_group.keys():
            print(f"  {key}")


Keys in 'metadata' group:
  adjacency_matrices
  epoch_count
  metrics
  node_count
  patient_info


In [15]:
with h5py.File(path, "r") as f:
    for key in f["metadata/adjacency_matrices"].keys():
        print(key)


bary-sq_euclidean_max
bary-sq_euclidean_mean
bary_euclidean_max
bary_euclidean_mean
ce_gaussian
cohmag_multitaper_mean_fs-1_fmin-0-000488_fmax-0-00195
cohmag_multitaper_mean_fs-1_fmin-0-000488_fmax-0-0342
cohmag_multitaper_mean_fs-1_fmin-0-000488_fmax-0-122
cohmag_multitaper_mean_fs-1_fmin-0-00195_fmax-0-00391
cohmag_multitaper_mean_fs-1_fmin-0-00391_fmax-0-00586
cohmag_multitaper_mean_fs-1_fmin-0-00586_fmax-0-0146
cohmag_multitaper_mean_fs-1_fmin-0-0146_fmax-0-0342
cohmag_multitaper_mean_fs-1_fmin-0-0342_fmax-0-0732
cohmag_multitaper_mean_fs-1_fmin-0-0342_fmax-0-122
cohmag_multitaper_mean_fs-1_fmin-0-0732_fmax-0-122
cohmag_multitaper_mean_fs-1_fmin-0_fmax-0-5
cov-sq_EmpiricalCovariance
cov-sq_GraphicalLasso
cov-sq_GraphicalLassoCV
cov-sq_LedoitWolf
cov-sq_MinCovDet
cov-sq_OAS
cov-sq_ShrunkCovariance
cov_EmpiricalCovariance
cov_GraphicalLassoCV
cov_LedoitWolf
cov_MinCovDet
cov_OAS
cov_ShrunkCovariance
dspli_multitaper_mean_fs-1_fmin-0-000488_fmax-0-00195
dspli_multitaper_mean_fs-1_fmin

In [38]:
with h5py.File(path, "r") as f:
    # print(np.nanmean(f["metadata/adjacency_matrices/ce_gaussian"],axis=-1).shape)
    print("metadata")
    print(f["metadata"].keys())
    print("\npatient_info")
    print(f["metadata/patient_info"].keys())
    print("\nelectrode_data")
    print(f["metadata/patient_info/electrode_data"].keys())

metadata
<KeysViewHDF5 ['adjacency_matrices', 'epoch_count', 'metrics', 'node_count', 'patient_info']>

patient_info
<KeysViewHDF5 ['electrode_data', 'epoch_indices', 'epoch_names', 'ilae', 'pid', 'soz']>

electrode_data
<KeysViewHDF5 ['aal', 'aal2', 'age_days_at_recording', 'age_years_at_recording', 'brainnetome', 'electrode', 'harvard oxford', 'harvard-oxford', 'in_edf_file', 'in_xyz_file', 'miccai', 'ns_language', 'ns_oral', 'oral', 'pid', 'seizureFree', 'soz', 'white_matter', 'x', 'y', 'z']>


In [7]:
patient_info

<Closed HDF5 group>