In [1]:
import h5py
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm
import os
import gc


In [3]:
groups = ['non_non', 'non_soz', 'soz_non', 'soz_soz']

In [39]:
def create_legend_plot(output_path=None, orientation='horizontal'):
    """
    Create a legend plot with specified orientation using patches.
    
    Parameters:
    -----------
    output_path : str, optional
        Path to save the legend plot. If None, the plot is displayed.
    orientation : str, optional
        'horizontal' or 'vertical' for the legend layout.
    """
    # Set figure size based on orientation
    if orientation == 'horizontal':
        plt.figure(figsize=(3, 0.5))
        ncol = 3
    else:  # vertical
        plt.figure(figsize=(1, 1.5))
        ncol = 1
    
    # Create a figure and axis
    ax = plt.gca()
    
    # Create patches for the legend
    from matplotlib.patches import Patch
    legend_elements = [
        Patch(facecolor='black', label='Non-EZ', alpha=0.6),
        Patch(facecolor='#D41159', label='EZ', alpha=0.6),
        Patch(facecolor='#1A85FF', label='Non->EZ', alpha=0.6)
    ]
    
    # Create legend with patches
    ax.legend(handles=legend_elements,
             loc='center',
             ncol=ncol,
             frameon=False)
    
    # Remove axes and background
    plt.axis('off')
    
    # Adjust layout to show only the legend
    plt.tight_layout()
    
    # Save the plot if output path is provided
    if output_path:
        # Add orientation to filename if not already present
        if not output_path.endswith('.png'):
            output_path += '.png'
        base_path = output_path.rsplit('.', 1)[0]
        output_path = f"{base_path}_{orientation}.png"
        
        plt.savefig(output_path, dpi=300, bbox_inches='tight', pad_inches=0)
        plt.close()
    else:
        plt.show()


create_legend_plot('/media/dan/Data/git/ubiquitous-spork/plots_for_seminar/legend.png', orientation='horizontal')
create_legend_plot('/media/dan/Data/git/ubiquitous-spork/plots_for_seminar/legend.png', orientation='vertical')

In [42]:
def combine_data(data, log=False):
    # Combine mixed groups
    mix = np.concatenate([data['non_soz'], data['soz_non']])
    
    # Create arrays for each group
    mix_data = mix
    non_data = data['non_non']
    soz_data = data['soz_soz']
    
    # Create labels for each group
    mix_labels = ['mix'] * len(mix_data)
    non_labels = ['non'] * len(non_data)
    soz_labels = ['soz'] * len(soz_data)
    
    # Combine all data and labels
    all_data = np.concatenate([mix_data, non_data, soz_data])
    all_labels = mix_labels + non_labels + soz_labels
    
    # Create DataFrame
    df = pd.DataFrame({
        'Measure Value': all_data,
        'group': all_labels
    })
    df = df.replace([np.inf, -np.inf], np.nan).dropna()
    return df

# Example usage:
# data = {'non_non': array1, 'non_soz': array2, 'soz_non': array3, 'soz_soz': array4}
# df = combine_data(data)

In [36]:
path = "/media/dan/Data/data/connectivity/conglomerated/six_run/mean_columns.h5"

# Open the HDF5 file
with h5py.File(path, 'r') as f:
    keys = list(f.keys())


for k in tqdm(keys):
    data = {}
    try:

        with h5py.File(path, 'r') as f:
            for g in groups:
                data[g] = f[k]['upper'][g][()]
        combined = combine_data(data)
        # subset = combined.sample(10000)
        sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,
                    palette={'non': 'black', 'soz': '#D41159', 'mix': '#1A85FF'}, fill=False, alpha=0.6, legend=False)
        plt.title(k, fontsize=10)
        output_dir = "/media/dan/Data/git/ubiquitous-spork/plots_for_seminar/mean_columns"
        plt.savefig(os.path.join(output_dir, f"{k}~mean.png"), dpi=300)
        plt.close()
        del data, combined
        gc.collect()
    except Exception as e:
        print(f"Error processing {k}: {e}")
        continue


100%|██████████| 50/50 [02:31<00:00,  3.03s/it]


In [43]:
path = "/media/dan/Data/data/connectivity/conglomerated/additional/mean_columns.h5"

# Open the HDF5 file
with h5py.File(path, 'r') as f:
    keys = list(f.keys())


for k in tqdm(keys):
    data = {}
    
    with h5py.File(path, 'r') as f:
        for g in groups:
            data[g] = f[k]['upper'][g][()]
    try:
        combined = combine_data(data)
            # subset = combined.sample(10000)
        sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,
                    palette={'non': 'black', 'soz': '#D41159', 'mix': '#1A85FF'}, fill=False, alpha=0.6, legend=False)
        plt.title(k, fontsize=10)
        output_dir = "/media/dan/Data/git/ubiquitous-spork/plots_for_seminar/mean_columns"
        plt.savefig(os.path.join(output_dir, f"{k}~mean.png"), dpi=300)
        plt.close()
        del data, combined
        gc.collect()
    except Exception as e:
        print(f"Error processing {k}: {e}")
        continue


  5%|▌         | 7/136 [00:19<05:02,  2.35s/it]

Error processing cov-sq_GraphicalLasso: array must not contain infs or NaNs


  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,
 30%|███       | 41/136 [02:11<03:50,  2.43s/it]

Error processing gd_multitaper_delay_fs-1_fmin-0-000488_fmax-0-00195: `dataset` input should have multiple elements.


  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,
  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,
 33%|███▎      | 45/136 [02:18<02:13,  1.46s/it]

Error processing gd_multitaper_delay_fs-1_fmin-0-00195_fmax-0-00391: `dataset` input should have multiple elements.
Error processing gd_multitaper_delay_fs-1_fmin-0-00391_fmax-0-00586: `dataset` input should have multiple elements.


  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,
 80%|████████  | 109/136 [05:50<01:06,  2.48s/it]

Error processing psi_multitaper_mean_fs-1_fmin-0-000488_fmax-0-00195: `dataset` input should have multiple elements.


  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,
  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,
 83%|████████▎ | 113/136 [05:57<00:35,  1.52s/it]

Error processing psi_multitaper_mean_fs-1_fmin-0-00195_fmax-0-00391: `dataset` input should have multiple elements.
Error processing psi_multitaper_mean_fs-1_fmin-0-00391_fmax-0-00586: `dataset` input should have multiple elements.


100%|██████████| 136/136 [07:17<00:00,  3.21s/it]


In [48]:
path = "/media/dan/Data/data/connectivity/conglomerated/additional/mean_ilae.h5"

# Open the HDF5 file
with h5py.File(path, 'r') as f:
    ilae_keys = list(f.keys())
    keys = list(f[ilae_keys[0]].keys())

for ilae_k in tqdm(ilae_keys):
    for k in tqdm(keys):
        data = {}
        
        with h5py.File(path, 'r') as f:
            for g in groups:
                data[g] = f[ilae_k][k]['upper'][g][()]
        try:
            combined = combine_data(data)
                # subset = combined.sample(10000)
            sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,
                        palette={'non': 'black', 'soz': '#D41159', 'mix': '#1A85FF'}, fill=False, alpha=0.6, legend=False)
            plt.title(k, fontsize=10)
            output_dir = "/media/dan/Data/git/ubiquitous-spork/plots_for_seminar/by_ilae/mean"
            plt.savefig(os.path.join(output_dir, f"{k}~{ilae_k}~mean.png"), dpi=300)
            plt.close()
            del data, combined
            gc.collect()
        except Exception as e:
            print(f"Error processing {k}: {e}")
            continue


  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,


Error processing gd_multitaper_delay_fs-1_fmin-0-000488_fmax-0-00195: `dataset` input should have multiple elements.


  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,
  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,


Error processing gd_multitaper_delay_fs-1_fmin-0-00195_fmax-0-00391: `dataset` input should have multiple elements.
Error processing gd_multitaper_delay_fs-1_fmin-0-00391_fmax-0-00586: `dataset` input should have multiple elements.


  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,


Error processing psi_multitaper_mean_fs-1_fmin-0-000488_fmax-0-00195: `dataset` input should have multiple elements.


  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,
  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,


Error processing psi_multitaper_mean_fs-1_fmin-0-00195_fmax-0-00391: `dataset` input should have multiple elements.
Error processing psi_multitaper_mean_fs-1_fmin-0-00391_fmax-0-00586: `dataset` input should have multiple elements.


100%|██████████| 136/136 [01:35<00:00,  1.43it/s]
 14%|█▍        | 1/7 [01:35<09:30, 95.15s/it]

Error processing cov-sq_GraphicalLasso: array must not contain infs or NaNs


  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,


Error processing gd_multitaper_delay_fs-1_fmin-0-000488_fmax-0-00195: `dataset` input should have multiple elements.


  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,
  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,


Error processing gd_multitaper_delay_fs-1_fmin-0-00195_fmax-0-00391: `dataset` input should have multiple elements.
Error processing gd_multitaper_delay_fs-1_fmin-0-00391_fmax-0-00586: `dataset` input should have multiple elements.


  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,


Error processing psi_multitaper_mean_fs-1_fmin-0-000488_fmax-0-00195: `dataset` input should have multiple elements.


  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,
  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,


Error processing psi_multitaper_mean_fs-1_fmin-0-00195_fmax-0-00391: `dataset` input should have multiple elements.
Error processing psi_multitaper_mean_fs-1_fmin-0-00391_fmax-0-00586: `dataset` input should have multiple elements.


100%|██████████| 136/136 [03:12<00:00,  1.41s/it]
  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,


Error processing gd_multitaper_delay_fs-1_fmin-0-000488_fmax-0-00195: `dataset` input should have multiple elements.


  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,
  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,


Error processing gd_multitaper_delay_fs-1_fmin-0-00195_fmax-0-00391: `dataset` input should have multiple elements.
Error processing gd_multitaper_delay_fs-1_fmin-0-00391_fmax-0-00586: `dataset` input should have multiple elements.


  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,


Error processing psi_multitaper_mean_fs-1_fmin-0-000488_fmax-0-00195: `dataset` input should have multiple elements.


  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,
  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,


Error processing psi_multitaper_mean_fs-1_fmin-0-00195_fmax-0-00391: `dataset` input should have multiple elements.
Error processing psi_multitaper_mean_fs-1_fmin-0-00391_fmax-0-00586: `dataset` input should have multiple elements.


100%|██████████| 136/136 [01:28<00:00,  1.54it/s]
  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,


Error processing gd_multitaper_delay_fs-1_fmin-0-000488_fmax-0-00195: `dataset` input should have multiple elements.


  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,
  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,


Error processing gd_multitaper_delay_fs-1_fmin-0-00195_fmax-0-00391: `dataset` input should have multiple elements.
Error processing gd_multitaper_delay_fs-1_fmin-0-00391_fmax-0-00586: `dataset` input should have multiple elements.


  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,


Error processing psi_multitaper_mean_fs-1_fmin-0-000488_fmax-0-00195: `dataset` input should have multiple elements.


  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,
  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,


Error processing psi_multitaper_mean_fs-1_fmin-0-00195_fmax-0-00391: `dataset` input should have multiple elements.
Error processing psi_multitaper_mean_fs-1_fmin-0-00391_fmax-0-00586: `dataset` input should have multiple elements.


100%|██████████| 136/136 [01:25<00:00,  1.58it/s]
  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,


Error processing gd_multitaper_delay_fs-1_fmin-0-000488_fmax-0-00195: `dataset` input should have multiple elements.


  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,
  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,


Error processing gd_multitaper_delay_fs-1_fmin-0-00195_fmax-0-00391: `dataset` input should have multiple elements.
Error processing gd_multitaper_delay_fs-1_fmin-0-00391_fmax-0-00586: `dataset` input should have multiple elements.


  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,


Error processing psi_multitaper_mean_fs-1_fmin-0-000488_fmax-0-00195: `dataset` input should have multiple elements.


  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,
  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,


Error processing psi_multitaper_mean_fs-1_fmin-0-00195_fmax-0-00391: `dataset` input should have multiple elements.
Error processing psi_multitaper_mean_fs-1_fmin-0-00391_fmax-0-00586: `dataset` input should have multiple elements.


100%|██████████| 136/136 [01:59<00:00,  1.14it/s]
 71%|███████▏  | 5/7 [09:40<03:44, 112.27s/it]

Error processing cov-sq_GraphicalLasso: array must not contain infs or NaNs


  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,


Error processing gd_multitaper_delay_fs-1_fmin-0-000488_fmax-0-00195: `dataset` input should have multiple elements.


  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,
  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,


Error processing gd_multitaper_delay_fs-1_fmin-0-00195_fmax-0-00391: `dataset` input should have multiple elements.
Error processing gd_multitaper_delay_fs-1_fmin-0-00391_fmax-0-00586: `dataset` input should have multiple elements.


  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,


Error processing psi_multitaper_mean_fs-1_fmin-0-000488_fmax-0-00195: `dataset` input should have multiple elements.


  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,
  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,


Error processing psi_multitaper_mean_fs-1_fmin-0-00195_fmax-0-00391: `dataset` input should have multiple elements.
Error processing psi_multitaper_mean_fs-1_fmin-0-00391_fmax-0-00586: `dataset` input should have multiple elements.


100%|██████████| 136/136 [03:32<00:00,  1.57s/it]
  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,


Error processing gd_multitaper_delay_fs-1_fmin-0-000488_fmax-0-00195: `dataset` input should have multiple elements.


  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,
  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,


Error processing gd_multitaper_delay_fs-1_fmin-0-00195_fmax-0-00391: `dataset` input should have multiple elements.
Error processing gd_multitaper_delay_fs-1_fmin-0-00391_fmax-0-00586: `dataset` input should have multiple elements.


  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,


Error processing psi_multitaper_mean_fs-1_fmin-0-000488_fmax-0-00195: `dataset` input should have multiple elements.


  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,
  sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,


Error processing psi_multitaper_mean_fs-1_fmin-0-00195_fmax-0-00391: `dataset` input should have multiple elements.
Error processing psi_multitaper_mean_fs-1_fmin-0-00391_fmax-0-00586: `dataset` input should have multiple elements.


100%|██████████| 136/136 [03:25<00:00,  1.51s/it]
100%|██████████| 7/7 [16:39<00:00, 142.75s/it]


In [49]:
path = "/media/dan/Data/data/connectivity/conglomerated/six_run/mean_ilae.h5"

# Open the HDF5 file
with h5py.File(path, 'r') as f:
    ilae_keys = list(f.keys())
    keys = list(f[ilae_keys[0]].keys())

for ilae_k in tqdm(ilae_keys):
    for k in tqdm(keys):
        data = {}
        
        with h5py.File(path, 'r') as f:
            for g in groups:
                data[g] = f[ilae_k][k]['upper'][g][()]
        try:
            combined = combine_data(data)
                # subset = combined.sample(10000)
            sns.kdeplot(data=combined, x="Measure Value", hue="group", common_grid=True, common_norm=False, cut=0,
                        palette={'non': 'black', 'soz': '#D41159', 'mix': '#1A85FF'}, fill=False, alpha=0.6, legend=False)
            plt.title(k, fontsize=10)
            output_dir = "/media/dan/Data/git/ubiquitous-spork/plots_for_seminar/by_ilae/mean"
            plt.savefig(os.path.join(output_dir, f"{k}~{ilae_k}~mean.png"), dpi=300)
            plt.close()
            del data, combined
            gc.collect()
        except Exception as e:
            print(f"Error processing {k}: {e}")
            continue


100%|██████████| 50/50 [00:36<00:00,  1.38it/s]
100%|██████████| 50/50 [01:08<00:00,  1.38s/it]
100%|██████████| 50/50 [00:26<00:00,  1.86it/s]
100%|██████████| 50/50 [00:23<00:00,  2.14it/s]
100%|██████████| 50/50 [00:33<00:00,  1.48it/s]
100%|██████████| 50/50 [00:59<00:00,  1.19s/it]
100%|██████████| 50/50 [01:00<00:00,  1.21s/it]
100%|██████████| 7/7 [05:09<00:00, 44.17s/it]


In [8]:
path = "/media/dan/Data/outputs/ubiquitous-spork/pyspi_combined_patient_hdf5s"

for patient in list(sorted(os.listdir(path))):
    if not patient.endswith('.h5'):
        continue
    in_path = os.path.join(path, patient)
    # Open the HDF5 file
    with h5py.File(in_path, 'r') as f:
        keys = list(f['metadata/patient_info'].keys())
        soz = f['metadata/patient_info/soz'][()]
        if sum(soz) == 0:
            continue
        print(keys)
        print(soz)



['electrode_data', 'epoch_indices', 'epoch_names', 'ilae', 'pid', 'soz']
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0]
['electrode_data', 'epoch_indices', 'epoch_names', 'ilae', 'pid', 'soz']
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 1
 1 1 0 0 0 1 1 0 1 1 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1
 0 0 0 0 0 0]
['electrode_data', 'epoch_indices', 'epoch_names', 'ilae', 'pid', 'soz']
[0 0 0 0 0 0 1 1 1 1 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0]
['electrode_data', 'epoch_indices', 'epoch_names', 'ilae', 'pid', 'soz']
[1 1 1 1 0 0 0 0 1 1 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0
 0 0 0 0 0 1 1 0 0 0 0 1 1 1 1 1 0 1 1 1 1 1 1 1 0 0 0 0

KeyError: 'Unable to synchronously open object (component not found)'