In [1]:
# Download the files list from Zenodo
# Requires the `zenodo_get` command to be available on the shell PATH used by the kernel.
# Each call targets one Zenodo record id and uses -w to produce a text file;
# presumably that file holds the record's download URLs (the next cell greps it for
# "_nocosmo.h5" entries and feeds them to wget) — TODO confirm against zenodo_get docs.
!zenodo_get 6513631 -w gwtc2p1_files.txt
!zenodo_get 8177023 -w gwtc3p0_files.txt
# NOTE(review): purpose of this pause is not visible here — presumably lets the files settle on disk.
!sleep 1

zsh:1: command not found: zenodo_get
zsh:1: command not found: zenodo_get


In [2]:
# Download the files from Zenodo
# !find . -name "*gwtc*p*_files.txt" -exec grep "_nocosmo.h5" {} \; | xargs -I{} wget -P .. -nc {}

In [3]:
import jax
from tqdm import tqdm
import h5py
import os
import re
import subprocess
import pandas as pd
import configparser

# Set folder paths
# folder_path: directory scanned for the released .h5/.hdf5 posterior files.
# write_path: output directory handed to the conversion scripts (-o) and location of event-list.txt.
folder_path = "../"
write_path = "../events_of_interest_converted/"

# Planck 2018 cosmology
# These values are forwarded to the conversion scripts as --Ho/--OmegaMatter/--OmegaRadiation/--OmegaLambda.
# NOTE(review): Ho is flagged as CGS, i.e. presumably in s^-1 (about 67.3 km/s/Mpc) — confirm.
PLANCK_2018_Ho = 2.1816926176539463e-18 ### CGS
PLANCK_2018_OmegaMatter = 0.3158
# Lambda takes whatever matter leaves over, so matter + lambda sum to 1 (radiation is set to 0 below).
PLANCK_2018_OmegaLambda = 1. - PLANCK_2018_OmegaMatter
PLANCK_2018_OmegaRadiation = 0.

In [4]:
# Read config file and set false alarm rate
# NOTE: ConfigParser.read() silently skips a missing file; the lookup below then fails with a KeyError.
config = configparser.ConfigParser()
config.read('../../../sampler/config.ini')
max_far = float(config["INJECTIONS"]["max_far"])
# The configured threshold is deliberately overridden here so that no event is cut;
# a later cell re-reads the configured value from `config`.
max_far = 1000 # Include everything for mmms

In [5]:
# Read the data: every HDF5 posterior file that sits directly in `folder_path`
# (sub-directories are ignored).
events_files_before_far_cut = [
    name for name in os.listdir(folder_path)
    if os.path.isfile(os.path.join(folder_path, name)) and name.endswith((".h5", ".hdf5"))
]

# Folder holding the catalog snapshot used to look up each event's false alarm rate (FAR)
to_snapshot = "snapshot-2023-11-04/GWTC/"
events_files = []

# FARs that were checked by hand. They are only used as a fallback, i.e. when the
# event cannot be located in the snapshot through its full GWYYMMDD_HHMMSS name.
manually_checked_far = {"GW200105": 0.36, "GW230529": 1/1.1}

# GWTC.json does not change while this cell runs, so it is parsed at most once.
_gwtc_index_cache = {}


def _gwtc_index():
    """Return the index labels of GWTC.json, reading the file only on first use."""
    if "index" not in _gwtc_index_cache:
        _gwtc_index_cache["index"] = list(pd.read_json(to_snapshot + "GWTC.json").index)
    return _gwtc_index_cache["index"]


def _find_in_snapshot(event_tag):
    """Return (snapshot file name, GWTC.json index label) of the first entries containing `event_tag`.

    Raises IndexError when either lookup finds nothing, and OSError/ValueError when
    the snapshot folder or GWTC.json cannot be read.
    """
    snapshot_file = [f for f in os.listdir(to_snapshot) if event_tag in f][0]
    json_label = [f for f in _gwtc_index() if event_tag in f][0]
    return snapshot_file, json_label


# Parse the files if they agree with the far cut.
for event_filename in tqdm(events_files_before_far_cut):

    # First try the full GWYYMMDD_HHMMSS name; fall back to the short GWYYMMDD name.
    try:
        match = re.search(r'GW\d{6}_\d{6}', event_filename).group(0)
        matching_files, matching_json = _find_in_snapshot(match)
    except (AttributeError, IndexError, OSError, ValueError):
        # AttributeError: no full name in the file name; IndexError: name not in the snapshot;
        # OSError/ValueError: snapshot folder or GWTC.json missing/unreadable.
        short_match = re.search(r'GW\d{6}', event_filename)
        if short_match is None:
            raise ValueError(f"Cannot extract a GW event name from {event_filename!r}")
        match = short_match.group(0)

        if match in manually_checked_far: # Get rid of cases where you've manually checked the far
            far = manually_checked_far[match]
            # NOTE(review): manual events are kept for far < max_far, snapshot events for
            # far <= max_far; the behaviour at exact equality differs — confirm the intent.
            if far < max_far:
                events_files.append(event_filename)
            continue

        matching_files, matching_json = _find_in_snapshot(match)

    event_df = pd.read_json(to_snapshot + matching_files)
    far = event_df["events"][matching_json]["far"]
    if far > max_far: # Don't include events with far > max_far
        print(f"{match} has far = {far} > {max_far}")
        continue
    events_files.append(event_filename)

# Numbered summary of the files that survived the cut
for i, event_filename in enumerate(events_files):
    print(f"{i+1}. {event_filename} (i = {i})")

100%|██████████| 7/7 [00:00<00:00, 483.29it/s]

1. IGWN-GWTC3p0-v2-GW200115_042309_PEDataRelease_mixed_nocosmo.h5 (i = 0)
2. IGWN-GWTC2p1-v2-GW190425_081805_PEDataRelease_mixed_nocosmo.h5 (i = 1)
3. IGWN-GWTC4p0-v0-GW230529_181500_PEDataRelease_mixed_cosmo.h5 (i = 2)
4. IGWN-GWTC2p1-v2-GW190814_211039_PEDataRelease_mixed_nocosmo.h5 (i = 3)
5. GW170817_GWTC-1.hdf5 (i = 4)
6. IGWN-GWTC2p1-v2-GW190917_114630_PEDataRelease_mixed_nocosmo.h5 (i = 5)
7. IGWN-GWTC3p0-v2-GW200105_162426_PEDataRelease_mixed_nocosmo.h5 (i = 6)





In [6]:
# Function to check if a string contains a pattern
def contains_pattern(s, options_list):
    """Return True if at least one regex in `options_list` matches somewhere in `s`.

    Parameters
    ----------
    s : str
        Text to search (here: an event file path).
    options_list : iterable of str
        Regular-expression patterns; each one is tried with `re.search`.

    Returns
    -------
    bool
        True when any pattern matches, False otherwise. An empty `options_list`
        yields False (joining the options into one alternation would produce the
        empty pattern, which matches every string).
    """
    return any(re.search(pattern, s) for pattern in options_list)

# Make sure the output directory and the event list exist.
# Append mode creates the file when it is missing and never truncates existing content,
# so no separate existence check is needed.
os.makedirs(write_path, exist_ok=True)
with open(write_path + "event-list.txt", "a"):
    pass

In [7]:
# Regex to track different catalogs
re1 = r"GWTC-1"
re2= r"GWTC2p1"
re3 = r"GWTC3"
re4= r"GWTC4"

# (catalog regex, distance conversion recorded in the table, conversion script).
# The first entry whose regex is found in the file name wins, so the order matters.
catalog_converters = [
    (re1, "d_l^2", "h52csv_GWTC1.py"),
    (re2, "d_l^2", "h52csv_no_cosmo.py"),
    (re3, "d_l^2", "h52csv_no_cosmo.py"),
    (re4, "uniform in source frame", "h52csv_GWTC4.py"),
]

event_waveforms = []

# Columns of the summary table
compile_columns = ["event", "waveform", "cosmology", "distance_conversion", "prior_cosmology","prior_a_1 (or chi1)", "prior_a_2 (or chi2)", "prior_tilt_1", "prior_tilt_2", "prior_chirp_mass"]

# Events for which the analytic distance/spin/tilt priors are not read from the file
# and are recorded as "NOT INCLUDED" instead.
events_without_analytic_priors = ["GW190720", "GW190425", "GW190924", "GW190814", "GW190728", "GW190707", "GW170608", "GW190725", "GW190917", "GW200105", "GW200115"]

# One record per event; the DataFrame is built once after the loop.
compile_records = []

# Loop through the events and convert them, then record the information, including specific events without analytic priors.
for i in tqdm(range(len(events_files))):
    file_path = events_files[i] # get specific file
    event_file = folder_path+file_path # get folder path

    # The file is only inspected here, so open it read-only: the released data cannot be
    # modified by accident and no write permission is required.
    with h5py.File(event_file,'r') as file:
        wave_index = 0 # get the first waveform
        waveform = list(file.keys())[wave_index] # get the waveform name
        if contains_pattern(event_file, events_without_analytic_priors):
            cosmology_used = file[waveform]["meta_data"]['meta_data']["cosmology"][:][0]
            cosmo_prior = "NOT INCLUDED (dl^2)"
            spin1_used = spin2_used = "NOT INCLUDED"
            tilt1_used = tilt2_used = "NOT INCLUDED"
        elif contains_pattern(event_file, ["GW170817"]):
            cosmology_used = "planck 2015"
            cosmo_prior = "NOT INCLUDED (dl^2)"
            spin1_used = spin2_used = "chi < 0.89 (Uniform)"
            tilt1_used = tilt2_used = "NOT INCLUDED (Isotropic)"
        else:
            analytic_priors = file[waveform]["priors"]['analytic']
            cosmo_prior = analytic_priors["luminosity_distance"][:][0]
            cosmology_used = file[waveform]["meta_data"]['meta_data']["cosmology"][:][0]
            spin1_used = str(analytic_priors["a_1"][:][0]) ; spin2_used = str(analytic_priors["a_2"][:][0])
            tilt1_used = str(analytic_priors["tilt_1"][:][0]) ; tilt2_used = str(analytic_priors["tilt_2"][:][0])
        try:
            chirp_mass_prior = file[waveform]["priors"]['analytic']["chirp_mass"][:][0]
        except Exception:
            # Deliberately broad (but no longer swallowing KeyboardInterrupt/SystemExit):
            # the file layout differs between catalogs, so the exception type raised
            # when the prior is absent is not the same for every file.
            chirp_mass_prior = "NOT INCLUDED"

    # Arguments to pass to the subprocess
    args = [f"{waveform}", f"{event_file}",
        "--Ho", f"{PLANCK_2018_Ho}", "--OmegaMatter", f"{PLANCK_2018_OmegaMatter}", "--OmegaRadiation",
        f"{PLANCK_2018_OmegaRadiation}", "--OmegaLambda", f"{PLANCK_2018_OmegaLambda}", "-o", f"{write_path}"]

    # Check the catalog and run the appropriate conversion script
    for catalog_regex, distance_conversion_used, conversion_script in catalog_converters:
        if re.search(catalog_regex, file_path):
            break
    else:
        raise ValueError("No match found")

    conversion = subprocess.run(["python", conversion_script, *args])
    if conversion.returncode != 0:
        # Report the failure instead of ignoring it; the remaining events are still processed.
        print(f"WARNING: {conversion_script} exited with code {conversion.returncode} for {file_path}")

    # Record the information
    list_row = [file_path, waveform, cosmology_used, distance_conversion_used, cosmo_prior, spin1_used, spin2_used, tilt1_used, tilt2_used, chirp_mass_prior]
    compile_records.append(list_row)

# Data table to record information
compile_df = pd.DataFrame(compile_records, columns=compile_columns)

# Save the data
compile_df.to_csv("compile_df.csv", index=False)
compile_df

100%|██████████| 7/7 [00:04<00:00,  1.61it/s]


Unnamed: 0,event,waveform,cosmology,distance_conversion,prior_cosmology,prior_a_1 (or chi1),prior_a_2 (or chi2),prior_tilt_1,prior_tilt_2,prior_chirp_mass
0,IGWN-GWTC3p0-v2-GW200115_042309_PEDataRelease_...,C01:IMRPhenomNSBH:HighSpin,b'Planck15_lal',d_l^2,NOT INCLUDED (dl^2),NOT INCLUDED,NOT INCLUDED,NOT INCLUDED,NOT INCLUDED,"b""UniformInComponentsChirpMass(minimum=2.04957..."
1,IGWN-GWTC2p1-v2-GW190425_081805_PEDataRelease_...,C01:IMRPhenomPv2_NRTidal:HighSpin,b'Planck15_lal',d_l^2,NOT INCLUDED (dl^2),NOT INCLUDED,NOT INCLUDED,NOT INCLUDED,NOT INCLUDED,NOT INCLUDED
2,IGWN-GWTC4p0-v0-GW230529_181500_PEDataRelease_...,PROD2,b'Planck15',uniform in source frame,"b""bilby.gw.prior.UniformSourceFrame(minimum=1....","b""Uniform(minimum=0, maximum=0.99, name='a_1',...","b""Uniform(minimum=0, maximum=0.99, name='a_2',...","b""Sine(minimum=0, maximum=3.141592653589793, n...","b""Sine(minimum=0, maximum=3.141592653589793, n...","b""bilby.gw.prior.UniformInComponentsChirpMass(..."
3,IGWN-GWTC2p1-v2-GW190814_211039_PEDataRelease_...,C01:IMRPhenomXPHM,b'Planck15_lal',d_l^2,NOT INCLUDED (dl^2),NOT INCLUDED,NOT INCLUDED,NOT INCLUDED,NOT INCLUDED,NOT INCLUDED
4,GW170817_GWTC-1.hdf5,IMRPhenomPv2NRT_highSpin_posterior,planck 2015,d_l^2,NOT INCLUDED (dl^2),chi < 0.89 (Uniform),chi < 0.89 (Uniform),NOT INCLUDED (Isotropic),NOT INCLUDED (Isotropic),NOT INCLUDED
5,IGWN-GWTC2p1-v2-GW190917_114630_PEDataRelease_...,C01:IMRPhenomXPHM,b'Planck15_lal',d_l^2,NOT INCLUDED (dl^2),NOT INCLUDED,NOT INCLUDED,NOT INCLUDED,NOT INCLUDED,NOT INCLUDED
6,IGWN-GWTC3p0-v2-GW200105_162426_PEDataRelease_...,C01:IMRPhenomNSBH,b'Planck15_lal',d_l^2,NOT INCLUDED (dl^2),NOT INCLUDED,NOT INCLUDED,NOT INCLUDED,NOT INCLUDED,"b""UniformInComponentsChirpMass(minimum=3.5, ma..."


In [8]:
# Load the list of converted events: one entry per line, trailing newlines kept.
with open(write_path + "event-list.txt", "r") as event_list:
    event_txt_files = list(event_list)

# Compare how many events were converted with how many passed the selection.
n_converted = len(event_txt_files)
n_selected = len(events_files)
print("Number of events converted: ", n_converted)
print("Total number of events: ", n_selected)

Number of events converted:  7
Total number of events:  7


In [9]:
# Re-read the event list, this time without line endings, and show it in sorted order.
import numpy as np
with open(write_path + "event-list.txt", "r") as event_list:
    event_txt_files = sorted(event_list.read().splitlines())
event_txt_files

['GW170817.csv.gz',
 'GW190425_081805.csv.gz',
 'GW190814_211039.csv.gz',
 'GW190917_114630.csv.gz',
 'GW200105_162426.csv.gz',
 'GW200115_042309.csv.gz',
 'GW230529_181500.csv.gz']

In [10]:
import pandas as pd
import matplotlib.pyplot as plt

# (column of the converted file, x-axis label), listed row by row for the 2x4 grid.
histogram_panels = [
    ("mass1_source", "mass1_source (Msun)"),
    ("mass2_source", "mass2_source (Msun)"),
    ("luminosity_distance", "luminosity_distance (Mpc)"),
    ("redshift", "redshift (z)"),
    ("a_1", "a_1"),
    ("a_2", "a_2"),
    ("costilt1", "costilt1"),
    ("costilt2", "costilt2"),  # was mislabelled "costilt1"
]

# savefig does not create missing directories
os.makedirs("verify_PE", exist_ok=True)

# Plot the data
for event in tqdm(event_txt_files):
    df = pd.DataFrame(np.genfromtxt(write_path + event, delimiter=",", names=True))

    # Full event name without the ".csv.gz" suffix. A fixed-width slice would cut
    # "GW190425_081805" short and leave ".csv" attached to "GW170817".
    event_name = event.split(".csv")[0]

    fig, ax = plt.subplots(nrows=2, ncols=4, figsize=(10, 4))
    # ax.flat walks the grid row by row, matching the order of histogram_panels.
    for panel, (column, label) in zip(ax.flat, histogram_panels):
        panel.hist(df[column])
        panel.set_xlabel(label)
    fig.suptitle(event_name + f" - {df.shape[0]} samples")
    fig.tight_layout()
    fig.savefig("verify_PE/" + event_name + ".png")
    plt.close(fig)  # release the figure so repeated iterations do not accumulate memory


100%|██████████| 7/7 [00:01<00:00,  3.68it/s]


# Conversion of injections

In [11]:
# Location and names of the injection (VT) files
vt_folder = "../../vt/"
vt_file = "o1+o2+o3_mixture_real+semianalytic-LIGO-T2100377-v2.hdf5"
vt_file_O3_full = "endo3_mixture-LIGO-T2100113-v12.hdf5"

# Injection file used downstream; swap in the commented line to use the other file instead.
vt_path = f"{vt_folder}{vt_file}"
# vt_path = vt_folder + vt_file_O3_full

In [12]:
# Inspect both injection files in turn: top-level keys, the type of the first
# entry, and the names stored inside it.
for injection_path in (vt_folder + vt_file_O3_full, vt_path):
    with h5py.File(injection_path, "r") as f:
        top_level_keys = f.keys()
        print("Keys: %s" % top_level_keys)
        a_group_key = list(top_level_keys)[0]
        first_entry = f[a_group_key]
        print(type(first_entry))
        data = list(first_entry)
        print(data)

Keys: <KeysViewHDF5 ['injections']>
<class 'h5py._hl.group.Group'>
['declination', 'declination_sampling_pdf', 'detection_statistic_cwb', 'detection_statistic_mbta', 'detection_statistic_pycbc_bbh', 'detection_statistic_pycbc_hyperbank', 'distance', 'far_cwb', 'far_gstlal', 'far_mbta', 'far_pycbc_bbh', 'far_pycbc_hyperbank', 'gps_time', 'gps_time_int', 'ifar_cwb', 'ifar_gstlal', 'ifar_mbta', 'ifar_pycbc_bbh', 'ifar_pycbc_hyperbank', 'inclination', 'inclination_sampling_pdf', 'mass1', 'mass1_source', 'mass2', 'mass2_source', 'mixture_weight', 'optimal_snr_h', 'optimal_snr_l', 'optimal_snr_net', 'pastro_cwb', 'pastro_gstlal', 'pastro_mbta', 'pastro_pycbc_bbh', 'pastro_pycbc_hyperbank', 'polarization', 'redshift', 'right_ascension', 'right_ascension_sampling_pdf', 'sampling_pdf', 'spin1x', 'spin1y', 'spin1z', 'spin2x', 'spin2y', 'spin2z']
Keys: <KeysViewHDF5 ['injections']>
<class 'h5py._hl.group.Group'>
['far_cwb', 'far_gstlal', 'far_mbta', 'far_pycbc_bbh', 'far_pycbc_hyperbank', 'ifar_c

In [13]:
# Re-read the FAR threshold from the already-loaded `config`; this replaces the
# value of 1000 that was assigned to max_far earlier in the notebook.
max_far = float(config["INJECTIONS"]["max_far"])

In [14]:
# Print the analytic chirp-mass prior of every selected event that provides one.
for file_path in events_files:
    event_file = folder_path + file_path
    # Read-only access is enough here and protects the released files from accidental edits.
    with h5py.File(event_file, 'r') as file:
        wave_index = 0
        waveform = list(file.keys())[wave_index]

        try:
            print(file[waveform]["priors"]['analytic']["chirp_mass"][:][0])
        except Exception:
            # Files without this prior are skipped. Kept broad (but not bare) because the
            # exception type raised for a missing prior depends on the file layout.
            continue

b"UniformInComponentsChirpMass(minimum=2.0495724198500365, maximum=3.131580989697509, name='chirp_mass', latex_label='$\\\\mathcal{M}$', unit='$M_{\\\\odot}$', boundary=None)"
b"bilby.gw.prior.UniformInComponentsChirpMass(minimum=2.0214333426453837, maximum=2.033088646054829, name='chirp_mass', latex_label='$\\\\mathcal{M}$', unit='$M_{\\\\odot}$', boundary=None)"
b"UniformInComponentsChirpMass(minimum=3.5, maximum=3.7, name='chirp_mass', latex_label='$\\\\mathcal{M}$', unit='$M_{\\\\odot}$', boundary=None)"


In [15]:
# Print the posterior-sample field names of the first selected event file only.
# Slicing keeps the empty-list case a silent no-op.
for file_path in events_files[:1]:
    event_file = folder_path + file_path # get folder path
    # Read-only access is enough for inspecting the dtype.
    with h5py.File(event_file, 'r') as file:
        wave_index = 0 # get the first waveform
        waveform = list(file.keys())[wave_index] # get the waveform name
        print(file[waveform]["posterior_samples"].dtype.names)

  0%|          | 0/7 [00:00<?, ?it/s]

('H1_matched_filter_snr', 'H1_optimal_snr', 'L1_matched_filter_snr', 'L1_optimal_snr', 'V1_matched_filter_snr', 'V1_optimal_snr', 'a_1', 'a_2', 'azimuth', 'chi_1', 'chi_1_in_plane', 'chi_2', 'chi_2_in_plane', 'chi_eff', 'chi_p', 'chirp_mass', 'comoving_distance', 'cos_tilt_1', 'cos_tilt_2', 'dec', 'geocent_time', 'lambda_1', 'lambda_2', 'log_likelihood', 'luminosity_distance', 'mass_1', 'mass_2', 'mass_ratio', 'maximum_frequency', 'phase', 'phi_1', 'phi_12', 'phi_2', 'phi_jl', 'pn_amplitude_order', 'pn_phase_order', 'pn_spin_order', 'pn_tidal_order', 'psi', 'ra', 'recalib_H1_amplitude_0', 'recalib_H1_amplitude_1', 'recalib_H1_amplitude_2', 'recalib_H1_amplitude_3', 'recalib_H1_amplitude_4', 'recalib_H1_amplitude_5', 'recalib_H1_amplitude_6', 'recalib_H1_amplitude_7', 'recalib_H1_amplitude_8', 'recalib_H1_amplitude_9', 'recalib_H1_frequency_0', 'recalib_H1_frequency_1', 'recalib_H1_frequency_2', 'recalib_H1_frequency_3', 'recalib_H1_frequency_4', 'recalib_H1_frequency_5', 'recalib_H1_fr


