In [21]:
import os
import re
import subprocess
import sys

import h5py
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.pyplot import subplot
from tqdm import tqdm

In [22]:
# --- Locations -------------------------------------------------------------
# Directory scanned for the PE release files, and directory handed to the
# conversion scripts through their "-o" option.
folder_path = "../"
write_path = "../gwtc3_converted/"

# --- Cosmology passed to the conversion scripts ----------------------------
# Hubble constant in CGS units (presumably s^-1, i.e. about 67.3 km/s/Mpc --
# TODO confirm against the Planck 2018 tables).
PLANCK_2018_Ho = 2.1816926176539463e-18
PLANCK_2018_OmegaMatter = 0.3158
# Radiation is set to zero and Lambda is the complement of the matter density,
# so the density parameters sum to one.
PLANCK_2018_OmegaRadiation = 0.0
PLANCK_2018_OmegaLambda = 1.0 - PLANCK_2018_OmegaMatter

In [23]:
# Collect the HDF5 posterior files that sit directly inside folder_path.
# NOTE: os.listdir() returns entries in arbitrary order; later cells index into
# events_files with hard-coded positions, so the order is deliberately left as is.
events_files = [
    name
    for name in os.listdir(folder_path)
    if os.path.isfile(os.path.join(folder_path, name))
    and name.endswith((".h5", ".hdf5"))
]

# Print a 1-based listing of the files that were found.
for position, name in enumerate(events_files, start=1):
    print(f"{position}. {name}")

1. IGWN-GWTC3p0-v1-GW200225_060421_PEDataRelease_mixed_cosmo.h5
2. IGWN-GWTC2p1-v2-GW190521_074359_PEDataRelease_mixed_cosmo.h5
3. IGWN-GWTC2p1-v2-GW190915_235702_PEDataRelease_mixed_cosmo.h5
4. IGWN-GWTC3p0-v1-GW200224_222234_PEDataRelease_mixed_cosmo.h5
5. IGWN-GWTC2p1-v2-GW190706_222641_PEDataRelease_mixed_cosmo.h5
6. IGWN-GWTC2p1-v2-GW190412_053044_PEDataRelease_mixed_cosmo.h5
7. IGWN-GWTC2p1-v2-GW190929_012149_PEDataRelease_mixed_cosmo.h5
8. IGWN-GWTC3p0-v1-GW200209_085452_PEDataRelease_mixed_cosmo.h5
9. IGWN-GWTC3p0-v1-GW200219_094415_PEDataRelease_mixed_cosmo.h5
10. IGWN-GWTC2p1-v2-GW190828_065509_PEDataRelease_mixed_cosmo.h5
11. IGWN-GWTC3p0-v1-GW191129_134029_PEDataRelease_mixed_cosmo.h5
12. IGWN-GWTC3p0-v1-GW191109_010717_PEDataRelease_mixed_cosmo.h5
13. IGWN-GWTC2p1-v2-GW190408_181802_PEDataRelease_mixed_cosmo.h5
14. IGWN-GWTC2p1-v2-GW190707_093326_PEDataRelease_mixed_cosmo.h5
15. IGWN-GWTC3p0-v1-GW200115_042309_PEDataRelease_mixed_cosmo.h5
16. IGWN-GWTC3p0-v1-GW191204_17152

In [24]:
# Make sure the output directory and an (initially empty) event list exist.
# Without the makedirs call, open() raises FileNotFoundError on a fresh checkout
# where write_path has not been created yet.
os.makedirs(write_path, exist_ok=True)

event_list_path = write_path + "event-list.txt"
if not os.path.exists(event_list_path):
    # Create the file without content; an existing list is left untouched.
    # NOTE(review): presumably the conversion scripts add entries to this file --
    # confirm, and consider clearing it before a full re-run.
    with open(event_list_path, "w"):
        pass

In [25]:
# Patterns searched for in each PE file name to decide which conversion
# script handles it (one pattern per catalogue release).
re1 = r"GWTC-1"    # e.g. "GW170817_GWTC-1.hdf5"
re2 = r"GWTC2p1"   # e.g. "IGWN-GWTC2p1-v2-..."
re3 = r"GWTC3"     # e.g. "IGWN-GWTC3p0-v1-..."

In [26]:
# Convert every PE file by dispatching to the conversion script that matches
# its catalogue release. Failures are recorded instead of being silently
# dropped, so the summary cell can report them.
file_open_error = []     # (path, error) for files h5py could not read
file_convert_error = []  # (path, return code) for scripts that exited non-zero
event_waveforms = []

# Pattern -> script, tested in this order (same precedence as an if/elif chain).
converters = (
    (re1, "h52csv_GWTC1.py"),
    (re2, "h52csv_GWTC2p1.py"),
    (re3, "h52csv_GWTC3.py"),
)

for file_path in tqdm(events_files):
    event_file = folder_path + file_path

    # Only the first top-level key is needed, so open the file read-only;
    # "r+" would require write permission and risks touching the release data.
    try:
        with h5py.File(event_file, "r") as file:
            waveform = list(file.keys())[0]
    except (OSError, IndexError) as exc:
        # OSError: unreadable or missing file; IndexError: file without any key.
        file_open_error.append((event_file, repr(exc)))
        continue
    event_waveforms.append(f"Converting {waveform} from {event_file}")

    args = [f"{waveform}", f"{event_file}",
            "--Ho", f"{PLANCK_2018_Ho}",
            "--OmegaMatter", f"{PLANCK_2018_OmegaMatter}",
            "--OmegaRadiation", f"{PLANCK_2018_OmegaRadiation}",
            "--OmegaLambda", f"{PLANCK_2018_OmegaLambda}",
            "-o", f"{write_path}"]

    script = next(
        (name for pattern, name in converters if re.search(pattern, file_path)),
        None,
    )
    if script is None:
        raise ValueError("No match found")

    # sys.executable runs the script with the same interpreter (and therefore
    # the same installed packages) as this kernel, whatever "python" is on PATH.
    result = subprocess.run([sys.executable, script, *args])
    if result.returncode != 0:
        file_convert_error.append((event_file, result.returncode))
    

100%|██████████| 66/66 [02:56<00:00,  2.68s/it]


In [27]:
# Disabled debugging helper: re-runs the conversion for the single event at
# index 52 of events_files, with an extra "-v" flag appended to the arguments
# (presumably verbose output -- confirm against the conversion scripts).
# i = 52
# file_path = events_files[i]
# event_file = folder_path+file_path
# 
# with h5py.File(event_file,'r+') as file: 
#         waveform = list(file.keys())[0]
#         event_waveforms.append(f"Converting {waveform} from {event_file}")
#         
# args = [f"{waveform}", f"{event_file}",
#         "--Ho", f"{PLANCK_2018_Ho}", "--OmegaMatter", f"{PLANCK_2018_OmegaMatter}", "--OmegaRadiation",
#         f"{PLANCK_2018_OmegaRadiation}", "--OmegaLambda", f"{PLANCK_2018_OmegaLambda}", "-o", f"{write_path}", "-v"]
# 
# if re.search(re1, file_path):
#     subprocess.run(["python", "h52csv_GWTC1.py", *args])
# elif re.search(re2, file_path):
#     subprocess.run(["python", "h52csv_GWTC2p1.py", *args])
# elif re.search(re3, file_path):
#     subprocess.run(["python", "h52csv_GWTC3.py", *args])
# else:
#     raise ValueError("No match found")

In [28]:
# Summarise the run: one line of event-list.txt is counted as one converted event.
with open(write_path + "event-list.txt", "r") as event_list:
    event_txt_files = list(event_list)

n_converted = len(event_txt_files)
n_failed = len(file_open_error) + len(file_convert_error)
n_total = len(events_files)

print("Number of events converted: ", n_converted)
print("Number of events with errors: ", n_failed)
print("Total number of events: ", n_total)

Number of events converted:  66
Number of events with errors:  0
Total number of events:  66


In [29]:
# Load the names of the converted files (one per line, newlines stripped) so
# that the following cells can open them and confirm the conversion.
import numpy as np

with open(write_path + "event-list.txt", "r") as event_list:
    event_list_text = event_list.read()
event_txt_files = event_list_text.splitlines()
event_txt_files

['GW200225_060421.csv.gz',
 'GW190521_074359.csv.gz',
 'GW190915_235702.csv.gz',
 'GW200224_222234.csv.gz',
 'GW190706_222641.csv.gz',
 'GW190412_053044.csv.gz',
 'GW190929_012149.csv.gz',
 'GW200209_085452.csv.gz',
 'GW200219_094415.csv.gz',
 'GW190828_065509.csv.gz',
 'GW191129_134029.csv.gz',
 'GW191109_010717.csv.gz',
 'GW190408_181802.csv.gz',
 'GW190707_093326.csv.gz',
 'GW200115_042309.csv.gz',
 'GW191204_171526.csv.gz',
 'GW190519_153544.csv.gz',
 'GW151226_033853.csv.gz',
 'GW190413_134308.csv.gz',
 'GW190512_180714.csv.gz',
 'GW170818_022509.csv.gz',
 'GW190620_030421.csv.gz',
 'GW200128_022011.csv.gz',
 'GW190814_211039.csv.gz',
 'GW191127_050227.csv.gz',
 'GW170608_020116.csv.gz',
 'GW150914_095045.csv.gz',
 'GW190602_175927.csv.gz',
 'GW190803_022701.csv.gz',
 'GW190701_203306.csv.gz',
 'GW190513_205428.csv.gz',
 'GW190630_185205.csv.gz',
 'GW190503_185404.csv.gz',
 'GW170814_103043.csv.gz',
 'GW170823_131358.csv.gz',
 'GW200129_065458.csv.gz',
 'GW191230_180458.csv.gz',
 

In [30]:
import pandas as pd
import matplotlib.pyplot as plt

# Sanity-check every converted file: histogram the source-frame masses and the
# luminosity distance and save one PNG per event under verify_PE/.
os.makedirs("verify_PE", exist_ok=True)  # savefig does not create directories

for i in tqdm(range(len(event_txt_files))):
    event = event_txt_files[i]
    # Use the full event name (e.g. "GW190413_134308"), not just the first
    # eight characters: two events detected on the same date would otherwise
    # share a title and overwrite each other's PNG.
    event_name = event.split(".", 1)[0]
    try:
        df = pd.DataFrame(np.genfromtxt(write_path + event, delimiter=",", names=True))
        fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(10, 4))
        try:
            for axis, column in zip(ax, ("mass1_source", "mass2_source", "luminosity_distance")):
                axis.hist(df[column])
                axis.set_xlabel(column)
            fig.suptitle(event_name + f" - PE Size: {df.shape[0]}")
            fig.savefig("verify_PE/" + event_name + ".png")
        finally:
            # Always release the figure, even when plotting or saving fails.
            plt.close(fig)
    except Exception as exc:
        # Keep going with the remaining events, but say what went wrong.
        # Catching Exception (not a bare except) leaves Ctrl-C / kernel
        # interrupts able to stop the loop.
        print(f"Problem at i = {i} the following event", event, "->", repr(exc))
        continue

100%|██████████| 66/66 [00:09<00:00,  6.92it/s]


In [31]:
# Print the column names of the event at index 52 next to those of the event
# at index 50 (used as the comparison case) to spot differences in the
# converted output.
i = 52
event = event_txt_files[i]
print(event)

inspected_samples = np.genfromtxt(write_path + event_txt_files[i], delimiter=",", names=True)
comparison_samples = np.genfromtxt(write_path + event_txt_files[50], delimiter=",", names=True)
df = pd.DataFrame(inspected_samples)
df_normal = pd.DataFrame(comparison_samples)

for frame in (df, df_normal):
    print(frame.columns)
# Historical note: before h52csv_GWTC1.py was modified, the first table also
# carried extra columns that the second lacked: geocenter_time,
# lnprob_geocenter_time, lnprob_spin1x_spin1y_spin1z, lnprob_spin2x_spin2y_spin2z,
# loglikelihood, luminosity_distance, spin1x, spin1y, spin1z, spin2x, spin2y,
# spin2z. This is no longer the case.

GW190527_092055.csv.gz
Index(['declination', 'geocenter_time', 'lnprob_declination',
       'lnprob_geocenter_time', 'lnprob_mass1_source', 'lnprob_mass2_source',
       'lnprob_redshift', 'lnprob_right_ascension',
       'lnprob_spin1x_spin1y_spin1z', 'lnprob_spin2x_spin2y_spin2z',
       'loglikelihood', 'luminosity_distance', 'mass1_source', 'mass2_source',
       'redshift', 'right_ascension', 'spin1x', 'spin1y', 'spin1z', 'spin2x',
       'spin2y', 'spin2z'],
      dtype='object')
Index(['declination', 'geocenter_time', 'lnprob_declination',
       'lnprob_geocenter_time', 'lnprob_mass1_source', 'lnprob_mass2_source',
       'lnprob_redshift', 'lnprob_right_ascension',
       'lnprob_spin1x_spin1y_spin1z', 'lnprob_spin2x_spin2y_spin2z',
       'loglikelihood', 'luminosity_distance', 'mass1_source', 'mass2_source',
       'redshift', 'right_ascension', 'spin1x', 'spin1y', 'spin1z', 'spin2x',
       'spin2y', 'spin2z'],
      dtype='object')


In [37]:
# DEBUGGING: manually read the PE file for GW170817 to check whether spin and
# luminosity-distance components are present, then re-run the GWTC-1
# conversion script on it (the script was changed to include these components).
i = 48
file_path = events_files[i]
event_file = folder_path + file_path
print(file_path)

# A single read-only open is enough: the file is only inspected here, so
# write access ("r+") is neither needed nor desirable.
with h5py.File(event_file, "r") as f:
    print("Keys: %s" % f.keys())
    a_group_key = list(f.keys())[0]
    data = f[a_group_key]
    print(data.dtype.descr)
    # The first top-level key is what the conversion script receives as its
    # first positional argument.
    waveform = a_group_key
event_waveforms.append(f"Converting {waveform} from {event_file}")

args = [f"{waveform}", f"{event_file}",
        "--Ho", f"{PLANCK_2018_Ho}",
        "--OmegaMatter", f"{PLANCK_2018_OmegaMatter}",
        "--OmegaRadiation", f"{PLANCK_2018_OmegaRadiation}",
        "--OmegaLambda", f"{PLANCK_2018_OmegaLambda}",
        "-o", f"{write_path}"]

# check=True raises CalledProcessError if the script fails, so the read below
# never silently picks up a stale CSV from an earlier run. sys.executable keeps
# the script on the same interpreter as this kernel.
subprocess.run([sys.executable, "h52csv_GWTC1.py", *args], check=True)

# NOTE(review): this assumes event_txt_files[i] corresponds to events_files[i],
# i.e. that event-list.txt is in the same order as events_files -- confirm.
df = pd.DataFrame(np.genfromtxt(write_path + event_txt_files[i], delimiter=",", names=True))
# df.columns

GW170817_GWTC-1.hdf5
Keys: <KeysViewHDF5 ['IMRPhenomPv2NRT_highSpin_posterior', 'IMRPhenomPv2NRT_highSpin_prior', 'IMRPhenomPv2NRT_lowSpin_posterior', 'IMRPhenomPv2NRT_lowSpin_prior']>
[('costheta_jn', '<f8'), ('luminosity_distance_Mpc', '<f8'), ('right_ascension', '<f8'), ('declination', '<f8'), ('m1_detector_frame_Msun', '<f8'), ('m2_detector_frame_Msun', '<f8'), ('lambda1', '<f8'), ('lambda2', '<f8'), ('spin1', '<f8'), ('spin2', '<f8'), ('costilt1', '<f8'), ('costilt2', '<f8')]


# Conversion of Injections

In [38]:
# Location of the injection (sensitivity) files and the two releases used below.
vt_folder = "../../vt/"
vt_file = "o1+o2+o3_mixture_real+semianalytic-LIGO-T2100377-v2.hdf5"
vt_file_O3_full = "endo3_mixture-LIGO-T2100113-v12.hdf5"

# Full path of the file that is actually converted further down.
vt_path = f"{vt_folder}{vt_file}"

In [39]:
# Show the top-level keys of both injection files and list the members of the
# first key, O3-only file first, then the file that will be converted.
for injection_file in (vt_folder + vt_file_O3_full, vt_path):
    with h5py.File(injection_file, "r") as f:
        print("Keys: %s" % f.keys())
        a_group_key = list(f.keys())[0]
        first_entry = f[a_group_key]
        print(type(first_entry))
        data = list(first_entry)
        print(data)

Keys: <KeysViewHDF5 ['injections']>
<class 'h5py._hl.group.Group'>
['declination', 'declination_sampling_pdf', 'detection_statistic_cwb', 'detection_statistic_mbta', 'detection_statistic_pycbc_bbh', 'detection_statistic_pycbc_hyperbank', 'distance', 'far_cwb', 'far_gstlal', 'far_mbta', 'far_pycbc_bbh', 'far_pycbc_hyperbank', 'gps_time', 'gps_time_int', 'ifar_cwb', 'ifar_gstlal', 'ifar_mbta', 'ifar_pycbc_bbh', 'ifar_pycbc_hyperbank', 'inclination', 'inclination_sampling_pdf', 'mass1', 'mass1_source', 'mass2', 'mass2_source', 'mixture_weight', 'optimal_snr_h', 'optimal_snr_l', 'optimal_snr_net', 'pastro_cwb', 'pastro_gstlal', 'pastro_mbta', 'pastro_pycbc_bbh', 'pastro_pycbc_hyperbank', 'polarization', 'redshift', 'right_ascension', 'right_ascension_sampling_pdf', 'sampling_pdf', 'spin1x', 'spin1y', 'spin1z', 'spin2x', 'spin2y', 'spin2z']
Keys: <KeysViewHDF5 ['injections']>
<class 'h5py._hl.group.Group'>
['far_cwb', 'far_gstlal', 'far_mbta', 'far_pycbc_bbh', 'far_pycbc_hyperbank', 'ifar_c

In [40]:
# Convert the injection file to CSV with the same cosmology as the events.

# Only the first top-level key is read, so open the file read-only rather
# than with "r+".
with h5py.File(vt_path, "r") as file:
    waveform = list(file.keys())[0]
event_waveforms.append(f"Converting {waveform} from {vt_path}")

args = ["--max-far", f"{1}",
        "--Ho", f"{PLANCK_2018_Ho}",
        "--OmegaMatter", f"{PLANCK_2018_OmegaMatter}",
        "--OmegaRadiation", f"{PLANCK_2018_OmegaRadiation}",
        "--OmegaLambda", f"{PLANCK_2018_OmegaLambda}",
        f"{vt_path}",
        # "-v",
        # f"{vt_folder}" + "sensitivity-estimate_O3.csv.gz"]
        f"{vt_folder}" + "sensitivity-estimate.csv.gz"]

# check=True makes a failing script raise CalledProcessError, so "Done" is only
# printed after a successful conversion. sys.executable runs the script with
# the interpreter of this kernel instead of whatever "python" is on PATH.
subprocess.run([sys.executable, "h52csv_injections.py", *args], check=True)
print("Done")

Done
