In [4]:
import os
import re
import pandas as pd
from collections import defaultdict

In [5]:
def parse_file(filename, root_dir = "../testing_mmms"):
    path = os.path.join(root_dir, filename)

    # Regex patterns
    prob_pattern = r"Probability:\s+P\(mass <= max_mass\(spin, eos\)\)\s+=\s+([0-9.eE+-]+)\s+\+/-\s+([0-9.eE+-]+)"
    odds_pattern = r"Odds Ratio\s+:\s+O\^\{mass <= max_mass\(spin, eos\)\)\_\{else\}\s+=\s+([0-9.eE+-]+)\s+\+/-\s+([0-9.eE+-]+)"
    filename_pattern1 = r'GW\d{6}'
    filename_pattern2 = r'(?<=\+)[^+]+(?=\+component\d+)'
    filename_pattern3 = r'component(\d+)'

    records = defaultdict(dict)

    with open(path, "r") as f:
        text = f.read()

    prob_match = re.search(prob_pattern, text)
    odds_match = re.search(odds_pattern, text)
    file_match1 = re.search(filename_pattern1, filename)
    file_match2 = re.search(filename_pattern2, filename)
    file_match3 = re.search(filename_pattern3, filename)

    if prob_match and odds_match and file_match:
        prefix = file_match1.group()  # e.g., GW230529_Combined_PHM_highSpin
        model_label = file_match2.group()  # e.g., pdbNG_betaSplit_brokenG_LEC-2020-logweight_...
        component = file_match3.group()     # e.g., 2

        prob = float(prob_match.group(1))
        odds = float(odds_match.group(1))

        prob_col = f"{prefix}_Probability_{component}"
        odds_col = f"{prefix}_OddsRatio_{component}"

        records[model_label][prob_col] = prob
        records[model_label][odds_col] = odds

    # Create DataFrame
    df = pd.DataFrame.from_dict(records, orient="index")
    df.index.name = "Model"
    # Optional: sort columns for readability
    df = df.reindex(sorted(df.columns), axis=1)

    #if dataframe is empty, return None
    if df.empty:
        # get rid of last 3 characters of the filename and replace with .err
        err_filename = path[:-3] + "err"
        with open(err_filename, "r") as f2:
            print(f2.read())
            return None
    return df


_pe = {
    "GW190425":"GW190425_C01:IMRPhenomPv2_NRTidal:HighSpin",
    "GW190814":"GW190814_C01:IMRPhenomXPHM",
    "GW190917":"GW190917_C01:IMRPhenomXPHM",
    "GW200105":"GW200105_C01:IMRPhenomXPHM",
    "GW200115":"GW200115_C01:IMRPhenomNSBH:HighSpin",
    "GW230529_highspin":"GW230529_Combined_PHM_highSpin",
    "GW230529_lowspin":"GW230529_Combined_PHM_lowSecondarySpin"
    }

In [7]:
event_name = "GW200105"
mass_dist = "pdbNG"
pairing = "betaSplit"
spin = "brokenG"
suffix = ""
component = "2"
filename = _pe[event_name]+"+"+mass_dist+"_"+pairing+"_"+spin+suffix+"+component"+component+".out"

parse_file(filename)

NameError: name 'file_match' is not defined

In [57]:
root_dir = "../testing_mmms"

# Regex patterns
prob_pattern = r"Probability:\s+P\(mass <= max_mass\(spin, eos\)\)\s+=\s+([0-9.eE+-]+)\s+\+/-\s+([0-9.eE+-]+)"
odds_pattern = r"Odds Ratio\s+:\s+O\^\{mass <= max_mass\(spin, eos\)\)\_\{else\}\s+=\s+([0-9.eE+-]+)\s+\+/-\s+([0-9.eE+-]+)"
filename_pattern = r"([A-Za-z0-9_]+)\+(.+?)\+component(\d+)\.out"

# Store results: {model_label: {col_name: value}}
records = defaultdict(dict)

for dirpath, _, filenames in os.walk(root_dir):
    for filename in filenames:
        if filename.endswith(".out"):
            filepath = os.path.join(dirpath, filename)
            with open(filepath, "r") as f:
                text = f.read()

            prob_match = re.search(prob_pattern, text)
            odds_match = re.search(odds_pattern, text)
            file_match = re.match(filename_pattern, filename)

            if prob_match and odds_match and file_match:
                prefix = file_match.group(1)  # e.g., gw230529_highSpin
                model_label = file_match.group(2)  # e.g., multiPDB_betaSplit3_brokenG
                component = file_match.group(3)     # e.g., 1

                prob = float(prob_match.group(1))
                odds = float(odds_match.group(1))

                prob_col = f"{prefix}_Probability_{component}"
                odds_col = f"{prefix}_OddsRatio_{component}"

                records[model_label][prob_col] = prob
                records[model_label][odds_col] = odds

# Create DataFrame
df = pd.DataFrame.from_dict(records, orient="index")
df.index.name = "Model"

# Optional: sort columns for readability
df = df.reindex(sorted(df.columns), axis=1)

# Save table to CSV
output_csv = os.path.join("probabilities_odds_ratios.csv")
df.to_csv(output_csv)

# Output DataFrame
display(df)

Unnamed: 0_level_0,GW230529_Combined_PHM_highSpin_OddsRatio_1,GW230529_Combined_PHM_highSpin_OddsRatio_2,GW230529_Combined_PHM_highSpin_Probability_1,GW230529_Combined_PHM_highSpin_Probability_2,GW230529_Combined_PHM_lowSecondarySpin_OddsRatio_1,GW230529_Combined_PHM_lowSecondarySpin_Probability_1
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
pdbNG_betaSplit_brokenG_sig_peak1_test,0.411146,,0.291356,,0.296469,0.228674
multiPDB_betaSplit_brokenG,0.259845,,0.206251,,0.224051,0.183041
pdbNG_betaSplit_brokenG_C,0.363429,,0.266555,,0.250567,0.200362
pdbNG_betaSplit_brokenG,0.420036,,0.295792,,0.295726,0.228232
pdbNG_betaSplit_brokenG_D,0.377184,,0.273881,,0.270484,0.212899
pdbNG_betaSplit_brokenG_tight_prior,0.375914,,0.273211,,0.279505,0.218448
pdbNG_betaSplit_brokenG_sig_peak1_large,0.263272,,0.208405,,0.205054,0.170161
pdbNG_betaSplit_brokenG_B,0.426877,,0.299169,,0.300033,0.230789
pdbNG_betaSplit3_brokenG,0.465182,,0.317491,,0.315452,0.239805
pdbNG_betaSplit_brokenG_E,0.42384,,0.297674,,0.297151,0.22908


In [3]:
import os
import re
import pandas as pd
from collections import defaultdict

root_dir = "../testing_mmms"

# Updated regex patterns based on actual file content
prob_pattern = r"Probability:\s+P\(mass <= max_mass\(spin, eos\)AND spin <= max_spin\(eos\)\)\s+=\s+([0-9.eE+-]+)\s+\+/-\s+([0-9.eE+-]+)"
odds_pattern = r"Odds Ratio\s+:\s+O\^\{mass <= max_mass\(spin, eos\)AND spin <= max_spin\(eos\)\)_\{else\}\s+=\s+([0-9.eE+-]+)\s+\+/-\s+([0-9.eE+-]+)"

# Updated to allow + and hyphens in model names
filename_pattern = r"([^+]+)\+(.+)\+component(\d+)\.out"

# Store results: {model_label: {col_name: value}}
records = defaultdict(dict)

for dirpath, _, filenames in os.walk(root_dir):
    for filename in filenames:
        if filename.endswith(".out"):
            filepath = os.path.join(dirpath, filename)
            with open(filepath, "r") as f:
                text = f.read()

            prob_match = re.search(prob_pattern, text)
            odds_match = re.search(odds_pattern, text)
            file_match = re.match(filename_pattern, filename)

            if prob_match and odds_match and file_match:
                prefix = file_match.group(1)  # e.g., GW230529_Combined_PHM_highSpin
                model_label = file_match.group(2)  # e.g., pdbNG_betaSplit_brokenG_LEC-2020-logweight_...
                component = file_match.group(3)     # e.g., 2

                prob = float(prob_match.group(1))
                odds = float(odds_match.group(1))

                prob_col = f"{prefix}_Probability_{component}"
                odds_col = f"{prefix}_OddsRatio_{component}"

                records[model_label][prob_col] = prob
                records[model_label][odds_col] = odds

# Create DataFrame
df = pd.DataFrame.from_dict(records, orient="index")
df.index.name = "Model"

# Optional: sort columns for readability
df = df.reindex(sorted(df.columns), axis=1)

# Save table to CSV
output_csv = os.path.join("eos_probabilities_odds_ratios.csv")
df.to_csv(output_csv)

# Output DataFrame
display(df)


Unnamed: 0_level_0,GW230529_Combined_PHM_highSpin_OddsRatio_1,GW230529_Combined_PHM_highSpin_OddsRatio_2,GW230529_Combined_PHM_highSpin_Probability_1,GW230529_Combined_PHM_highSpin_Probability_2,GW230529_Combined_PHM_lowSecondarySpin_OddsRatio_1,GW230529_Combined_PHM_lowSecondarySpin_OddsRatio_2,GW230529_Combined_PHM_lowSecondarySpin_Probability_1,GW230529_Combined_PHM_lowSecondarySpin_Probability_2
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
multiPDB_betaSplit_brokenG_LEC-2020-logweight_PSR_GW_Xray,0.011852,306.198498,0.011713,0.996745,4.5e-05,218704300.0,4.5e-05,1.0
pdbNG_betaSplit_brokenG_LEC-2020-logweight_PSR_GW_Xray,0.03687,119.501371,0.035559,0.991701,0.000512,52093480.0,0.000511,1.0
pdbNG_betaSplit_singleG_LEC-2020-logweight_PSR_GW_Xray,0.050193,144.02536,0.047794,0.993105,0.000667,7751333000.0,0.000667,1.0
multiPDB_betaSplit3_brokenG_LEC-2020-logweight_PSR_GW_Xray,0.026787,388.060053,0.026089,0.99743,0.000558,40355860.0,0.000558,1.0
pdbNG_betaSplit_brokenG_sig_peak1_test_LEC-2020-logweight_PSR_GW_Xray,0.087309,234.367337,0.080299,0.995751,0.000935,38588680.0,0.000934,1.0
multiPDB_betaSplitSmooth_brokenG_LEC-2020-logweight_PSR_GW_Xray,0.024739,225.129872,0.024141,0.995578,0.000426,431488300.0,0.000426,1.0
pdbNG_betaSplitSmooth_brokenG_LEC-2020-logweight_PSR_GW_Xray,0.04142,39.335541,0.039772,0.975208,0.001372,668907300.0,0.00137,1.0
pdbNG_betaSplit_brokenG_sig_peak1_large_LEC-2020-logweight_PSR_GW_Xray,0.014094,271.382836,0.013898,0.996329,0.000282,230106300.0,0.000282,1.0
multiPDB_betaSplit_singleG_LEC-2020-logweight_PSR_GW_Xray,0.013608,794.58951,0.013425,0.998743,4.8e-05,,4.8e-05,
pdbNG_betaSplit3_brokenG_LEC-2020-logweight_PSR_GW_Xray,0.276492,13.932663,0.216603,0.933033,0.003089,,0.003079,
