In [3]:
import os
import re
import pandas as pd
from collections import defaultdict

In [4]:
root_dir = "../testing_mmms"

# Regex patterns.
# prob_pattern / odds_pattern each capture two numeric tokens: the reported
# value (group 1) and the number following "+/-" (group 2). Only group 1 is
# used below.
prob_pattern = r"Probability:\s+P\(mass <= max_mass\(spin, eos\)\)\s+=\s+([0-9.eE+-]+)\s+\+/-\s+([0-9.eE+-]+)"
odds_pattern = r"Odds Ratio\s+:\s+O\^\{mass <= max_mass\(spin, eos\)\)\_\{else\}\s+=\s+([0-9.eE+-]+)\s+\+/-\s+([0-9.eE+-]+)"
# filename_pattern captures: prefix (e.g., gw230529_highSpin), model label
# (e.g., multiPDB_betaSplit3_brokenG) and the component number (e.g., 1).
filename_pattern = r"([a-zA-Z0-9]+_[a-zA-Z0-9]+)\+([a-zA-Z0-9_]+)\+component(\d+)\.out"


def parse_out_result(filename, text):
    """Extract the probability and odds ratio reported in one ``.out`` file.

    Parameters
    ----------
    filename : str
        Base name of the file; matched from its start against
        ``filename_pattern``.
    text : str
        Full contents of the file; searched with ``prob_pattern`` and
        ``odds_pattern``.

    Returns
    -------
    tuple or None
        ``(model_label, {column_name: value})`` holding the two columns
        ``"<prefix>_Probability_<component>"`` and
        ``"<prefix>_OddsRatio_<component>"``. ``None`` is returned when the
        file name or either quantity does not match, or when a captured
        token is not a valid float.
    """
    file_match = re.match(filename_pattern, filename)
    prob_match = re.search(prob_pattern, text)
    odds_match = re.search(odds_pattern, text)
    if not (prob_match and odds_match and file_match):
        return None

    prefix, model_label, component = file_match.groups()

    # The numeric character class is permissive (it accepts e.g. "1.2.3"),
    # so float() can fail on a token the regex accepted; treat that as an
    # unparseable file instead of aborting the whole directory walk.
    try:
        prob = float(prob_match.group(1))
        odds = float(odds_match.group(1))
    except ValueError:
        return None

    return model_label, {
        f"{prefix}_Probability_{component}": prob,
        f"{prefix}_OddsRatio_{component}": odds,
    }


# Store results: {model_label: {col_name: value}}
records = defaultdict(dict)

# Paths of .out files that were found but could not be parsed.
skipped_files = []

for dirpath, _dirnames, filenames in os.walk(root_dir):
    for filename in filenames:
        if not filename.endswith(".out"):
            continue

        filepath = os.path.join(dirpath, filename)
        with open(filepath, "r") as f:
            text = f.read()

        parsed = parse_out_result(filename, text)
        if parsed is None:
            skipped_files.append(filepath)
            continue

        model_label, values = parsed
        # A later file with the same model label, prefix and component
        # overwrites the values stored by an earlier one.
        records[model_label].update(values)

# Make dropped files visible instead of silently omitting them from the table.
if skipped_files:
    print(f"Skipped {len(skipped_files)} .out file(s) that could not be parsed:")
    for skipped_path in skipped_files:
        print(f"  {skipped_path}")

# Build the summary table: one row per model label (the outer keys of
# `records`), with the index named "Model".
df = pd.DataFrame.from_dict(records, orient="index").rename_axis("Model")

# Put the columns in sorted-name order so the table is easier to read.
column_order = sorted(df.columns)
df = df.reindex(columns=column_order)

# Render the table as the cell output.
display(df)

Unnamed: 0_level_0,gw230529_highSpin_OddsRatio_1,gw230529_highSpin_OddsRatio_2,gw230529_highSpin_Probability_1,gw230529_highSpin_Probability_2
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
pdbNG_betaSplit_brokenG,0.480823,168.290052,0.3247,0.994093
pdbNG_betaSplit_singleG,0.383315,233.435375,0.277099,0.995734
multiPDB_betaSplit3_brokenG,0.314635,467.072908,0.239333,0.997864
pdbNG_betaSplitSmooth_brokenG,0.483706,196.210316,0.326012,0.994929
pdbNG_betaSplit_brokenG_tight_prior,0.390504,191.355038,0.280836,0.994801
pdbNG_betaSplit3_brokenG,0.509904,135.258184,0.337706,0.992661
multiPDB_betaSplit_singleG,0.239216,1205.409611,0.193038,0.999171
pdbNG_betaSplit_brokenG_sig_peak1_large,0.276077,444.482138,0.216348,0.997755
multiPDB_betaSplit_brokenG,0.25598,886.867148,0.203809,0.998874
multiPDB_betaSplitSmooth_brokenG,0.256408,807.711741,0.20408,0.998763
