In [1]:
import os
import re
import pandas as pd
from collections import defaultdict

In [2]:
root_dir = "../testing_mmms"

# Regex patterns
prob_pattern = r"Probability:\s+P\(mass <= max_mass\(spin, eos\)\)\s+=\s+([0-9.eE+-]+)\s+\+/-\s+([0-9.eE+-]+)"
odds_pattern = r"Odds Ratio\s+:\s+O\^\{mass <= max_mass\(spin, eos\)\)\_\{else\}\s+=\s+([0-9.eE+-]+)\s+\+/-\s+([0-9.eE+-]+)"
filename_pattern = r"([A-Za-z0-9_]+)\+(.+?)\+component(\d+)\.out"

# Store results: {model_label: {col_name: value}}
records = defaultdict(dict)

for dirpath, _, filenames in os.walk(root_dir):
    for filename in filenames:
        if filename.endswith(".out"):
            filepath = os.path.join(dirpath, filename)
            with open(filepath, "r") as f:
                text = f.read()

            prob_match = re.search(prob_pattern, text)
            odds_match = re.search(odds_pattern, text)
            file_match = re.match(filename_pattern, filename)

            if prob_match and odds_match and file_match:
                prefix = file_match.group(1)  # e.g., gw230529_highSpin
                model_label = file_match.group(2)  # e.g., multiPDB_betaSplit3_brokenG
                component = file_match.group(3)     # e.g., 1

                prob = float(prob_match.group(1))
                odds = float(odds_match.group(1))

                prob_col = f"{prefix}_Probability_{component}"
                odds_col = f"{prefix}_OddsRatio_{component}"

                records[model_label][prob_col] = prob
                records[model_label][odds_col] = odds

# Create DataFrame
df = pd.DataFrame.from_dict(records, orient="index")
df.index.name = "Model"

# Optional: sort columns for readability
df = df.reindex(sorted(df.columns), axis=1)

# Save table to CSV
output_csv = os.path.join("probabilities_odds_ratios.csv")
df.to_csv(output_csv)

# Output DataFrame
display(df)

Unnamed: 0_level_0,GW230529_Combined_PHM_highSpin_OddsRatio_1,GW230529_Combined_PHM_highSpin_OddsRatio_2,GW230529_Combined_PHM_highSpin_Probability_1,GW230529_Combined_PHM_highSpin_Probability_2,gw230529_highSpin_OddsRatio_1,gw230529_highSpin_OddsRatio_2,gw230529_highSpin_Probability_1,gw230529_highSpin_Probability_2
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
multiPDB_betaSplit_brokenG,0.27422,1246.886027,0.215206,0.999199,0.27422,1246.886027,0.215206,0.999199
pdbNG_betaSplit_brokenG,0.468977,212.602126,0.319254,0.995318,0.468977,212.602126,0.319254,0.995318
pdbNG_betaSplit_brokenG_tight_prior,0.399373,163.693987,0.285394,0.993928,0.399373,163.693987,0.285394,0.993928
pdbNG_betaSplit_singleG,0.38603,192.075428,0.278515,0.994821,0.38603,192.075428,0.278515,0.994821
pdbNG_betaSplit3_brokenG,0.481308,65.954116,0.324921,0.985064,0.481308,65.954116,0.324921,0.985064
multiPDB_betaSplit3_brokenG,0.376198,466.689067,0.273361,0.997862,0.376198,466.689067,0.273361,0.997862
multiPDB_betaSplitSmooth_brokenG,0.282656,868.094711,0.220368,0.998849,0.282656,868.094711,0.220368,0.998849
pdbNG_betaSplit_brokenG_same_mbrk,0.549018,507.685815,0.35443,0.998034,0.549018,507.685815,0.35443,0.998034
pdbNG_betaSplitSmooth_brokenG,0.592014,543.180049,0.371865,0.998162,0.592014,543.180049,0.371865,0.998162
multiPDB_betaSplit_singleG,0.202673,823.612422,0.168519,0.998787,0.202673,823.612422,0.168519,0.998787
