In [3]:
import os
import re
import pandas as pd
from collections import defaultdict

In [4]:
# Directory tree that is searched recursively for "*.out" result files.
root_dir = "../testing_mmms"

# Regex patterns.
# prob_pattern / odds_pattern each capture two numbers: the value (group 1) and
# the "+/-" uncertainty (group 2). Only the value is stored in the table below.
prob_pattern = r"Probability:\s+P\(mass <= max_mass\(spin, eos\)\)\s+=\s+([0-9.eE+-]+)\s+\+/-\s+([0-9.eE+-]+)"
odds_pattern = r"Odds Ratio\s+:\s+O\^\{mass <= max_mass\(spin, eos\)\)\_\{else\}\s+=\s+([0-9.eE+-]+)\s+\+/-\s+([0-9.eE+-]+)"
# Filenames look like "<prefix>+<model_label>+component<N>.out".
filename_pattern = r"([A-Za-z0-9_]+)\+(.+?)\+component(\d+)\.out"

# os.walk() silently yields nothing for a missing directory, which would make
# this cell overwrite the CSV with an empty table. Fail loudly instead.
if not os.path.isdir(root_dir):
    raise FileNotFoundError(
        f"root_dir {root_dir!r} is not a directory (cwd: {os.getcwd()!r})"
    )

# Store results: {model_label: {col_name: value}}
records = defaultdict(dict)

# (filepath, reason) for every ".out" file that did not contribute to `records`,
# so that dropped files are reported instead of disappearing silently.
skipped = []

for dirpath, _, filenames in os.walk(root_dir):
    for filename in filenames:
        if not filename.endswith(".out"):
            continue

        filepath = os.path.join(dirpath, filename)
        with open(filepath, "r") as f:
            text = f.read()

        prob_match = re.search(prob_pattern, text)
        odds_match = re.search(odds_pattern, text)
        file_match = re.match(filename_pattern, filename)

        # Name exactly which pieces are missing so the skip report is actionable.
        missing = [
            label
            for label, match in (
                ("filename pattern", file_match),
                ("probability line", prob_match),
                ("odds-ratio line", odds_match),
            )
            if match is None
        ]
        if missing:
            skipped.append((filepath, "no match for " + ", ".join(missing)))
            continue

        prefix = file_match.group(1)       # e.g., gw230529_highSpin
        model_label = file_match.group(2)  # e.g., multiPDB_betaSplit3_brokenG
        component = file_match.group(3)    # e.g., 1

        # The numeric character class also admits malformed tokens (e.g. "1.2.3");
        # treat those as a skipped file rather than aborting the whole scan.
        try:
            prob = float(prob_match.group(1))
            odds = float(odds_match.group(1))
        except ValueError as exc:
            skipped.append((filepath, f"unparsable number ({exc})"))
            continue

        prob_col = f"{prefix}_Probability_{component}"
        odds_col = f"{prefix}_OddsRatio_{component}"

        model_row = records[model_label]
        # Two files with the same name in different sub-directories map to the
        # same cell; the later one wins (as before), but say so.
        if prob_col in model_row or odds_col in model_row:
            print(
                f"Warning: {filepath} overwrites earlier values for "
                f"model {model_label!r}, columns {prob_col!r}/{odds_col!r}"
            )
        model_row[prob_col] = prob
        model_row[odds_col] = odds

if skipped:
    print(f"Skipped {len(skipped)} .out file(s):")
    for skipped_path, reason in skipped:
        print(f"  {skipped_path}: {reason}")

# Create DataFrame: one row per model label, one column per
# "<prefix>_<quantity>_<component>" combination.
df = pd.DataFrame.from_dict(records, orient="index")
df.index.name = "Model"

# Optional: sort columns for readability
df = df.reindex(sorted(df.columns), axis=1)

# Save table to CSV (written relative to the current working directory)
output_csv = "probabilities_odds_ratios.csv"
df.to_csv(output_csv)

# Output DataFrame
display(df)

Unnamed: 0_level_0,GW230529_Combined_PHM_highSpin_OddsRatio_1,GW230529_Combined_PHM_highSpin_OddsRatio_2,GW230529_Combined_PHM_highSpin_Probability_1,GW230529_Combined_PHM_highSpin_Probability_2,gw230529_highSpin_OddsRatio_1,gw230529_highSpin_OddsRatio_2,gw230529_highSpin_Probability_1,gw230529_highSpin_Probability_2
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
multiPDB_betaSplit_brokenG,0.259845,2276.596867,0.206251,0.999561,0.259845,2276.596867,0.206251,0.999561
pdbNG_betaSplit_brokenG,0.420036,433.259458,0.295792,0.997697,0.420036,433.259458,0.295792,0.997697
pdbNG_betaSplit_brokenG_tight_prior,0.375914,474.323436,0.273211,0.997896,0.375914,474.323436,0.273211,0.997896
pdbNG_betaSplit_singleG,0.360573,524.903114,0.265016,0.998099,0.360573,524.903114,0.265016,0.998099
pdbNG_betaSplit3_brokenG,0.465182,230.045724,0.317491,0.995672,0.465182,230.045724,0.317491,0.995672
multiPDB_betaSplit3_brokenG,0.29888,852.455015,0.230106,0.998828,0.29888,852.455015,0.230106,0.998828
multiPDB_betaSplitSmooth_brokenG,0.257581,1778.37586,0.204823,0.999438,0.257581,1778.37586,0.204823,0.999438
pdbNG_betaSplit_brokenG_same_mbrk,0.417578,400.542963,0.294572,0.99751,0.417578,400.542963,0.294572,0.99751
pdbNG_betaSplitSmooth_brokenG,0.444089,350.475468,0.307522,0.997155,0.444089,350.475468,0.307522,0.997155
multiPDB_betaSplit_singleG,0.230897,2531.037021,0.187584,0.999605,0.230897,2531.037021,0.187584,0.999605
