In [1]:
import os
import re
import pandas as pd
from collections import defaultdict

In [2]:
root_dir = "../testing_mmms"  # <-- Change this to your folder

# Regex patterns.
# prob_pattern / odds_pattern each capture two groups from a line of the form
# "<label> = <value> +/- <uncertainty>"; only group 1 (the value) is used below.
prob_pattern = r"Probability:\s+P\(mass <= max_mass\(spin, eos\)\)\s+=\s+([0-9.eE+-]+)\s+\+/-\s+([0-9.eE+-]+)"
# NOTE(review): this pattern expects "eos))" (two closing parentheses) before
# "_{else}". The recorded output of this cell shows it matching, so it presumably
# mirrors the producer's formatting -- confirm before "fixing" it.
odds_pattern = r"Odds Ratio\s+:\s+O\^\{mass <= max_mass\(spin, eos\)\)\_\{else\}\s+=\s+([0-9.eE+-]+)\s+\+/-\s+([0-9.eE+-]+)"
# Captures (model label, component number) from "<prefix>+<label>+component<N>.out".
filename_pattern = r".+\+([a-zA-Z0-9_]+)\+component(\d+)\.out"

# Columns of the final table, in display order. Columns for any other component
# number are dropped when the table is reindexed below.
expected_columns = ["Probability_1", "OddsRatio_1", "Probability_2", "OddsRatio_2"]

# Upper bound on how many unparsable file paths are listed individually.
max_skipped_shown = 10


def parse_result_file(filename, text):
    """Extract one result from a single ``.out`` file.

    Parameters
    ----------
    filename : str
        Base name of the file; matched against ``filename_pattern``.
    text : str
        Full contents of the file; searched with ``prob_pattern`` and
        ``odds_pattern``.

    Returns
    -------
    tuple or None
        ``(model_label, component, prob, odds)`` where ``model_label`` and
        ``component`` are strings taken from the file name and ``prob`` /
        ``odds`` are floats. ``None`` if any of the three patterns fails to
        match, or if a captured value is not a valid float.
    """
    file_match = re.match(filename_pattern, filename)
    prob_match = re.search(prob_pattern, text)
    odds_match = re.search(odds_pattern, text)
    if not (prob_match and odds_match and file_match):
        return None

    # The numeric character class is permissive (it would accept e.g. "-" or
    # "1.2.3"), so float() can still reject the capture. Treat that as an
    # unparsable file instead of aborting the whole scan.
    try:
        prob = float(prob_match.group(1))
        odds = float(odds_match.group(1))
    except ValueError:
        return None

    return file_match.group(1), file_match.group(2), prob, odds


def collect_records(root):
    """Walk ``root`` recursively and gather the results of every ``.out`` file.

    Directories and files are visited in sorted order, so the row order of the
    resulting table is reproducible. If several files map to the same
    (model, component) pair, the one visited last overwrites the earlier ones.

    Parameters
    ----------
    root : str
        Directory to scan.

    Returns
    -------
    records : defaultdict(dict)
        ``records[model_label]["Probability_<component>"]`` and
        ``records[model_label]["OddsRatio_<component>"]`` hold the parsed floats.
    skipped : list of str
        Paths of ``.out`` files that could not be parsed.
    """
    records = defaultdict(dict)
    skipped = []

    for dirpath, dirnames, filenames in os.walk(root):
        # Sorting dirnames in place fixes the order in which os.walk descends.
        dirnames.sort()
        for filename in sorted(filenames):
            if not filename.endswith(".out"):
                continue

            filepath = os.path.join(dirpath, filename)
            # The patterns are pure ASCII, so undecodable bytes elsewhere in the
            # file can be replaced safely rather than raising UnicodeDecodeError.
            with open(filepath, "r", errors="replace") as f:
                text = f.read()

            parsed = parse_result_file(filename, text)
            if parsed is None:
                skipped.append(filepath)
                continue

            model_label, component, prob, odds = parsed
            records[model_label][f"Probability_{component}"] = prob
            records[model_label][f"OddsRatio_{component}"] = odds

    return records, skipped


# Nested dictionary to collect values, plus the files that were ignored
records, skipped_files = collect_records(root_dir)

# os.walk yields nothing for a missing directory, so say so explicitly instead
# of silently showing an empty table.
if not os.path.isdir(root_dir):
    print(f"Warning: {root_dir!r} is not an existing directory; the table will be empty.")

if skipped_files:
    print(f"Skipped {len(skipped_files)} .out file(s) that could not be parsed:")
    for skipped_path in skipped_files[:max_skipped_shown]:
        print(f"  {skipped_path}")
    if len(skipped_files) > max_skipped_shown:
        print(f"  ... and {len(skipped_files) - max_skipped_shown} more")

# Create DataFrame: one row per model, one column per parsed quantity
df = pd.DataFrame.from_dict(records, orient="index")
df.index.name = "Model"

# Keep exactly the expected columns in the expected order; any column that was
# never filled is created and holds NaN.
df = df.reindex(columns=expected_columns)

# Output the DataFrame
display(df)


Unnamed: 0_level_0,Probability_1,OddsRatio_1,Probability_2,OddsRatio_2
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
multiPDB_betaSplit3_brokenG,0.273361,0.376198,0.997862,466.689067
