In [1]:
import os
import re
import pandas as pd
from collections import defaultdict

In [2]:
# Directory tree that is walked recursively for ``*.out`` result files.
root_dir = "../testing_mmms"

# Regex patterns
# Each content pattern captures two numeric tokens: the value before "+/-"
# (group 1) and the token after it (group 2). Only group 1 is used below.
prob_pattern = r"Probability:\s+P\(mass <= max_mass\(spin, eos\)\)\s+=\s+([0-9.eE+-]+)\s+\+/-\s+([0-9.eE+-]+)"
odds_pattern = r"Odds Ratio\s+:\s+O\^\{mass <= max_mass\(spin, eos\)\)\_\{else\}\s+=\s+([0-9.eE+-]+)\s+\+/-\s+([0-9.eE+-]+)"
# Filename layout: <prefix>+<model_label>+component<N>.out
filename_pattern = r"([A-Za-z0-9_]+)\+(.+?)\+component(\d+)\.out"

# Compile once instead of re-resolving the pattern strings for every file.
prob_re = re.compile(prob_pattern)
odds_re = re.compile(odds_pattern)
filename_re = re.compile(filename_pattern)

# Store results: {model_label: {col_name: value}}
records = defaultdict(dict)

# (filepath, reason) for every ``.out`` file that could not be turned into a
# record, so that a pattern mismatch is reported instead of silently producing
# an incomplete (or empty) table.
skipped = []
MAX_SKIPS_SHOWN = 10  # cap the report so the cell output stays readable

for dirpath, dirnames, filenames in os.walk(root_dir):
    # os.walk yields entries in arbitrary order. Sorting makes the traversal
    # reproducible, which also fixes which file wins when two files map to the
    # same (model_label, column) and the later one overwrites the earlier one.
    dirnames.sort()
    for filename in sorted(filenames):
        if not filename.endswith(".out"):
            continue
        filepath = os.path.join(dirpath, filename)

        # Check the filename first: no need to read a file we cannot label.
        file_match = filename_re.match(filename)
        if file_match is None:
            skipped.append((filepath, "filename does not match filename_pattern"))
            continue

        with open(filepath, "r") as f:
            text = f.read()

        prob_match = prob_re.search(text)
        odds_match = odds_re.search(text)
        if prob_match is None or odds_match is None:
            missing = [
                pattern_name
                for pattern_name, match in (
                    ("prob_pattern", prob_match),
                    ("odds_pattern", odds_match),
                )
                if match is None
            ]
            skipped.append((filepath, "no match for " + " and ".join(missing)))
            continue

        # The numeric character class also accepts tokens such as "+-" or "..",
        # which float() rejects; record those instead of aborting the scan.
        try:
            prob = float(prob_match.group(1))
            odds = float(odds_match.group(1))
        except ValueError:
            skipped.append((filepath, "captured value is not a valid float"))
            continue

        prefix = file_match.group(1)       # e.g., gw230529_highSpin
        model_label = file_match.group(2)  # e.g., multiPDB_betaSplit3_brokenG
        component = file_match.group(3)    # e.g., 1

        prob_col = f"{prefix}_Probability_{component}"
        odds_col = f"{prefix}_OddsRatio_{component}"

        records[model_label][prob_col] = prob
        records[model_label][odds_col] = odds

# Report what was left out so an empty or partial table is explainable.
if skipped:
    print(f"Skipped {len(skipped)} .out file(s) under {root_dir!r}:")
    for skipped_path, reason in skipped[:MAX_SKIPS_SHOWN]:
        print(f"  {skipped_path}: {reason}")
    if len(skipped) > MAX_SKIPS_SHOWN:
        print(f"  ... and {len(skipped) - MAX_SKIPS_SHOWN} more")
if not records:
    # Also reached when root_dir does not exist: os.walk then yields nothing.
    print(f"No results parsed from {root_dir!r}; the table below is empty.")

# Create DataFrame: one row per model label, one column per prefix/quantity/component.
df = pd.DataFrame.from_dict(records, orient="index")
df.index.name = "Model"

# Optional: sort columns for readability
df = df.reindex(sorted(df.columns), axis=1)

# Save table to CSV (written to the current working directory)
output_csv = "probabilities_odds_ratios.csv"
df.to_csv(output_csv)

# Output DataFrame
display(df)

In [3]:
import os
import re
import pandas as pd
from collections import defaultdict

# Directory tree that is walked recursively for ``*.out`` result files.
root_dir = "../testing_mmms"

# Updated regex patterns based on actual file content
# Each content pattern captures two numeric tokens: the value before "+/-"
# (group 1) and the token after it (group 2). Only group 1 is used below.
prob_pattern = r"Probability:\s+P\(mass <= max_mass\(spin, eos\)AND spin <= max_spin\(eos\)\)\s+=\s+([0-9.eE+-]+)\s+\+/-\s+([0-9.eE+-]+)"
odds_pattern = r"Odds Ratio\s+:\s+O\^\{mass <= max_mass\(spin, eos\)AND spin <= max_spin\(eos\)\)_\{else\}\s+=\s+([0-9.eE+-]+)\s+\+/-\s+([0-9.eE+-]+)"

# Updated to allow + and hyphens in model names
# Layout: <prefix>+<model_label>+component<N>.out, where the prefix contains no
# "+" and the greedy model group may itself contain "+".
filename_pattern = r"([^+]+)\+(.+)\+component(\d+)\.out"

# Compile once instead of re-resolving the pattern strings for every file.
prob_re = re.compile(prob_pattern)
odds_re = re.compile(odds_pattern)
filename_re = re.compile(filename_pattern)

# Store results: {model_label: {col_name: value}}
records = defaultdict(dict)

# (filepath, reason) for every ``.out`` file that could not be turned into a
# record, so that a pattern mismatch is reported instead of silently producing
# an incomplete (or empty) table.
skipped = []
MAX_SKIPS_SHOWN = 10  # cap the report so the cell output stays readable

for dirpath, dirnames, filenames in os.walk(root_dir):
    # os.walk yields entries in arbitrary order. Sorting makes the traversal
    # reproducible, which also fixes which file wins when two files map to the
    # same (model_label, column) and the later one overwrites the earlier one.
    dirnames.sort()
    for filename in sorted(filenames):
        if not filename.endswith(".out"):
            continue
        filepath = os.path.join(dirpath, filename)

        # Check the filename first: no need to read a file we cannot label.
        file_match = filename_re.match(filename)
        if file_match is None:
            skipped.append((filepath, "filename does not match filename_pattern"))
            continue

        with open(filepath, "r") as f:
            text = f.read()

        prob_match = prob_re.search(text)
        odds_match = odds_re.search(text)
        if prob_match is None or odds_match is None:
            missing = [
                pattern_name
                for pattern_name, match in (
                    ("prob_pattern", prob_match),
                    ("odds_pattern", odds_match),
                )
                if match is None
            ]
            skipped.append((filepath, "no match for " + " and ".join(missing)))
            continue

        # The numeric character class also accepts tokens such as "+-" or "..",
        # which float() rejects; record those instead of aborting the scan.
        try:
            prob = float(prob_match.group(1))
            odds = float(odds_match.group(1))
        except ValueError:
            skipped.append((filepath, "captured value is not a valid float"))
            continue

        prefix = file_match.group(1)       # e.g., GW230529_Combined_PHM_highSpin
        model_label = file_match.group(2)  # e.g., pdbNG_betaSplit_brokenG_LEC-2020-logweight_...
        component = file_match.group(3)    # e.g., 2

        prob_col = f"{prefix}_Probability_{component}"
        odds_col = f"{prefix}_OddsRatio_{component}"

        records[model_label][prob_col] = prob
        records[model_label][odds_col] = odds

# Report what was left out so an empty or partial table is explainable.
if skipped:
    print(f"Skipped {len(skipped)} .out file(s) under {root_dir!r}:")
    for skipped_path, reason in skipped[:MAX_SKIPS_SHOWN]:
        print(f"  {skipped_path}: {reason}")
    if len(skipped) > MAX_SKIPS_SHOWN:
        print(f"  ... and {len(skipped) - MAX_SKIPS_SHOWN} more")
if not records:
    # Also reached when root_dir does not exist: os.walk then yields nothing.
    print(f"No results parsed from {root_dir!r}; the table below is empty.")

# Create DataFrame: one row per model label, one column per prefix/quantity/component.
df = pd.DataFrame.from_dict(records, orient="index")
df.index.name = "Model"

# Optional: sort columns for readability
df = df.reindex(sorted(df.columns), axis=1)

# Save table to CSV (written to the current working directory)
output_csv = "eos_probabilities_odds_ratios.csv"
df.to_csv(output_csv)

# Output DataFrame
display(df)


Unnamed: 0_level_0,GW230529_Combined_PHM_lowSecondarySpin_OddsRatio_1,GW230529_Combined_PHM_lowSecondarySpin_Probability_1
Model,Unnamed: 1_level_1,Unnamed: 2_level_1
pdbNG_betaSplit_brokenG_same_mbrk_LEC-2020-logweight_PSR_GW_Xray,0.00265,0.002643
