In [1]:
import os
import re
import pandas as pd
from collections import defaultdict
from tqdm import tqdm

In [2]:
def parse_file(filename, root_dir = "../testing_mmms"):
    """Parse one ``.out`` result file into a single-row DataFrame.

    The probability and odds-ratio central values are extracted from the file
    text with regular expressions; the event tag, model label and component
    tag are extracted from ``filename`` itself.

    Parameters
    ----------
    filename : str
        Name of the result file, e.g.
        ``GW230529_Combined_PHM_highSpin+pdbNG_betaSplit_brokenG+component1.out``.
    root_dir : str
        Directory that contains ``filename``.

    Returns
    -------
    pandas.DataFrame or None
        One row indexed by the model label (index name ``"Model"``) with the
        columns ``<event>_OddsRatio_<component>`` and
        ``<event>_Probability_<component>``.  Columns are sorted
        alphabetically, so the odds ratio is column 0.  ``None`` is returned
        when any of the patterns fails to match; in that case the sibling
        ``.err`` file is printed if it exists.

    Raises
    ------
    FileNotFoundError
        If the ``.out`` file itself does not exist.
    """
    path = os.path.join(root_dir, filename)

    # Regex patterns
    prob_pattern = r"Probability:\s+P\(mass <= max_mass\(spin, eos\)\)\s+=\s+([0-9.eE+-]+)\s+\+/-\s+([0-9.eE+-]+)"
    odds_pattern = r"Odds Ratio\s+:\s+O\^\{mass <= max_mass\(spin, eos\)\)\_\{else\}\s+=\s+([0-9.eE+-]+)\s+\+/-\s+([0-9.eE+-]+)"
    filename_pattern1 = r'GW\d{6}'
    filename_pattern2 = r'(?<=\+)[^+]+(?=\+component\d+)'
    filename_pattern3 = r'component(\d+)'

    with open(path, "r") as f:
        text = f.read()

    prob_match = re.search(prob_pattern, text)
    odds_match = re.search(odds_pattern, text)
    file_match1 = re.search(filename_pattern1, filename)
    file_match2 = re.search(filename_pattern2, filename)
    file_match3 = re.search(filename_pattern3, filename)

    if not (prob_match and odds_match and file_match1 and file_match2 and file_match3):
        # Nothing usable in the .out file: surface the matching error log.
        # splitext swaps the extension without assuming it is 3 characters.
        err_filename = os.path.splitext(path)[0] + ".err"
        if os.path.exists(err_filename):
            with open(err_filename, "r") as f2:
                print(f2.read())
        else:
            # A missing log must not turn "no result" into a crash.
            print(f"No result found in {path} and no error log at {err_filename}")
        return None

    prefix = file_match1.group()       # e.g., GW230529 (only the GW + 6 digits part)
    model_label = file_match2.group()  # e.g., pdbNG_betaSplit_brokenG
    # Whole match, e.g. "component2" (not just the digit); kept as-is so the
    # column labels stay the same for existing consumers.
    component = file_match3.group()

    prob = float(prob_match.group(1))
    odds = float(odds_match.group(1))

    records = {
        model_label: {
            f"{prefix}_Probability_{component}": prob,
            f"{prefix}_OddsRatio_{component}": odds,
        }
    }

    # Create DataFrame
    df = pd.DataFrame.from_dict(records, orient="index")
    df.index.name = "Model"
    # Sort columns for readability (this puts OddsRatio before Probability)
    df = df.reindex(sorted(df.columns), axis=1)
    return df


# Maps the short event name used in this notebook (key) to the prefix that
# the corresponding result filenames start with (value).  Filenames are built
# as "<prefix>+<mass_dist>_<pairing>_<spin><suffix>+component<N>.out".
# extract_variables() walks this dict in insertion order and takes the first
# prefix that matches, so keep entries ordered accordingly.
_pe = {
    "GW190425":"GW190425_C01:IMRPhenomPv2_NRTidal:HighSpin",
    "GW190814":"GW190814_C01:IMRPhenomXPHM",
    "GW190917":"GW190917_C01:IMRPhenomXPHM",
    "GW200105":"GW200105_C01:IMRPhenomXPHM",
    "GW200115":"GW200115_C01:IMRPhenomNSBH:HighSpin",
    "GW230529_highspin":"GW230529_Combined_PHM_highSpin",
    "GW230529_lowspin":"GW230529_Combined_PHM_lowSecondarySpin"
    }

In [3]:
def extract_variables(filename, _pe = _pe):
    """Split a result filename into its event / model / component parts.

    Expected layout (directory and a trailing ``.out`` are optional)::

        <prefix>+<mass_dist>_<pairing>_<spin><suffix>+component<N>.out

    Parameters
    ----------
    filename : str
        File name or path of a result file.
    _pe : dict
        Mapping ``event_name -> filename prefix``.  The default is the
        module-level ``_pe`` as it was when this function was defined.

    Returns
    -------
    dict
        Keys ``event_name``, ``mass_dist``, ``pairing``, ``spin``, ``suffix``
        and ``component``; all values are strings (``suffix`` may be empty).

    Raises
    ------
    ValueError
        If no prefix from ``_pe`` matches or the remainder of the name does
        not follow the expected layout.
    """
    basename = os.path.basename(filename)
    # Strip only a trailing extension; str.replace would also remove any
    # ".out" that happens to occur in the middle of the name.
    if basename.endswith(".out"):
        basename = basename[:-len(".out")]

    # Find event_name by matching with _pe.  The '+' separator is part of the
    # match so a prefix followed by anything else is not silently accepted.
    matched_event_name = None
    for event_name, prefix in _pe.items():
        if basename.startswith(prefix + "+"):
            matched_event_name = event_name
            break
    if matched_event_name is None:
        raise ValueError("No matching event_name found in _pe.")

    # Remove prefix and its '+' separator, then split off the component tag
    rest = basename[len(_pe[matched_event_name]) + 1:]
    if rest.count("+component") != 1:
        # Explicit check instead of an opaque tuple-unpacking error
        raise ValueError("Filename format is not recognized.")
    mid_part, component = rest.split("+component")

    # mass_dist and pairing are the first two '_'-separated tokens, spin is
    # the following alphanumeric run, and whatever is left is the suffix.
    match = re.match(r"([^_]+)_([^_]+)_([a-zA-Z0-9]+)(.*)", mid_part)
    if not match:
        raise ValueError("Filename format is not recognized.")

    mass_dist, pairing, spin, suffix = match.groups()

    return {
        "event_name": matched_event_name,
        "mass_dist": mass_dist,
        "pairing": pairing,
        "spin": spin,
        "suffix": suffix,
        "component": component,
    }

def validate_event_config(config):
    """Check that every mandatory field of an event config is populated.

    A field counts as missing when it is absent from ``config`` or holds a
    falsy value (``None``, ``''``, ...).  The first offending key, in the
    order listed below, is reported.

    Returns the confirmation string on success; raises ``ValueError``
    otherwise.
    """
    mandatory = ('event_name', 'mass_dist', 'pairing', 'spin', 'component')
    first_missing = next((name for name in mandatory if not config.get(name)), None)
    if first_missing is not None:
        raise ValueError(f"Missing or empty value for required key: '{first_missing}'")
    return "All required fields are valid."

def generate_latex_macro(odds_ratio, macro_save_name):
    r"""Build a ``\newcommand`` line holding a numeric value.

    The value is formatted with three decimals and wrapped in
    ``\ensuremath``, giving ``\newcommand{\<name>}{\ensuremath{<value>}}``.
    """
    value_text = format(odds_ratio, ".3f")
    pieces = ["\\newcommand{\\", macro_save_name, "}{\\ensuremath{", value_text, "}}"]
    return "".join(pieces)

def generate_latex_macro_null(odds_ratio, macro_save_name):
    r"""Build a ``\newcommand`` line holding a value inserted verbatim.

    Same layout as ``\newcommand{\<name>}{\ensuremath{<value>}}`` but the
    value is inserted with its default formatting (no numeric format spec),
    so it also accepts placeholder strings.
    """
    value_text = format(odds_ratio, "")
    pieces = ["\\newcommand{\\", macro_save_name, "}{\\ensuremath{", value_text, "}}"]
    return "".join(pieces)

In [4]:
# Single-file check: parse one hand-picked result before the batch run.
event_name = "GW200105"
mass_dist = "pdbNG"
pairing = "betaSplit"
spin = "brokenG"
suffix = ""
component = "2"
filename = _pe[event_name]+"+"+mass_dist+"_"+pairing+"_"+spin+suffix+"+component"+component+".out"
macro_save_name = "macro_"+event_name+"_"+mass_dist+"_"+pairing+"_"+spin+suffix+"_component"+component

res = parse_file(filename)
# parse_file returns None when the .out file holds no result; fall back to
# the same "Error" placeholder the batch loop uses instead of crashing on
# None.iloc.  Columns are sorted alphabetically, so [0, 0] is the odds ratio.
odds_ratio = "Error" if res is None else res.iloc[0,0]

In [5]:
# Make sure macros.tex exists; a new file starts with a single header comment.
macros_tex_path = "macros.tex"

macros_file_present = os.path.exists(macros_tex_path)
if not macros_file_present:
    with open(macros_tex_path, "w") as macros_file:
        macros_file.write("% LaTeX Macros for Odds Ratios\n")

In [6]:
# filename = "GW230529_Combined_PHM_highSpin+pdbNG_betaSplit_brokenG+component1.out"

# Collect every .out result file.  Sorted, because os.listdir returns names
# in arbitrary order and the macro file should be reproducible across runs.
files = sorted(name for name in os.listdir("../testing_mmms/") if name.endswith(".out"))

# LaTeX control words may contain letters only, so digits are spelled out.
digit_words = str.maketrans({
    "0": "zero", "1": "one", "2": "two", "3": "three", "4": "four",
    "5": "five", "6": "six", "7": "seven", "8": "eight", "9": "nine",
})

# Read the macros already on disk once instead of once per result file.
with open(macros_tex_path, "r") as f:
    macro_lines = f.read().splitlines()

for filename in tqdm(files):
    params = extract_variables(filename, _pe)
    validate_event_config(params)
    event_name = params['event_name']
    mass_dist = params['mass_dist']
    pairing = params['pairing']
    spin = params['spin']
    suffix = params['suffix']
    component = params['component']

    macro_save_name = "ODDS"+event_name+mass_dist+pairing+spin+suffix+"component"+component
    macro_save_name = macro_save_name.replace("_", "").translate(digit_words)
    # Drop anything that is still not a letter (e.g. '-' or '.' coming from a
    # model suffix); such characters would end the control word early.
    macro_save_name = re.sub(r"[^A-Za-z]", "", macro_save_name)

    # The listed name is already the real file name, so parse it directly.
    res = parse_file(filename)

    if res is None:
        odds_ratio = "Error"
        latex_macro = generate_latex_macro_null(odds_ratio, macro_save_name)
    else:
        # Columns are sorted alphabetically, so [0, 0] is the odds ratio.
        odds_ratio = res.iloc[0,0]
        latex_macro = generate_latex_macro(odds_ratio, macro_save_name)

    # Insert or update by macro *name*: appending a second \newcommand for a
    # name whose value changed since the last run makes LaTeX fail with
    # "command already defined".
    definition_start = "\\newcommand{\\" + macro_save_name + "}"
    for position, line in enumerate(macro_lines):
        if line.startswith(definition_start):
            macro_lines[position] = latex_macro
            break
    else:
        macro_lines.append(latex_macro)

# Write everything back in one go, one definition per line.
with open(macros_tex_path, "w") as f:
    f.writelines(line + "\n" for line in macro_lines)



100%|██████████| 259/259 [00:00<00:00, 2334.20it/s]

  return be.log(4*be.pi) + 2*be.log(Dc) + be.log(self.dDcdz(z))
  exp_O = exp_f / (exp_g - exp_f)
  var_O = var_f*exp_g**2/exp_d**4 + var_g*exp_f**2/exp_d**4 - 2*cov_fg*exp_f*exp_g/exp_d**4
  var_O = var_f*exp_g**2/exp_d**4 + var_g*exp_f**2/exp_d**4 - 2*cov_fg*exp_f*exp_g/exp_d**4
Traceback (most recent call last):
  File "/opt/anaconda3/envs/mmms-gw230529/bin/mmms", line 265, in <module>
    ans = engine.samples2prob_odds(
          ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/envs/mmms-gw230529/lib/python3.11/site-packages/mmms/engine.py", line 463, in samples2prob_odds
    odds, odds_stdv = _moments2odds(F, G, F_var, G_var, FG_cov)
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/envs/mmms-gw230529/lib/python3.11/site-packages/mmms/engine.py", line 358, in _moments2odds
    var_O = _check_variance(var_O) # sanity check
            ^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/envs/mmms-gw230529/lib/python3.11/site-packages/mmms/engine.py", l




In [7]:
# root_dir = "../testing_mmms"
#
# # Regex patterns
# prob_pattern = r"Probability:\s+P\(mass <= max_mass\(spin, eos\)\)\s+=\s+([0-9.eE+-]+)\s+\+/-\s+([0-9.eE+-]+)"
# odds_pattern = r"Odds Ratio\s+:\s+O\^\{mass <= max_mass\(spin, eos\)\)\_\{else\}\s+=\s+([0-9.eE+-]+)\s+\+/-\s+([0-9.eE+-]+)"
# filename_pattern = r"([A-Za-z0-9_]+)\+(.+?)\+component(\d+)\.out"
#
# # Store results: {model_label: {col_name: value}}
# records = defaultdict(dict)
#
# for dirpath, _, filenames in os.walk(root_dir):
#     for filename in filenames:
#         if filename.endswith(".out"):
#             filepath = os.path.join(dirpath, filename)
#             with open(filepath, "r") as f:
#                 text = f.read()
#
#             prob_match = re.search(prob_pattern, text)
#             odds_match = re.search(odds_pattern, text)
#             file_match = re.match(filename_pattern, filename)
#
#             if prob_match and odds_match and file_match:
#                 prefix = file_match.group(1)  # e.g., gw230529_highSpin
#                 model_label = file_match.group(2)  # e.g., multiPDB_betaSplit3_brokenG
#                 component = file_match.group(3)     # e.g., 1
#
#                 prob = float(prob_match.group(1))
#                 odds = float(odds_match.group(1))
#
#                 prob_col = f"{prefix}_Probability_{component}"
#                 odds_col = f"{prefix}_OddsRatio_{component}"
#
#                 records[model_label][prob_col] = prob
#                 records[model_label][odds_col] = odds
#
# # Create DataFrame
# df = pd.DataFrame.from_dict(records, orient="index")
# df.index.name = "Model"
#
# # Optional: sort columns for readability
# df = df.reindex(sorted(df.columns), axis=1)
#
# # Save table to CSV
# output_csv = os.path.join("probabilities_odds_ratios.csv")
# df.to_csv(output_csv)
#
# # Output DataFrame
# display(df)

In [8]:
# import os
# import re
# import pandas as pd
# from collections import defaultdict
#
# root_dir = "../testing_mmms"
#
# # Updated regex patterns based on actual file content
# prob_pattern = r"Probability:\s+P\(mass <= max_mass\(spin, eos\)AND spin <= max_spin\(eos\)\)\s+=\s+([0-9.eE+-]+)\s+\+/-\s+([0-9.eE+-]+)"
# odds_pattern = r"Odds Ratio\s+:\s+O\^\{mass <= max_mass\(spin, eos\)AND spin <= max_spin\(eos\)\)_\{else\}\s+=\s+([0-9.eE+-]+)\s+\+/-\s+([0-9.eE+-]+)"
#
# # Updated to allow + and hyphens in model names
# filename_pattern = r"([^+]+)\+(.+)\+component(\d+)\.out"
#
# # Store results: {model_label: {col_name: value}}
# records = defaultdict(dict)
#
# for dirpath, _, filenames in os.walk(root_dir):
#     for filename in filenames:
#         if filename.endswith(".out"):
#             filepath = os.path.join(dirpath, filename)
#             with open(filepath, "r") as f:
#                 text = f.read()
#
#             prob_match = re.search(prob_pattern, text)
#             odds_match = re.search(odds_pattern, text)
#             file_match = re.match(filename_pattern, filename)
#
#             if prob_match and odds_match and file_match:
#                 prefix = file_match.group(1)  # e.g., GW230529_Combined_PHM_highSpin
#                 model_label = file_match.group(2)  # e.g., pdbNG_betaSplit_brokenG_LEC-2020-logweight_...
#                 component = file_match.group(3)     # e.g., 2
#
#                 prob = float(prob_match.group(1))
#                 odds = float(odds_match.group(1))
#
#                 prob_col = f"{prefix}_Probability_{component}"
#                 odds_col = f"{prefix}_OddsRatio_{component}"
#
#                 records[model_label][prob_col] = prob
#                 records[model_label][odds_col] = odds
#
# # Create DataFrame
# df = pd.DataFrame.from_dict(records, orient="index")
# df.index.name = "Model"
#
# # Optional: sort columns for readability
# df = df.reindex(sorted(df.columns), axis=1)
#
# # Save table to CSV
# output_csv = os.path.join("eos_probabilities_odds_ratios.csv")
# df.to_csv(output_csv)
#
# # Output DataFrame
# display(df)
