In [1]:
import src.data.breathe_data as breathe_data
import src.inference.long_inf_slicing as slicing
import src.models.builders as mb
import src.models.var_builders as var_builders
import src.inference.helpers as ih
from plotly.subplots import make_subplots
import src.models.helpers as mh

import pandas as pd
import numpy as np

In [2]:
# Load the measurements identified by "BR_O2_FEV1_FEF2575_with_idx" through the
# project's Excel loader. Later cells filter this frame on its "ID" column and read
# the Height, Age and Sex columns of the selected rows.
df = breathe_data.load_meas_from_excel("BR_O2_FEV1_FEF2575_with_idx")

INFO:root:* Checking for same day measurements *


In [17]:
# Placeholder variable nodes (per the original note: not used for inference).
# The arguments (160, 40, "Male") are presumably height, age and sex, mirroring the
# per-individual call made inside infer_and_plot_for_id -- TODO confirm.
(
    HFEV1,
    ecFEV1,
    AR,
    HO2Sat,
    O2SatFFA,
    IA,
    UO2Sat,
    O2Sat,
    ecFEF2575prctecFEV1,
) = var_builders.o2sat_fev1_fef2575_point_in_time_model_shared_healthy_vars(
    160, 40, "Male"
)

# Factor-node keys of the two shared variables, laid out as
# "['<first>', '<shared>', '<AR>'] -> <shared>".
key_hfev1 = "['{}', '{}', '{}'] -> {}".format(
    ecFEV1.name, HFEV1.name, AR.name, HFEV1.name
)
key_ho2sat = "['{}', '{}', '{}'] -> {}".format(
    O2SatFFA.name, HO2Sat.name, AR.name, HO2Sat.name
)
HFEV1.set_factor_node_key(key_hfev1)
HO2Sat.set_factor_node_key(key_ho2sat)

# Variable groups handed to infer_and_plot_for_id.
# NOTE(review): `vars` shadows the builtin vars(); the name is kept because the
# cell that calls infer_and_plot_for_id passes it along.
vars = [AR, IA]
shared_vars = [HFEV1, HO2Sat]

# Observed variables, given by name. Alternatives that were tried:
#   obs_vars = [ecFEV1.name, O2Sat.name]
#   obs_vars = [ecFEV1.name, O2Sat.name, ecFEF2575prctecFEV1.name]
obs_vars = [ecFEV1.name]

# Run settings read as globals by infer_and_plot_for_id.
interconnected_AR = False  # alternative: "_padded_identity_15"
ar_prior = "uniform"  # alternative: "uniform message to HFEV1"
save = True


def infer_and_plot_for_id(df_for_ID, shared_vars, vars, obs_vars, diff_threshold=1e-8):
    """Run the back-and-forth inference for one individual and plot the outcome.

    The model is rebuilt from the Height, Age and Sex found in the first row of
    ``df_for_ID``; the rows are then passed to
    ``slicing.query_back_and_forth_across_days`` and the results are plotted with
    ``slicing.plot_posterior_validation`` and ``slicing.plot_query_res``.

    Besides its arguments, the function reads three notebook-level globals:
    ``ar_prior`` (forwarded to the model builder and shown in the titles),
    ``interconnected_AR`` (forwarded as ``interconnect_AR``) and ``save``
    (forwarded to both plotting helpers).

    Args:
        df_for_ID: Rows of a single individual; needs the ID, Height, Age and Sex
            columns. The index is reset on a copy, the caller's frame is untouched.
        shared_vars: Shared variables forwarded as-is to the slicing query.
            NOTE(review): these come from the caller and are not the HFEV1/HO2Sat
            objects built here for this individual -- confirm this is intended.
        vars: Variables forwarded as-is to the slicing query.
        obs_vars: Names of the observed variables; also abbreviated for the titles.
        diff_threshold: Forwarded as-is to the slicing query.

    Returns:
        Tuple ``(df_query_res, df_res_before_convergence, shared_vars_final)`` as
        produced by ``slicing.query_back_and_forth_across_days``.
    """
    df_for_ID = df_for_ID.reset_index(drop=True)

    patient_id = df_for_ID["ID"].iloc[0]
    print(f"\nID: {patient_id}")
    print(f"Number of datapoints: {len(df_for_ID)}")

    # Demographics come from the first row and drive the individual's model.
    height = df_for_ID["Height"].iloc[0]
    age = df_for_ID["Age"].iloc[0]
    sex = df_for_ID["Sex"].iloc[0]

    model_parts = mb.o2sat_fev1_fef2575_point_in_time_model_shared_healthy_vars(
        height, age, sex, ia_prior="breathe", ar_prior=ar_prior
    )
    # The builder returns eleven items; the first one is not needed here.
    (
        _,
        inf_alg,
        HFEV1,
        ecFEV1,
        AR,
        HO2Sat,
        O2SatFFA,
        IA,
        UO2Sat,
        O2Sat,
        ecFEF2575prctecFEV1,
    ) = model_parts

    inference_out = slicing.query_back_and_forth_across_days(
        df_for_ID,
        inf_alg,
        shared_vars,
        vars,
        obs_vars,
        diff_threshold,
        debug=False,
        max_passes=8,
        interconnect_AR=interconnected_AR,
    )
    df_query_res, df_res_before_convergence, shared_vars_final = inference_out

    # Colour stops shared by both figures.
    colorscale = [
        [0, "white"],
        [0.01, "red"],
        [0.05, "yellow"],
        [0.1, "cyan"],
        [0.6, "blue"],
        [1, "black"],
    ]

    # Title fragments common to the two plots.
    obs_vars_str = ", ".join(mh.name_to_abbr(name) for name in obs_vars)
    interdays_str = "interdays AR, " if interconnected_AR else ""
    model_desc = f"{interdays_str}AR {ar_prior}"
    demographics = f"({sex}, {age}yr, {height}cm)"

    validation_title = (
        f"ID {patient_id} - Long inf validation - breathe IA - obs. {obs_vars_str}, "
        f"back and forth, {model_desc} {demographics}"
    )
    slicing.plot_posterior_validation(
        df_res_before_convergence,
        HFEV1,
        HO2Sat,
        df_for_ID,
        validation_title,
        colorscale,
        save,
    )

    results_title = (
        f"ID {patient_id} - Long inf results - breathe IA - obs. {obs_vars_str}, "
        f"{model_desc} {demographics}"
    )
    slicing.plot_query_res(
        df_for_ID,
        ecFEV1,
        O2Sat,
        df_query_res,
        AR,
        IA,
        HFEV1,
        HO2Sat,
        results_title,
        colorscale,
        save,
    )
    return df_query_res, df_res_before_convergence, shared_vars_final


# IDs previously singled out for inspection (kept for reference, currently unused):
# interesting_ids = [
#     "132", "146", "177", "180", "202", "527", "117",
#     "131", "134", "191", "139", "253", "101",
# ]

# Original note: consecutive entries are less than 3 days apart until
#   101     (592, 1680)
# Presumably this is why only the first 592 rows of ID 101 are kept -- TODO confirm.
df_for_ID = df.loc[df["ID"] == "101"].iloc[:592]

(
    df_query_res,
    df_res_before_convergence,
    shared_vars_final,
) = infer_and_plot_for_id(df_for_ID, shared_vars, vars, obs_vars, diff_threshold=1e-2)

# Disabled batch variant running the same routine over every ID listed above:
# df[df.ID.isin(interesting_ids)].groupby("ID").apply(
#     lambda df_for_ID: infer_and_plot_for_id(
#         df_for_ID, shared_vars, vars, obs_vars, diff_threshold=1e-6
#     )
# )


ID: 101
Number of datapoints: 592
Pass 0 - Posteriors' diff for Healthy FEV1 (L): 1.9599999853080488
Pass 0 - Posteriors' diff for Healthy O2 saturation (%): 1.4626076159497714
Pass 1 - Posteriors' diff for Healthy FEV1 (L): 8.326673578043737e-16
Pass 1 - Posteriors' diff for Healthy O2 saturation (%): 1.043507397442168e-09
Pass 2 - Posteriors' diff for Healthy FEV1 (L): 1.1102230411687698e-16
Pass 2 - Posteriors' diff for Healthy O2 saturation (%): 1.0300979437882184e-18


In [7]:
import src.data.helpers as dh

# Load a workbook from ExcelFiles/BR/Refining_F3 (judging by the file name, saved
# AR inference results from a two-days model -- TODO confirm) and drop the
# "Unnamed: 0" column together with the HO2Sat, IA and HFEV1 columns.
# NOTE(review): [AR.name] and ["Day"] are positional arguments whose meaning is
# defined by dh.load_excel -- check that helper before changing them.
# NOTE(review): this cell uses AR, HO2Sat, IA and HFEV1, so the cell defining them
# must have been run first.
dftmp = dh.load_excel(
    f"{dh.get_path_to_main()}/ExcelFiles/BR/Refining_F3/infer_AR_with_two_days_model_O2Sat_FEV1.xlsx",
    # f"{dh.get_path_to_main()}/ExcelFiles/BR/Refining_F3/infer_AR_with_two_days_model_O2Sat_ecFEV1.xlsx",
    [AR.name],
    ["Day"],
).drop(columns=["Unnamed: 0", HO2Sat.name, IA.name, HFEV1.name])

In [13]:
from pgmpy.factors.discrete import TabularCPD

In [14]:
# Wrap the HO2Sat and HFEV1 tables as pgmpy TabularCPDs without evidence variables;
# each cpt is reshaped into a single column, in that order.
# NOTE(review): the result is bound to `list`, which shadows the builtin for the
# rest of the kernel session. The name is kept here because other cells may
# reference it; rename it together with every use.
list = [
    TabularCPD(node.name, node.card, node.cpt.reshape(-1, 1))
    for node in (HO2Sat, HFEV1)
]