In [1]:
import numpy as np
import pandas as pd
from plotly.subplots import make_subplots

import src.data.breathe_data as breathe_data
import src.data.helpers as dh
import src.inference.helpers as ih
import src.inference.long_inf_slicing as slicing
import src.models.builders as mb
import src.models.helpers as mh
import src.models.var_builders as var_builders

In [2]:
# Load the measurements table used by the cells below; rows are later selected
# per individual through the "ID" column.
df = breathe_data.load_meas_from_excel("BR_O2_FEV1_FEF2575_with_idx")

INFO:root:* Checking for same day measurements *


In [4]:
# Notebook-level settings. infer_and_plot_for_id reads these as globals:
#   interconnected_AR    -> forwarded as interconnect_AR to the slicing query
#                           and reflected in the plot titles
#   ar_change_cpt_suffix -> forwarded to the model builder and shown in the plot titles
#   save                 -> forwarded to both plotting helpers
interconnected_AR = True
# Previously tried suffix: "_padded_identity_15"
ar_change_cpt_suffix = "_shift_span_[-20;20]_samples"
save = True


def infer_and_plot_for_id(df_for_ID, diff_threshold=1e-8):
    """Run the longitudinal inference for one ID and plot the results.

    The model is built from the Height, Age and Sex found in the first row of
    ``df_for_ID``. Only ecFEV1 is passed as an observed variable; AR and IA are
    passed as the (non-shared) variables and HFEV1 / HO2Sat as the shared ones.

    Reads the notebook-level globals ``ar_change_cpt_suffix``,
    ``interconnected_AR`` and ``save``.

    Args:
        df_for_ID: DataFrame holding the measurements of a single ID. Must have
            at least one row and the columns ID, Height, Age and Sex. Its index
            is reset on a copy; the caller's frame is not reassigned.
        diff_threshold: Forwarded to
            ``slicing.query_back_and_forth_across_days_AR`` (presumably the
            convergence threshold on the posteriors' diff -- confirm there).

    Returns:
        The tuple ``(df_query_res, df_res_before_convergence, shared_vars_final)``
        exactly as returned by ``slicing.query_back_and_forth_across_days_AR``.
    """
    df_for_ID = df_for_ID.reset_index(drop=True)

    # Per-ID metadata is taken from the first row.
    patient_id = df_for_ID.ID.iloc[0]
    height = df_for_ID.Height.iloc[0]
    age = df_for_ID.Age.iloc[0]
    sex = df_for_ID.Sex.iloc[0]

    print(f"\nID: {patient_id}")
    print(f"Number of datapoints: {len(df_for_ID)}")

    # The first element of the builder's return tuple is not needed here.
    # UO2Sat and ecFEF2575prctecFEV1 are unpacked but not used further below.
    (
        _,
        inf_alg,
        HFEV1,
        ecFEV1,
        AR,
        HO2Sat,
        O2SatFFA,
        IA,
        UO2Sat,
        O2Sat,
        ecFEF2575prctecFEV1,
    ) = mb.o2sat_fev1_fef2575_long_model_shared_healthy_vars_and_temporal_ar(
        height, age, sex, ia_prior="breathe", ar_change_cpt_suffix=ar_change_cpt_suffix
    )

    # Tell each shared variable which factor-node key it is attached to.
    key_hfev1 = f"['{ecFEV1.name}', '{HFEV1.name}', '{AR.name}'] -> {HFEV1.name}"
    key_ho2sat = f"['{O2SatFFA.name}', '{HO2Sat.name}', '{AR.name}'] -> {HO2Sat.name}"
    HFEV1.set_factor_node_key(key_hfev1)
    HO2Sat.set_factor_node_key(key_ho2sat)

    # Named `latent_vars` rather than `vars` to avoid shadowing the builtin.
    latent_vars = [AR, IA]
    shared_vars = [HFEV1, HO2Sat]
    # Alternative observation sets that were tried:
    # obs_vars = [ecFEV1.name, O2Sat.name]
    # obs_vars = [ecFEV1.name, O2Sat.name, ecFEF2575prctecFEV1]
    obs_vars = [ecFEV1.name]

    df_query_res, df_res_before_convergence, shared_vars_final = (
        slicing.query_back_and_forth_across_days_AR(
            df_for_ID,
            inf_alg,
            shared_vars,
            latent_vars,
            obs_vars,
            diff_threshold,
            debug=False,
            max_passes=1,
            interconnect_AR=interconnected_AR,
        )
    )

    colorscale = [
        [0, "white"],
        [0.01, "red"],
        [0.05, "yellow"],
        [0.1, "cyan"],
        [0.6, "blue"],
        [1, "black"],
    ]

    # Both plot titles share the same prefix and suffix; the validation plot
    # additionally mentions the "back and forth" passes.
    obs_vars_str = ", ".join([mh.name_to_abbr(var) for var in obs_vars])
    interconnected_AR_str = "interdays AR v2, " if interconnected_AR else ""
    ar_prior_str = f"AR {ar_change_cpt_suffix}"
    passes = "back and forth"
    title_prefix = f"ID {patient_id} - obs. {obs_vars_str}, "
    title_suffix = (
        f"{interconnected_AR_str}{ar_prior_str} ({sex}, {age}yr, {height}cm)"
    )

    validation_title = f"{title_prefix}{passes}, {title_suffix}"
    slicing.plot_posterior_validation(
        df_res_before_convergence,
        HFEV1,
        AR,
        df_for_ID,
        validation_title,
        colorscale,
        save,
    )

    query_title = f"{title_prefix}{title_suffix}"
    slicing.plot_query_res(
        df_for_ID,
        ecFEV1,
        O2Sat,
        df_query_res,
        AR,
        IA,
        HFEV1,
        HO2Sat,
        query_title,
        colorscale,
        save,
    )
    return df_query_res, df_res_before_convergence, shared_vars_final


# interesting_ids = [
#     "132",
#     "146",
#     "177",
#     "180",
#     "202",
#     "527",
#     "117",
#     "131",
#     "134",
#     "191",
#     "139",
#     "253",
#     "101",
# ]

# Consecutive entries are less than 3 days apart until
# 101     (592, 1680)

# (ID, number of leading rows to keep); None keeps every row of that ID.
ids_and_row_limits = [
    ("101", 591),
    ("405", None),
    ("272", 417),
    ("201", 289),
    ("203", 285),
]

# NOTE(review): the recorded output for ID 201 shows "invalid value encountered
# in divide" and nan posterior diffs -- worth investigating before trusting
# that run.
# Each iteration prints progress and calls the plotting helpers; after the loop
# the names below hold the results of the last ID only, as before.
for patient_id, n_rows in ids_and_row_limits:
    df_for_ID = df[df["ID"] == patient_id].iloc[:n_rows]
    df_query_res, df_res_before_convergence, shared_vars_final = infer_and_plot_for_id(
        df_for_ID, diff_threshold=1e-2
    )
# df[df.ID.isin(interesting_ids)].groupby("ID").apply(
#     lambda df_for_ID: infer_and_plot_for_id(
#         df_for_ID, diff_threshold=1e-6
#     )
# )


ID: 101
Number of datapoints: 591
Pass 1 - Posteriors' diff for Healthy FEV1 (L): 1.9779845487987422
Pass 1 - Posteriors' diff for Healthy O2 saturation (%): 1.4626076435576643
Pass 2 - Posteriors' diff for Healthy FEV1 (L): 5.5294510684767975e-05
Pass 2 - Posteriors' diff for Healthy O2 saturation (%): 2.2541406227908355e-11

ID: 405
Number of datapoints: 1035
Pass 1 - Posteriors' diff for Healthy FEV1 (L): 1.9799999999978015
Pass 1 - Posteriors' diff for Healthy O2 saturation (%): 1.4813421005423906
Pass 2 - Posteriors' diff for Healthy FEV1 (L): 1.1585762959529432e-14
Pass 2 - Posteriors' diff for Healthy O2 saturation (%): 1.8002429104085464e-11

ID: 272
Number of datapoints: 417
Pass 1 - Posteriors' diff for Healthy FEV1 (L): 1.9799999988343568
Pass 1 - Posteriors' diff for Healthy O2 saturation (%): 1.4714769846342857
Pass 2 - Posteriors' diff for Healthy FEV1 (L): 4.539466297436837e-11
Pass 2 - Posteriors' diff for Healthy O2 saturation (%): 3.5725633065370696e-11

ID: 201
Numb


invalid value encountered in divide


invalid value encountered in divide



Pass 1 - Posteriors' diff for Healthy FEV1 (L): nan
Pass 1 - Posteriors' diff for Healthy O2 saturation (%): nan
Pass 2 - Posteriors' diff for Healthy FEV1 (L): nan
Pass 2 - Posteriors' diff for Healthy O2 saturation (%): nan

ID: 203
Number of datapoints: 285
Pass 1 - Posteriors' diff for Healthy FEV1 (L): 1.9775483469005368
Pass 1 - Posteriors' diff for Healthy O2 saturation (%): 1.4675140055051212
Pass 2 - Posteriors' diff for Healthy FEV1 (L): 9.889319637654802e-05
Pass 2 - Posteriors' diff for Healthy O2 saturation (%): 9.821303212251652e-11


In [7]:
# `dh` (src.data.helpers) is imported in the notebook's top import cell.
#
# NOTE(review): AR, HO2Sat, IA and HFEV1 are only assigned inside
# infer_and_plot_for_id in the visible cells, so on a fresh kernel this cell
# would raise NameError. Confirm where these model variables should come from
# (the cell presumably relied on leftover kernel state).
dftmp = dh.load_excel(
    f"{dh.get_path_to_main()}/ExcelFiles/BR/Refining_F3/infer_AR_with_two_days_model_O2Sat_FEV1.xlsx",
    # f"{dh.get_path_to_main()}/ExcelFiles/BR/Refining_F3/infer_AR_with_two_days_model_O2Sat_ecFEV1.xlsx",
    [AR.name],
    ["Day"],
).drop(columns=["Unnamed: 0", HO2Sat.name, IA.name, HFEV1.name])

In [14]:
def get_consec_days(df, max_gap_days=3):
    """Annotate measurements with the time elapsed since the previous one.

    The rows are sorted by "Date Recorded", then two columns are added:
    "Prev day" (the previous row's date) and "Days elapsed" (the difference
    between the two). Rows without a "Days elapsed" value -- the first row, and
    any row next to a missing date -- are dropped.

    As a side effect, the index labels of the rows whose gap is strictly greater
    than ``max_gap_days`` are printed.

    Args:
        df: DataFrame with a datetime-like "Date Recorded" column. It is not
            modified; the work is done on the sorted copy.
        max_gap_days: Gap, in days, above which a row is reported. Defaults to
            3, the value that was previously hard-coded.

    Returns:
        The sorted, annotated DataFrame (including the rows with large gaps).
    """
    # sort_values returns a new frame, so the column assignments below do not
    # touch the caller's DataFrame.
    df = df.sort_values(by="Date Recorded")
    df["Prev day"] = df["Date Recorded"].shift(1)
    # Time between each measurement and the one before it.
    df["Days elapsed"] = df["Date Recorded"] - df["Prev day"]
    # The first row has no predecessor, hence no elapsed time.
    df = df.dropna(subset=["Days elapsed"])

    # Index labels where the gap exceeds the allowed number of days.
    idx = df[df["Days elapsed"] > pd.Timedelta(days=max_gap_days)].index
    print(idx)

    return df


# Exploratory check on ID "101": get_consec_days prints the index labels where
# the gap to the previous measurement exceeds its threshold (3 days), and the
# first 592 rows of the annotated frame are shown as the cell output.
df_tmp = df[df.ID == "101"]
get_consec_days(df_tmp).iloc[:592]
# Not finished