In [2]:
import src.data.breathe_data as breathe_data
import src.inference.long_inf_slicing as slicing
import src.models.builders as mb
import src.models.var_builders as var_builders
import src.inference.helpers as ih
from plotly.subplots import make_subplots


import pandas as pd
import numpy as np

## Breathe data processing
Applied `get_bin_for_value` to all inputs and updated the Excel file.

## Slicing algorithm handling convergence (loops)

In [3]:
# Load the measurements through breathe_data.load_meas_from_excel; the argument
# is presumably the name of the Excel workbook to read — verify in breathe_data.
df = breathe_data.load_meas_from_excel("BR_O2_FEV1_FEF2575_with_idx")

INFO:root:* Checking for same day measurements *


In [5]:
# Create the factor-graph variables up front so their names can be used below.
# NOTE(review): 160, 40, "Male" look like placeholder height/age/sex values (the
# per-ID builder called in infer_and_plot_for_id receives height, age, sex) —
# confirm that the variables built here do not depend on them.
(HFEV1, ecFEV1, AR, HO2Sat, O2SatFFA, IA, UO2Sat, O2Sat, ecFEF2575prctecFEV1) = (
    var_builders.o2sat_fev1_fef2575_point_in_time_model_shared_healthy_vars(
        160, 40, "Male"
    )
)

# Register, on each shared variable, the key of the factor node it is attached to.
# The key is the string "[<three variable names>] -> <shared variable name>";
# presumably it has to match the graph's own factor naming exactly — verify.
key_hfev1 = f"['{ecFEV1.name}', '{HFEV1.name}', '{AR.name}'] -> {HFEV1.name}"
key_ho2sat = f"['{O2SatFFA.name}', '{HO2Sat.name}', '{AR.name}'] -> {HO2Sat.name}"
HFEV1.set_factor_node_key(key_hfev1)
HO2Sat.set_factor_node_key(key_ho2sat)

# NOTE: `vars` shadows the Python builtin of the same name for the rest of the
# notebook; it is kept because later cells pass it by this name.
vars = [AR, IA]
shared_vars = [HFEV1, HO2Sat]
# Alternative with all three observed variables, currently disabled:
# obs_vars = [ecFEV1.name, O2Sat.name, ecFEF2575prctecFEV1.name]
obs_vars = [ecFEV1.name]


def infer_and_plot_for_id(
    df_for_ID, shared_vars, vars, obs_vars, diff_threshold=1e-8, save=True
):
    """Run the longitudinal inference for one individual and plot the results.

    The model is built from the Height, Age and Sex found in the first row of
    ``df_for_ID``; the inference itself is delegated to
    ``slicing.query_across_days`` and the two figures to
    ``slicing.plot_posterior_validation`` and ``slicing.plot_query_res``.

    Args:
        df_for_ID: Measurements of a single individual. Must contain the
            columns ``ID``, ``Height``, ``Age`` and ``Sex`` and at least one
            row (the first row is read with ``.iloc[0]``).
        shared_vars: Shared variables forwarded unchanged to
            ``slicing.query_across_days``.
        vars: Variables forwarded unchanged to ``slicing.query_across_days``.
        obs_vars: Names of the observed variables, forwarded unchanged to
            ``slicing.query_across_days``.
        diff_threshold: Threshold forwarded to ``slicing.query_across_days``
            (presumably its convergence criterion — verify in ``slicing``).
        save: Flag forwarded as the last argument of both plotting helpers
            (presumably "write the figure to disk" — verify in ``slicing``).
            Defaults to ``True``, the value that used to be hard-coded here.

    Returns:
        The tuple ``(df_query_res, df_res_before_convergence,
        shared_vars_final)`` exactly as returned by
        ``slicing.query_across_days``.
    """
    # Work on a 0-based index whatever slice of the full table was passed in.
    df_for_ID = df_for_ID.reset_index(drop=True)
    print(f"\nID: {df_for_ID.ID.iloc[0]}")
    print(f"Amount of data: {len(df_for_ID)}")

    # Height, age and sex are read from the first row only.
    height = df_for_ID.Height.iloc[0]
    age = df_for_ID.Age.iloc[0]
    sex = df_for_ID.Sex.iloc[0]
    (
        _,
        inf_alg,
        HFEV1,
        ecFEV1,
        AR,
        HO2Sat,
        O2SatFFA,
        IA,
        UO2Sat,
        O2Sat,
        ecFEF2575prctecFEV1,
    ) = mb.o2sat_fev1_fef2575_point_in_time_model_shared_healthy_vars(
        height, age, sex, ia_prior="uniform", ar_prior="uniform message to HFEV1"
    )

    # NOTE(review): the inference receives the caller's `shared_vars`, while the
    # plots below use the HFEV1 / HO2Sat objects built just above — confirm
    # that mixing the two sets of objects is intended.
    df_query_res, df_res_before_convergence, shared_vars_final = (
        slicing.query_across_days(
            df_for_ID, inf_alg, shared_vars, vars, obs_vars, diff_threshold, debug=False
        )
    )

    # Colour stops handed to both plotting helpers.
    colorscale = [
        [0, "white"],
        [0.01, "red"],
        [0.05, "yellow"],
        [0.1, "cyan"],
        [0.6, "blue"],
        [1, "black"],
    ]

    slicing.plot_posterior_validation(
        df_res_before_convergence,
        HFEV1,
        HO2Sat,
        df_for_ID,
        ecFEV1,
        O2Sat,
        colorscale,
        save,
    )

    slicing.plot_query_res(
        df_for_ID, ecFEV1, O2Sat, df_query_res, AR, IA, HFEV1, HO2Sat, colorscale, save
    )
    return df_query_res, df_res_before_convergence, shared_vars_final


# IDs of the individuals selected for the longitudinal inference run.
interesting_ids = [
    "132",
    "146",
    "177",
    "180",
    "202",
    "527",
    "117",
    "131",
    "134",
    "191",
    "139",
    "253",
    "101",
]

# Single-individual variant, kept for quick debugging:
# df_for_ID = df[df["ID"] == "101"]
# df_query_res, df_res_before_convergence, shared_vars_final = infer_and_plot_for_id(
#     df_for_ID, shared_vars, vars, obs_vars, diff_threshold=1e-2
# )

# Iterate over the groups explicitly instead of using groupby(...).apply(...):
# infer_and_plot_for_id reads the "ID" column of the sub-frame it receives, and
# newer pandas versions deprecate passing the grouping column to apply().
# Iterating always yields the complete sub-frame and lets us keep the results.
results_by_id = {}
for patient_id, df_one_id in df[df.ID.isin(interesting_ids)].groupby("ID"):
    # Each value is the (df_query_res, df_res_before_convergence,
    # shared_vars_final) tuple returned for that individual.
    results_by_id[patient_id] = infer_and_plot_for_id(
        df_one_id, shared_vars, vars, obs_vars, diff_threshold=1e-6
    )


ID: 101
Amount of data: 1680


FileNotFoundError: [Errno 2] No such file or directory: '/Users/tristan.trebaol/Desktop/PhD/PlotsBreathe/Longitudinal_model/Healthy_vars_inference_results/ID 101 - Longitudinal inference results (Male, 53yr, 173.0cm).pdf'

In [3]:
# Build the single-individual frame explicitly so that this cell also works on a
# fresh kernel (it used to rely on `df_for_ID` left over from an earlier run).
df_for_ID = df[df["ID"] == "101"]
df_for_ID.head()

Unnamed: 0.1,Unnamed: 0,ID,Date Recorded,FEV1,O2 Saturation,ecFEV1,Age,Sex,Height,Predicted FEV1,Healthy O2 Saturation,ecFEV1 % Predicted,FEV1 % Predicted,O2 Saturation % Healthy,idx ecFEV1 (L),idx O2 saturation (%)
0,0,101,2019-02-20,1.31,97.0,1.32,53,Male,173.0,3.610061,97.22596,36.564477,36.287474,99.767593,26,47
1,1,101,2019-02-21,1.29,96.0,1.32,53,Male,173.0,3.610061,97.22596,36.564477,35.733466,98.739061,26,46
2,2,101,2019-02-22,1.32,96.0,1.32,53,Male,173.0,3.610061,97.22596,36.564477,36.564477,98.739061,26,46
3,3,101,2019-02-23,1.28,97.0,1.33,53,Male,173.0,3.610061,97.22596,36.841481,35.456463,99.767593,26,47
4,4,101,2019-02-24,1.33,98.0,1.36,53,Male,173.0,3.610061,97.22596,37.672492,36.841481,100.796125,27,48


In [30]:
# Plot the element-wise product of HFEV1 messages as a histogram.
# NOTE(review): the message of 2021-05-25 is multiplied by itself here, whereas
# the HO2Sat cell combines 2021-05-25 with 2021-05-22 — confirm which is intended.
fig = make_subplots(rows=1, cols=1)
hfev1_message = HFEV1.vmessages["2021-05-25"]
p = np.multiply(hfev1_message, hfev1_message)
ih.plot_histogram(fig, HFEV1, p, HFEV1.a, HFEV1.b, 1, 1, HFEV1.name)
fig.show()

In [33]:
# Plot the element-wise product of the HO2Sat messages stored for two days.
fig = make_subplots(rows=1, cols=1)
ho2sat_message_first = HO2Sat.vmessages["2021-05-25"]
ho2sat_message_second = HO2Sat.vmessages["2021-05-22"]
p = np.multiply(ho2sat_message_first, ho2sat_message_second)
ih.plot_histogram(fig, HO2Sat, p, HO2Sat.a, HO2Sat.b, 1, 1, HO2Sat.name)
fig.show()

# Selecting the stopping criteria

In [None]:
# Algorithm used to select the stopping criteria
# Moved to src.inference.long_inf_slicing and updated to be general to any shared variables input

# def query_across_days(
#     df,
#     belief_propagation,
#     shared_variables: List[SharedVariableNode],
#     variables: List[str],
#     n_epochs,
# ):
#     epoch = 0
#     df_res_hfev1 = pd.DataFrame(index=HFEV1.get_bins_str())

#     df_res = pd.DataFrame(
#         columns=["Epoch"] + list(map(lambda v: v.name, shared_variables))
#     )

#     post_hfev1_old_epoch = HFEV1._uniform_prior()
#     post_ho2sat_old_epoch = HO2Sat._uniform_prior()
#     while True:
#         print(f"epoch {epoch}")

#         post_hfev1_old_day = HFEV1._uniform_prior()
#         post_ho2sat_old_day = HO2Sat._uniform_prior()
#         diffs_hfev1_day = np.array([])
#         diffs_ho2sat_day = np.array([])
#         for i in range(len(df)):
#             day = df["Date Recorded"].iloc[i].strftime("%Y-%m-%d")

#             def build_evidence(variables):
#                 evidence = {}
#                 for variable in variables:
#                     idx_obs = df[variable].iloc[i]
#                     evidence[variable] = idx_obs
#                 return evidence

#             evidence = build_evidence(variables)

#             def build_virtual_evidence(shared_variables):
#                 virtual_evidence = {}
#                 for shared_var in shared_variables:
#                     virtual_message = shared_var.get_virtual_message(day)
#                     if virtual_message is not None:
#                         virtual_evidence[shared_var.name] = virtual_message
#                 return virtual_evidence

#             virtual_evidence = build_virtual_evidence(shared_variables)

#             var_to_infer = list(map(lambda v: v.name, shared_variables))

#             # Query the graph
#             res, messages = belief_propagation.query(
#                 var_to_infer, evidence, virtual_evidence, get_messages=True
#             )

#             # Save message for current day
#             for shared_var in shared_variables:
#                 shared_var.add_message(day, messages[shared_var.graph_key])

#             post_hfev1_old_day, diff_hfev1_day = get_diff(
#                 res, post_hfev1_old_day, HFEV1
#             )
#             post_ho2sat_old_day, diff_ho2sat_day = get_diff(
#                 res, post_ho2sat_old_day, HO2Sat
#             )
#             # print(
#             #     f"Epoch {epoch}, day {i} - Diff hfev1 {diff_hfev1_day}, diff ho2sat {diff_ho2sat_day}"
#             # )
#             diffs_hfev1_day = np.append(diffs_hfev1_day, diff_hfev1_day)
#             diffs_ho2sat_day = np.append(diffs_ho2sat_day, diff_ho2sat_day)

#         # print(
#         #     f"Epoch {epoch} - Sum daily diffs for HFEV1: {diffs_hfev1_day.sum()}, and HO2Sat: {diffs_ho2sat_day.sum()}"
#         # )
#         post_hfev1_old_epoch, diff_hfev1_epoch = get_diff(
#             res, post_hfev1_old_epoch, HFEV1
#         )
#         post_ho2sat_old_epoch, diff_ho2sat_epoch = get_diff(
#             res, post_ho2sat_old_epoch, HO2Sat
#         )
#         print(
#             f"Epoch {epoch} - Posteriors' diff for HFEV1: {diff_hfev1_epoch}, and HO2Sat: {diff_ho2sat_epoch}"
#         )

#         # Create a new row with the epoch and one shared-variable array per row cell
#         new_row = [epoch] + list(map(lambda v: res[v.name].values, shared_variables))
#         # Same but as df
#         new_row = pd.DataFrame(
#             [new_row], columns=["Epoch"] + list(map(lambda v: v.name, shared_variables))
#         )

#         df_res = pd.concat([df_res, new_row], ignore_index=True)
#         df_res_hfev1[f"{epoch}"] = res[HFEV1.name].values

#         if epoch >= n_epochs:
#             return df_res, df_res_hfev1
#         epoch += 1


# def get_diff(res, old, var):
#     new = res[var.name].values
#     diff = np.abs(new - old).sum()
#     return new, diff

# Check that using the aggregate message and multiplying the individual messages give the same result

In [17]:
import src.data.helpers as dh

In [19]:
# Load the two saved result tables for ID 101: one produced by multiplying the
# messages, one produced with the aggregate message. The second argument lists
# the HFEV1 and HO2Sat column names (presumably the columns holding arrays —
# verify in dh.load_excel).
path_mult = (
    dh.get_path_to_main()
    + "/ExcelFiles/BR/long_inf_res_ID101_messages_multiplication.xlsx"
)
df_mult = dh.load_excel(path_mult, [HFEV1.name, HO2Sat.name])

path_agg = dh.get_path_to_main() + "/ExcelFiles/BR/long_inf_res_ID101_using_agg_m.xlsx"
df_agg = dh.load_excel(path_agg, [HFEV1.name, HO2Sat.name])

In [22]:
# Row by row, the HFEV1 values of both result tables must agree within 1e-8
# (absolute tolerance, on top of np.allclose's default relative tolerance).
for i in range(len(df_mult)):
    hfev1_from_mult = df_mult.loc[i, HFEV1.name]
    hfev1_from_agg = df_agg.loc[i, HFEV1.name]
    assert np.allclose(hfev1_from_mult, hfev1_from_agg, atol=1e-8)

In [23]:
# Row by row, the HO2Sat values of both result tables must agree within 1e-8
# (absolute tolerance, on top of np.allclose's default relative tolerance).
for i in range(len(df_mult)):
    ho2sat_from_mult = df_mult.loc[i, HO2Sat.name]
    ho2sat_from_agg = df_agg.loc[i, HO2Sat.name]
    assert np.allclose(ho2sat_from_mult, ho2sat_from_agg, atol=1e-8)