In [2]:
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import BeliefPropagation
from pgmpy.models import BayesianNetwork

import src.models.cpts.load as cptloader
import src.models.helpers as mh
import src.inference.helpers as ih
import src.data.breathe_data as breathe_data

import numpy as np
import pandas as pd

In [2]:
# Point in time model without the AR-IA factor
def build_model(height, age, sex):
    """Assemble the point-in-time Bayesian network and its inference engine.

    This variant has no AR-IA factor: inactive alveoli (IA) is a root node with
    its own prior instead of being conditioned on airway resistance (AR).

    Args:
        height: Forwarded to the "default" priors of healthy FEV1 and healthy
            O2 saturation.
        age: Forwarded to the "default" prior of healthy FEV1.
        sex: Forwarded to the "default" priors of healthy FEV1 and healthy
            O2 saturation.

    Returns:
        The tuple ``(model, inf_alg, AR, IA, ecFEV1, O2Sat)``: the
        BayesianNetwork, a BeliefPropagation instance built on it, and four of
        the variable nodes.
    """

    def as_cpd(node, parents=()):
        # Wrap a node's table (``node.prior``) in a TabularCPD; root nodes get
        # empty evidence lists.
        return TabularCPD(
            variable=node.name,
            variable_card=len(node.bins),
            values=node.prior,
            evidence=[parent.name for parent in parents],
            evidence_card=[len(parent.bins) for parent in parents],
        )

    # ---- Variable nodes (name, lower bound, upper bound, bin width) ----
    HFEV1 = mh.variableNode(
        "Healthy FEV1 (L)",
        1,
        6,
        0.05,
        prior={"type": "default", "height": height, "age": age, "sex": sex},
    )
    ecFEV1 = mh.variableNode("ecFEV1 (L)", 0, 6, 0.05, prior=None)
    # AR = 1 - predicted FEV1; the lowest predicted FEV1 is 15%.
    AR = mh.variableNode("Airway resistance (%)", 0, 90, 2, prior={"type": "uniform"})

    # Timing note: resolution 0.5 takes 19s, resolution 0.2 takes 21s.
    HO2Sat = mh.variableNode(
        "Healthy O2 saturation (%)",
        90,
        100,
        0.5,
        prior={"type": "default", "height": height, "sex": sex},
    )
    # The highest drop is 92% (reached at AR = 90%), so the lowest O2SatFFA is
    # 90 * 0.92 = 82.8.
    O2SatFFA = mh.variableNode(
        "O2 saturation if fully functional alveoli (%)", 80, 100, 0.5, prior=None
    )
    # O2 saturation cannot go below 70%, and 70% should remain reachable even
    # without airway resistance, hence the 30% upper bound on IA.
    # NOTE(review): the original rationale comment was cut off mid-sentence -
    # confirm this reading.
    IA = mh.variableNode("Inactive alveoli (%)", 0, 30, 1, prior={"type": "uniform"})
    # In reality O2 saturation cannot go below 70%, but the CPT has to cover
    # the lowest O2SatFFA (82.8%) minus the largest IA: 82.8 - 30 = 52.8%.
    # TODO: should we hardcode the fact that the sum of AR and IA should not be below 70% O2 Sat?
    UO2Sat = mh.variableNode("Underlying O2 saturation (%)", 50, 100, 0.5, prior=None)
    O2Sat = mh.variableNode("O2 saturation (%)", 49.5, 100.5, 1, prior=None)

    # ---- Structure: each child node together with its parents ----
    # IA is intentionally absent here: it stays a root node (no AR -> IA edge).
    dependencies = [
        (ecFEV1, (HFEV1, AR)),
        (O2SatFFA, (HO2Sat, AR)),
        (UO2Sat, (O2SatFFA, IA)),
        (O2Sat, (UO2Sat,)),
    ]

    # Load the conditional probability table of every child node.
    for child, parents in dependencies:
        child.prior = cptloader.get_cpt([child, *parents])

    # One directed edge per (parent, child) pair, in dependency order.
    model = BayesianNetwork(
        [
            (parent.name, child.name)
            for child, parents in dependencies
            for parent in parents
        ]
    )

    # Attach one CPD per node; nodes without an entry in `parents_of` are roots.
    parents_of = {child.name: parents for child, parents in dependencies}
    cpd_order = (ecFEV1, AR, HFEV1, HO2Sat, O2SatFFA, IA, UO2Sat, O2Sat)
    model.add_cpds(
        *(as_cpd(node, parents_of.get(node.name, ())) for node in cpd_order)
    )

    model.check_model()
    inf_alg = BeliefPropagation(model)
    return model, inf_alg, AR, IA, ecFEV1, O2Sat

In [3]:
# Load the dataset through the project's breathe_data Excel loader and preview
# the first rows.
df = breathe_data.load_from_excel()
df.head()

Unnamed: 0,ID,Date Recorded,FEV1,O2 Saturation,ecFEV1,Age,Sex,Height,Predicted FEV1,Healthy O2 Saturation,ecFEV1 % Predicted,FEV1 % Predicted,O2 Saturation % Healthy
0,101,2019-02-20,1.31,97.0,1.32,53,Male,173.0,3.610061,97.22596,36.564477,36.287474,99.767593
1,101,2019-02-21,1.29,96.0,1.32,53,Male,173.0,3.610061,97.22596,36.564477,35.733466,98.739061
2,101,2019-02-22,1.32,96.0,1.32,53,Male,173.0,3.610061,97.22596,36.564477,36.564477,98.739061
3,101,2019-02-23,1.28,97.0,1.33,53,Male,173.0,3.610061,97.22596,36.841481,35.456463,99.767593
4,101,2019-02-24,1.33,98.0,1.36,53,Male,173.0,3.610061,97.22596,37.672492,36.841481,100.796125


In [10]:
# Infer AR and IA for all data points given an individuals' age, sex, height, FEV1 and O2 saturation measurements
def infer_AR_IA_for_ID(df):
    """Infer mean airway resistance (AR) and inactive alveoli (IA) for each row.

    A single model is built from the first row's ``Height``, ``Age`` and
    ``Sex``, so every row of ``df`` is expected to belong to the same
    individual. Each row's ``ecFEV1`` and ``O2 Saturation`` values are then
    used as evidence for one inference query.

    Args:
        df: DataFrame with the columns ``Height``, ``Age``, ``Sex``,
            ``ecFEV1`` and ``O2 Saturation``. It is not modified.

    Returns:
        A Series indexed 0..len(df)-1 whose elements are ``(mean_ar, mean_ia)``
        tuples, one per input row.
    """
    # Work on a re-indexed copy. The previous `reset_index(inplace=True)`
    # mutated the caller's frame (here the group object handed over by
    # `groupby.apply`, which pandas does not support) and added an extra
    # "index" column to it on every call.
    records = df.reset_index(drop=True)

    # Positional access to the first row, independent of the index labels.
    _, inf_alg, AR, IA, ecFEV1, O2Sat = build_model(
        records["Height"].iloc[0],
        records["Age"].iloc[0],
        records["Sex"].iloc[0],
    )

    def infer_and_unpack(ecfev1_obs, o2sat_obs):
        # Query AR and IA given the two observations, then reduce each returned
        # distribution to a single number through the node's `get_mean`.
        res = ih.infer(
            inf_alg,
            [AR, IA],
            [[ecFEV1, ecfev1_obs], [O2Sat, o2sat_obs]],
            show_progress=False,
            joint=False,
            get_time=False,
        )
        mean_ar = AR.get_mean(res[AR.name].values)
        mean_ia = IA.get_mean(res[IA.name].values)
        return mean_ar, mean_ia

    inf_res = records.apply(
        lambda row: infer_and_unpack(row["ecFEV1"], row["O2 Saturation"]),
        axis=1,
    )
    return inf_res


# Run the inference once per "ID" group; each input row yields an (AR, IA) tuple.
resraw = df.groupby("ID").apply(infer_AR_IA_for_ID)
# Expand the tuples into two columns (0 and 1), move the index levels back into
# columns, name the value columns "AR" and "IA", and discard "level_1"
# (presumably the within-group row position from the unnamed inner index
# level - confirm).
res = (
    resraw.apply(pd.Series)
    .reset_index()
    .rename(columns={0: "AR", 1: "IA"})
    .drop(columns="level_1")
)
res

  phi.values = phi.values / phi1.values
  phi.values = phi.values / phi1.values
  phi.values = phi.values / phi1.values
  phi.values = phi.values / phi1.values
  phi.values = phi.values / phi1.values
  phi.values = phi.values / phi1.values
  phi.values = phi.values / phi1.values
  phi.values = phi.values / phi1.values
  phi.values = phi.values / phi1.values
  phi.values = phi.values / phi1.values
  phi.values = phi.values / phi1.values
  phi.values = phi.values / phi1.values
  phi.values = phi.values / phi1.values
  phi.values = phi.values / phi1.values
  phi.values = phi.values / phi1.values
  phi.values = phi.values / phi1.values
  phi.values = phi.values / phi1.values
  phi.values = phi.values / phi1.values
  phi.values = phi.values / phi1.values
  phi.values = phi.values / phi1.values
  phi.values = phi.values / phi1.values
  phi.values = phi.values / phi1.values
  phi.values = phi.values / phi1.values
  phi.values = phi.values / phi1.values
  phi.values = phi.values / phi1.values


Unnamed: 0,ID,AR,IA
0,101,58.502814,0.736011
1,101,59.743958,0.968994
2,101,59.743958,0.968994
3,101,58.502814,0.736011
4,101,55.679250,0.622275
...,...,...,...
20392,358,8.649044,0.602668
20393,358,8.649044,0.602668
20394,358,8.649044,0.727311
20395,358,9.104899,0.980036


In [23]:
# Save `res` as an Excel file under ../../../../ExcelFiles
# res.to_excel(
#     "../../../../ExcelFiles/inferred_AR_IA_with_FEV1_O2Sat_no_AR-IA_factor.xlsx"
# )

Unnamed: 0,ID,AR,IA
0,101,58.502814,0.736011
1,101,59.743958,0.968994
2,101,59.743958,0.968994
3,101,58.502814,0.736011
4,101,55.67925,0.622275
