Validation of the cutset conditioning algorithm: using the light model, compare my own implementation over 3 days against the variable elimination algorithm from the pgmpy library. Both are exact inference algorithms, so they should give exactly the same results.

My own implementation: model with FEV1 noise and temporal airway resistance

In [1]:
import itertools
import time

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import src.data.breathe_data as bd
import src.data.helpers as dh
import src.inference.helpers as ih
import src.modelling_ar.ar as ar
import src.inf_cutset_conditioning.helpers as cutseth
import src.models.builders as mb
import src.models.var_builders as var_builders
from pgmpy.inference.ExactInference import VariableElimination

In [2]:
# Demographics shared by the single-day variable definitions and the mock data.
height = 180
age = 35
sex = "Male"

# Single-day variables of the light model; AR and HFEV1 are reused later for
# their bin bounds (.a / .b) and names when plotting.
light_model_vars = var_builders.o2sat_fev1_fef2575_long_model_noise_shared_healthy_vars_and_temporal_ar_light(
    height,
    age,
    sex,
)
(
    HFEV1,
    uecFEV1,
    ecFEV1,
    AR,
    HO2Sat,
    O2SatFFA,
    IA,
    UO2Sat,
    O2Sat,
    ecFEF2575prctecFEV1,
) = light_model_vars

# Three consecutive daily records for one individual. "Date Recorded" starts as
# a day offset and is converted to a timestamp relative to 2020-01-01.
mock_records = {
    "ID": ["1", "1", "1"],
    "Date Recorded": [1, 2, 3],
    "Height": height,
    "Age": age,
    "Sex": sex,
    "ecFEV1": [1, 3, 5],
    "ecFEF2575%ecFEV1": [4, 4, 4],
}
df_mock = pd.DataFrame(mock_records).assign(
    **{
        "Date Recorded": lambda frame: pd.to_datetime(
            frame["Date Recorded"], unit="D", origin="2020-01-01"
        )
    }
)

df_mock

Unnamed: 0,ID,Date Recorded,Height,Age,Sex,ecFEV1,ecFEF2575%ecFEV1
0,1,2020-01-02,180,35,Male,1,4
1,1,2020-01-03,180,35,Male,3,4
2,1,2020-01-04,180,35,Male,5,4


## Run Variable Elimination

In [3]:
# Reference run: exact inference with pgmpy's variable elimination on the
# multi-day light model.
#
# The number of days is taken from the mock data so that the model size, the
# evidence and the queried variables cannot drift apart. height, age and sex
# are the values already defined in the cell that builds df_mock.
n_days = len(df_mock)
(
    model,
    HFEV1,
    HO2Sat,
    AR_vars,
    uecFEV1_vars,
    ecFEV1_vars,
    O2SatFFA_vars,
    IA_vars,
    UO2Sat_vars,
    O2Sat_vars,
    ecFEF2575prctecFEV1_vars,
) = mb.o2sat_fev1_fef2575_n_days_model_noise_shared_healthy_vars_and_temporal_ar_light(
    n_days,
    height,
    age,
    sex,
)
var_elim = VariableElimination(model)

# Observe ecFEV1 and ecFEF25-75 % ecFEV1 on every day; the values are passed
# to pgmpy exactly as stored in df_mock.
evidence_dict = {}
for day in range(n_days):
    evidence_dict[ecFEV1_vars[day].name] = df_mock.loc[day, "ecFEV1"]
    evidence_dict[ecFEF2575prctecFEV1_vars[day].name] = df_mock.loc[
        day, "ecFEF2575%ecFEV1"
    ]
print(evidence_dict)

# joint=False returns one marginal per queried variable, keyed by variable
# name: the airway resistance of each day followed by HFEV1.
res = var_elim.query(
    variables=[*(AR_vars[day].name for day in range(n_days)), HFEV1.name],
    evidence=evidence_dict,
    joint=False,
)
# Show the day 2 airway resistance posterior as a quick sanity check.
res[AR_vars[1].name].values

Airway resistance (%) day 2, cpt: (9, 9)
Airway resistance (%) day 3, cpt: (9, 9)
{'ecFEV1 (L) day 1': 1, 'ecFEF25-75 % ecFEV1 (%) day 1': 4, 'ecFEV1 (L) day 2': 3, 'ecFEF25-75 % ecFEV1 (%) day 2': 4, 'ecFEV1 (L) day 3': 5, 'ecFEF25-75 % ecFEV1 (%) day 3': 4}


array([1.87948259e-01, 3.91191647e-01, 2.96429539e-01, 1.14851899e-01,
       9.47946998e-03, 9.89185973e-05, 2.67180655e-07, 0.00000000e+00,
       0.00000000e+00])

## Run Cutset Cond BP

In [4]:
import src.inf_cutset_conditioning.cutset_conditioning_temporal_AR_HFEV1_noise_light as cutset

INFO:root:* Checking for same day measurements *


In [5]:
# Run the cutset conditioning implementation on the same mock data, with a
# uniform airway resistance prior and the default (empty) change-CPT suffix.
cutset_outputs = cutset.compute_log_p_D_given_M_per_HFEV1_HO2Sat_obs_temporal_AR(
    df_mock,
    ar_prior="uniform",
    ar_change_cpt_suffix="",
)
# p_M_given_D_full and AR_dist_given_M_matrix are the pieces compared against
# variable elimination in the plotting cell.
fig, p_M_given_D_full, p_M_given_D, AR_dist_given_M_matrix = cutset_outputs

ID 1 - Number of HFEV1 specific models: 1, max ecFEV1: 5, first possible bin for HFEV1: [5.00; 6.00)
Time for 3 entries: 0.01 s
Shape of P(M|D) (1,)


In [7]:
# Overlay the posteriors from both exact-inference runs: row 1 holds HFEV1 and
# rows 2-4 hold the airway resistance of days 1-3.
n_ar_days = 3
n_rows = n_ar_days + 1
fig = make_subplots(rows=n_rows, cols=1, vertical_spacing=0.13)

# Variable elimination results are added first (HFEV1, then AR per day).
ih.plot_histogram(
    fig, HFEV1, res[HFEV1.name].values, HFEV1.a, HFEV1.b, 1, 1, annot=False
)
for day in range(n_ar_days):
    ih.plot_histogram(
        fig, AR, res[AR_vars[day].name].values, AR.a, AR.b, day + 2, 1, annot=False
    )

# Cutset conditioning results are added second, on the same rows, and carry the
# axis titles. Index 0 on the last axis selects the only HFEV1-specific model.
ih.plot_histogram(
    fig, HFEV1, p_M_given_D_full, HFEV1.a, HFEV1.b, 1, 1, title=HFEV1.name
)
for day in range(n_ar_days):
    ih.plot_histogram(
        fig,
        AR,
        AR_dist_given_M_matrix[day, :, 0],
        AR.a,
        AR.b,
        day + 2,
        1,
        title=f"{AR.name} day {day + 1}",
        annot=False,
    )

# Colour by insertion order: the first four traces (variable elimination) in
# blue and the next four (cutset conditioning) in red.
# NOTE(review): assumes each plot_histogram call appends exactly one trace.
ve_colour = "#636EFA"
cutset_colour = "#EF553B"
for trace_idx in range(n_rows):
    fig.data[trace_idx].marker.color = ve_colour
    fig.data[trace_idx + n_rows].marker.color = cutset_colour

# Reduce x axis title font size
fig.update_xaxes(title_font={"size": 12}, title_standoff=7)

# Hide the legend (the colours are explained in the title) and fix the size.
title = "Cutset conditioning (red) vs variable elimination (blue)"
fig.update_layout(showlegend=False, height=550, width=800, title=title)
fig.show()