In [1]:
from pgmpy.models import BayesianNetwork
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import BeliefPropagation

import model_helpers as mh
import numpy as np

# np.set_printoptions(suppress=True)

import plotly.express as px
import plotly.figure_factory as ff

ModuleNotFoundError: No module named 'model_helpers'

### Model of the lung's health

In [2]:
# Nodes of the lung model.
# NOTE(review): the three positional numbers given to mh.variableNode are
# presumably (lower bound, upper bound, bin width) -- confirm in model_helpers.
U = mh.variableNode(
    "Unblocked FEV1",
    2,
    6,
    0.1,
    prior={"type": "gaussian", "mu": 4, "sigma": 0.5},
)
C = mh.variableNode("Small airway clearance", 0.9, 1, 0.1)
FEV1 = mh.variableNode("FEV1", 0.2, 6, 0.1)

# Every cardinality below is "number of entries in .bins minus one".
n_states_u = len(U.bins) - 1
n_states_c = len(C.bins) - 1
n_states_fev1 = len(FEV1.bins) - 1

# FEV1 is the only child node; U and C are its two parents.
graph = BayesianNetwork([(parent.name, FEV1.name) for parent in (U, C)])

# Conditional table P(FEV1 | C, U); the values come from the project helper.
fev1_cpt_values = mh.calc_pgmpy_cpt_X_x_1_minus_Y(U, C, FEV1)
cpt_fev1 = TabularCPD(
    variable=FEV1.name,
    variable_card=n_states_fev1,
    values=fev1_cpt_values,
    evidence=[C.name, U.name],
    evidence_card=[n_states_c, n_states_u],
)

# Root nodes have no parents, so their CPDs are just the priors.
prior_b = TabularCPD(
    variable=C.name,
    variable_card=n_states_c,
    values=C.prior,
    evidence=[],
    evidence_card=[],
)
prior_u = TabularCPD(
    variable=U.name,
    variable_card=n_states_u,
    values=U.prior,
    evidence=[],
    evidence_card=[],
)

graph.add_cpds(cpt_fev1, prior_b, prior_u)
graph.check_model()

# Inference engine shared by the cells below.
inference = BeliefPropagation(graph)

calculating cpt of shape 58 x 40 x 1 (C x (A x B)) 


### Interactive inference

In [7]:
from dash import Dash, dcc, html, Input, Output

app = Dash(__name__)

# The slider works in FEV1 *values*, so its bounds, initial value and marks
# must all be expressed as values taken from FEV1.bins (not as bin indices,
# and not from another variable's bins).
# FEV1.bins[-2] is used as the upper bound, exactly as before.
fev1_slider_min = float(FEV1.bins[0])
fev1_slider_max = float(FEV1.bins[-2])

app.layout = html.Div(
    [
        html.H4("Interactive inference with FEV1, Unblocked FEV1"),
        dcc.Graph(id="graph"),
        html.P("FEV1:"),
        dcc.Slider(
            id="fev1",
            min=fev1_slider_min,
            max=fev1_slider_max,
            value=fev1_slider_min,
            # Marks are keyed by slider position; label both ends of the range.
            # ":g" hides floating-point noise such as 5.900000000000002.
            marks={
                fev1_slider_min: f"{fev1_slider_min:g}",
                fev1_slider_max: f"{fev1_slider_max:g}",
            },
        ),
    ]
)


@app.callback(Output("graph", "figure"), Input("fev1", "value"))
def display_color(fev1):
    """Redraw the distribution of unblocked FEV1 given the observed FEV1.

    Dash callback: triggered whenever the "fev1" slider value changes, and
    its return value becomes the figure of the "graph" component.

    Args:
        fev1: Current value of the FEV1 slider.

    Returns:
        A bar chart of the queried distribution over the bins of U. When the
        query yields non-finite values, the bars are replaced with zeros and
        the figure title says so, instead of showing an unexplained empty
        chart.
    """
    print("set FEV1 to", fev1)

    # Only the bin index is needed as evidence; the bin itself is discarded.
    _fev1_bin, fev1_idx = mh.get_bin_for_value(fev1, FEV1.bins)

    res = inference.query(variables=[U.name], evidence={FEV1.name: fev1_idx})
    posterior = np.asarray(res.values, dtype=float)

    # Some FEV1 values make the query emit "invalid value encountered in
    # true_divide" -- presumably because the evidence has zero probability
    # under the model, so normalisation is 0/0. Detect that case explicitly.
    posterior_undefined = not np.all(np.isfinite(posterior))
    if posterior_undefined:
        posterior = np.zeros_like(posterior)

    fig = px.bar(y=posterior, x=U.bins[:-1])
    fig.update_layout(
        xaxis_title="Distribution of unblocked FEV1 (L)",
        yaxis_title="Probability",
        font=dict(family="Courier New, monospace", size=12, color="#7f7f7f"),
    )
    if posterior_undefined:
        fig.update_layout(
            title=f"Posterior is undefined for FEV1 = {fev1} (no valid distribution returned)"
        )
    return fig


# Start the Dash development server on port 8049 with debug mode on and the
# auto-reloader switched off.
# NOTE(review): recent Dash releases favour `app.run(...)` over
# `app.run_server(...)` -- confirm against the installed Dash version.
app.run_server(debug=True, port=8049, use_reloader=False)

Dash is running on http://127.0.0.1:8049/

 * Serving Flask app '__main__'
 * Debug mode: on
get fev1 0.2
fev1_idx 0
fev1_bin [0.2; 0.30000000000000004[


  0%|          | 0/1 [00:00<?, ?it/s]


invalid value encountered in true_divide



get fev1 0.2
fev1_idx 0
fev1_bin [0.2; 0.30000000000000004[


  0%|          | 0/1 [00:00<?, ?it/s]


invalid value encountered in true_divide



get fev1 5.9
fev1_idx 57
fev1_bin [5.900000000000002; 6.000000000000002[


  0%|          | 0/1 [00:00<?, ?it/s]

get fev1 5.65
fev1_idx 54
fev1_bin [5.600000000000002; 5.700000000000002[


  0%|          | 0/1 [00:00<?, ?it/s]

get fev1 5.9
fev1_idx 57
fev1_bin [5.900000000000002; 6.000000000000002[


  0%|          | 0/1 [00:00<?, ?it/s]

get fev1 5.6
fev1_idx 54
fev1_bin [5.600000000000002; 5.700000000000002[


  0%|          | 0/1 [00:00<?, ?it/s]

get fev1 5.4
fev1_idx 52
fev1_bin [5.400000000000002; 5.500000000000002[


  0%|          | 0/1 [00:00<?, ?it/s]

get fev1 4.6
fev1_idx 44
fev1_bin [4.600000000000001; 4.700000000000002[


  0%|          | 0/1 [00:00<?, ?it/s]

### Gaussian prior for healthy fev1

* Compute std dev of the predicted FEV1 for CF population and for normal population
* Add a Gaussian prior for healthy FEV1, centered on the FEV1 predicted from age, height and gender, with a standard deviation


#### Find std dev of predicted FEV1
Reference values for lung function: past, present and future: https://erj.ersjournals.com/content/36/1/12

This paper discusses which reference equations are best for computing lung function in healthy individuals.
* Main criticisms of current equations: disparities in equipment quality, software and methodologies used; equations derived from old measurements that are no longer representative today because of shifts in population characteristics.
* A healthy subject is difficult to define (choice of exclusion criteria).
* Population-specific equations might be less accurate (fewer individuals), but more precise (there are well-known ethnic differences in lung function).

Takeaways:
* Between-subject variability in lung function is highly age-dependent.
* There is a large number of reference statistics for FEV1, which makes it hard to know which one to trust - ask Andres

My learnings: 
* Beyond 20 years of age, the spread of FEV1 is ~3 L for men and ~2.5 L for women -- is that because men are taller, leading to a higher maximum?


In [4]:
# Display the bins array of the "Unblocked FEV1" node (last expression of the cell).
U.bins

array([2. , 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3. , 3.1, 3.2,
       3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 4. , 4.1, 4.2, 4.3, 4.4, 4.5,
       4.6, 4.7, 4.8, 4.9, 5. , 5.1, 5.2, 5.3, 5.4, 5.5, 5.6, 5.7, 5.8,
       5.9, 6. ])

In [7]:
import src.modelling_fev1.pred_fev1 as pred_fev1

# Characteristics of the example subject.
# NOTE(review): units are presumably years and centimetres -- confirm in pred_fev1.
set_age, set_height, set_gender = 26, 175, "Male"

# The helper returns a pair, unpacked here as (predicted FEV1, its std).
prediction = pred_fev1.calc_predicted_FEV1_linear(set_height, set_age, set_gender)
pred_FEV1, pred_FEV1_std = prediction

# Last expression of the cell: display the predicted value.
pred_FEV1

4.280999999999999

In [12]:
import numpy as np
from scipy.stats import norm
from scipy.signal import normalize  # NOTE(review): not used in this cell
import plotly.graph_objects as go

# U.bins holds len(U.bins) values but the U node has len(U.bins) - 1 states.
# Evaluating the density at every entry of U.bins would give one value too
# many and would not be a probability per bin. Instead, take the Gaussian
# mass between consecutive edges (difference of the CDF), which yields
# exactly one probability per bin.
cdf_at_edges = norm.cdf(U.bins, loc=pred_FEV1, scale=pred_FEV1_std)
proba_per_bin = np.diff(cdf_at_edges)

# Renormalise: the mass lying outside [U.bins[0], U.bins[-1]] is discarded,
# i.e. the prior is a Gaussian truncated to the range covered by the bins.
proba_per_bin_norm = proba_per_bin / proba_per_bin.sum()

# The prior must have one entry per state of U.
assert proba_per_bin_norm.shape == (len(U.bins) - 1,)

# Plot against U.bins[:-1], like the interactive posterior plot.
fig = go.Figure(data=[go.Bar(x=U.bins[:-1], y=proba_per_bin_norm)])
fig.update_layout(
    xaxis_title="Unblocked FEV1 (L)",
    yaxis_title="Probability",
)
fig.show()

# Sanity check displayed as the cell output: should be 1 up to rounding.
proba_per_bin_norm.sum()

1.0000000000000002