In this page, I benchmark the airway resistance (AR) inferred by different models.

The main goal is to identify the model that most collapses the uncertainty in AR without becoming overconfident.

In [1]:
import src.models.var_builders as var_builders
import src.data.helpers as dh
import pandas as pd
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
import src.models.helpers as mh
import src.models.cpts.helpers as cpth
from scipy.stats import iqr

In [2]:
# Build the variable objects shared by the point-in-time O2Sat / FEV1 / FEF25-75 model.
# The cells visible in this notebook only use AR (name, get_IPR, a, b) and the names
# of HO2Sat, IA and HFEV1; the other variables are unpacked but not referenced.
# NOTE(review): the arguments 180, 10, "Male" look like the height, age and sex of a
# reference individual — confirm against var_builders.
(
    HFEV1,
    ecFEV1,
    AR,
    HO2Sat,
    O2SatFFA,
    IA,
    UO2Sat,
    O2Sat,
    ecFEF2575prctecFEV1,
) = var_builders.o2sat_fev1_fef2575_point_in_time_model_shared_healthy_vars(
    180, 10, "Male"
)

## Load the AR posteriors of each model

In [3]:
# One-day model fed with O2Sat and ecFEV1: AR posterior per ID and recording date.
o2_fev1_xlsx = f"{dh.get_path_to_main()}/ExcelFiles/BR/Refining_F3/infer_AR_using_O2Sat_ecFEV1.xlsx"

# Columns of the sheet that the benchmark does not need.
o2_fev1_unused_cols = [
    "FEV1", "O2 Saturation", "FEF2575",
    "ecFEV1", "ecFEF2575",
    "Sex", "Height", "Age",
    "Predicted FEV1", "Healthy O2 Saturation",
    "ecFEV1 % Predicted", "FEV1 % Predicted", "O2 Saturation % Healthy",
    "ecFEF2575%ecFEV1",
    "IA", "AR mean", "IA mean",
]

# NOTE(review): the two list arguments presumably name the array-valued and the date
# columns to parse — confirm against dh.load_excel.
df_o2_fev1 = dh.load_excel(o2_fev1_xlsx, ["AR"], ["Date Recorded"]).drop(
    columns=o2_fev1_unused_cols
)
df_o2_fev1.head()

Unnamed: 0,ID,Date Recorded,AR
0,101,2019-01-25,"[2.0632992e-05, 2.54837638e-05, 3.20761463e-05..."
1,101,2019-01-26,"[4.09288154e-05, 5.05510913e-05, 6.36281286e-0..."
2,101,2019-01-27,"[1.14391623e-05, 1.41284846e-05, 1.77833754e-0..."
3,101,2019-01-28,"[1.14391623e-05, 1.41284846e-05, 1.77833754e-0..."
4,101,2019-01-29,"[4.09288154e-05, 5.05510913e-05, 6.36281286e-0..."


In [4]:
# One-day model fed with O2Sat, ecFEV1 and ecFEF2575: AR posterior per ID and recording date.
o2_fev1_fef2575_xlsx = f"{dh.get_path_to_main()}/ExcelFiles/BR/Refining_F3/infer_AR-and-IA_using_O2Sat_ecFEV1_ecFEF2575.xlsx"

# Columns of the sheet that the benchmark does not need.
o2_fev1_fef2575_unused_cols = [
    "FEV1", "O2 Saturation", "FEF2575",
    "ecFEV1", "ecFEF2575",
    "Sex", "Height", "Age",
    "Predicted FEV1", "Healthy O2 Saturation",
    "ecFEV1 % Predicted", "FEV1 % Predicted", "O2 Saturation % Healthy",
    "ecFEF2575%ecFEV1",
    "IA", "AR mean", "IA mean",
]

# NOTE(review): the two list arguments presumably name the array-valued and the date
# columns to parse — confirm against dh.load_excel.
df_o2_fev1_fef2575 = dh.load_excel(
    o2_fev1_fef2575_xlsx, ["AR"], ["Date Recorded"]
).drop(columns=o2_fev1_fef2575_unused_cols)
df_o2_fev1_fef2575.head()

Unnamed: 0,ID,Date Recorded,AR
0,101,2019-01-25,"[3.43364417e-08, 2.32112156e-07, 5.9144776e-07..."
1,101,2019-01-26,"[8.54273639e-08, 5.19924525e-07, 1.27561745e-0..."
2,101,2019-01-27,"[5.91779338e-08, 2.44032527e-07, 5.19355733e-0..."
3,101,2019-01-28,"[7.87646411e-08, 2.96955127e-07, 6.11314835e-0..."
4,101,2019-01-29,"[1.32404313e-07, 6.59248843e-07, 1.50366107e-0..."


In [5]:
# Two-days model fed with O2Sat and ecFEV1.
two_days_o2sat_ecfev1_xlsx = f"{dh.get_path_to_main()}/ExcelFiles/BR/Refining_F3/infer_AR_with_two_days_model_O2Sat_ecFEV1.xlsx"

# Drop everything except ID, day and AR posterior, then use the same column names
# ("Date Recorded", "AR") as the one-day tables.
df_2days_o2sat_ecfev1 = (
    dh.load_excel(two_days_o2sat_ecfev1_xlsx, [AR.name], ["Day"])
    .drop(columns=["Unnamed: 0", HO2Sat.name, IA.name, HFEV1.name])
    .rename(columns={"Day": "Date Recorded", AR.name: "AR"})
)
df_2days_o2sat_ecfev1.head()

Unnamed: 0,ID,Date Recorded,AR
0,101,2019-01-25,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1,101,2019-01-26,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2,101,2019-01-27,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
3,101,2019-01-28,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,101,2019-01-29,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [6]:
# Two-days model fed with O2Sat and FEV1.
two_days_o2sat_fev1_xlsx = f"{dh.get_path_to_main()}/ExcelFiles/BR/Refining_F3/infer_AR_with_two_days_model_O2Sat_FEV1.xlsx"

# Drop everything except ID, day and AR posterior, then use the same column names
# ("Date Recorded", "AR") as the one-day tables.
df_2days_o2sat_fev1 = (
    dh.load_excel(two_days_o2sat_fev1_xlsx, [AR.name], ["Day"])
    .drop(columns=["Unnamed: 0", HO2Sat.name, IA.name, HFEV1.name])
    .rename(columns={"Day": "Date Recorded", AR.name: "AR"})
)
df_2days_o2sat_fev1.head()

Unnamed: 0,ID,Date Recorded,AR
0,101,2019-01-25,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1,101,2019-01-26,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2,101,2019-01-27,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
3,101,2019-01-28,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,101,2019-01-29,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [7]:
# Two-days model fed with O2Sat, ecFEV1 and ecFEF2575.
# The "/" after get_path_to_main() is written explicitly, as in the other load cells,
# so the path does not depend on get_path_to_main() returning a trailing separator.
df_2days_o2sat_ecfev1_ecfef2575 = (
    dh.load_excel(
        f"{dh.get_path_to_main()}/ExcelFiles/BR/Refining_F3/infer_AR_with_two_days_model_O2Sat_ecFEV1_ecFEF2575.xlsx",
        [AR.name],
        ["Day"],
    )
    # Remove the other inferred variables, then use the same column names
    # ("Date Recorded", "AR") as the one-day tables.
    .drop(columns=[HO2Sat.name, IA.name, HFEV1.name])
    .rename({"Day": "Date Recorded", AR.name: "AR"}, axis=1)
)

In [8]:
# Two-days model fed with ecFEV1 and ecFEF2575 (no O2Sat input).
# The "/" after get_path_to_main() is written explicitly, as in the other load cells,
# so the path does not depend on get_path_to_main() returning a trailing separator.
df_2days_ecfev1_ecfef2575 = (
    dh.load_excel(
        f"{dh.get_path_to_main()}/ExcelFiles/BR/Refining_F3/infer_AR_with_two_days_model_ecFEV1_ecFEF2575.xlsx",
        [AR.name],
        ["Day"],
    )
    # Remove the other inferred variables, then use the same column names
    # ("Date Recorded", "AR") as the one-day tables.
    .drop(columns=[HO2Sat.name, IA.name, HFEV1.name])
    .rename({"Day": "Date Recorded", AR.name: "AR"}, axis=1)
)
df_2days_ecfev1_ecfef2575.head()

Unnamed: 0,ID,Date Recorded,AR
0,101,2019-01-25,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1,101,2019-01-26,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2,101,2019-01-27,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
3,101,2019-01-28,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,101,2019-01-29,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


## Inter-percentile range for each model

In [9]:
# Lower / upper percentiles that bound the central +/- 1, 2 and 3 sigma interval of a
# normal distribution.
SIGMA_PERCENTILES = {
    1: (0.15865, 0.84135),
    2: (0.02275, 0.97725),
    3: (0.00135, 0.99865),
}
# The 2 sigma bounds are the ones in use; the `method` label of the histogram cell
# below has to describe the same choice.
p1, p2 = SIGMA_PERCENTILES[2]

# Each table gets one extra column holding AR.get_IPR of its AR posterior between p1
# and p2. The column label identifies the model and is the key read by the plot cell.
for df_model, ipr_col in (
    (df_o2_fev1, "1 day O2sat ecFEV1"),
    (df_o2_fev1_fef2575, "1 day O2sat ecFEV1 ecFEF2575"),
    (df_2days_o2sat_fev1, "2 days O2Sat FEV1"),
    (df_2days_o2sat_ecfev1, "2 days O2Sat ecFEV1"),
    (df_2days_o2sat_ecfev1_ecfef2575, "2 days O2Sat ecFEV1 ecFEF2575"),
    (df_2days_ecfev1_ecfef2575, "2 days ecFEV1 ecFEF2575"),
):
    # Only the AR column is needed, so map over that column instead of over whole rows.
    df_model[ipr_col] = df_model["AR"].apply(lambda ar: AR.get_IPR(ar, p1, p2))

In [19]:
# Label of the plotted uncertainty measure. It must describe the percentile bounds
# (p1, p2) used to compute the IPR columns: 0.02275 / 0.97725 is the 2 sigma range.
method = "2 sigma interpercentile range"
xbins = 0.4
xrange = [0, 50]
# Settings previously used for the other measures:
#   "std dev":                        xbins = 0.1
#   "1 sigma interpercentile range":  xbins = 0.2, xrange = [0, 30]
#   "3 sigma interpercentile range":  xbins = 0.4, xrange = [0, 70]

title = f"Airway resistance uncertainty collapse benchmark ({method})"

# (table, IPR column, legend name) — one subplot row per model, top to bottom.
ipr_panels = [
    (df_o2_fev1, "1 day O2sat ecFEV1", "1 day O2Sat ecFEV1"),
    (
        df_o2_fev1_fef2575,
        "1 day O2sat ecFEV1 ecFEF2575",
        "1 day O2Sat ecFEV1 ecFEF2575",
    ),
    (df_2days_o2sat_ecfev1, "2 days O2Sat ecFEV1", "2 days O2Sat ecFEV1"),
    (df_2days_o2sat_fev1, "2 days O2Sat FEV1", "2 days O2Sat FEV1"),
    (df_2days_ecfev1_ecfef2575, "2 days ecFEV1 ecFEF2575", "2 days ecFEV1 ecFEF2575"),
    (
        df_2days_o2sat_ecfev1_ecfef2575,
        "2 days O2Sat ecFEV1 ecFEF2575",
        "2 days O2Sat ecFEV1 ecFEF2575",
    ),
]
n_rows = len(ipr_panels)

fig = make_subplots(rows=n_rows, cols=1, shared_xaxes=True, y_title="Frequency (%)")

# Y axis title font size (the shared y title is the figure's first annotation)
fig.layout.annotations[0]["font"] = {"size": 14}

# One percent-normalised histogram per model, all with the same bin width
for row_idx, (df_model, ipr_col, legend) in enumerate(ipr_panels, start=1):
    fig.add_trace(
        go.Histogram(
            x=df_model[ipr_col],
            name=legend,
            histnorm="percent",
            xbins=dict(size=xbins),
        ),
        row=row_idx,
        col=1,
    )

# The x axis range and title are set on the bottom subplot
fig.update_xaxes(
    range=xrange, title=f"Airway resistance {method} (%)", row=n_rows, col=1
)
# Same y axis range on every row so that the panels are directly comparable
fig.update_yaxes(range=[0, 5.5])

fig.update_layout(title=title, height=600, width=800)
fig.show()

fig.write_image(f"{dh.get_path_to_main()}/PlotsBreathe/AR_modelling/{title}.png")

## Avg airway resistance for each model

In [14]:
import src.inference.helpers as ih

In [18]:
def calc_population_level_ar(df):
    """Combine the per-row AR distributions of `df` into one normalised distribution.

    Args:
        df: DataFrame with an "AR" column holding one array per row; all arrays
            must have the same shape.

    Returns:
        numpy array of that shape: the element-wise sum over all rows, divided by
        its own total so that it sums to 1.

    Raises:
        ValueError: if `df` has no rows, or if the arrays do not share one shape.
    """
    if len(df) == 0:
        raise ValueError("Cannot compute a population-level AR from an empty table")
    # Stacking makes the equal-shape requirement explicit instead of relying on
    # element-wise addition of an object column.
    summed = np.stack(list(df["AR"])).sum(axis=0)
    # Normalising by the total instead of dividing by the entry count limits
    # operations on very small numbers and absorbs small irregularities in the
    # normalisation of the individual rows.
    return summed / summed.sum()


# Population-level AR distribution of every model, computed in the order listed.
(
    avg_ar_o2_fev1,
    avg_ar_o2_fev1_fef2575,
    avg_ar_2days_o2sat_fev1,
    avg_ar_2days_o2sat_ecfev1,
    avg_ar_2days_o2sat_ecfev1_ecfef2575,
    avg_ar_2days_ecfev1_ecfef2575,
) = map(
    calc_population_level_ar,
    (
        df_o2_fev1,
        df_o2_fev1_fef2575,
        df_2days_o2sat_fev1,
        df_2days_o2sat_ecfev1,
        df_2days_o2sat_ecfev1_ecfef2575,
        df_2days_ecfev1_ecfef2575,
    ),
)

In [30]:
title = "Average airway resistance of the population for different models"

# (population-level AR distribution, legend name) — one subplot row per model,
# top to bottom.
population_panels = [
    (avg_ar_o2_fev1, "1 day O2Sat ecFEV1"),
    (avg_ar_o2_fev1_fef2575, "1 day O2Sat ecFEV1 ecFEF2575"),
    (avg_ar_2days_o2sat_ecfev1, "2 days O2Sat ecFEV1"),
    (avg_ar_2days_o2sat_fev1, "2 days O2Sat FEV1"),
    (avg_ar_2days_ecfev1_ecfef2575, "2 days ecFEV1 ecFEF2575"),
    (avg_ar_2days_o2sat_ecfev1_ecfef2575, "2 days O2Sat ecFEV1 ecFEF2575"),
]
n_rows = len(population_panels)

fig = make_subplots(rows=n_rows, cols=1, shared_xaxes=True, y_title="p")

# Y axis title font size (the shared y title is the figure's first annotation)
fig.layout.annotations[0]["font"] = {"size": 14}

# One distribution per row.
# NOTE(review): AR.a and AR.b are presumably the bounds of the AR domain — confirm
# against ih.plot_histogram.
for row_idx, (avg_ar, legend) in enumerate(population_panels, start=1):
    ih.plot_histogram(
        fig, AR, avg_ar, AR.a, AR.b, row_idx, 1, name=legend, annot=False
    )

# The x axis title is set on the bottom subplot
fig.update_xaxes(title="Airway resistance (%)", row=n_rows, col=1)
# Same y axis range on every row so that the panels are directly comparable
fig.update_yaxes(range=[0, 0.05])

fig.update_layout(title=title, height=600, width=800)
fig.show()

fig.write_image(f"{dh.get_path_to_main()}/PlotsBreathe/AR_modelling/{title}.png")