In this notebook, I will use the information about IA to refine F3. Initially, I built F3 using FEF25-75%FEV1 against FEV1-based-AR. The latter variable captures only the aspect of AR that is measured by FEV1. In CF, high AR usually correlates with high IA. We can use this correlation to reduce the uncertainty in the FEV1-based-AR, so that the corrected FEV1-based-AR is closer to the true AR. We can then use it to improve the model for F3.

In [1]:
import src.data.breathe_data as br
import src.modelling_fef2575.hfef2575 as hfef2575
import src.o2_fev1_analysis.smooth as smooth
import src.data.helpers as dh
import src.models.helpers as mh
import numpy as np
import pandas as pd
import src.inference.helpers as ih
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from scipy.stats import norm

In [2]:
# Need dataset with O2sat, FEV1, FEF25-75. Use as many datapoints as possible
# Infer FEV1-based-AR using FEV1
# Model F3 using this AR
# Infer FEV1-FEF2575-based-AR using FEV1 and FEF25-75
# Model AR-IA
# Infer IA-FEV1-FEF2575-based-AR using FEV1, FEF25-75, IA
# Model F3 using this new AR
# Compare the two models: compare the mean, median, std-percentiles plots of both -> std should be smaller

# Optionally repeat until std doesn't change

In [3]:
# Load the measurements dataset through the project's Excel loader.
# NOTE(review): judging by its name, the file holds O2, FEV1, FEF25-75 and PEF
# measurements with NaNs kept — confirm against the data folder.
meas_dataset_name = 'BR_O2_FEV1_FEF2575_PEF_Nan'
df = br.load_meas_from_excel(meas_dataset_name)

In [4]:
# Drop the PEF-related columns.
# errors="ignore" keeps this cell re-runnable: `df` is reassigned here, so a
# second execution would otherwise raise KeyError on the already-dropped columns.
df = df.drop(columns=["PEF", "ecPEF (L/s)", "PEF (L/s)"], errors="ignore")
# Keep only the rows that have both an FEV1 and an O2 saturation value;
# the shape is printed before and after to show how many rows are removed.
print(df.shape)
df = df.dropna(subset=["FEV1", "O2 Saturation"])
print(df.shape)
df.head()

(48978, 15)
(36745, 15)


Unnamed: 0,ID,Date Recorded,FEV1,O2 Saturation,FEF2575,ecFEV1,ecFEF2575,Age,Sex,Height,Predicted FEV1,Healthy O2 Saturation,ecFEV1 % Predicted,FEV1 % Predicted,O2 Saturation % Healthy
0,101,2019-01-25,1.31,97.0,0.54,1.31,0.67,53,Male,173.0,3.610061,97.150104,36.287474,36.287474,99.845492
1,101,2019-01-26,1.31,98.0,0.57,1.31,0.67,53,Male,173.0,3.610061,97.150104,36.287474,36.287474,100.874827
2,101,2019-01-27,1.31,96.0,0.67,1.31,0.69,53,Male,173.0,3.610061,97.150104,36.287474,36.287474,98.816157
3,101,2019-01-28,1.3,96.0,0.69,1.31,0.69,53,Male,173.0,3.610061,97.150104,36.287474,36.01047,98.816157
4,101,2019-01-29,1.28,98.0,0.6,1.3,0.69,53,Male,173.0,3.610061,97.150104,36.01047,35.456463,100.874827


In [12]:
# Infer AR (together with IA, HFEV1 and HO2Sat) from the observed ecFEV1 and O2Sat.
#
# Quick run on a 10-row sample only (row positions 10-12 and 3000-3006).
# To run on the whole dataset, pass `df` instead of `df_sample` below.
# NOTE(review): `df` names its column "O2 Saturation"; presumably the helper
# maps the "O2Sat" observed variable onto it — confirm.
sample_positions = np.r_[10:13, 3000:3007]
df_sample = df.iloc[sample_positions]
inf_res_df = ih.infer_AR_IA_HFEV1_HO2sat_get_back_df(
    df_sample,
    observed_variables=["ecFEV1", "O2Sat"],
)

In [13]:
# Preview the first five rows of the inference results.
inf_res_df.head(n=5)

Unnamed: 0,ID,Date Recorded,AR,IA,HFEV1,HO2Sat,AR mean,IA mean,HFEV1 mean,HO2Sat mean
0,101,2019-02-04,"[1.1439162288473596e-05, 1.4128484584136257e-0...","[0.6241724220633212, 0.29586869250194675, 0.07...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.5229394971636...","[0.0, 0.0, 0.0, 0.0, 4.007041516284258e-25, 2....",59.963931,0.965472,3.383607,97.371804
1,101,2019-02-05,"[4.09288153819515e-05, 5.055109129394316e-05, ...","[0.890457889593489, 0.1037294460784223, 0.0057...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.4072655212317...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",57.337637,0.615463,3.190448,97.723863
2,101,2019-02-06,"[1.1439162288473596e-05, 1.4128484584136257e-0...","[0.6241724220633212, 0.29586869250194675, 0.07...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.5229394971636...","[0.0, 0.0, 0.0, 0.0, 4.007041516284258e-25, 2....",59.963931,0.965472,3.383607,97.371804
3,106,2019-04-10,"[1.0275126512612341e-05, 1.4198133607256637e-0...","[0.2528841881807302, 0.3706674213892799, 0.266...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.62805186...","[0.0, 0.0, 0.0, 0.0, 1.1400993848164513e-33, 4...",50.817986,1.751621,2.840039,98.355269
4,106,2019-04-11,"[9.504619460482739e-06, 1.3133449677765221e-05...","[0.011152445689254557, 0.07137695168926153, 0....","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.05604258...","[7.711491286214119e-54, 4.728493594562206e-47,...",51.00453,3.58466,2.850286,98.308229
