In [1]:
import models.helpers as mh
import models.builders as mb
import inference.helpers as ih
import data.breathe_data as br
import data.helpers as dh
import o2_fev1_analysis.smooth as smooth


import numpy as np
import pandas as pd

In [2]:
df = br.load_meas_from_excel("BR_O2_FEV1_FEF2575_PEF_Nan")

INFO:root:* Checking for same day measurements *


In [3]:
# Remove PEF related rows
df = df.drop(columns=["PEF", "ecPEF (L/s)", "PEF (L/s)"])
# Remove NaN on FEV1, O2 saturation columns
print(df.shape)
df = df.dropna(subset=["FEV1", "O2 Saturation", "FEF2575"])
print(df.shape)
df.head()

(60785, 15)
(41260, 15)


Unnamed: 0,ID,Date Recorded,FEV1,O2 Saturation,FEF2575,ecFEV1,ecFEF2575,Sex,Height,Age,Predicted FEV1,Healthy O2 Saturation,ecFEV1 % Predicted,FEV1 % Predicted,O2 Saturation % Healthy
0,101,2019-01-25,1.31,97.0,0.54,1.31,0.67,Male,173.0,53,3.610061,97.150104,36.287474,36.287474,99.845492
1,101,2019-01-26,1.31,98.0,0.57,1.31,0.67,Male,173.0,53,3.610061,97.150104,36.287474,36.287474,100.874827
2,101,2019-01-27,1.31,96.0,0.67,1.31,0.69,Male,173.0,53,3.610061,97.150104,36.287474,36.287474,98.816157
3,101,2019-01-28,1.3,96.0,0.69,1.31,0.69,Male,173.0,53,3.610061,97.150104,36.287474,36.01047,98.816157
4,101,2019-01-29,1.28,98.0,0.6,1.3,0.69,Male,173.0,53,3.610061,97.150104,36.01047,35.456463,100.874827


In [4]:
# Infer AR and IA for all data points given an individuals' age, sex, height, FEV1 and O2 saturation measurements

def infer_AR_IA_for_ID(df):
    df.reset_index(inplace=True)
    _, inf_alg, HFEV1, ecFEV1, AR, HO2Sat, _, IA, _, O2Sat, ecFEF2575prctecFEV1 = (
        mb.o2sat_fev1_fef2575_point_in_time_model_shared_healthy_vars(
            df.Height[0], df.Age[0], df.Sex[0]
        )
    )

    def infer_and_unpack(ecfev1_obs, o2sat_obs, ecfef2575_obs):

        res = ih.infer_on_factor_graph(
            inf_alg,
            [AR, IA, HFEV1, HO2Sat],
            [
                [ecFEV1, ecfev1_obs],
                [O2Sat, o2sat_obs],
            ],
        )
        res_with_fef2575 = ih.infer_on_factor_graph(
            inf_alg,
            [AR, IA, HFEV1, HO2Sat],
            [
                [ecFEV1, ecfev1_obs],
                [O2Sat, o2sat_obs],
                [ecFEF2575prctecFEV1, ecfef2575_obs / ecfev1_obs * 100],
            ],
        )
        return (
            res[AR.name].values,
            res[IA.name].values,
            res[HFEV1.name].values,
            res[HO2Sat.name].values,
            res_with_fef2575[AR.name].values,
            res_with_fef2575[IA.name].values,
            res_with_fef2575[HFEV1.name].values,
            res_with_fef2575[HO2Sat.name].values,
        )

    res = df.apply(
        lambda row: infer_and_unpack(
            row["ecFEV1"], row["O2 Saturation"], row["ecFEF2575"]
        ),
        axis=1,
    )
    return res


# resraw = df.groupby("ID").apply(infer_AR_IA_for_ID)
resraw = df.iloc[np.r_[10:13, 3000:3007]].groupby("ID").apply(infer_AR_IA_for_ID)
res = (
    resraw.apply(pd.Series)
    .reset_index()
    .rename(
        columns={
            0: "AR",
            1: "IA",
            2: "HFEV1",
            3: "HO2Sat",
            4: "AR_2",
            5: "IA_2",
            6: "HFEV1_2",
            7: "HO2Sat_2",
        }
    )
    .drop(columns="level_1")
)

In [8]:
resraw

ID    
101  0    ([1.1439162288473599e-05, 1.4128484584136248e-...
     1    ([4.092881538195151e-05, 5.055109129394314e-05...
     2    ([1.1439162288473599e-05, 1.4128484584136248e-...
107  0    ([0.04310877297060921, 0.05129046235708093, 0....
     1    ([0.04311470643106022, 0.05129752193936402, 0....
     2    ([0.04310877297060921, 0.05129046235708093, 0....
     3    ([0.03632507325651121, 0.043875252521203424, 0...
     4    ([0.030320985793437637, 0.0371513870120688, 0....
     5    ([0.030334547672527717, 0.03716800397241827, 0...
     6    ([0.00011375050486170885, 0.000159555382893474...
dtype: object

In [None]:
AR = mh.VariableNode("Airway resistance (%)", 0, 90, 2, prior=None)
IA = mh.VariableNode("Inactive alveoli (%)", 0, 30, 1, prior=None)
HFEV1 = mh.VariableNode("Healthy FEV1 (L)", 1, 6, 0.05, prior=None)
HO2Sat = mh.VariableNode("Healthy O2 saturation (%)", 90, 100, 0.5, prior=None)


res["AR_FEF2575 mean"] = res["AR_FEF2575"].apply(lambda x: AR.get_mean(x))
res["IA_FEF2575 mean"] = res["IA_FEF2575"].apply(lambda x: IA.get_mean(x))
res["HFEV1_FEF2575 mean"] = res["HFEV1_FEF2575"].apply(lambda x: HFEV1.get_mean(x))
res["HO2Sat_FEF2575 mean"] = res["HO2Sat_FEF2575"].apply(lambda x: HO2Sat.get_mean(x))

In [6]:
res

Unnamed: 0,ID,AR,IA,HFEV1,HO2Sat,AR_2,IA_2,HFEV1_2,HO2Sat_2
0,101,"[1.1439162288473599e-05, 1.4128484584136248e-0...","[0.6241724220633212, 0.29586869250194675, 0.07...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.5229394971636...","[0.0, 0.0, 0.0, 0.0, 4.0070415162842564e-25, 2...","[4.224505541441449e-08, 2.0708831729950947e-07...","[0.6302131339226663, 0.29303846367256264, 0.06...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.7563407007961...","[0.0, 0.0, 0.0, 0.0, 1.5306633364109298e-25, 1..."
1,101,"[4.092881538195151e-05, 5.055109129394314e-05,...","[0.890457889593489, 0.10372944607842229, 0.005...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.4072655212317...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[1.578945166303679e-07, 7.740103413060107e-07,...","[0.8945427712437167, 0.1002009797511856, 0.005...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.1514816908058...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2,101,"[1.1439162288473599e-05, 1.4128484584136248e-0...","[0.6241724220633212, 0.29586869250194675, 0.07...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.5229394971636...","[0.0, 0.0, 0.0, 0.0, 4.0070415162842564e-25, 2...","[5.553360331641265e-08, 2.4644207885087794e-07...","[0.6293643699588668, 0.29351906687825663, 0.06...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.1942446141947...","[0.0, 0.0, 0.0, 0.0, 1.5732812234256495e-25, 1..."
3,107,"[0.04310877297060921, 0.05129046235708093, 0.0...","[0.6058836510157681, 0.3141241799159429, 0.072...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0193250349353699, 0.029106862046827494, 0.0...","[0.6058975061076478, 0.3141154543338157, 0.072...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,107,"[0.04311470643106022, 0.05129752193936402, 0.0...","[0.7882954863829731, 0.19130421251627647, 0.01...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.01932835117640684, 0.029111856882319338, 0....","[0.7883018066115406, 0.19129901652344067, 0.01...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
5,107,"[0.04310877297060921, 0.05129046235708093, 0.0...","[0.6058836510157681, 0.3141241799159429, 0.072...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0193250349353699, 0.029106862046827494, 0.0...","[0.6058975061076478, 0.3141154543338157, 0.072...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
6,107,"[0.03632507325651121, 0.043875252521203424, 0....","[0.6059299335692415, 0.3140950203123333, 0.072...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.013797456333670287, 0.02224157279699035, 0....","[0.6059667821308659, 0.3140718110759854, 0.072...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
7,107,"[0.030320985793437637, 0.0371513870120688, 0.0...","[0.6060099213615852, 0.3140446168522451, 0.072...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.012817294595239025, 0.01988072550693231, 0....","[0.6060394141288322, 0.3140260469309144, 0.072...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
8,107,"[0.030334547672527717, 0.03716800397241827, 0....","[0.7883529985648824, 0.19125692321923599, 0.01...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.012823954730640694, 0.019891055949349444, 0...","[0.7883664707764498, 0.19124584811251288, 0.01...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
9,107,"[0.00011375050486170885, 0.0001595553828934741...","[0.07032493485253674, 0.21653022784619885, 0.3...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 2.022087692312874e-34, 4.49978...","[5.340212557678789e-07, 3.2950628023624017e-06...","[0.07838576962007578, 0.23120479787468373, 0.3...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 1.2338881968144757e-34, 3.2577..."


In [11]:
# Concatenate the 4 new columns to the original dataframe
df1 = pd.concat([df, res.drop(columns=["ID"])], axis=1)
df1.head()

Unnamed: 0,ID,Date Recorded,FEV1,O2 Saturation,FEF2575,PEF,ecFEV1,Age,Sex,Height,...,FEV1 % Predicted,O2 Saturation % Healthy,AR,IA,HFEV1,HO2Sat,AR mean,IA mean,HFEV1 mean,HO2Sat mean
0,101,2021-05-25,1.68,98.0,1.17,227.0,1.69,53,Male,173.0,...,46.536607,100.874827,"[0.00020140268126914501, 0.0002563950730509337...","[0.8561467340582659, 0.13340126187709733, 0.01...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",47.601368,0.654616,3.285826,97.643061
1,101,2021-05-26,1.65,98.0,1.06,236.0,1.69,53,Male,173.0,...,45.705597,100.874827,"[0.00020140268126914501, 0.0002563950730509337...","[0.8561467340582659, 0.13340126187709733, 0.01...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",47.601368,0.654616,3.285826,97.643061
2,101,2021-05-27,1.69,98.0,1.12,183.0,1.69,53,Male,173.0,...,46.813611,100.874827,"[0.00020140268126914501, 0.0002563950730509337...","[0.8561467340582659, 0.13340126187709733, 0.01...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",47.601368,0.654616,3.285826,97.643061
3,101,2021-05-28,1.67,98.0,1.08,175.0,1.69,53,Male,173.0,...,46.259604,100.874827,"[0.00020140268126914501, 0.0002563950730509337...","[0.8561467340582659, 0.13340126187709733, 0.01...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",47.601368,0.654616,3.285826,97.643061
4,101,2021-05-29,1.69,98.0,1.16,171.0,1.76,53,Male,173.0,...,46.813611,100.874827,"[0.00032111950996979114, 0.0004109729606994951...","[0.8452978344899926, 0.1425778679219038, 0.011...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",44.593062,0.667227,3.288958,97.621469


In [12]:
df1.columns

Index(['ID', 'Date Recorded', 'FEV1', 'O2 Saturation', 'FEF2575', 'PEF',
       'ecFEV1', 'Age', 'Sex', 'Height', 'Predicted FEV1',
       'Healthy O2 Saturation', 'ecFEV1 % Predicted', 'FEV1 % Predicted',
       'O2 Saturation % Healthy', 'AR', 'IA', 'HFEV1', 'HO2Sat', 'AR mean',
       'IA mean', 'HFEV1 mean', 'HO2Sat mean'],
      dtype='object')

In [None]:
# Save rawres as excel to ../../../../ExcelFiles
df1.to_excel(
    dh.get_path_to_main()
    + "ExcelFiles/BR/BR_obs_O2_FEV1_FEF2575_infer_AR_IA_HFEV1_HO2Sat.xlsx",
    index=False,
)