In [1]:
import src.models.helpers as mh
import src.models.builders as mb
import src.inference.helpers as ih
import src.data.breathe_data as breathe_data
import src.data.smartcare_data as smartcare_data
import src.data.helpers as dh

import numpy as np
import pandas as pd

In [2]:
# Load the Breathe measurements from Excel and preview the first rows.
# NOTE(review): the meaning of the argument `2` is not visible here - confirm against load_meas_from_excel.
df = breathe_data.load_meas_from_excel(2)
df.head()

Unnamed: 0,ID,Date Recorded,FEV1,O2 Saturation,FEF2575,PEF,ecFEV1,Age,Sex,Height,Predicted FEV1,Healthy O2 Saturation,ecFEV1 % Predicted,FEV1 % Predicted,O2 Saturation % Healthy,PEF (L/s),FEF2575%PEF
0,101,2021-05-25,1.68,98,1.17,227,1.69,53,Male,173.0,3.610061,97.150104,46.813611,46.536607,100.874827,3.783333,30.92511
1,101,2021-05-26,1.65,98,1.06,236,1.69,53,Male,173.0,3.610061,97.150104,46.813611,45.705597,100.874827,3.933333,26.949153
2,101,2021-05-27,1.69,98,1.12,183,1.69,53,Male,173.0,3.610061,97.150104,46.813611,46.813611,100.874827,3.05,36.721311
3,101,2021-05-28,1.67,98,1.08,175,1.69,53,Male,173.0,3.610061,97.150104,46.813611,46.259604,100.874827,2.916667,37.028571
4,101,2021-05-29,1.69,98,1.16,171,1.76,53,Male,173.0,3.610061,97.150104,48.752636,46.813611,100.874827,2.85,40.701754


In [3]:
# Infer AR and IA for all data points given an individual's age, sex, height, ecFEV1 and O2 saturation measurements
def infer_AR_IA_for_ID(df):
    """Infer the AR and IA distributions for every measurement row of one individual.

    Meant to be called through ``df.groupby("ID").apply(...)``, so ``df`` holds
    the rows of a single ID.

    Parameters
    ----------
    df : pd.DataFrame
        Rows of one individual. The columns "Height", "Age" and "Sex" are read
        from the first row only; "ecFEV1" and "O2 Saturation" are read per row.

    Returns
    -------
    pd.Series
        Indexed 0..len(df)-1. Each element is a tuple
        ``(res[AR.name].values, res[IA.name].values)`` for the matching row.
    """
    # Re-index a copy instead of resetting in place: the in-place reset mutated
    # the group handed over by groupby.apply and inserted a stray "index" column.
    # The positional 0..n-1 index is kept so that label 0 below is the first row
    # and the returned Series keeps the same index as before.
    df = df.reset_index(drop=True)
    _, inf_alg, _, ecFEV1, AR, _, _, IA, _, O2Sat = mb.o2sat_fev1_point_in_time_model_shared_healthy_vars(
        df.Height[0], df.Age[0], df.Sex[0]
    )

    def infer_and_unpack(ecfev1_obs, o2sat_obs):
        # Query AR and IA given the two observations of a single row.
        res = ih.infer_on_factor_graph(
            inf_alg,
            [AR, IA],
            [[ecFEV1, ecfev1_obs], [O2Sat, o2sat_obs]],
        )
        return res[AR.name].values, res[IA.name].values

    # One inference call per row; the result is a Series of (AR, IA) tuples.
    res = df.apply(
        lambda row: infer_and_unpack(row["ecFEV1"], row["O2 Saturation"]),
        axis=1,
    )
    return res


# Run the per-ID inference; every element of resraw is an (AR, IA) tuple for one measurement row.
resraw = df.groupby("ID").apply(infer_AR_IA_for_ID)
# Debug variant that runs on a handful of rows only:
# resraw = df.iloc[np.r_[10:13, 1000:1007]].groupby("ID").apply(infer_AR_IA_for_ID)

# Split each tuple into two columns and bring ID back as a regular column ...
res = resraw.apply(pd.Series).reset_index()
# ... then name the columns and discard the per-group row counter ("level_1").
res = res.rename(columns={0: "AR", 1: "IA"}).drop(columns="level_1")
res

Unnamed: 0,ID,AR,IA
0,101,"[0.00020140268126914501, 0.0002563950730509337...","[0.8561467340582659, 0.13340126187709733, 0.01..."
1,101,"[0.00020140268126914501, 0.0002563950730509337...","[0.8561467340582659, 0.13340126187709733, 0.01..."
2,101,"[0.00020140268126914501, 0.0002563950730509337...","[0.8561467340582659, 0.13340126187709733, 0.01..."
3,101,"[0.00020140268126914501, 0.0002563950730509337...","[0.8561467340582659, 0.13340126187709733, 0.01..."
4,101,"[0.00032111950996979114, 0.0004109729606994951...","[0.8452978344899926, 0.1425778679219038, 0.011..."
...,...,...,...
15363,357,"[0.018894458144689832, 0.023900270613168705, 0...","[0.5938219299610586, 0.32073461779482193, 0.07..."
15364,358,"[0.12393173080736078, 0.12818807612289895, 0.1...","[0.794036182578991, 0.18549455618183203, 0.019..."
15365,358,"[0.12393173080736081, 0.12818807612289898, 0.1...","[0.3561906667243055, 0.39015595419992494, 0.20..."
15366,358,"[0.11364347821363134, 0.11951918572027004, 0.1...","[0.794036182578991, 0.18549455618183203, 0.019..."


In [4]:
# Stand-alone AR / IA nodes, used here only for their get_mean helper.
# NOTE(review): these are re-declared by hand; presumably their range and bin
# width must match the AR / IA nodes created inside the model builder - confirm.
AR = mh.VariableNode("Airway resistance (%)", 0, 90, 2, prior={"type": "uniform"})
IA = mh.VariableNode("Inactive alveoli (%)", 0, 30, 1, prior={"type": "uniform"})

# Reduce every inferred distribution to its mean.
res["AR mean"] = res["AR"].apply(AR.get_mean)
res["IA mean"] = res["IA"].apply(IA.get_mean)

In [5]:
# Concatenate the 4 new columns (AR, IA, AR mean, IA mean) to the original dataframe.
# pd.concat(axis=1) aligns rows on the index, and res carries a fresh 0..N-1 index
# in groupby order. Guard against silent misalignment: the rows only line up when
# df has the same index and is already in that ID order.
assert df.index.equals(res.index) and np.array_equal(
    df["ID"].to_numpy(), res["ID"].to_numpy()
), "df and res rows are not aligned; concatenating them would mix up measurements"
df1 = pd.concat([df, res.drop(columns=["ID"])], axis=1)
df1.head()

Unnamed: 0,ID,Date Recorded,FEV1,O2 Saturation,FEF2575,PEF,ecFEV1,Age,Sex,Height,...,Healthy O2 Saturation,ecFEV1 % Predicted,FEV1 % Predicted,O2 Saturation % Healthy,PEF (L/s),FEF2575%PEF,AR,IA,AR mean,IA mean
0,101,2021-05-25,1.68,98,1.17,227,1.69,53,Male,173.0,...,97.150104,46.813611,46.536607,100.874827,3.783333,30.92511,"[0.00020140268126914501, 0.0002563950730509337...","[0.8561467340582659, 0.13340126187709733, 0.01...",47.601368,0.654616
1,101,2021-05-26,1.65,98,1.06,236,1.69,53,Male,173.0,...,97.150104,46.813611,45.705597,100.874827,3.933333,26.949153,"[0.00020140268126914501, 0.0002563950730509337...","[0.8561467340582659, 0.13340126187709733, 0.01...",47.601368,0.654616
2,101,2021-05-27,1.69,98,1.12,183,1.69,53,Male,173.0,...,97.150104,46.813611,46.813611,100.874827,3.05,36.721311,"[0.00020140268126914501, 0.0002563950730509337...","[0.8561467340582659, 0.13340126187709733, 0.01...",47.601368,0.654616
3,101,2021-05-28,1.67,98,1.08,175,1.69,53,Male,173.0,...,97.150104,46.813611,46.259604,100.874827,2.916667,37.028571,"[0.00020140268126914501, 0.0002563950730509337...","[0.8561467340582659, 0.13340126187709733, 0.01...",47.601368,0.654616
4,101,2021-05-29,1.69,98,1.16,171,1.76,53,Male,173.0,...,97.150104,48.752636,46.813611,100.874827,2.85,40.701754,"[0.00032111950996979114, 0.0004109729606994951...","[0.8452978344899926, 0.1425778679219038, 0.011...",44.593062,0.667227


In [6]:
# Save df1 (the measurements plus the inferred AR / IA columns) as an Excel
# workbook under <main path>/ExcelFiles/BR/, without writing the index.
df1.to_excel(
    dh.get_path_to_main() + "ExcelFiles/BR/BR_O2_FEV1_FEF2575_PEF_inferred_AR_IA.xlsx",
    index=False,
)

# Save arrays as objects
Arrays are currently saved as strings, which makes them painful to process when reading the file back.

In [None]:
import pandas as pd
import numpy as np
import src.data.helpers as data_helpers

In [None]:
# Load the workbook through the project helper and display the AR entry of the first row.
# NOTE(review): the ["AR", "IA"] argument presumably names the columns to turn
# back into arrays - confirm in data_helpers.load_excel.
data_helpers.load_excel(
    "../../../../ExcelFiles/inferred_AR_IA_with_FEV1_O2Sat_no_AR-IA_factor_1.xlsx",
    ["AR", "IA"],
).iloc[0].AR

In [None]:
# Read the same workbook with plain pandas.
# Note: this rebinds `df`, replacing the measurements frame loaded earlier in the notebook.
df = pd.read_excel(
    "../../../../ExcelFiles/inferred_AR_IA_with_FEV1_O2Sat_no_AR-IA_factor_1.xlsx"
)

In [None]:
# Convert back to arrays
def str_to_array(s):
    """Parse the string form of a 1-D numeric array back into a numpy array.

    Parameters
    ----------
    s : str
        Text such as "[0.1 0.2 0.3]": whitespace-separated numbers wrapped in
        one enclosing character on each side. Surrounding whitespace, real line
        breaks and literal backslash-n sequences are tolerated.

    Returns
    -------
    np.ndarray
        The parsed numbers, as produced by ``np.fromstring(..., sep=" ")``.
    """
    # Neutralise literal backslash-n sequences (two characters), then trim the
    # ends so that the [1:-1] slice below really removes the enclosing brackets
    # even when the cell text is padded with spaces or a trailing newline.
    enclosed = s.replace("\\n", " ").strip()
    # Collapse every run of whitespace (including real line breaks) to one space.
    body = " ".join(enclosed[1:-1].split())
    return np.fromstring(body, sep=" ")


# Replace the stringified AR and IA columns with the parsed arrays.
df["AR"] = df["AR"].apply(str_to_array)
df["IA"] = df["IA"].apply(str_to_array)

In [None]:
# Display the AR entry of the first row.
df.iloc[0].AR

In [None]:
# Write the frame back to the path it was read from, overwriting that workbook;
# the index is not written.
df.to_excel(
    "../../../../ExcelFiles/inferred_AR_IA_with_FEV1_O2Sat_no_AR-IA_factor_1.xlsx",
    index=False,
)

In [None]:
# Read a different workbook (file name without the "_1" suffix); this rebinds `df` again.
df = pd.read_excel(
    "../../../../ExcelFiles/inferred_AR_IA_with_FEV1_O2Sat_no_AR-IA_factor.xlsx"
)

In [None]:
# Decode the AR column from JSON text and show the first row's value.
# json is imported here because no import of it is visible earlier in the
# notebook, so this cell raised NameError on a fresh kernel.
import json

df.AR.apply(json.loads)[0]