In [1]:
import src.models.helpers as mh
import src.models.builders as mb
import src.inference.helpers as ih
import src.data.breathe_data as br
import src.data.helpers as dh
import src.o2_fev1_analysis.smooth as smooth


import numpy as np
import pandas as pd

In [2]:
# Load the measurements through the project loader in src.data.breathe_data.
# NOTE(review): judging by the dataset name, rows with missing values are kept — confirm.
df = br.load_meas_from_excel("BR_O2_FEV1_FEF2575_PEF_Nan")

In [5]:
def apply_new_smoothing(df, col, scale, shift):
    """
    Add an outlier-corrected, smoothed version of `col` to the frame as `ec{col}`.

    Works on df with NaN in col: rows where `col` is NaN are skipped by both
    processing steps and keep NaN in `ec{col}`.

    Steps, applied to the non-NaN rows only:
      1. `smooth.identify_and_replace_outliers_up` is called on those rows for
         the `ec{col}` column; the rows are replaced by the frame it returns.
      2. `smooth.smooth_vector(..., "max")` is called on the resulting
         `ec{col}` values, and its output is written back to `ec{col}`.

    Parameters
    ----------
    df : pandas.DataFrame
        Measurements to process; in this notebook, one `groupby("ID")` group.
    col : str
        Name of the column to process.
    scale, shift
        Forwarded unchanged to `smooth.identify_and_replace_outliers_up`.

    Returns
    -------
    pandas.DataFrame
        A copy of `df` carrying the new (or overwritten) `ec{col}` column.
        The frame passed in is not modified.
    """
    ec_col = f"ec{col}"

    # Work on a copy: groupby.apply does not support functions that mutate
    # the group object they receive.
    df = df.copy()
    df[ec_col] = df[col]

    # Index-free boolean array: `.iloc` is positional and must not be given a
    # mask that carries its own index.
    no_nan_mask = df[ec_col].notna().to_numpy()
    if not no_nan_mask.any():
        # Nothing to correct or smooth; `ec{col}` stays all-NaN.
        return df

    # Positional row replacement, as in the original `.iloc` assignment.
    df.iloc[no_nan_mask] = smooth.identify_and_replace_outliers_up(
        df[no_nan_mask], ec_col, scale=scale, shift=shift
    )

    # Single `.loc` write instead of chained indexing, which may write to a
    # temporary object and is a no-op under copy-on-write.
    df.loc[no_nan_mask, ec_col] = smooth.smooth_vector(
        df.loc[no_nan_mask, ec_col].to_numpy(), "max"
    )

    return df


def _smooth_column_per_id(frame, col, scale, shift):
    """Run `apply_new_smoothing` on each ID's rows and return the result with a fresh index."""
    per_id = frame.groupby(by="ID").apply(
        lambda group: apply_new_smoothing(group, col, scale, shift)
    )
    return per_id.reset_index(drop=True)


# Smooth FEV1 first, then FEF2575, both with scale=3 and shift=0.5.
df = df.pipe(_smooth_column_per_id, "FEV1", 3, 0.5).pipe(
    _smooth_column_per_id, "FEF2575", 3, 0.5
)
# df = (
#     df.groupby(by="ID")
#     .apply(lambda x: apply_new_smoothing(x, "PEF (L/s)", 3, 1))
#     .reset_index(drop=True)
# )

ID 101 - Outlier up for ecFEV1, day 2023-10-24: 2.52 > 2.19 and > 2.22 (mean=1.72,std=0.16), update to 1.64
ID 105 - Outlier up for ecFEV1, day 2019-01-18: 2.19 > 1.76 and > 2.08 (mean=1.58,std=0.06), update to 2.01
ID 107 - Outlier up for ecFEV1, day 2023-09-19: 2.71 > 2.32 and > 2.29 (mean=1.78,std=0.18), update to 1.66
ID 123 - Outlier up for ecFEV1, day 2019-09-18: 3.86 > 3.76 and > 3.72 (mean=3.22,std=0.18), update to 3.21
ID 146 - Outlier up for ecFEV1, day 2019-10-26: 1.56 > 1.23 and > 1.53 (mean=1.03,std=0.07), update to 2.24
ID 221 - Outlier up for ecFEV1, day 2022-08-09: 3.03 > 2.45 and > 2.44 (mean=1.94,std=0.17), update to 2.06
ID 244 - Outlier up for ecFEV1, day 2022-08-27: 4.38 > 3.86 and > 3.61 (mean=3.11,std=0.25), update to 3.06
ID 104 - Outlier up for ecFEF2575, day 2023-07-26: 1.73 > 1.72 and > 1.67 (mean=1.17,std=0.18), update to 1.12
ID 105 - Outlier up for ecFEF2575, day 2019-01-18: 2.48 > 1.81 and > 1.99 (mean=1.49,std=0.11), update to 2.47
ID 107 - Outlier up fo

In [8]:
# Display the (rows, columns) size of df at this point of the notebook.
df.shape

(48978, 17)

In [19]:
# Keep only the rows where both smoothed measurements are available and leave
# out the PEF columns. The PEF smoothing call is currently commented out, so
# this notebook does not create an ec-prefixed PEF column itself; missing
# labels are therefore ignored instead of raising a KeyError.
# NOTE(review): if the loaded file is expected to provide "ecPEF", confirm.
df1 = df.dropna(subset=["ecFEV1", "ecFEF2575"]).drop(
    columns=["PEF", "ecPEF"], errors="ignore"
)

In [20]:
# Print the size of the NaN-filtered frame and preview its first rows.
print(df1.shape)
df1.head()

(37062, 15)


Unnamed: 0,ID,Date Recorded,FEV1,O2 Saturation,FEF2575,ecFEV1,ecFEF2575,Age,Sex,Height,Predicted FEV1,Healthy O2 Saturation,ecFEV1 % Predicted,FEV1 % Predicted,O2 Saturation % Healthy
0,101,2019-01-25,1.31,97.0,0.54,1.31,0.67,53,Male,173.0,3.610061,97.150104,36.287474,36.287474,99.845492
1,101,2019-01-26,1.31,98.0,0.57,1.31,0.67,53,Male,173.0,3.610061,97.150104,36.287474,36.287474,100.874827
2,101,2019-01-27,1.31,96.0,0.67,1.31,0.69,53,Male,173.0,3.610061,97.150104,36.287474,36.287474,98.816157
3,101,2019-01-28,1.3,96.0,0.69,1.31,0.69,53,Male,173.0,3.610061,97.150104,36.287474,36.01047,98.816157
4,101,2019-01-29,1.28,98.0,0.6,1.3,0.69,53,Male,173.0,3.610061,97.150104,36.01047,35.456463,100.874827


In [21]:
# Infer AR and IA for all data points given an individual's age, sex, height, FEV1 and O2 saturation measurements
def infer_AR_IA_for_ID(df):
    """
    Run the point-in-time inference on every row of one individual's data.

    The model is built once from the first row's Height, Age and Sex, then two
    inference queries are evaluated per row (see `infer_and_unpack`).

    Parameters
    ----------
    df : pandas.DataFrame
        Rows of a single ID (one `groupby("ID")` group). Must provide the
        columns Height, Age, Sex, ecFEV1, "O2 Saturation" and ecFEF2575.
        The frame is not modified.

    Returns
    -------
    pandas.Series
        One 8-tuple per row, indexed 0..n-1 in row order:
        (AR, IA, HFEV1, HO2Sat) from the query without FEF25-75 evidence,
        followed by the same four from the query with it.
    """
    # Fresh 0..n-1 index on a new object. The group handed over by
    # groupby.apply must not be mutated, so no `inplace=True` here.
    df = df.reset_index(drop=True)

    _, inf_alg, HFEV1, ecFEV1, AR, HO2Sat, _, IA, _, O2Sat, ecFEF2575prctFEV1 = (
        mb.o2sat_fev1_fef2575_point_in_time_model_shared_healthy_vars(
            # Positional access to the first row: independent of index labels.
            df.Height.iloc[0],
            df.Age.iloc[0],
            df.Sex.iloc[0],
        )
    )

    def infer_and_unpack(ecfev1_obs, o2sat_obs, ecfef2575_obs):
        """
        Query AR, IA, HFEV1 and HO2Sat twice for one observation row.

        The first query uses ecFEV1 and O2 saturation as evidence; the second
        additionally uses ecFEF2575 expressed as a percentage of ecFEV1.
        Returns the `.values` of the eight query results as a flat tuple.
        """
        res = ih.infer_on_factor_graph(
            inf_alg,
            [AR, IA, HFEV1, HO2Sat],
            [
                [ecFEV1, ecfev1_obs],
                [O2Sat, o2sat_obs],
            ],
        )
        res_with_fef2575 = ih.infer_on_factor_graph(
            inf_alg,
            [AR, IA, HFEV1, HO2Sat],
            [
                [ecFEV1, ecfev1_obs],
                [O2Sat, o2sat_obs],
                # FEF25-75 enters the model as a percentage of ecFEV1.
                [ecFEF2575prctFEV1, ecfef2575_obs / ecfev1_obs * 100],
            ],
        )
        return (
            res[AR.name].values,
            res[IA.name].values,
            res[HFEV1.name].values,
            res[HO2Sat.name].values,
            res_with_fef2575[AR.name].values,
            res_with_fef2575[IA.name].values,
            res_with_fef2575[HFEV1.name].values,
            res_with_fef2575[HO2Sat.name].values,
        )

    res = df.apply(
        lambda row: infer_and_unpack(
            row["ecFEV1"], row["O2 Saturation"], row["ecFEF2575"]
        ),
        axis=1,
    )
    return res


# Run the per-ID inference; the result holds one 8-tuple per measurement row.
# NOTE(review): this runs on `df`, not on the NaN-filtered `df1` — confirm which
# frame is intended, since `df` may still contain NaN observations.
resraw = df.groupby("ID").apply(infer_AR_IA_for_ID)
# resraw = df.iloc[np.r_[10:13, 1000:1007]].groupby("ID").apply(infer_AR_IA_for_ID)

# Expand each tuple into one column per inferred quantity, name the columns by
# their position in the tuple, and drop the within-ID row counter.
res = (
    resraw.apply(pd.Series)
    .reset_index()
    .rename(
        columns=dict(
            enumerate(
                [
                    "AR",
                    "IA",
                    "HFEV1",
                    "HO2Sat",
                    "AR_FEF2575",
                    "IA_FEF2575",
                    "HFEV1_FEF2575",
                    "HO2Sat_FEF2575",
                ]
            )
        )
    )
    .drop(columns="level_1")
)
res

  return outgoing_message / sum(outgoing_message)
  return outgoing_message / sum(outgoing_message)
  return outgoing_message / sum(outgoing_message)
  return outgoing_message / sum(outgoing_message)
  return outgoing_message / sum(outgoing_message)
  return outgoing_message / sum(outgoing_message)
  return outgoing_message / sum(outgoing_message)
  return outgoing_message / sum(outgoing_message)
  return outgoing_message / sum(outgoing_message)
  return outgoing_message / sum(outgoing_message)
  return outgoing_message / sum(outgoing_message)
  return outgoing_message / sum(outgoing_message)
  return outgoing_message / sum(outgoing_message)
  return outgoing_message / sum(outgoing_message)
  return outgoing_message / sum(outgoing_message)
  return outgoing_message / sum(outgoing_message)
  return outgoing_message / sum(outgoing_message)
  return outgoing_message / sum(outgoing_message)
  return outgoing_message / sum(outgoing_message)
  return outgoing_message / sum(outgoing_message)


AssertionError: Error computing prior: The sum of the probabilities should be 1, got nan

In [None]:
# Variable nodes used here only to turn each inferred distribution into its mean.
# NOTE(review): the ranges and bin widths below presumably have to match the ones
# used by the model builder — confirm.
AR = mh.VariableNode("Airway resistance (%)", 0, 90, 2, prior=None)
IA = mh.VariableNode("Inactive alveoli (%)", 0, 30, 1, prior=None)
HFEV1 = mh.VariableNode("Healthy FEV1 (L)", 1, 6, 0.05, prior=None)
HO2Sat = mh.VariableNode("Healthy O2 saturation (%)", 90, 100, 0.5, prior=None)

# Add one "<column> mean" column per inferred distribution: first for the query
# without FEF25-75 evidence, then for the "_FEF2575" variants.
for column_suffix in ("", "_FEF2575"):
    for column_stem, node in (
        ("AR", AR),
        ("IA", IA),
        ("HFEV1", HFEV1),
        ("HO2Sat", HO2Sat),
    ):
        source_column = f"{column_stem}{column_suffix}"
        res[f"{source_column} mean"] = res[source_column].apply(node.get_mean)

In [10]:
# List the columns currently present in df.
df.columns

Index(['ID', 'Date Recorded', 'FEV1', 'O2 Saturation', 'FEF2575', 'PEF',
       'ecFEV1', 'Age', 'Sex', 'Height', 'Predicted FEV1',
       'Healthy O2 Saturation', 'ecFEV1 % Predicted', 'FEV1 % Predicted',
       'O2 Saturation % Healthy'],
      dtype='object')

In [11]:
# Concatenate the inferred columns of `res` (the distributions and their means)
# to the original dataframe; the ID column of `res` is dropped so it is not duplicated.
# NOTE(review): concat with axis=1 matches rows on the index, so this presumably
# relies on `res` having the same row order and index as `df` — confirm.
df1 = pd.concat([df, res.drop(columns=["ID"])], axis=1)
df1.head()

Unnamed: 0,ID,Date Recorded,FEV1,O2 Saturation,FEF2575,PEF,ecFEV1,Age,Sex,Height,...,FEV1 % Predicted,O2 Saturation % Healthy,AR,IA,HFEV1,HO2Sat,AR mean,IA mean,HFEV1 mean,HO2Sat mean
0,101,2021-05-25,1.68,98.0,1.17,227.0,1.69,53,Male,173.0,...,46.536607,100.874827,"[0.00020140268126914501, 0.0002563950730509337...","[0.8561467340582659, 0.13340126187709733, 0.01...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",47.601368,0.654616,3.285826,97.643061
1,101,2021-05-26,1.65,98.0,1.06,236.0,1.69,53,Male,173.0,...,45.705597,100.874827,"[0.00020140268126914501, 0.0002563950730509337...","[0.8561467340582659, 0.13340126187709733, 0.01...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",47.601368,0.654616,3.285826,97.643061
2,101,2021-05-27,1.69,98.0,1.12,183.0,1.69,53,Male,173.0,...,46.813611,100.874827,"[0.00020140268126914501, 0.0002563950730509337...","[0.8561467340582659, 0.13340126187709733, 0.01...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",47.601368,0.654616,3.285826,97.643061
3,101,2021-05-28,1.67,98.0,1.08,175.0,1.69,53,Male,173.0,...,46.259604,100.874827,"[0.00020140268126914501, 0.0002563950730509337...","[0.8561467340582659, 0.13340126187709733, 0.01...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",47.601368,0.654616,3.285826,97.643061
4,101,2021-05-29,1.69,98.0,1.16,171.0,1.76,53,Male,173.0,...,46.813611,100.874827,"[0.00032111950996979114, 0.0004109729606994951...","[0.8452978344899926, 0.1425778679219038, 0.011...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",44.593062,0.667227,3.288958,97.621469


In [12]:
# List the columns of the combined frame.
df1.columns

Index(['ID', 'Date Recorded', 'FEV1', 'O2 Saturation', 'FEF2575', 'PEF',
       'ecFEV1', 'Age', 'Sex', 'Height', 'Predicted FEV1',
       'Healthy O2 Saturation', 'ecFEV1 % Predicted', 'FEV1 % Predicted',
       'O2 Saturation % Healthy', 'AR', 'IA', 'HFEV1', 'HO2Sat', 'AR mean',
       'IA mean', 'HFEV1 mean', 'HO2Sat mean'],
      dtype='object')

In [None]:
# Write the combined frame (df1) to an Excel workbook under ExcelFiles/BR,
# relative to the path returned by dh.get_path_to_main(); the index is not written.
output_path = (
    dh.get_path_to_main()
    + "ExcelFiles/BR/BR_obs_O2_FEV1_FEF2575_infer_AR_IA_HFEV1_HO2Sat.xlsx"
)
df1.to_excel(output_path, index=False)