In this notebook I will plot all the longidutinal data for each individual separately

In [1]:
import src.data.breathe_data as br
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import src.data.helpers as dh
from datetime import timedelta
import src.o2_fev1_analysis.smooth as smooth

In [2]:
# df = br.build_O2_FEV1_FEF2575_df(meas_file=2, remove_nan=False)
df = br.load_meas_from_excel("BR_O2_FEV1_FEF2575_PEF_Nan")

In [3]:
# df.to_excel(f"{dh.get_path_to_main()}ExcelFiles/BR/BR_O2_FEV1_FEF2575_PEF_Nan.xlsx", index=False)

In [4]:
def apply_new_smoothing(df, col):
    df[f"ec{col}_2"] = df[col]
    df = smooth.identify_and_replace_outliers_up(df, f"ec{col}_2")
    df[f"ec{col}_2"] = smooth.smooth_vector(df[f"ec{col}_2"].to_numpy(), "max")
    return df


df1 = df[0:2000].groupby(by="ID").apply(apply_new_smoothing, "FEV1").drop(
    columns=["ID"]
).reset_index().drop(columns=["level_1"])
df1 = df1.groupby(by="ID").apply(apply_new_smoothing, "FEF2575").drop(
    columns=["ID"]
).reset_index().drop(columns=["level_1"])
df1 = df1.groupby(by="ID").apply(apply_new_smoothing, "PEF").drop(columns=['ID']).reset_index().drop(columns=['level_1'])

ID 101 - Outlier up for ecFEV1_2, day 2023-10-24: 2.52 > 1.7189655172413794, update to 1.66
ID 101 - Outlier up for ecPEF_2, day 2021-06-24: 271.0 > 191.0, update to 206.0
ID 101 - Outlier up for ecPEF_2, day 2022-08-19: 547.0 > 222.13793103448276, update to 207.0
ID 101 - Outlier up for ecPEF_2, day 2023-03-26: 602.0 > 281.2916666666667, update to 274.0
ID 101 - Outlier up for ecPEF_2, day 2023-07-18: 530.0 > 282.8965517241379, update to 277.0
ID 101 - Outlier up for ecPEF_2, day 2023-08-27: 607.0 > 274.8965517241379, update to 266.0
ID 101 - Outlier up for ecPEF_2, day 2023-10-26: 572.0 > 266.92857142857144, update to 251.0


In [4]:
df.head()

Unnamed: 0,ID,Date Recorded,FEV1,O2 Saturation,FEF2575,PEF,ecFEV1,Age,Sex,Height,Predicted FEV1,Healthy O2 Saturation,ecFEV1 % Predicted,FEV1 % Predicted,O2 Saturation % Healthy
0,101,2019-01-25,1.31,97.0,0.54,,1.31,53,Male,173.0,3.610061,97.150104,36.287474,36.287474,99.845492
1,101,2019-01-26,1.31,98.0,0.57,,1.31,53,Male,173.0,3.610061,97.150104,36.287474,36.287474,100.874827
2,101,2019-01-27,1.31,96.0,0.67,,1.31,53,Male,173.0,3.610061,97.150104,36.287474,36.287474,98.816157
3,101,2019-01-28,1.3,96.0,0.69,,1.31,53,Male,173.0,3.610061,97.150104,36.287474,36.01047,98.816157
4,101,2019-01-29,1.28,98.0,0.6,,1.3,53,Male,173.0,3.610061,97.150104,36.01047,35.456463,100.874827


In [5]:
def plot_profile_for_ID(df_for_ID):
    df_for_ID = df_for_ID.reset_index()
    fig = make_subplots(
        rows=4,
        cols=1,
        shared_xaxes=True,
        vertical_spacing=0.04,
    )
    fig.add_trace(
        go.Scatter(
            y=df_for_ID["O2 Saturation"], x=df_for_ID["Date Recorded"], mode="markers"
        ),
        row=1,
        col=1,
    )
    fig.add_trace(
        go.Scatter(y=df_for_ID.FEV1, x=df_for_ID["Date Recorded"], mode="markers"),
        row=2,
        col=1,
    )
    fig.update_traces(marker=dict(color="yellow"), row=2, col=1)
    fig.add_trace(
        go.Scatter(y=df_for_ID.ecFEV1, x=df_for_ID["Date Recorded"], mode="markers"),
        row=2,
        col=1,
    )
    fig.add_trace(
        go.Scatter(
            y=df_for_ID.FEF2575,
            x=df_for_ID["Date Recorded"],
            mode="markers",
        ),
        row=3,
        col=1,
    )
    fig.update_traces(marker=dict(color="yellow"), row=3, col=1)
    fig.add_trace(
        go.Scatter(y=df_for_ID.ecFEF2575, x=df_for_ID["Date Recorded"], mode="markers"),
        row=3,
        col=1,
    )
    fig.add_trace(
        go.Scatter(
            y=df_for_ID.PEF / 60,
            x=df_for_ID["Date Recorded"],
            mode="markers",
        ),
        row=4,
        col=1,
    )
    fig.update_traces(marker=dict(color="yellow"), row=4, col=1)
    fig.add_trace(
        go.Scatter(
            y=df_for_ID.ecPEF / 60, x=df_for_ID["Date Recorded"], mode="markers"
        ),
        row=4,
        col=1,
    )
    fig.update_yaxes(
        title="SpO2<br>(%)",
        row=1,
        col=1,
        range=[min(df["O2 Saturation"]) * 0.98, max(df["O2 Saturation"]) * 1.02],
    )
    fig.update_yaxes(
        title="FEV1<br>(L)",
        row=2,
        col=1,
        range=[min(df.FEV1) * 0.98, max(df.FEV1) * 1.02],
    )
    # fig.update_yaxes(
    #     title="ecFEV1<br>(L)",
    #     row=3,
    #     col=1,
    #     range=[min(df.ecFEV1) * 0.98, max(df.ecFEV1) * 1.02],
    # )
    fig.update_yaxes(
        title="FEF2575<br>(L/s)",
        row=3,
        col=1,
        range=[min(df.FEF2575) * 0.98, max(df.FEF2575) * 1.02],
    )
    fig.update_yaxes(
        title="PEF<br>(L/s)",
        row=4,
        col=1,
        range=[min(df.PEF) * 0.98, max(df.PEF) * 1.02],
    )
    fig.update_yaxes(nticks=10)
    fig.update_traces(marker=dict(size=3))
    title = f"Measures for ID {df_for_ID.ID[0]}"
    fig.update_layout(
        font=dict(size=8), showlegend=False, title=title, height=1000, width=1400
    )
    # fig.show()
    fig.write_image(f"{dh.get_path_to_main()}PlotsBreathe/ID_profiles/{title}.pdf")
    return -1

In [None]:
df[0:10000].groupby(by="ID").apply(plot_profile_for_ID)

In [5]:
df_for_ID = a[a.ID == "101"]
df_for_ID.head()

Unnamed: 0,ID,Date Recorded,FEV1,O2 Saturation,FEF2575,PEF,ecFEV1,ecFEF2575,ecPEF,Age,Sex,Height,Predicted FEV1,Healthy O2 Saturation,ecFEV1 % Predicted,FEV1 % Predicted,O2 Saturation % Healthy,ecFEV1_2
0,101,2019-01-25,1.31,97.0,0.54,,1.31,0.67,,53,Male,173.0,3.610061,97.150104,36.287474,36.287474,99.845492,1.31
1,101,2019-01-26,1.31,98.0,0.57,,1.31,0.67,,53,Male,173.0,3.610061,97.150104,36.287474,36.287474,100.874827,1.31
2,101,2019-01-27,1.31,96.0,0.67,,1.31,0.69,,53,Male,173.0,3.610061,97.150104,36.287474,36.287474,98.816157,1.31
3,101,2019-01-28,1.3,96.0,0.69,,1.31,0.69,,53,Male,173.0,3.610061,97.150104,36.287474,36.01047,98.816157,1.31
4,101,2019-01-29,1.28,98.0,0.6,,1.3,0.69,,53,Male,173.0,3.610061,97.150104,36.01047,35.456463,100.874827,1.3


In [16]:
dftmp = df_for_ID

df_for_ID["ecFEV1_2"] = df_for_ID.FEV1
smooth.identify_and_replace_outliers_up(df_for_ID, "FEV1")
# df_for_ID["ecFEV1_3"] = smooth.smooth_vector(df_for_ID["ecFEV1_2"].to_numpy(), "max")



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,ID,Date Recorded,FEV1,O2 Saturation,FEF2575,PEF,ecFEV1,ecFEF2575,ecPEF,Age,Sex,Height,Predicted FEV1,Healthy O2 Saturation,ecFEV1 % Predicted,FEV1 % Predicted,O2 Saturation % Healthy,ecFEV1_2
0,101,2019-01-25,1.31,97.0,0.54,,1.31,0.67,,53,Male,173.0,3.610061,97.150104,36.287474,36.287474,99.845492,1.31
1,101,2019-01-26,1.31,98.0,0.57,,1.31,0.67,,53,Male,173.0,3.610061,97.150104,36.287474,36.287474,100.874827,1.31
2,101,2019-01-27,1.31,96.0,0.67,,1.31,0.69,,53,Male,173.0,3.610061,97.150104,36.287474,36.287474,98.816157,1.31
3,101,2019-01-28,1.30,96.0,0.69,,1.31,0.69,,53,Male,173.0,3.610061,97.150104,36.287474,36.010470,98.816157,1.30
4,101,2019-01-29,1.28,98.0,0.60,,1.30,0.69,,53,Male,173.0,3.610061,97.150104,36.010470,35.456463,100.874827,1.28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1735,101,2023-11-08,1.75,98.0,1.05,273.0,1.75,1.17,273.0,53,Male,173.0,3.610061,97.150104,48.475633,48.475633,100.874827,1.75
1736,101,2023-11-09,1.68,98.0,1.08,252.0,1.75,1.14,273.0,53,Male,173.0,3.610061,97.150104,48.475633,46.536607,100.874827,1.68
1737,101,2023-11-10,1.71,98.0,1.14,249.0,1.73,1.16,275.0,53,Male,173.0,3.610061,97.150104,47.921625,47.367618,100.874827,1.71
1738,101,2023-11-11,1.73,98.0,1.16,275.0,1.73,1.16,275.0,53,Male,173.0,3.610061,97.150104,47.921625,47.921625,100.874827,1.73


In [15]:
fig = make_subplots(
    rows=1,
    cols=1,
    shared_xaxes=True,
    vertical_spacing=0.04,
)
fig.add_trace(
    go.Scatter(y=dftmp["FEV1"], x=dftmp["Date Recorded"], mode="markers"),
    row=1,
    col=1,
)
fig.add_trace(
    go.Scatter(y=dftmp.ecFEV1_2, x=dftmp["Date Recorded"], mode="markers"),
    row=1,
    col=1,
)
fig.update_traces(marker=dict(size=3))
fig.update_layout(font=dict(size=8))
fig.show()