An exacerbation's start can be spotted by the pattern [False, True] in Is Exacerbated. Similarly, it's [True, False] for an exacerbation's end.

Functions exported to `ex_labels_data.py`

In [47]:
import ex_labels_data
import numpy as np
import pandas as pd


In [2]:
# Load the exacerbation labels. Per the log line printed by this cell,
# exclude_no_ex=True drops individuals that have no measurement in an
# exacerbated period.
# reset_index() moves whatever index load() returns into ordinary columns
# (presumably that is what makes ID available as df.ID below — TODO confirm).
df = ex_labels_data.load(exclude_no_ex=True).reset_index()
df.head()

Dropping 57/103 individuals that don't have a measurement in exacerbated period
Exacerbated labels data from the predictive classifier has 7457 entries (1445 exacerbated, 6012 not exacerbated measurements, 0 NaN)


Unnamed: 0,ID,Date recorded,PatientNbr,Study,CalcDate,CalcDatedn,ScenType,Scenario,BaseExample,Measure,Frequency,Percentage,MSExample,Is Exacerbated
0,23,2015-08-05,1,SC,05-Aug-2015,1,0,Actual,0,,0,0,0,False
1,23,2015-08-06,1,SC,06-Aug-2015,2,0,Actual,0,,0,0,0,False
2,23,2015-08-07,1,SC,07-Aug-2015,3,0,Actual,0,,0,0,0,False
3,23,2015-08-08,1,SC,08-Aug-2015,4,0,Actual,0,,0,0,0,False
4,23,2015-08-09,1,SC,09-Aug-2015,5,0,Actual,0,,0,0,0,False


In [54]:
def get_ex_start_date(row):
    """Label one row as the start of an exacerbation, or pass its flag through.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing
            "Is Exacerbated" and "Is Exacerbated Prev".

    Returns:
        "start" when the row is exacerbated and the previous one was not;
        otherwise the row's own "Is Exacerbated" value, unchanged.

    A previous value that is neither True nor False (e.g. the NaN produced by
    ``shift(1)`` on the first row) never yields "start".
    """
    current = row["Is Exacerbated"]
    # `and` short-circuits, so the previous flag is only read for exacerbated rows.
    # The mirrored test (current == False, previous == True) would identify the
    # end of an exacerbation period; it is intentionally not reported here.
    begins_here = current == True and row["Is Exacerbated Prev"] == False
    return "start" if begins_here else current


def overwrite_when_in_transition_period(
    ex_state: pd.Series, n_days_before=2, n_days_after=2
):
    """Return more conservative labels by blanking out a window around each start.

    The underlying model labels periods as exacerbated (1) or not (0), but
    nobody becomes exacerbated from one day to the next. Every position within
    ``n_days_before`` rows before and ``n_days_after`` rows after a "start"
    entry (the start itself included) is therefore relabelled "transition".

    Args:
        ex_state: Series of states in which exacerbation onsets are marked "start".
        n_days_before: Number of positions before each start to relabel.
        n_days_after: Number of positions after each start to relabel.

    Returns:
        A modified copy of ``ex_state``; the input Series is left untouched.

    Windows are positional (``iloc``) and clipped to the Series bounds, so one
    position is treated as one day — assumes one row per day, TODO confirm.
    """
    relabelled = ex_state.copy()
    n_rows = len(ex_state)

    # Positional (0-based) locations of every "start" entry
    is_start = ex_state == "start"
    start_positions = np.where(is_start)[0]

    for position in start_positions:
        first = max(0, position - n_days_before)
        # +1 because the slice end is exclusive
        stop = min(n_rows, position + n_days_after + 1)
        relabelled.iloc[slice(first, stop)] = "transition"

    return relabelled

In [37]:
# Spot check on a single individual (ID "241"): derive the previous-row flag,
# the raw state, and the transition-adjusted state, then inspect a few rows.
test_df = (
    df.loc[df.ID == "241"]
    .assign(**{"Is Exacerbated Prev": lambda d: d["Is Exacerbated"].shift(1)})
    .assign(**{"Exacerbation State": lambda d: d.apply(get_ex_start_date, axis=1)})
    .assign(
        **{
            "Exacerbation State bis": lambda d: overwrite_when_in_transition_period(
                d["Exacerbation State"]
            )
        }
    )
)
test_df.iloc[30:41]

get_start_idx [37]
idx 37
idx change 35:40


Unnamed: 0,ID,Date recorded,PatientNbr,Study,CalcDate,CalcDatedn,ScenType,Scenario,BaseExample,Measure,Frequency,Percentage,MSExample,Is Exacerbated,Exacerbation State,Is Exacerbated Prev,Exacerbation State bis
7440,241,2016-04-08,104,SC,08-Apr-2016,31,0,Actual,0,,0,0,0,False,False,False,False
7441,241,2016-04-09,104,SC,09-Apr-2016,32,0,Actual,0,,0,0,0,False,False,False,False
7442,241,2016-04-10,104,SC,10-Apr-2016,33,0,Actual,0,,0,0,0,False,False,False,False
7443,241,2016-04-11,104,SC,11-Apr-2016,34,0,Actual,0,,0,0,0,False,False,False,False
7444,241,2016-04-12,104,SC,12-Apr-2016,35,0,Actual,0,,0,0,0,False,False,False,False
7445,241,2016-04-13,104,SC,13-Apr-2016,36,0,Actual,0,,0,0,0,False,False,False,transition
7446,241,2016-04-14,104,SC,14-Apr-2016,37,0,Actual,0,,0,0,0,False,False,False,transition
7447,241,2016-04-15,104,SC,15-Apr-2016,38,0,Actual,0,,0,0,0,True,start,False,transition
7448,241,2016-04-16,104,SC,16-Apr-2016,39,0,Actual,0,,0,0,0,True,True,True,transition
7449,241,2016-04-17,104,SC,17-Apr-2016,40,0,Actual,0,,0,0,0,True,True,True,transition


In [56]:
def mark_ex_transition_period(df, n_days_before=2, n_days_after=2):
    """Add an "Exacerbation State" column to ``df``, computed separately per ID.

    For each distinct ``df.ID`` the rows are processed in their existing order:
    the previous row's "Is Exacerbated" flag is obtained with ``shift(1)``
    (within that ID only), ``get_ex_start_date`` marks exacerbation starts, and
    ``overwrite_when_in_transition_period`` relabels the window around each
    start as "transition".

    Args:
        df: DataFrame with at least the columns "ID" and "Is Exacerbated".
            Modified in place. Rows are assumed to be in chronological order
            within each ID — TODO confirm against the loader.
        n_days_before: Rows before each start to mark as "transition".
        n_days_after: Rows after each start to mark as "transition".

    Returns:
        The same ``df`` object, with the "Exacerbation State" column added
        (or overwritten if it already existed).
    """
    # Start from an all-NaN column of object dtype: it ends up holding booleans
    # and strings, and writing those into a float64 column is an
    # incompatible-dtype upcast that pandas has deprecated.
    df["Exacerbation State"] = np.nan
    df["Exacerbation State"] = df["Exacerbation State"].astype(object)

    for patient_id in df.ID.unique():
        rows_for_id = df.ID == patient_id
        if not rows_for_id.any():
            # e.g. a NaN ID never compares equal to itself; those rows stay NaN.
            continue

        # Work on a positionally indexed copy so the window arithmetic in
        # overwrite_when_in_transition_period operates on this ID's rows only.
        df_for_ID = df[rows_for_id].copy().reset_index(drop=True)
        df_for_ID["Is Exacerbated Prev"] = df_for_ID["Is Exacerbated"].shift(1)
        df_for_ID["Exacerbation State"] = df_for_ID.apply(get_ex_start_date, axis=1)

        # to_numpy() drops the temporary 0..n-1 index so values are written
        # back by position into this ID's rows.
        df.loc[rows_for_id, "Exacerbation State"] = overwrite_when_in_transition_period(
            df_for_ID["Exacerbation State"],
            n_days_before=n_days_before,
            n_days_after=n_days_after,
        ).to_numpy()
    return df


# Note: the function adds the "Exacerbation State" column to df in place and
# returns that same frame, which is what this cell displays.
mark_ex_transition_period(df)

Unnamed: 0,ID,Date recorded,PatientNbr,Study,CalcDate,CalcDatedn,ScenType,Scenario,BaseExample,Measure,Frequency,Percentage,MSExample,Is Exacerbated,Exacerbation State
0,23,2015-08-05,1,SC,05-Aug-2015,1,0,Actual,0,,0,0,0,False,False
1,23,2015-08-06,1,SC,06-Aug-2015,2,0,Actual,0,,0,0,0,False,False
2,23,2015-08-07,1,SC,07-Aug-2015,3,0,Actual,0,,0,0,0,False,False
3,23,2015-08-08,1,SC,08-Aug-2015,4,0,Actual,0,,0,0,0,False,False
4,23,2015-08-09,1,SC,09-Aug-2015,5,0,Actual,0,,0,0,0,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7452,241,2016-04-20,104,SC,20-Apr-2016,43,0,Actual,0,,0,0,0,True,True
7453,241,2016-04-21,104,SC,21-Apr-2016,44,0,Actual,0,,0,0,0,True,True
7454,241,2016-04-22,104,SC,22-Apr-2016,45,0,Actual,0,,0,0,0,True,True
7455,241,2016-04-23,104,SC,23-Apr-2016,46,0,Actual,0,,0,0,0,True,True
