In [None]:
# %pip install yasa

In [3]:
import pandas as pd
import yasa

In [29]:
labels_df = pd.read_csv('Dataset\Labels.csv')
agitation_df = labels_df[labels_df['type'] == 'Agitation']

sleep_df = pd.read_csv('Dataset\Sleep.csv')

In [30]:
# Given the structure of sleep_df, we will convert the date columns in both sleep_df and agitation_df to datetime format
# to enable easier manipulation and comparison of dates and times.

# Convert the 'date' column in sleep_df to datetime
sleep_df['date'] = pd.to_datetime(sleep_df['date'])

# Load agitation_df from the previously filtered dataset and convert its 'date' column to datetime
agitation_df['date'] = pd.to_datetime(agitation_df['date'])

# We will now proceed to implement the logic to mark the agitation in sleep_df as explained earlier.
# This involves creating the 'agitation' column in sleep_df and iterating through the agitation_df to mark the relevant sleep data.

# Initialize the 'agitation' column in sleep_df with 0s
sleep_df['agitation'] = 0

# Process agitation instances at 6pm
for index, row in agitation_df[agitation_df['date'].dt.hour == 18].iterrows():
    patient_id = row['patient_id']
    date = row['date'].normalize()  # Normalize to get the date without time for comparison
    # Mark sleep data between 12pm and 6pm on the same day with agitation = 1
    start_time = date + pd.Timedelta(hours=12)  # 12pm on the day of agitation
    end_time = date + pd.Timedelta(hours=18)  # 6pm on the day of agitation
    sleep_df.loc[(sleep_df['patient_id'] == patient_id) &
                 (sleep_df['date'] >= start_time) &
                 (sleep_df['date'] <= end_time), 'agitation'] = 1

# Process agitation instances at 12pm
for index, row in agitation_df[agitation_df['date'].dt.hour == 12].iterrows():
    patient_id = row['patient_id']
    date = row['date'].normalize()  # Normalize to get the date without time for comparison
    # Mark sleep data from 6pm of the previous day to 12pm of the day of agitation with agitation = 1
    start_time = date - pd.Timedelta(hours=6)  # 6pm on the previous day
    end_time = date + pd.Timedelta(hours=12)  # 12pm on the day of agitation
    sleep_df.loc[(sleep_df['patient_id'] == patient_id) &
                 (sleep_df['date'] > start_time) &
                 (sleep_df['date'] < end_time), 'agitation'] = 1

# Display a summary of the changes made to confirm the logic has been applied correctly
sleep_df['agitation'].value_counts()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  agitation_df['date'] = pd.to_datetime(agitation_df['date'])


agitation
0    449241
1     12182
Name: count, dtype: int64

In [15]:
sleep_df[sleep_df['agitation'] == 1]

Unnamed: 0,patient_id,date,state,heart_rate,respiratory_rate,snoring,agitation
2152,16f4b,2019-04-14 00:44:00,AWAKE,69.0,19.0,False,1
2153,16f4b,2019-04-14 00:45:00,AWAKE,56.0,13.0,False,1
2154,16f4b,2019-04-14 00:46:00,AWAKE,63.0,24.0,False,1
2155,16f4b,2019-04-14 00:47:00,AWAKE,65.0,15.0,False,1
2156,16f4b,2019-04-14 00:48:00,AWAKE,54.0,14.0,False,1
...,...,...,...,...,...,...,...
412290,ec812,2019-05-11 07:22:00,LIGHT,59.0,15.0,False,1
412291,ec812,2019-05-11 07:23:00,LIGHT,60.0,15.0,False,1
412292,ec812,2019-05-11 07:24:00,LIGHT,60.0,15.0,False,1
412293,ec812,2019-05-11 07:25:00,LIGHT,59.0,15.0,False,1


In [16]:
agitation_df[agitation_df['patient_id'] == '16f4b']

Unnamed: 0,patient_id,date,type
2,16f4b,2019-04-11 12:00:22,Agitation
4,16f4b,2019-04-14 12:00:07,Agitation
5,16f4b,2019-04-15 18:00:24,Agitation
6,16f4b,2019-04-16 18:00:38,Agitation
11,16f4b,2019-04-21 12:00:55,Agitation
12,16f4b,2019-04-22 12:00:10,Agitation
21,16f4b,2019-04-26 12:00:19,Agitation
104,16f4b,2019-05-10 12:00:40,Agitation
110,16f4b,2019-05-11 18:01:01,Agitation
126,16f4b,2019-05-14 12:01:01,Agitation


In [31]:
# Let's attempt an alternative, more efficient approach to avoid the internal error encountered previously.

# Define functions to determine the window start and end for each row based on the date and time.
def determine_window_type(row):
    hour = row['date'].hour
    if 12 <= hour < 18:  # Afternoon window
        return 'afternoon'
    else:  # Nighttime window (considering hours outside 12-18 fall into the night window of the previous or current day)
        return 'night'

def window_start(row):
    if row['window_type'] == 'afternoon':
        return pd.Timestamp(row['date'].date()) + pd.Timedelta(hours=12)
    else:  # For nighttime, we need to check if it's before or after midnight to adjust the start time accordingly
        if row['date'].hour < 12:  # This is part of the night window that starts on the previous day
            return pd.Timestamp(row['date'].date() - pd.Timedelta(days=1)) + pd.Timedelta(hours=18)
        else:
            return pd.Timestamp(row['date'].date()) + pd.Timedelta(hours=18)

def window_end(row):
    if row['window_type'] == 'afternoon':
        return pd.Timestamp(row['date'].date()) + pd.Timedelta(hours=18)  # 6pm of the same day
    else:  # For nighttime, we adjust the end time based on if the time is before noon (ending today at noon) or after 6pm (ending tomorrow at noon)
        if row['date'].hour < 12:  # This part of the night window ends today at noon
            return pd.Timestamp(row['date'].date()) + pd.Timedelta(hours=12)
        else:
            return pd.Timestamp(row['date'].date() + pd.Timedelta(days=1)) + pd.Timedelta(hours=12)

# Apply the functions to determine window type, start, and end for each row
sleep_df['window_type'] = sleep_df.apply(determine_window_type, axis=1)
sleep_df['window_start'] = sleep_df.apply(window_start, axis=1)
sleep_df['window_end'] = sleep_df.apply(window_end, axis=1)


In [20]:
# Now aggregate the data based on the window start, window end, and window type
# This will give us the unique windows and the count of entries in each window
aggregated_df = sleep_df.groupby(['patient_id', 'window_start', 'window_end', 'window_type'])

# Display the first few rows of the aggregated data to ensure the logic has been correctly applied
aggregated_df.head()

Unnamed: 0,patient_id,date,state,heart_rate,respiratory_rate,snoring,agitation,window_type,window_start,window_end
0,0f352,2019-06-25 22:53:00,AWAKE,69.0,14.0,False,0,night,2019-06-25 18:00:00,2019-06-26 12:00:00
1,0f352,2019-06-25 22:54:00,AWAKE,66.0,14.0,False,0,night,2019-06-25 18:00:00,2019-06-26 12:00:00
2,0f352,2019-06-25 22:55:00,AWAKE,70.0,14.0,False,0,night,2019-06-25 18:00:00,2019-06-26 12:00:00
3,0f352,2019-06-25 22:56:00,AWAKE,70.0,13.0,False,0,night,2019-06-25 18:00:00,2019-06-26 12:00:00
4,0f352,2019-06-25 22:57:00,AWAKE,68.0,13.0,False,0,night,2019-06-25 18:00:00,2019-06-26 12:00:00
...,...,...,...,...,...,...,...,...,...,...
461320,f220c,2019-06-30 09:09:00,AWAKE,76.0,14.0,False,0,night,2019-06-29 18:00:00,2019-06-30 12:00:00
461321,f220c,2019-06-30 09:10:00,AWAKE,73.0,18.0,False,0,night,2019-06-29 18:00:00,2019-06-30 12:00:00
461322,f220c,2019-06-30 09:11:00,AWAKE,69.0,11.0,False,0,night,2019-06-29 18:00:00,2019-06-30 12:00:00
461323,f220c,2019-06-30 09:12:00,AWAKE,75.0,11.0,False,0,night,2019-06-29 18:00:00,2019-06-30 12:00:00


In [32]:
snoring_map = {True: 1, False: 0}
sleep_df['snoring'] = sleep_df['snoring'].map(snoring_map)

sleep_df['state'] = sleep_df['state'].str.lower()
states_map = {'awake': 0, 'light': 1, 'deep': 2, 'rem': 3}
sleep_df['state'] = sleep_df['state'].map(states_map)

In [34]:
sleep_df.to_csv('Dataset\Processed_Sleep.csv', index=False)

In [35]:
processed_sleep_df = pd.read_csv('Dataset\Processed_Sleep.csv')
processed_sleep_df['date'] = pd.to_datetime(processed_sleep_df['date'])

In [37]:
processed_sleep_df

Unnamed: 0,patient_id,date,state,heart_rate,respiratory_rate,snoring,agitation,window_type,window_start,window_end
0,0f352,2019-06-25 22:53:00,0,69.0,14.0,0,0,night,2019-06-25 18:00:00,2019-06-26 12:00:00
1,0f352,2019-06-25 22:54:00,0,66.0,14.0,0,0,night,2019-06-25 18:00:00,2019-06-26 12:00:00
2,0f352,2019-06-25 22:55:00,0,70.0,14.0,0,0,night,2019-06-25 18:00:00,2019-06-26 12:00:00
3,0f352,2019-06-25 22:56:00,0,70.0,13.0,0,0,night,2019-06-25 18:00:00,2019-06-26 12:00:00
4,0f352,2019-06-25 22:57:00,0,68.0,13.0,0,0,night,2019-06-25 18:00:00,2019-06-26 12:00:00
...,...,...,...,...,...,...,...,...,...,...
461418,f220c,2019-06-30 10:47:00,0,76.0,20.0,0,0,night,2019-06-29 18:00:00,2019-06-30 12:00:00
461419,f220c,2019-06-30 10:48:00,0,73.0,21.0,0,0,night,2019-06-29 18:00:00,2019-06-30 12:00:00
461420,f220c,2019-06-30 10:49:00,0,65.0,18.0,0,0,night,2019-06-29 18:00:00,2019-06-30 12:00:00
461421,f220c,2019-06-30 10:50:00,0,75.0,15.0,0,0,night,2019-06-29 18:00:00,2019-06-30 12:00:00


In [39]:
processed_sleep_df = processed_sleep_df.sort_values(by=['patient_id', 'date'])

In [42]:
def heart_rate_variability(data):
    return data['heart_rate'].std()

def mean_respiratory_rate(data):
    return data['respiratory_rate'].mean()

def respiratory_rate_variability(data):
    return data['respiratory_rate'].std()

def snoring_counts(data):
    return data['snoring'].sum()

def calculate_WASO(data):
    # Assuming 'state' column exists with encoded states and 'date' is in datetime format
    awake_periods = data[data['state'] == 0]  # Filter only awake states
    if not awake_periods.empty:
        waso_duration = awake_periods['date'].diff().dt.total_seconds().sum() / 60  # Convert seconds to minutes
        return waso_duration
    else:
        return 0

def calculate_SOL(data):
    # Assuming the data is sorted by date
    first_sleep_index = data[data['state'] != 0].index.min()  # Get the index of the first non-awake state
    if pd.notna(first_sleep_index):
        # Use .iloc to access rows by integer location
        sol_duration = (data.loc[first_sleep_index, 'date'] - data.iloc[0]['date']).total_seconds() / 60
        return sol_duration
    else:
        return 0

def calculate_TST(data):
    sleep_periods = data[data['state'] != 0]
    if not sleep_periods.empty:
        tst_duration = sleep_periods['date'].diff().dt.total_seconds().sum() / 60
        return tst_duration
    else:
        return 0

def calculate_SE(data, tst, sol):
    total_duration = (data['date'].iloc[-1] - data['date'].iloc[0]).total_seconds() / 60
    if total_duration > 0:
        se_percentage = (tst / (total_duration + sol)) * 100
        return se_percentage
    else:
        return 0

def calculate_metrics(group):
    metrics = {}
    metrics['mean_heart_rate'] = group['heart_rate'].mean()
    metrics['heart_rate_variability'] = heart_rate_variability(group)
    metrics['mean_respiratory_rate'] = mean_respiratory_rate(group)
    metrics['respiratory_rate_variability'] = respiratory_rate_variability(group)
    metrics['snoring_counts'] = snoring_counts(group)
    metrics['WASO'] = calculate_WASO(group)
    metrics['SOL'] = calculate_SOL(group)
    tst = calculate_TST(group)
    metrics['TST'] = tst
    metrics['SE'] = calculate_SE(group, tst, metrics['SOL'])
    metrics['awake_Counts'] = group[group['state'] == 0].shape[0]
    
    # Check for any instance of agitation within the group
    metrics['agitation'] = group['agitation'].max()  # Assuming agitation is marked as 1 for presence
    
    return pd.Series(metrics)

aggregated_df = processed_sleep_df.groupby(['patient_id', 'window_start', 'window_end', 'window_type']).apply(calculate_metrics).reset_index()


In [45]:
aggregated_df[aggregated_df['agitation'] == 1]

Unnamed: 0,patient_id,window_start,window_end,window_type,mean_heart_rate,heart_rate_variability,mean_respiratory_rate,respiratory_rate_variability,snoring_counts,WASO,SOL,TST,SE,awake_Counts,agitation
10,16f4b,2019-04-13 18:00:00,2019-04-14 12:00:00,night,62.223577,9.519626,14.321138,2.350536,0.0,382.0,15.0,372.0,92.537313,130.0,1.0
18,16f4b,2019-04-20 18:00:00,2019-04-21 12:00:00,night,70.110837,6.473881,14.08867,2.058891,3.0,405.0,17.0,352.0,83.412322,79.0,1.0
19,16f4b,2019-04-21 18:00:00,2019-04-22 12:00:00,night,63.283757,11.216572,15.438356,2.714452,4.0,430.0,8.0,521.0,97.020484,75.0,1.0
23,16f4b,2019-04-25 18:00:00,2019-04-26 12:00:00,night,69.777414,6.21259,16.291326,2.285429,0.0,610.0,94.0,488.0,69.318182,323.0,1.0
45,16f4b,2019-05-09 18:00:00,2019-05-10 12:00:00,night,71.997938,7.793963,15.896907,2.74496,0.0,484.0,100.0,334.0,57.191781,292.0,1.0
48,16f4b,2019-05-11 12:00:00,2019-05-11 18:00:00,afternoon,67.722222,6.350536,16.794444,2.432824,0.0,169.0,72.0,287.0,66.589327,91.0,1.0
49,16f4b,2019-05-11 18:00:00,2019-05-12 12:00:00,night,68.51073,5.10606,16.39485,2.517542,0.0,865.0,0.0,953.0,100.0,516.0,1.0
52,16f4b,2019-05-13 18:00:00,2019-05-14 12:00:00,night,67.797633,5.795937,15.87574,2.515345,0.0,821.0,241.0,618.0,56.181818,655.0,1.0
316,93c14,2019-05-19 18:00:00,2019-05-20 12:00:00,night,57.208547,3.604259,13.663248,1.31291,27.0,533.0,19.0,576.0,93.811075,52.0,1.0
321,93c14,2019-05-24 18:00:00,2019-05-25 12:00:00,night,58.400697,4.784326,14.398955,1.63707,35.0,683.0,6.0,671.0,97.387518,105.0,1.0


In [46]:
aggregated_df.to_csv('Dataset\Aggregated_Sleep.csv', index=False)

In [48]:
# 16f4b	6/6/2019 18:00	6/7/2019 12:00	night

sleep_df['window_start'].dtype
# sleep_df[sleep_df['patient_id'] == '16f4b' & sleep_df['window_start'] == '6/6/2019 18:00' & sleep_df['window_end'] == '6/7/2019 12:00' & sleep_df['window_type'] == 'night']

dtype('<M8[ns]')

In [None]:
processed_sleep_df