# Section 1: Load QC data and define criteria for the EDA stress factor

In [2]:
import pandas as pd
import numpy as np
import os

# === CONFIG ===
# The project folder defaults to the original location; set the
# STRESS_ANALYSIS_DIR environment variable to run the notebook from another machine
# or folder without editing this cell.
base_dir = os.environ.get(
    "STRESS_ANALYSIS_DIR", "C:/Users/lpnhu/Downloads/Stress_Testing_Analysis"
)
qc_path = os.path.join(base_dir, "eda_qc_reports", "eda_signal_quality_all_participants.csv")
output_path = os.path.join(base_dir, "eda_qc_reports", "eda_stress_features.csv")
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# === LOAD QC SUMMARY ===
qc_df = pd.read_csv(qc_path)

# Fail early, with a message naming the file, if a column used below is absent.
required_columns = {"participant_id", "date", "valid_EDA_day"}
missing_columns = required_columns - set(qc_df.columns)
if missing_columns:
    raise KeyError(f"{qc_path} is missing required columns: {sorted(missing_columns)}")

qc_df["date"] = pd.to_datetime(qc_df["date"])

# === FILTER TO VALID EDA DAYS ONLY ===
# Keep only rows flagged valid_EDA_day == 1, ordered by participant and then by date.
valid_df = qc_df[qc_df["valid_EDA_day"] == 1].copy()
valid_df = valid_df.sort_values(["participant_id", "date"])


In [3]:
# Helper to find 3-day continuous runs
def find_3day_windows(df):
    """Find every run of three consecutive calendar days for each participant.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``participant_id`` column and a datetime-like ``date``
        column. Every row is treated as a candidate day; any filtering (for
        example to valid days) must be done by the caller.

    Returns
    -------
    pandas.DataFrame
        One row per window with columns ``participant_id``, ``start_date``,
        ``mid_date`` and ``end_date``. Windows may overlap: a 4-day run yields
        two rows. The four columns are present even when no window is found.
    """
    columns = ["participant_id", "start_date", "mid_date", "end_date"]
    windows = []
    for pid, group in df.groupby("participant_id"):
        # Repeated rows for the same date would otherwise hide a genuine run
        # (e.g. days 1, 2, 2, 3), so collapse them before scanning.
        dates = group["date"].drop_duplicates().sort_values().tolist()
        # Slide a 3-wide window over the sorted dates.
        for d1, d2, d3 in zip(dates, dates[1:], dates[2:]):
            if (d2 - d1).days == 1 and (d3 - d2).days == 1:
                windows.append({
                    "participant_id": pid,
                    "start_date": d1,
                    "mid_date": d2,
                    "end_date": d3
                })
    # Passing ``columns`` keeps the schema stable when ``windows`` is empty, so
    # callers can still index result["participant_id"].
    return pd.DataFrame(windows, columns=columns)

# Search the valid-day table for 3-day consecutive runs and report how many were found.
three_day_windows = find_3day_windows(valid_df)
print(f"✅ Found {len(three_day_windows)} 3-day continuous valid windows")

✅ Found 42 3-day continuous valid windows


In [4]:
# Count the distinct participants that appear in at least one 3-day window.
three_day_windows["participant_id"].nunique()

8

In [5]:
# Compare the participants that have any valid day with those that have a 3-day window;
# whoever is left over has valid days but never three in a row.
window_ids = set(three_day_windows["participant_id"].unique())
valid_ids = set(valid_df["participant_id"].unique())
missing_ids = valid_ids.difference(window_ids)
print(f"Participants with valid days but no 3-day continuous window: {missing_ids}")

Participants with valid days but no 3-day continuous window: {9}


## Extract 4-day continuous valid windows

In [6]:
def find_4day_windows(df):
    """Find every run of four consecutive calendar days for each participant.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``participant_id`` column and a datetime-like ``date``
        column. Every row is treated as a candidate day; any filtering (for
        example to valid days) must be done by the caller.

    Returns
    -------
    pandas.DataFrame
        One row per window with columns ``participant_id``, ``start_date``,
        ``mid_date1``, ``mid_date2`` and ``end_date``. Windows may overlap: a
        5-day run yields two rows. The five columns are present even when no
        window is found.
    """
    columns = ["participant_id", "start_date", "mid_date1", "mid_date2", "end_date"]
    windows = []
    for pid, group in df.groupby("participant_id"):
        # Repeated rows for the same date would otherwise hide a genuine run
        # (e.g. days 1, 2, 2, 3, 4), so collapse them before scanning.
        dates = group["date"].drop_duplicates().sort_values().tolist()
        # Slide a 4-wide window over the sorted dates.
        for d1, d2, d3, d4 in zip(dates, dates[1:], dates[2:], dates[3:]):
            if (d2 - d1).days == 1 and (d3 - d2).days == 1 and (d4 - d3).days == 1:
                windows.append({
                    "participant_id": pid,
                    "start_date": d1,
                    "mid_date1": d2,
                    "mid_date2": d3,
                    "end_date": d4
                })
    # Passing ``columns`` keeps the schema stable when ``windows`` is empty, so
    # callers can still index result["participant_id"].
    return pd.DataFrame(windows, columns=columns)


In [7]:
# Search the valid-day table for 4-day consecutive runs, then report the number of
# windows and the participant ids that have at least one.
four_day_windows = find_4day_windows(valid_df)
print(f"✅ Found {len(four_day_windows)} 4-day continuous valid windows")
print(f"Participants: {four_day_windows['participant_id'].unique()}")


✅ Found 32 4-day continuous valid windows
Participants: [1 2 3 4 5 6 7 8]


In [9]:
import os

# Write the 4-day windows table to CSV (without the index), creating the folder first.
# NOTE(review): this path is relative to the kernel's working directory, whereas the
# config cell builds its paths from base_dir — confirm the file lands where intended.
# NOTE(review): this rebinds output_path, which the config cell set to the
# eda_stress_features.csv location; any later cell reading output_path gets this value.
output_path = "eda_qc_reports/eda_4day_stress_windows.csv"
os.makedirs(os.path.dirname(output_path), exist_ok=True)  # Create folder if it doesn't exist
four_day_windows.to_csv(output_path, index=False)


In [10]:
import pandas as pd

# Load the stress-label CSV for participant 1 (per the file name) and preview the first rows.
# NOTE(review): the recorded preview shows participant_id, date, stress_level and notes
# packed into a single column, which suggests each CSV row is quoted as one field —
# confirm against the raw file and re-parse before relying on these columns.
# NOTE(review): absolute path outside base_dir; confirm this is the intended source.
df = pd.read_csv("C:/Users/lpnhu/Downloads/stress_label_participant1.csv")
df.head()

Unnamed: 0,"participant_id,date,stress_level,notes"
0,"1,2023-12-24,2,""Night shift in CCU, code strok..."
1,"1,2023-12-25,2,""Continued night shift, fatigue..."
2,"1,2023-12-26,1,""More sugar and overeating, CCU..."
3,"1,2023-12-27,2,""Off day, interrupted sleep by ..."
4,"1,2023-12-28,1,""CCU day shift, high caffeine, ..."
