In [1]:
import pandas as pd
import os
from tqdm import tqdm

In [2]:
# === CONFIG ===
# Root folder of the analysis. It defaults to the original local location, but
# can be redirected without editing the notebook by setting the
# STRESS_ANALYSIS_DIR environment variable before the kernel starts.
base_dir = os.environ.get(
    "STRESS_ANALYSIS_DIR",
    "C:/Users/lpnhu/Downloads/Stress_Testing_Analysis",
)

# Input: folder scanned for per-participant "*_na_summary.csv" files.
summary_dir = f"{base_dir}/na_profiles"
# Output: folder that receives the extracted CSV tables.
output_dir = f"{base_dir}/preprocessing"

In [3]:
# === Candidate EDA / HRV column names found in the participant summary files ===
# NOTE: the following cell assigns `eda_cols` and `hrv_cols` again with shorter
# lists, so on a top-to-bottom run these values are replaced before any use.
eda_cols = [
    "eda_scl_usiemens",
    "temperature_celsius",
    "wearing_detection_percentage",
]

hrv_cols = ["prv_rmssd_ms", "pulse_rate_bpm", "respiratory_rate_brpm"]

In [4]:
# Columns pulled from each participant summary, grouped by output table.

# EDA table: a single column for now. Extend this list if SCR count,
# amplitude, etc. are computed later.
eda_cols = ["eda_scl_usiemens"]

# HRV table.
hrv_cols = ["prv_rmssd_ms"]

# Extra vitals, written to their own table.
extra_vitals = ["pulse_rate_bpm", "respiratory_rate_brpm"]

In [6]:
# Load a single participant's summary to learn the column layout before
# processing the whole folder.
sample_file = os.path.join(summary_dir, "participant1_na_summary.csv")
df_sample = pd.read_csv(sample_file)

# Print every column name, then let the cell render the first rows.
sample_columns = df_sample.columns.tolist()
print("📋 Columns:\n", sample_columns)
print("\n🔍 Sample rows:")
df_sample.head()


📋 Columns:
 ['file', 'timestamp_iso', 'accelerometers_std_g', 'missing_value_reason', 'counts_x_axis', 'counts_y_axis', 'counts_z_axis', 'vector_magnitude', 'activity_class', 'activity_counts', 'activity_intensity', 'body_position_left', 'body_position_right', 'eda_scl_usiemens', 'met', 'pulse_rate_bpm', 'sleep_detection_stage', 'step_counts', 'temperature_celsius', 'wearing_detection_percentage', 'hour', 'minute', 'prv_rmssd_ms', 'respiratory_rate_brpm']

🔍 Sample rows:


Unnamed: 0,file,timestamp_iso,accelerometers_std_g,missing_value_reason,counts_x_axis,counts_y_axis,counts_z_axis,vector_magnitude,activity_class,activity_counts,...,met,pulse_rate_bpm,sleep_detection_stage,step_counts,temperature_celsius,wearing_detection_percentage,hour,minute,prv_rmssd_ms,respiratory_rate_brpm
0,cleaned_2023-12-24.csv,0.0,99.9,0.1,99.9,99.9,99.9,99.9,99.9,99.9,...,99.9,99.9,99.9,99.9,99.9,99.9,0.0,0.0,,
1,cleaned_2023-12-25.csv,0.0,5.6,94.4,5.6,5.6,5.6,5.6,5.6,5.6,...,5.6,5.6,5.6,5.6,5.6,1.3,0.0,0.0,89.5,84.4
2,cleaned_2023-12-26.csv,0.0,27.2,72.8,27.2,27.2,27.2,27.2,27.2,27.2,...,27.2,27.2,27.2,27.2,27.2,23.5,0.0,0.0,97.8,96.5
3,cleaned_2023-12-27.csv,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,74.6,68.4
4,cleaned_2023-12-28.csv,0.0,6.5,93.5,6.5,6.5,6.5,6.5,6.5,6.5,...,6.5,6.5,6.5,6.5,6.5,0.0,0.0,0.0,92.0,90.7


In [7]:
# Per-table accumulators: one DataFrame per participant file is appended to
# each list; the lists are concatenated into full tables in a later cell.
eda_rows = []
hrv_rows = []
vitals_rows = []

# (accumulator, requested columns) pairs, so the same selection logic is
# applied to every output table instead of being copy-pasted three times.
feature_groups = [
    (eda_rows, eda_cols),
    (hrv_rows, hrv_cols),
    (vitals_rows, extra_vitals),
]

# os.listdir() returns entries in arbitrary order; sorting makes the row order
# of the output tables reproducible. Filtering up front also makes the progress
# bar count only the files that are actually processed.
summary_files = sorted(
    name for name in os.listdir(summary_dir) if name.endswith("_na_summary.csv")
)

# NOTE(review): the inputs are named "*_na_summary.csv" and the previewed
# sample holds 0-100 values that repeat across columns, which looks like
# per-day percentages rather than raw signal values. Confirm these are the
# intended inputs for a "feature matrix".
for fname in tqdm(summary_files):
    # 'participant1_na_summary.csv' -> '1'
    pid = fname.split('_')[0].replace('participant', '')
    fpath = os.path.join(summary_dir, fname)

    # Best effort: an unreadable file is reported and skipped, not fatal.
    try:
        df = pd.read_csv(fpath)
    except Exception as e:
        print(f"❌ Error reading {fname}: {e}")
        continue

    if 'file' not in df.columns:
        print(f"⚠️ Skipping {fname} — missing 'file' column.")
        continue

    # Extract date from file column like 'cleaned_2023-12-24.csv'.
    # Rows without a YYYY-MM-DD match (or with an invalid date) become NaT.
    df['date'] = pd.to_datetime(
        df['file'].str.extract(r'(\d{4}-\d{2}-\d{2})')[0],
        errors='coerce',
    )

    # errors='coerce' hides bad dates, so surface them explicitly.
    n_undated = int(df['date'].isna().sum())
    if n_undated:
        print(f"⚠️ {fname}: {n_undated} row(s) have no parseable date in 'file'.")

    for rows, wanted_cols in feature_groups:
        # Requested columns absent from this file are skipped; they show up
        # as NaN for this participant once the tables are concatenated.
        present_cols = [col for col in wanted_cols if col in df.columns]
        feats = df[['date'] + present_cols].copy()
        feats['participant_id'] = pid
        rows.append(feats)


100%|██████████| 9/9 [00:00<00:00, 135.55it/s]


In [8]:
# Fail early with a clear message when the loading loop collected nothing;
# pd.concat would otherwise raise a generic "No objects to concatenate".
if not (eda_rows and hrv_rows and vitals_rows):
    raise ValueError(
        f"No '*_na_summary.csv' data was collected from {summary_dir}; nothing to save."
    )

# Concatenate the per-participant frames into one table per feature group.
eda_df = pd.concat(eda_rows, ignore_index=True)
hrv_df = pd.concat(hrv_rows, ignore_index=True)
vitals_df = pd.concat(vitals_rows, ignore_index=True)

# to_csv() does not create missing folders, so make sure the target exists.
os.makedirs(output_dir, exist_ok=True)

# Destination CSV for each table.
eda_path = os.path.join(output_dir, "eda_feature_matrix.csv")
hrv_path = os.path.join(output_dir, "hrv_feature_matrix.csv")
vitals_path = os.path.join(output_dir, "extra_vitals_matrix.csv")

# Write each table without the index and report where it went and its shape.
for label, frame, path in (
    ("EDA features", eda_df, eda_path),
    ("HRV features", hrv_df, hrv_path),
    ("Extra vitals", vitals_df, vitals_path),
):
    frame.to_csv(path, index=False)
    print(f"✅ Saved {label} to: {path} — shape: {frame.shape}")


✅ Saved EDA features to: C:/Users/lpnhu/Downloads/Stress_Testing_Analysis/preprocessing\eda_feature_matrix.csv — shape: (85, 3)
✅ Saved HRV features to: C:/Users/lpnhu/Downloads/Stress_Testing_Analysis/preprocessing\hrv_feature_matrix.csv — shape: (85, 3)
✅ Saved Extra vitals to: C:/Users/lpnhu/Downloads/Stress_Testing_Analysis/preprocessing\extra_vitals_matrix.csv — shape: (85, 4)


In [9]:
# Preview the first five rows of the combined EDA table.
eda_df.head(n=5)

Unnamed: 0,date,eda_scl_usiemens,participant_id
0,2023-12-24,99.9,1
1,2023-12-25,5.6,1
2,2023-12-26,27.2,1
3,2023-12-27,0.0,1
4,2023-12-28,6.5,1
