**WESAD (the wrist/chest experiment)**

**What you did**
Opened pickles (special files) and got wrist signals (EDA, TEMP, ACC) and labels.


Flattened any strange array shapes to normal lists of numbers.


**Made timestamps:** WESAD often lacks clock times, so you created a synthetic clock (start at some date/time) and matched the number of samples.


**Resampled to 4 Hz** (EDA and TEMP are already recorded at 4 Hz, so they are only re-binned onto the 4 Hz grid; ACC is reduced from 32 Hz to 4 Hz).


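**Mapped labels**: turned WESAD labels into binary: 1 = stress, 0 = not stress. The mapping originally used was {0:0, 1:1, 2:0}. Note that the WESAD documentation codes the protocol conditions as 1 = baseline and 2 = stress, so the stress class should be taken from label 2, not label 1.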


Z-scored the signals (made their heights similar so one signal doesn’t dominate).


**Added ACC magnitude.**


Saved Parquet and created windows (60s windows, 30s stride) saved as .npz.


**Why**
WESAD is perfect for learning stress recognition because it has labeled stress moments. We reshaped it so the model sees the same type of input as PhysioNet.


In [None]:
import pickle
import os

# Folder holding the raw WESAD pickles on the mounted Drive, and the one
# subject file this cell inspects.
BASE = "/content/drive/MyDrive/stress-project/data_raw/wesad"
subject_file = "S3.pkl"
file_path = os.path.join(BASE, subject_file)

try:
    with open(file_path, "rb") as handle:
        # encoding="latin1" is what pickle needs for byte strings written by
        # Python 2 (NOTE(review): presumably how these files were produced).
        # pickle.load can run arbitrary code: only open trusted files.
        data = pickle.load(handle, encoding="latin1")
    print("Pickle file loaded successfully!")
except Exception as err:
    # Best-effort cell: report the problem and leave `data` as None so a
    # failed load is visible to the cells that follow.
    print(f"Error loading pickle file {file_path}: {err}")
    data = None


Pickle file loaded successfully!


In [None]:
# Pull the wrist-device recordings out of the loaded pickle.
# NOTE: `data` is None when the load cell above failed, in which case this
# line raises a TypeError.
wrist_signals = data['signal']['wrist']
# Electrodermal activity channel of the wrist device.
eda = wrist_signals['EDA']  # shape: (n_samples, 1)


Each subject’s data is saved in a pickle file, which is like a treasure chest with all the sensor readings.

We open it and look at the wrist signals.

In [None]:
# WESAD Preprocessing Script for Subjects S3, S5, S9, S11, S17

import os
import pickle
import pandas as pd
import numpy as np

# -----------------------------
# Helper functions
# -----------------------------
def flatten_signal(sig):
    """Return a 1-D copy of an (n, 1) NumPy column vector.

    Anything else (non-arrays, 1-D arrays, arrays with more than one
    column) is handed back untouched.
    """
    if not isinstance(sig, np.ndarray):
        return sig
    is_column_vector = sig.ndim == 2 and sig.shape[1] == 1
    return sig.flatten() if is_column_vector else sig

def add_acc_mag(df):
    """Add an 'ACC_mag' column: the Euclidean norm of ACC_x, ACC_y, ACC_z.

    The frame is modified in place and also returned.
    """
    ax, ay, az = df['ACC_x'], df['ACC_y'], df['ACC_z']
    sum_of_squares = ax**2 + ay**2 + az**2
    df['ACC_mag'] = np.sqrt(sum_of_squares)
    return df

def zscore_cols(df, cols):
    """Z-score the given columns of *df* in place and return *df*.

    Each column becomes (value - mean) / std, using pandas' default
    sample standard deviation.

    A column whose standard deviation is zero or undefined (constant
    values, a single row) is only mean-centred. Dividing would otherwise
    fill the whole column with NaN/inf.
    """
    for c in cols:
        centered = df[c] - df[c].mean()
        spread = df[c].std()
        if pd.isna(spread) or spread == 0:
            # No variation to scale by: keep the centred values (all zeros
            # for a constant column) instead of producing NaN.
            df[c] = centered
        else:
            df[c] = centered / spread
    return df

def create_time_index(length, freq_hz):
    """Create a synthetic timestamp index starting at 2025-01-01.

    Sample k is stamped at k / freq_hz seconds after the start, rounded to
    the nearest nanosecond. Each offset is computed on its own instead of
    from a whole-millisecond period, because truncating the period makes
    the clock drift for rates that do not divide 1000 (32 Hz would run at
    31 ms instead of 31.25 ms).

    Args:
        length: number of timestamps to generate.
        freq_hz: sampling frequency in Hz; must be positive.

    Returns:
        A pandas DatetimeIndex with *length* entries.

    Raises:
        ValueError: if *freq_hz* is not positive.
    """
    if freq_hz <= 0:
        raise ValueError(f"freq_hz must be positive, got {freq_hz}")
    offsets_ns = np.round(np.arange(length) * 1e9 / freq_hz).astype("int64")
    return pd.Timestamp("2025-01-01") + pd.to_timedelta(offsets_ns, unit="ns")

def make_windows(df, cols, win_sec, stride_sec, fs_out):
    """Cut *df* into fixed-length sliding windows.

    Args:
        df: frame whose rows are consecutive samples at *fs_out* Hz.
        cols: feature columns to place in each window.
        win_sec: window length in seconds.
        stride_sec: step between window starts in seconds.
        fs_out: sampling rate of *df* in Hz.

    Returns:
        X: array of shape (n_windows, samples_per_window, len(cols)).
        y: one label per window. This is the most frequent value of
            df['label'] inside the window (ties go to the smallest
            label), or zeros when *df* has no 'label' column.
        spans: list of (first_index, last_index) pairs, one per window.

    Raises:
        ValueError: if the window or stride is shorter than one sample.
    """
    values = df[cols].values
    win_len = int(win_sec * fs_out)
    step = int(stride_sec * fs_out)
    if win_len <= 0 or step <= 0:
        raise ValueError(
            "window and stride must each cover at least one sample "
            f"(got {win_len} and {step} samples)"
        )
    n_samples, n_channels = values.shape

    has_labels = 'label' in df.columns
    label_values = df['label'].values if has_labels else None

    X = []
    y = []
    spans = []
    for start in range(0, n_samples - win_len + 1, step):
        end = start + win_len
        X.append(values[start:end, :])
        spans.append((df.index[start], df.index[end - 1]))
        if has_labels:
            # Label the window from the samples it actually contains. Taking
            # the first N per-sample labels would not line up with windows.
            classes, counts = np.unique(label_values[start:end], return_counts=True)
            y.append(classes[np.argmax(counts)])

    if X:
        X = np.stack(X)
    else:
        # Keep a 3-D shape even when the signal is shorter than one window.
        X = np.empty((0, win_len, n_channels), dtype=values.dtype)

    if has_labels:
        y = np.asarray(y, dtype=label_values.dtype)
    else:
        y = np.zeros(X.shape[0])
    return X, y, spans

# -----------------------------
# Constants
# -----------------------------
# Project root on the mounted Drive; raw WESAD pickles are read from RAW_DIR
# and every processed artefact is written to PROC_DIR.
BASE = "/content/drive/MyDrive/stress-project"
RAW_DIR = os.path.join(BASE, "data_raw", "wesad")
PROC_DIR = os.path.join(BASE, "data_processed")
# Create the output folder up front; exist_ok makes re-running the cell safe.
os.makedirs(PROC_DIR, exist_ok=True)

FS_OUT = 4        # Target sampling rate (Hz)
WIN_SEC = 60      # Window length (seconds)
STRIDE_SEC = 30   # Window stride (seconds)

# Subject IDs to process; each one is loaded from "<id>.pkl" inside RAW_DIR.
SUBJECTS = ['S3', 'S5', 'S9', 'S11', 'S17']

# -----------------------------
# Preprocessing loop
# -----------------------------
# Recording facts taken from the WESAD readme.
WESAD_LABEL_FS = 700      # protocol labels are sampled at 700 Hz
WESAD_EDA_FS = 4          # wrist EDA (Hz)
WESAD_TEMP_FS = 4         # wrist skin temperature (Hz)
WESAD_ACC_FS = 32         # wrist accelerometer (Hz)
WESAD_STRESS_LABEL = 2    # 1 = baseline, 2 = stress, 3 = amusement, 4 = meditation


def _bin_ids(n_samples, fs_in, fs_out):
    """Map sample k (taken at k / fs_in seconds) to its fs_out-rate bin number."""
    return (np.arange(n_samples) * fs_out) // fs_in


for subj in SUBJECTS:
    print(f"\nProcessing subject {subj}...")

    file_path = os.path.join(RAW_DIR, f"{subj}.pkl")

    # Load pickle. pickle.load can execute arbitrary code, so only point
    # RAW_DIR at trusted copies of the dataset.
    try:
        with open(file_path, "rb") as f:
            data = pickle.load(f, encoding="latin1")
    except Exception as e:
        # Best-effort batch run: report and move on to the next subject.
        print(f"Error loading {subj}: {e}")
        continue

    wrist = data['signal']['wrist']
    labels_array = data['label']

    # Flatten signals to plain 1-D series / a 3-column frame
    eda = pd.Series(flatten_signal(wrist['EDA']))
    temp = pd.Series(flatten_signal(wrist['TEMP']))
    acc_array = np.asarray(wrist['ACC'])  # shape: (n_samples, 3)
    acc_df = pd.DataFrame({
        "ACC_x": acc_array[:, 0],
        "ACC_y": acc_array[:, 1],
        "ACC_z": acc_array[:, 2],
    })

    # Bring every stream to FS_OUT by sample position, each at its own
    # native rate. Binning by position avoids depending on a rounded
    # millisecond clock, so the streams cannot drift apart.
    eda_out = eda.groupby(_bin_ids(len(eda), WESAD_EDA_FS, FS_OUT)).median().interpolate()
    temp_out = temp.groupby(_bin_ids(len(temp), WESAD_TEMP_FS, FS_OUT)).median().interpolate()
    acc_out = acc_df.groupby(_bin_ids(len(acc_df), WESAD_ACC_FS, FS_OUT)).median().interpolate()

    # Labels arrive at 700 Hz, not 4 Hz. Mark the stress condition first,
    # then take the majority inside each output bin.
    labels_raw = np.asarray(labels_array).ravel()
    is_stress = pd.Series((labels_raw == WESAD_STRESS_LABEL).astype(float))
    stress_share = is_stress.groupby(
        _bin_ids(len(is_stress), WESAD_LABEL_FS, FS_OUT)
    ).mean()
    labels_bin = (stress_share >= 0.5).astype(int)

    # Keep only the span covered by every stream instead of padding the
    # shorter ones with forward-filled values.
    n_rows = min(len(eda_out), len(temp_out), len(acc_out), len(labels_bin))
    if n_rows == 0:
        print(f"Error loading {subj}: no overlapping samples")
        continue

    # Merge features
    df = pd.DataFrame(
        {
            "EDA": eda_out.to_numpy()[:n_rows],
            "TEMP": temp_out.to_numpy()[:n_rows],
            "ACC_x": acc_out["ACC_x"].to_numpy()[:n_rows],
            "ACC_y": acc_out["ACC_y"].to_numpy()[:n_rows],
            "ACC_z": acc_out["ACC_z"].to_numpy()[:n_rows],
        },
        index=create_time_index(n_rows, FS_OUT),
    )
    df = df.interpolate().ffill().bfill()
    df['label'] = labels_bin.to_numpy()[:n_rows]

    # Show sample
    print("Sample data (first 10 rows):")
    print(df.head(10))

    # Add ACC magnitude from the raw axes: a norm of z-scored axes is no
    # longer an acceleration magnitude. Then scale every feature,
    # including the magnitude, so no channel dominates.
    feat_cols = ["EDA", "TEMP", "ACC_x", "ACC_y", "ACC_z"]
    win_cols = feat_cols + ["ACC_mag"]
    df = add_acc_mag(df)
    df = zscore_cols(df, win_cols)

    # Save parquet
    out_parquet = os.path.join(PROC_DIR, f"wesad_{subj}.parquet")
    df.to_parquet(out_parquet)
    print(f"Saved cleaned data: {out_parquet}")

    # Create sliding windows
    X, y, spans = make_windows(df, win_cols, WIN_SEC, STRIDE_SEC, FS_OUT)

    out_windows = os.path.join(PROC_DIR, f"wesad_{subj}_windows.npz")
    np.savez(out_windows, X=X, y=y)
    print(f"Saved windows: {out_windows}")
    print(f"Windows shape (samples, time, features): {X.shape}")



Processing subject S3...
Sample data (first 10 rows):
                              EDA   TEMP  ACC_x  ACC_y  ACC_z  label
2025-01-01 00:00:00.000  2.248749  31.73  -66.0  -12.0   46.0      0
2025-01-01 00:00:00.250  2.710001  31.73  -27.0  -33.0  -17.0      0
2025-01-01 00:00:00.500  2.554969  31.73  -49.0  -29.5    9.5      0
2025-01-01 00:00:00.750  2.818907  31.73  -56.5  -35.0    8.5      0
2025-01-01 00:00:01.000  2.630563  31.73  -54.5  -33.0    8.5      0
2025-01-01 00:00:01.250  2.622875  31.73  -56.0  -32.0    8.0      0
2025-01-01 00:00:01.500  2.628000  31.69  -55.0  -32.5    8.0      0
2025-01-01 00:00:01.750  2.680532  31.69  -56.0  -33.0    7.5      0
2025-01-01 00:00:02.000  2.822751  31.69  -55.5  -33.0    7.0      0
2025-01-01 00:00:02.250  2.982908  31.69  -56.0  -33.0    7.0      0
Saved cleaned data: /content/drive/MyDrive/stress-project/data_processed/wesad_S3.parquet
Saved windows: /content/drive/MyDrive/stress-project/data_processed/wesad_S3_windows.npz
Windows

flatten_signal: Make sure signals are 1D arrays (like straightening LEGO bricks).

add_acc_mag: Calculate the total acceleration magnitude from x, y, z (one number for how strongly the wrist is moving, whatever the direction).

zscore_cols: Normalize values (make all LEGO pieces fit the same scale).

create_time_index: Create a fake clock for each sample so we know when it happens.

make_windows: Cut long sensor data into smaller sliding windows for analysis.


**Decide how we look at the data:**

FS_OUT = 4 Hz → look at 4 samples per second

WIN_SEC = 60 → 60-second windows

STRIDE_SEC = 30 → move 30 seconds at a time

Go through each subject and open their pickle file.

Pull out EDA, temperature, and acceleration.

Flatten them so each is a simple column of numbers.

Acceleration has 3 axes → separate x, y, z.

Normalize the numbers so everything is on the same scale.

Compute ACC magnitude → overall movement intensity (the size of the acceleration, not a speed).
Save cleaned data for later.

In [None]:
import os
import pandas as pd

# Folder the preprocessing step wrote its per-subject Parquet files to.
BASE = "/content/drive/MyDrive/stress-project/data_processed"
subjects = ['S3', 'S5', 'S9', 'S11', 'S17']

# subject ID -> processed DataFrame, filled only for files that exist
all_subjects_data = {}

for subj in subjects:
    file_path = os.path.join(BASE, f"wesad_{subj}.parquet")
    if not os.path.exists(file_path):
        print(f"File not found for {subj}: {file_path}")
        continue
    df = pd.read_parquet(file_path)
    all_subjects_data[subj] = df
    # Report the row count itself; printing df.shape here would show the
    # whole (rows, columns) tuple as the number of rows.
    n_rows, n_cols = df.shape
    print(f"Loaded {subj} -> {n_rows} rows, {n_cols} columns")

# Quick check: display head of each subject
# (`display` is supplied by the notebook environment.)
for subj, df in all_subjects_data.items():
    print(f"\nSubject {subj} sample:")
    display(df.head())


Loaded S3 -> (4545100, 7) rows, 7 columns
Loaded S5 -> (4380600, 7) rows, 7 columns
Loaded S9 -> (3656100, 7) rows, 7 columns
Loaded S11 -> (3663100, 7) rows, 7 columns
Loaded S17 -> (4144000, 7) rows, 7 columns

Subject S3 sample:


Unnamed: 0,EDA,TEMP,ACC_x,ACC_y,ACC_z,label,ACC_mag
2025-01-01 00:00:00.000,13.603021,8.794141,-8.736963,6.050715,17.336684,0,20.334854
2025-01-01 00:00:00.250,16.833807,8.794141,19.541735,0.219436,-13.892985,0,23.97796
2025-01-01 00:00:00.500,15.747903,8.794141,3.589649,1.191316,-0.756695,0,3.857123
2025-01-01 00:00:00.750,17.596627,8.794141,-1.848562,-0.335923,-1.252404,0,2.257996
2025-01-01 00:00:01.000,16.277393,8.794141,-0.398373,0.219436,-1.252404,0,1.33243



Subject S5 sample:


Unnamed: 0,EDA,TEMP,ACC_x,ACC_y,ACC_z,label,ACC_mag
2025-01-01 00:00:00.000,-4.807151,13.36696,-6.959256,-1.124014,5.765709,0,9.107034
2025-01-01 00:00:00.250,-5.583763,13.36696,10.438816,1.845749,-21.5865,0,24.048964
2025-01-01 00:00:00.500,-5.15145,13.36696,6.52425,-1.866455,3.366393,0,7.575097
2025-01-01 00:00:00.750,-6.062483,13.45417,5.219394,0.063891,-10.789575,0,11.985871
2025-01-01 00:00:01.000,-6.630009,13.45417,4.784443,-0.530062,-3.111762,0,5.731921



Subject S9 sample:


Unnamed: 0,EDA,TEMP,ACC_x,ACC_y,ACC_z,label,ACC_mag
2025-01-01 00:00:00.000,-7.457208,17.395209,-8.385021,0.926926,8.339858,0,11.862588
2025-01-01 00:00:00.250,-7.57079,17.395209,-15.282007,-0.288905,6.393289,0,16.567961
2025-01-01 00:00:00.500,-7.116418,17.395209,4.695471,3.358589,1.94399,0,6.091524
2025-01-01 00:00:00.750,-7.286791,17.395209,8.500704,4.40073,-9.179259,0,13.262247
2025-01-01 00:00:01.000,-7.513999,17.395209,11.116803,4.22704,-16.687452,0,20.492004



Subject S11 sample:


Unnamed: 0,EDA,TEMP,ACC_x,ACC_y,ACC_z,label,ACC_mag
2025-01-01 00:00:00.000,21.945914,18.62139,-21.706157,6.126045,8.452899,0,24.086037
2025-01-01 00:00:00.250,26.046586,18.62139,-10.380626,4.505139,4.28359,0,12.099703
2025-01-01 00:00:00.500,24.54826,18.62139,-12.217199,2.000102,7.971825,0,14.724481
2025-01-01 00:00:00.750,26.395812,18.62139,-11.911103,2.589523,8.132183,0,14.653068
2025-01-01 00:00:01.000,25.951697,18.848247,-11.911103,2.294813,7.811467,0,14.427736



Subject S17 sample:


Unnamed: 0,EDA,TEMP,ACC_x,ACC_y,ACC_z,label,ACC_mag
2025-01-01 00:00:00.000,42.1272,20.830302,3.302536,5.181038,11.19891,0,12.773624
2025-01-01 00:00:00.250,42.039565,20.56577,-12.615603,3.764255,7.392765,0,15.098875
2025-01-01 00:00:00.500,41.645261,20.56577,-6.529256,0.080618,11.721322,0,13.417417
2025-01-01 00:00:00.750,42.652973,20.56577,-6.997437,0.78901,12.094474,0,13.995104
2025-01-01 00:00:01.000,40.900418,20.56577,-8.401978,0.505653,11.795952,0,14.49115


Load each subject’s processed Parquet file into one dictionary keyed by subject ID.

This makes it easy to analyze all participants at once.