# Sensor Data — Quick Exploration

Walkthrough of the sample data, focused on Session 1 (2024-06-13). Loads each CSV, runs basic sanity checks, and plots heart rate, pupil dilation, fixation durations, IBI-derived HRV metrics, per-question pupil dilation across sessions, and response times.

All data lives in `../Sensor's data/`. See the [data dictionary](../Sensor's%20data/DATA_DICTIONARY.md) for full column definitions.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Notebook-wide figure defaults: a wide 12x4 canvas rendered at 100 dpi.
plt.rcParams.update({"figure.figsize": (12, 4), "figure.dpi": 100})

# Directory holding the CSV exports (relative path, note the apostrophe and space).
DATA = "../Sensor's data"

## 1. Load everything

In [None]:
# Column names applied to the psychometric results; the file's own first line
# is skipped below and replaced by these.
psych_columns = ["Test", "Question", "Answer", "Score", "Time_s", "Question_Start", "Question_Answer"]

# One DataFrame per CSV export in the data directory.
hr = pd.read_csv(f"{DATA}/hr.csv")
ibi = pd.read_csv(f"{DATA}/ibi.csv")
sed = pd.read_csv(f"{DATA}/sed_fix.csv")  # post-processed version with fixations
hrv = pd.read_csv(f"{DATA}/HRV.csv")
psych = pd.read_csv(
    f"{DATA}/Psychometric_Test_Results.csv",
    header=None,
    names=psych_columns,
    skiprows=1,
)

# Quick sanity check: report how many rows each file contributed.
print(f"hr:    {len(hr):>6,} rows")
print(f"ibi:   {len(ibi):>6,} rows")
print(f"sed:   {len(sed):>6,} rows")
print(f"hrv:   {len(hrv):>6,} rows  (3 sessions)")
print(f"psych: {len(psych):>6,} rows  (session 1 only)")

## 2. Heart rate over time

Two chest-strap sensors (`iSensor` 3 and 5) recorded simultaneously. Only rows with `confidence == 1.0` are valid.

In [None]:
# Keep only the samples flagged with full confidence (exactly 1.0).
full_confidence = hr["confidence"] == 1.0
hr_valid = hr.loc[full_confidence].copy()

fig, ax = plt.subplots()

# One trace per sensor id so the simultaneous recordings can be compared.
for sensor_id, sensor_rows in hr_valid.groupby("iSensor"):
    ax.plot(
        sensor_rows["reltime"],
        sensor_rows["heart_rate"],
        label=f"Sensor {sensor_id}",
        alpha=0.7,
        linewidth=0.8,
    )

# The y-range is fixed at 50-80 BPM; readings outside it are clipped from view.
ax.set(
    xlabel="Time (seconds)",
    ylabel="Heart rate (BPM)",
    title="Heart rate — Session 1",
    ylim=(50, 80),
)
ax.legend()
plt.tight_layout()
plt.show()

## 3. Pupil dilation over time

Continuous pupil diameter from the eye tracker, with a dashed vertical line marking the start of each psychometric test.

In [None]:
# Discard tracking-loss samples: non-positive pupil readings and any sample
# whose quality score is 0.5 or lower.
has_pupil = sed["pupil"] > 0
good_quality = sed["pupilQ"] > 0.5
sed_valid = sed.loc[has_pupil & good_quality].copy()

fig, ax = plt.subplots()

# Raw trace, drawn faintly underneath the smoothed curve.
ax.plot(sed_valid["reltime"], sed_valid["pupil"], color="steelblue", alpha=0.4, linewidth=0.3)

# Smoothed trace for readability. The window counts rows, not seconds.
# NOTE(review): "~2 seconds" presumes 60 Hz sampling and no dropped rows — confirm.
window = 120  # ~2 seconds at 60 Hz
pupil_by_time = sed_valid.set_index("reltime")["pupil"]
rolling = pupil_by_time.rolling(window, min_periods=10).mean()
ax.plot(rolling.index, rolling.values, color="darkblue", linewidth=1.5, label="2-sec rolling mean")

# Session 1 rows of HRV.csv supply the wall-clock start of every question.
hrv_s1 = hrv[hrv["Test"] == "Test 01"].copy()
hrv_s1["Start Time"] = pd.to_datetime(hrv_s1["Start Time"])
t0 = hrv_s1["Start Time"].min()

# Colour per psychometric test type; unknown types fall back to gray.
colors = {"HADS": "#e74c3c", "STAI-S": "#e67e22", "STAI-T": "#2ecc71", "BFI": "#9b59b6", "FQ": "#3498db"}

# Earliest start per test type, drawn as one dashed marker each.
first_start_by_type = hrv_s1.groupby("Type")["Start Time"].min()
for test_type, started_at in first_start_by_type.items():
    seconds_after_first = (started_at - t0).total_seconds()
    # offset to align with reltime (first question starts around reltime ~8)
    ax.axvline(
        seconds_after_first + 8,
        color=colors.get(test_type, "gray"),
        linestyle="--",
        alpha=0.7,
        label=test_type,
    )

ax.set_xlabel("Time (seconds)")
ax.set_ylabel("Pupil diameter (mm)")
ax.set_title("Pupil dilation — Session 1")
ax.legend(loc="upper right", fontsize=8)
plt.tight_layout()
plt.show()

## 4. Fixation duration distribution

From `sed_fix.csv`. The anxiety threshold from the literature is **< 250 ms** — shorter fixations suggest attentional instability.

In [None]:
# Collapse the per-sample rows to one duration per fixation: the largest value
# recorded for each fixation_id among rows flagged as fixations.
fixations = sed[sed["fixation"].eq(True)].groupby("fixation_id")["duration"].max()
fixations = fixations[fixations > 0]  # drop zero-duration entries

# NOTE(review): the x1000 scaling presumes `duration` is stored in seconds —
# confirm against the data dictionary.
fixations_ms = fixations * 1000
threshold_ms = 250

fig, ax = plt.subplots()
# Histogram is limited to 0-2000 ms; longer fixations fall outside the bins.
ax.hist(fixations_ms, bins=80, range=(0, 2000), color="steelblue", edgecolor="white", alpha=0.8)
ax.axvline(threshold_ms, color="red", linestyle="--", linewidth=1.5, label="Anxiety threshold (250 ms)")

below = int((fixations_ms < threshold_ms).sum())
total = len(fixations_ms)
# Guard the share so an empty fixation set reads "0%" instead of triggering a
# divide warning and printing "nan%" in the title.
share_below = below / total if total else 0.0

ax.set_xlabel("Fixation duration (ms)")
ax.set_ylabel("Count")
ax.set_title(f"Fixation durations — {below}/{total} ({share_below:.0%}) below 250 ms threshold")
ax.legend()
plt.tight_layout()
plt.show()

## 5. Inter-beat intervals and HRV

IBI series from the sensor with the most data (sensor 5), plus RMSSD and SDNN computed over a sliding window. The anxiety threshold for both metrics is **< 50 ms**.

In [None]:
# Sensor 5 is hard-coded here (noted as the sensor with the most data points);
# non-positive intervals are dropped and the index is renumbered from zero.
from_sensor_5 = ibi["iSensor"] == 5
positive_interval = ibi["ibi"] > 0
ibi_s5 = ibi.loc[from_sensor_5 & positive_interval].reset_index(drop=True)

# Two stacked panels sharing the time axis: raw intervals above, HRV below.
fig, axes = plt.subplots(2, 1, figsize=(12, 7), sharex=True)
ax_ibi, ax_hrv = axes

# Top panel: the IBI series itself, with the view fixed to 400-1200.
ax_ibi.plot(ibi_s5["reltime"], ibi_s5["ibi"], color="steelblue", linewidth=0.6)
ax_ibi.set_ylabel("IBI (ms)")
ax_ibi.set_title("Inter-beat intervals — Sensor 5")
ax_ibi.set_ylim(400, 1200)

# Sliding-window HRV. Both metrics need a completely filled window
# (min_periods == window length), so the leading values are NaN.
w = 30
successive_diff = ibi_s5["ibi"].diff()
mean_squared_diff = successive_diff.pow(2).rolling(w, min_periods=w).mean()
rmssd = mean_squared_diff.apply(np.sqrt)  # root of the mean squared successive difference
sdnn = ibi_s5["ibi"].rolling(w, min_periods=w).std()  # rolling standard deviation of the intervals

# Bottom panel: both metrics plus the 50 ms reference line.
ax_hrv.plot(ibi_s5["reltime"], rmssd, label="RMSSD", color="#e74c3c", linewidth=1)
ax_hrv.plot(ibi_s5["reltime"], sdnn, label="SDNN", color="#3498db", linewidth=1)
ax_hrv.axhline(50, color="gray", linestyle="--", linewidth=1, label="Anxiety threshold (50 ms)")
ax_hrv.set_xlabel("Time (seconds)")
ax_hrv.set_ylabel("HRV metric (ms)")
ax_hrv.set_title(f"Sliding-window HRV ({w}-beat window)")
ax_hrv.legend()
ax_hrv.set_ylim(0, 200)

plt.tight_layout()
plt.show()

## 6. Per-question pupil dilation across sessions

Average pupil dilation per question from `HRV.csv`, grouped by psychometric test and session.

In [None]:
# One panel per session label found in HRV.csv's "Test" column; panels share the y-axis.
session_names = ["Test 01", "Test 02", "Test 03"]
fig, axes = plt.subplots(1, 3, figsize=(14, 4), sharey=True)

for test_name, ax in zip(session_names, axes):
    session = hrv[hrv["Test"] == test_name]

    # Bars for each test type are laid out left to right in order of first
    # appearance, separated by a one-slot gap.
    x_offset = 0
    ticks, labels = [], []
    for t in session["Type"].unique():
        subset = session[session["Type"] == t]
        n_questions = len(subset)
        positions = range(x_offset, x_offset + n_questions)
        ax.bar(
            positions,
            subset["Average Pupil Dilation"].values,
            color=colors.get(t, "gray"),  # `colors` comes from the pupil-dilation cell
            alpha=0.8,
            width=0.9,
        )
        # Label each group at (roughly) its middle bar.
        ticks.append(x_offset + n_questions // 2)
        labels.append(t)
        x_offset += n_questions + 1

    ax.set_xticks(ticks)
    ax.set_xticklabels(labels, rotation=45, fontsize=8)
    ax.set_title(test_name)

# Only the left-most panel carries the y-axis label.
axes[0].set_ylabel("Avg pupil dilation (mm)")

fig.suptitle("Per-question pupil dilation across sessions", fontsize=12)
plt.tight_layout()
plt.show()

## 7. Response times by test

How long the participant took to answer each question. Longer response times on anxiety-related items could signal deliberation or avoidance.

In [None]:
fig, ax = plt.subplots(figsize=(8, 4))

# Fixed left-to-right order of the boxes.
test_order = ["HADS", "STAI-S", "STAI-T", "BFI", "FQ"]

# Response times per test. Missing values are dropped because a single NaN
# makes the quartiles NaN and the whole box disappears.
data_by_test = [
    psych.loc[psych["Test"] == t, "Time_s"].dropna().values
    for t in test_order
]

# Tick labels are set after drawing rather than through boxplot(labels=...):
# that keyword is deprecated since Matplotlib 3.9 (renamed `tick_labels`),
# while set_xticklabels behaves the same on old and new versions.
bp = ax.boxplot(data_by_test, patch_artist=True)
ax.set_xticklabels(test_order)

# Fill each box with its test's colour (`colors` comes from the
# pupil-dilation cell); unknown tests fall back to gray.
for patch, t in zip(bp["boxes"], test_order):
    patch.set_facecolor(colors.get(t, "gray"))
    patch.set_alpha(0.6)

ax.set_ylabel("Response time (seconds)")
ax.set_title("Response time by psychometric test — Session 1")
plt.tight_layout()
plt.show()

## Summary

This notebook covers the basics — loading, sanity-checking, and visualizing each data stream. For deeper analysis (feature extraction, cross-modal fusion, anxiety classification), see the [Multimodal](https://github.com/urme-b/Multimodal) and [CalmSense](https://github.com/urme-b/CalmSense) repos.