In [1]:
from pathlib import Path

if Path.cwd().stem == "features":
    %cd ../..
    %load_ext autoreload
    %autoreload 2

/Users/visser/drive/PhD/Code/pain-measurement


In [2]:
import logging
from pathlib import Path

import holoviews as hv
import matplotlib.pyplot as plt
import plotly.io as pio
import polars as pl

from src.data.config_data_interim import INTERIM_DICT, INTERIM_LIST, InterimConfig
from src.data.config_data_raw import RAW_DICT, RAW_LIST, RawConfig
from src.data.config_participant import PARTICIPANT_LIST, ParticipantConfig
from src.data.make_dataset import load_dataset, load_participant_datasets
from src.data.utils import load_modality_data, merge_datasets
from src.features.quality_checks import check_sample_rate
from src.features.scaling import scale_min_max, scale_standard
from src.features.stimulus import corr_temperature_rating
from src.features.transformations import (
    add_timedelta_column,
    interpolate,
    map_participant_datasets,
    map_trials,
)
from src.helpers import to_describe
from src.log_config import configure_logging
from src.visualization.plot_data import (
    plot_data_panel,
    plot_trial_matplotlib,
    plot_trial_plotly,
)

# Debug-level log stream for the project; the listed third-party libraries are
# passed as `ignore_libs` (presumably to silence their loggers).
configure_logging(
    stream_level=logging.DEBUG,
    ignore_libs=["matplotlib", "Comm", "bokeh", "tornado"],
)

hv.extension("plotly")
pio.templates.default = "plotly_white"  # set plotly theme
# TODO: these Curve defaults were noted as not taking effect; the plotting
# cells further down pass `width`/`height` explicitly.
hv.opts.defaults(hv.opts.Curve(width=1800, height=800))

# Show up to 12 table rows (for the 12 trials). The trailing semicolon keeps
# the returned Config class from being echoed as cell output.
pl.Config.set_tbl_rows(12);

polars.config.Config

In [3]:
# Load all interim datasets (INTERIM_LIST) for the first participant in
# PARTICIPANT_LIST; each dataset is reachable as an attribute of `dfs`.
dfs = load_participant_datasets(PARTICIPANT_LIST[0], INTERIM_LIST)
# Preview the stimulus table of that participant.
dfs.stimulus

14:16:07 | [36mDEBUG   [0m| make_dataset | Dataset 'stimulus' for participant 1 loaded from data/interim/1/1_stimulus.csv
14:16:07 | [36mDEBUG   [0m| make_dataset | Dataset 'eeg' for participant 1 loaded from data/interim/1/1_eeg.csv
14:16:07 | [36mDEBUG   [0m| make_dataset | Dataset 'eda' for participant 1 loaded from data/interim/1/1_eda.csv
14:16:07 | [36mDEBUG   [0m| make_dataset | Dataset 'ppg' for participant 1 loaded from data/interim/1/1_ppg.csv
14:16:07 | [36mDEBUG   [0m| make_dataset | Dataset 'pupillometry' for participant 1 loaded from data/interim/1/1_pupillometry.csv
14:16:07 | [36mDEBUG   [0m| make_dataset | Dataset 'affectiva' for participant 1 loaded from data/interim/1/1_affectiva.csv
14:16:07 | [92mINFO    [0m| make_dataset | Participant 1 loaded with datasets: dict_keys(['stimulus', 'eeg', 'eda', 'ppg', 'pupillometry', 'affectiva'])


Timestamp,Temperature,Rating,Stimulus_Seed,Participant,Trial,Skin_Area
f64,f64,f64,f64,f64,f64,f64
294197.3945,0.0,0.425,396.0,1.0,0.0,1.0
294357.9645,0.000069,0.425,396.0,1.0,0.0,1.0
294458.0292,0.000277,0.35375,396.0,1.0,0.0,1.0
294558.6006,0.000622,0.14875,396.0,1.0,0.0,1.0
294658.3354,0.001106,0.10125,396.0,1.0,0.0,1.0
294758.4957,0.001728,0.2275,396.0,1.0,0.0,1.0
…,…,…,…,…,…,…
3.0287e6,0.344497,0.38875,243.0,1.0,11.0,6.0
3.0288e6,0.343688,0.36875,243.0,1.0,11.0,6.0
3.0289e6,0.343059,0.3525,243.0,1.0,11.0,6.0


In [4]:
# Stimulus data of every participant in PARTICIPANT_LIST, held in one frame.
stimuli = load_modality_data(PARTICIPANT_LIST, INTERIM_DICT["stimulus"])

14:16:08 | [36mDEBUG   [0m| make_dataset | Dataset 'stimulus' for participant 1 loaded from data/interim/1/1_stimulus.csv
14:16:08 | [36mDEBUG   [0m| make_dataset | Dataset 'stimulus' for participant 2 loaded from data/interim/2/2_stimulus.csv
14:16:08 | [36mDEBUG   [0m| make_dataset | Dataset 'stimulus' for participant 3 loaded from data/interim/3/3_stimulus.csv
14:16:08 | [36mDEBUG   [0m| make_dataset | Dataset 'stimulus' for participant 4 loaded from data/interim/4/4_stimulus.csv


In [5]:
# EDA data of every participant in PARTICIPANT_LIST, held in one frame.
eda = load_modality_data(PARTICIPANT_LIST, INTERIM_DICT["eda"])

14:16:08 | [36mDEBUG   [0m| make_dataset | Dataset 'eda' for participant 1 loaded from data/interim/1/1_eda.csv
14:16:08 | [36mDEBUG   [0m| make_dataset | Dataset 'eda' for participant 2 loaded from data/interim/2/2_eda.csv
14:16:08 | [36mDEBUG   [0m| make_dataset | Dataset 'eda' for participant 3 loaded from data/interim/3/3_eda.csv
14:16:08 | [36mDEBUG   [0m| make_dataset | Dataset 'eda' for participant 4 loaded from data/interim/4/4_eda.csv


In [6]:
# Show both frames with their rich table rendering. A bare tuple as the last
# expression falls back to the plain-text repr of each element.
display(eda, stimuli)

(shape: (1_013_889, 6)
 ┌─────────────┬───────────┬─────────────┬───────┬───────────┬────────────┐
 │ Timestamp   ┆ EDA_RAW   ┆ Participant ┆ Trial ┆ EDA_Tonic ┆ EDA_Phasic │
 │ ---         ┆ ---       ┆ ---         ┆ ---   ┆ ---       ┆ ---        │
 │ f64         ┆ f64       ┆ f64         ┆ f64   ┆ f64       ┆ f64        │
 ╞═════════════╪═══════════╪═════════════╪═══════╪═══════════╪════════════╡
 │ 294197.3945 ┆ 0.752359  ┆ 1.0         ┆ 0.0   ┆ 0.75195   ┆ 0.000409   │
 │ 294211.3575 ┆ 0.754579  ┆ 1.0         ┆ 0.0   ┆ 0.751953  ┆ 0.002626   │
 │ 294211.3575 ┆ 0.753247  ┆ 1.0         ┆ 0.0   ┆ 0.751956  ┆ 0.001291   │
 │ 294224.331  ┆ 0.753247  ┆ 1.0         ┆ 0.0   ┆ 0.751958  ┆ 0.001289   │
 │ 294242.275  ┆ 0.754135  ┆ 1.0         ┆ 0.0   ┆ 0.751961  ┆ 0.002174   │
 │ 294242.275  ┆ 0.752359  ┆ 1.0         ┆ 0.0   ┆ 0.751964  ┆ 0.000395   │
 │ …           ┆ …         ┆ …           ┆ …     ┆ …         ┆ …          │
 │ 2.5846e6    ┆ 17.367376 ┆ 4.0         ┆ 7.0   ┆ 17.474232 ┆ -0

In [7]:
# Key columns shared by the stimulus and the EDA frame.
merge_on = ["Timestamp", "Participant", "Trial"]

# Full join: rows that exist in only one of the two frames are kept, and the
# key columns of both sides are coalesced into a single set of columns.
multiple_eda_plus_rating = stimuli.join(
    other=eda, on=merge_on, how="full", coalesce=True
).sort("Participant", "Trial", "Timestamp")
multiple_eda_plus_rating

Timestamp,Temperature,Rating,Stimulus_Seed,Participant,Trial,Skin_Area,EDA_RAW,EDA_Tonic,EDA_Phasic
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
294197.3945,0.0,0.425,396.0,1.0,0.0,1.0,0.752359,0.75195,0.000409
294211.3575,,,,1.0,0.0,,0.754579,0.751953,0.002626
294211.3575,,,,1.0,0.0,,0.753247,0.751956,0.001291
294224.331,,,,1.0,0.0,,0.753247,0.751958,0.001289
294242.275,,,,1.0,0.0,,0.754135,0.751961,0.002174
294242.275,,,,1.0,0.0,,0.752359,0.751964,0.000395
…,…,…,…,…,…,…,…,…,…
2.5846e6,,,,4.0,7.0,,17.367376,17.474232,-0.003445
2.5846e6,,,,4.0,7.0,,17.330928,17.474232,-0.039647
2.5846e6,,,,4.0,7.0,,17.330928,17.474232,-0.039401


In [8]:
# Merge the stimulus and EDA frames through the project helper, using the same
# key and sort columns as the manual join in the previous cell.
# NOTE(review): presumably equivalent to `multiple_eda_plus_rating`; `s` is not
# referenced again in the visible cells — confirm whether this cell is needed.
s = merge_datasets(
    [stimuli, eda],
    merge_on=["Timestamp", "Participant", "Trial"],
    sort_by=["Participant", "Trial", "Timestamp"],
)

In [9]:
# Signals overlaid in the per-trial plot.
features = ["Temperature", "Rating", "EDA_Tonic", "EDA_RAW"]

# Interpolate first (presumably filling the gaps introduced by the full join),
# then min-max scale with Temperature and Rating passed as additionally
# excluded columns.
multiple_eda_plus_rating = multiple_eda_plus_rating.pipe(interpolate).pipe(
    scale_min_max, exclude_additional_columns=["Temperature", "Rating"]
)

# One line chart per (Participant, Trial) combination.
multiple_eda_plus_rating.hvplot(
    kind="line",
    x="Timestamp",
    y=features,
    groupby=["Participant", "Trial"],
    ylim=(0, 1),
    width=800,
    height=400,
)

BokehModel(combine_events=True, render_bundle={'docs_json': {'f689daa8-578d-4bd8-a830-c2a5db597fb8': {'version…

In [10]:
# Same plot as the previous cell, without the raw EDA signal.
features = ["Temperature", "Rating", "EDA_Tonic"]

# NOTE(review): the frame was already interpolated and scaled by the previous
# cell, so this second pass is presumably redundant — confirm before removing.
multiple_eda_plus_rating = multiple_eda_plus_rating.pipe(interpolate).pipe(
    scale_min_max, exclude_additional_columns=["Temperature", "Rating"]
)

# One line chart per (Participant, Trial) combination.
multiple_eda_plus_rating.hvplot(
    kind="line",
    x="Timestamp",
    y=features,
    groupby=["Participant", "Trial"],
    ylim=(0, 1),
    width=800,
    height=400,
)

BokehModel(combine_events=True, render_bundle={'docs_json': {'8764bfbd-0be2-409a-a52a-02fb53433cdf': {'version…

### Stimulus

In [11]:
# Signals overlaid in the per-trial stimulus plot.
features = ["Temperature", "Rating"]
# Independent copy of the first participant's stimulus table; it is used by the
# per-trial summary statistics further down.
stimulus = dfs.stimulus.clone()

# `maintain_order=True` keeps rows that share a seed in their existing order.
# Without it the order of ties is not guaranteed, which could shuffle the
# timestamps inside a trial and produce a zig-zag line.
stimuli.sort("Stimulus_Seed", maintain_order=True).hvplot(
    x="Timestamp",
    y=features,
    groupby=["Participant", "Trial"],
    kind="line",
    width=800,
    height=400,
    ylim=(0, 1),
)

BokehModel(combine_events=True, render_bundle={'docs_json': {'418d1b24-85af-4025-846e-31edd757ad3c': {'version…

In [12]:
# Stimulus rows ordered by seed. `maintain_order=True` keeps rows that share a
# seed in their existing order, so the displayed table is deterministic.
stimuli.sort("Stimulus_Seed", maintain_order=True)

Timestamp,Temperature,Rating,Stimulus_Seed,Participant,Trial,Skin_Area
f64,f64,f64,f64,f64,f64,f64
1.2081e6,0.0,0.0,133.0,1.0,4.0,5.0
1.2083e6,0.000243,0.0,133.0,1.0,4.0,5.0
1.2084e6,0.000973,0.1325,133.0,1.0,4.0,5.0
1.2085e6,0.002189,0.15125,133.0,1.0,4.0,5.0
1.2086e6,0.003889,0.175,133.0,1.0,4.0,5.0
1.2087e6,0.006072,0.17875,133.0,1.0,4.0,5.0
…,…,…,…,…,…,…
689647.2463,0.055744,0.0,952.0,4.0,1.0,5.0
689746.9787,0.055222,0.0,952.0,4.0,1.0,5.0
689847.7098,0.054815,0.0,952.0,4.0,1.0,5.0


In [13]:
# Correlation between temperature and rating, computed from the stimulus data
# of the single participant held in `dfs` (the result has one row per trial).
correlations = corr_temperature_rating(dfs.stimulus)
correlations

Trial,Correlation,Participant,Stimulus_Seed,Skin_Area
f64,f64,f64,f64,f64
0.0,0.422594,1.0,396.0,1.0
1.0,0.676031,1.0,806.0,2.0
2.0,0.738281,1.0,743.0,3.0
3.0,0.572535,1.0,952.0,4.0
4.0,0.62758,1.0,133.0,5.0
5.0,0.630009,1.0,658.0,6.0
6.0,0.579105,1.0,841.0,1.0
7.0,0.66775,1.0,681.0,2.0
8.0,0.721076,1.0,870.0,3.0
9.0,0.458618,1.0,467.0,4.0


In [14]:
# Scatter of the correlation per trial; the y-axis is fixed to [-1, 1].
correlations.plot.scatter(x="Trial", y="Correlation", title="Correlation", ylim=(-1, 1))

In [15]:
# Scatter of rating against temperature for the single participant in `dfs`.
dfs.stimulus.plot.scatter(x="Temperature", y="Rating", title="Temperature vs Rating")

In [16]:
# Mean of every column per trial across all participants, with trials kept in
# the order in which they first appear.
# NOTE(review): identifier-like columns (Participant, Stimulus_Seed, Skin_Area)
# are averaged as well; those values are probably not meaningful.
stimuli.group_by("Trial", maintain_order=True).agg(pl.all().mean())

Trial,Timestamp,Temperature,Rating,Stimulus_Seed,Participant,Skin_Area
f64,f64,f64,f64,f64,f64,f64
0.0,333567.448748,0.484017,0.547322,534.5,2.5,3.5
1.0,564146.414488,0.456671,0.526354,858.5,2.5,3.5
2.0,785146.131892,0.469269,0.532979,539.0,2.5,3.5
3.0,1016200.0,0.471091,0.50086,738.25,2.5,3.5
4.0,1254000.0,0.471843,0.497638,514.25,2.5,3.5
5.0,1480900.0,0.467724,0.459173,347.75,2.5,3.5
6.0,1834800.0,0.482628,0.479843,860.0,2.5,3.5
7.0,2084300.0,0.475624,0.501288,545.75,2.5,3.5
8.0,2172000.0,0.464999,0.51577,490.0,2.0,3.333333
9.0,2405200.0,0.480179,0.443869,651.0,2.0,3.666667


In [17]:
# Per-trial descriptive statistics for the first participant: the expressions
# produced by `to_describe` for Rating, followed by those for Temperature.
stimulus.group_by("Trial", maintain_order=True).agg(
    *(stat for column in ("Rating", "Temperature") for stat in to_describe(column))
)

Trial,Rating_count,Rating_null_count,Rating_mean,Rating_std,Rating_min,Rating_25%,Rating_50%,Rating_75%,Rating_max,Temperature_count,Temperature_null_count,Temperature_mean,Temperature_std,Temperature_min,Temperature_25%,Temperature_50%,Temperature_75%,Temperature_max
f64,u32,u32,f64,f64,f64,f64,f64,f64,f64,u32,u32,f64,f64,f64,f64,f64,f64,f64
0.0,1800,0,0.595312,0.395975,0.0,0.14875,0.73,1.0,1.0,1800,0,0.486072,0.279176,0.0,0.322646,0.463265,0.71454,1.0
1.0,1800,0,0.413944,0.41696,0.0,0.0,0.29375,0.91125,1.0,1800,0,0.434829,0.292505,0.0,0.211045,0.317968,0.662018,1.0
2.0,1800,0,0.34959,0.368374,0.0,0.0,0.19625,0.685,1.0,1800,0,0.479204,0.275895,0.0,0.312434,0.426262,0.698026,1.0
3.0,1800,0,0.465086,0.408783,0.0,0.0,0.4575,0.9525,1.0,1800,0,0.494327,0.274903,0.0,0.304869,0.462121,0.719205,1.0
4.0,1800,0,0.40339,0.406459,0.0,0.0,0.2375,0.8825,1.0,1800,0,0.450167,0.281601,0.0,0.277714,0.351679,0.656671,1.0
5.0,1800,0,0.413542,0.42581,0.0,0.0,0.25,0.9025,1.0,1800,0,0.488432,0.283829,0.0,0.255248,0.4926,0.697619,1.0
6.0,1800,0,0.536795,0.423139,0.0,0.0,0.7,0.99375,1.0,1800,0,0.500679,0.28949,0.0,0.274057,0.503916,0.755112,1.0
7.0,1800,0,0.53506,0.410217,0.0,0.0025,0.59875,1.0,1.0,1800,0,0.458104,0.273699,0.0,0.267112,0.46515,0.666279,1.0
8.0,1800,0,0.471049,0.393632,0.0,0.0,0.52375,0.84625,1.0,1800,0,0.462702,0.296501,0.0,0.266027,0.361478,0.733421,1.0
9.0,1800,0,0.481478,0.40403,0.0,0.0,0.49875,0.9025,1.0,1800,0,0.482129,0.282051,0.0,0.292548,0.485771,0.700556,1.0


In [18]:
# Per-trial column means for the single participant in `dfs`, ordered from the
# lowest to the highest mean rating.
dfs.stimulus.group_by("Trial", maintain_order=True).agg(pl.all().mean()).sort(by="Rating")

Trial,Timestamp,Temperature,Rating,Stimulus_Seed,Participant,Skin_Area
f64,f64,f64,f64,f64,f64,f64
2.0,815253.955303,0.479204,0.34959,743.0,1.0,3.0
10.0,2707900.0,0.457641,0.360029,265.0,1.0,5.0
4.0,1298100.0,0.450167,0.40339,133.0,1.0,5.0
5.0,1531700.0,0.488432,0.413542,658.0,1.0,6.0
1.0,595419.290599,0.434829,0.413944,806.0,1.0,2.0
11.0,2939300.0,0.460884,0.440939,243.0,1.0,6.0
3.0,1034000.0,0.494327,0.465086,952.0,1.0,4.0
8.0,2241800.0,0.462702,0.471049,870.0,1.0,3.0
9.0,2491100.0,0.482129,0.481478,467.0,1.0,4.0
7.0,2009500.0,0.458104,0.53506,681.0,1.0,2.0
