In [1]:
%%capture
from pathlib import Path

if Path.cwd().stem == "notebooks":
    %cd ..
    %load_ext autoreload
    %autoreload 2

In [2]:
import logging

import holoviews as hv
import hvplot.polars  # noqa
import neurokit2 as nk
import pandas as pd
import polars as pl
from icecream import ic
from polars import col

from src.data.database_manager import DatabaseManager
from src.data.quality_checks import check_sample_rate
from src.features.eda import nk_process_eda
from src.features.scaling import scale_min_max
from src.features.transforming import map_trials
from src.log_config import configure_logging
from src.plots.utils import prepare_multiline_hvplot

configure_logging(
    stream_level=logging.DEBUG, ignore_libs=("Comm", "bokeh", "tornado", "matplotlib")
)
logger = logging.getLogger(__name__.rsplit(".", maxsplit=1)[-1])

pl.Config.set_tbl_rows(12)  # for the 12 trials
hv.output(widget_location="bottom", size=130)

In [3]:
db = DatabaseManager()

In [4]:
with db:
    # df = db.get_table("Preprocess_eda")
    df = db.get_table("Feature_eda")  # decimated to 10 Hz


## Comparing neurokit / biopac / highpass filter (0.05 Hz) vs Median smoothing

From the neurokit2 documentation:
  * **High-pass filtering**: Method implemented in Biopac's Acqknowledge. The raw EDA signal
    is passed through a high pass filter with a cutoff frequency of 0.05 Hz
    (cutoff frequency can be adjusted by the ``cutoff`` argument).
  * **Median smoothing**: Method implemented in Biopac's Acqknowledge. The raw EDA signal is
    passed through a median value smoothing filter, which removes areas of rapid change. The
    phasic component is then calculated by subtracting the smoothed signal from the original.
    This method is computationally intensive and the processing time depends on the smoothing
    factor, which can be controlled by the as ``smoothing_factor`` argument, set by default to
    ``4`` seconds. Higher values will produce results more rapidly.

-> high pass filter is much faster than median smoothing

In [5]:
scale_min_max(df).hvplot(
    x="timestamp",
    y=["eda_raw", "eda_phasic", "eda_tonic"],
    groupby="trial_id",
)

BokehModel(combine_events=True, render_bundle={'docs_json': {'2b0a42e8-12dc-4c8a-91fc-1bb932355aba': {'version…

## Join with Stimulus data

In [6]:
with db:
    eda = db.get_table("Preprocess_eda")
    stimulus = db.get_table("Raw_Stimulus")
    trials = db.get_table("Trials")

d = eda.join_asof(
    stimulus,
    on="timestamp",
    by=["trial_id", "trial_number", "participant_id"],
    strategy="nearest",
    coalesce=True,
)
ic(d.equals(df))
ic(eda.height, stimulus.height, d.height)
d


ic| d.equals(df): False
ic| eda.height: 5936076, stimulus.height: 597860, d.height: 5936076


trial_id,trial_number,participant_id,rownumber,timestamp,samplenumber,eda_raw,eda_tonic,eda_phasic,rownumber_right,temperature,rating
u16,u8,u8,u32,f64,i64,f64,f64,f64,u32,f64,f64
1,1,1,37660,294210.3603,57892,0.752359,0.752113,0.000246,0,45.75,42.5
1,1,1,37661,294211.3575,57893,0.754579,0.752115,0.002464,0,45.75,42.5
1,1,1,37663,294224.331,57895,0.753247,0.752117,0.00113,0,45.75,42.5
1,1,1,37664,294242.275,57896,0.754135,0.752119,0.002016,0,45.75,42.5
1,1,1,37666,294248.2588,57898,0.754135,0.752121,0.002014,0,45.75,42.5
1,1,1,37667,294276.1835,57899,0.754135,0.752123,0.002012,0,45.75,42.5
…,…,…,…,…,…,…,…,…,…,…,…
332,12,28,355476,2.7771e6,467073,13.679468,13.578894,-0.012187,21610,45.582614,85.0
332,12,28,355477,2.7771e6,467074,13.679468,13.578894,-0.012765,21611,45.582614,85.0
332,12,28,355478,2.7771e6,467075,13.674363,13.578894,-0.018451,21611,45.582614,85.0


In [None]:
from src.features.transforming import merge_dfs

df = merge_dfs([stimulus, trials], on=["trial_id", "participant_id", "trial_number"])
df

trial_id,trial_number,participant_id,rownumber,timestamp,temperature,rating,stimulus_seed,skin_area,timestamp_start,timestamp_end,duration
u16,u8,u8,u32,f64,f64,f64,u16,u8,f64,f64,f64
1,1,1,0,294224.331,45.75,42.5,396,1,294197.3945,474206.7098,180009.3153
1,1,1,1,294357.9645,45.750102,42.5,396,1,294197.3945,474206.7098,180009.3153
1,1,1,2,294458.0292,45.75041,35.375,396,1,294197.3945,474206.7098,180009.3153
1,1,1,3,294558.6006,45.750922,14.875,396,1,294197.3945,474206.7098,180009.3153
1,1,1,4,294658.3354,45.751639,10.125,396,1,294197.3945,474206.7098,180009.3153
1,1,1,5,294758.4957,45.75256,22.75,396,1,294197.3945,474206.7098,180009.3153
…,…,…,…,…,…,…,…,…,…,…,…
332,12,28,21606,2.7766e6,45.587356,85.0,133,1,2.5971e6,2.7771e6,180026.123
332,12,28,21607,2.7767e6,45.585285,85.0,133,1,2.5971e6,2.7771e6,180026.123
332,12,28,21608,2.7768e6,45.583802,85.0,133,1,2.5971e6,2.7771e6,180026.123


In [9]:
d = d.with_columns(
    [
        (col("timestamp") - col("timestamp").min().over("trial_id")).alias(
            "normalized_timestamp"
        )
    ]
)
prepare_multiline_hvplot(scale_min_max(d)).hvplot(
    x="normalized_timestamp",
    y=["eda_raw", "eda_phasic", "eda_tonic", "rating"],
    groupby="trial_id",
)

BokehModel(combine_events=True, render_bundle={'docs_json': {'4a4f5ccc-f8fe-4d15-8b7e-f150c3084b33': {'version…

In [None]:
query = """
SELECT * FROM Preprocess_EDA AS pe
ASOF JOIN Raw_Stimulus rs USING (trial_id, timestamp)
ORDER BY pe.trial_id, pe.timestamp
"""

with db:
    df = db.execute(query).pl()
    trials = db.get_table("Trials")

assert df.height == df.unique(subset=["trial_id", "timestamp"]).height
df = df.join(trials, on="trial_id").sort("trial_id", "timestamp")
df

trial_id,trial_number,participant_id,rownumber,timestamp,samplenumber,eda_d_battery,eda_raw,eda_d_packetreceptionrate,eda_tonic,eda_phasic,temperature,rating,trial_number_right,participant_id_right,stimulus_seed,skin_area,timestamp_start,timestamp_end,duration
u16,u8,u8,u32,f64,i64,f64,f64,i64,f64,f64,f64,f64,u8,u8,u16,u8,f64,f64,f64
1,1,1,0,294224.331,57895,3677.435897,0.753247,100,0.752117,0.00113,45.75,42.5,1,1,396,1,294197.3945,474206.7098,180009.3153
1,1,1,0,294242.275,57896,3687.630769,0.754135,100,0.752119,0.002016,45.75,42.5,1,1,396,1,294197.3945,474206.7098,180009.3153
1,1,1,0,294248.2588,57898,3692.0,0.754135,100,0.752121,0.002014,45.75,42.5,1,1,396,1,294197.3945,474206.7098,180009.3153
1,1,1,0,294276.1835,57899,3678.892308,0.754135,100,0.752123,0.002012,45.75,42.5,1,1,396,1,294197.3945,474206.7098,180009.3153
1,1,1,0,294277.1819,57900,3692.0,0.752359,100,0.752125,0.000234,45.75,42.5,1,1,396,1,294197.3945,474206.7098,180009.3153
1,1,1,0,294309.0952,57902,3705.107692,0.752359,100,0.752127,0.000232,45.75,42.5,1,1,396,1,294197.3945,474206.7098,180009.3153
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
332,12,28,21610,2.7771e6,467073,3603.158974,13.679468,99,13.578894,-0.012187,45.582614,85.0,12,28,133,1,2.5971e6,2.7771e6,180026.123
332,12,28,21610,2.7771e6,467074,3617.723077,13.679468,99,13.578894,-0.012765,45.582614,85.0,12,28,133,1,2.5971e6,2.7771e6,180026.123
332,12,28,21610,2.7771e6,467075,3582.769231,13.674363,99,13.578894,-0.018451,45.582614,85.0,12,28,133,1,2.5971e6,2.7771e6,180026.123


In [None]:
df = df.with_columns(
    [
        (col("timestamp") - col("timestamp").min().over("trial_id")).alias(
            "normalized_timestamp"
        )
    ]
)
prepare_multiline_hvplot(scale_min_max(df)).plot(
    x="normalized_timestamp",
    y=["eda_raw", "eda_phasic", "eda_tonic", "rating"],
    groupby="trial_id",
)

BokehModel(combine_events=True, render_bundle={'docs_json': {'961779e4-7e02-4d34-a831-96d26ecb9dd3': {'version…

In [None]:
result = (
    df.with_columns([(col("normalized_timestamp") // 1000 * 1000).alias("time_bin")])
    .group_by(["stimulus_seed", "time_bin"])
    .agg(
        [
            col("eda_tonic").mean().alias("avg_eda_tonic"),
            col("eda_phasic").mean().alias("avg_eda_phasic"),
            col("eda_tonic").std().alias("std_dev"),
            col("rating").mean().alias("avg_rating"),
            col("temperature").mean().alias("avg_temperature"),
            pl.len().alias("sample_size"),
        ]
    )
    .with_columns(
        [
            (
                col("avg_eda_tonic")
                - 1.96 * (col("std_dev") / col("sample_size").sqrt())
            ).alias("ci_lower"),
            (
                col("avg_eda_tonic")
                + 1.96 * (col("std_dev") / col("sample_size").sqrt())
            ).alias("ci_upper"),
        ]
    )
    # .select(
    #     [
    #         "stimulus_seed",
    #         "time_bin",
    #         "avg_eda_tonic",
    #         "ci_lower",
    #         "ci_upper",
    #         "avg_eda_phasic",
    #     ]
    # )
    .sort(["stimulus_seed", "time_bin"])
)
result

stimulus_seed,time_bin,avg_eda_tonic,avg_eda_phasic,std_dev,avg_rating,avg_temperature,sample_size,ci_lower,ci_upper
u16,f64,f64,f64,f64,f64,f64,u32,f64,f64
133,0.0,22.886292,0.373452,10.601662,43.415821,44.254339,2808,22.494161,23.278423
133,1000.0,22.782184,0.812295,10.551183,58.040525,44.346072,2773,22.389465,23.174904
133,2000.0,22.919694,0.456742,10.536789,62.033432,44.557211,2793,22.528917,23.310471
133,3000.0,22.831353,-0.118834,10.50778,65.607251,44.828431,2796,22.441861,23.220845
133,4000.0,22.763015,-0.396032,10.499473,69.239004,45.185155,2785,22.373063,23.152967
133,5000.0,22.797022,-0.443184,10.393702,73.333363,45.533857,2777,22.410443,23.183601
…,…,…,…,…,…,…,…,…,…
952,174000.0,19.440774,0.052874,10.663146,14.925999,44.729711,2777,19.044173,19.837374
952,175000.0,19.343655,0.02307,10.596552,12.240214,44.606854,2759,18.948248,19.739063
952,176000.0,19.377804,0.000619,10.627787,10.576108,44.499223,2797,18.983934,19.771674


In [None]:
result = scale_min_max(result, exclude_additional_columns=["time_bin"])
result



stimulus_seed,time_bin,avg_eda_tonic,avg_eda_phasic,std_dev,avg_rating,avg_temperature,sample_size,ci_lower,ci_upper
u16,f64,f64,f64,f64,f64,f64,u32,f64,f64
133,0.0,0.853967,0.661401,0.604283,0.439204,0.426908,2808,0.818985,0.81036
133,1000.0,0.841162,0.890469,0.589217,0.593368,0.449696,2773,0.802396,0.794426
133,2000.0,0.858076,0.704877,0.584922,0.635459,0.502146,2793,0.824492,0.815293
133,3000.0,0.847209,0.404436,0.576264,0.673131,0.569521,2796,0.810698,0.801497
133,4000.0,0.838804,0.259744,0.573784,0.711415,0.658137,2785,0.799797,0.791049
133,5000.0,0.842987,0.235131,0.542217,0.754575,0.74476,2777,0.80572,0.795765
…,…,…,…,…,…,…,…,…,…
952,174000.0,0.430152,0.494065,0.622633,0.138884,0.544998,2777,0.272339,0.280686
952,175000.0,0.418206,0.478508,0.602758,0.110572,0.514478,2759,0.25714,0.265553
952,176000.0,0.422406,0.466789,0.61208,0.09303,0.487741,2797,0.262795,0.270572


In [None]:
result.plot(
    x="time_bin",
    y=["avg_temperature", "avg_eda_tonic"],  # "avg_eda_phasic"],
    groupby="stimulus_seed",
    kind="line",
    grid=True,
) * result.hvplot.area(
    x="time_bin",
    y="ci_lower",
    y2="ci_upper",
    groupby="stimulus_seed",
    alpha=0.5,
    line_width=0,
    fill_color="lightblue",
    grid=True,
    xlim=(0, 180 * 1000),
)

BokehModel(combine_events=True, render_bundle={'docs_json': {'71811756-e8e1-402e-b23e-5a1c03c1dd93': {'version…

In [None]:
scale_min_max(df).plot(
    x="timestamp",
    y=["eda_raw", "eda_phasic", "eda_tonic", "rating", "temperature"],
    groupby="trial_id",
)

BokehModel(combine_events=True, render_bundle={'docs_json': {'5917d8ca-d26c-411d-83ff-36eca57ff399': {'version…