In [2]:
%%capture
# `%%capture` swallows everything this cell would print (e.g. the new working
# directory echoed by `%cd`).
from pathlib import Path

# If the kernel was started inside a directory called "notebooks", step up one
# level — presumably so the `src.` imports in the next cell resolve from the
# project root (TODO confirm project layout).
# NOTE(review): autoreload is loaded only inside this branch, so a kernel that
# already starts outside "notebooks" runs without it.
if Path.cwd().stem == "notebooks":
    %cd ..
    %load_ext autoreload
    %autoreload 2

In [3]:
import logging
from pathlib import Path

import altair as alt
import holoviews as hv
import hvplot.polars  # noqa
import matplotlib.pyplot as plt
import numpy as np
import polars as pl
from polars import col

from src.data.database_manager import DatabaseManager
from src.features.labels import add_labels
from src.features.resampling import interpolate_and_fill_nulls
from src.features.scaling import scale_min_max
from src.features.transforming import map_trials, merge_dfs
from src.features.utils import to_describe
from src.log_config import configure_logging
from src.plots.confidence_intervals import plot_confidence_intervals
from src.plots.correlations import (
    aggregate_correlations_fisher_z,
    calculate_correlations_by_trial,
    plot_correlations_by_participant,
    plot_correlations_by_trial,
)
from src.plots.utils import prepare_multiline_hvplot

# Name the logger after the last dotted component of the module name.
logger = logging.getLogger(__name__.rpartition(".")[-1])
# Stream DEBUG and above, passing the listed third-party libraries as `ignore_libs`.
configure_logging(
    ignore_libs=["matplotlib", "Comm", "bokeh", "tornado", "param", "numba"],
    stream_level=logging.DEBUG,
)

# Display options
pl.Config.set_tbl_rows(12)  # for the 12 trials
hv.output(size=130, widget_location="bottom")

In [4]:
# Database handle; the tables are read from it inside the `with db:` block below.
db = DatabaseManager()

In [None]:
# Fetch the four feature tables and the trial metadata inside the `with db:` block.
table_names = (
    "Feature_Pupil",
    "Feature_EDA",
    "Feature_PPG",
    "Feature_Stimulus",
    "Trials",
)
with db:
    pupil, eda, ppg, stimulus, trials = (
        db.get_table(name) for name in table_names
    )

# Merge data dfs
signal_tables = [pupil, stimulus, eda, ppg]
df = merge_dfs(signal_tables)


# Add random walk column
# The walk feeds the walk-vs-rating correlation cell, so it has to be
# reproducible: use a dedicated, seeded generator instead of the global
# (unseeded) `np.random` state. Re-running this cell recreates the generator
# and therefore reproduces the same walks.
WALK_SEED = 42
walk_rng = np.random.default_rng(WALK_SEED)


def add_random_walk(group):
    """Append a `walk` column containing a Gaussian random walk.

    Args:
        group: Polars DataFrame holding the rows of one group (here: one trial).

    Returns:
        `group` with an additional `walk` column: the cumulative sum of one
        standard-normal step (mean 0, std 1) per row.
    """
    steps = walk_rng.normal(0, 1, len(group))
    return group.with_columns(walk=pl.Series(np.cumsum(steps)))


# maintain_order=True makes the groups arrive in a stable order, so every trial
# consumes the same stretch of the seeded generator on each run.
df = df.group_by("trial_id", maintain_order=True).map_groups(add_random_walk)

# Join the trial metadata onto the signals via the shared trial keys
trial_keys = ["trial_id", "participant_id", "trial_number"]
df = merge_dfs(dfs=[df, trials], on=trial_keys)
df = interpolate_and_fill_nulls(df)
# Expose `pupil_mean` under the shorter name `pupil` as well. This adds a
# column; the original `pupil_mean` is kept (it is not a rename).
df = df.with_columns(pupil=col("pupil_mean"))
df

trial_id,trial_number,participant_id,rownumber,timestamp,pupil_l_raw,pupil_r_raw,pupil_r,pupil_l,pupil_mean,temperature,rating,samplenumber,eda_raw,eda_tonic,eda_phasic,ppg_raw,ppg_heartrate,ppg_ibi,ppg_clean,ppg_rate,ppg_quality,ppg_peaks,heartrate,walk,stimulus_seed,skin_area,timestamp_start,timestamp_end,duration,pupil
u16,u8,u8,u32,f64,f64,f64,f64,f64,f64,f64,f64,i64,f64,f64,f64,f64,i64,f64,f64,f64,f64,i64,f64,f64,u16,u8,f64,f64,f64,f64
1,1,1,17631,294198.9762,5.73628,6.245389,5.640989,5.183107,5.412048,0.0,0.425,,0.743774,0.743503,0.000271,1424.175824,,-1.0,-14.227641,65.896546,0.975548,,65.999319,0.88908,396,1,294197.3945,474206.7098,180009.3153,5.412048
1,1,1,,294210.3603,5.729314,6.242299,5.640126,5.18257,5.411348,0.0,0.425,57892,0.743774,0.743503,0.000271,1424.175824,-1,-1.0,-14.227641,65.896546,0.975548,0,65.999319,0.772848,396,1,294197.3945,474206.7098,180009.3153,5.411348
1,1,1,,294211.3575,5.728704,6.242028,5.64005,5.182523,5.411286,0.0,0.425,,0.743784,0.743503,0.00028,1438.095238,-1,-1.0,-16.831259,65.896546,0.975548,0,65.999213,0.585711,396,1,294197.3945,474206.7098,180009.3153,5.411286
1,1,1,17632,294215.605,5.726105,6.240875,5.639728,5.182322,5.411025,0.0,0.425,,0.743825,0.743504,0.000321,1439.054647,,-1.0,-17.843339,65.896546,0.975548,,65.999177,1.480752,396,1,294197.3945,474206.7098,180009.3153,5.411025
1,1,1,,294224.331,5.718118,6.236929,5.639069,5.181911,5.41049,0.0,0.425,,0.743911,0.743505,0.000405,1441.025641,-1,-1.0,-19.922539,65.896546,0.975548,0,65.999102,1.424725,396,1,294197.3945,474206.7098,180009.3153,5.41049
1,1,1,17633,294232.4178,5.710716,6.233272,5.638458,5.18153,5.409994,0.000004,0.425,,0.74399,0.743506,0.000483,1425.838269,,-1.0,-21.676652,65.896546,0.975548,,65.99905,4.299474,396,1,294197.3945,474206.7098,180009.3153,5.409994
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
332,12,28,,2.7771e6,4.105768,3.997393,3.98118,4.025593,4.003387,0.155438,0.85,,13.524713,13.423457,-0.011911,1814.652015,72,-1.0,72.383604,65.934066,0.987849,0,72.398497,68.703018,133,1,2.5971e6,2.7771e6,180026.123,4.003387
332,12,28,166440,2.7771e6,4.102623,3.993397,3.981166,4.025572,4.003369,0.155438,0.85,,13.525269,13.423457,-0.011587,1811.196612,,-1.0,64.610616,65.934066,0.987849,,72.39791,69.307137,133,1,2.5971e6,2.7771e6,180026.123,4.003369
332,12,28,,2.7771e6,4.09266,3.976325,3.981131,4.025521,4.003326,0.155438,0.85,,13.526833,13.423457,-0.010673,1801.465201,72,-1.0,42.719644,65.934066,0.987849,0,72.396258,69.879252,133,1,2.5971e6,2.7771e6,180026.123,4.003326


In [None]:
# Random walk vs. rating
col1, col2 = "walk", "rating"
corr_col = f"{col1}_{col2}_corr"

corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
corr_by_participant = aggregate_correlations_fisher_z(
    corr_by_trial,
    corr_col,
    "participant_id",
    include_ci=True,
)
# NOTE(review): only the last expression of a cell is auto-displayed. If
# plot_correlations_by_trial returns a chart instead of rendering it as a side
# effect, its result is dropped here — confirm.
plot_correlations_by_trial(corr_by_trial, corr_col)
plot_correlations_by_participant(corr_by_participant, corr_col)

In [53]:
# Plots whatever `col1`, `col2` and `corr_by_participant` currently hold in the
# kernel; none of them is defined in this cell, so the result depends on which
# correlation cell ran last.
plot_correlations_by_participant(corr_by_participant, f"{col1}_{col2}_corr")

In [19]:
# Temperature vs. rating
col1, col2 = "temperature", "rating"
corr_col = f"{col1}_{col2}_corr"

corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
corr_by_participant = aggregate_correlations_fisher_z(
    corr_by_trial,
    corr_col,
    "participant_id",
    include_ci=True,
)
plot_correlations_by_trial(corr_by_trial, corr_col)
# plot_correlations_by_participant(corr_by_participant, corr_col)

In [7]:
# Pupil vs. rating
col1, col2 = "pupil", "rating"
corr_col = f"{col1}_{col2}_corr"

corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
corr_by_participant = aggregate_correlations_fisher_z(
    corr_by_trial,
    corr_col,
    "participant_id",
    include_ci=True,
)
plot_correlations_by_trial(corr_by_trial, corr_col)
# plot_correlations_by_participant(corr_by_participant, corr_col)

In [8]:
# Trial 259: pupil, rating and tonic EDA after scale_min_max, in one plot
trial_259 = scale_min_max(df).filter(col("trial_id") == 259)
trial_259.hvplot(x="timestamp", y=["pupil", "rating", "eda_tonic"])

In [9]:
# Pupil vs. temperature
col1, col2 = "pupil", "temperature"
corr_col = f"{col1}_{col2}_corr"

corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
corr_by_participant = aggregate_correlations_fisher_z(
    corr_by_trial,
    corr_col,
    "participant_id",
    include_ci=True,
)
# plot_correlations_by_trial(corr_by_trial, corr_col)
plot_correlations_by_participant(corr_by_participant, corr_col)

In [10]:
# Tonic EDA vs. rating
col1, col2 = "eda_tonic", "rating"
corr_col = f"{col1}_{col2}_corr"

corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
corr_by_participant = aggregate_correlations_fisher_z(
    corr_by_trial,
    corr_col,
    "participant_id",
    include_ci=True,
)
plot_correlations_by_trial(corr_by_trial, corr_col)
# plot_correlations_by_participant(corr_by_participant, corr_col)

In [11]:
# Trial 202: pupil, rating and tonic EDA after scale_min_max, in one plot
trial_202 = scale_min_max(df).filter(col("trial_id") == 202)
trial_202.hvplot(x="timestamp", y=["pupil", "rating", "eda_tonic"])

In [12]:
# Phasic EDA vs. rating
col1, col2 = "eda_phasic", "rating"
corr_col = f"{col1}_{col2}_corr"

corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
corr_by_participant = aggregate_correlations_fisher_z(
    corr_by_trial,
    corr_col,
    "participant_id",
    include_ci=True,
)
plot_correlations_by_trial(corr_by_trial, corr_col)
# plot_correlations_by_participant(corr_by_participant, corr_col)

In [13]:
# Heart rate vs. rating
col1, col2 = "heartrate", "rating"
corr_col = f"{col1}_{col2}_corr"

corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
corr_by_participant = aggregate_correlations_fisher_z(
    corr_by_trial,
    corr_col,
    "participant_id",
    include_ci=True,
)
plot_correlations_by_trial(corr_by_trial, corr_col)
# plot_correlations_by_participant(corr_by_participant, corr_col)

In [None]:
# Participant 16: heart rate and rating after scale_min_max, grouped by trial_id
participant_16 = scale_min_max(df).filter(col("participant_id") == 16)
participant_16.hvplot(
    x="timestamp",
    y=["heartrate", "rating"],
    groupby="trial_id",
)

BokehModel(combine_events=True, render_bundle={'docs_json': {'d4ba6cd2-2a17-4f3d-8b0c-17fc1ae343cb': {'version…

Hypothesis: the effect is mediated by respiration.
-> Instruct participants to exhale while the pain decreases?