In [95]:
%%capture
# Notebook bootstrap. `%%capture` swallows the output this cell would print
# (e.g. the directory echoed by `%cd`).
from pathlib import Path

# Only act while the working directory is named "notebooks". After `%cd ..` the
# condition is false, so re-running this cell does not climb another level.
# Presumably the move to the parent directory is what makes the `src.` imports
# in the next cell resolvable — TODO confirm.
if Path.cwd().stem == "notebooks":
    %cd ..
    # NOTE(review): autoreload is enabled only inside this branch, i.e. not when
    # the kernel already starts outside "notebooks" — confirm that is intended.
    %load_ext autoreload
    %autoreload 2

In [96]:
import logging
from pathlib import Path

import altair as alt
import holoviews as hv
import hvplot.polars  # noqa
import numpy as np
import polars as pl
from polars import col

from src.data.database_manager import DatabaseManager
from src.features.eda import detrend_tonic_component
from src.features.scaling import scale_min_max
from src.features.utils import to_describe
from src.log_config import configure_logging
from src.plots.correlations import (
    aggregate_correlations_fisher_z,
    calculate_correlations_by_trial,
    plot_correlations_by_participant,
    plot_correlations_by_trial,
    plot_max_correlations_by_participant,
)

# Use the last dotted component of the module name as the logger name; when
# `__name__` contains no dot (typically the case in a notebook) the whole name
# is used.
logger = logging.getLogger(__name__.rpartition(".")[2])

# Stream log records at DEBUG level. The listed third-party libraries are handed
# to `ignore_libs` (presumably to silence their loggers — see log_config).
configure_logging(
    ignore_libs=["matplotlib", "Comm", "bokeh", "tornado", "param", "numba"],
    stream_level=logging.DEBUG,
)

# Display defaults for the rest of the notebook.
hv.output(widget_location="bottom", size=130)
pl.Config.set_tbl_rows(12)  # for the 12 trials

In [97]:
# Load the merged and labeled recordings. Trials with measurement problems are
# excluded by the query flag.
db = DatabaseManager()
with db:
    df = db.get_table(
        "Merged_and_Labeled_Data",
        exclude_trials_with_measurement_problems=True,
    )

# Two derived columns, added in one chain:
#   * the detrended tonic EDA component (shows up as `eda_tonic_detrended` in
#     the table output below). Author's note, kept verbatim: "different
#     paradigms: statistics and statistical learning / non-causal and causal"
#   * `pupil`, a shorter alias of `pupil_mean` (the original column is kept,
#     so this adds a column rather than renaming one)
df = detrend_tonic_component(df).with_columns(col("pupil_mean").alias("pupil"))

In [98]:
# Drop the first 20 seconds: keep only rows whose normalized_timestamp is at
# least 20 * 1000. The column appears to be in milliseconds (the first retained
# row in the output reads 20000.0) — TODO confirm the unit.
df = df.filter(col("normalized_timestamp").ge(20 * 1000))
df

trial_id,trial_number,participant_id,timestamp,temperature,rating,eda_raw,eda_tonic,eda_phasic,ppg_raw,ppg_ibi_shimmer,heart_rate,pupil_l_raw,pupil_r_raw,pupil_r,pupil_l,pupil_mean,brow_furrow,cheek_raise,mouth_open,upper_lip_raise,nose_wrinkle,temperature_absolute,normalized_timestamp,stimulus_seed,skin_patch,decreasing_intervals,major_decreasing_intervals,increasing_intervals,strictly_increasing_intervals,strictly_increasing_intervals_without_plateaus,plateau_intervals,prolonged_minima_intervals,eda_tonic_detrended,pupil
u16,u8,u8,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,u16,u8,u16,u16,u16,u16,u16,u16,u16,f64,f64
1,1,1,314198.9762,0.463265,0.87188,0.743199,0.743294,-0.000095,1375.838609,-1.67065,59.961244,4.577485,4.825832,4.882398,4.528868,4.705633,0.00087,0.000512,0.000438,0.00001,0.000085,46.436483,20000.0,396,1,0,0,1,0,0,1,0,-0.000316,4.705633
1,1,1,314298.9762,0.463265,0.824514,0.74323,0.743297,-0.000067,1349.613188,-0.775775,60.332773,4.590687,4.831406,4.872379,4.528778,4.700579,0.000702,0.000403,0.000291,2.9873e-7,0.000041,46.436483,20100.0,396,1,0,0,1,0,0,1,0,-0.000313,4.700579
1,1,1,314398.9762,0.463265,0.815,0.743276,0.7433,-0.000023,1325.324829,-0.88756,60.725636,4.593487,4.862389,4.861224,4.529168,4.695196,0.000663,0.000418,0.000106,0.0,0.000058,46.436483,20200.0,396,1,0,0,1,0,0,1,0,-0.000311,4.695196
1,1,1,314498.9762,0.463265,0.815,0.743297,0.743303,-0.000006,1297.487842,-1.209588,61.149882,4.555876,4.885492,4.847552,4.526335,4.686943,0.000739,0.00045,0.00013,0.0,0.00005,46.436483,20300.0,396,1,0,0,1,0,0,1,0,-0.000307,4.686943
1,1,1,314598.9762,0.463265,0.815,0.743265,0.743306,-0.000041,1291.534487,-0.726068,61.587131,4.497064,4.882886,4.832568,4.50622,4.669394,0.000657,0.000379,0.000132,0.0,0.000058,46.436483,20400.0,396,1,0,0,1,0,0,1,0,-0.000304,4.669394
1,1,1,314698.9762,0.463265,0.815,0.743194,0.74331,-0.000117,1371.283878,-1.318697,62.052729,4.464745,4.862969,4.830177,4.488858,4.659517,0.000824,0.000453,0.000169,0.0,0.00005,46.436483,20500.0,396,1,0,0,1,0,0,1,0,-0.0003,4.659517
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
516,12,50,2.6956e6,0.031312,0.0,26.688608,26.716225,0.003169,1323.813266,-5.036073,59.279672,5.166699,4.766597,4.768156,5.167254,4.967705,0.000465,0.001124,0.007081,0.00022,0.004168,45.859463,179500.0,806,1,2580,1548,0,0,0,0,0,-0.071205,4.967705
516,12,50,2.6957e6,0.030874,0.0,26.68499,26.716212,0.000389,1244.871959,44.337204,58.673362,5.026438,4.683481,4.690238,5.025632,4.857935,0.000698,0.000846,0.007044,0.000348,0.004289,45.858632,179600.0,806,1,2580,1548,0,0,0,0,0,-0.070247,4.857935
516,12,50,2.6958e6,0.030555,0.0,26.683944,26.716204,0.000132,1223.099413,55.805969,58.073701,4.929092,4.633568,4.655852,4.930955,4.793404,0.001266,0.000706,0.011259,0.000379,0.006841,45.858026,179700.0,806,1,2580,1548,0,0,0,0,0,-0.069284,4.793404


In [99]:
# Interactive overview of selected signals over time, with one widget each for
# participant and trial. The frame goes through `scale_min_max` first
# (presumably so the signals share a comparable range — see features/scaling).
# Further columns that can be added to `y` when needed:
#   "pupil", "rating", "pupil_r_raw", "pupil_l_raw"
scale_min_max(df).hvplot(
    groupby=["participant_id", "trial_number"],
    x="timestamp",
    y=["eda_raw", "eda_phasic", "eda_tonic", "temperature"],
)

BokehModel(combine_events=True, render_bundle={'docs_json': {'a711a128-e907-4b23-a710-c7c4de9ce7fd': {'version…

# Temperature

In [100]:
# Fixed color per temperature/<signal> correlation; passed as `color_map` to
# plot_correlations_by_participant in the cells of this section. The keys look
# like the "<col1>_<col2>_corr" stem of the correlation column names.
COLORS = dict(
    temperature_rating_corr="#1f77b4",  # blue
    temperature_pupil_corr="#ff7f0e",  # orange
    temperature_eda_tonic_corr="#d62728",  # red
    temperature_eda_phasic_corr="#8a2be2",  # blue-violet
    temperature_heart_rate_corr="#2ca02c",  # green
)


## Temperature / Rating

In [101]:
# Correlation between temperature and rating.
col1, col2 = "temperature", "rating"

# One correlation per trial, then one aggregated value (with confidence
# interval) per participant; judging by the helper's name the aggregation goes
# through Fisher's z.
corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
corr_by_participant = aggregate_correlations_fisher_z(
    corr_by_trial, col1, col2, "participant_id", include_ci=True
)
# NOTE(review): the return value is discarded here; unless the helper displays
# the chart itself, this call has no visible effect.
plot_correlations_by_trial(corr_by_trial, col1, col2)
# Pass the shared palette like every other per-participant plot in this
# notebook, so the standalone chart uses the color defined for this pair in
# COLORS. `with_config=False` presumably leaves chart-level configuration to
# the combined "All" cell.
rating = plot_correlations_by_participant(
    corr_by_participant, col1, col2, with_config=False, color_map=COLORS
)
rating


In [102]:
corr_by_participant.describe()

statistic,participant_id,participant_id_temperature_rating_corr_mean,participant_id_temperature_rating_corr_ci_lower,participant_id_temperature_rating_corr_ci_upper
str,f64,f64,f64,f64
"""count""",42.0,42.0,42.0,42.0
"""null_count""",0.0,0.0,0.0,0.0
"""mean""",25.928571,0.775713,0.739026,0.807456
"""std""",14.68763,0.045116,0.054968,0.038745
"""min""",1.0,0.686459,0.627146,0.725711
"""25%""",13.0,0.744626,0.700885,0.786013
"""50%""",26.0,0.787325,0.752075,0.814521
"""75%""",38.0,0.80805,0.776999,0.830903
"""max""",50.0,0.867533,0.853177,0.880576


## Temperature / Pupil

In [103]:
# Correlation between temperature and pupil size.
col1, col2 = "temperature", "pupil"

# One correlation per trial, then one aggregated value (with confidence
# interval) per participant; judging by the helper's name the aggregation goes
# through Fisher's z.
corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
corr_by_participant = aggregate_correlations_fisher_z(
    corr_by_trial, col1, col2, "participant_id", include_ci=True
)
# NOTE(review): the return value is discarded here; unless the helper displays
# the chart itself, this call has no visible effect.
plot_correlations_by_trial(corr_by_trial, col1, col2)
# `with_config` must be the boolean False; `-False` evaluates to the int 0.
pupil = plot_correlations_by_participant(
    corr_by_participant, col1, col2, with_config=False, color_map=COLORS
)
pupil

In [104]:
corr_by_participant.describe()

statistic,participant_id,participant_id_temperature_pupil_corr_mean,participant_id_temperature_pupil_corr_ci_lower,participant_id_temperature_pupil_corr_ci_upper
str,f64,f64,f64,f64
"""count""",42.0,42.0,42.0,42.0
"""null_count""",0.0,0.0,0.0,0.0
"""mean""",25.928571,0.267595,0.151699,0.376333
"""std""",14.68763,0.214162,0.228416,0.200965
"""min""",1.0,-0.179057,-0.278839,-0.128282
"""25%""",13.0,0.125052,-0.010478,0.259849
"""50%""",26.0,0.290401,0.153955,0.415697
"""75%""",38.0,0.455457,0.334561,0.560071
"""max""",50.0,0.633123,0.545617,0.706972


## Temperature / EDA Tonic

In [105]:
# Correlation between temperature and the tonic EDA component.
# Note that the correlation values would be slightly higher if we used the
# detrended EDA tonic component (see eda.py). Ordering of correlation strength:
# EDA tonic detrended > EDA tonic > EDA raw > EDA phasic
col1, col2 = "temperature", "eda_tonic"

# One correlation per trial, then one aggregated value (with confidence
# interval) per participant; judging by the helper's name the aggregation goes
# through Fisher's z.
corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
corr_by_participant = aggregate_correlations_fisher_z(
    corr_by_trial, col1, col2, "participant_id", include_ci=True
)
# plot_correlations_by_trial(corr_by_trial, col1, col2)
# `with_config` must be the boolean False; `-False` evaluates to the int 0.
eda_tonic = plot_correlations_by_participant(
    corr_by_participant, col1, col2, with_config=False, color_map=COLORS
)
eda_tonic


In [106]:
corr_by_participant.describe()

statistic,participant_id,participant_id_temperature_eda_tonic_corr_mean,participant_id_temperature_eda_tonic_corr_ci_lower,participant_id_temperature_eda_tonic_corr_ci_upper
str,f64,f64,f64,f64
"""count""",42.0,42.0,42.0,42.0
"""null_count""",0.0,0.0,0.0,0.0
"""mean""",25.928571,0.216795,0.086044,0.338928
"""std""",14.68763,0.207387,0.208802,0.202433
"""min""",1.0,-0.094434,-0.203011,0.016438
"""25%""",13.0,0.051318,-0.078004,0.159491
"""50%""",26.0,0.156169,0.040391,0.319127
"""75%""",38.0,0.373497,0.229845,0.501141
"""max""",50.0,0.684461,0.591577,0.761265


## Temperature / EDA Phasic

In [107]:
# Correlation between temperature and the phasic EDA component.
col1, col2 = "temperature", "eda_phasic"

# One correlation per trial, then one aggregated value (with confidence
# interval) per participant; judging by the helper's name the aggregation goes
# through Fisher's z.
corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
corr_by_participant = aggregate_correlations_fisher_z(
    corr_by_trial, col1, col2, "participant_id", include_ci=True
)
# plot_correlations_by_trial(corr_by_trial, col1, col2)
# `with_config` must be the boolean False; `-False` evaluates to the int 0.
eda_phasic = plot_correlations_by_participant(
    corr_by_participant, col1, col2, with_config=False, color_map=COLORS
)
eda_phasic

In [108]:
corr_by_participant.describe()

statistic,participant_id,participant_id_temperature_eda_phasic_corr_mean,participant_id_temperature_eda_phasic_corr_ci_lower,participant_id_temperature_eda_phasic_corr_ci_upper
str,f64,f64,f64,f64
"""count""",42.0,42.0,42.0,42.0
"""null_count""",0.0,0.0,0.0,0.0
"""mean""",25.928571,0.07873,0.037873,0.119372
"""std""",14.68763,0.057321,0.065947,0.05117
"""min""",1.0,-0.058635,-0.134986,-0.007413
"""25%""",13.0,0.048436,-0.001593,0.088374
"""50%""",26.0,0.080869,0.044934,0.124931
"""75%""",38.0,0.114724,0.083222,0.15154
"""max""",50.0,0.189497,0.160349,0.218316


## Temperature / Heart Rate

In [109]:
# Correlation between temperature and heart rate.
col1, col2 = "temperature", "heart_rate"

# One correlation per trial, then one aggregated value (with confidence
# interval) per participant; judging by the helper's name the aggregation goes
# through Fisher's z.
corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
corr_by_participant = aggregate_correlations_fisher_z(
    corr_by_trial, col1, col2, "participant_id", include_ci=True
)
# NOTE(review): the return value is discarded here; unless the helper displays
# the chart itself, this call has no visible effect.
plot_correlations_by_trial(corr_by_trial, col1, col2)
# `with_config` must be the boolean False; `-False` evaluates to the int 0.
heart_rate = plot_correlations_by_participant(
    corr_by_participant, col1, col2, with_config=False, color_map=COLORS
)
heart_rate

In [110]:
corr_by_participant.describe()

statistic,participant_id,participant_id_temperature_heart_rate_corr_mean,participant_id_temperature_heart_rate_corr_ci_lower,participant_id_temperature_heart_rate_corr_ci_upper
str,f64,f64,f64,f64
"""count""",42.0,42.0,42.0,42.0
"""null_count""",0.0,0.0,0.0,0.0
"""mean""",25.928571,0.123332,0.040691,0.204308
"""std""",14.68763,0.158353,0.162882,0.153987
"""min""",1.0,-0.15432,-0.263505,-0.089084
"""25%""",13.0,-0.020627,-0.102988,0.056041
"""50%""",26.0,0.117718,0.032998,0.191113
"""75%""",38.0,0.221064,0.135918,0.302688
"""max""",50.0,0.53165,0.477921,0.581416


## Temperature / All

In [111]:
# Overlay the per-participant charts of this section in one figure and apply
# the chart-level settings (title, legend font size, axis grid) once, on the
# combined chart.
(
    heart_rate  # author's note: put a plot built with the color_map first
    + eda_tonic
    + eda_phasic
    + pupil
    + rating
).properties(
    title="Correlations with Temperature",
).configure_legend(
    labelFontSize=14,
).configure_axis(
    grid=True,
)

# Rating

In [112]:
# Fixed color per rating/<signal> correlation; passed as `color_map` to
# plot_correlations_by_participant in the cells of this section.
# NOTE: this rebinds the global name COLORS, so any temperature cell re-run
# after this point picks up this palette instead of the temperature one.
COLORS = dict(
    rating_temperature_corr="#1f77b4",  # blue
    rating_pupil_corr="#ff7f0e",  # orange
    rating_eda_tonic_corr="#d62728",  # red
    rating_eda_phasic_corr="#8a2be2",  # blue-violet
    rating_heart_rate_corr="#2ca02c",  # green
)

## Rating / Temperature

In [113]:
# Correlation between rating and temperature.
col1 = "rating"
col2 = "temperature"

# Per-trial correlations, then one aggregated value with confidence interval
# per participant.
corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
corr_by_participant = aggregate_correlations_fisher_z(
    corr_by_trial,
    col1,
    col2,
    "participant_id",
    include_ci=True,
)
# NOTE(review): result is not displayed or stored by this cell.
plot_correlations_by_trial(corr_by_trial, col1, col2)
# Kept in a variable so the combined "Rating / All" cell can layer it.
temperature = plot_correlations_by_participant(
    corr_by_participant,
    col1,
    col2,
    with_config=False,
    color_map=COLORS,
)
temperature


## Rating / Pupil

In [114]:
# Correlation between rating and pupil size.
col1 = "rating"
col2 = "pupil"

# Per-trial correlations, then one aggregated value with confidence interval
# per participant.
corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
corr_by_participant = aggregate_correlations_fisher_z(
    corr_by_trial,
    col1,
    col2,
    "participant_id",
    include_ci=True,
)
# NOTE(review): result is not displayed or stored by this cell.
plot_correlations_by_trial(corr_by_trial, col1, col2)
# Kept in a variable so the combined "Rating / All" cell can layer it.
pupil = plot_correlations_by_participant(
    corr_by_participant,
    col1,
    col2,
    with_config=False,
    color_map=COLORS,
)
pupil

## Rating / EDA Tonic

In [115]:
# Correlation between rating and the tonic EDA component.
col1 = "rating"
col2 = "eda_tonic"

# Per-trial correlations, then one aggregated value with confidence interval
# per participant.
corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
corr_by_participant = aggregate_correlations_fisher_z(
    corr_by_trial,
    col1,
    col2,
    "participant_id",
    include_ci=True,
)
# NOTE(review): result is not displayed or stored by this cell.
plot_correlations_by_trial(corr_by_trial, col1, col2)
# Kept in a variable so the combined "Rating / All" cell can layer it.
eda_tonic = plot_correlations_by_participant(
    corr_by_participant,
    col1,
    col2,
    with_config=False,
    color_map=COLORS,
)
eda_tonic

## Rating / EDA Phasic

In [116]:
# Correlation between rating and the phasic EDA component.
col1 = "rating"
col2 = "eda_phasic"

# Per-trial correlations, then one aggregated value with confidence interval
# per participant.
corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
corr_by_participant = aggregate_correlations_fisher_z(
    corr_by_trial,
    col1,
    col2,
    "participant_id",
    include_ci=True,
)
# NOTE(review): result is not displayed or stored by this cell.
plot_correlations_by_trial(corr_by_trial, col1, col2)
# Kept in a variable so the combined "Rating / All" cell can layer it.
eda_phasic = plot_correlations_by_participant(
    corr_by_participant,
    col1,
    col2,
    with_config=False,
    color_map=COLORS,
)
eda_phasic

## Rating / Heart Rate

In [117]:
# Correlation between rating and heart rate.
col1 = "rating"
col2 = "heart_rate"

# Per-trial correlations, then one aggregated value with confidence interval
# per participant.
corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
corr_by_participant = aggregate_correlations_fisher_z(
    corr_by_trial,
    col1,
    col2,
    "participant_id",
    include_ci=True,
)
# NOTE(review): result is not displayed or stored by this cell.
plot_correlations_by_trial(corr_by_trial, col1, col2)
# Kept in a variable so the combined "Rating / All" cell can layer it.
heart_rate = plot_correlations_by_participant(
    corr_by_participant,
    col1,
    col2,
    with_config=False,
    color_map=COLORS,
)
heart_rate


## Rating / All

In [118]:
# Overlay the per-participant charts of this section in one figure and apply
# the chart-level settings (title, legend font size, axis grid) once, on the
# combined chart.
(
    heart_rate  # author's note: put a plot built with the color_map first
    + eda_tonic
    + eda_phasic
    + pupil
    + temperature
).properties(
    title="Correlations with Rating",
).configure_legend(
    labelFontSize=14,
).configure_axis(
    grid=True,
)

# Maximum correlation values only

In [119]:
# Maximum temperature/pupil correlation per participant.
col1, col2 = "temperature", "pupil"

corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
# NOTE(review): `corr_by_participant` is not consumed in this cell (the max
# plot below takes the per-trial frame). It is kept so the notebook-level
# variable still matches this column pair.
corr_by_participant = aggregate_correlations_fisher_z(
    corr_by_trial, col1, col2, "participant_id", include_ci=True
)
# NOTE(review): the return value is discarded here; unless the helper displays
# the chart itself, this call has no visible effect.
plot_correlations_by_trial(corr_by_trial, col1, col2)
# `with_config` must be the boolean False; `-False` evaluates to the int 0.
pupil = plot_max_correlations_by_participant(
    corr_by_trial, col1, col2, with_config=False
)
pupil

In [120]:
# Layer the max-correlation temperature/pupil chart with `heart_rate`.
# NOTE(review): `heart_rate` was last assigned in the "Rating / Heart Rate"
# cell (rating vs. heart rate, rating palette), not in the temperature section
# — confirm this pairing is intended and not a stale variable.
pupil + heart_rate