In [28]:
%%capture
from pathlib import Path

if Path.cwd().stem == "notebooks":
    %cd ..
    %load_ext autoreload
    %autoreload 2

In [29]:
import logging
from pathlib import Path

import holoviews as hv
import hvplot.polars  # noqa
import polars as pl
from polars import col

from src.data.database_manager import DatabaseManager
from src.features.exploratory.explore_eda import detrend_tonic_component
from src.features.scaling import scale_min_max
from src.log_config import configure_logging
from src.plots.correlations import (
    aggregate_correlations_fisher_z,
    calculate_correlations_by_trial,
    plot_correlations_by_participant,
    plot_correlations_by_trial,
    plot_max_correlations_by_participant,
)

logger = logging.getLogger(__name__.rsplit(".", maxsplit=1)[-1])
configure_logging(
    stream_level=logging.DEBUG,
    ignore_libs=["matplotlib", "Comm", "bokeh", "tornado", "param", "numba"],
)

pl.Config.set_tbl_rows(12)  # for the 12 trials
hv.output(widget_location="bottom", size=130)

In [30]:
db = DatabaseManager()
with db:
    df = db.get_trials("Explore_Data", exclude_problematic=True)

# Add detrended EDA
# different paradigms: statistics and statistical learning / non-causal and causal
df = detrend_tonic_component(df)

# Rename columns
df = df.rename(
    {
        "rating": "pain_rating",
        "pupil": "pupil_diameter",
    }
)

In [31]:
# Remove first 20 seconds
df = df.filter(col("normalized_timestamp") >= 20 * 1000)

In [13]:
scale_min_max(df).hvplot(
    x="timestamp",
    y=[
        "eda_raw",
        "eda_phasic",
        "eda_tonic",
        # "pupil",
        # "rating",
        "temperature",
        # "pupil_r_raw",
        # "pupil_l_raw",
    ],
    groupby=["participant_id", "trial_number"],
)

BokehModel(combine_events=True, render_bundle={'docs_json': {'3374b71f-2d3a-49b8-be90-fad41a25f83d': {'version…

# Temperature

In [14]:
COLORS = {
    "temperature_pain_rating_corr": "#1f77b4",
    "temperature_pupil_diameter_corr": "#ff7f0e",
    "temperature_eda_tonic_corr": "#d62728",
    "temperature_eda_phasic_corr": "#8a2be2",
    "temperature_heart_rate_corr": "#2ca02c",
}


## Temperature / Rating

In [15]:
col1, col2 = "temperature", "pain_rating"

corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
corr_by_participant = aggregate_correlations_fisher_z(
    corr_by_trial, col1, col2, "participant_id", include_ci=True
)
plot_correlations_by_trial(corr_by_trial, col1, col2)
rating = plot_correlations_by_participant(
    corr_by_participant, col1, col2, with_config=False
)
rating


In [16]:
corr_by_participant.describe()

statistic,participant_id,participant_id_temperature_pain_rating_corr_mean,participant_id_temperature_pain_rating_corr_ci_lower,participant_id_temperature_pain_rating_corr_ci_upper
str,f64,f64,f64,f64
"""count""",42.0,42.0,42.0,42.0
"""null_count""",0.0,0.0,0.0,0.0
"""mean""",21.5,0.775713,0.739026,0.807456
"""std""",12.267844,0.045116,0.054968,0.038745
"""min""",1.0,0.686459,0.627146,0.725711
"""25%""",11.0,0.744626,0.700885,0.786013
"""50%""",22.0,0.787325,0.752075,0.814521
"""75%""",32.0,0.80805,0.776999,0.830903
"""max""",42.0,0.867533,0.853177,0.880576


## Temperature / Pupil

In [17]:
col1, col2 = "temperature", "pupil_diameter"

corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
corr_by_participant = aggregate_correlations_fisher_z(
    corr_by_trial, col1, col2, "participant_id", include_ci=True
)
plot_correlations_by_trial(corr_by_trial, col1, col2)
pupil = plot_correlations_by_participant(
    corr_by_participant, col1, col2, with_config=-False, color_map=COLORS
)
pupil

In [18]:
corr_by_participant.describe()

statistic,participant_id,participant_id_temperature_pupil_diameter_corr_mean,participant_id_temperature_pupil_diameter_corr_ci_lower,participant_id_temperature_pupil_diameter_corr_ci_upper
str,f64,f64,f64,f64
"""count""",42.0,42.0,42.0,42.0
"""null_count""",0.0,0.0,0.0,0.0
"""mean""",21.5,0.266767,0.15064,0.375743
"""std""",12.267844,0.214422,0.228738,0.201129
"""min""",1.0,-0.180082,-0.279928,-0.129087
"""25%""",11.0,0.12486,-0.012048,0.2589
"""50%""",22.0,0.289883,0.152487,0.415585
"""75%""",32.0,0.455394,0.334317,0.56007
"""max""",42.0,0.632913,0.545386,0.706785


## Temperature / EDA Tonic

In [19]:
# Note that correlation values are slightly higher if we'd used detrended EDA tonic
# components (see eda.py)
# EDA tonic detrended > EDA tonic > EDA raw > EDA phasic
col1, col2 = "temperature", "eda_tonic"

corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
corr_by_participant = aggregate_correlations_fisher_z(
    corr_by_trial, col1, col2, "participant_id", include_ci=True
)
# plot_correlations_by_trial(corr_by_trial, col1, col2)
eda_tonic = plot_correlations_by_participant(
    corr_by_participant, col1, col2, with_config=-False, color_map=COLORS
)
eda_tonic


In [20]:
corr_by_participant.describe()

statistic,participant_id,participant_id_temperature_eda_tonic_corr_mean,participant_id_temperature_eda_tonic_corr_ci_lower,participant_id_temperature_eda_tonic_corr_ci_upper
str,f64,f64,f64,f64
"""count""",42.0,42.0,42.0,42.0
"""null_count""",0.0,0.0,0.0,0.0
"""mean""",21.5,0.221137,0.090683,0.342866
"""std""",12.267844,0.208193,0.210155,0.202772
"""min""",1.0,-0.089795,-0.197564,0.020118
"""25%""",11.0,0.059634,-0.079142,0.165973
"""50%""",22.0,0.158007,0.047549,0.326267
"""75%""",32.0,0.375014,0.230545,0.504211
"""max""",42.0,0.690343,0.599116,0.771526


## Temperature / EDA Phasic

In [21]:
col1, col2 = "temperature", "eda_phasic"

corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
corr_by_participant = aggregate_correlations_fisher_z(
    corr_by_trial, col1, col2, "participant_id", include_ci=True
)
# plot_correlations_by_trial(corr_by_trial, col1, col2)
eda_phasic = plot_correlations_by_participant(
    corr_by_participant, col1, col2, with_config=-False, color_map=COLORS
)
eda_phasic

In [22]:
corr_by_participant.describe()

statistic,participant_id,participant_id_temperature_eda_phasic_corr_mean,participant_id_temperature_eda_phasic_corr_ci_lower,participant_id_temperature_eda_phasic_corr_ci_upper
str,f64,f64,f64,f64
"""count""",42.0,42.0,42.0,42.0
"""null_count""",0.0,0.0,0.0,0.0
"""mean""",21.5,0.074601,0.03045,0.118528
"""std""",12.267844,0.062849,0.072844,0.055523
"""min""",1.0,-0.069669,-0.136067,-0.022631
"""25%""",11.0,0.035729,-0.011369,0.082063
"""50%""",22.0,0.078829,0.036493,0.122779
"""75%""",32.0,0.119757,0.087082,0.156856
"""max""",42.0,0.191738,0.161781,0.221341


## Temperature / Heartrate

In [23]:
col1, col2 = "temperature", "heart_rate"

corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
corr_by_participant = aggregate_correlations_fisher_z(
    corr_by_trial, col1, col2, "participant_id", include_ci=True
)
plot_correlations_by_trial(corr_by_trial, col1, col2)
heart_rate = plot_correlations_by_participant(
    corr_by_participant, col1, col2, with_config=-False, color_map=COLORS
)
heart_rate

In [24]:
corr_by_participant.describe()

statistic,participant_id,participant_id_temperature_heart_rate_corr_mean,participant_id_temperature_heart_rate_corr_ci_lower,participant_id_temperature_heart_rate_corr_ci_upper
str,f64,f64,f64,f64
"""count""",42.0,42.0,42.0,42.0
"""null_count""",0.0,0.0,0.0,0.0
"""mean""",21.5,0.123275,0.040523,0.20435
"""std""",12.267844,0.158307,0.162693,0.154069
"""min""",1.0,-0.154252,-0.263509,-0.089056
"""25%""",11.0,-0.020598,-0.102994,0.055937
"""50%""",22.0,0.117635,0.032935,0.191172
"""75%""",32.0,0.221134,0.135712,0.302772
"""max""",42.0,0.531761,0.477989,0.581562


## Temperature / All

In [25]:
combined_chart = (
    (
        # heart_rate
        eda_tonic
        # + eda_phasic
        + pupil
        + rating  # put the plot with the color_map first,
    )
    .configure_axis(
        grid=True,
    )
    .properties(
        title="Correlations with Temperature",
    )
    .configure_legend(labelFontSize=14)
)

combined_chart

In [26]:
combined_chart = (
    (
        # heart_rate
        eda_tonic
        # + eda_phasic
        + pupil
        + rating  # put the plot with the color_map first,
    )
    .configure_axis(
        grid=True,
    )
    .properties(
        title="Correlations with Temperature",
    )
    .configure_legend(labelFontSize=14)
)

combined_chart

In [27]:
# Save figure
import os
from pathlib import Path

from dotenv import load_dotenv

load_dotenv()
FIGURE_DIR = Path(os.getenv("FIGURE_DIR"))


# Save the figure
path = FIGURE_DIR / "correlations_with_temperature.png"
combined_chart.save(path)
# Or save as SVG for vector graphics:
# combined_chart.save(FIGURE_DIR / "correlations_with_temperature.svg")

# Rating

In [None]:
COLORS = {
    "pain_rating_temperature_corr": "#1f77b4",
    "pain_rating_pupil_diameter_corr": "#ff7f0e",
    "pain_rating_eda_tonic_corr": "#d62728",
    "pain_rating_eda_phasic_corr": "#8a2be2",
    "pain_rating_heart_rate_corr": "#2ca02c",
}

## Rating / Temperature

In [None]:
col1, col2 = "pain_rating", "temperature"

corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
corr_by_participant = aggregate_correlations_fisher_z(
    corr_by_trial, col1, col2, "participant_id", include_ci=True
)
plot_correlations_by_trial(corr_by_trial, col1, col2)
temperature = plot_correlations_by_participant(
    corr_by_participant, col1, col2, with_config=False, color_map=COLORS
)
temperature


## Rating / Pupil

In [None]:
col1, col2 = "pain_rating", "pupil_diameter"

corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
corr_by_participant = aggregate_correlations_fisher_z(
    corr_by_trial, col1, col2, "participant_id", include_ci=True
)
plot_correlations_by_trial(corr_by_trial, col1, col2)
pupil = plot_correlations_by_participant(
    corr_by_participant, col1, col2, with_config=False, color_map=COLORS
)
pupil

## Rating / EDA Tonic

In [None]:
col1, col2 = "pain_rating", "eda_tonic"

corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
corr_by_participant = aggregate_correlations_fisher_z(
    corr_by_trial, col1, col2, "participant_id", include_ci=True
)
plot_correlations_by_trial(corr_by_trial, col1, col2)
eda_tonic = plot_correlations_by_participant(
    corr_by_participant, col1, col2, with_config=False, color_map=COLORS
)
eda_tonic

## Rating / Phasic EDA

In [None]:
col1, col2 = "pain_rating", "eda_phasic"

corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
corr_by_participant = aggregate_correlations_fisher_z(
    corr_by_trial, col1, col2, "participant_id", include_ci=True
)
plot_correlations_by_trial(corr_by_trial, col1, col2)
eda_phasic = plot_correlations_by_participant(
    corr_by_participant, col1, col2, with_config=False, color_map=COLORS
)
eda_phasic

## Rating / Heart Rate

In [None]:
col1, col2 = "pain_rating", "heart_rate"

corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
corr_by_participant = aggregate_correlations_fisher_z(
    corr_by_trial, col1, col2, "participant_id", include_ci=True
)
plot_correlations_by_trial(corr_by_trial, col1, col2)
heart_rate = plot_correlations_by_participant(
    corr_by_participant, col1, col2, with_config=False, color_map=COLORS
)
heart_rate


## Rating / All

In [None]:
combined_chart = (
    (
        heart_rate
        + eda_tonic
        + eda_phasic
        + pupil
        + temperature  # put the plot with the color_map first,
    )
    .configure_axis(
        grid=True,
    )
    .properties(
        title="Correlations with Rating",
    )
    .configure_legend(labelFontSize=14)
)

# Display the chart
combined_chart


In [None]:
# Save figure
import os
from pathlib import Path

from dotenv import load_dotenv

load_dotenv()
FIGURE_DIR = Path(os.getenv("FIGURE_DIR"))


# Save the figure
path = FIGURE_DIR / "correlations_with_rating.png"
combined_chart.save(path)
# Or save as SVG for vector graphics:
# combined_chart.save(FIGURE_DIR / "correlations_with_rating.svg")

# Maximum correlation values only

In [None]:
col1, col2 = "temperature", "pupil"

corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
corr_by_participant = aggregate_correlations_fisher_z(
    corr_by_trial, col1, col2, "participant_id", include_ci=True
)
plot_correlations_by_trial(corr_by_trial, col1, col2)
pupil = plot_max_correlations_by_participant(
    corr_by_trial, col1, col2, with_config=-False
)
pupil

ColumnNotFoundError: pupil

Resolved plan until failure:

	---> FAILED HERE RESOLVING 'sink' <---
DF ["trial_id", "trial_number", "participant_id", "timestamp", ...]; PROJECT */35 COLUMNS

In [None]:
pupil + heart_rate