In [None]:
%%capture
from pathlib import Path

if Path.cwd().stem == "notebooks":
    %cd ../..
    %load_ext autoreload
    %autoreload 2

In [3]:
import logging

import holoviews as hv
import hvplot.polars  # noqa
import polars as pl
from icecream import ic
from polars import col

from src.data.database_manager import DatabaseManager
from src.data.quality_checks import check_sample_rate
from src.features.resampling import decimate, interpolate_and_fill_nulls
from src.features.scaling import scale_min_max
from src.features.transforming import map_trials, merge_dfs
from src.log_config import configure_logging
from src.plots.correlations import (
    aggregate_correlations_fisher_z,
    calculate_correlations_by_trial,
    plot_correlations_by_participant,
    plot_correlations_by_trial,
)

configure_logging(
    stream_level=logging.DEBUG,
    ignore_libs=["matplotlib", "Comm", "bokeh", "tornado"],
)

pl.Config.set_tbl_rows(12)  # for the 12 trials
hv.output(widget_location="bottom", size=130)

In [4]:
db = DatabaseManager()

In [5]:
with db:
    df = db.get_table("Merged_and_Labeled_data")

### "lowering of the eyebrows, squeezing of the eyes, wrinkling of the nose, raising of the upper lip and
opening of the mouth" (copied from Kappesser, 2019)

In [6]:
features = [
    "brow_furrow",
    "cheek_raise",
    "mouth_open",
    "upper_lip_raise",
    "nose_wrinkle",
]


In [7]:
df

trial_id,trial_number,participant_id,timestamp,temperature,rating,eda_raw,eda_tonic,eda_phasic,ppg_raw,ppg_ibi_shimmer,heartrate,pupil_l_raw,pupil_r_raw,pupil_r,pupil_l,pupil_mean,pupil_mean_tonic,brow_furrow,cheek_raise,mouth_open,upper_lip_raise,nose_wrinkle,normalized_timestamp,stimulus_seed,skin_patch,decreasing_intervals,major_decreasing_intervals,increasing_intervals,strictly_increasing_intervals,strictly_increasing_intervals_without_plateaus,plateau_intervals,prolonged_minima_intervals
u16,u8,u8,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,u16,u8,u16,u16,u16,u16,u16,u16,u16
1,1,1,294198.9762,0.0,0.425,0.743774,0.743503,0.000271,1408.404382,-1.062977,65.243881,5.670313,6.173421,5.641376,5.15944,5.400408,5.350094,0.00036,0.00286,0.0048,0.0,0.00007,0.0,396,1,0,0,1,0,0,0,0
1,1,1,294298.9762,0.000039,0.425,0.744641,0.743517,0.001124,1372.864366,-0.131651,65.242949,5.602895,6.142564,5.639922,5.157792,5.398857,5.343983,0.00036,0.00286,0.0048,0.0,0.00007,100.0,396,1,0,0,1,0,0,0,0
1,1,1,294398.9762,0.000154,0.395798,0.74488,0.743537,0.001343,1397.15563,-2.948082,65.241069,5.518026,6.072123,5.643301,5.160338,5.401819,5.337462,0.000341,0.002327,0.004199,0.0,0.000065,200.0,396,1,0,0,1,0,0,0,0
1,1,1,294498.9762,0.000417,0.270286,0.744808,0.743557,0.001252,1485.360361,1.488009,65.238629,5.399583,5.947853,5.638149,5.156922,5.397535,5.330919,0.000512,0.00122,0.002504,0.0,0.000065,300.0,396,1,0,0,1,0,0,0,0
1,1,1,294598.9762,0.000818,0.129521,0.74487,0.743578,0.001293,1496.64849,-4.735116,65.235505,5.276378,5.796069,5.646014,5.1618,5.403907,5.324177,0.001059,0.000701,0.002202,0.000006,0.000093,400.0,396,1,0,0,1,0,0,0,0
1,1,1,294698.9762,0.001358,0.152477,0.744957,0.743594,0.001363,1456.946658,1.28125,65.233139,5.158002,5.642459,5.62452,5.146081,5.3853,5.317484,0.001138,0.000565,0.001937,0.000017,0.00009,500.0,396,1,0,0,1,0,0,0,0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
516,12,50,2.6956e6,0.031312,0.0,26.688608,26.716225,0.003169,1323.813266,-5.036073,59.279672,5.166699,4.766597,4.768156,5.167254,4.967705,5.180142,0.000465,0.001124,0.007081,0.00022,0.004168,179500.0,806,1,2580,1548,0,0,0,0,0
516,12,50,2.6957e6,0.030874,0.0,26.68499,26.716212,0.000389,1244.871959,44.337204,58.673362,5.026438,4.683481,4.690238,5.025632,4.857935,5.174284,0.000698,0.000846,0.007044,0.000348,0.004289,179600.0,806,1,2580,1548,0,0,0,0,0
516,12,50,2.6958e6,0.030555,0.0,26.683944,26.716204,0.000132,1223.099413,55.805969,58.073701,4.929092,4.633568,4.655852,4.930955,4.793404,5.169684,0.001266,0.000706,0.011259,0.000379,0.006841,179700.0,806,1,2580,1548,0,0,0,0,0


In [None]:
features = [
    "brow_furrow",
    "cheek_raise",
    "mouth_open",
    "upper_lip_raise",
    "nose_wrinkle",
]

for feature in features:
    col1, col2 = "temperature", feature

    corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
    corr_by_participant = aggregate_correlations_fisher_z(
        corr_by_trial, col1, col2, "participant_id", include_ci=True
    )
    display(plot_correlations_by_trial(corr_by_trial, col1, col2))
    display(plot_correlations_by_participant(corr_by_participant, col1, col2))


17:47:47 | [36mDEBUG   [0m| correlations | Removing NaN correlations


17:47:47 | [36mDEBUG   [0m| correlations | Removing NaN correlations


In [9]:
df.hvplot(x="timestamp", y=[col1, col2], groupby="trial_id", ylim=(0, 1))

BokehModel(combine_events=True, render_bundle={'docs_json': {'1cd02707-ef41-42e1-a32a-18c9ded1ed05': {'version…

In [None]:
further_features = [
    "anger",
    "contempt",
    "disgust",
    "fear",
    "joy",
    "sadness",
    "surprise",
    "engagement",
    "valence",
    "sentimentality",
    "confusion",
    "neutral",
    "attention",
    "brow_raise",
    "chin_raise",
    "dimpler",
    "eye_closure",
    "eye_widen",
    "inner_brow_raise",
    "jaw_drop",
    "lip_corner_depressor",
    "lip_press",
    "lip_pucker",
    "lip_stretch",
    "lip_suck",
    "lid_tighten",
    "smile",
    "smirk",
    "blink",
    "blinkrate",
    "pitch",
    "yaw",
    "roll",
    "interocular_distance",
]

for feature in further_features:
    col1, col2 = feature, "rating"

    corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
    corr_by_participant = aggregate_correlations_fisher_z(
        corr_by_trial, col1, col2, "participant_id", include_ci=True
    )
    # display(plot_correlations_by_trial(corr_by_trial, col1, col2))
    display(plot_correlations_by_participant(corr_by_participant, col1, col2))


ColumnNotFoundError: anger

Resolved plan until failure:

	---> FAILED HERE RESOLVING 'group_by' <---
DF ["trial_id", "trial_number", "participant_id", "timestamp"]; PROJECT */33 COLUMNS

In [29]:
import logging

import altair as alt
import polars as pl

logger = logging.getLogger(__name__.rsplit(".", 1)[-1])


COLORS = {
    "temperature_brow_furrow_corr": "red",
    "temperature_cheek_raise_corr": "#2ca02c",
    "temperature_mouth_open_corr": "#d62728",
    "temperature_upper_lip_raise_corr": "#9467bd",
    "temperature_nose_wrinkle_corr": "#ff7f0e",
}


def plot_correlations_by_participant(
    df: pl.DataFrame,
    correlation_column: str,
    participant_column: str = "participant_id",
    title: str = None,
    width: int = 800,
    height: int = 400,
    y_domain: tuple = (-1, 1),
    with_config: bool = True,
):
    if title is None:
        title = f"Mean {correlation_column.replace('_', ' ').title()} by Participant with 95% CI"

    # Create column names
    mean_col = f"{participant_column}_{correlation_column}_mean"
    ci_lower = f"{participant_column}_{correlation_column}_ci_lower"
    ci_upper = f"{participant_column}_{correlation_column}_ci_upper"

    base = alt.Chart(
        df,
        width=width,
        height=height,
    ).encode(
        x=alt.X(f"{participant_column}:O", axis=alt.Axis(title="Participant ID")),
        y=alt.Y(
            f"{ci_lower}:Q",
            scale=alt.Scale(domain=y_domain),
            axis=alt.Axis(title="Correlation"),
        ),
    )

    error_bars = base.mark_rule().encode(y2=f"{ci_upper}:Q")
    points = base.mark_circle(
        size=100, color=COLORS.get(correlation_column, "#1f77b4")
    ).encode(
        y=f"{mean_col}:Q",
        tooltip=[
            alt.Tooltip(f"{participant_column}:N", title="Participant"),
            alt.Tooltip(f"{mean_col}:Q", title="Mean Correlation", format=".3f"),
            alt.Tooltip(f"{ci_lower}:Q", title="CI Lower", format=".3f"),
            alt.Tooltip(f"{ci_upper}:Q", title="CI Upper", format=".3f"),
        ],
    )

    if with_config:
        return (
            alt.layer(error_bars, points)
            .properties(title=title)
            .configure_axis(grid=True, gridColor="#ededed")
            .configure_view(strokeWidth=0)
            .configure_title(fontSize=16, anchor="middle")
        )

    return alt.layer(error_bars, points).properties(title=title)


In [96]:
correlation_columns = [
    "temperature_brow_furrow_corr",
    "temperature_cheek_raise_corr",
    "temperature_mouth_open_corr",
    "temperature_upper_lip_raise_corr",
    "temperature_nose_wrinkle_corr",
]


def plot_correlations_by_participant(
    df: pl.DataFrame,
    correlation_column: str,
    participant_column: str = "participant_id",
    title: str = None,
    width: int = 800,
    height: int = 400,
    y_domain: tuple = (-1, 1),
    with_config: bool = True,
):
    if title is None:
        title = f"Mean {correlation_column.replace('_', ' ').title()} by Participant with 95% CI"

    # Create column names
    mean_col = f"{participant_column}_{correlation_column}_mean"
    ci_lower = f"{participant_column}_{correlation_column}_ci_lower"
    ci_upper = f"{participant_column}_{correlation_column}_ci_upper"

    # Create a copy of the DataFrame with the legend column
    chart_df = df.clone()
    chart_df = chart_df.with_columns(
        pl.lit(correlation_column).alias("correlation_type")
    )

    base = alt.Chart(
        chart_df,
        width=width,
        height=height,
    ).encode(
        x=alt.X(f"{participant_column}:O", axis=alt.Axis(title="Participant ID")),
        y=alt.Y(
            f"{ci_lower}:Q",
            scale=alt.Scale(domain=y_domain),
            axis=alt.Axis(title="Correlation"),
        ),
        # Use the correlation_type for color encoding to create the legend
        color=alt.Color(
            "correlation_type:N",
            scale=alt.Scale(domain=list(COLORS.keys()), range=list(COLORS.values())),
            legend=alt.Legend(
                title="Correlation Type",
                labelExpr="replace(replace(datum.label, '_corr', ''), '_', ' ')",
            ),
        ),
    )

    error_bars = base.mark_rule().encode(
        y2=f"{ci_upper}:Q",
        color=alt.value("gray"),  # Make error bars gray
    )

    points = base.mark_circle(size=100).encode(
        # y=f"{mean_col}:Q",
        # # Use color encoding with a legend
        # color=alt.Color(
        #     "Metric:N",
        #     scale=alt.Scale(
        #         domain=list(COLORS.keys()),
        #         range=[COLORS.get(correlation_column, "#1f77b4")],
        #     ),
        #     # legend=alt.Legend(title="Correlation Type"),
        # ),
        tooltip=[
            alt.Tooltip(f"{participant_column}:N", title="Participant"),
            alt.Tooltip(f"{mean_col}:Q", title="Mean Correlation", format=".3f"),
            alt.Tooltip(f"{ci_lower}:Q", title="CI Lower", format=".3f"),
            alt.Tooltip(f"{ci_upper}:Q", title="CI Upper", format=".3f"),
        ],
    )

    if with_config:
        return (
            alt.layer(error_bars, points)
            .properties(title=title)
            .configure_axis(grid=True, gridColor="#ededed")
            .configure_view(strokeWidth=0)
            .configure_title(fontSize=16, anchor="middle")
            .configure_legend(titleFontSize=14, labelFontSize=12, orient="top-right")
        )

    return alt.layer(error_bars, points).properties(title=title)

In [None]:
features = [
    "brow_furrow",
    "cheek_raise",
    "mouth_open",
    "upper_lip_raise",
    "nose_wrinkle",
]


col1, col2 = "temperature", "brow_furrow"

corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
corr_by_participant = aggregate_correlations_fisher_z(
    corr_by_trial, col1, col2, "participant_id", include_ci=True
)
brow_furrow = plot_correlations_by_participant(
    corr_by_participant, col1, col2, with_config=False
)

col1, col2 = "temperature", "cheek_raise"

corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
corr_by_participant = aggregate_correlations_fisher_z(
    corr_by_trial, col1, col2, "participant_id", include_ci=True
)
cheek_raise = plot_correlations_by_participant(
    corr_by_participant, col1, col2, with_config=False
)

col1, col2 = "temperature", "mouth_open"

corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
corr_by_participant = aggregate_correlations_fisher_z(
    corr_by_trial, col1, col2, "participant_id", include_ci=True
)
mouth_open = plot_correlations_by_participant(
    corr_by_participant, col1, col2, with_config=False
)

col1, col2 = "temperature", "upper_lip_raise"

corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
corr_by_participant = aggregate_correlations_fisher_z(
    corr_by_trial, col1, col2, "participant_id", include_ci=True
)
upper_lip_raise = plot_correlations_by_participant(
    corr_by_participant, col1, col2, with_config=False
)

col1, col2 = "temperature", "nose_wrinkle"

corr_by_trial = calculate_correlations_by_trial(df, col1, col2)
corr_by_participant = aggregate_correlations_fisher_z(
    corr_by_trial, col1, col2, "participant_id", include_ci=True
)
nose_wrinkle = plot_correlations_by_participant(
    corr_by_participant, col1, col2, with_config=False
)


18:36:47 | [36mDEBUG   [0m| correlations | Removing NaN correlations
18:36:47 | [36mDEBUG   [0m| correlations | Removing NaN correlations


In [98]:
brow_furrow + cheek_raise

In [99]:
(
    brow_furrow + cheek_raise + mouth_open + upper_lip_raise + nose_wrinkle
).configure_axis(
    grid=True,
).properties(
    title="Correlations with Temperature",
).configure_legend(labelFontSize=14)

In [105]:
with db:
    df = db.get_table("Merged_And_labeled_data")
df.get_column("major_decreasing_intervals").unique()

major_decreasing_intervals
u16
0
1
2
3
4
5
…
1543
1544
1545
