In [14]:
%%capture
# Notebook bootstrap: if the kernel's working directory is named "notebooks",
# move one level up (presumably to the project root so the `src.*` imports
# resolve — TODO confirm) and enable the autoreload extension.
from pathlib import Path

if Path.cwd().stem == "notebooks":
    %cd ..
    # Mode 2 re-imports all modules before each cell execution.
    %load_ext autoreload
    %autoreload 2

In [None]:
import logging

import duckdb
import holoviews as hv
import hvplot.polars  # noqa
import neurokit2 as nk
import numpy as np
import pandas as pd
import polars as pl
from polars import col

from src.data.database_manager import DatabaseManager
from src.data.quality_checks import check_sample_rate
from src.features.scaling import scale_min_max
from src.features.transforming import map_trials
from src.log_config import configure_logging
from src.plots.plot_modality import plot_modality_over_trials
from src.plots.utils import prepare_multiline_hvplot

# Project logging setup: passes DEBUG as the stream level and a tuple of
# library logger names to ignore (exact semantics live in src.log_config).
configure_logging(
    stream_level=logging.DEBUG, ignore_libs=("Comm", "bokeh", "tornado", "matplotlib")
)
# Name the logger after the last dotted component of the module name.
logger = logging.getLogger(__name__.rsplit(".", maxsplit=1)[-1])

pl.Config.set_tbl_rows(12)  # for the 12 trials
# HoloViews output defaults: widgets placed below the plot, size set to 130.
hv.output(widget_location="bottom", size=130)

In [16]:
# Overview of the raw PPG signal (`ppg_raw`) via the project plotting helper.
plot_modality_over_trials("ppg", signals=["ppg_raw"])

BokehModel(combine_events=True, render_bundle={'docs_json': {'e8e15ba8-29d8-48f4-8cd1-08f47c6ba0ac': {'version…

In [17]:
# Database handle; later cells open it as a context manager (`with db:`).
db = DatabaseManager()

In [None]:
# Load the PPG table and the stimulus features without excluding trials that
# have measurement problems (per the keyword argument).
with db:
    ppg = db.get_table("raw_PPG", exclude_trials_with_measurement_problems=False)
    stimulus = db.get_table(
        "Feature_Stimulus", exclude_trials_with_measurement_problems=False
    )
# Use the shorter column name that the rest of the notebook refers to.
ppg = ppg.rename({"ppg_heart_rate_shimmer": "heart_rate"})
# Only the last expression of a cell is displayed, so show the frame here,
# after the rename, rather than as a no-op statement in the middle of the cell.
ppg

In [None]:
import polars as pl

from src.features.filtering import (
    adaptive_ema_smooth,
    butterworth_filter_non_causal,
)
from src.features.resampling import decimate, interpolate_and_fill_nulls

SAMPLE_RATE = 100  # presumably samples per second (Hz) — TODO confirm
MAX_HEARTRATE = 120  # presumably beats per minute — TODO confirm


def ema_smooth(signal, alpha: float):
    """
    Simple Exponential Moving Average (EMA) smoothing.
    Can be used for real-time applications (causal).

    Each output sample is ``alpha * signal[i] + (1 - alpha) * smoothed[i - 1]``,
    seeded with the first input sample, so only past and current samples are
    used.

    Args:
        signal: Sequence of numbers (anything ``np.asarray`` accepts).
        alpha: Weight of the current sample; ``1 - alpha`` is the weight of
            the previous smoothed value.

    Returns:
        A NumPy array with the same length as ``signal``. Floating-point (and
        complex) inputs keep their dtype; any other dtype is promoted to
        float64 so fractional results are not truncated. An empty input
        yields an empty array.
    """
    values = np.asarray(signal)
    # Integer/boolean input would otherwise produce an integer output buffer
    # and silently truncate every smoothed value.
    if not np.issubdtype(values.dtype, np.inexact):
        values = values.astype(np.float64)

    smoothed = np.zeros_like(values)
    if values.size == 0:
        # Nothing to seed the recursion with.
        return smoothed

    smoothed[0] = values[0]
    for i in range(1, len(values)):
        smoothed[i] = alpha * values[i] + (1 - alpha) * smoothed[i - 1]
    return smoothed


@map_trials
def ema_smooth_heart_rate(
    df: pl.DataFrame,
    heart_rate_column: str = "heart_rate",
    alpha: float = 0.3,
) -> pl.DataFrame:
    """
    Causal exponential-moving-average (EMA) smoothing of the heart rate column.

    Applies `ema_smooth` to `heart_rate_column` and adds the result as a new
    column named `<heart_rate_column>_smooth`.

    The function is wrapped by the project's `map_trials` decorator
    (presumably so the smoothing restarts for every trial — confirm in
    src.features.transforming).

    Args:
        df: Frame containing `heart_rate_column`.
        heart_rate_column: Name of the column to smooth.
        alpha: EMA weight of the current sample, forwarded to `ema_smooth`.
            Defaults to 0.3.

    Returns:
        `df` with the additional `<heart_rate_column>_smooth` column.
    """
    return df.with_columns(
        col(heart_rate_column)
        .map_batches(lambda x: ema_smooth(x, alpha=alpha))
        .alias(heart_rate_column + "_smooth")
    )


# Smooth only the rows with trial_id == 1; the next cell plots the result.
ppg_ = ppg.filter(trial_id=1)
df = ema_smooth_heart_rate(ppg_)

In [32]:
# Overlay the original and the EMA-smoothed heart rate over time.
df.hvplot(
    x="timestamp",
    y=["heart_rate", "heart_rate_smooth"],
    groupby="trial_id",
)

BokehModel(combine_events=True, render_bundle={'docs_json': {'603efb17-f725-4bf7-b69a-bb0ea713937d': {'version…

In [8]:
# Heart rate over time, grouped by trial_id, with the y-axis fixed to 0–200.
ppg.hvplot(
    x="timestamp",
    y=[
        "heart_rate",
    ],
    groupby="trial_id",
    ylim=(0, 200),
)

BokehModel(combine_events=True, render_bundle={'docs_json': {'cc07866e-8d95-4ed9-af35-7588fd73d99f': {'version…

## Heart rates per participant

In [None]:
# Mean heart rate of each participant, in order of first appearance.
heart_rates = [
    participant_frame.get_column("heart_rate").mean()
    for _, participant_frame in ppg.group_by("participant_id", maintain_order=True)
]
# Distribution of the per-participant means as a single box plot.
pd.DataFrame(heart_rates).boxplot()

<Axes: >

## NeuroKit2 analysis based on raw PPG data

In [36]:
# Load the raw PPG table into `raw_ppg` for the NeuroKit2 analysis below.
# NOTE(review): an earlier cell already reads "raw_PPG" into `ppg` — presumably
# the same table; confirm whether this second read is needed.
with db:
    raw_ppg = db.get_table("Raw_PPG", exclude_trials_with_measurement_problems=False)

In [37]:
# Subsets of the raw PPG table used for exploration.
one_participant = raw_ppg.filter(col("participant_id") == 1)
# NOTE(review): filters on trial_id only — assumes trial_id is unique across
# participants; confirm.
one_trial = raw_ppg.filter(col("trial_id") == 1)

# Run the NeuroKit2 PPG pipeline on that subset, using the notebook-wide
# SAMPLE_RATE constant instead of repeating the literal 100.
signals, info = nk.ppg_process(
    one_trial.get_column("ppg_raw").to_numpy(), sampling_rate=SAMPLE_RATE
)


In [38]:
# Visualize the processing: plot the `signals`/`info` pair returned by
# `nk.ppg_process` in the previous cell.
nk.ppg_plot(signals, info)

In [39]:
@map_trials
def nk_process_ppg(
    df: pl.DataFrame,
    sampling_rate: int = 100,
) -> pl.DataFrame:
    """
    Run NeuroKit2's PPG pipeline ("elgendi" method) on the `ppg_raw` column.

    The columns returned by NeuroKit2, minus its `PPG_Raw` copy of the input,
    are appended to the frame and every column name is lowercased. This
    creates the following columns:
    - ppg_clean
    - ppg_rate
    - ppg_quality
    - ppg_peaks

    Note that the NeuroKit2 approach is non-causal, i.e. it uses future data
    to calculate the signal.
    """

    def _neurokit_components(raw: pl.Series) -> pl.Series:
        # ppg_process returns a tuple; only its first element (a pandas
        # DataFrame) is needed.
        processed = nk.ppg_process(
            ppg_signal=raw.to_numpy(),
            sampling_rate=sampling_rate,
            method="elgendi",
        )[0]
        without_raw = processed.drop("PPG_Raw", axis=1)
        # Pack all result columns into one struct series so map_batches can
        # return them together.
        return pl.from_pandas(without_raw).to_struct()

    with_components = df.with_columns(
        col("ppg_raw").map_batches(_neurokit_components).alias("ppg_components")
    )
    unnested = with_components.unnest("ppg_components")
    return unnested.select(pl.all().name.to_lowercase())
