In [18]:
%%capture
from pathlib import Path

if Path.cwd().stem == "notebooks":
    %cd ..
    %load_ext autoreload
    %autoreload 2

In [19]:
import logging
from functools import reduce

import holoviews as hv
import hvplot.polars  # noqa
import neurokit2 as nk
import pandas as pd
import polars as pl
from icecream import ic
from polars import col

from src.data.data_config import DataConfig
from src.data.database_manager import DatabaseManager
from src.data.quality_checks import check_sample_rate
from src.features.eda import nk_process_eda
from src.features.scaling import scale_min_max
from src.features.transforming import map_trials
from src.log_config import configure_logging
from src.plots.utils import prepare_multiline_hvplot

configure_logging(
    stream_level=logging.DEBUG, ignore_libs=("Comm", "bokeh", "tornado", "matplotlib")
)
logger = logging.getLogger(__name__.rsplit(".", maxsplit=1)[-1])

pl.Config.set_tbl_rows(12)  # for the 12 trials
hv.output(widget_location="bottom", size=130)

In [20]:
db = DatabaseManager()
modalities = DataConfig().MODALITIES

### Note that asof join uses a fill method if the right df is sampled at a lower frequency than the left df. In that case, the values are not interpolated.

### Note that interpolating should always depend on the time vector.
https://github.com/pola-rs/polars/issues/9616

https://docs.pola.rs/api/python/stable/reference/expressions/api/polars.Expr.interpolate_by.html

In [21]:
query = """
SELECT * FROM Preprocess_EDA AS pe
ASOF JOIN Raw_Stimulus rs USING (trial_id, timestamp)
ORDER BY pe.trial_id, pe.timestamp
"""

with db:
    df = db.execute(query).pl()
df

trial_id,trial_number,participant_id,rownumber,timestamp,samplenumber,eda_raw,eda_tonic,eda_phasic,temperature,rating
u16,u8,u8,u32,f64,i64,f64,f64,f64,f64,f64
1,1,1,0,294224.331,57895,0.753247,0.752117,0.00113,45.75,42.5
1,1,1,0,294242.275,57896,0.754135,0.752119,0.002016,45.75,42.5
1,1,1,0,294248.2588,57898,0.754135,0.752121,0.002014,45.75,42.5
1,1,1,0,294276.1835,57899,0.754135,0.752123,0.002012,45.75,42.5
1,1,1,0,294277.1819,57900,0.752359,0.752125,0.000234,45.75,42.5
1,1,1,0,294309.0952,57902,0.752359,0.752127,0.000232,45.75,42.5
…,…,…,…,…,…,…,…,…,…,…
332,12,28,21610,2.7771e6,467073,13.679468,13.578894,-0.012187,45.582614,85.0
332,12,28,21610,2.7771e6,467074,13.679468,13.578894,-0.012765,45.582614,85.0
332,12,28,21610,2.7771e6,467075,13.674363,13.578894,-0.018451,45.582614,85.0


In [22]:
with db:
    eda = db.get_table("Preprocess_EDA")
    stim = db.get_table("Raw_Stimulus")

d = eda.join_asof(
    stim,
    on="timestamp",
    by=["trial_id", "trial_number", "participant_id"],
    strategy="nearest",
    coalesce=True,
)
d

trial_id,trial_number,participant_id,rownumber,timestamp,samplenumber,eda_raw,eda_tonic,eda_phasic,rownumber_right,temperature,rating
u16,u8,u8,u32,f64,i64,f64,f64,f64,u32,f64,f64
1,1,1,37660,294210.3603,57892,0.752359,0.752113,0.000246,0,45.75,42.5
1,1,1,37661,294211.3575,57893,0.754579,0.752115,0.002464,0,45.75,42.5
1,1,1,37663,294224.331,57895,0.753247,0.752117,0.00113,0,45.75,42.5
1,1,1,37664,294242.275,57896,0.754135,0.752119,0.002016,0,45.75,42.5
1,1,1,37666,294248.2588,57898,0.754135,0.752121,0.002014,0,45.75,42.5
1,1,1,37667,294276.1835,57899,0.754135,0.752123,0.002012,0,45.75,42.5
…,…,…,…,…,…,…,…,…,…,…,…
332,12,28,355476,2.7771e6,467073,13.679468,13.578894,-0.012187,21610,45.582614,85.0
332,12,28,355477,2.7771e6,467074,13.679468,13.578894,-0.012765,21611,45.582614,85.0
332,12,28,355478,2.7771e6,467075,13.674363,13.578894,-0.018451,21611,45.582614,85.0


In [23]:
e = eda.join(
    stim,
    on=["timestamp", "trial_id", "participant_id"],
    # by=["trial_id", "trial_number", "participant_id"],
    how="full",
    coalesce=True,
)
e

trial_id,trial_number,participant_id,rownumber,timestamp,samplenumber,eda_raw,eda_tonic,eda_phasic,trial_number_right,rownumber_right,temperature,rating
u16,u8,u8,u32,f64,i64,f64,f64,f64,u8,u32,f64,f64
1,1,1,37660,294210.3603,57892,0.752359,0.752113,0.000246,,,,
1,1,1,37661,294211.3575,57893,0.754579,0.752115,0.002464,,,,
1,1,1,37663,294224.331,57895,0.753247,0.752117,0.00113,1,0,45.75,42.5
1,1,1,37664,294242.275,57896,0.754135,0.752119,0.002016,,,,
1,1,1,37666,294248.2588,57898,0.754135,0.752121,0.002014,,,,
1,1,1,37667,294276.1835,57899,0.754135,0.752123,0.002012,,,,
…,…,…,…,…,…,…,…,…,…,…,…,…
112,,10,,1.9068e6,,,,,8,13466,46.115596,0.0
281,,24,,2.1922e6,,,,,9,14912,46.129921,85.0
249,,22,,322302.795,,,,,1,974,42.911697,0.0


In [24]:
def merge_dfs(
    dfs: list[pl.DataFrame],
    merge_on: list[str] = ["participant_id", "trial_id", "trial_number", "timestamp"],
    sort_by: list[str] = ["trial_id", "timestamp"],
) -> pl.DataFrame:
    """
    Merge multiple DataFrames into a single DataFrame.
    """
    if len(dfs) < 2:
        return dfs[0]

    df = reduce(
        lambda left, right: left.join(
            right,
            on=merge_on,
            how="full",
            coalesce=True,
        )
        .sort(sort_by)
        .drop(["rownumber_right", "samplenumber_right"], strict=False),
        dfs,
    )
    return df


modalities = ["EDA", "PPG"]
modalities = DataConfig().MODALITIES

with db:
    dfs = [db.get_table("Feature_" + modality) for modality in modalities]


In [25]:
@map_trials
def interpolate_and_fill_nulls(df: pl.DataFrame) -> pl.DataFrame:
    """
    Interpolate and fill null values in a DataFrame.
    """
    return (
        df.with_columns(df.select(pl.selectors.by_dtype(pl.Float64)).interpolate())
        .fill_null(strategy="forward")
        .fill_null(strategy="backward")
    )


df = merge_dfs(dfs)
df = interpolate_and_fill_nulls(df)
df

trial_id,trial_number,participant_id,rownumber,timestamp,temperature,rating,samplenumber,eda_raw,eda_tonic,eda_phasic,ch1,ch2,ch3,ch4,ch5,ch6,ch7,ch8,ppg_raw,ppg_heartrate,ppg_ibi,pupil_l,pupil_r,pupil_r_filtered,pupil_l_filtered,anger,contempt,disgust,fear,joy,sadness,surprise,engagement,valence,sentimentality,confusion,neutral,attention,brow_furrow,brow_raise,cheek_raise,chin_raise,dimpler,eye_closure,eye_widen,inner_brow_raise,jaw_drop,lip_corner_depressor,lip_press,lip_pucker,lip_stretch,lip_suck,lid_tighten,mouth_open,nose_wrinkle,smile,smirk,upper_lip_raise,blink,blinkrate,pitch,yaw,roll,interocular_distance
u16,u8,u8,u32,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i64,i64,f64,f64,f64,f64
1,1,1,0,294100.0,0.0,0.425,57896.142857,0.753564,0.752119,0.001445,9948.540039,12283.850586,5801.344238,18263.294922,12240.93457,17119.123047,13553.095703,5451.536133,1416.012559,-1.0,-1.0,5.73628,6.245389,6.013662,5.558055,0.00125,0.00182,0.00024,0.00024,0.00026,0.00125,0.00061,0.00329,0.0,0.00036,0.00036,0.99636,0.98485,0.00036,0.00436,0.00286,0.0,0.00341,0.0,0.00066,0.00086,0.01865,0.0,0.00002,0.0,0.00006,0.00001,0.0,0.0048,0.00007,0.00642,0.0,0.0,0,18,-0.2371,-0.01128,0.03344,99.168541
1,1,1,0,294200.0,0.0,0.425,57896.142857,0.753564,0.752119,0.001445,9970.474824,12306.00459,5792.134512,18260.699883,12252.623574,17113.822773,13547.498867,5446.654512,1416.012559,-1.0,-1.0,5.697124,6.225716,5.994456,5.539501,0.00125,0.00182,0.00024,0.00024,0.00026,0.00125,0.00061,0.00329,0.0,0.00036,0.00036,0.99636,0.98485,0.00036,0.00436,0.00286,0.0,0.00341,0.0,0.00066,0.00086,0.01865,0.0,0.00002,0.0,0.00006,0.00001,0.0,0.0048,0.00007,0.00642,0.0,0.0,0,18,-0.2371,-0.01128,0.03344,99.168541
1,1,1,0,294224.331,0.0,0.425,57902.321429,0.753535,0.752128,0.001407,9966.447959,12305.027012,5790.673486,18257.995391,12252.077129,17114.441172,13547.30335,5447.133188,1396.254143,-1.0,-1.0,5.657676,6.201457,5.977658,5.523324,0.00125,0.00182,0.00024,0.00024,0.00026,0.00125,0.00061,0.00329,0.0,0.00036,0.00036,0.99636,0.98485,0.00036,0.00436,0.00286,0.0,0.00341,0.0,0.00066,0.00086,0.01865,0.0,0.00002,0.0,0.00006,0.00001,0.0,0.0048,0.00007,0.00642,0.0,0.0,0,18,-0.2371,-0.01128,0.03344,99.168541
1,1,1,0,294300.0,0.000023,0.425,57908.5,0.753506,0.752138,0.001368,9962.421094,12304.049434,5789.212461,18255.290898,12251.530684,17115.05957,13547.107832,5447.611865,1376.495726,-1.0,-1.0,5.618228,6.177197,5.96086,5.507147,0.00125,0.00182,0.00024,0.00024,0.00026,0.00125,0.00061,0.00329,0.0,0.00036,0.00036,0.99636,0.98485,0.00036,0.00436,0.00286,0.0,0.00341,0.0,0.00066,0.00086,0.01865,0.0,0.00002,0.0,0.00006,0.00001,0.0,0.0048,0.00007,0.00642,0.0,0.0,0,18,-0.2371,-0.01128,0.03344,99.168541
1,1,1,0,294350.9838,0.000046,0.425,57913.291667,0.753475,0.752144,0.001331,9962.341374,12306.027415,5790.438558,18258.331224,12254.625977,17116.528529,13550.302031,5449.578083,1406.644282,-1.0,-1.0,5.583938,6.141867,5.949447,5.496201,0.00125,0.00182,0.00024,0.00024,0.00026,0.00125,0.00061,0.00329,0.0,0.00036,0.00036,0.99636,0.98485,0.00036,0.00436,0.00286,0.0,0.00341,0.0,0.00066,0.00086,0.01865,0.0,0.00002,0.0,0.00006,0.00001,0.0,0.0048,0.00007,0.00642,0.0,0.0,0,18,-0.2371,-0.01128,0.03344,99.168541
1,1,1,1,294357.9645,0.000069,0.425,57918.083333,0.753444,0.752151,0.001293,9962.261654,12308.005397,5791.664655,18261.371549,12257.72127,17117.997487,13553.49623,5451.5443,1436.792837,-1.0,-1.0,5.549648,6.106537,5.938035,5.485255,0.001253,0.001827,0.00024,0.00024,0.00026,0.00125,0.00061,0.00329,0.0,0.000347,0.000347,0.996343,0.985317,0.000347,0.00417,0.002493,0.0,0.00244,0.0,0.00078,0.000743,0.018433,0.0,0.000017,0.0,0.00005,0.00001,0.0,0.004387,0.000067,0.005843,0.0,0.0,0,18,-0.240617,-0.00964,0.03041,98.977119
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
332,12,28,21609,2.7770e6,0.155571,0.85,467065.187879,13.668701,13.578895,-0.019537,-25075.839352,-25399.273645,-23847.429885,-20807.704739,-23084.115875,-26667.363497,-26580.206911,-24035.833048,1579.97114,72.0,24.125,4.155845,4.041419,3.965991,4.014051,0.00416,0.00411,0.00027,0.00058,0.00024,0.00495,0.00063,0.0087,-0.02842,0.0,0.03276,0.99009,0.98525,0.30386,0.0,0.00001,0.0,0.0,0.0,0.00001,0.0,0.03276,0.0,0.0,0.00001,0.0,0.0,0.00051,0.0331,0.00215,0.0,0.00002,0.00005,0,36,-0.13019,-0.00993,-0.01892,105.746185
332,12,28,21609,2.777e6,0.155504,0.85,467069.6,13.672831,13.578894,-0.017389,-25074.633281,-25398.124297,-23847.370586,-20806.829531,-23083.218906,-26667.958906,-26579.725195,-24034.985977,1693.699634,72.0,-1.0,4.143909,4.03194,3.965874,4.013927,0.00408,0.004033,0.000273,0.00057,0.00024,0.004793,0.00063,0.00845,-0.024117,0.0,0.032033,0.99041,0.98526,0.296327,0.0,0.00001,0.0,0.0,0.0,0.00001,0.000003,0.032033,0.0,0.0,0.00001,0.0,0.0,0.000473,0.036433,0.00218,0.0,0.000017,0.00005,0,36,-0.130567,-0.009897,-0.018767,105.834084
332,12,28,21610,2.7770e6,0.155438,0.85,467072.288889,13.676178,13.578894,-0.015295,-25071.247057,-25391.894964,-23844.100873,-20799.967434,-23077.114337,-26666.796723,-26574.34389,-24033.445479,1727.912088,72.0,-1.0,4.128057,4.015512,3.965834,4.013886,0.004,0.003957,0.000277,0.00056,0.00024,0.004637,0.00063,0.0082,-0.019813,0.0,0.031307,0.99073,0.98527,0.288793,0.0,0.00001,0.0,0.0,0.0,0.00001,0.000007,0.031307,0.0,0.0,0.00001,0.0,0.0,0.000437,0.039767,0.00221,0.0,0.000013,0.00005,0,36,-0.130943,-0.009863,-0.018613,105.921982


In [26]:
with db:
    df = db.get_final_feature_data()

df

trial_id,trial_number,participant_id,rownumber,timestamp,temperature,rating,samplenumber,eda_raw,eda_tonic,eda_phasic,ch1,ch2,ch3,ch4,ch5,ch6,ch7,ch8,ppg_raw,ppg_heartrate,ppg_ibi,pupil_l,pupil_r,pupil_r_filtered,pupil_l_filtered,anger,contempt,disgust,fear,joy,sadness,surprise,engagement,valence,sentimentality,confusion,neutral,attention,brow_furrow,brow_raise,cheek_raise,chin_raise,dimpler,eye_closure,eye_widen,inner_brow_raise,jaw_drop,lip_corner_depressor,lip_press,lip_pucker,lip_stretch,lip_suck,lid_tighten,mouth_open,nose_wrinkle,smile,smirk,upper_lip_raise,blink,blinkrate,pitch,yaw,roll,interocular_distance
u16,u8,u8,u32,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i64,i64,f64,f64,f64,f64
1,1,1,0,294100.0,0.0,0.425,57896.142857,0.753564,0.752119,0.001445,9948.540039,12283.850586,5801.344238,18263.294922,12240.93457,17119.123047,13553.095703,5451.536133,1416.012559,-1.0,-1.0,5.73628,6.245389,6.013662,5.558055,0.00125,0.00182,0.00024,0.00024,0.00026,0.00125,0.00061,0.00329,0.0,0.00036,0.00036,0.99636,0.98485,0.00036,0.00436,0.00286,0.0,0.00341,0.0,0.00066,0.00086,0.01865,0.0,0.00002,0.0,0.00006,0.00001,0.0,0.0048,0.00007,0.00642,0.0,0.0,0,18,-0.2371,-0.01128,0.03344,99.168541
1,1,1,0,294200.0,0.0,0.425,57896.142857,0.753564,0.752119,0.001445,9970.474824,12306.00459,5792.134512,18260.699883,12252.623574,17113.822773,13547.498867,5446.654512,1416.012559,-1.0,-1.0,5.697124,6.225716,5.994456,5.539501,0.00125,0.00182,0.00024,0.00024,0.00026,0.00125,0.00061,0.00329,0.0,0.00036,0.00036,0.99636,0.98485,0.00036,0.00436,0.00286,0.0,0.00341,0.0,0.00066,0.00086,0.01865,0.0,0.00002,0.0,0.00006,0.00001,0.0,0.0048,0.00007,0.00642,0.0,0.0,0,18,-0.2371,-0.01128,0.03344,99.168541
1,1,1,0,294224.331,0.0,0.425,57902.321429,0.753535,0.752128,0.001407,9966.447959,12305.027012,5790.673486,18257.995391,12252.077129,17114.441172,13547.30335,5447.133188,1396.254143,-1.0,-1.0,5.657676,6.201457,5.977658,5.523324,0.00125,0.00182,0.00024,0.00024,0.00026,0.00125,0.00061,0.00329,0.0,0.00036,0.00036,0.99636,0.98485,0.00036,0.00436,0.00286,0.0,0.00341,0.0,0.00066,0.00086,0.01865,0.0,0.00002,0.0,0.00006,0.00001,0.0,0.0048,0.00007,0.00642,0.0,0.0,0,18,-0.2371,-0.01128,0.03344,99.168541
1,1,1,0,294300.0,0.000023,0.425,57908.5,0.753506,0.752138,0.001368,9962.421094,12304.049434,5789.212461,18255.290898,12251.530684,17115.05957,13547.107832,5447.611865,1376.495726,-1.0,-1.0,5.618228,6.177197,5.96086,5.507147,0.00125,0.00182,0.00024,0.00024,0.00026,0.00125,0.00061,0.00329,0.0,0.00036,0.00036,0.99636,0.98485,0.00036,0.00436,0.00286,0.0,0.00341,0.0,0.00066,0.00086,0.01865,0.0,0.00002,0.0,0.00006,0.00001,0.0,0.0048,0.00007,0.00642,0.0,0.0,0,18,-0.2371,-0.01128,0.03344,99.168541
1,1,1,0,294350.9838,0.000046,0.425,57913.291667,0.753475,0.752144,0.001331,9962.341374,12306.027415,5790.438558,18258.331224,12254.625977,17116.528529,13550.302031,5449.578083,1406.644282,-1.0,-1.0,5.583938,6.141867,5.949447,5.496201,0.00125,0.00182,0.00024,0.00024,0.00026,0.00125,0.00061,0.00329,0.0,0.00036,0.00036,0.99636,0.98485,0.00036,0.00436,0.00286,0.0,0.00341,0.0,0.00066,0.00086,0.01865,0.0,0.00002,0.0,0.00006,0.00001,0.0,0.0048,0.00007,0.00642,0.0,0.0,0,18,-0.2371,-0.01128,0.03344,99.168541
1,1,1,1,294357.9645,0.000069,0.425,57918.083333,0.753444,0.752151,0.001293,9962.261654,12308.005397,5791.664655,18261.371549,12257.72127,17117.997487,13553.49623,5451.5443,1436.792837,-1.0,-1.0,5.549648,6.106537,5.938035,5.485255,0.001253,0.001827,0.00024,0.00024,0.00026,0.00125,0.00061,0.00329,0.0,0.000347,0.000347,0.996343,0.985317,0.000347,0.00417,0.002493,0.0,0.00244,0.0,0.00078,0.000743,0.018433,0.0,0.000017,0.0,0.00005,0.00001,0.0,0.004387,0.000067,0.005843,0.0,0.0,0,18,-0.240617,-0.00964,0.03041,98.977119
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
332,12,28,21609,2.7770e6,0.155571,0.85,467065.187879,13.668701,13.578895,-0.019537,-25075.839352,-25399.273645,-23847.429885,-20807.704739,-23084.115875,-26667.363497,-26580.206911,-24035.833048,1579.97114,72.0,24.125,4.155845,4.041419,3.965991,4.014051,0.00416,0.00411,0.00027,0.00058,0.00024,0.00495,0.00063,0.0087,-0.02842,0.0,0.03276,0.99009,0.98525,0.30386,0.0,0.00001,0.0,0.0,0.0,0.00001,0.0,0.03276,0.0,0.0,0.00001,0.0,0.0,0.00051,0.0331,0.00215,0.0,0.00002,0.00005,0,36,-0.13019,-0.00993,-0.01892,105.746185
332,12,28,21609,2.777e6,0.155504,0.85,467069.6,13.672831,13.578894,-0.017389,-25074.633281,-25398.124297,-23847.370586,-20806.829531,-23083.218906,-26667.958906,-26579.725195,-24034.985977,1693.699634,72.0,-1.0,4.143909,4.03194,3.965874,4.013927,0.00408,0.004033,0.000273,0.00057,0.00024,0.004793,0.00063,0.00845,-0.024117,0.0,0.032033,0.99041,0.98526,0.296327,0.0,0.00001,0.0,0.0,0.0,0.00001,0.000003,0.032033,0.0,0.0,0.00001,0.0,0.0,0.000473,0.036433,0.00218,0.0,0.000017,0.00005,0,36,-0.130567,-0.009897,-0.018767,105.834084
332,12,28,21610,2.7770e6,0.155438,0.85,467072.288889,13.676178,13.578894,-0.015295,-25071.247057,-25391.894964,-23844.100873,-20799.967434,-23077.114337,-26666.796723,-26574.34389,-24033.445479,1727.912088,72.0,-1.0,4.128057,4.015512,3.965834,4.013886,0.004,0.003957,0.000277,0.00056,0.00024,0.004637,0.00063,0.0082,-0.019813,0.0,0.031307,0.99073,0.98527,0.288793,0.0,0.00001,0.0,0.0,0.0,0.00001,0.000007,0.031307,0.0,0.0,0.00001,0.0,0.0,0.000437,0.039767,0.00221,0.0,0.000013,0.00005,0,36,-0.130943,-0.009863,-0.018613,105.921982


In [27]:
df.hvplot(
    x="timestamp",
    y=[
        "eda_tonic",
        "eda_phasic",
        "ppg_raw",
        "rating",
    ],
    kind="line",
    groupby="trial_id",
)

BokehModel(combine_events=True, render_bundle={'docs_json': {'cb84d8d1-c2d3-4927-a099-7b83c7dbf6d3': {'version…

In [28]:
df = pl.DataFrame(
    {
        "value": [1.0, None],
    },
)
print(df)


shape: (2, 1)
┌───────┐
│ value │
│ ---   │
│ f64   │
╞═══════╡
│ 1.0   │
│ null  │
└───────┘


In [29]:
modalities = DataConfig().MODALITIES
modalities = ["EDA", "PPG"]

merge_on: list[str] = ["participant_id", "trial_id", "trial_number", "timestamp"]
sort_by: list[str] = ["trial_id", "timestamp"]

with db:
    eda = db.get_table("Feature_EDA")
    stimulus = db.get_table("Feature_Stimulus")

df = eda.join(
    stimulus,
    on=merge_on,
    how="full",
    coalesce=True,
).sort(sort_by)

df

trial_id,trial_number,participant_id,rownumber,timestamp,samplenumber,eda_raw,eda_tonic,eda_phasic,rownumber_right,temperature,rating
u16,u8,u8,f64,f64,f64,f64,f64,f64,u32,f64,f64
1,1,1,37664.142857,294200.0,57896.142857,0.753564,0.752119,0.001445,,,
1,1,1,,294224.331,,,,,0,0.0,0.425
1,1,1,37676.5,294300.0,57908.5,0.753506,0.752138,0.001368,,,
1,1,1,,294357.9645,,,,,1,0.000069,0.425
1,1,1,37690.875,294400.0,57922.875,0.753413,0.752158,0.001256,,,
1,1,1,,294458.0292,,,,,2,0.000277,0.35375
…,…,…,…,…,…,…,…,…,…,…,…
332,12,28,355459.363636,2.7769e6,467056.363636,13.660439,13.578896,-0.023833,,,
332,12,28,,2.7769e6,,,,,21609,0.155637,0.85
332,12,28,355472.6,2.777e6,467069.6,13.672831,13.578894,-0.017389,,,


In [30]:
df.fill_null(strategy="forward").plot(
    x="timestamp", y=["eda_tonic", "temperature"], groupby="trial_id"
)

BokehModel(combine_events=True, render_bundle={'docs_json': {'a3c2bed1-37da-4d24-89fc-3fd495447637': {'version…

In [31]:
stimulus.plot(x="timestamp", y="rating", groupby="trial_id")

BokehModel(combine_events=True, render_bundle={'docs_json': {'f6448d8a-c98b-406c-93d4-68e02874e0ed': {'version…

In [32]:
with db:
    # db.get_table("Raw_Stimulus")
    db.get_table("Preprocess_EDA")

In [33]:
# note that this is not an ASOF join TODO
def merge_datasets(
    dfs: list[pl.DataFrame],
    merge_on: list[str] = ["Timestamp", "Trial", "Participant"],
    sort_by: list[str] = ["Timestamp"],
) -> pl.DataFrame:
    """
    Merge multiple DataFrames into a single DataFrame.

    The default merge_on and sort_by columns are for merging different modalities of
    one participant.

    The function can also be used to merge multiple participants' modalities with
    a different merge_on and sort_by column.

    Examples:

    Merge two datasets of different modalities of one participant:
    >>> dfs = load_participant_datasets(PARTICIPANT_LIST[0], INTERIM_LIST)
    >>> eda_plus_rating = merge_datasets([dfs.eda, dfs.stimulus])


    Merge multiple participants' modalities:
    ````python
    # The load function loads one modality for multiple participants
    stimuli = load_modality_data(PARTICIPANT_LIST, INTERIM_DICT["stimulus"])
    eda = load_modality_data(PARTICIPANT_LIST, INTERIM_DICT["eda"])
    multiple_eda_plus_rating = merge_datasets(
        [stimuli, eda],
        on=["Timestamp", "Participant", "Trial"],
        sort_by=["Participant", "Trial", "Timestamp"],
    )
    # Normalzing, plotting, etc.
    features = ["Temperature", "Rating", "EDA_Tonic"]
    multiple_eda_plus_rating = interpolate(multiple_eda_plus_rating)
    multiple_eda_plus_rating = scale_min_max(
        multiple_eda_plus_rating, exclude_additional_columns=["Temperature", "Rating"]
    )
    multiple_eda_plus_rating.hvplot(
        x="Timestamp",
        y=features,
        groupby=["Participant", "Trial"],
        kind="line",
        width=800,
        height=400,
        ylim=(0, 1),
    )
    ````
    """
    if len(dfs) < 2:
        return dfs[0]

    df = reduce(
        lambda left, right: left.join(
            right,
            on=merge_on,
            how="outer_coalesce",
        ).sort(sort_by),
        dfs,
    )
    return df