In [11]:
%%capture
from pathlib import Path

if Path.cwd().stem == "models":
    %cd ../..
    %load_ext autoreload
    %autoreload 2

In [None]:
import logging
from pathlib import Path

import holoviews as hv
import hvplot.polars  # noqa
import ipywidgets
import matplotlib.pyplot as plt
import numpy as np
import polars as pl

from src.data.database_manager import DatabaseManager
from src.features.resampling import (
    add_normalized_timestamp,
)
from src.log_config import configure_logging
from src.models.data_loader import transform_sample_df_to_arrays
from src.models.sample_creation import create_samples, make_sample_set_balanced
from src.plots.utils import prepare_multiline_hvplot

# Debug-level stream logging; these library names are handed to `ignore_libs`.
_ignored_log_libs = ["matplotlib", "Comm", "bokeh", "tornado"]
configure_logging(stream_level=logging.DEBUG, ignore_libs=_ignored_log_libs)

# Display settings: 12 table rows (one per trial, there are 12 trials) and
# larger holoviews output with widgets placed below the plot.
pl.Config.set_tbl_rows(12)
hv.output(size=130, widget_location="bottom")

# Database handle; later cells enter it with `with db:` before reading tables.
db = DatabaseManager()

In [26]:
# Read the merged, labeled table (trials with measurement problems excluded).
with db:
    df = db.get_table(
        "Merged_and_Labeled_Data", exclude_trials_with_measurement_problems=True
    )

# Sample definition, keyed by class name: interval source, integer label and
# offset in milliseconds.
# Alternatives that are currently disabled: "decreasing_intervals" for the
# decreases, "strictly_increasing_intervals_without_plateaus" for the
# increases, and an extra "plateaus" class ("plateau_intervals", label 1).
intervals = dict(
    decreases="major_decreasing_intervals",
    increases="strictly_increasing_intervals",
)
label_mapping = dict(decreases=0, increases=1)
offsets_ms = dict(decreases=3000, increases=0)
sample_duration_ms = 5000

# Columns carried along for modelling and for the plots further down.
_sample_columns = (
    "sample_id",
    "trial_id",
    "participant_id",
    "normalized_timestamp",
    "timestamp",
    "rating",
    "temperature",
    "eda_raw",
    "eda_tonic",
    "eda_phasic",
    "pupil_mean",
    "label",
)

# Class balancing is not done here; it happens in its own cell below.
samples = create_samples(
    df, intervals, label_mapping, sample_duration_ms, offsets_ms
).select(*_sample_columns)

# Model input features. Currently disabled: "temperature" (only for
# visualization), "rating" and "eda_raw".
feature_list = ["eda_tonic", "eda_phasic", "pupil_mean"]

samples


16:39:58 | [36mDEBUG   [0m| sample_creation | Removed 157 samples that were shorter than 4900.0 ms


sample_id,trial_id,participant_id,normalized_timestamp,timestamp,rating,temperature,eda_raw,eda_tonic,eda_phasic,pupil_mean,label
u16,u16,u8,f64,f64,f64,f64,f64,f64,f64,f64,u8
1,1,1,72000.0,366198.9762,1.0,0.917611,0.744076,0.744163,-0.000086,3.770929,0
1,1,1,72100.0,366298.9762,1.0,0.91422,0.744136,0.744152,-0.000016,3.882127,0
1,1,1,72200.0,366398.9762,1.0,0.910721,0.744199,0.744139,0.00006,4.125996,0
1,1,1,72300.0,366498.9762,1.0,0.907126,0.744092,0.744125,-0.000033,4.309097,0
1,1,1,72400.0,366598.9762,1.0,0.90347,0.743948,0.744111,-0.000163,4.334012,0
1,1,1,72500.0,366698.9762,1.0,0.899683,0.744093,0.744097,-0.000004,4.326256,0
…,…,…,…,…,…,…,…,…,…,…,…
4128,516,50,151600.0,2.6677e6,0.994389,0.598511,27.490637,27.440795,0.049909,5.482657,1
4128,516,50,151700.0,2.6678e6,0.995,0.60579,27.473472,27.440962,0.032578,5.483765,1
4128,516,50,151800.0,2.6679e6,0.995,0.61283,27.463892,27.441078,0.022883,5.48144,1


In [29]:
# One row per sample with its label. Only `label` is aggregated (instead of
# taking the first value of every column and discarding most of them), and
# `maintain_order=True` keeps the row order identical between runs.
sample_ids = samples.group_by("sample_id", maintain_order=True).agg(
    pl.col("label").first()
)

# Number of samples per class; `sort=True` lists the largest class first so the
# displayed table does not change order from run to run.
sample_ids_count = sample_ids.get_column("label").value_counts(sort=True)
sample_ids_count

label,count
u8,u32
1,1885
0,1413


In [30]:
# Balance the sample set across labels. The seed is passed explicitly so that
# "Restart Kernel -> Run All" selects the same samples every time.
# NOTE(review): confirm `random_seed` is the keyword accepted by
# `make_sample_set_balanced`.
# `samples` is overwritten here: every later cell sees the balanced set, and
# re-running this cell alone operates on already balanced data.
samples = make_sample_set_balanced(samples, random_seed=42)

In [31]:
# Options shared by both layers: temperature over time, one view per trial.
_overlay_opts = dict(x="timestamp", y="temperature", groupby="trial_id", height=300)

# Full temperature trace of each trial ...
_trial_curve = df.hvplot(**_overlay_opts)
# ... with the rows that belong to a sample marked as red points on top.
_sample_points = samples.hvplot(kind="scatter", color="red", **_overlay_opts)

_trial_curve * _sample_points

BokehModel(combine_events=True, render_bundle={'docs_json': {'cc333566-87a7-4719-b8ba-7e713fa5e629': {'version…

In [32]:
# `prepare_multiline_hvplot` is imported in the import cell at the top of the
# notebook, together with all other dependencies.

# Step 1: recompute "normalized_timestamp" with `sample_id` as the grouping
# column (presumably so every sample starts at the same origin -- TODO confirm
# against `add_normalized_timestamp`).
_samples_renormalized = add_normalized_timestamp(
    samples,
    time_column="normalized_timestamp",
    trial_column="sample_id",
)

# Step 2: reshape so that all samples can be drawn as separate lines in one plot.
_samples_multiline = prepare_multiline_hvplot(
    _samples_renormalized,
    time_column="normalized_timestamp",
    trial_column="sample_id",
)

# Step 3: rating and temperature of all samples, one view per label.
_samples_multiline.hvplot(
    x="normalized_timestamp",
    y=["rating", "temperature"],
    groupby="label",
    height=300,
    ylim=(0, 1.05),
    color=["blue", "orange"],
)

BokehModel(combine_events=True, render_bundle={'docs_json': {'e2eca0fa-5fa1-4830-83c7-c6a601a35526': {'version…

In [None]:
# Columns to inspect visually; "temperature" is included only for visualization.
# Currently disabled here: "eda_raw", "eda_tonic", "pupil_mean".
plot_features = ["temperature", "rating"]
X, y, groups = transform_sample_df_to_arrays(samples, plot_features)


@ipywidgets.interact(trial=(0, X.shape[0] - 1))
def plot_trial(trial):
    """Plot all channels of a single entry of ``X``.

    Args:
        trial: Index along the first axis of ``X``, selected with the slider.
            Each entry is one sample; the parameter name is kept unchanged
            because it also serves as the slider's label.
    """
    fig, ax = plt.subplots()
    for i in range(X.shape[2]):
        # assumes the last axis of X follows the order of `plot_features`
        # -- TODO confirm against `transform_sample_df_to_arrays`
        label = plot_features[i] if i < len(plot_features) else f"feature {i}"
        ax.plot(X[trial, :, i], label=label)
    ax.set(
        title=f"Sample index {trial}",
        xlabel="Index within sample",
        ylabel="Value",
    )
    ax.legend()
    plt.show()

interactive(children=(IntSlider(value=1412, description='trial', max=2825), Output()), _dom_classes=('widget-i…