In [10]:
from pathlib import Path

# When the kernel's working directory is named "features", move two levels up
# (presumably to the repository root so the `src.` imports in the next cell
# resolve — TODO confirm) and turn on autoreload so edits to imported modules
# are picked up without restarting the kernel.
# NOTE(review): autoreload is only enabled inside this guard, i.e. not when the
# kernel already starts outside a "features" directory — confirm this is intended.
if Path.cwd().stem == "features":
    %cd ../..
    %load_ext autoreload
    %autoreload 2

In [11]:
import logging
import os
from dataclasses import dataclass
from functools import reduce, wraps
from pathlib import Path
from typing import Dict, List

import holoviews as hv
import hvplot.polars
import matplotlib.pyplot as plt
import neurokit2 as nk
import numpy as np
import pandas as pd
import panel as pn
import plotly.express as px
import polars as pl

from src.data.config_data import DataConfigBase
from src.data.config_data_interim import INTERIM_DICT, INTERIM_LIST, InterimConfig
from src.data.config_data_raw import RAW_DICT, RAW_LIST, RawConfig
from src.data.config_participant import PARTICIPANT_LIST, ParticipantConfig
from src.data.make_dataset import load_dataset, load_participant_datasets
from src.features.quality_checks import check_sample_rate
from src.features.scaling import scale_min_max, scale_standard
from src.features.stimulus import corr_temperature_rating
from src.features.transformations import (
    add_timedelta_column,
    interpolate,
    map_participant_datasets,
    map_trials,
    merge_dfs,
)
from src.log_config import configure_logging
from src.visualization.plot_data import (
    plot_data_panel,
    plot_trial_matplotlib,
    plot_trial_plotly,
)

# Project logging: DEBUG on the stream handler; the listed library names are
# handed to `configure_logging` as `ignore_libs`.
configure_logging(
    ignore_libs=["matplotlib", "Comm", "bokeh", "tornado"],
    stream_level=logging.DEBUG,
)

# Display defaults used by the rest of the notebook.
hv.extension("plotly")
pl.Config.set_tbl_rows(7)  # keep printed tables short in the book
plt.rcParams.update({"figure.figsize": [15, 5]})  # wider than the matplotlib default

In [12]:
# Load the datasets named in INTERIM_LIST for the first participant in
# PARTICIPANT_LIST. Individual datasets are accessed as attributes further
# down (e.g. `dfs.stimulus`).
dfs = load_participant_datasets(PARTICIPANT_LIST[0], INTERIM_LIST)

15:29:38 | [36mDEBUG   [0m| make_dataset | Dataset 'stimulus' for participant 0 loaded from data/interim/0/0_stimulus.csv
15:29:39 | [36mDEBUG   [0m| make_dataset | Dataset 'eeg' for participant 0 loaded from data/interim/0/0_eeg.csv
15:29:39 | [36mDEBUG   [0m| make_dataset | Dataset 'eda' for participant 0 loaded from data/interim/0/0_eda.csv
15:29:39 | [36mDEBUG   [0m| make_dataset | Dataset 'ppg' for participant 0 loaded from data/interim/0/0_ppg.csv
15:29:39 | [36mDEBUG   [0m| make_dataset | Dataset 'pupillometry' for participant 0 loaded from data/interim/0/0_pupillometry.csv
15:29:39 | [36mDEBUG   [0m| make_dataset | Dataset 'affectiva' for participant 0 loaded from data/interim/0/0_affectiva.csv
15:29:39 | [92mINFO    [0m| make_dataset | Participant 0 loaded with datasets: dict_keys(['stimulus', 'eeg', 'eda', 'ppg', 'pupillometry', 'affectiva'])


### Stimulus

In [13]:
# Work on a copy of the stimulus data: min-max scale it, interpolate it, then
# draw temperature and rating over time, grouped by trial.
features = ["Temperature", "Rating"]
stimulus = interpolate(scale_min_max(dfs.stimulus.clone()))
stimulus.hvplot(
    kind="line",
    x="Timestamp",
    y=features,
    groupby="Trial",
    width=800,
    height=400,
)

BokehModel(combine_events=True, render_bundle={'docs_json': {'33170090-6212-4c99-8a41-244686494741': {'version…

In [14]:
# NOTE(review): at this point `corr_temperature_rating` is the version imported
# from src.features.stimulus; in the recorded run this call raised
# "PanicException: UDF failed: Participant". A later cell redefines the
# function locally — confirm which implementation should feed `correlations`.
correlations = corr_temperature_rating(dfs.stimulus)

thread '<unnamed>' panicked at py-polars/src/dataframe/general.rs:356:31:
UDF failed: Participant


PanicException: UDF failed: Participant

In [27]:
@map_trials
def calculate_corr(
    df: pl.DataFrame,
    feature1: str,
    feature2: str,
    add_columns: List[str] = ["Trial", "Participant", "Stimulus_Seed"],
) -> pl.DataFrame:
    """
    Calculate the correlation between two features (columns of a DataFrame).

    The function itself processes whatever frame it receives; per the original
    description the `map_trials` decorator is what applies it for each trial
    (its implementation is not visible here — TODO confirm).

    Parameters
    ----------
    df : pl.DataFrame
        Frame containing at least the columns `feature1` and `feature2`.
    feature1, feature2 : str
        Names of the two columns to correlate.
    add_columns : List[str]
        Columns whose first value in `df` is copied into the result. Names
        that are not columns of `df` are skipped. The list is only read,
        never mutated.

    Returns
    -------
    pl.DataFrame
        A single-row frame with a "Correlation" column plus one column per
        carried-over name. "Trial" comes first when it is carried over and is
        omitted otherwise.
    """
    # Only carry over the requested columns that actually exist in this frame.
    carried = [name for name in add_columns if name in df.columns]

    result = (
        df.select([feature1, feature2])
        .corr()  # 2x2 correlation table, one column per feature
        .gather_every(2)  # keep the first row: [self-correlation, cross-correlation]
        # Repurpose the two columns: the self-correlation slot becomes a
        # placeholder "Trial" column, the cross-correlation is the result.
        .rename({feature1: "Trial", feature2: "Correlation"})
        # A carried "Trial" overwrites the placeholder in place; every other
        # carried column is appended after "Correlation".
        .with_columns([pl.Series(name, [df[name][0]]) for name in carried])
    )

    if "Trial" not in carried:
        # No real trial id is available, so the placeholder still holds the
        # self-correlation of `feature1`; drop it instead of reporting it as
        # a trial number.
        result = result.drop("Trial")
    return result


def corr_temperature_rating(df: pl.DataFrame) -> pl.DataFrame:
    """
    Correlation of the 'Temperature' and 'Rating' columns for each trial.

    Convenience wrapper that forwards `df` and the two fixed column names to
    `calculate_corr` and returns its result unchanged.
    """
    temperature_column, rating_column = "Temperature", "Rating"
    return calculate_corr(df, temperature_column, rating_column)


# Keep the result instead of only displaying it: the scatter plot at the end of
# the notebook reads `correlations`, and the only other assignment to that name
# failed in the recorded run.
correlations = corr_temperature_rating(dfs.stimulus)
correlations

Trial,Correlation,Participant,Stimulus_Seed
f64,f64,f64,f64
0.0,0.606159,0.0,280.0
1.0,0.51762,0.0,630.0
2.0,0.55184,0.0,659.0
3.0,0.70633,0.0,762.0
…,…,…,…
9.0,0.754231,0.0,989.0
10.0,0.663952,0.0,140.0
11.0,0.535749,0.0,306.0


In [23]:
# Scratch check: filtering a list keeps every matching element, duplicates
# included.
a = [1, 2, 3, 3]

list(filter(lambda value: value == 3, a))

[3, 3]

In [None]:
# Scatter plot of the "Correlation" column against "Trial" for the frame held
# in `correlations` (must have been assigned by an earlier cell).
correlations.plot.scatter(x="Trial", y="Correlation", title="Correlation")