In [241]:
import os
import sys
from pathlib import Path


PROJECT_DIR = Path.cwd()
if PROJECT_DIR.stem == 'data':
    PROJECT_DIR = PROJECT_DIR.parents[1]
    sys.path.insert(0, PROJECT_DIR.as_posix())
    os.chdir(PROJECT_DIR.as_posix())
    %load_ext autoreload
    %autoreload 2

In [242]:
import os
from pathlib import Path
from functools import reduce
from dataclasses import dataclass
from typing import Dict, List
import logging

import numpy as np
import pandas as pd
import polars as pl
import matplotlib.pyplot as plt
import neurokit2 as nk

from src.data.quality_check import check_sample_rate
from src.data.process_data import load_participant_datasets
from src.data.transform_data import apply_func_participant
from src.data.config_data import DataConfigBase
from src.data.config_data_raw import RawConfig, RAW_LIST
from src.data.config_participant import ParticipantConfig, PARTICIPANT_LIST

from src.log_config import configure_logging
configure_logging(color=True, stream_level=logging.DEBUG, ignore_libs=['matplotlib'])
pl.Config.set_tbl_rows(7); # don't print too many rows in the book
plt.rcParams['figure.figsize'] = [15, 5] # default is [6, 4]

In [247]:
eda_raw = load_participant_datasets(PARTICIPANT_LIST[0], RAW_LIST).eda

sampling_rate = 100
eda_raw = eda_raw.unique('Timestamp').sort('Timestamp') # actually slightly faster than maintain_order=True
logging.warning("Working with unique timestamps.")

check_sample_rate(eda_raw)
check_sample_rate(eda_raw, unique_timestamp=True)

eda_raw_trials = [
    group.select('EDA_RAW')
    .to_numpy()
    .flatten() 
    for _, group in eda_raw.group_by(['Trial'])
    ]
"""Same as:
# Get each trial as a separate np.array
groups = eda_raw.group_by(["Trial"], maintain_order=True)
eda_raw_trials = []
for _, group in groups:
    eda_raw_trials.append(group.select('EDA_RAW').to_numpy().flatten())
"""
eda_raw#= eda_raw.select('EDA_RAW')


18:51:48 |[36m DEBUG   [0m| process_data | Dataset 'temperature' for participant 001_pilot_bjoern loaded from data/raw/001_pilot_bjoern/001_pilot_bjoern_temperature.csv
18:51:48 |[36m DEBUG   [0m| process_data | Dataset 'rating' for participant 001_pilot_bjoern loaded from data/raw/001_pilot_bjoern/001_pilot_bjoern_rating.csv
18:51:48 |[36m DEBUG   [0m| process_data | Dataset 'eda' for participant 001_pilot_bjoern loaded from data/raw/001_pilot_bjoern/001_pilot_bjoern_eda.csv
18:51:48 |[36m DEBUG   [0m| process_data | Dataset 'ecg' for participant 001_pilot_bjoern loaded from data/raw/001_pilot_bjoern/001_pilot_bjoern_ecg.csv
18:51:49 |[36m DEBUG   [0m| process_data | Dataset 'eeg' for participant 001_pilot_bjoern loaded from data/raw/001_pilot_bjoern/001_pilot_bjoern_eeg.csv
18:51:49 |[36m DEBUG   [0m| process_data | Dataset 'pupillometry' for participant 001_pilot_bjoern loaded from data/raw/001_pilot_bjoern/001_pilot_bjoern_pupillometry.csv
18:51:49 |[36m DEBUG   [0m| 

Timestamp,EDA_RAW,Trial
f64,f64,i64
188041.074,6.057357,0
188049.0512,6.062463,0
188064.0122,6.057357,0
…,…,…
2.3035e6,4.520683,5
2.3035e6,4.515577,5
2.3036e6,4.520683,5


In [None]:
def apply_func_trial(func, df, **kwargs) -> pl.DataFrame:
    """Apply function to each trial in a DataFrame."""
    # Pass additional kwargs to the function
    df = df.group_by('Trial').map_groups(lambda group: func(group, **kwargs))
    return df

## neurokit

In [263]:


def neurokit_eda_process(df: pl.DataFrame, sampling_rate) -> pl.DataFrame:
    array_np = df.select('EDA_RAW').to_numpy().flatten()
    df_pd, info = nk.eda_process(array_np, sampling_rate=sampling_rate)
    df_to_add = pl.from_pandas(df_pd[['EDA_Tonic', 'EDA_Phasic']])
    df = df.hstack(df_to_add)
    return df

# Example usage

eda = apply_func_trial(neurokit_eda_process, eda_raw, sampling_rate=sampling_rate)
eda


Timestamp,EDA_RAW,Trial,EDA_Tonic,EDA_Phasic
f64,f64,i64,f64,f64
188041.074,6.057357,0,6.906608,-0.849225
188049.0512,6.062463,0,6.909742,-0.852907
188064.0122,6.057357,0,6.912882,-0.856589
…,…,…,…,…
1.1793e6,7.139666,2,6.97129,0.0078
1.1793e6,7.144772,2,6.97129,0.007011
1.1793e6,7.139666,2,6.97129,0.006191


In [None]:
eda_processed, info = nk.eda_process(eda_raw_trial, sampling_rate=sampling_rate, method="neurokit")
plot = nk.eda_plot(eda_processed, info)
eda_processed['EDA_Tonic']


In [None]:
# Filter phasic and tonic components
eda_processed_2 = nk.eda_phasic(nk.standardize(eda_raw_trial), sampling_rate=sampling_rate)
eda_processed_2.plot()
plt.plot(eda_raw_trial)
eda_processed_2

In [None]:
# Decompose using different algorithms
smoothMedian = nk.eda_phasic(eda_raw_trial, method='smoothmedian')
highpass = nk.eda_phasic(eda_raw_trial, method='highpass')
sparse = nk.eda_phasic(eda_raw_trial, method='smoothmedian')
# NOTE: smoothMedian and highpass are quite slow (3 seconds for 1 trial)

# Extract tonic and phasic components for plotting
t1, p1 = smoothMedian["EDA_Tonic"].values, smoothMedian["EDA_Phasic"].values
t2, p2 = highpass["EDA_Tonic"].values, highpass["EDA_Phasic"].values
t3, p3 = sparse["EDA_Tonic"].values, sparse["EDA_Phasic"].values

In [None]:
# Plot tonic
nk.signal_plot([t1, t2, t3], labels=["SmoothMedian", "Highpass", "Sparse"])
# NOTE: SmoothMedian and Sparse are very similar

In [None]:
# Plot phasic
nk.signal_plot([p1, p2, p3], labels=["SmoothMedian", "Highpass", "Sparse"])

### biosppy

In [None]:
import numpy as np
from biosppy.signals import eda

# process it and plot
out = eda.eda(signal=eda_raw_trial, sampling_rate=100., show=True)
plt.style.use('default') # reset style

## ledapy

In [None]:
import ledapy
sampling_rate = 128

phasicdata = ledapy.runner.getResult(eda_raw_trial, 'phasicdata', sampling_rate, downsample=1, optimisation=2)
plt.plot(phasicdata)
phasicdata.shape, eda_raw_trial.shape

## pyEDA

# Comparison

In [None]:
eda_raw.plot(y='EDA_RAW', x='Timestamp', label='EDA_RAW', legend=True)