In [5]:
%%capture
from pathlib import Path

if Path.cwd().stem == "features":
    %cd ../..
    %load_ext autoreload
    %autoreload 2

In [6]:
import logging

import holoviews as hv
import hvplot.pandas  # noqa
import neurokit2 as nk
import pandas as pd
import polars as pl
from icecream import ic
from polars import col

from src.data.database_manager import DatabaseManager
from src.features.quality_checks import check_sample_rate
from src.features.scaling import scale_min_max
from src.features.transformations import add_time_column, map_trials
from src.log_config import configure_logging

# Project logging setup: DEBUG level on the stream handler; the listed library
# names are passed as `ignore_libs` (presumably to silence their loggers --
# see src.log_config for the exact semantics).
configure_logging(
    stream_level=logging.DEBUG,
    ignore_libs=["matplotlib", "Comm", "bokeh", "tornado"],
)

# Display options for the rest of the notebook.
pl.Config.set_tbl_rows(12)  # for the 12 trials
# HoloViews output options: widgets placed below the plot, size set to 130.
hv.output(widget_location="bottom", size=130)

In [7]:
# Project database handle; the cells below use it as a context manager (`with db:`).
db = DatabaseManager()

In [9]:
# Load the "Raw_Face" table inside the database context and show it as the
# cell result (the rendered output suggests a polars DataFrame).
with db:
    df = db.read_table("Raw_Face")
df

trial_id,trial_number,participant_id,rownumber,timestamp,samplenumber,anger,contempt,disgust,fear,joy,sadness,surprise,engagement,valence,sentimentality,confusion,neutral,attention,brow_furrow,brow_raise,cheek_raise,chin_raise,dimpler,eye_closure,eye_widen,inner_brow_raise,jaw_drop,lip_corner_depressor,lip_press,lip_pucker,lip_stretch,lip_suck,lid_tighten,mouth_open,nose_wrinkle,smile,smirk,upper_lip_raise,blink,blinkrate,pitch,yaw,roll,interocular_distance
u16,u8,u8,u32,f64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i64,i64,f64,f64,f64,f64
1,1,1,8811,294350.9838,8803,0.12518,0.181806,0.023876,0.024252,0.02624,0.124534,0.061223,0.328766,0.0,0.036173,0.036173,99.636383,98.484863,0.036173,0.435721,0.286314,0.000047,0.341293,0.000226,0.065865,0.086099,1.865428,2.3786e-7,0.002053,4.7822e-7,0.006318,0.000696,0.000105,0.479706,0.007313,0.642375,0.0,0.000326,0,18,-23.710371,-1.128007,3.344138,99.168541
1,1,1,8814,294450.0503,8806,0.125883,0.184421,0.023928,0.02429,0.025578,0.125019,0.060915,0.328766,0.0,0.032122,0.032122,99.631157,98.625015,0.032122,0.378975,0.176308,0.000043,0.049774,0.000485,0.102114,0.05119,1.799533,1.9135e-7,0.001091,0.000001,0.00288,0.00058,0.000139,0.356424,0.006213,0.469142,0.0,0.000254,0,18,-24.764748,-0.635587,2.434715,98.594276
1,1,1,8817,294549.6316,8809,0.126686,0.188357,0.023969,0.02443,0.024682,0.126219,0.058203,0.328766,0.0,0.070808,0.070808,99.623291,98.602272,0.070808,0.361488,0.065987,0.000024,0.0,0.000516,0.126224,0.048451,0.702215,0.000001,0.000179,0.000005,0.001249,0.000078,0.000076,0.1413,0.007088,0.205794,0.0,0.000226,0,18,-23.246853,0.718773,2.603575,97.55748
1,1,1,8820,294634.3984,8812,0.127169,0.189015,0.024152,0.024633,0.024491,0.126433,0.061399,0.328766,0.0,0.131433,0.131433,99.621964,98.427483,0.131433,0.50136,0.072655,0.000024,0.0,0.000723,0.070128,0.02319,1.927209,0.000003,0.000121,0.000007,0.000954,0.000056,0.000321,0.276765,0.010896,0.132758,0.0,0.001234,0,18,-23.466768,1.316812,2.526687,98.262718
1,1,1,8823,294732.1368,8815,0.127182,0.190517,0.024137,0.024605,0.024171,0.126702,0.065395,0.328766,0.0,0.027317,0.105077,99.618958,98.354904,0.105077,0.474149,0.048332,0.000021,0.0,0.000477,0.064493,0.012793,3.484773,0.000009,0.000079,0.000007,0.000176,0.000142,0.000424,0.151191,0.007597,0.027317,0.0,0.001685,0,18,-24.3141,1.546099,2.309833,98.153336
1,1,1,8825,294819.3329,8817,0.127375,0.190466,0.024132,0.024714,0.024196,0.127041,0.062274,0.328766,0.0,0.037742,0.208099,99.619064,98.472748,0.208099,0.556748,0.046347,0.00003,0.0,0.000897,0.055136,0.009925,2.239793,0.000019,0.000066,0.000014,0.000259,0.00005,0.000514,0.142796,0.013089,0.037742,0.0,0.004319,0,18,-25.109367,1.168442,2.90777,95.516327
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
332,12,28,83102,2.7765e6,83092,2.185745,1.1203,0.034979,0.174728,0.024038,2.860603,0.058569,22.391489,-27.112289,0.000141,1.388582,94.278793,98.692047,70.784744,0.000114,0.004958,0.000001,0.0,0.211655,0.00008,0.000088,1.388582,0.000141,7.1548e-8,0.000992,0.000002,0.000002,3.990463,7.080192,2.628992,0.000125,0.000814,0.160832,0,36,-10.047065,0.382295,-2.44476,105.601097
332,12,28,83108,2.7767e6,83098,0.398232,0.399259,0.027236,0.055606,0.024033,0.469361,0.059506,0.798126,-2.224098,0.000043,1.76764,99.061279,98.787117,29.224678,0.000416,0.00104,0.000001,0.0,0.000109,0.001432,0.00029,1.76764,0.000043,4.0419e-7,0.000469,0.000005,0.000003,0.060905,3.258281,0.318643,0.000037,0.000768,0.013712,0,36,-12.295136,0.000177,-2.797651,106.322632
332,12,28,83111,2.7768e6,83101,0.338212,0.354203,0.028105,0.049393,0.024033,0.37715,0.061712,0.594023,0.0,0.000041,2.641749,99.245697,98.534729,24.41519,0.00046,0.001269,0.000002,0.0,0.000116,0.001541,0.00019,2.641749,0.000025,5.3460e-7,0.001035,0.000003,0.000006,0.073516,3.90411,0.512026,0.000041,0.001206,0.018322,0,36,-14.677032,-0.958164,-2.256958,105.994217


In [11]:
# Join every EDA sample with a stimulus sample via an ASOF join keyed on
# (trial_id, timestamp). NOTE(review): `.pl()` suggests a DuckDB backend, where
# the last USING column is the inequality key and the others are equality keys
# -- confirm.
query = """
SELECT * FROM Preprocess_EDA AS ce
ASOF JOIN Raw_Stimulus rs USING (trial_id, timestamp)
ORDER BY ce.trial_id, ce.timestamp
"""

with db:
    df = db.execute(query).pl()

# Sanity check: (trial_id, timestamp) should identify each row after the join.
# The de-duplicated height is computed once and reused; row order is irrelevant
# for a count, so `maintain_order` is not requested.
n_unique_keys = df.unique(subset=["trial_id", "timestamp"]).height
ic(df.height, n_unique_keys)
ic(df.height - n_unique_keys)  # number of duplicated (trial_id, timestamp) rows
df

ic| df.height: 5935133
    df.unique(subset=["trial_id","timestamp"]).height: 5935133
ic| df.height - df.unique(subset=["trial_id","timestamp"]).height: 0


trial_id,trial_number,participant_id,rownumber,timestamp,samplenumber,eda_d_battery,eda_raw,eda_d_packetreceptionrate,eda_tonic,eda_phasic,temperature,rating
u16,u8,u8,u32,f64,i64,f64,f64,i64,f64,f64,f64,f64
1,1,1,0,294224.331,57895,3677.435897,0.753247,100,0.752117,0.00113,45.75,42.5
1,1,1,0,294242.275,57896,3687.630769,0.754135,100,0.752119,0.002016,45.75,42.5
1,1,1,0,294248.2588,57898,3692.0,0.754135,100,0.752121,0.002014,45.75,42.5
1,1,1,0,294276.1835,57899,3678.892308,0.754135,100,0.752123,0.002012,45.75,42.5
1,1,1,0,294277.1819,57900,3692.0,0.752359,100,0.752125,0.000234,45.75,42.5
1,1,1,0,294309.0952,57902,3705.107692,0.752359,100,0.752127,0.000232,45.75,42.5
…,…,…,…,…,…,…,…,…,…,…,…,…
332,12,28,21610,2.7771e6,467073,3603.158974,13.679468,99,13.578894,-0.012187,45.582614,85.0
332,12,28,21610,2.7771e6,467074,3617.723077,13.679468,99,13.578894,-0.012765,45.582614,85.0
332,12,28,21610,2.7771e6,467075,3582.769231,13.674363,99,13.578894,-0.018451,45.582614,85.0


In [10]:
# Same ASOF join as the per-trial check, but here uniqueness is tested on
# `timestamp` alone, i.e. across all trials rather than within a trial.
query = """
SELECT * FROM Preprocess_EDA AS ce
ASOF JOIN Raw_Stimulus rs USING (trial_id, timestamp)
ORDER BY ce.trial_id, ce.timestamp
"""

with db:
    df = db.execute(query).pl()

# Count distinct timestamps once and reuse the result for both reports.
n_unique_timestamps = df.unique(subset="timestamp").height
ic(df.height, n_unique_timestamps)
ic(df.height - n_unique_timestamps)  # rows whose timestamp also occurs elsewhere
df

ic| df.height: 5935133, df.unique("timestamp").height: 5934569
ic| df.height - df.unique("timestamp").height: 564


trial_id,trial_number,participant_id,rownumber,timestamp,samplenumber,eda_d_battery,eda_raw,eda_d_packetreceptionrate,eda_tonic,eda_phasic,temperature,rating
u16,u8,u8,u32,f64,i64,f64,f64,i64,f64,f64,f64,f64
1,1,1,0,294224.331,57895,3677.435897,0.753247,100,0.752117,0.00113,45.75,42.5
1,1,1,0,294242.275,57896,3687.630769,0.754135,100,0.752119,0.002016,45.75,42.5
1,1,1,0,294248.2588,57898,3692.0,0.754135,100,0.752121,0.002014,45.75,42.5
1,1,1,0,294276.1835,57899,3678.892308,0.754135,100,0.752123,0.002012,45.75,42.5
1,1,1,0,294277.1819,57900,3692.0,0.752359,100,0.752125,0.000234,45.75,42.5
1,1,1,0,294309.0952,57902,3705.107692,0.752359,100,0.752127,0.000232,45.75,42.5
…,…,…,…,…,…,…,…,…,…,…,…,…
332,12,28,21610,2.7771e6,467073,3603.158974,13.679468,99,13.578894,-0.012187,45.582614,85.0
332,12,28,21610,2.7771e6,467074,3617.723077,13.679468,99,13.578894,-0.012765,45.582614,85.0
332,12,28,21610,2.7771e6,467075,3582.769231,13.674363,99,13.578894,-0.018451,45.582614,85.0


In [56]:
# Time columns