In [1]:
%%capture
from pathlib import Path

# When the kernel starts inside a directory named "features", move two levels up
# (presumably to the project root so the `src.` imports resolve — confirm).
# After the `%cd` the guard is normally false, so re-running this cell does not
# climb further up the tree.
# NOTE(review): autoreload is only enabled inside this branch, i.e. not when the
# kernel is already started from another directory.
if Path.cwd().stem == "features":
    %cd ../..
    %load_ext autoreload
    %autoreload 2

In [26]:
import logging

import holoviews as hv
import hvplot.pandas  # noqa
import polars as pl
from icecream import ic
from polars import col

from src.data.database_manager import DatabaseManager
from src.data.quality_checks import check_sample_rate
from src.features.resampling import add_time_column, downsample
from src.features.scaling import scale_min_max
from src.features.transforming import map_trials
from src.log_config import configure_logging

# Logging: DEBUG level on the stream output; the libraries listed below are handed
# to `ignore_libs` (presumably to silence their log records — confirm).
noisy_libs = ["matplotlib", "Comm", "bokeh", "tornado"]
configure_logging(stream_level=logging.DEBUG, ignore_libs=noisy_libs)

# Display: print up to 12 table rows (one per trial) and put HoloViews widgets
# underneath the plot.
pl.Config.set_tbl_rows(12)
hv.output(widget_location="bottom", size=130)

In [27]:
# Single database handle for the notebook; later cells open it with `with db:`.
db = DatabaseManager()

In [28]:
# Load the preprocessed face table and append the `time` column produced by
# `add_time_column` (shown as duration[ms] in the displayed frame).
# Only "Preprocess_Face" is read: a preceding read of "Raw_Face" into the same
# variable would be overwritten immediately and only cost an extra table load.
with db:
    df = db.get_table("Preprocess_Face")
df = add_time_column(df)
df

trial_id,trial_number,participant_id,rownumber,timestamp,samplenumber,anger,contempt,disgust,fear,joy,sadness,surprise,engagement,valence,sentimentality,confusion,neutral,attention,brow_furrow,brow_raise,cheek_raise,chin_raise,dimpler,eye_closure,eye_widen,inner_brow_raise,jaw_drop,lip_corner_depressor,lip_press,lip_pucker,lip_stretch,lip_suck,lid_tighten,mouth_open,nose_wrinkle,smile,smirk,upper_lip_raise,blink,blinkrate,pitch,yaw,roll,interocular_distance,time
u16,u8,u8,u32,f64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i64,i64,f64,f64,f64,f64,duration[ms]
1,1,1,8811,294350.9838,8803,0.00125,0.00182,0.00024,0.00024,0.00026,0.00125,0.00061,0.00329,0.0,0.00036,0.00036,0.99636,0.98485,0.00036,0.00436,0.00286,0.0,0.00341,0.0,0.00066,0.00086,0.01865,0.0,0.00002,0.0,0.00006,0.00001,0.0,0.0048,0.00007,0.00642,0.0,0.0,0,18,-0.2371,-0.01128,0.03344,99.168541,4m 54s 350ms
1,1,1,8814,294450.0503,8806,0.00126,0.00184,0.00024,0.00024,0.00026,0.00125,0.00061,0.00329,0.0,0.00032,0.00032,0.99631,0.98625,0.00032,0.00379,0.00176,0.0,0.0005,0.0,0.00102,0.00051,0.018,0.0,0.00001,0.0,0.00003,0.00001,0.0,0.00356,0.00006,0.00469,0.0,0.0,0,18,-0.24765,-0.00636,0.02435,98.594276,4m 54s 450ms
1,1,1,8817,294549.6316,8809,0.00127,0.00188,0.00024,0.00024,0.00025,0.00126,0.00058,0.00329,0.0,0.00071,0.00071,0.99623,0.98602,0.00071,0.00361,0.00066,0.0,0.0,0.00001,0.00126,0.00048,0.00702,0.0,0.0,0.0,0.00001,0.0,0.0,0.00141,0.00007,0.00206,0.0,0.0,0,18,-0.23247,0.00719,0.02604,97.55748,4m 54s 549ms
1,1,1,8820,294634.3984,8812,0.00127,0.00189,0.00024,0.00025,0.00024,0.00126,0.00061,0.00329,0.0,0.00131,0.00131,0.99622,0.98427,0.00131,0.00501,0.00073,0.0,0.0,0.00001,0.0007,0.00023,0.01927,0.0,0.0,0.0,0.00001,0.0,0.0,0.00277,0.00011,0.00133,0.0,0.00001,0,18,-0.23467,0.01317,0.02527,98.262718,4m 54s 634ms
1,1,1,8823,294732.1368,8815,0.00127,0.00191,0.00024,0.00025,0.00024,0.00127,0.00065,0.00329,0.0,0.00027,0.00105,0.99619,0.98355,0.00105,0.00474,0.00048,0.0,0.0,0.0,0.00064,0.00013,0.03485,0.0,0.0,0.0,0.0,0.0,0.0,0.00151,0.00008,0.00027,0.0,0.00002,0,18,-0.24314,0.01546,0.0231,98.153336,4m 54s 732ms
1,1,1,8825,294819.3329,8817,0.00127,0.0019,0.00024,0.00025,0.00024,0.00127,0.00062,0.00329,0.0,0.00038,0.00208,0.99619,0.98473,0.00208,0.00557,0.00046,0.0,0.0,0.00001,0.00055,0.0001,0.0224,0.0,0.0,0.0,0.0,0.0,0.00001,0.00143,0.00013,0.00038,0.0,0.00004,0,18,-0.25109,0.01168,0.02908,95.516327,4m 54s 819ms
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
332,12,28,83102,2.7765e6,83092,0.02186,0.0112,0.00035,0.00175,0.00024,0.02861,0.00059,0.22391,-0.27112,0.0,0.01389,0.94279,0.98692,0.70785,0.0,0.00005,0.0,0.0,0.00212,0.0,0.0,0.01389,0.0,0.0,0.00001,0.0,0.0,0.0399,0.0708,0.02629,0.0,0.00001,0.00161,0,36,-0.10047,0.00382,-0.02445,105.601097,46m 16s 486ms
332,12,28,83108,2.7767e6,83098,0.00398,0.00399,0.00027,0.00056,0.00024,0.00469,0.0006,0.00798,-0.02224,0.0,0.01768,0.99061,0.98787,0.29225,0.0,0.00001,0.0,0.0,0.0,0.00001,0.0,0.01768,0.0,0.0,0.0,0.0,0.0,0.00061,0.03258,0.00319,0.0,0.00001,0.00014,0,36,-0.12295,0.0,-0.02798,106.322632,46m 16s 675ms
332,12,28,83111,2.7768e6,83101,0.00338,0.00354,0.00028,0.00049,0.00024,0.00377,0.00062,0.00594,0.0,0.0,0.02642,0.99246,0.98535,0.24415,0.0,0.00001,0.0,0.0,0.0,0.00002,0.0,0.02642,0.0,0.0,0.00001,0.0,0.0,0.00074,0.03904,0.00512,0.0,0.00001,0.00018,0,36,-0.14677,-0.00958,-0.02257,105.994217,46m 16s 772ms


In [31]:
# Mean spacing between consecutive samples of trial 1 (timestamps are in
# milliseconds), followed by the sampling rate in Hz that spacing implies.
trial_1_timestamps = df.filter(col("trial_id") == 1)["timestamp"]
mean_interval_ms = trial_1_timestamps.diff().mean()
print(mean_interval_ms)
print(1000 / mean_interval_ms)

101.36292390078917
9.865540194743874


In [30]:
# NOTE(review): as saved, this call panics with "New sample rate 10 must be
# smaller than current sample rate 10". The face data is already sampled at
# roughly 10 Hz (about 9.87 Hz measured on trial 1), so a target of 10 is rejected.
# TODO: pick a target rate below 10 or drop this step for the face data.
downsample(df, 10)

thread '<unnamed>' panicked at py-polars/src/dataframe/general.rs:352:31:
UDF failed: New sample rate 10 must be smaller than current sample rate 10
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace


PanicException: UDF failed: New sample rate 10 must be smaller than current sample rate 10

In [22]:
# Show the rows of one trial for a visual sanity check.
# NOTE(review): the saved output comes from an earlier execution than the load
# cell and shows values on a different scale (e.g. `neutral` near 99.6 instead of
# 0.996) — re-run before relying on it.
trial_id_to_inspect = 10
df.filter(col("trial_id") == trial_id_to_inspect)

trial_id,trial_number,participant_id,rownumber,timestamp,samplenumber,anger,contempt,disgust,fear,joy,sadness,surprise,engagement,valence,sentimentality,confusion,neutral,attention,brow_furrow,brow_raise,cheek_raise,chin_raise,dimpler,eye_closure,eye_widen,inner_brow_raise,jaw_drop,lip_corner_depressor,lip_press,lip_pucker,lip_stretch,lip_suck,lid_tighten,mouth_open,nose_wrinkle,smile,smirk,upper_lip_raise,blink,blinkrate,pitch,yaw,roll,interocular_distance,time
u16,u8,u8,u32,f64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i64,i64,f64,f64,f64,f64,duration[ms]
10,10,1,71868,2.4011e6,71860,0.127487,0.190916,0.024053,0.024395,0.024272,0.126869,0.057337,0.328766,0.0,0.069741,0.069741,99.618172,98.550827,0.069741,0.226616,0.037792,0.000083,0.0,0.00016,0.148086,0.023806,0.479644,0.00003,0.001118,5.6415e-8,0.001274,0.000397,0.000603,0.076417,0.018546,0.07015,0.0,0.002928,0,30,-24.569769,-0.902114,5.054742,92.630722,40m 1s 82ms
10,10,1,71870,2.4012e6,71862,0.127914,0.190201,0.024092,0.024654,0.024271,0.126431,0.058352,0.328766,0.0,0.05941,0.05941,99.619598,98.750511,0.05941,0.371026,0.040081,0.000107,0.0,0.000278,0.298905,0.01342,0.616223,0.000032,0.001613,8.8736e-8,0.000565,0.000475,0.000745,0.100799,0.030594,0.068661,0.0,0.00291,0,30,-24.592367,-0.150717,5.369899,91.002571,40m 1s 177ms
10,10,1,71874,2.4013e6,71866,0.128262,0.190164,0.024086,0.024865,0.024169,0.126317,0.059554,0.328766,0.0,0.03375,0.060448,99.619675,98.726997,0.060448,0.484564,0.034195,0.000025,0.0,0.000103,0.404417,0.006324,0.893932,0.00002,0.000182,9.7650e-7,0.00036,0.000131,0.000383,0.071352,0.020919,0.03375,0.0,0.00192,0,30,-26.423512,-0.245101,4.983922,90.283669,40m 1s 282ms
10,10,1,71876,2.4014e6,71868,0.128771,0.188046,0.024082,0.025329,0.024326,0.12552,0.060998,0.328766,0.0,0.044369,0.044369,99.623909,98.639122,0.044369,0.748467,0.037516,0.000038,0.0,0.000069,0.657472,0.017564,0.94127,0.000016,0.000775,1.6875e-7,0.001163,0.000189,0.000332,0.140108,0.013238,0.09008,0.0,0.001897,0,30,-23.903025,-0.583318,4.048125,91.313148,40m 1s 366ms
10,10,1,71879,2.4015e6,71871,0.128779,0.188414,0.024107,0.025465,0.024157,0.125571,0.061987,0.328766,0.0,0.032269,0.065495,99.623169,98.760185,0.065495,0.864647,0.028559,0.00001,0.0,0.000037,0.660009,0.009053,1.217274,0.000013,0.000084,0.000001,0.000201,0.000067,0.00026,0.103126,0.017054,0.032269,0.0,0.001984,0,30,-25.551376,0.11133,4.717494,90.73027,40m 1s 460ms
10,10,1,71882,2.4016e6,71874,0.129057,0.188033,0.024069,0.025556,0.024196,0.125373,0.060011,0.328766,0.0,0.041966,0.041966,99.623932,98.752525,0.041966,0.876473,0.027248,0.000054,0.0,0.000043,0.77608,0.011511,0.308851,0.000017,0.000583,2.7800e-7,0.000674,0.000318,0.000309,0.077705,0.012101,0.04754,0.0,0.002853,0,30,-24.035711,-0.142557,4.964402,91.051819,40m 1s 562ms
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
10,10,1,77241,2.5806e6,77233,0.126481,0.188433,0.023982,0.023993,0.025108,0.126429,0.057278,0.328766,0.0,0.15843,0.15843,99.623131,97.318314,0.15843,0.098415,0.148522,0.000073,0.0,0.014605,0.033702,0.002774,0.718605,0.000009,0.000315,0.000007,0.012437,0.000267,0.002508,0.085775,0.110081,0.317328,0.0,0.003775,0,24,-29.319355,4.042294,1.425686,90.206665,43m 607ms
10,10,1,77244,2.5807e6,77236,0.126475,0.188861,0.023961,0.023978,0.024989,0.126459,0.056824,0.328766,0.0,0.125038,0.125038,99.622284,97.465309,0.125038,0.081372,0.134966,0.000061,0.0,0.015676,0.026312,0.001626,0.553159,0.000006,0.000221,0.000004,0.008476,0.000295,0.002584,0.065182,0.083348,0.281528,0.0,0.002642,0,24,-29.727606,3.752884,1.495865,90.286819,43m 697ms
10,10,1,77247,2.5808e6,77239,0.126232,0.188161,0.023924,0.023992,0.025094,0.126176,0.056983,0.328766,0.0,0.110285,0.110285,99.62368,97.451393,0.110285,0.125237,0.149831,0.000081,0.0,0.014747,0.048078,0.002166,0.554236,0.000008,0.000292,0.000005,0.007931,0.000371,0.002534,0.046679,0.084366,0.311612,0.0,0.003313,0,24,-30.056351,3.780961,1.609186,90.367027,43m 792ms


In [11]:
# Join every preprocessed EDA sample with the stimulus table via an ASOF join
# (presumably DuckDB, given the ASOF syntax and `.pl()` — confirm; under DuckDB's
# rules `trial_id` is matched exactly and `timestamp` by nearest preceding row).
# Then check that (trial_id, timestamp) uniquely identifies a row of the result.
query = """
SELECT * FROM Preprocess_EDA AS ce
ASOF JOIN Raw_Stimulus rs USING (trial_id, timestamp)
ORDER BY ce.trial_id, ce.timestamp
"""

with db:
    df = db.execute(query).pl()

# The joined frame has several million rows, so count the distinct keys once
# (no need to materialise an order-preserving de-duplicated copy twice).
n_rows = df.height
n_unique_keys = df.n_unique(subset=["trial_id", "timestamp"])
ic(n_rows, n_unique_keys)
ic(n_rows - n_unique_keys)  # 0 => no duplicated (trial_id, timestamp) pairs
df

ic| df.height: 5935133
    df.unique(subset=["trial_id","timestamp"]).height: 5935133
ic| df.height - df.unique(subset=["trial_id","timestamp"]).height: 0


trial_id,trial_number,participant_id,rownumber,timestamp,samplenumber,eda_d_battery,eda_raw,eda_d_packetreceptionrate,eda_tonic,eda_phasic,temperature,rating
u16,u8,u8,u32,f64,i64,f64,f64,i64,f64,f64,f64,f64
1,1,1,0,294224.331,57895,3677.435897,0.753247,100,0.752117,0.00113,45.75,42.5
1,1,1,0,294242.275,57896,3687.630769,0.754135,100,0.752119,0.002016,45.75,42.5
1,1,1,0,294248.2588,57898,3692.0,0.754135,100,0.752121,0.002014,45.75,42.5
1,1,1,0,294276.1835,57899,3678.892308,0.754135,100,0.752123,0.002012,45.75,42.5
1,1,1,0,294277.1819,57900,3692.0,0.752359,100,0.752125,0.000234,45.75,42.5
1,1,1,0,294309.0952,57902,3705.107692,0.752359,100,0.752127,0.000232,45.75,42.5
…,…,…,…,…,…,…,…,…,…,…,…,…
332,12,28,21610,2.7771e6,467073,3603.158974,13.679468,99,13.578894,-0.012187,45.582614,85.0
332,12,28,21610,2.7771e6,467074,3617.723077,13.679468,99,13.578894,-0.012765,45.582614,85.0
332,12,28,21610,2.7771e6,467075,3582.769231,13.674363,99,13.578894,-0.018451,45.582614,85.0


In [10]:
# Same ASOF join of preprocessed EDA samples with the stimulus table (presumably
# DuckDB — confirm), this time checking whether `timestamp` ALONE is unique.
# The saved output reports 564 more rows than distinct timestamps, so a timestamp
# by itself is not a usable row key.
query = """
SELECT * FROM Preprocess_EDA AS ce
ASOF JOIN Raw_Stimulus rs USING (trial_id, timestamp)
ORDER BY ce.trial_id, ce.timestamp
"""

with db:
    df = db.execute(query).pl()

# Count the distinct timestamps once instead of de-duplicating the frame twice.
n_rows = df.height
n_unique_timestamps = df.n_unique(subset="timestamp")
ic(n_rows, n_unique_timestamps)
ic(n_rows - n_unique_timestamps)  # > 0 => some timestamps occur more than once
df

ic| df.height: 5935133, df.unique("timestamp").height: 5934569
ic| df.height - df.unique("timestamp").height: 564


trial_id,trial_number,participant_id,rownumber,timestamp,samplenumber,eda_d_battery,eda_raw,eda_d_packetreceptionrate,eda_tonic,eda_phasic,temperature,rating
u16,u8,u8,u32,f64,i64,f64,f64,i64,f64,f64,f64,f64
1,1,1,0,294224.331,57895,3677.435897,0.753247,100,0.752117,0.00113,45.75,42.5
1,1,1,0,294242.275,57896,3687.630769,0.754135,100,0.752119,0.002016,45.75,42.5
1,1,1,0,294248.2588,57898,3692.0,0.754135,100,0.752121,0.002014,45.75,42.5
1,1,1,0,294276.1835,57899,3678.892308,0.754135,100,0.752123,0.002012,45.75,42.5
1,1,1,0,294277.1819,57900,3692.0,0.752359,100,0.752125,0.000234,45.75,42.5
1,1,1,0,294309.0952,57902,3705.107692,0.752359,100,0.752127,0.000232,45.75,42.5
…,…,…,…,…,…,…,…,…,…,…,…,…
332,12,28,21610,2.7771e6,467073,3603.158974,13.679468,99,13.578894,-0.012187,45.582614,85.0
332,12,28,21610,2.7771e6,467074,3617.723077,13.679468,99,13.578894,-0.012765,45.582614,85.0
332,12,28,21610,2.7771e6,467075,3582.769231,13.674363,99,13.578894,-0.018451,45.582614,85.0


In [56]:
# Time columns