In [None]:
%run setup.py

import polars as pl
from configs import utils

In [2]:
# Load the cleaned movement data through the project helper, using the
# project-defined schema. The result is consumed lazily downstream
# (`sink_*` / `collect`).
lf_data_move = utils.load_large_data(
    "../../data/processed/",
    "DogMoveData_Clean.csv",
    schema=utils.dog_move_data_schema,
)

In [3]:
# Keep only rows whose primary behaviour label is one of the seven listed
# behaviours, then discard the annotation columns that are not used later.
lf_data_move_filtered = (
    lf_data_move
    .filter(
        pl.col("Behavior_1").is_in(
            [
                "Lying chest",
                "Sniffing",
                "Playing",
                "Walking",
                "Trotting",
                "Sitting",
                "Standing",
            ]
        )
    )
    .drop(["Task", "Behavior_2", "Behavior_3", "PointEvent"])
)

In [4]:
# Denoise the twelve sensor channels with a centered rolling mean.
# One multi-column expression replaces twelve copy-pasted ones, so the window
# size, grouping keys and rounding are each defined in exactly one place.
SENSOR_COLUMNS = [
    f"{sensor}_{axis}"
    for sensor in ("ABack", "ANeck", "GBack", "GNeck")
    for axis in ("x", "y", "z")
]
ROLLING_WINDOW = 100  # number of samples averaged per output value
RECORDING_KEYS = ["DogID", "TestNum"]  # the rolling window never crosses these groups
ROUND_DECIMALS = 6

# `pl.col(<list>)` expands to one expression per column; each result keeps its
# source column name, so the smoothed values overwrite the raw ones in place.
lf_data_move_denoised = lf_data_move_filtered.with_columns(
    pl.col(SENSOR_COLUMNS)
    .rolling_mean(window_size=ROLLING_WINDOW, center=True)
    .over(RECORDING_KEYS)
    .round(ROUND_DECIMALS)
).drop_nulls()
# `drop_nulls()` has no subset, so a row is removed when ANY column is null.
# This removes the rows where the rolling mean has no full window (the edges
# of each group), and also any row that was already null in another column.
# NOTE(review): rows were filtered by behaviour before smoothing, so a window
# may average samples that are not contiguous in time -- confirm this is intended.

In [5]:
# Build a datetime index column (`t_dt`) from the `t_sec` offset so the frame
# can be windowed with `group_by_dynamic`. The 1970-01-01 epoch is only an
# arbitrary anchor for the offset.
#
# The millisecond value is rounded BEFORE the integer cast: the cast truncates,
# and a floating-point product such as `t_sec * 1000` can land marginally below
# the intended integer. Truncating would then shift the timestamp back by 1 ms,
# which can move a sample sitting on a second boundary into the previous window.
lf_duration = lf_data_move_denoised.with_columns(
    (
        pl.datetime(1970, 1, 1)
        + pl.duration(
            milliseconds=(pl.col("t_sec") * 1000).round(0).cast(pl.Int64)
        )
    ).alias("t_dt")
)

# Order rows by recording (DogID, TestNum) and then by time within each recording.
lf_sorted = lf_duration.sort(["DogID", "TestNum", "t_dt"])

In [6]:
# Sliding-window parameters: a new window starts every STEP_DURATION and spans
# WINDOW_SIZE, so consecutive windows overlap.
WINDOW_SIZE = "2s"
STEP_DURATION = "1s"
MIN_SAMPLES = 50  # windows with fewer rows than this are discarded below

# Window each (DogID, TestNum) recording on `t_dt`. Every window keeps its row
# count plus the remaining columns gathered into lists.
lf_windowed = (
    lf_sorted
    .group_by_dynamic(
        index_column="t_dt",
        every=STEP_DURATION,
        period=WINDOW_SIZE,
        group_by=["DogID", "TestNum"],
    )
    .agg(
        pl.len().alias("n_samples"),
        pl.all().implode(),
    )
)

# Drop windows that contain no samples at all.
# The MIN_SAMPLES filter below already implies this one; it is kept because
# `lf_window` is a separately named intermediate.
lf_window = lf_windowed.filter(pl.col("ABack_x").list.len() > 0)

# Keep only windows with at least MIN_SAMPLES rows.
lf_window_clean = lf_window.filter(pl.col("n_samples") >= MIN_SAMPLES)

In [None]:
# Export the behaviour-filtered (not smoothed) data, skipping the first 50 rows
# of the whole frame. Failures are printed and not re-raised, so later cells
# still run.
# NOTE(review): 50 presumably mirrors half of the 100-sample rolling window used
# for the denoised frame. That frame loses edge rows per (DogID, TestNum) group,
# whereas this slice trims only the head of the full frame, so the two exports
# may not be row-aligned -- confirm intent.
try:
    (
        lf_data_move_filtered
        .slice(50, None)
        .sink_csv("../../data/processed/DogMoveData_Noised.csv")
    )
except Exception as e:
    print(f"Erro ao salvar os dados: {e}")

In [None]:
# Stream the smoothed frame to CSV. Failures are printed and not re-raised,
# so later cells still run.
try:
    (
        lf_data_move_denoised
        .sink_csv("../../data/processed/DogMoveData_Denoised.csv")
    )
except Exception as e:
    print(f"Erro ao salvar os dados: {e}")

In [9]:
# Stream the windowed frame to Parquet (its columns hold lists). Failures are
# printed and not re-raised, so later cells still run.
try:
    (
        lf_window_clean
        .sink_parquet("../../data/processed/DogMoveData_Windowed_Denoised.parquet")
    )
except Exception as e:
    print(f"Erro ao salvar os dados: {e}")

In [10]:
# Materialise the windowed frame so the notebook renders it as the cell output.
# `collect()` executes the lazy plan again, independently of the Parquet sink
# in the previous cell.
lf_window_clean.collect()

DogID,TestNum,t_dt,n_samples,t_sec,ABack_x,ABack_y,ABack_z,ANeck_x,ANeck_y,ANeck_z,GBack_x,GBack_y,GBack_z,GNeck_x,GNeck_y,GNeck_z,Behavior_1
i64,i64,datetime[μs],u32,list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[str]
16,1,1970-01-01 00:01:31,105,"[91.31, 91.32, … 92.35]","[0.162578, 0.168833, … 0.133535]","[0.093765, 0.092339, … 0.209229]","[0.960957, 0.957222, … 0.919751]","[-0.401973, -0.401698, … -0.325522]","[0.004992, 0.006203, … 0.25294]","[-0.920781, -0.919709, … -0.898197]","[-4.973145, -4.788208, … -3.602295]","[-8.567505, -8.477784, … 5.15503]","[11.469727, 9.768067, … 16.123048]","[7.676807, 8.405872, … 17.062519]","[-3.081958, -3.083972, … -7.086396]","[14.917262, 15.580714, … -4.907386]","[""Walking"", ""Walking"", … ""Walking""]"
16,1,1970-01-01 00:01:34,95,"[95.05, 95.06, … 95.99]","[0.13519, 0.136831, … 0.104966]","[0.21042, 0.211152, … 0.110728]","[0.917598, 0.91623, … 0.976714]","[-0.324268, -0.323495, … -0.285273]","[0.256713, 0.260131, … 0.04354]","[-0.898799, -0.900589, … -0.945479]","[-3.901367, -4.25354, … -15.505372]","[4.854126, 4.752198, … 5.877686]","[16.538087, 16.789552, … 1.728516]","[17.065888, 17.099092, … 21.376344]","[-7.372883, -7.565754, … 0.900879]","[-5.673542, -6.549518, … -44.271853]","[""Walking"", ""Walking"", … ""Walking""]"
16,1,1970-01-01 00:01:35,123,"[95.05, 95.06, … 96.27]","[0.13519, 0.136831, … 0.220806]","[0.21042, 0.211152, … 0.157788]","[0.917598, 0.91623, … 0.96293]","[-0.324268, -0.323495, … -0.310142]","[0.256713, 0.260131, … -0.036479]","[-0.898799, -0.900589, … -0.97207]","[-3.901367, -4.25354, … -2.897949]","[4.854126, 4.752198, … -2.667847]","[16.538087, 16.789552, … 8.232422]","[17.065888, 17.099092, … 23.19214]","[-7.372883, -7.565754, … -16.778565]","[-5.673542, -6.549518, … -26.832277]","[""Walking"", ""Walking"", … ""Walking""]"
16,1,1970-01-01 00:01:44,111,"[104.89, 104.9, … 105.99]","[0.22292, 0.223989, … 0.079836]","[0.159512, 0.161973, … 0.207484]","[0.962764, 0.96249, … 0.958306]","[-0.310347, -0.311602, … -0.068428]","[-0.042588, -0.049404, … -0.104253]","[-0.970176, -0.966821, … -0.989023]","[-3.088989, -3.150635, … -3.315442]","[-2.13501, -1.596069, … 18.046742]","[6.52649, 4.832154, … 21.313435]","[22.715455, 22.542726, … -17.020265]","[-16.652223, -16.458741, … 21.071779]","[-27.645876, -28.412477, … 20.974733]","[""Walking"", ""Walking"", … ""Walking""]"
16,1,1970-01-01 00:01:45,200,"[105.0, 105.01, … 106.99]","[0.232979, 0.225181, … -0.188411]","[0.18417, 0.186958, … 0.104055]","[0.949434, 0.947422, … 0.953397]","[-0.327788, -0.32231, … 0.244141]","[-0.091782, -0.096186, … -0.25773]","[-0.942778, -0.943594, … -0.941211]","[-5.819092, -6.476441, … -7.028852]","[7.442017, 8.313599, … -1.153601]","[-8.507081, -9.112549, … -8.745429]","[22.44568, 21.345826, … 38.156741]","[-5.696411, -6.170044, … 4.931641]","[-39.347537, -40.77698, … -1.323242]","[""Walking"", ""Walking"", … ""Walking""]"
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
74,2,1970-01-01 00:32:03,200,"[1923.0, 1923.01, … 1924.99]","[0.013457, 0.017798, … -0.119785]","[0.272964, 0.273794, … 0.294712]","[0.938852, 0.937539, … 0.936597]","[-0.425696, -0.429712, … -0.323726]","[0.574038, 0.575361, … 0.567236]","[-0.729364, -0.730582, … -0.761084]","[4.549561, 4.800415, … 1.835327]","[-5.324097, -4.873657, … -21.569215]","[-9.652711, -10.618287, … -28.094484]","[-10.984272, -11.37602, … 17.851564]","[5.585974, 5.806263, … -31.42029]","[-0.664008, -1.12829, … 35.249636]","[""Sniffing"", ""Sniffing"", … ""Sniffing""]"
74,2,1970-01-01 00:32:04,200,"[1924.0, 1924.01, … 1925.99]","[-0.241484, -0.240596, … -0.051816]","[0.370806, 0.374673, … 0.311343]","[0.87269, 0.872197, … 0.914258]","[-0.24978, -0.245337, … -0.461011]","[0.595937, 0.598193, … 0.608086]","[-0.77125, -0.773442, … -0.644155]","[13.912354, 14.229127, … 1.881104]","[-26.726076, -27.937013, … 8.006592]","[-71.061406, -71.550297, … 30.858156]","[44.939578, 44.708255, … -43.912967]","[-64.526371, -64.356693, … 51.706546]","[91.475836, 91.029669, … -61.630863]","[""Sniffing"", ""Sniffing"", … ""Sniffing""]"
74,2,1970-01-01 00:32:05,200,"[1925.0, 1925.01, … 1926.99]","[-0.119233, -0.119922, … -0.007007]","[0.29104, 0.287344, … 0.298125]","[0.937236, 0.937632, … 0.936172]","[-0.325693, -0.327148, … -0.396724]","[0.56688, 0.56792, … 0.590396]","[-0.760303, -0.760278, … -0.721177]","[0.930176, 0.144043, … -6.218262]","[-21.002199, -20.390626, … 14.462281]","[-27.72278, -27.391969, … 27.543947]","[18.020631, 18.298341, … -0.661011]","[-31.75049, -32.039797, … 12.969361]","[35.977785, 36.729128, … -13.742677]","[""Sniffing"", ""Sniffing"", … ""Sniffing""]"
74,2,1970-01-01 00:32:06,175,"[1926.0, 1926.01, … 1927.74]","[-0.052446, -0.053428, … 0.021606]","[0.311475, 0.312607, … 0.334512]","[0.914297, 0.913525, … 0.908535]","[-0.458603, -0.456001, … -0.389102]","[0.607671, 0.606524, … 0.61667]","[-0.64354, -0.64396, … -0.707734]","[1.931763, 2.077637, … 1.687012]","[8.520508, 8.966675, … 14.430543]","[32.230837, 33.60718, … 60.735477]","[-43.731692, -43.515017, … -32.224123]","[51.932376, 52.031864, … 45.592044]","[-62.060551, -62.419437, … -52.272952]","[""Sniffing"", ""Sniffing"", … ""Sniffing""]"
