In [47]:
import polars as pl
import os
from hampel import hampel
from datetime import datetime, timezone
import plotly.express as px

# Root folder of the data set, read from the environment. Fail here with a
# clear message instead of letting os.path.join() raise a TypeError on None.
DATA_DIRECTORY = os.environ.get("DATA_DIRECTORY")
if DATA_DIRECTORY is None:
    raise RuntimeError("The DATA_DIRECTORY environment variable is not set.")

# Bounds of the analysis window as timezone-aware UTC datetimes.
start_time = datetime(2024, 6, 23, tzinfo=timezone.utc)
end_time = datetime(2024, 7, 11, tzinfo=timezone.utc)

# system_id of the system whose readings are run through the Hampel filter.
# NOTE(review): this name shadows the builtin id(); it is kept unchanged
# because later cells read it.
id = 14

# Load the full parquet file; per-system filtering happens in later cells.
df = pl.read_parquet(os.path.join(DATA_DIRECTORY, "processed", "1m_cal_corr_acropolis.parquet"))

In [48]:
# Keep only the rows of the selected system that fall inside the analysis
# window, and store the corrected reading as Float32.
df_filtered = (
    df.filter(
        (pl.col("system_id") == id)
        & pl.col("creation_timestamp").is_between(start_time, end_time)
    )
    .cast({"gmp343_corrected": pl.Float32})
)

# The Hampel filter is fed a pandas Series of the corrected readings.
data = df_filtered.get_column("gmp343_corrected").to_pandas()

# Run the Hampel filter over the series.
result = hampel(
    data,
    window_size=120,
    n_sigma=3.0,
)

print(result.filtered_data)

0        434.294952
1        434.177917
2        434.001312
3        435.047974
4        434.560242
            ...    
23675    458.592804
23676    457.676361
23677    459.110840
23678    457.879211
23679    459.902039
Length: 23680, dtype: float32


In [49]:
# Share of samples reported by the Hampel filter as outliers.
n_outliers = len(result.outlier_indices)
n_samples = len(data)
print(n_outliers / n_samples)

0.004222972972972973


In [50]:
# Attach the filter output as "hampel_filter", then mark every row whose
# corrected reading differs from it as "Flagged". The two steps stay separate
# because the second expression reads the column created by the first.
df_filtered = (
    df_filtered
    .with_columns(pl.from_pandas(result.filtered_data).alias("hampel_filter"))
    .with_columns(
        (pl.col("gmp343_corrected") != pl.col("hampel_filter")).alias("Flagged")
    )
)

In [51]:
# Corrected readings over time, colored by the "Flagged" column.
fig = px.scatter(
    data_frame=df_filtered,
    x="creation_timestamp",
    y="gmp343_corrected",
    color="Flagged",
)
fig.show()

In [52]:
# system_id of the reference system. A dedicated name is used so the global
# `id` read by the earlier filtering cell is not rebound; otherwise re-running
# that cell after this one would silently switch it to the reference system.
reference_id = 7

# Reference readings over the same time window, reduced to the timestamp and
# the corrected value (renamed to "gmp343_reference").
df_reference = (
    df.filter(pl.col("system_id") == reference_id)
    .filter(pl.col("creation_timestamp").is_between(start_time, end_time))
    .cast({"gmp343_corrected": pl.Float32})
    .rename({"gmp343_corrected": "gmp343_reference"})
    .select(["creation_timestamp", "gmp343_reference"])
)

In [53]:
# Left join: every row of df_filtered is kept and gains the reference reading
# recorded at the same creation_timestamp (null when there is none).
df_plot = df_filtered.join(
    other=df_reference,
    on="creation_timestamp",
    how="left",
)

In [54]:
# Preview the first five rows of the joined frame.
df_plot.head(5)

system_id,creation_timestamp,gmp343_raw,gmp343_compensated,gmp343_filtered,gmp343_temperature,sht45_humidity,sht45_temperature,bme280_humidity,bme280_temperature,bme280_pressure,revision,receipt_timestamp,h2o_ah,h2o_v%,gmp343_dry,sys_name_short,slope,intercept,median_bottle_1,median_bottle_2,wxt532_speed_avg,wxt532_speed_min,wxt532_speed_max,wxt532_direction_avg,wxt532_direction_min,wxt532_direction_max,wxt532_last_update_time,wxt532_temperature,wxt532_heating_voltage,wxt532_supply_voltage,wxt532_reference_voltage,system_id_right,CO2_corr,gmp343_corrected,diff,date,hampel_filter,Flagged,gmp343_reference
i64,"datetime[μs, UTC]",f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,"datetime[ns, UTC]",f64,f64,f64,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i64,f64,f32,f64,date,f32,bool,f32
14,2024-06-23 00:00:00 UTC,381.4,446.216667,446.483333,37.45,27.813333,33.461667,22.263333,32.915,940.81,22.0,2024-06-23 00:00:36.642521344 UTC,12.491835,1.903307,455.146164,"""acropolis-14""",1.005056,-23.152576,,,,,,,,,,,,,,,,434.294952,,2024-06-23,434.294952,False,434.214966
14,2024-06-23 00:01:00 UTC,382.383333,447.733333,446.4,37.483333,27.746667,33.475,22.22,32.916667,943.626667,22.0,2024-06-23 00:01:36.469390080 UTC,12.483135,1.896508,455.029675,"""acropolis-14""",1.005056,-23.152245,,,,,,,,,,,,,,,,434.177917,,2024-06-23,434.177917,False,435.01178
14,2024-06-23 00:02:00 UTC,381.483333,446.416667,446.2,37.483333,27.761667,33.473333,22.226667,32.913333,941.13,22.0,2024-06-23 00:02:36.375594240 UTC,12.489884,1.902567,454.853901,"""acropolis-14""",1.005055,-23.151914,,,,,,,,,,,,,,,,434.001312,,2024-06-23,434.001312,False,433.682617
14,2024-06-23 00:03:00 UTC,382.22,447.08,447.22,37.48,27.826,33.464,22.268,32.912,942.976,22.0,2024-06-23 00:03:31.893481984 UTC,12.516695,1.902898,455.895223,"""acropolis-14""",1.005055,-23.151583,,,,,,,,,,,,,,,,435.047974,,2024-06-23,435.047974,False,433.853546
14,2024-06-23 00:04:00 UTC,382.266667,446.116667,446.733333,37.5,27.803333,33.466667,22.26,32.911667,942.081667,22.0,2024-06-23 00:04:27.087631616 UTC,12.519284,1.905221,455.4099,"""acropolis-14""",1.005054,-23.151251,,,,,,,,,,,,,,,,434.560242,,2024-06-23,434.560242,False,433.343781


In [55]:
# Corrected readings plotted against the reference system's readings,
# colored by the "Flagged" column.
fig = px.scatter(
    data_frame=df_plot,
    x="gmp343_corrected",
    y="gmp343_reference",
    color="Flagged",
)
fig.show()