In [90]:
from datetime import datetime, timedelta, timezone
import polars as pl
import os
import plotly.express as px

# Analysis window in UTC. Both bounds are inclusive in the `is_between`
# filters below (polars' default is closed="both").
start_date = datetime(2024, 9, 4, 15, 0, 0, tzinfo=timezone.utc)
end_date = datetime(2024, 9, 30, 23, 59, 59, tzinfo=timezone.utc)

# NOTE(review): this name shadows the builtin `filter`. It is kept unchanged
# because cells outside this view may read it.
filter = '10m'

# `system_id` values of the sensors that are compared against the Picarro.
sensor_id = [2,6,19]

# Root folder of the data files, taken from the environment.
DATA_DIRECTORY = os.environ.get("DATA_DIRECTORY")
if DATA_DIRECTORY is None:
    # Fail here with a clear message instead of a TypeError raised from
    # os.path.join(None, ...) further down.
    raise RuntimeError(
        "Environment variable DATA_DIRECTORY is not set; "
        "it must point to the folder that contains 'processed/'."
    )

# processed 10min average measurement data
df = (
    pl.scan_parquet(os.path.join(DATA_DIRECTORY, "processed", "10m_cal_corr_acropolis.parquet"))
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
    .collect()
)

# Picarro reference data from the same folder, restricted to the same window.
df_p_10m = (
    pl.scan_parquet(os.path.join(DATA_DIRECTORY, "processed", "10m_cal_corr_picarro.parquet"))
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
    .collect()
)


In [91]:
# Show the first row of the sensor frame to inspect its columns and dtypes.
df.head(1)

system_id,sys_name_short,creation_timestamp,system_name,gmp343_raw,gmp343_compensated,gmp343_filtered,gmp343_temperature,sht45_humidity,sht45_temperature,bme280_humidity,bme280_temperature,bme280_pressure,revision,receipt_timestamp,h2o_ah,h2o_v%,gmp343_dry,slope,intercept,wxt532_speed_avg,wxt532_speed_min,wxt532_speed_max,wxt532_direction_avg,wxt532_direction_min,wxt532_direction_max,wxt532_last_update_time,wxt532_temperature,wxt532_heating_voltage,wxt532_supply_voltage,wxt532_reference_voltage,enclosure_bme280_humidity,enclosure_bme280_pressure,enclosure_bme280_temperature,gmp343_corrected,date,std,var
i64,str,"datetime[μs, UTC]",str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,"datetime[ns, UTC]",f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,date,f64,f64
7,"""acropolis-7""",2024-09-04 15:00:00 UTC,,375.903333,448.165,448.568333,39.848333,33.512333,35.846833,27.026167,35.659667,935.630167,40.0,2024-09-04 15:04:57.286836480 UTC,16.995099,2.623886,460.655393,1.010955,-29.676713,,,,,,,,,,,,15.67,952.418,42.752,436.024964,2024-09-04,0.900351,0.810633


In [92]:
# Show the first row of the Picarro frame to inspect its columns and dtypes.
df_p_10m.head(1)

sys_name_short,creation_timestamp,picarro_corrected,h2o_reported,CavityPressure,CavityTemp,diff
str,"datetime[μs, UTC]",f64,f64,f64,f64,f64
"""Picarro""",2024-09-04 15:00:00 UTC,436.999756,2.390877,139.99981,44.995369,0.0


In [93]:
# Correct the wrong UTC time of the Picarro data at the start of the period
# (04.09. until 05.09.).
# NOTE(review): this cell was described as covering data until 11:50 UTC,
# while the cutoff used below is 11:30 UTC - confirm which one is intended.
# NOTE(review): `df_p_10m` is rebound at the end of the cell, so running the
# cell again without reloading shifts already-corrected rows a second time.
fix_date = datetime(2024, 9, 5, 11, 30, 0, tzinfo=timezone.utc)

# Rows after the cutoff are kept unchanged.
df_dt_correct = df_p_10m.filter(pl.col("creation_timestamp") > fix_date)

# Rows from start_date up to and including the cutoff are moved two hours earlier.
df_dt_false = (
    df_p_10m
    .filter(pl.col("creation_timestamp").is_between(start_date, fix_date))
    .with_columns(pl.col("creation_timestamp") - timedelta(hours=2))
)

# Shifted rows first, then the untouched rows.
df_p_10m = pl.concat([df_dt_false, df_dt_correct], how="diagonal")

In [94]:
# Picarro series limited to the analysis window. The CO2 column is exposed as
# `co2_corrected` so it carries the same name as the sensor CO2 column.
df_p_filtered = (
    df_p_10m
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
    .select(
        "creation_timestamp",
        "sys_name_short",
        pl.col("picarro_corrected").alias("co2_corrected"),
    )
)

In [95]:
# Sensor series limited to the analysis window and to the systems listed in
# `sensor_id`. The CO2 column is exposed as `co2_corrected`.
df_filtered = (
    df
    .filter(
        pl.col("creation_timestamp").is_between(start_date, end_date)
        & pl.col("system_id").is_in(sensor_id)
    )
    .select(
        "creation_timestamp",
        "sys_name_short",
        pl.col("gmp343_corrected").alias("co2_corrected"),
    )
)

In [96]:
# Join the Picarro values onto the sensor rows by timestamp and compute
# `diff` = sensor CO2 minus Picarro CO2.
# The left join keeps every sensor row; a row without a Picarro value at the
# same timestamp gets a null `co2_corrected_right` and therefore a null `diff`.
# Selecting the three base columns first makes the cell safe to re-run: after
# one run `df_filtered` already carries `co2_corrected_right` and `diff`, and
# those would otherwise be fed into the join again.
df_filtered = (
    df_filtered
    .select("creation_timestamp", "sys_name_short", "co2_corrected")
    .join(
        df_p_filtered.select("creation_timestamp", "co2_corrected"),
        on=["creation_timestamp"],
        how="left",
    )
    .with_columns(diff=pl.col("co2_corrected") - pl.col("co2_corrected_right"))
)

In [97]:
# Stack sensor rows and Picarro rows into one frame for plotting. The two
# frames do not have the same columns, hence the diagonal concatenation.
df_plot = pl.concat(
    [df_filtered, df_p_filtered],
    how="diagonal",
)

In [98]:
# CO2 time series, one line per system (sensors and Picarro).
fig = px.line(
    df_plot,
    x="creation_timestamp",
    y="co2_corrected",
    color="sys_name_short",
)
fig.update_layout(yaxis_title='CO2 (ppm)', xaxis_title='', title='')
fig.show()

In [99]:
# Difference plot: one line per sensor showing sensor CO2 minus Picarro CO2.
# `df_filtered` is plotted instead of `df_plot`: the Picarro rows appended to
# `df_plot` come from a frame without a `diff` column, so they would only add
# an empty trace to the legend. The sensor rows are the same in both frames.
fig = px.line(df_filtered, x="creation_timestamp", y="diff", color="sys_name_short")
fig.update_layout(
    yaxis_title='System - PICARRO: CO2 (ppm)',
    xaxis_title='',
    title='',
)
fig.show()