In [None]:
import os
import sys
import polars as pl
import polars.selectors as cs

from datetime import datetime

# Resolve the project layout relative to the current working directory:
# the project root is taken to be the parent of wherever the kernel was started.
PROJECT_PATH = os.path.abspath("..")
PIPELINE_PATH = os.path.join(PROJECT_PATH, "pipeline")
DATA_DIRECTORY = os.path.join(PROJECT_PATH, "data")

# NOTE(review): not read by any cell visible in this part of the notebook —
# confirm it is still needed.
unflagged_data = False

# Put the pipeline directory on sys.path so the `utils.*` imports below can be
# found (presumably they live under pipeline/ — TODO confirm).
if PIPELINE_PATH not in sys.path:
    sys.path.append(PIPELINE_PATH)

from utils.paths import PROCESSED_PICARRO_DATA_DIRECTORY, POSTPROCESSED_DATA_DIRECTORY
from utils.import_data import import_acropolis_system_data
from utils.plot_dataframes import plot_sensor_measurement

# Fail early, naming the directory that is missing, instead of a bare AssertionError.
assert os.path.exists(POSTPROCESSED_DATA_DIRECTORY), (
    f"Missing post-processed data directory: {POSTPROCESSED_DATA_DIRECTORY}"
)
assert os.path.exists(PROCESSED_PICARRO_DATA_DIRECTORY), (
    f"Missing processed Picarro data directory: {PROCESSED_PICARRO_DATA_DIRECTORY}"
)

In [None]:
# Filters
# Analysis window as naive datetimes; the loading cells compare the "datetime"
# column against these bounds with is_between().
start_date = datetime(2025, 6, 1, 0, 0, 0)
end_date = datetime(2025, 7, 30, 0, 0, 0)

# Window length handed to group_by_dynamic(every=...) when the system data is loaded.
# NOTE(review): this name shadows the built-in filter(); it is kept because the
# loading cell reads it under this name.
filter = '1h'

# Full roster of system IDs, kept for reference / quick switching.
ALL_SYSTEM_IDS = list(range(1, 21))

# Systems actually loaded and plotted below (use ALL_SYSTEM_IDS for every system).
ids = [3, 6]

In [None]:
# Load ACROPOLIS Data
# One lazy query per system in `ids`: import the prefix="1min" data, average every
# numeric column per `filter`-sized window (grouped by system_id / system_name),
# then keep the windows whose "datetime" lies between start_date and end_date.
# Note that the date filter runs on the aggregated window timestamps, not on the
# raw rows.
#
# A comprehension is used so that no loop variable leaks into the notebook
# namespace (the previous `for id in ids` rebound the built-in id()).
all_systems = [
    import_acropolis_system_data(
        years=[2024, 2025],
        target_directory=POSTPROCESSED_DATA_DIRECTORY,
        id=system_id,
        prefix="1min",
    )
    .group_by_dynamic("datetime", every=filter, group_by=["system_id", "system_name"])
    .agg(cs.numeric().mean())
    .filter(pl.col("datetime").is_between(start_date, end_date))
    for system_id in ids
]

# how="diagonal" tolerates systems with differing column sets; collect() executes
# the combined lazy plan once.
df = pl.concat(all_systems, how="diagonal").collect()
del all_systems

# Show first and last row as a quick check of the covered time range.
df.head(1).vstack(df.tail(1))

In [None]:
# Optional stop point for "Run All": set to True to halt here, after the system
# data is loaded and before the Picarro / calibration loads and the plots below.
# Defaults to False so that a fresh-kernel "Restart & Run All" reaches every cell.
HALT_AFTER_SYSTEM_LOAD = False
assert not HALT_AFTER_SYSTEM_LOAD, "Stopped on purpose: HALT_AFTER_SYSTEM_LOAD is True"

In [None]:
# Load Picarro Data
# Calibrated 1-minute reference files, in concatenation order:
#   1. DWD Picarro
#   2. ICOS Picarro
picarro_files = (
    "Calibrated_1_min_DWD_Picarro_G2301_413.parquet",
    "Calibrated_1_min_ICOS_Picarro_G2401_529.parquet",
)

# Each file is scanned lazily, restricted to the analysis window and collected on
# its own; the eager frames are then stacked with a diagonal concat.
df_p = pl.concat(
    [
        pl.scan_parquet(os.path.join(PROCESSED_PICARRO_DATA_DIRECTORY, file_name))
        .filter(pl.col("datetime").is_between(start_date, end_date))
        .collect()
        for file_name in picarro_files
    ],
    how="diagonal",
)
del picarro_files

# First and last row as a quick look at the covered range.
df_p.head(1).vstack(df_p.tail(1))

In [None]:
# Load CAL ACROPOLIS Data (no aggregation)
# One lazy query per system in `ids`: import the prefix="Cal_1min" data and keep
# the rows whose "datetime" lies between start_date and end_date.
#
# A comprehension is used so that no loop variable leaks into the notebook
# namespace (the previous `for id in ids` rebound the built-in id()).
all_systems = [
    import_acropolis_system_data(
        years=[2024, 2025],
        target_directory=POSTPROCESSED_DATA_DIRECTORY,
        id=system_id,
        prefix="Cal_1min",
    ).filter(pl.col("datetime").is_between(start_date, end_date))
    for system_id in ids
]

# how="diagonal" tolerates systems with differing column sets; collect() executes
# the combined lazy plan once.
df_cal = pl.concat(all_systems, how="diagonal").collect()
del all_systems

# Show first and last row as a quick check of the covered time range.
df_cal.head(1).vstack(df_cal.tail(1))

In [None]:
# Enclosure BME280 humidity from the time-aggregated frame `df`, for the system IDs in `ids`.
plot_sensor_measurement(df,ids,col_name="enclosure_bme280_humidity")

In [None]:
# GMP343 temperature column from the time-aggregated frame `df`, for the system IDs in `ids`.
plot_sensor_measurement(df,ids,col_name="gmp343_temperature")

In [None]:
# `gmp343_corrected` column from the time-aggregated frame `df`.
# NOTE(review): presumably the final corrected GMP343 reading — confirm against the pipeline.
plot_sensor_measurement(df,ids,col_name="gmp343_corrected")

In [None]:
# `picarro_corrected` column from the Picarro frame `df_p`, requested for id 529.
# 529 presumably selects the ICOS instrument (its parquet file name ends in _529);
# the DWD instrument (_413) is also loaded into `df_p` but is not plotted here.
# NOTE(review): the other cells pass a list (`ids`) as the second argument —
# confirm that plot_sensor_measurement accepts a bare int.
plot_sensor_measurement(df_p,529, col_name="picarro_corrected")

In [None]:
# WXT532 average speed column (`wxt532_speed_avg`) from the time-aggregated frame `df`.
plot_sensor_measurement(df,ids,col_name="wxt532_speed_avg")

In [None]:
# SHT45 humidity column from the time-aggregated frame `df`, for the system IDs in `ids`.
plot_sensor_measurement(df,ids,col_name="sht45_humidity")

In [None]:
# `h2o_v%` column from the time-aggregated frame `df`.
# (The name suggests water content in volume percent — TODO confirm.)
plot_sensor_measurement(df,ids,col_name="h2o_v%")

In [None]:
# `bme280_humidity` column (the BME280 without the `enclosure_` prefix) from `df`.
plot_sensor_measurement(df,ids,col_name="bme280_humidity")

In [None]:
# `ups_powered_by_grid` column from the time-aggregated frame `df`.
# NOTE(review): `df` holds per-window means of numeric columns only, so if this is
# a 0/1 flag the plot shows the fraction of each window on grid power — confirm.
plot_sensor_measurement(df,ids,col_name="ups_powered_by_grid")

In [None]:
# `h2o_v%` column from `df`.
# NOTE(review): identical to an earlier `h2o_v%` plot cell in this notebook —
# one of the two is probably redundant.
plot_sensor_measurement(df,ids,col_name="h2o_v%")

In [None]:
# SHT45 temperature column from the time-aggregated frame `df`, for the system IDs in `ids`.
plot_sensor_measurement(df,ids,col_name="sht45_temperature")

In [None]:
# `bme280_temperature` column (the BME280 without the `enclosure_` prefix) from `df`.
plot_sensor_measurement(df,ids,col_name="bme280_temperature")

In [None]:
# `bme280_pressure` column (the BME280 without the `enclosure_` prefix) from `df`.
plot_sensor_measurement(df,ids,col_name="bme280_pressure")

In [None]:
# Enclosure BME280 temperature from the time-aggregated frame `df`, for the system IDs in `ids`.
plot_sensor_measurement(df,ids,col_name="enclosure_bme280_temperature")

In [None]:
# `gmp343_raw` column from the time-aggregated frame `df`
# (presumably the unprocessed GMP343 reading — TODO confirm).
plot_sensor_measurement(df,ids,col_name="gmp343_raw")

In [None]:
# `gmp343_compensated` column from the time-aggregated frame `df`
# (which compensation is applied is not visible here — see the pipeline).
plot_sensor_measurement(df,ids,col_name="gmp343_compensated")

In [None]:
# WXT532 temperature column from the time-aggregated frame `df`, for the system IDs in `ids`.
plot_sensor_measurement(df,ids,col_name="wxt532_temperature")

In [None]:
# `wxt532_direction_avg` column from the time-aggregated frame `df`.
# NOTE(review): `df` stores plain arithmetic means per window; if this column is
# an angle in degrees, averaging across the 0/360 wrap-around is misleading — confirm.
plot_sensor_measurement(df,ids,col_name="wxt532_direction_avg")

In [None]:
# `raspi_cpu_usage` column from the time-aggregated frame `df`, for the system IDs in `ids`.
plot_sensor_measurement(df,ids,col_name="raspi_cpu_usage")

In [None]:
# `raspi_memory_usage` column from the time-aggregated frame `df`, for the system IDs in `ids`.
plot_sensor_measurement(df,ids,col_name="raspi_memory_usage")

In [None]:
# `raspi_disk_usage` column from the time-aggregated frame `df`, for the system IDs in `ids`.
plot_sensor_measurement(df,ids,col_name="raspi_disk_usage")

In [None]:
# `raspi_cpu_temperature` column from the time-aggregated frame `df`, for the system IDs in `ids`.
plot_sensor_measurement(df,ids,col_name="raspi_cpu_temperature")

In [None]:
# Enclosure BME280 pressure from the time-aggregated frame `df`, for the system IDs in `ids`.
plot_sensor_measurement(df,ids,col_name="enclosure_bme280_pressure")

# Differences

In [None]:
# Humidity difference per row: SHT45 reading minus BME280 reading.
# Rows where the difference is NaN are dropped, only the columns needed for
# plotting are kept, and the result is ordered by system.
df_plot = (
    df.with_columns(
        diff_inflow_humidity=pl.col("sht45_humidity") - pl.col("bme280_humidity")
    )
    .filter(pl.col("diff_inflow_humidity").is_not_nan())
    .select("datetime", "diff_inflow_humidity", "system_id")
    .sort("system_id")
)

plot_sensor_measurement(df_plot, ids, col_name="diff_inflow_humidity")

In [None]:
# Temperature difference per row: SHT45 reading minus BME280 reading.
# Rows where the difference is NaN are dropped, only the columns needed for
# plotting are kept, and the result is ordered by system.
df_plot = (
    df.with_columns(
        diff_inflow_temperature=pl.col("sht45_temperature") - pl.col("bme280_temperature")
    )
    .filter(pl.col("diff_inflow_temperature").is_not_nan())
    .select("datetime", "diff_inflow_temperature", "system_id")
    .sort("system_id")
)

plot_sensor_measurement(df_plot, ids, col_name="diff_inflow_temperature")

In [None]:
# Pressure difference per row: BME280 reading minus enclosure BME280 reading.
# Rows where the difference is NaN are dropped, only the columns needed for
# plotting are kept, and the result is ordered by system.
df_plot = (
    df.with_columns(
        diff_inflow_pressure=pl.col("bme280_pressure") - pl.col("enclosure_bme280_pressure")
    )
    .filter(pl.col("diff_inflow_pressure").is_not_nan())
    .select("datetime", "diff_inflow_pressure", "system_id")
    .sort("system_id")
)

plot_sensor_measurement(df_plot, ids, col_name="diff_inflow_pressure")

# Calibration Data

In [None]:
# `cal_bottle_id` column from the un-aggregated calibration frame `df_cal`.
# This call names the `df` and `sensor_id` keywords explicitly (presumably the
# same parameters the other cells fill positionally).
plot_sensor_measurement(df=df_cal,sensor_id=ids,col_name="cal_bottle_id")

In [None]:
# `cal_gmp343_filtered` column from the un-aggregated calibration frame `df_cal`, full value range.
plot_sensor_measurement(df_cal,ids,col_name="cal_gmp343_filtered")

In [None]:
# `cal_sht45_humidity` column from the un-aggregated calibration frame `df_cal`.
plot_sensor_measurement(df_cal,ids,col_name="cal_sht45_humidity")

In [None]:
# `cal_sht45_temperature` column from the un-aggregated calibration frame `df_cal`.
plot_sensor_measurement(df_cal,ids,col_name="cal_sht45_temperature")

In [None]:
# `cal_gmp343_temperature` column from the un-aggregated calibration frame `df_cal`.
plot_sensor_measurement(df_cal,ids,col_name="cal_gmp343_temperature")

In [None]:
# `cal_bme280_temperature` column from the un-aggregated calibration frame `df_cal`.
plot_sensor_measurement(df_cal,ids,col_name="cal_bme280_temperature")

In [None]:
# `cal_bme280_humidity` column from the un-aggregated calibration frame `df_cal`.
plot_sensor_measurement(df_cal,ids,col_name="cal_bme280_humidity")

In [None]:
# `cal_sht45_humidity` column from `df_cal`.
# NOTE(review): identical to an earlier `cal_sht45_humidity` plot cell in this
# section — possibly a copy-paste leftover or meant to show a different column.
plot_sensor_measurement(df_cal,ids,col_name="cal_sht45_humidity")

In [None]:
# `cal_bme280_pressure` column from the un-aggregated calibration frame `df_cal`.
plot_sensor_measurement(df_cal,ids,col_name="cal_bme280_pressure")

In [None]:
# `cal_gmp343_filtered` from `df_cal`, limited through cut_above=350 / cut_below=470.
# NOTE(review): the meaning of cut_above / cut_below is defined inside
# plot_sensor_measurement; presumably this keeps values between 350 and 470 — confirm.
plot_sensor_measurement(df_cal,ids,col_name="cal_gmp343_filtered", cut_above=350, cut_below=470)

In [None]:
# `cal_gmp343_filtered` from `df_cal`, limited through cut_above=470 / cut_below=800.
# NOTE(review): the meaning of cut_above / cut_below is defined inside
# plot_sensor_measurement; presumably this keeps values between 470 and 800 — confirm.
plot_sensor_measurement(df_cal,ids,col_name="cal_gmp343_filtered", cut_above=470, cut_below=800)