In [1]:
import polars as pl
import polars.selectors as cs
from datetime import datetime

#start_date = datetime(2025, 3, 17, 0, 0, 0)
#end_date = datetime(2025, 3, 25, 0, 0, 0)

In [57]:
# 1_Dilution_Correction
# Cut the reference and sensor files down to the window below and store the
# trimmed copies under ../data/1_Dilution_Correction/.
start_date = datetime(2025, 2, 25, 0, 0, 0)
end_date = datetime(2025, 3, 3, 0, 0, 0)

# Reference file: keep `datetime` (cast to millisecond precision) and
# `picarro_corrected`, exported under the name `CO2_reference`.
df_p = (
    pl.read_parquet("../data/Calibrated_1_min_ICOS_Picarro_G2401_529.parquet")
    .filter(pl.col("datetime").is_between(start_date, end_date))
    .select(
        pl.col("datetime").cast(pl.Datetime("ms")),
        pl.col("picarro_corrected").alias("CO2_reference"),
    )
)
df_p.write_parquet("../data/1_Dilution_Correction/Reference_Picarro_G2401.parquet")

# Sensor file: same window; the two GMP343 columns get export names, the
# remaining columns are passed through unchanged.
df_s = (
    pl.read_parquet("../data/1min_acropolis-3.parquet")
    .filter(pl.col("datetime").is_between(start_date, end_date))
    .select(
        "datetime",
        pl.col("gmp343_filtered").alias("CO2_out_of_sensor"),
        pl.col("gmp343_dry").alias("CO2_dry"),
        "sht45_humidity",
        "gmp343_temperature",
        "bme280_pressure",
        "h2o_v%",
    )
)
df_s.write_parquet("../data/1_Dilution_Correction/Sensor_GMP343_Measurement.parquet")

In [None]:
# 2_Calibration_Correction
# Export three inputs, all restricted to the window below:
#   * the reference series,
#   * the sensor's dry CO2 series,
#   * the sensor's calibration rows for one calendar day.
start_date = datetime(2025, 3, 10, 0, 0, 0)
end_date = datetime(2025, 3, 14, 0, 0, 0)
# Calendar day whose calibration rows are exported. Spelled out as a full date
# so the filter keeps its meaning if the window above is ever moved.
calibration_date = datetime(2025, 3, 11).date()

# Read and filter the reference data
df_p = (
    pl.read_parquet("../data/Calibrated_1_min_ICOS_Picarro_G2401_529.parquet")
    .filter(pl.col("datetime").is_between(start_date, end_date))
    .select(["datetime", "picarro_corrected"])
    .rename({"picarro_corrected": "CO2_reference"})
    .with_columns(pl.col("datetime").cast(pl.Datetime("ms")))
)

df_p.write_parquet("../data/2_Calibration_Correction/Reference_Picarro_G2401.parquet")

# Read and filter the sensor data.
# Only `datetime` and the dry CO2 column are exported, so no other column is
# renamed here.
df_s = (
    pl.read_parquet("../data/1min_acropolis-6.parquet")
    .filter(pl.col("datetime").is_between(start_date, end_date))
    .rename({"gmp343_dry": "CO2_dry"})
    .select(["datetime", "CO2_dry"])
)

df_s.write_parquet("../data/2_Calibration_Correction/Sensor_GMP343_Measurement.parquet")

# Read and filter the calibration data.
# Keep rows with a positive reading that fall on `calibration_date`.
df_c = (
    pl.read_parquet("../data/Cal_1min_acropolis-6.parquet")
    .filter(pl.col("datetime").is_between(start_date, end_date))
    .rename({"cal_gmp343_filtered": "CO2_out_of_sensor"})
    .filter(
        (pl.col("CO2_out_of_sensor") > 0)
        & (pl.col("datetime").dt.date() == calibration_date)
    )
    .select(["datetime", "CO2_out_of_sensor", "cal_sht45_humidity", "cal_bottle_id"])
)

df_c.write_parquet("../data/2_Calibration_Correction/Sensor_GMP343_Calibration.parquet")

In [62]:
# 3 Aggregation and performance metrics
# Export the reference series for the window below and, for each sensor system,
# its processed CO2 series left-joined with that reference on `datetime`.
start_date = datetime(2025, 2, 25, 0, 0, 0)
end_date = datetime(2025, 3, 16, 23, 59, 59)

# Read and filter the reference data
df_p = (
    pl.read_parquet("../data/Calibrated_1_min_ICOS_Picarro_G2401_529.parquet")
    .filter(pl.col("datetime").is_between(start_date, end_date))
    .select(["datetime", "picarro_corrected"])
    .rename({"picarro_corrected": "CO2_reference"})
    .with_columns(pl.col("datetime").cast(pl.Datetime("ms")))
)

df_p.write_parquet("../data/3_Aggregation_And_Performance_Metrics/Reference_Picarro_G2401.parquet")

# Read and filter the sensor data.
# The steps are the same for every system; only the id embedded in the input
# and output file names differs, so the systems are handled in one loop.
# After the loop `df_s` holds the frame of the last system (6).
for sensor_id in (3, 6):
    df_s = (
        pl.read_parquet(f"../data/1min_acropolis-{sensor_id}.parquet")
        .filter(pl.col("datetime").is_between(start_date, end_date))
        .rename({"gmp343_corrected": "CO2_processed"})
        .select(["datetime", "CO2_processed"])
        # NOTE(review): df_p's `datetime` was cast to Datetime("ms") above; this
        # join assumes the sensor file's `datetime` has a matching dtype - confirm.
        .join(df_p, on="datetime", how="left")
    )

    df_s.write_parquet(
        f"../data/3_Aggregation_And_Performance_Metrics/Sensor_{sensor_id}_GMP343_Measurement.parquet"
    )

In [12]:
# 4 Sensor Sensitivity
# Export two files under ../data/4_Sensor_Sensitivity/, one from the 2025 input
# and one from the 2024 input.

# 2025 input: keep rows flagged "U", then average every numeric column per
# `system_id` in 1-hour `datetime` windows and add `diff` (sensor - reference).
df_s = pl.read_parquet("../data/2025_L1_1_min_sbs_period_acropolis.parquet")

(
    df_s.filter(pl.col("Flag") == "U")
    .with_columns(pl.col("picarro_corrected").round(2))
    .select(["datetime", "system_id", "gmp343_corrected", "picarro_corrected", "gmp343_temperature", "sht45_humidity", "h2o_v%", "bme280_pressure"])
    # group_by_dynamic needs `datetime` ascending within each `system_id`
    # group; sort explicitly instead of relying on the input file's row order.
    .sort("system_id", "datetime")
    .group_by_dynamic("datetime", every="1h", group_by=["system_id"])
    .agg(cs.numeric().mean())
    .with_columns(diff=pl.col("gmp343_corrected") - pl.col("picarro_corrected"))
    .rename({"gmp343_corrected": "CO2_processed", "picarro_corrected": "CO2_reference"})
    .write_parquet("../data/4_Sensor_Sensitivity/2025_Sensor_GMP343_Measurement.parquet")
)

# 2024 input: the `diff` column is read from the file itself, so this part only
# selects columns, keeps the listed systems and applies the export names.
df_s = pl.read_parquet("../data/2024_1h_sbs_period_acropolis.parquet")

(
    df_s.select(["datetime", "system_id", "gmp343_corrected", "picarro_corrected", "gmp343_temperature", "sht45_humidity", "h2o_v%", "bme280_pressure", "diff"])
    .filter(pl.col("system_id").is_in([2, 6, 7, 15]))
    .rename({"gmp343_corrected": "CO2_processed", "picarro_corrected": "CO2_reference"})
    .write_parquet("../data/4_Sensor_Sensitivity/2024_Sensor_GMP343_Measurement.parquet")
)
    