In [22]:
# Imports & Inits

from datetime import datetime
from datetime import timezone
import polars as pl
import os

from utils import two_point_calibration, average_bottle, plot_sensor_measurement


DATA_DIRECTORY = os.environ.get("DATA_DIRECTORY")
PICARRO_DATA_DIRECTORY = os.environ.get("PICARRO_DATA_DIRECTORY")

#------------
sensor_id = 13
#------------

df = pl.scan_parquet(os.path.join(DATA_DIRECTORY, "download", "acropolis.parquet")) \
    .filter(pl.col("system_name") == f"tum-esm-midcost-raspi-{sensor_id}") \
    .sort(pl.col("creation_timestamp")) \
    .filter(pl.col("gmp343_filtered") > 0.0) \
    .select(pl.col("creation_timestamp"),
                pl.col("gmp343_filtered"),
                pl.col("sht45_humidity"))
    

df_p = pl.scan_parquet(os.path.join(PICARRO_DATA_DIRECTORY, "picarro.parquet")) 

In [18]:
df.head(3).collect()

creation_timestamp,gmp343_filtered,sht45_humidity
"datetime[μs, UTC]",f64,f64
2023-08-02 14:58:22.920 UTC,553.5,48.96
2023-08-02 14:58:32.930 UTC,572.7,48.99
2023-08-02 14:58:42.940 UTC,546.4,48.61


# ICOS Calibration PICARRO

In [19]:
picarro_slope = 0
picarro_intercept = 0

df_p.head(1).collect()

# ICOS Calibration MC13

In [20]:
# ---
# Measurement
# ---

#400
start_date = datetime(2023, 12, 19, 12, 3, 0).replace(tzinfo=timezone.utc)
end_date = datetime(2023, 12, 19, 12, 33, 0).replace(tzinfo=timezone.utc)

df_p_400 = df.sort("creation_timestamp").filter(pl.col("creation_timestamp").is_between(start_date, end_date))

plot_sensor_measurement(df_p_400.collect(), col_name="gmp343_filtered")
plot_sensor_measurement(df_p_400.collect(), col_name="sht45_humidity")

#600
start_date = datetime(2023, 12, 19, 12, 33, 0).replace(tzinfo=timezone.utc)
end_date = datetime(2023, 12, 19, 13, 3, 0).replace(tzinfo=timezone.utc)

df_p_600 = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))

plot_sensor_measurement(df_p_600.collect(), col_name="gmp343_filtered")
plot_sensor_measurement(df_p_600.collect(), col_name="sht45_humidity")

# ---
# Averagng
# ---

measured_values = [None, None]
true_values = [427.38, 610.95]

# 400 ppm

data = df_p_400.select(pl.col("gmp343_filtered")).collect().to_series().to_list()
measured_values[0] = average_bottle(data)

# 600 ppm
data = df_p_600.select(pl.col("gmp343_filtered")).collect().to_series().to_list()

measured_values[1] = average_bottle(data)

# calculate slope and intercept
acropolis_slope, acropolis_intercept = two_point_calibration(measured_values, true_values)
print(f"acropolis_slope = {acropolis_slope}, acropolis_intercept = {acropolis_intercept}")


Uncut average: 453.23500000000024
Cut average: 451.3085470085472
Uncut average: 626.7439999999999
Cut average: 632.3846153846154
acropolis_slope = 1.013772839482865, acropolis_intercept = -30.144347183740877


# Bottles on 19.12.2023

In [34]:
# bottle 

# PICARRO
start_date = datetime(2023, 10, 23, 13, 6, 0).replace(tzinfo=timezone.utc)
end_date = datetime(2023, 10, 23, 13, 31, 0).replace(tzinfo=timezone.utc)

df_p_bottle = df_p.select(pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
            pl.col("CO2_dry"),
            pl.col("h2o_reported")) \
    .sort("creation_timestamp") \
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))

data = df_p_bottle.select(pl.col("CO2_dry")).collect().to_series().to_list()
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

avg = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg}")

# ACROPOLIS
start_date = datetime(2023, 12, 19, 0, 0, 0).replace(tzinfo=timezone.utc)
end_date = datetime(2023, 12, 19, 0, 0, 0).replace(tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))

data = df_bottle.select(pl.col("gmp343_filtered")).collect().to_series().to_list()
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC18 Average: {avg}")


# Plots
plot_sensor_measurement(df_p_bottle.collect(), col_name="CO2_dry")
plot_sensor_measurement(df_p_bottle.collect(), col_name="h2o_reported")

plot_sensor_measurement(df_bottle.collect(), col_name="gmp343_filtered")
plot_sensor_measurement(df_bottle.collect(), col_name="sht45_humidity")


Uncut average: 607.0610295264346
Cut average: 607.0095882593893
Picarro Average: 0.0


ZeroDivisionError: division by zero