In [35]:
# Imports & Inits

from datetime import datetime
from datetime import timezone
import polars as pl
import os

from utils import two_point_calibration, average_bottle, plot_sensor_measurement


# Root directories of the input data, taken from the environment.
# Index os.environ directly so that a missing variable fails right here with a
# KeyError naming it, instead of later as an opaque TypeError raised by
# os.path.join(None, ...).
DATA_DIRECTORY = os.environ["DATA_DIRECTORY"]
PICARRO_DATA_DIRECTORY = os.environ["PICARRO_DATA_DIRECTORY"]

#------------
sensor_id = 13
#------------

# Lazy query over the downloaded ACROPOLIS measurements: keep only the rows of
# the selected system, order them by creation time, drop rows whose
# gmp343_filtered value is not positive, and keep the three columns used below.
# Nothing is read from disk until .collect() is called.
df = (
    pl.scan_parquet(os.path.join(DATA_DIRECTORY, "download", "acropolis.parquet"))
    .filter(pl.col("system_name") == f"tum-esm-midcost-raspi-{sensor_id}")
    .sort("creation_timestamp")
    .filter(pl.col("gmp343_filtered") > 0.0)
    .select("creation_timestamp", "gmp343_filtered", "sht45_humidity")
)

# Lazy handle on the Picarro measurements (unfiltered).
df_p = pl.scan_parquet(os.path.join(PICARRO_DATA_DIRECTORY, "picarro.parquet"))

In [36]:
# Materialise only the first three rows of the lazy sensor query as a quick
# check of the selected columns and their dtypes.
df.head(3).collect()

creation_timestamp,gmp343_filtered,sht45_humidity
"datetime[μs, UTC]",f64,f64
2023-08-02 14:58:22.920 UTC,553.5,48.96
2023-08-02 14:58:32.930 UTC,572.7,48.99
2023-08-02 14:58:42.940 UTC,546.4,48.61


# ICOS Calibration PICARRO

In [37]:
# Slope and intercept of a linear correction for Picarro readings
# (intended use: corrected = raw * picarro_slope + picarro_intercept).
# NOTE(review): how these two values were derived is not shown in this
# notebook - confirm their provenance before reusing them.
picarro_slope = 1.0060429925902534 
picarro_intercept = 0.09305508001614271

# Materialise the first Picarro record to inspect the available columns and dtypes.
df_p.head(1).collect()

DATE,TIME,FRAC_DAYS_SINCE_JAN1,FRAC_HRS_SINCE_JAN1,JULIAN_DAYS,EPOCH_TIME,ALARM_STATUS,INST_STATUS,CavityPressure,CavityTemp,DasTemp,EtalonTemp,species,OutletValve,CH4,CH4_dry,CO2,CO2_dry,h2o_reported,ch4_base,ch4_pzt_std,co2_base,co2_pzt_std,wlm1_offset,wlm2_offset,datetime,__index_level_0__
str,str,f64,f64,f64,f64,i64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,datetime[ns],i64
"""2023-06-23""","""00:00:01.489""",173.000017,4152.000414,174.000017,1687500000.0,0,963,139.983005,44.999779,43.375,45.151695,2.0,22398.639168,1.96398,2.009492,425.117804,437.149219,2.186075,1182.223133,84.583058,1085.124996,71.320269,-0.079042,-0.047342,2023-06-23 00:00:01.489,7544020


# ICOS Calibration MC13

In [41]:
# ---
# Measurement
# ---
# Two UTC time windows are cut out of the sensor data, one per calibration
# point. `df` is already ordered by creation_timestamp when it is built, so the
# windows are not sorted again.

#400
start_date = datetime(2023, 12, 19, 12, 5, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 19, 12, 33, 0, tzinfo=timezone.utc)

df_p_400 = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))


#600
start_date = datetime(2023, 12, 19, 12, 34, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 19, 13, 3, 0, tzinfo=timezone.utc)

df_p_600 = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))

# Execute each lazy query exactly once; the resulting DataFrames are reused for
# both the averaging and the plots instead of re-scanning the parquet file for
# every call.
calibration_400 = df_p_400.collect()
calibration_600 = df_p_600.collect()


# ---
# Averaging
# ---

# true_values[i] is the reference value belonging to measured_values[i]
# (index 0: 400 ppm window, index 1: 600 ppm window).
true_values = [427.38, 610.95]
measured_values = []

for label, frame in (("400 ppm", calibration_400), ("600 ppm", calibration_600)):
    data = frame.select(pl.col("gmp343_filtered")).to_series().to_list()
    if not data:
        # Fail loudly: an empty window (wrong sensor_id or time range) would
        # otherwise propagate into the calibration as a meaningless value.
        raise ValueError(f"No gmp343_filtered samples in the {label} calibration window")
    measured_values.append(average_bottle(data))

# calculate slope and intercept
acropolis_slope, acropolis_intercept = two_point_calibration(measured_values, true_values)
print(f"acropolis_slope = {acropolis_slope}, acropolis_intercept = {acropolis_intercept}")

# plot
plot_sensor_measurement(calibration_400, col_name="gmp343_filtered")
plot_sensor_measurement(calibration_400, col_name="sht45_humidity")
plot_sensor_measurement(calibration_600, col_name="gmp343_filtered")
plot_sensor_measurement(calibration_600, col_name="sht45_humidity")


Uncut average: 451.31607142857155
Cut average: 451.3568807339451
Uncut average: 632.6536842105262
Cut average: 632.3838709677419
acropolis_slope = 1.0140476829610818, acropolis_intercept = -30.317399096798397


# Bottles on 19.12.2023

In [47]:
# bottle 41

# PICARRO
# NOTE: the Picarro evaluation for this bottle is disabled. Its two plot calls
# live inside this commented block because `df_p_bottle` is only defined here;
# leaving them active raises NameError on a fresh kernel (Restart & Run All).
# start_date = datetime(2023, 12, 19, 14, 57, 0).replace(tzinfo=timezone.utc)
# end_date = datetime(2023, 12, 19, 15, 22, 0).replace(tzinfo=timezone.utc)

# df_p_bottle = df_p.select(pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
#             pl.col("CO2_dry"),
#             pl.col("h2o_reported")) \
#     .sort("creation_timestamp") \
#     .filter(pl.col("creation_timestamp").is_between(start_date, end_date))

# data = df_p_bottle.select(pl.col("CO2_dry")).collect().to_series().to_list()
# data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

# avg = average_bottle(data_cut) * picarro_slope + picarro_intercept

# print(f"Picarro Average: {avg}")

# plot_sensor_measurement(df_p_bottle.collect(), col_name="CO2_dry")
# plot_sensor_measurement(df_p_bottle.collect(), col_name="h2o_reported")

# ACROPOLIS
start_date = datetime(2023, 12, 19, 15, 29, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 19, 15, 53, 0, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))

# Execute the lazy query once and reuse the result for averaging and plotting.
bottle_measurements = df_bottle.collect()

data = bottle_measurements.select(pl.col("gmp343_filtered")).to_series().to_list()
if not data:
    # An empty window means a wrong sensor_id or time range; stop before averaging.
    raise ValueError("No gmp343_filtered samples in the bottle measurement window")

# Only the samples between 30 % and 95 % of the window are averaged.
# NOTE(review): presumably this removes the settling phase at the start and the
# bottle switch at the end - confirm the intended fractions.
cut_start = int(len(data) * 0.3)
cut_end = int(len(data) * 0.95)
data_cut = data[cut_start:cut_end]

# Apply the two-point calibration derived for this sensor.
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

# Label with the configured sensor_id; the previous hard-coded "MC18" did not
# match the sensor evaluated in this notebook.
print(f"MC{sensor_id} Average: {avg}")


# Plots
plot_sensor_measurement(bottle_measurements, col_name="gmp343_filtered")
plot_sensor_measurement(bottle_measurements, col_name="sht45_humidity")


Uncut average: 424.94946236559156
Cut average: 425.04262295081963
MC18 Average: 400.696087866181
