In [36]:
# Imports & Inits

from datetime import datetime
from datetime import timezone
import polars as pl
import os

from utils import two_point_calibration, average_bottle, plot_sensor_measurement

# Root folder of the measurement data, read from the environment so that no
# machine-specific path is hard-coded in the notebook.
DATA_DIRECTORY = os.environ.get("DATA_DIRECTORY")
if DATA_DIRECTORY is None:
    # Fail with an actionable message instead of the opaque TypeError that
    # os.path.join raises when it is handed None.
    raise RuntimeError(
        "Environment variable DATA_DIRECTORY is not set; it must point to the "
        "folder that contains input/picarro.parquet."
    )

# Lazy query over the Picarro parquet file; the cells below select, filter
# and .collect() from it.
df = pl.scan_parquet(os.path.join(DATA_DIRECTORY, "input/picarro.parquet"))

In [37]:

# 23.10.2023
#
# Two-point calibration from two bottle measurements on the Picarro CO2_dry
# signal. On this day the window used for the 600 ppm bottle comes first,
# followed by the window for the 400 ppm bottle.

# Fractions of each window kept for averaging: the first 30 % and the last
# 5 % of the samples are dropped (presumably to skip the settling period
# after switching bottles - TODO confirm).
cut_start_fraction = 0.3
cut_end_fraction = 0.95

# Timestamp (microsecond precision, tagged as UTC) plus CO2_dry, sorted in
# time. Built once and shared by both bottle windows.
picarro_sorted = df.select(
    pl.col("datetime")
    .dt.cast_time_unit("us")
    .dt.replace_time_zone("UTC")
    .alias("creation_timestamp"),
    pl.col("CO2_dry"),
).sort("creation_timestamp")

# 600 ppm bottle window
start_date = datetime(2023, 10, 23, 13, 6, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 10, 23, 13, 31, 0, tzinfo=timezone.utc)

df_p_600 = picarro_sorted.filter(
    pl.col("creation_timestamp").is_between(start_date, end_date)
)

# 400 ppm bottle window
start_date = datetime(2023, 10, 23, 13, 31, 30, tzinfo=timezone.utc)
end_date = datetime(2023, 10, 23, 14, 6, 0, tzinfo=timezone.utc)

df_p_400 = picarro_sorted.filter(
    pl.col("creation_timestamp").is_between(start_date, end_date)
)

# Materialise each window exactly once; the same frames feed both the
# averaging and the plots, so the parquet file is not re-scanned for each use.
df_p_400_collected = df_p_400.collect()
df_p_600_collected = df_p_600.collect()

# Values the two bottles are calibrated against, in (400 ppm, 600 ppm) order.
true_values = [427.38, 610.95]

# Bottle averages, filled in the same (400 ppm, 600 ppm) order as true_values.
measured_values = []
for bottle_label, bottle_frame in (
    ("400 ppm", df_p_400_collected),
    ("600 ppm", df_p_600_collected),
):
    data = bottle_frame.get_column("CO2_dry").to_list()

    data_cut = data[int(len(data) * cut_start_fraction):int(len(data) * cut_end_fraction)]
    if not data_cut:
        # An empty window would make the calibration meaningless; report it
        # here rather than deep inside the averaging helper.
        raise ValueError(
            f"No CO2_dry samples available for the {bottle_label} bottle window"
        )

    measured_values.append(average_bottle(data_cut))

acropolis_slope, acropolis_intercept = two_point_calibration(measured_values, true_values)
print(acropolis_slope, acropolis_intercept)

#plots
plot_sensor_measurement(df_p_400_collected, col_name="CO2_dry")
plot_sensor_measurement(df_p_600_collected, col_name="CO2_dry")

Uncut average: 424.6023673882955
Cut average: 424.60258686061144
Uncut average: 607.0095882593893
Cut average: 606.9971229611265
1.0064446223259518 0.04100982844977352


In [40]:
# 18.12.2023
#
# Two-point calibration from two bottle measurements on the Picarro CO2_dry
# signal. On this day the window used for the 400 ppm bottle comes first,
# followed by the window for the 600 ppm bottle.

# Fractions of each window kept for averaging: the first 30 % and the last
# 5 % of the samples are dropped (presumably to skip the settling period
# after switching bottles - TODO confirm).
cut_start_fraction = 0.3
cut_end_fraction = 0.95

# Timestamp (microsecond precision, tagged as UTC) plus CO2_dry, sorted in
# time. Built once and shared by both bottle windows.
picarro_sorted = df.select(
    pl.col("datetime")
    .dt.cast_time_unit("us")
    .dt.replace_time_zone("UTC")
    .alias("creation_timestamp"),
    pl.col("CO2_dry"),
).sort("creation_timestamp")

# 400 ppm bottle window
start_date = datetime(2023, 12, 18, 14, 33, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 18, 15, 2, 0, tzinfo=timezone.utc)

df_p_400 = picarro_sorted.filter(
    pl.col("creation_timestamp").is_between(start_date, end_date)
)

# 600 ppm bottle window
start_date = datetime(2023, 12, 18, 15, 4, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 18, 15, 32, 0, tzinfo=timezone.utc)

df_p_600 = picarro_sorted.filter(
    pl.col("creation_timestamp").is_between(start_date, end_date)
)

# Materialise each window exactly once; the same frames feed both the
# averaging and the plots, so the parquet file is not re-scanned for each use.
df_p_400_collected = df_p_400.collect()
df_p_600_collected = df_p_600.collect()

# Values the two bottles are calibrated against, in (400 ppm, 600 ppm) order.
true_values = [427.38, 610.95]

# Bottle averages, filled in the same (400 ppm, 600 ppm) order as true_values.
measured_values = []
for bottle_label, bottle_frame in (
    ("400 ppm", df_p_400_collected),
    ("600 ppm", df_p_600_collected),
):
    data = bottle_frame.get_column("CO2_dry").to_list()

    data_cut = data[int(len(data) * cut_start_fraction):int(len(data) * cut_end_fraction)]
    if not data_cut:
        # An empty window would make the calibration meaningless; report it
        # here rather than deep inside the averaging helper.
        raise ValueError(
            f"No CO2_dry samples available for the {bottle_label} bottle window"
        )

    measured_values.append(average_bottle(data_cut))

acropolis_slope, acropolis_intercept = two_point_calibration(measured_values, true_values)
print(acropolis_slope, acropolis_intercept)

#plots
plot_sensor_measurement(df_p_400_collected, col_name="CO2_dry")
plot_sensor_measurement(df_p_600_collected, col_name="CO2_dry")

Uncut average: 424.71947275268167
Cut average: 424.720362913965
Uncut average: 607.1851656466596
Cut average: 607.1877140630082
1.0060429925902534 0.09305508001614271
