In [1]:
# Imports & Inits

from datetime import datetime
from datetime import timezone
import polars as pl
import os
import plotly.express as px

# local imports
import os
import sys
# Put the parent directory of the notebook on the import path so that the
# project-local `utils` package (imported right below) can be resolved.
module_path = os.path.abspath(os.pardir)
already_registered = any(entry == module_path for entry in sys.path)
if not already_registered:
    sys.path.append(module_path)

from utils.calibration_processing import two_point_calibration, process_bottle

# Root folder of the data set; read from the environment so that no
# machine-specific path is hard-coded in the notebook.
DATA_DIRECTORY = os.environ.get("DATA_DIRECTORY")
if DATA_DIRECTORY is None:
    # Without this guard os.path.join(None, ...) fails with an opaque
    # "expected str, bytes or os.PathLike object, not NoneType" TypeError.
    raise RuntimeError(
        "Environment variable DATA_DIRECTORY is not set; "
        "it must point to the folder that contains input/picarro.parquet."
    )

# Lazy scan of the Picarro measurements: this only builds a query plan, rows
# are materialised when a downstream query calls .collect().
df = pl.scan_parquet(os.path.join(DATA_DIRECTORY, "input", "picarro.parquet"))

In [2]:
# Peek at the first three rows to confirm the column names and dtypes.
df.limit(3).collect()

datetime,CO2_dry,h2o_reported,CavityPressure,CavityTemp,__index_level_0__
datetime[ns],f64,f64,f64,f64,i64
2023-06-23 00:00:01.489,437.149219,2.186075,139.983005,44.999779,16146871
2023-06-23 00:00:02.397,437.149219,2.186075,140.003256,44.999779,16146872
2023-06-23 00:00:02.911,437.18184,2.190795,139.994537,44.999779,16146873


In [6]:
def _collect_calibration_window(lazy_frame, start_date, end_date, label):
    """Materialise the Picarro readings recorded between two timestamps.

    Args:
        lazy_frame: polars LazyFrame with the columns "datetime", "CO2_dry"
            and "h2o_reported".
        start_date: Timezone-aware lower bound of the window (inclusive).
        end_date: Timezone-aware upper bound of the window (inclusive).
        label: Human-readable bottle name, only used in the error message.

    Returns:
        A polars DataFrame with the columns "creation_timestamp", "CO2_dry"
        and "h2o_reported", sorted by "creation_timestamp".

    Raises:
        ValueError: If no reading falls inside the requested window.
    """
    window = (
        lazy_frame
        .select(
            # The stored timestamps are naive; attach UTC so they can be
            # compared with the timezone-aware bounds.
            # NOTE(review): assumes the instrument clock is UTC - confirm.
            pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
            pl.col("CO2_dry"),
            pl.col("h2o_reported"),
        )
        # Filter first so that only the rows of the window have to be sorted.
        .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
        .sort("creation_timestamp")
        .collect()
    )
    if window.height == 0:
        raise ValueError(
            f"No Picarro data found for the {label} bottle between {start_date} and {end_date}"
        )
    return window


def process_two_point_picarro_calibration(low_start_date, low_end_date, high_start_date, high_end_date,
                                          true_values=(427.38, 610.95)):
    """Derive and print a two-point calibration for the Picarro from two bottle runs.

    The readings of the low and the high calibration bottle are cut out of the
    module-level lazy frame `df`, each window is reduced to one measured value
    with `process_bottle`, and slope/intercept are computed with
    `two_point_calibration`. The measured values and the resulting
    slope/intercept are printed, and CO2 and H2O time series of both windows
    are plotted for visual inspection.

    Args:
        low_start_date: Timezone-aware start of the low (400 ppm) bottle window.
        low_end_date: Timezone-aware end of the low (400 ppm) bottle window.
        high_start_date: Timezone-aware start of the high (600 ppm) bottle window.
        high_end_date: Timezone-aware end of the high (600 ppm) bottle window.
        true_values: Reference values of the (low, high) bottle. Defaults to
            the values that were previously hard-coded in this function.

    Raises:
        ValueError: If `true_values` does not hold exactly two entries or if
            one of the windows contains no data.
    """
    if len(true_values) != 2:
        raise ValueError("true_values must contain exactly two entries: (low, high)")

    # Insertion order (low, high) is relied upon below for the measured values.
    windows = {
        "400 ppm": _collect_calibration_window(df, low_start_date, low_end_date, "400 ppm"),
        "600 ppm": _collect_calibration_window(df, high_start_date, high_end_date, "600 ppm"),
    }

    # One measured value per bottle, in the same (low, high) order as true_values.
    measured_values = [
        process_bottle(data=window["CO2_dry"].to_list(), ignore_len=True)
        for window in windows.values()
    ]
    print(measured_values)

    # Pass a list, as the original code did, in case the helper relies on it.
    picarro_slope, picarro_intercept = two_point_calibration(measured_values, list(true_values))
    print(picarro_slope, picarro_intercept)

    # Plots: H2O first, then CO2, for the low and afterwards the high bottle.
    for label, window in windows.items():
        for column in ("h2o_reported", "CO2_dry"):
            fig = px.line(window, x="creation_timestamp", y=column,
                          title=f"{label} bottle: {column}")
            fig.show()

In [7]:
# Calibration run of 23.10.2023 (all times in UTC).
# Note: on this day the high bottle window precedes the low bottle window.

low_start_date = datetime(2023, 10, 23, 13, 31, 30, tzinfo=timezone.utc)
low_end_date = datetime(2023, 10, 23, 14, 6, tzinfo=timezone.utc)

high_start_date = datetime(2023, 10, 23, 13, 6, tzinfo=timezone.utc)
high_end_date = datetime(2023, 10, 23, 13, 31, tzinfo=timezone.utc)

process_two_point_picarro_calibration(
    low_start_date=low_start_date,
    low_end_date=low_end_date,
    high_start_date=high_start_date,
    high_end_date=high_end_date,
)

[424.60165199, 607.0065449]
1.0063874771746113 0.06621464961165202


In [8]:
# Calibration run of 18.12.2023 (all times in UTC).

low_start_date = datetime(2023, 12, 18, 14, 33, tzinfo=timezone.utc)
low_end_date = datetime(2023, 12, 18, 15, 2, tzinfo=timezone.utc)

high_start_date = datetime(2023, 12, 18, 15, 4, tzinfo=timezone.utc)
high_end_date = datetime(2023, 12, 18, 15, 32, tzinfo=timezone.utc)

process_two_point_picarro_calibration(
    low_start_date=low_start_date,
    low_end_date=low_end_date,
    high_start_date=high_start_date,
    high_end_date=high_end_date,
)

[424.7205036, 607.18271856]
1.0060713120261249 0.08088569875155827


In [12]:
# Calibration run of 04.09.2024 (all times in UTC).

low_start_date = datetime(2024, 9, 4, 13, 0, tzinfo=timezone.utc)
low_end_date = datetime(2024, 9, 4, 13, 30, tzinfo=timezone.utc)

high_start_date = datetime(2024, 9, 4, 13, 31, tzinfo=timezone.utc)
high_end_date = datetime(2024, 9, 4, 14, 0, tzinfo=timezone.utc)

process_two_point_picarro_calibration(
    low_start_date=low_start_date,
    low_end_date=low_end_date,
    high_start_date=high_start_date,
    high_end_date=high_end_date,
)

[424.37113967, 606.63693264]
1.007155522760185 -0.02773701867431555
