In [23]:
# Imports & Inits

from datetime import datetime
from datetime import timezone
import polars as pl
import os
import plotly.express as px


# local imports
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from utils.calibration_processing import process_bottle

# Root folder of the data files, read from the environment so that no
# machine-specific path is hardcoded in the notebook.
DATA_DIRECTORY = os.environ.get("DATA_DIRECTORY")
if DATA_DIRECTORY is None:
    # Without this guard os.path.join(None, ...) below fails with an
    # unhelpful TypeError that does not mention the missing variable.
    raise RuntimeError(
        "Environment variable DATA_DIRECTORY is not set; "
        "it must point to the folder containing input/picarro.parquet"
    )

# Lazy scan of the Picarro parquet file: nothing is read until .collect().
df_p = pl.scan_parquet(os.path.join(DATA_DIRECTORY, "input/picarro.parquet"))

# Linear correction applied to CO2_dry in process_measurement
# (corrected = CO2_dry * picarro_slope + picarro_intercept).
picarro_slope = 1.0060429925902534
picarro_intercept = 0.09305508001614271

# Display the first row as a quick check of the available columns.
df_p.head(1).collect()

DATE,TIME,FRAC_DAYS_SINCE_JAN1,FRAC_HRS_SINCE_JAN1,JULIAN_DAYS,EPOCH_TIME,ALARM_STATUS,INST_STATUS,CavityPressure,CavityTemp,DasTemp,EtalonTemp,species,OutletValve,CH4,CH4_dry,CO2,CO2_dry,h2o_reported,ch4_base,ch4_pzt_std,co2_base,co2_pzt_std,wlm1_offset,wlm2_offset,datetime,__index_level_0__
str,str,f64,f64,f64,f64,i64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,datetime[ns],i64
"""2023-06-23""","""00:00:01.489""",173.000017,4152.000414,174.000017,1687500000.0,0,963,139.983005,44.999779,43.375,45.151695,2.0,22398.639168,1.96398,2.009492,425.117804,437.149219,2.186075,1182.223133,84.583058,1085.124996,71.320269,-0.079042,-0.047342,2023-06-23 00:00:01.489,13303676


In [28]:
def plot_sensor_measurement(df, col_name: str, filter = None):
    """Show one column of `df` over `creation_timestamp` as a Plotly line chart.

    Args:
        df: polars DataFrame containing a `creation_timestamp` column and
            the column named by `col_name`.
        col_name: column drawn on the y-axis; also used as the figure title.
        filter: optional window length, passed as `every` to the dynamic
            group-by on `creation_timestamp`. When given, every other column
            is replaced by its mean per window before plotting. With `None`
            (the default) the rows are plotted unchanged.

    Returns:
        None. The figure is displayed via `fig.show()`.
    """
    if filter is not None:
        # polars renamed `groupby_dynamic` to `group_by_dynamic`; use the
        # current name when available and fall back to the old one so the
        # notebook runs on either side of the rename.
        group_dynamic = getattr(df, "group_by_dynamic", None) or df.groupby_dynamic
        df = group_dynamic("creation_timestamp", every=filter).agg(
            [
                pl.all().exclude(["creation_timestamp"]).mean(),
            ]
        )

    fig = px.line(
        df,
        x="creation_timestamp",
        y=col_name,
        markers=True,
        title=col_name,
    )
    fig.show()
    
def process_measurement(start_date, end_date, trim_start=0.3, trim_end=0.95):
    """Evaluate the Picarro readings of one bottle measurement window.

    Reads `datetime`, `CO2_dry` and `h2o_reported` from the module-level lazy
    frame `df_p`, keeps the rows whose timestamp lies between `start_date`
    and `end_date`, and applies the linear correction
    `CO2_dry * picarro_slope + picarro_intercept`. A positional slice of the
    corrected CO2 values is handed to `process_bottle`; its result is printed
    and both CO2 and H2O are plotted for the whole window.

    Args:
        start_date: start of the window; compared against the timestamp
            column after it has been tagged as UTC.
        end_date: end of the window.
        trim_start: fraction (0..1) of the window's samples skipped at the
            beginning before calling `process_bottle`. Defaults to 0.3.
        trim_end: fraction (0..1) of the window's samples after which the
            remaining tail is dropped. Defaults to 0.95.

    Returns:
        None. The result is printed and the plots are shown.

    Raises:
        ValueError: if the trim fractions do not satisfy
            0 <= trim_start < trim_end <= 1.
    """
    if not 0 <= trim_start < trim_end <= 1:
        raise ValueError(
            f"trim fractions must satisfy 0 <= trim_start < trim_end <= 1, "
            f"got trim_start={trim_start}, trim_end={trim_end}"
        )

    df_p_bottle = (
        df_p.select(
            # The source column is datetime[ns] without a zone; convert it to
            # microseconds and label it UTC so it can be compared with the
            # timezone-aware window bounds.
            pl.col("datetime")
            .dt.cast_time_unit("us")
            .dt.replace_time_zone("UTC")
            .alias("creation_timestamp"),
            pl.col("CO2_dry"),
            pl.col("h2o_reported"),
        )
        .sort("creation_timestamp")
        .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
        # Overwrites CO2_dry with the corrected value.
        .with_columns(pl.col("CO2_dry") * picarro_slope + picarro_intercept)
        .collect()
    )

    data = df_p_bottle.get_column("CO2_dry").to_list()
    # Keep only the slice between the two trim fractions (by sample position).
    data_cut = data[int(len(data) * trim_start):int(len(data) * trim_end)]

    median = process_bottle(data_cut, ignore_len=True)

    print(f"Picarro Median Reading: {median}")

    # Plots (untrimmed window)
    plot_sensor_measurement(df_p_bottle, col_name="CO2_dry")
    plot_sensor_measurement(df_p_bottle, col_name="h2o_reported")

# Bottles on 19.12.2023

In [29]:
# bottle 36

# PICARRO: evaluation window, expressed in UTC
start_date = datetime(2023, 12, 19, 10, 39, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 19, 10, 51, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 400.8012230209953


In [30]:
# bottle 38

# PICARRO: evaluation window, expressed in UTC
start_date = datetime(2023, 12, 19, 11, 32, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 19, 11, 48, tzinfo=timezone.utc)

process_measurement(start_date, end_date)


Picarro Median Reading: 814.671130808131


In [31]:
# bottle 21		12:58	13:09	397,7
# NOTE(review): the trailing header fields look like a start/end time and a
# CO2 value from another record; their source and time zone are not shown
# in this notebook — confirm.

# PICARRO: evaluation window, expressed in UTC
start_date = datetime(2023, 12, 19, 11, 53, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 19, 12, 3, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 400.30515469023914


In [32]:
# bottle 3		13:13	13:23	388.8
# NOTE(review): the trailing header fields look like a start/end time and a
# CO2 value from another record; their source and time zone are not shown
# in this notebook — confirm.

# PICARRO: evaluation window, expressed in UTC
start_date = datetime(2023, 12, 19, 12, 8, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 19, 12, 16, tzinfo=timezone.utc)

process_measurement(start_date, end_date)


Picarro Median Reading: 391.30855312596685


In [33]:
# bottle 12		13:25	13:35	811,1
# NOTE(review): the trailing header fields look like a start/end time and a
# CO2 value from another record; their source and time zone are not shown
# in this notebook — confirm.

# PICARRO: evaluation window, expressed in UTC
start_date = datetime(2023, 12, 19, 12, 20, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 19, 12, 29, tzinfo=timezone.utc)

process_measurement(start_date, end_date)


Picarro Median Reading: 816.1938692975559


In [34]:
# bottle 18		13:36	13:45	398,1
# NOTE(review): the trailing header fields look like a start/end time and a
# CO2 value from another record; their source and time zone are not shown
# in this notebook — confirm.

# PICARRO: evaluation window, expressed in UTC
start_date = datetime(2023, 12, 19, 12, 32, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 19, 12, 39, tzinfo=timezone.utc)

process_measurement(start_date, end_date)


Picarro Median Reading: 400.61377059157815


In [35]:
# bottle 1		13:49	13:59	793,2
# NOTE(review): the trailing header fields look like a start/end time and a
# CO2 value from another record; their source and time zone are not shown
# in this notebook — confirm.

# PICARRO: evaluation window, expressed in UTC
start_date = datetime(2023, 12, 19, 12, 42, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 19, 12, 52, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 798.1926212104053


In [36]:
# bottle 28		14:01	14:10	806.0
# NOTE(review): the trailing header fields look like a start/end time and a
# CO2 value from another record; their source and time zone are not shown
# in this notebook — confirm.

# PICARRO: evaluation window, expressed in UTC
start_date = datetime(2023, 12, 19, 12, 55, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 19, 13, 4, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 811.0862213231853


In [37]:
# bottle 33		14:11	14:21	800,7
# NOTE(review): the trailing header fields look like a start/end time and a
# CO2 value from another record; their source and time zone are not shown
# in this notebook — confirm.

# PICARRO: evaluation window, expressed in UTC
start_date = datetime(2023, 12, 19, 13, 6, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 19, 13, 14, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 805.6716103308408


In [38]:
# bottle 15		14:22	14:32	802,7
# NOTE(review): the trailing header fields look like a start/end time and a
# CO2 value from another record; their source and time zone are not shown
# in this notebook — confirm.

# PICARRO: evaluation window, expressed in UTC
start_date = datetime(2023, 12, 19, 13, 18, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 19, 13, 26, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 807.8464165768654


In [39]:
# bottle 24		15:09	15:20	801,8
# NOTE(review): the trailing header fields look like a start/end time and a
# CO2 value from another record; their source and time zone are not shown
# in this notebook — confirm.

# PICARRO: evaluation window, expressed in UTC
start_date = datetime(2023, 12, 19, 14, 3, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 19, 14, 13, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 806.811872104603


In [40]:
# bottle 5		15:22	15:32	404,4
# NOTE(review): the trailing header fields look like a start/end time and a
# CO2 value from another record; their source and time zone are not shown
# in this notebook — confirm.

# PICARRO: evaluation window, expressed in UTC
start_date = datetime(2023, 12, 19, 14, 17, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 19, 14, 26, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 407.0481550009737


In [41]:
# bottle 8		15:33	15:43	410,7
# NOTE(review): the trailing header fields look like a start/end time and a
# CO2 value from another record; their source and time zone are not shown
# in this notebook — confirm.

# PICARRO: evaluation window, expressed in UTC
start_date = datetime(2023, 12, 19, 14, 28, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 19, 14, 37, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 413.3585145509678


In [42]:
# bottle 23		15:44	15:54	388.0
# NOTE(review): the trailing header fields look like a start/end time and a
# CO2 value from another record; their source and time zone are not shown
# in this notebook — confirm.

# PICARRO: evaluation window, expressed in UTC
start_date = datetime(2023, 12, 19, 14, 39, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 19, 14, 47, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 390.4876055247873
