In [None]:
# Imports & Inits
from datetime import datetime
from datetime import timezone
import polars as pl
import os
from typing import Optional
import plotly.express as px

from utils.calibration_processing import two_point_calibration, average_bottle


# Root folders are injected through environment variables so the notebook
# contains no machine-specific paths.
DATA_DIRECTORY = os.environ.get("DATA_DIRECTORY")
PICARRO_DATA_DIRECTORY = os.environ.get("PICARRO_DATA_DIRECTORY")

# Fail early with a readable message: os.path.join(None, ...) would otherwise
# raise an opaque TypeError in the scan_parquet calls below.
if DATA_DIRECTORY is None:
    raise EnvironmentError("Environment variable DATA_DIRECTORY is not set.")

#------------
sensor_id = 13
#------------

# Lazy view on the ACROPOLIS download, restricted to the selected sensor,
# ordered by time, with non-positive CO2 readings removed and only the
# columns used further down.
df = (
    pl.scan_parquet(os.path.join(DATA_DIRECTORY, "download", "acropolis.parquet"))
    .filter(pl.col("system_name") == f"tum-esm-midcost-raspi-{sensor_id}")
    .sort(pl.col("creation_timestamp"))
    .filter(pl.col("gmp343_filtered") > 0.0)
    .select(
        pl.col("creation_timestamp"),
        pl.col("gmp343_filtered"),
        pl.col("sht45_humidity"),
    )
)

# Lazy view on the Picarro reference data (columns are selected per cell).
df_p = pl.scan_parquet(os.path.join(DATA_DIRECTORY, "input/picarro.parquet"))

In [None]:
def plot_sensor_measurement(df, col_name: str, filter: Optional[str] = None):
    """Show one column of a measurement frame over time as a Plotly line chart.

    Args:
        df: Collected (eager) frame containing a "creation_timestamp" column
            and the column named by `col_name`.
        col_name: Column plotted on the y-axis; also used as the figure title.
        filter: Optional window length passed as `every` to the dynamic
            group-by. When given, all columns except the timestamp are
            averaged per window before plotting. None plots the raw samples.
    """
    if filter is not None:
        # polars renamed groupby_dynamic -> group_by_dynamic; use whichever
        # the installed version provides.
        group_by_dynamic = getattr(df, "group_by_dynamic", None) or df.groupby_dynamic
        df = group_by_dynamic("creation_timestamp", every=filter).agg(
            [
                pl.all().exclude(["creation_timestamp"]).mean(),
            ]
        )

    fig = px.line(
        df,
        x="creation_timestamp",
        y=col_name,
        markers=True,
        title=col_name,
    )
    fig.show()

In [None]:
# Materialise only the first three rows of the lazy ACROPOLIS frame as a sanity check.
df.head(n=3).collect()

# ICOS Calibration PICARRO

In [None]:
# Linear correction applied to every raw Picarro bottle average further down:
# corrected = raw_average * picarro_slope + picarro_intercept
picarro_slope, picarro_intercept = 1.0060429925902534, 0.09305508001614271

# Materialise the first Picarro row to confirm the file is readable.
df_p.head(n=1).collect()

# ICOS Calibration MC13 19.12.2023

In [None]:
# ---
# Measurement
# ---
# NOTE: despite the "df_p_" prefix, both frames below are slices of `df`
# (the MC13 / ACROPOLIS data), not of the Picarro frame `df_p`.

#400
start_date = datetime(2023, 12, 19, 12, 5, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 19, 12, 33, 0, tzinfo=timezone.utc)

df_p_400 = df.sort("creation_timestamp").filter(pl.col("creation_timestamp").is_between(start_date, end_date))


#600
start_date = datetime(2023, 12, 19, 12, 34, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 19, 13, 3, 0, tzinfo=timezone.utc)

df_p_600 = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))

# Run each lazy query once and reuse the result for averaging and plotting
# instead of re-scanning the parquet file on every use.
window_400 = df_p_400.collect()
window_600 = df_p_600.collect()


# ---
# Averaging
# ---

# presumably the reference CO2 concentrations of the two calibration gases — confirm
true_values = [427.38, 610.95]

# 400 ppm
data = window_400["gmp343_filtered"].to_list()
measured_400 = average_bottle(data)

# 600 ppm
data = window_600["gmp343_filtered"].to_list()
measured_600 = average_bottle(data)

measured_values = [measured_400, measured_600]

# calculate slope and intercept; later cells apply them as avg * slope + intercept
acropolis_slope, acropolis_intercept = two_point_calibration(measured_values, true_values)
print(f"acropolis_slope = {acropolis_slope}, acropolis_intercept = {acropolis_intercept}")

# plot
plot_sensor_measurement(window_400, col_name="gmp343_filtered")
plot_sensor_measurement(window_400, col_name="sht45_humidity")
plot_sensor_measurement(window_600, col_name="gmp343_filtered")
plot_sensor_measurement(window_600, col_name="sht45_humidity")


# Bottles on 19.12.2023

In [None]:
# bottle 41
# Compares the corrected Picarro average for this bottle with the calibrated
# MC13 average.

# PICARRO
# 41		16:02	16:27	399,8
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 19, 14, 56, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 19, 15, 21, 0, tzinfo=timezone.utc)

# Picarro timestamps are cast to microseconds and labelled as UTC so they can
# be compared with the tz-aware window bounds.
df_p_bottle = (
    df_p.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)
# Collect once; the same frame feeds the average and both plots.
picarro_window = df_p_bottle.collect()

data = picarro_window["CO2_dry"].to_list()
# Keep only the samples between 30 % and 95 % of the window.
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 19, 15, 29, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 19, 15, 53, 0, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))
acropolis_window = df_bottle.collect()

data = acropolis_window["gmp343_filtered"].to_list()
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

# Apply the current two-point calibration (acropolis_slope / acropolis_intercept).
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
# Label corrected: the sensor is MC13 (previously printed as "MD13").
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(acropolis_window, col_name="gmp343_filtered")
plot_sensor_measurement(acropolis_window, col_name="sht45_humidity")


In [None]:
# bottle 42
# Compares the corrected Picarro average for this bottle with the calibrated
# MC13 average.

# PICARRO
# 42		16:32	16:53	401.7
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 19, 15, 28, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 19, 15, 47, 0, tzinfo=timezone.utc)

# Picarro timestamps are cast to microseconds and labelled as UTC so they can
# be compared with the tz-aware window bounds.
df_p_bottle = (
    df_p.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)
# Collect once; the same frame feeds the average and both plots.
picarro_window = df_p_bottle.collect()

data = picarro_window["CO2_dry"].to_list()
# Keep only the samples between 30 % and 95 % of the window.
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 19, 15, 55, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 19, 16, 15, 0, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))
acropolis_window = df_bottle.collect()

data = acropolis_window["gmp343_filtered"].to_list()
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

# Apply the current two-point calibration (acropolis_slope / acropolis_intercept).
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
# Label corrected: the sensor is MC13 (previously printed as "MD13").
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(acropolis_window, col_name="gmp343_filtered")
plot_sensor_measurement(acropolis_window, col_name="sht45_humidity")


In [None]:
# bottle 43
# Compares the corrected Picarro average for this bottle with the calibrated
# MC13 average.

# PICARRO
# 43		16:55	17:15	401,5
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 19, 15, 51, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 19, 16, 9, 0, tzinfo=timezone.utc)

# Picarro timestamps are cast to microseconds and labelled as UTC so they can
# be compared with the tz-aware window bounds.
df_p_bottle = (
    df_p.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)
# Collect once; the same frame feeds the average and both plots.
picarro_window = df_p_bottle.collect()

data = picarro_window["CO2_dry"].to_list()
# Keep only the samples between 30 % and 95 % of the window.
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 19, 16, 18, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 19, 16, 40, 0, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))
acropolis_window = df_bottle.collect()

data = acropolis_window["gmp343_filtered"].to_list()
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

# Apply the current two-point calibration (acropolis_slope / acropolis_intercept).
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
# Label corrected: the sensor is MC13 (previously printed as "MD13").
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(acropolis_window, col_name="gmp343_filtered")
plot_sensor_measurement(acropolis_window, col_name="sht45_humidity")


In [None]:
# bottle 44
# Compares the corrected Picarro average for this bottle with the calibrated
# MC13 average.

# PICARRO
# 44		17:18	17:40	401,7
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 19, 16, 14, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 19, 16, 34, 0, tzinfo=timezone.utc)

# Picarro timestamps are cast to microseconds and labelled as UTC so they can
# be compared with the tz-aware window bounds.
df_p_bottle = (
    df_p.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)
# Collect once; the same frame feeds the average and both plots.
picarro_window = df_p_bottle.collect()

data = picarro_window["CO2_dry"].to_list()
# Keep only the samples between 30 % and 95 % of the window.
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 19, 16, 42, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 19, 17, 2, 0, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))
acropolis_window = df_bottle.collect()

data = acropolis_window["gmp343_filtered"].to_list()
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

# Apply the current two-point calibration (acropolis_slope / acropolis_intercept).
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
# Label corrected: the sensor is MC13 (previously printed as "MD13").
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(acropolis_window, col_name="gmp343_filtered")
plot_sensor_measurement(acropolis_window, col_name="sht45_humidity")


In [None]:
# bottle 45
# Compares the corrected Picarro average for this bottle with the calibrated
# MC13 average.

# PICARRO
# 45		17:41	18:02	403,6
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 19, 16, 36, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 19, 16, 56, 0, tzinfo=timezone.utc)

# Picarro timestamps are cast to microseconds and labelled as UTC so they can
# be compared with the tz-aware window bounds.
df_p_bottle = (
    df_p.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)
# Collect once; the same frame feeds the average and both plots.
picarro_window = df_p_bottle.collect()

data = picarro_window["CO2_dry"].to_list()
# Keep only the samples between 30 % and 95 % of the window.
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 19, 17, 5, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 19, 17, 24, 0, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))
acropolis_window = df_bottle.collect()

data = acropolis_window["gmp343_filtered"].to_list()
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

# Apply the current two-point calibration (acropolis_slope / acropolis_intercept).
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
# Label corrected: the sensor is MC13 (previously printed as "MD13").
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(acropolis_window, col_name="gmp343_filtered")
plot_sensor_measurement(acropolis_window, col_name="sht45_humidity")


In [None]:
# bottle 46
# Compares the corrected Picarro average for this bottle with the calibrated
# MC13 average.

# PICARRO
# 46		18:04	18:24	403,2
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 19, 16, 59, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 19, 17, 18, 0, tzinfo=timezone.utc)

# Picarro timestamps are cast to microseconds and labelled as UTC so they can
# be compared with the tz-aware window bounds.
df_p_bottle = (
    df_p.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)
# Collect once; the same frame feeds the average and both plots.
picarro_window = df_p_bottle.collect()

data = picarro_window["CO2_dry"].to_list()
# Keep only the samples between 30 % and 95 % of the window.
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 19, 17, 26, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 19, 17, 51, 0, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))
acropolis_window = df_bottle.collect()

data = acropolis_window["gmp343_filtered"].to_list()
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

# Apply the current two-point calibration (acropolis_slope / acropolis_intercept).
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
# Label corrected: the sensor is MC13 (previously printed as "MD13").
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(acropolis_window, col_name="gmp343_filtered")
plot_sensor_measurement(acropolis_window, col_name="sht45_humidity")


# ICOS Calibration MC13 20.12.2023

In [None]:

# ---
# Measurement
# ---
# NOTE: despite the "df_p_" prefix, both frames below are slices of `df`
# (the MC13 / ACROPOLIS data), not of the Picarro frame `df_p`.

#400
start_date = datetime(2023, 12, 20, 12, 36, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 20, 13, 11, 0, tzinfo=timezone.utc)

df_p_400 = df.sort("creation_timestamp").filter(pl.col("creation_timestamp").is_between(start_date, end_date))


#600
start_date = datetime(2023, 12, 20, 13, 12, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 20, 13, 42, 0, tzinfo=timezone.utc)

df_p_600 = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))

# Run each lazy query once and reuse the result for averaging and plotting
# instead of re-scanning the parquet file on every use.
window_400 = df_p_400.collect()
window_600 = df_p_600.collect()


# ---
# Averaging
# ---

# presumably the reference CO2 concentrations of the two calibration gases — confirm
true_values = [427.38, 610.95]

# 400 ppm
data = window_400["gmp343_filtered"].to_list()
measured_400 = average_bottle(data)

# 600 ppm
data = window_600["gmp343_filtered"].to_list()
measured_600 = average_bottle(data)

measured_values = [measured_400, measured_600]

# calculate slope and intercept; later cells apply them as avg * slope + intercept
acropolis_slope, acropolis_intercept = two_point_calibration(measured_values, true_values)
print(f"acropolis_slope = {acropolis_slope}, acropolis_intercept = {acropolis_intercept}")

# plot
plot_sensor_measurement(window_400, col_name="gmp343_filtered")
plot_sensor_measurement(window_400, col_name="sht45_humidity")
plot_sensor_measurement(window_600, col_name="gmp343_filtered")
plot_sensor_measurement(window_600, col_name="sht45_humidity")


In [None]:
# bottle 47
# Compares the corrected Picarro average for this bottle with the calibrated
# MC13 average.

# PICARRO
# 47	20.12.2023	10:49	11:09	400,2
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 20, 9, 43, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 20, 10, 3, 0, tzinfo=timezone.utc)

# Picarro timestamps are cast to microseconds and labelled as UTC so they can
# be compared with the tz-aware window bounds.
df_p_bottle = (
    df_p.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)
# Collect once; the same frame feeds the average and both plots.
picarro_window = df_p_bottle.collect()

data = picarro_window["CO2_dry"].to_list()
# Keep only the samples between 30 % and 95 % of the window.
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
# 11:10	11:30
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 20, 10, 10, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 20, 10, 30, 0, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))
acropolis_window = df_bottle.collect()

data = acropolis_window["gmp343_filtered"].to_list()
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

# Apply the current two-point calibration (acropolis_slope / acropolis_intercept).
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
# Label corrected: the sensor is MC13 (previously printed as "MD13").
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(acropolis_window, col_name="gmp343_filtered")
plot_sensor_measurement(acropolis_window, col_name="sht45_humidity")


In [None]:
# bottle 48
# Compares the corrected Picarro average for this bottle with the calibrated
# MC13 average.

# PICARRO
# 48		11:10	11:30	401,4
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 20, 10, 5, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 20, 10, 24, 0, tzinfo=timezone.utc)

# Picarro timestamps are cast to microseconds and labelled as UTC so they can
# be compared with the tz-aware window bounds.
df_p_bottle = (
    df_p.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)
# Collect once; the same frame feeds the average and both plots.
picarro_window = df_p_bottle.collect()

data = picarro_window["CO2_dry"].to_list()
# Keep only the samples between 30 % and 95 % of the window.
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
# 11:31	11:51
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 20, 10, 31, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 20, 10, 51, 0, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))
acropolis_window = df_bottle.collect()

data = acropolis_window["gmp343_filtered"].to_list()
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

# Apply the current two-point calibration (acropolis_slope / acropolis_intercept).
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
# Label corrected: the sensor is MC13 (previously printed as "MD13").
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(acropolis_window, col_name="gmp343_filtered")
plot_sensor_measurement(acropolis_window, col_name="sht45_humidity")


In [None]:
# bottle 49
# Compares the corrected Picarro average for this bottle with the calibrated
# MC13 average.

# PICARRO
# 49		11:31	11:51   401,3
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 20, 10, 26, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 20, 10, 45, 0, tzinfo=timezone.utc)

# Picarro timestamps are cast to microseconds and labelled as UTC so they can
# be compared with the tz-aware window bounds.
df_p_bottle = (
    df_p.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)
# Collect once; the same frame feeds the average and both plots.
picarro_window = df_p_bottle.collect()

data = picarro_window["CO2_dry"].to_list()
# Keep only the samples between 30 % and 95 % of the window.
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
# FIXME(review): this window (10:31-10:51 UTC) is identical to the ACROPOLIS
# window used for bottle 48 and looks like a copy-paste leftover. The MC13
# result below then describes bottle 48, not bottle 49 — confirm the real window.
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 20, 10, 31, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 20, 10, 51, 0, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))
acropolis_window = df_bottle.collect()

data = acropolis_window["gmp343_filtered"].to_list()
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

# Apply the current two-point calibration (acropolis_slope / acropolis_intercept).
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
# Label corrected: the sensor is MC13 (previously printed as "MD13").
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(acropolis_window, col_name="gmp343_filtered")
plot_sensor_measurement(acropolis_window, col_name="sht45_humidity")


**Note:** The Picarro time shift was corrected on the Windows PC before the following bottle measurements.

In [None]:
# bottle 50
# Compares the corrected Picarro average for this bottle with the calibrated
# MC13 average.

# PICARRO
# 50		13:54	14:14	401,4
# NOTE(review): the note above says 13:54 but the window starts at 13:45 —
# confirm whether this is intentional or a transposed digit.
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 20, 13, 45, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 20, 14, 14, 0, tzinfo=timezone.utc)

# Picarro timestamps are cast to microseconds and labelled as UTC so they can
# be compared with the tz-aware window bounds.
df_p_bottle = (
    df_p.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)
# Collect once; the same frame feeds the average and both plots.
picarro_window = df_p_bottle.collect()

data = picarro_window["CO2_dry"].to_list()
# Keep only the samples between 30 % and 95 % of the window.
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
# FIXME(review): this window (10:31-10:51 UTC) is identical to the ACROPOLIS
# window used for bottle 48 and looks like a copy-paste leftover. The MC13
# result below then describes bottle 48, not bottle 50 — confirm the real window.
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 20, 10, 31, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 20, 10, 51, 0, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))
acropolis_window = df_bottle.collect()

data = acropolis_window["gmp343_filtered"].to_list()
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

# Apply the current two-point calibration (acropolis_slope / acropolis_intercept).
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
# Label corrected: the sensor is MC13 (previously printed as "MD13").
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(acropolis_window, col_name="gmp343_filtered")
plot_sensor_measurement(acropolis_window, col_name="sht45_humidity")


In [None]:
# bottle 51
# Compares the corrected Picarro average for this bottle with the calibrated
# MC13 average.

# PICARRO
# 51		14:15	14:35	399,9
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 20, 14, 15, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 20, 14, 34, 0, tzinfo=timezone.utc)

# Picarro timestamps are cast to microseconds and labelled as UTC so they can
# be compared with the tz-aware window bounds.
df_p_bottle = (
    df_p.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)
# Collect once; the same frame feeds the average and both plots.
picarro_window = df_p_bottle.collect()

data = picarro_window["CO2_dry"].to_list()
# Keep only the samples between 30 % and 95 % of the window.
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
# FIXME(review): this window (10:31-10:51 UTC) is identical to the ACROPOLIS
# window used for bottle 48 and looks like a copy-paste leftover. The MC13
# result below then describes bottle 48, not bottle 51 — confirm the real window.
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 20, 10, 31, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 20, 10, 51, 0, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))
acropolis_window = df_bottle.collect()

data = acropolis_window["gmp343_filtered"].to_list()
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

# Apply the current two-point calibration (acropolis_slope / acropolis_intercept).
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
# Label corrected: the sensor is MC13 (previously printed as "MD13").
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(acropolis_window, col_name="gmp343_filtered")
plot_sensor_measurement(acropolis_window, col_name="sht45_humidity")


In [None]:
# bottle 52
# Compares the corrected Picarro average for this bottle with the calibrated
# MC13 average.

# PICARRO
# 52		14:35	14:50	401,7
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 20, 14, 35, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 20, 14, 50, 0, tzinfo=timezone.utc)

# Picarro timestamps are cast to microseconds and labelled as UTC so they can
# be compared with the tz-aware window bounds.
df_p_bottle = (
    df_p.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)
# Collect once; the same frame feeds the average and both plots.
picarro_window = df_p_bottle.collect()

data = picarro_window["CO2_dry"].to_list()
# Keep only the samples between 30 % and 95 % of the window.
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
# 14:51	15:17
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 20, 13, 56, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 20, 14, 17, 0, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))
acropolis_window = df_bottle.collect()

data = acropolis_window["gmp343_filtered"].to_list()
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

# Apply the current two-point calibration (acropolis_slope / acropolis_intercept).
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
# Label corrected: the sensor is MC13 (previously printed as "MD13").
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(acropolis_window, col_name="gmp343_filtered")
plot_sensor_measurement(acropolis_window, col_name="sht45_humidity")


In [None]:
# bottle 53
# Compares the corrected Picarro average for this bottle with the calibrated
# MC13 average.

# PICARRO
# 53		14:57	15:17	401,4
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 20, 14, 57, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 20, 15, 17, 0, tzinfo=timezone.utc)

# Picarro timestamps are cast to microseconds and labelled as UTC so they can
# be compared with the tz-aware window bounds.
df_p_bottle = (
    df_p.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)
# Collect once; the same frame feeds the average and both plots.
picarro_window = df_p_bottle.collect()

data = picarro_window["CO2_dry"].to_list()
# Keep only the samples between 30 % and 95 % of the window.
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
# 15:19	15:39
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 20, 14, 19, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 20, 14, 39, 0, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))
acropolis_window = df_bottle.collect()

data = acropolis_window["gmp343_filtered"].to_list()
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

# Apply the current two-point calibration (acropolis_slope / acropolis_intercept).
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
# Label corrected: the sensor is MC13 (previously printed as "MD13").
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(acropolis_window, col_name="gmp343_filtered")
plot_sensor_measurement(acropolis_window, col_name="sht45_humidity")


In [None]:
# bottle 54
# Compares the corrected Picarro average for this bottle with the calibrated
# MC13 average.

# PICARRO
# 54		15:19	15:39	401,1
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 20, 15, 19, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 20, 15, 39, 0, tzinfo=timezone.utc)

# Picarro timestamps are cast to microseconds and labelled as UTC so they can
# be compared with the tz-aware window bounds.
df_p_bottle = (
    df_p.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)
# Collect once; the same frame feeds the average and both plots.
picarro_window = df_p_bottle.collect()

data = picarro_window["CO2_dry"].to_list()
# Keep only the samples between 30 % and 95 % of the window.
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
# 15:40	16:00
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 20, 14, 41, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 20, 15, 0, 0, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))
acropolis_window = df_bottle.collect()

data = acropolis_window["gmp343_filtered"].to_list()
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

# Apply the current two-point calibration (acropolis_slope / acropolis_intercept).
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
# Label corrected: the sensor is MC13 (previously printed as "MD13").
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(acropolis_window, col_name="gmp343_filtered")
plot_sensor_measurement(acropolis_window, col_name="sht45_humidity")


In [None]:
# bottle 55
# Compares the corrected Picarro average for this bottle with the calibrated
# MC13 average.

# PICARRO
# 55		15:40	16:00	403,4
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 20, 15, 40, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 20, 16, 0, 0, tzinfo=timezone.utc)

# Picarro timestamps are cast to microseconds and labelled as UTC so they can
# be compared with the tz-aware window bounds.
df_p_bottle = (
    df_p.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)
# Collect once; the same frame feeds the average and both plots.
picarro_window = df_p_bottle.collect()

data = picarro_window["CO2_dry"].to_list()
# Keep only the samples between 30 % and 95 % of the window.
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
# 16:01	16:20
# NOTE(review): the window was 14:41-15:00 UTC, a verbatim copy of bottle 54's
# ACROPOLIS window. It now follows the noted 16:01-16:20, shifted by one hour
# like the other cells of this day (e.g. bottle 53: 15:19 -> 14:19 UTC).
# Verify against the plots below and fine-tune if needed.
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 20, 15, 1, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 20, 15, 20, 0, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))
acropolis_window = df_bottle.collect()

data = acropolis_window["gmp343_filtered"].to_list()
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

# Apply the current two-point calibration (acropolis_slope / acropolis_intercept).
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
# Label corrected: the sensor is MC13 (previously printed as "MD13").
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(acropolis_window, col_name="gmp343_filtered")
plot_sensor_measurement(acropolis_window, col_name="sht45_humidity")


In [None]:
# bottle 56
# Compares the corrected Picarro average for this bottle with the calibrated
# MC13 average.

# PICARRO
# 56		16:01	16:20	401,4
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 20, 16, 2, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 20, 16, 19, 0, tzinfo=timezone.utc)

# Picarro timestamps are cast to microseconds and labelled as UTC so they can
# be compared with the tz-aware window bounds.
df_p_bottle = (
    df_p.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)
# Collect once; the same frame feeds the average and both plots.
picarro_window = df_p_bottle.collect()

data = picarro_window["CO2_dry"].to_list()
# Keep only the samples between 30 % and 95 % of the window.
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
# -- --
# FIXME(review): no ACROPOLIS times are noted for this bottle ("-- --") and the
# window below (14:41-15:00 UTC) is identical to the one used for bottle 54.
# The MC13 result therefore presumably does not belong to bottle 56 — confirm.
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 20, 14, 41, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 20, 15, 0, 0, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))
acropolis_window = df_bottle.collect()

data = acropolis_window["gmp343_filtered"].to_list()
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

# Apply the current two-point calibration (acropolis_slope / acropolis_intercept).
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
# Label corrected: the sensor is MC13 (previously printed as "MD13").
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(acropolis_window, col_name="gmp343_filtered")
plot_sensor_measurement(acropolis_window, col_name="sht45_humidity")


# ICOS Calibration MC13 21.12.2023

In [None]:

# ---
# Measurement
# ---
# NOTE: despite the "df_p_" prefix, both frames below are slices of `df`
# (the MC13 / ACROPOLIS data), not of the Picarro frame `df_p`.

#400
start_date = datetime(2023, 12, 21, 9, 21, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 21, 9, 51, 0, tzinfo=timezone.utc)

df_p_400 = df.sort("creation_timestamp").filter(pl.col("creation_timestamp").is_between(start_date, end_date))


#600
# NOTE(review): this window starts at 09:51, exactly where the 400 ppm window
# ends. is_between includes both bounds by default, so a sample stamped exactly
# 09:51:00 would be counted in both windows — confirm this is acceptable.
start_date = datetime(2023, 12, 21, 9, 51, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 21, 10, 11, 0, tzinfo=timezone.utc)

df_p_600 = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))

# Run each lazy query once and reuse the result for averaging and plotting
# instead of re-scanning the parquet file on every use.
window_400 = df_p_400.collect()
window_600 = df_p_600.collect()


# ---
# Averaging
# ---

# presumably the reference CO2 concentrations of the two calibration gases — confirm
true_values = [427.38, 610.95]

# 400 ppm
data = window_400["gmp343_filtered"].to_list()
measured_400 = average_bottle(data)

# 600 ppm
data = window_600["gmp343_filtered"].to_list()
measured_600 = average_bottle(data)

measured_values = [measured_400, measured_600]

# calculate slope and intercept; later cells apply them as avg * slope + intercept
acropolis_slope, acropolis_intercept = two_point_calibration(measured_values, true_values)
print(f"acropolis_slope = {acropolis_slope}, acropolis_intercept = {acropolis_intercept}")

# plot
plot_sensor_measurement(window_400, col_name="gmp343_filtered")
plot_sensor_measurement(window_400, col_name="sht45_humidity")
plot_sensor_measurement(window_600, col_name="gmp343_filtered")
plot_sensor_measurement(window_600, col_name="sht45_humidity")


In [None]:
# bottle 57
# Average the Picarro and the ACROPOLIS (MC13) readings for this bottle, print
# their difference and plot both time windows.

# PICARRO
# 57		10:24	10:44	403,3
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 21, 10, 25, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 21, 10, 44, tzinfo=timezone.utc)

# Expose the Picarro "datetime" column as a microsecond, UTC-labelled
# "creation_timestamp" and keep only the rows inside the window.
df_p_bottle = (
    df_p.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)
# Run the lazy query once; the average and both plots reuse this frame.
picarro_window = df_p_bottle.collect()

data = picarro_window.get_column("CO2_dry").to_list()
# Average only the samples between 30 % and 95 % of the window (by position).
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
# -- --
# NOTE(review): no ACROPOLIS times are noted for this bottle ("-- --"); the
# window below is identical to the one in the bottle 58 cell and looks like a
# placeholder - confirm before trusting the difference.
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 21, 14, 41, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 21, 15, 0, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))
# Run the lazy query once; the average and both plots reuse this frame.
acropolis_window = df_bottle.collect()

data = acropolis_window.get_column("gmp343_filtered").to_list()
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

# acropolis_slope / acropolis_intercept are notebook globals set by a
# previously executed calibration cell.
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(acropolis_window, col_name="gmp343_filtered")
plot_sensor_measurement(acropolis_window, col_name="sht45_humidity")


In [None]:
# bottle 58
# Average the Picarro and the ACROPOLIS (MC13) readings for this bottle, print
# their difference and plot both time windows.

# PICARRO
# 58		10:45	11:05	403,1
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 21, 10, 45, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 21, 11, 5, tzinfo=timezone.utc)

# Expose the Picarro "datetime" column as a microsecond, UTC-labelled
# "creation_timestamp" and keep only the rows inside the window.
df_p_bottle = (
    df_p.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)
# Run the lazy query once; the average and both plots reuse this frame.
picarro_window = df_p_bottle.collect()

data = picarro_window.get_column("CO2_dry").to_list()
# Average only the samples between 30 % and 95 % of the window (by position).
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
# -- --
# NOTE(review): no ACROPOLIS times are noted for this bottle ("-- --"); the
# window below is identical to the one in the bottle 57 cell and looks like a
# placeholder - confirm before trusting the difference.
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 21, 14, 41, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 21, 15, 0, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))
# Run the lazy query once; the average and both plots reuse this frame.
acropolis_window = df_bottle.collect()

data = acropolis_window.get_column("gmp343_filtered").to_list()
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

# acropolis_slope / acropolis_intercept are notebook globals set by a
# previously executed calibration cell.
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(acropolis_window, col_name="gmp343_filtered")
plot_sensor_measurement(acropolis_window, col_name="sht45_humidity")


In [None]:
# bottle 59
# Average the Picarro and the ACROPOLIS (MC13) readings for this bottle, print
# their difference and plot both time windows.

# PICARRO
# 59		11:06	11:26	401,4
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 21, 11, 6, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 21, 11, 25, tzinfo=timezone.utc)

# Expose the Picarro "datetime" column as a microsecond, UTC-labelled
# "creation_timestamp" and keep only the rows inside the window.
df_p_bottle = (
    df_p.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)
# Run the lazy query once; the average and both plots reuse this frame.
picarro_window = df_p_bottle.collect()

data = picarro_window.get_column("CO2_dry").to_list()
# Average only the samples between 30 % and 95 % of the window (by position).
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
# 11:27	11:47
# (the window below is the noted time minus one hour, expressed in UTC)
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 21, 10, 27, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 21, 10, 47, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))
# Run the lazy query once; the average and both plots reuse this frame.
acropolis_window = df_bottle.collect()

data = acropolis_window.get_column("gmp343_filtered").to_list()
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

# acropolis_slope / acropolis_intercept are notebook globals set by a
# previously executed calibration cell.
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
print(f"Difference  MC13 - Picarro = {avg - avg_p}")

# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(acropolis_window, col_name="gmp343_filtered")
plot_sensor_measurement(acropolis_window, col_name="sht45_humidity")


In [None]:
# bottle 60
# Average the Picarro and the ACROPOLIS (MC13) readings for this bottle, print
# their difference and plot both time windows.

# PICARRO
# 60		11:27	11:47	403,1
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 21, 11, 27, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 21, 11, 47, tzinfo=timezone.utc)

# Expose the Picarro "datetime" column as a microsecond, UTC-labelled
# "creation_timestamp" and keep only the rows inside the window.
df_p_bottle = (
    df_p.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)
# Run the lazy query once; the average and both plots reuse this frame.
picarro_window = df_p_bottle.collect()

data = picarro_window.get_column("CO2_dry").to_list()
# Average only the samples between 30 % and 95 % of the window (by position).
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
# 11:48	12:08
# (the window below is the noted time minus one hour, expressed in UTC)
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 21, 10, 48, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 21, 11, 8, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))
# Run the lazy query once; the average and both plots reuse this frame.
acropolis_window = df_bottle.collect()

data = acropolis_window.get_column("gmp343_filtered").to_list()
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

# acropolis_slope / acropolis_intercept are notebook globals set by a
# previously executed calibration cell.
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(acropolis_window, col_name="gmp343_filtered")
plot_sensor_measurement(acropolis_window, col_name="sht45_humidity")


In [None]:
# bottle 61
# Average the Picarro and the ACROPOLIS (MC13) readings for this bottle, print
# their difference and plot both time windows.

# PICARRO
# 61		11:48	12:08	522,4
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 21, 11, 48, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 21, 12, 7, tzinfo=timezone.utc)

# Expose the Picarro "datetime" column as a microsecond, UTC-labelled
# "creation_timestamp" and keep only the rows inside the window.
df_p_bottle = (
    df_p.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)
# Run the lazy query once; the average and both plots reuse this frame.
picarro_window = df_p_bottle.collect()

data = picarro_window.get_column("CO2_dry").to_list()
# Average only the samples between 30 % and 95 % of the window (by position).
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
# 13:26	13:46
# (the window below is the noted time minus one hour, expressed in UTC)
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 21, 12, 26, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 21, 12, 46, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))
# Run the lazy query once; the average and both plots reuse this frame.
acropolis_window = df_bottle.collect()

data = acropolis_window.get_column("gmp343_filtered").to_list()
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

# acropolis_slope / acropolis_intercept are notebook globals set by a
# previously executed calibration cell.
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(acropolis_window, col_name="gmp343_filtered")
plot_sensor_measurement(acropolis_window, col_name="sht45_humidity")


In [None]:
# bottle 62
# Average the Picarro and the ACROPOLIS (MC13) readings for this bottle, print
# their difference and plot both time windows.

# PICARRO
# 62		13:26	13:46	522,6
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 21, 13, 26, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 21, 13, 45, tzinfo=timezone.utc)

# Expose the Picarro "datetime" column as a microsecond, UTC-labelled
# "creation_timestamp" and keep only the rows inside the window.
df_p_bottle = (
    df_p.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)
# Run the lazy query once; the average and both plots reuse this frame.
picarro_window = df_p_bottle.collect()

data = picarro_window.get_column("CO2_dry").to_list()
# Average only the samples between 30 % and 95 % of the window (by position).
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
# 13:48	14:08
# (the window below is the noted time minus one hour, expressed in UTC)
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 21, 12, 48, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 21, 13, 8, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))
# Run the lazy query once; the average and both plots reuse this frame.
acropolis_window = df_bottle.collect()

data = acropolis_window.get_column("gmp343_filtered").to_list()
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

# acropolis_slope / acropolis_intercept are notebook globals set by a
# previously executed calibration cell.
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(acropolis_window, col_name="gmp343_filtered")
plot_sensor_measurement(acropolis_window, col_name="sht45_humidity")


In [None]:
# bottle 63
# Average the Picarro and the ACROPOLIS (MC13) readings for this bottle, print
# their difference and plot both time windows.

# PICARRO
# 63		13:48	14:08	518,4
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 21, 13, 48, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 21, 14, 7, tzinfo=timezone.utc)

# Expose the Picarro "datetime" column as a microsecond, UTC-labelled
# "creation_timestamp" and keep only the rows inside the window.
df_p_bottle = (
    df_p.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)
# Run the lazy query once; the average and both plots reuse this frame.
picarro_window = df_p_bottle.collect()

data = picarro_window.get_column("CO2_dry").to_list()
# Average only the samples between 30 % and 95 % of the window (by position).
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
# 14:09	14:29
# (the window below is the noted time minus one hour, expressed in UTC)
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 21, 13, 9, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 21, 13, 29, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))
# Run the lazy query once; the average and both plots reuse this frame.
acropolis_window = df_bottle.collect()

data = acropolis_window.get_column("gmp343_filtered").to_list()
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

# acropolis_slope / acropolis_intercept are notebook globals set by a
# previously executed calibration cell.
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(acropolis_window, col_name="gmp343_filtered")
plot_sensor_measurement(acropolis_window, col_name="sht45_humidity")


In [None]:
# bottle 64
# Average the Picarro and the ACROPOLIS (MC13) readings for this bottle, print
# their difference and plot both time windows.

# PICARRO
# 64		14:09	14:29	522,3
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 21, 14, 9, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 21, 14, 29, tzinfo=timezone.utc)

# Expose the Picarro "datetime" column as a microsecond, UTC-labelled
# "creation_timestamp" and keep only the rows inside the window.
df_p_bottle = (
    df_p.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)
# Run the lazy query once; the average and both plots reuse this frame.
picarro_window = df_p_bottle.collect()

data = picarro_window.get_column("CO2_dry").to_list()
# Average only the samples between 30 % and 95 % of the window (by position).
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
# 14:30	14:50
# (the window below is the noted time minus one hour, expressed in UTC)
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 21, 13, 30, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 21, 13, 50, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))
# Run the lazy query once; the average and both plots reuse this frame.
acropolis_window = df_bottle.collect()

data = acropolis_window.get_column("gmp343_filtered").to_list()
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

# acropolis_slope / acropolis_intercept are notebook globals set by a
# previously executed calibration cell.
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(acropolis_window, col_name="gmp343_filtered")
plot_sensor_measurement(acropolis_window, col_name="sht45_humidity")


In [None]:
# bottle 65
# Average the Picarro and the ACROPOLIS (MC13) readings for this bottle, print
# their difference and plot both time windows.

# PICARRO
# 65		14:30	14:50	521,0
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 21, 14, 31, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 21, 14, 50, tzinfo=timezone.utc)

# Expose the Picarro "datetime" column as a microsecond, UTC-labelled
# "creation_timestamp" and keep only the rows inside the window.
df_p_bottle = (
    df_p.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)
# Run the lazy query once; the average and both plots reuse this frame.
picarro_window = df_p_bottle.collect()

data = picarro_window.get_column("CO2_dry").to_list()
# Average only the samples between 30 % and 95 % of the window (by position).
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
# 14:52	15:12
# (the window below is the noted time minus one hour, expressed in UTC)
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 21, 13, 52, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 21, 14, 12, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))
# Run the lazy query once; the average and both plots reuse this frame.
acropolis_window = df_bottle.collect()

data = acropolis_window.get_column("gmp343_filtered").to_list()
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

# acropolis_slope / acropolis_intercept are notebook globals set by a
# previously executed calibration cell.
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(acropolis_window, col_name="gmp343_filtered")
plot_sensor_measurement(acropolis_window, col_name="sht45_humidity")


In [None]:
# bottle 66
# Average the Picarro and the ACROPOLIS (MC13) readings for this bottle, print
# their difference and plot both time windows.

# PICARRO
# 66		14:52	15:12	520,7
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 21, 14, 51, 30, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 21, 15, 11, tzinfo=timezone.utc)

# Expose the Picarro "datetime" column as a microsecond, UTC-labelled
# "creation_timestamp" and keep only the rows inside the window.
df_p_bottle = (
    df_p.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)
# Run the lazy query once; the average and both plots reuse this frame.
picarro_window = df_p_bottle.collect()

data = picarro_window.get_column("CO2_dry").to_list()
# Average only the samples between 30 % and 95 % of the window (by position).
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
# 15:13	15:33
# (the window below is the noted time minus one hour, expressed in UTC)
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 21, 14, 13, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 21, 14, 33, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))
# Run the lazy query once; the average and both plots reuse this frame.
acropolis_window = df_bottle.collect()

data = acropolis_window.get_column("gmp343_filtered").to_list()
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

# acropolis_slope / acropolis_intercept are notebook globals set by a
# previously executed calibration cell.
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(acropolis_window, col_name="gmp343_filtered")
plot_sensor_measurement(acropolis_window, col_name="sht45_humidity")


In [None]:
# bottle 67
# Average the Picarro and the ACROPOLIS (MC13) readings for this bottle, print
# their difference and plot both time windows.

# PICARRO
# 67		15:13	15:33	522,2
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 21, 15, 13, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 21, 15, 32, tzinfo=timezone.utc)

# Expose the Picarro "datetime" column as a microsecond, UTC-labelled
# "creation_timestamp" and keep only the rows inside the window.
df_p_bottle = (
    df_p.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)
# Run the lazy query once; the average and both plots reuse this frame.
picarro_window = df_p_bottle.collect()

data = picarro_window.get_column("CO2_dry").to_list()
# Average only the samples between 30 % and 95 % of the window (by position).
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
# 15:34	15:54
# (the window below is the noted time minus one hour, expressed in UTC)
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 21, 14, 34, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 21, 14, 54, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))
# Run the lazy query once; the average and both plots reuse this frame.
acropolis_window = df_bottle.collect()

data = acropolis_window.get_column("gmp343_filtered").to_list()
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

# acropolis_slope / acropolis_intercept are notebook globals set by a
# previously executed calibration cell.
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(acropolis_window, col_name="gmp343_filtered")
plot_sensor_measurement(acropolis_window, col_name="sht45_humidity")


In [None]:
# bottle 68
# Average the Picarro and the ACROPOLIS (MC13) readings for this bottle, print
# their difference and plot both time windows.

# PICARRO
# 68		15:34	15:54	522,5
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 21, 15, 34, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 21, 15, 53, tzinfo=timezone.utc)

# Expose the Picarro "datetime" column as a microsecond, UTC-labelled
# "creation_timestamp" and keep only the rows inside the window.
df_p_bottle = (
    df_p.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)
# Run the lazy query once; the average and both plots reuse this frame.
picarro_window = df_p_bottle.collect()

data = picarro_window.get_column("CO2_dry").to_list()
# Average only the samples between 30 % and 95 % of the window (by position).
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
# 15:55	16:15
# (the window below is the noted time minus one hour, expressed in UTC)
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 21, 14, 55, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 21, 15, 15, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))
# Run the lazy query once; the average and both plots reuse this frame.
acropolis_window = df_bottle.collect()

data = acropolis_window.get_column("gmp343_filtered").to_list()
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

# acropolis_slope / acropolis_intercept are notebook globals set by a
# previously executed calibration cell.
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(acropolis_window, col_name="gmp343_filtered")
plot_sensor_measurement(acropolis_window, col_name="sht45_humidity")


In [None]:
# bottle 69
# Average the Picarro and the ACROPOLIS (MC13) readings for this bottle, print
# their difference and plot both time windows.

# PICARRO
# 69		15:55	16:15	522,1
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 21, 15, 55, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 21, 16, 14, tzinfo=timezone.utc)

# Expose the Picarro "datetime" column as a microsecond, UTC-labelled
# "creation_timestamp" and keep only the rows inside the window.
df_p_bottle = (
    df_p.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)
# Run the lazy query once; the average and both plots reuse this frame.
picarro_window = df_p_bottle.collect()

data = picarro_window.get_column("CO2_dry").to_list()
# Average only the samples between 30 % and 95 % of the window (by position).
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
# 16:16	16:36
# (the window below is the noted time minus one hour, expressed in UTC)
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 21, 15, 16, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 21, 15, 36, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))
# Run the lazy query once; the average and both plots reuse this frame.
acropolis_window = df_bottle.collect()

data = acropolis_window.get_column("gmp343_filtered").to_list()
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

# acropolis_slope / acropolis_intercept are notebook globals set by a
# previously executed calibration cell.
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(acropolis_window, col_name="gmp343_filtered")
plot_sensor_measurement(acropolis_window, col_name="sht45_humidity")


In [None]:
# bottle 70
# Average the Picarro and the ACROPOLIS (MC13) readings for this bottle, print
# their difference and plot both time windows.

# PICARRO
# 70		16:16	16:36	521,7
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 21, 16, 17, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 21, 16, 35, tzinfo=timezone.utc)

# Expose the Picarro "datetime" column as a microsecond, UTC-labelled
# "creation_timestamp" and keep only the rows inside the window.
df_p_bottle = (
    df_p.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)
# Run the lazy query once; the average and both plots reuse this frame.
picarro_window = df_p_bottle.collect()

data = picarro_window.get_column("CO2_dry").to_list()
# Average only the samples between 30 % and 95 % of the window (by position).
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
# 16:37	17:00
# (the window below is the noted time minus one hour, expressed in UTC)
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 21, 15, 37, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 21, 16, 0, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))
# Run the lazy query once; the average and both plots reuse this frame.
acropolis_window = df_bottle.collect()

data = acropolis_window.get_column("gmp343_filtered").to_list()
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

# acropolis_slope / acropolis_intercept are notebook globals set by a
# previously executed calibration cell.
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(acropolis_window, col_name="gmp343_filtered")
plot_sensor_measurement(acropolis_window, col_name="sht45_humidity")


In [None]:
# bottle 71
# Average the Picarro and the ACROPOLIS (MC13) readings for this bottle, print
# their difference and plot both time windows.

# PICARRO
# 71		16:41	17:00	521,6
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 21, 16, 41, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 21, 16, 59, tzinfo=timezone.utc)

# Expose the Picarro "datetime" column as a microsecond, UTC-labelled
# "creation_timestamp" and keep only the rows inside the window.
df_p_bottle = (
    df_p.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)
# Run the lazy query once; the average and both plots reuse this frame.
picarro_window = df_p_bottle.collect()

data = picarro_window.get_column("CO2_dry").to_list()
# Average only the samples between 30 % and 95 % of the window (by position).
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
# 17:01	17:21
# (the window below is the noted time minus one hour, expressed in UTC)
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 21, 16, 1, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 21, 16, 21, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))
# Run the lazy query once; the average and both plots reuse this frame.
acropolis_window = df_bottle.collect()

data = acropolis_window.get_column("gmp343_filtered").to_list()
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

# acropolis_slope / acropolis_intercept are notebook globals set by a
# previously executed calibration cell.
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(acropolis_window, col_name="gmp343_filtered")
plot_sensor_measurement(acropolis_window, col_name="sht45_humidity")


# ICOS Calibration MC13 22.12.2023

In [None]:

# ---
# Measurement
# ---
# Two-point calibration of the ACROPOLIS sensor: average gmp343_filtered over
# the two calibration windows and fit slope/intercept against true_values.
# Despite the df_p_ prefix both frames are cut from df, not from df_p.

#400
start_date = datetime(2023, 12, 22, 9, 32, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 22, 10, 0, tzinfo=timezone.utc)

df_p_400 = df.sort("creation_timestamp").filter(pl.col("creation_timestamp").is_between(start_date, end_date))


#600
# Same day as the 400 window and directly following it.
start_date = datetime(2023, 12, 22, 10, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 22, 10, 25, tzinfo=timezone.utc)

df_p_600 = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))

# Run each lazy query once; the averages and the plots below share the result.
window_400 = df_p_400.collect()
window_600 = df_p_600.collect()


# ---
# Averaging
# ---

# true_values[i] is the reference handed to two_point_calibration for
# measured_values[i] (index 0: 400 window, index 1: 600 window).
measured_values = [None, None]
true_values = [427.38, 610.95]

# 400 ppm
data = window_400.get_column("gmp343_filtered").to_list()
measured_values[0] = average_bottle(data)

# 600 ppm
data = window_600.get_column("gmp343_filtered").to_list()
measured_values[1] = average_bottle(data)

# calculate slope and intercept (used by the bottle cells that run afterwards)
acropolis_slope, acropolis_intercept = two_point_calibration(measured_values, true_values)
print(f"acropolis_slope = {acropolis_slope}, acropolis_intercept = {acropolis_intercept}")

# plot
plot_sensor_measurement(window_400, col_name="gmp343_filtered")
plot_sensor_measurement(window_400, col_name="sht45_humidity")
plot_sensor_measurement(window_600, col_name="gmp343_filtered")
plot_sensor_measurement(window_600, col_name="sht45_humidity")


In [None]:
# bottle 72
# Average the Picarro reading for this bottle and plot the Picarro and
# ACROPOLIS windows; the ACROPOLIS comparison printout is disabled here.

# PICARRO
# 72		10:34	10:54	521,0
# NOTE(review): this window is the noted time minus one hour, whereas the
# 21.12 cells use the noted Picarro times unchanged - confirm which is right.
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 22, 9, 34, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 22, 9, 53, tzinfo=timezone.utc)

# Expose the Picarro "datetime" column as a microsecond, UTC-labelled
# "creation_timestamp" and keep only the rows inside the window.
df_p_bottle = (
    df_p.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)
# Run the lazy query once; the average and both plots reuse this frame.
picarro_window = df_p_bottle.collect()

data = picarro_window.get_column("CO2_dry").to_list()
# Average only the samples between 30 % and 95 % of the window (by position).
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
# ----
# NOTE(review): no ACROPOLIS times are noted ("----"); the window below is
# dated 21.12 and identical to the one in the bottle 71 cell, and the result
# prints are disabled - looks like a placeholder, confirm.
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 21, 16, 1, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 21, 16, 21, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))
# Run the lazy query once; the average and both plots reuse this frame.
acropolis_window = df_bottle.collect()

data = acropolis_window.get_column("gmp343_filtered").to_list()
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

# acropolis_slope / acropolis_intercept are notebook globals set by a
# previously executed calibration cell.
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

#print(f"MC13 Average: {avg}")
#print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(acropolis_window, col_name="gmp343_filtered")
plot_sensor_measurement(acropolis_window, col_name="sht45_humidity")


In [None]:
# bottle 73
# Average the Picarro reading for this bottle and plot the Picarro and
# ACROPOLIS windows; the ACROPOLIS comparison printout is disabled here.

# PICARRO
# 73		10:55	11:16	520,9
# NOTE(review): this window is the noted time minus one hour, whereas the
# 21.12 cells use the noted Picarro times unchanged - confirm which is right.
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 22, 9, 55, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 22, 10, 16, tzinfo=timezone.utc)

# Expose the Picarro "datetime" column as a microsecond, UTC-labelled
# "creation_timestamp" and keep only the rows inside the window.
df_p_bottle = (
    df_p.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)
# Run the lazy query once; the average and both plots reuse this frame.
picarro_window = df_p_bottle.collect()

data = picarro_window.get_column("CO2_dry").to_list()
# Average only the samples between 30 % and 95 % of the window (by position).
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
# ----
# NOTE(review): no ACROPOLIS times are noted ("----"); the window below is
# dated 21.12 and identical to the one in the bottle 71 cell, and the result
# prints are disabled - looks like a placeholder, confirm.
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 21, 16, 1, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 21, 16, 21, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))
# Run the lazy query once; the average and both plots reuse this frame.
acropolis_window = df_bottle.collect()

data = acropolis_window.get_column("gmp343_filtered").to_list()
data_cut = data[int(len(data)*0.3):int(len(data)*0.95)]

# acropolis_slope / acropolis_intercept are notebook globals set by a
# previously executed calibration cell.
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

#print(f"MC13 Average: {avg}")
#print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(acropolis_window, col_name="gmp343_filtered")
plot_sensor_measurement(acropolis_window, col_name="sht45_humidity")


In [None]:
# bottle 74
# Compares the Picarro reference reading with the ACROPOLIS (MC13) sensor
# reading for one calibration bottle and plots both windows.

# PICARRO
# Log row: 74 | 11:17 | 11:39 | 520,3
# (log times are presumably local time, one hour ahead of the UTC window
# used below — confirm)
# NOTE(review): the window ends at 10:38 UTC, one minute before the logged
# end shifted by the same hour — confirm this trimming is intentional.
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 22, 10, 17, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 22, 10, 38, 0, tzinfo=timezone.utc)

# Lazy query: expose the Picarro timestamp under the column name used by the
# sensor data (microsecond unit, tagged as UTC), order by time and keep only
# the rows inside the window.
df_p_bottle = (
    df_p.select(
        pl.col("datetime")
        .dt.cast_time_unit("us")
        .dt.replace_time_zone("UTC")
        .alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)

# Materialise the window once and reuse it for the average and both plots
# instead of re-running the lazy query for every consumer.
picarro_window = df_p_bottle.collect()

data = picarro_window.get_column("CO2_dry").to_list()
# Keep only the samples between 30 % and 95 % of the window.
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

# Apply the linear Picarro correction to the bottle average.
avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
# Log times: 12:48 - 13:12
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 22, 11, 48, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 22, 12, 12, 0, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))

# Materialise once; reused for the average and both plots.
sensor_window = df_bottle.collect()

data = sensor_window.get_column("gmp343_filtered").to_list()
# Same 30 %–95 % trimming as for the Picarro data.
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

# Apply the linear ACROPOLIS correction to the bottle average.
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
# Label corrected from "MD13" to match the sensor name printed above.
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(sensor_window, col_name="gmp343_filtered")
plot_sensor_measurement(sensor_window, col_name="sht45_humidity")


In [None]:
# bottle 75
# Compares the Picarro reference reading with the ACROPOLIS (MC13) sensor
# reading for one calibration bottle and plots both windows.

# PICARRO
# Log row: 75 | 12:48 | 13:08 | 521,1
# (log times are presumably local time, one hour ahead of the UTC window
# used below — confirm)
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 22, 11, 48, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 22, 12, 8, 0, tzinfo=timezone.utc)

# Lazy query: expose the Picarro timestamp under the column name used by the
# sensor data (microsecond unit, tagged as UTC), order by time and keep only
# the rows inside the window.
df_p_bottle = (
    df_p.select(
        pl.col("datetime")
        .dt.cast_time_unit("us")
        .dt.replace_time_zone("UTC")
        .alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)

# Materialise the window once and reuse it for the average and both plots
# instead of re-running the lazy query for every consumer.
picarro_window = df_p_bottle.collect()

data = picarro_window.get_column("CO2_dry").to_list()
# Keep only the samples between 30 % and 95 % of the window.
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

# Apply the linear Picarro correction to the bottle average.
avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
# Log times: 13:13 - 13:36
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 22, 12, 13, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 22, 12, 36, 0, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))

# Materialise once; reused for the average and both plots.
sensor_window = df_bottle.collect()

data = sensor_window.get_column("gmp343_filtered").to_list()
# Same 30 %–95 % trimming as for the Picarro data.
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

# Apply the linear ACROPOLIS correction to the bottle average.
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
# Label corrected from "MD13" to match the sensor name printed above.
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(sensor_window, col_name="gmp343_filtered")
plot_sensor_measurement(sensor_window, col_name="sht45_humidity")


In [None]:
# bottle 76
# Compares the Picarro reference reading with the ACROPOLIS (MC13) sensor
# reading for one calibration bottle and plots both windows.

# PICARRO
# Log row: 76 | 13:16 | 13:36 | 521,6
# (log times are presumably local time, one hour ahead of the UTC window
# used below — confirm)
# NOTE(review): the window ends at 12:35 UTC, one minute before the logged
# end shifted by the same hour — confirm this trimming is intentional.
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 22, 12, 16, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 22, 12, 35, 0, tzinfo=timezone.utc)

# Lazy query: expose the Picarro timestamp under the column name used by the
# sensor data (microsecond unit, tagged as UTC), order by time and keep only
# the rows inside the window.
df_p_bottle = (
    df_p.select(
        pl.col("datetime")
        .dt.cast_time_unit("us")
        .dt.replace_time_zone("UTC")
        .alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)

# Materialise the window once and reuse it for the average and both plots
# instead of re-running the lazy query for every consumer.
picarro_window = df_p_bottle.collect()

data = picarro_window.get_column("CO2_dry").to_list()
# Keep only the samples between 30 % and 95 % of the window.
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

# Apply the linear Picarro correction to the bottle average.
avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
# Log times: 13:37 - 13:58
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 22, 12, 37, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 22, 12, 58, 0, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))

# Materialise once; reused for the average and both plots.
sensor_window = df_bottle.collect()

data = sensor_window.get_column("gmp343_filtered").to_list()
# Same 30 %–95 % trimming as for the Picarro data.
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

# Apply the linear ACROPOLIS correction to the bottle average.
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
# Label corrected from "MD13" to match the sensor name printed above.
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(sensor_window, col_name="gmp343_filtered")
plot_sensor_measurement(sensor_window, col_name="sht45_humidity")


In [None]:
# bottle 77
# Compares the Picarro reference reading with the ACROPOLIS (MC13) sensor
# reading for one calibration bottle and plots both windows.

# PICARRO
# Log row: 77 | 13:37 | 13:58 | 521,2
# (log times are presumably local time, one hour ahead of the UTC window
# used below — confirm)
# NOTE(review): the window ends at 12:57 UTC, one minute before the logged
# end shifted by the same hour — confirm this trimming is intentional.
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 22, 12, 37, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 22, 12, 57, 0, tzinfo=timezone.utc)

# Lazy query: expose the Picarro timestamp under the column name used by the
# sensor data (microsecond unit, tagged as UTC), order by time and keep only
# the rows inside the window.
df_p_bottle = (
    df_p.select(
        pl.col("datetime")
        .dt.cast_time_unit("us")
        .dt.replace_time_zone("UTC")
        .alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)

# Materialise the window once and reuse it for the average and both plots
# instead of re-running the lazy query for every consumer.
picarro_window = df_p_bottle.collect()

data = picarro_window.get_column("CO2_dry").to_list()
# Keep only the samples between 30 % and 95 % of the window.
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

# Apply the linear Picarro correction to the bottle average.
avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
# Log times: 14:00 - 14:21
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 22, 13, 0, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 22, 13, 21, 0, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))

# Materialise once; reused for the average and both plots.
sensor_window = df_bottle.collect()

data = sensor_window.get_column("gmp343_filtered").to_list()
# Same 30 %–95 % trimming as for the Picarro data.
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

# Apply the linear ACROPOLIS correction to the bottle average.
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
# Label corrected from "MD13" to match the sensor name printed above.
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(sensor_window, col_name="gmp343_filtered")
plot_sensor_measurement(sensor_window, col_name="sht45_humidity")


In [None]:
# bottle 78
# Compares the Picarro reference reading with the ACROPOLIS (MC13) sensor
# reading for one calibration bottle and plots both windows.

# PICARRO
# Log row: 78 | 14:00 | 14:21 | 518,4
# (log times are presumably local time, one hour ahead of the UTC window
# used below — confirm)
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 22, 13, 0, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 22, 13, 21, 0, tzinfo=timezone.utc)

# Lazy query: expose the Picarro timestamp under the column name used by the
# sensor data (microsecond unit, tagged as UTC), order by time and keep only
# the rows inside the window.
df_p_bottle = (
    df_p.select(
        pl.col("datetime")
        .dt.cast_time_unit("us")
        .dt.replace_time_zone("UTC")
        .alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)

# Materialise the window once and reuse it for the average and both plots
# instead of re-running the lazy query for every consumer.
picarro_window = df_p_bottle.collect()

data = picarro_window.get_column("CO2_dry").to_list()
# Keep only the samples between 30 % and 95 % of the window.
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

# Apply the linear Picarro correction to the bottle average.
avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
# Log times: 14:23 - 14:43
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 22, 13, 23, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 22, 13, 43, 0, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))

# Materialise once; reused for the average and both plots.
sensor_window = df_bottle.collect()

data = sensor_window.get_column("gmp343_filtered").to_list()
# Same 30 %–95 % trimming as for the Picarro data.
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

# Apply the linear ACROPOLIS correction to the bottle average.
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
# Label corrected from "MD13" to match the sensor name printed above.
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(sensor_window, col_name="gmp343_filtered")
plot_sensor_measurement(sensor_window, col_name="sht45_humidity")


In [None]:
# bottle 79
# Compares the Picarro reference reading with the ACROPOLIS (MC13) sensor
# reading for one calibration bottle and plots both windows.

# PICARRO
# Log row: 79 | 14:23 | 14:43 | 520,6
# (log times are presumably local time, one hour ahead of the UTC window
# used below — confirm)
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 22, 13, 23, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 22, 13, 43, 0, tzinfo=timezone.utc)

# Lazy query: expose the Picarro timestamp under the column name used by the
# sensor data (microsecond unit, tagged as UTC), order by time and keep only
# the rows inside the window.
df_p_bottle = (
    df_p.select(
        pl.col("datetime")
        .dt.cast_time_unit("us")
        .dt.replace_time_zone("UTC")
        .alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)

# Materialise the window once and reuse it for the average and both plots
# instead of re-running the lazy query for every consumer.
picarro_window = df_p_bottle.collect()

data = picarro_window.get_column("CO2_dry").to_list()
# Keep only the samples between 30 % and 95 % of the window.
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

# Apply the linear Picarro correction to the bottle average.
avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
# Log times: 14:44 - (only a start time is recorded; the window below spans
# 20 minutes)
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 22, 13, 44, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 22, 14, 4, 0, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))

# Materialise once; reused for the average and both plots.
sensor_window = df_bottle.collect()

data = sensor_window.get_column("gmp343_filtered").to_list()
# Same 30 %–95 % trimming as for the Picarro data.
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

# Apply the linear ACROPOLIS correction to the bottle average.
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
# Label corrected from "MD13" to match the sensor name printed above.
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(sensor_window, col_name="gmp343_filtered")
plot_sensor_measurement(sensor_window, col_name="sht45_humidity")


In [None]:
# bottle 80
# Compares the Picarro reference reading with the ACROPOLIS (MC13) sensor
# reading for one calibration bottle and plots both windows.

# PICARRO
# Log row: 80 | 14:44 | 15:04 | 520,6
# (log times are presumably local time, one hour ahead of the UTC window
# used below — confirm)
# NOTE(review): the window starts at 13:45 UTC, one minute after the logged
# start shifted by the same hour — confirm this trimming is intentional.
print("Starting Picarro Processing")
start_date = datetime(2023, 12, 22, 13, 45, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 22, 14, 4, 0, tzinfo=timezone.utc)

# Lazy query: expose the Picarro timestamp under the column name used by the
# sensor data (microsecond unit, tagged as UTC), order by time and keep only
# the rows inside the window.
df_p_bottle = (
    df_p.select(
        pl.col("datetime")
        .dt.cast_time_unit("us")
        .dt.replace_time_zone("UTC")
        .alias("creation_timestamp"),
        pl.col("CO2_dry"),
        pl.col("h2o_reported"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)

# Materialise the window once and reuse it for the average and both plots
# instead of re-running the lazy query for every consumer.
picarro_window = df_p_bottle.collect()

data = picarro_window.get_column("CO2_dry").to_list()
# Keep only the samples between 30 % and 95 % of the window.
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

# Apply the linear Picarro correction to the bottle average.
avg_p = average_bottle(data_cut) * picarro_slope + picarro_intercept

print(f"Picarro Average: {avg_p}")

# ACROPOLIS
# No log times recorded for this bottle.
# NOTE(review): this window (13:44-14:04 UTC) is identical to the ACROPOLIS
# window used in the bottle 79 cell, so the sensor average printed here is
# computed from the same samples — likely a copy-paste placeholder. Confirm
# the real measurement window for bottle 80 before trusting the difference.
print("Starting ACROPOLIS Processing")
start_date = datetime(2023, 12, 22, 13, 44, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 12, 22, 14, 4, 0, tzinfo=timezone.utc)

df_bottle = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))

# Materialise once; reused for the average and both plots.
sensor_window = df_bottle.collect()

data = sensor_window.get_column("gmp343_filtered").to_list()
# Same 30 %–95 % trimming as for the Picarro data.
data_cut = data[int(len(data) * 0.3):int(len(data) * 0.95)]

# Apply the linear ACROPOLIS correction to the bottle average.
avg = average_bottle(data_cut) * acropolis_slope + acropolis_intercept

print(f"MC13 Average: {avg}")
# Label corrected from "MD13" to match the sensor name printed above.
print(f"Difference  MC13 - Picarro = {avg - avg_p}")


# Plots
plot_sensor_measurement(picarro_window, col_name="CO2_dry")
plot_sensor_measurement(picarro_window, col_name="h2o_reported")

plot_sensor_measurement(sensor_window, col_name="gmp343_filtered")
plot_sensor_measurement(sensor_window, col_name="sht45_humidity")


In [None]:
# Disabled cell (kept for reference): when uncommented it selects the Picarro
# record between 2023-12-20 09:00 and 2023-12-22 16:00 UTC and plots the
# "CO2_dry" and "h2o_reported" columns over that whole span.
# print("Starting Picarro Processing")
# start_date = datetime(2023, 12, 20, 9, 0, 0).replace(tzinfo=timezone.utc)
# end_date = datetime(2023, 12, 22, 16, 0, 0).replace(tzinfo=timezone.utc)

# df_p_bottle = df_p.select(pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
#             pl.col("CO2_dry"),
#             pl.col("h2o_reported")) \
#     .sort("creation_timestamp") \
#     .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
    
# # Plots
# plot_sensor_measurement(df_p_bottle.collect(), col_name="CO2_dry")
# plot_sensor_measurement(df_p_bottle.collect(), col_name="h2o_reported")