In [1]:
# Imports & Inits

from datetime import datetime
from datetime import timezone
import polars as pl
import os

from utils import two_point_calibration, average_bottle, plot_sensor_measurement

# Data locations are configured through environment variables.
DATA_DIRECTORY = os.environ.get("DATA_DIRECTORY")
if DATA_DIRECTORY is None:
    # Fail early with a readable message; os.path.join(None, ...) below would
    # otherwise raise an opaque TypeError.
    raise RuntimeError("Environment variable DATA_DIRECTORY is not set.")

PICARRO_DATA_DIRECTORY = os.environ.get("PICARRO_DATA_DIRECTORY")
# Backward-compatible alias: the constant was originally spelled with a
# trailing lowercase "y". Kept so existing references keep working.
PICARRO_DATA_DIRECTORy = PICARRO_DATA_DIRECTORY

#------------
# Number of the sensor system analysed in this notebook.
sensor_id = 8
#------------

# Lazy query over the downloaded parquet file: rows of the selected system,
# ordered by creation time, restricted to positive "gmp343_filtered" readings
# and to the three columns used below. Nothing is read until .collect().
df = (
    pl.scan_parquet(os.path.join(DATA_DIRECTORY, "download", "acropolis.parquet"))
    .filter(pl.col("system_name") == f"tum-esm-midcost-raspi-{sensor_id}")
    .sort(pl.col("creation_timestamp"))
    .filter(pl.col("gmp343_filtered") > 0.0)
    .select(
        pl.col("creation_timestamp"),
        pl.col("gmp343_filtered"),
        pl.col("sht45_humidity"),
    )
)

In [2]:
# Measurement windows (UTC) of the two calibration bottles.

# 400 ppm bottle
start_date = datetime(2023, 10, 24, 8, 55, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 10, 24, 9, 23, 0, tzinfo=timezone.utc)

# Kept lazy: the calibration cell selects from this frame again.
df_p_400 = df.sort("creation_timestamp").filter(pl.col("creation_timestamp").is_between(start_date, end_date))

# Execute the lazy query once and reuse the result for both plots instead of
# re-running it for every plot.
df_p_400_collected = df_p_400.collect()
plot_sensor_measurement(df_p_400_collected, col_name="gmp343_filtered")
plot_sensor_measurement(df_p_400_collected, col_name="sht45_humidity")

# 600 ppm bottle
start_date = datetime(2023, 10, 24, 9, 24, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 10, 24, 9, 54, 0, tzinfo=timezone.utc)

df_p_600 = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))

df_p_600_collected = df_p_600.collect()
plot_sensor_measurement(df_p_600_collected, col_name="gmp343_filtered")
plot_sensor_measurement(df_p_600_collected, col_name="sht45_humidity")

In [3]:
# Reference concentrations of the two calibration bottles, in the same order
# as the frames below (400 ppm bottle first, 600 ppm bottle second).
true_values = [427.38, 610.95]

# Average the sensor readings of each bottle window.
measured_values = []
for bottle_frame in (df_p_400, df_p_600):
    data = bottle_frame.select(pl.col("gmp343_filtered")).collect().to_series().to_list()
    measured_values.append(average_bottle(data))

# Derive slope and intercept from the measured/reference pairs.
slope, intercept = two_point_calibration(measured_values, true_values)
print(f"acropolis_slope = {slope}, acropolis_intercept = {intercept}")

Uncut average: 437.8434523809524
Cut average: 437.8431192660551
Uncut average: 618.9788888888887
Cut average: 619.3111111111109
acropolis_slope = 1.0115833549132953, acropolis_intercept = -15.534811512858141


# Westfalen Gas Bottle #20

In [4]:
# Reference value for this bottle from the Picarro analysis:
# 401.80090700034015, compared here with three decimals.
picarro_reference = 401.800

# Measurement window (UTC) of the bottle.
start_date = datetime(2023, 10, 24, 11, 20, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 10, 24, 11, 54, 0, tzinfo=timezone.utc)

df_wg_20 = df.sort("creation_timestamp").filter(pl.col("creation_timestamp").is_between(start_date, end_date))

# Execute the lazy query once; the same frame feeds both plots and the average.
df_wg_20_collected = df_wg_20.collect()
plot_sensor_measurement(df_wg_20_collected, col_name="gmp343_filtered")
plot_sensor_measurement(df_wg_20_collected, col_name="sht45_humidity")

# calculate average
data = df_wg_20_collected.get_column("gmp343_filtered").to_list()

# Correct with the previous ICOS calibration: slope and intercept are the
# globals left by the most recently executed calibration cell.
avg = average_bottle(data) * slope + intercept

print(f"The bottle average is: {round(avg,3)} compared to {picarro_reference:.3f} measuremed by Picarro.")
print(round(avg,3) - picarro_reference)

Uncut average: 408.78620689655196
Cut average: 408.90984848484806
The bottle average is: 398.112 compared to 401.800 measuremed by Picarro.
-3.687999999999988


# ICOS Bottles with 0.2.0-beta.8

In [5]:
# Measurement windows (UTC) of the second calibration run.
# NOTE: this rebinds df_p_400 / df_p_600 from the earlier calibration, so the
# calibration cell that follows operates on these later windows.

# 400 ppm bottle
start_date = datetime(2023, 10, 24, 12, 37, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 10, 24, 13, 6, 0, tzinfo=timezone.utc)

# Kept lazy: the calibration cell selects from this frame again.
df_p_400 = df.sort("creation_timestamp").filter(pl.col("creation_timestamp").is_between(start_date, end_date))

# Execute the lazy query once and reuse the result for both plots instead of
# re-running it for every plot.
df_p_400_collected = df_p_400.collect()
plot_sensor_measurement(df_p_400_collected, col_name="gmp343_filtered")
plot_sensor_measurement(df_p_400_collected, col_name="sht45_humidity")

# 600 ppm bottle (its window lies before the 400 ppm window of this run)
start_date = datetime(2023, 10, 24, 11, 55, 30, tzinfo=timezone.utc)
end_date = datetime(2023, 10, 24, 12, 29, 0, tzinfo=timezone.utc)

df_p_600 = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date))

df_p_600_collected = df_p_600.collect()
plot_sensor_measurement(df_p_600_collected, col_name="gmp343_filtered")
plot_sensor_measurement(df_p_600_collected, col_name="sht45_humidity")

In [6]:
# Reference concentrations of the two calibration bottles, in the same order
# as the frames below (400 ppm bottle first, 600 ppm bottle second).
true_values = [427.38, 610.95]

# Average the sensor readings of each bottle window.
measured_values = []
for bottle_frame in (df_p_400, df_p_600):
    data = bottle_frame.select(pl.col("gmp343_filtered")).collect().to_series().to_list()
    measured_values.append(average_bottle(data))

# Derive slope and intercept from the measured/reference pairs.
slope, intercept = two_point_calibration(measured_values, true_values)
print(f"acropolis_slope = {slope}, acropolis_intercept = {intercept}")

Uncut average: 436.5752873563219
Cut average: 436.7123893805311
Uncut average: 617.08
Cut average: 617.1253846153844
acropolis_slope = 1.017498765879016, acropolis_intercept = -16.974317238766673


For comparison, the results of the first ICOS bottle calibration from earlier that day:

Uncut average: 437.8434523809524

Cut average: 437.8431192660551

Uncut average: 618.9788888888887

Cut average: 619.3111111111109

acropolis_slope = 1.0115833549132953, acropolis_intercept = -15.534811512858141

# Westfalen Gas Bottle #17

In [7]:
# Reference value for this bottle from the Picarro analysis:
# 390.0240811582326, compared here with three decimals.
picarro_reference = 390.024

# Measurement window (UTC) of the bottle.
start_date = datetime(2023, 10, 24, 13, 10, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 10, 24, 13, 35, 0, tzinfo=timezone.utc)

df_wg_17 = df.sort("creation_timestamp").filter(pl.col("creation_timestamp").is_between(start_date, end_date))

# Execute the lazy query once; the same frame feeds both plots and the average.
df_wg_17_collected = df_wg_17.collect()
plot_sensor_measurement(df_wg_17_collected, col_name="gmp343_filtered")
plot_sensor_measurement(df_wg_17_collected, col_name="sht45_humidity")

# calculate average
data = df_wg_17_collected.get_column("gmp343_filtered").to_list()

# Correct with the previous ICOS calibration: slope and intercept are the
# globals left by the most recently executed calibration cell.
avg = average_bottle(data) * slope + intercept

print(f"The bottle average is: {round(avg,3)} compared to  {picarro_reference:.3f} measuremed by Picarro.")
print(round(avg,3) - picarro_reference)

Uncut average: 399.00733333333335
Cut average: 399.04639175257734
The bottle average is: 389.055 compared to  390.024 measuremed by Picarro.
-0.9689999999999941


# Westfalen Gas Bottle #31

In [8]:
# Reference value for this bottle from the Picarro analysis:
# 806.6917643208662. The comparison keeps the three-decimal value 806.691
# (truncated, not rounded) that this cell has always used.
picarro_reference = 806.691

# Measurement window (UTC) of the bottle.
start_date = datetime(2023, 10, 24, 13, 37, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 10, 24, 14, 6, 0, tzinfo=timezone.utc)

df_wg_31 = df.sort("creation_timestamp").filter(pl.col("creation_timestamp").is_between(start_date, end_date))

# Execute the lazy query once; the same frame feeds both plots and the average.
df_wg_31_collected = df_wg_31.collect()
plot_sensor_measurement(df_wg_31_collected, col_name="gmp343_filtered")
plot_sensor_measurement(df_wg_31_collected, col_name="sht45_humidity")

# calculate average
data = df_wg_31_collected.get_column("gmp343_filtered").to_list()

# Correct with the previous ICOS calibration: slope and intercept are the
# globals left by the most recently executed calibration cell.
avg = average_bottle(data) * slope + intercept

print(f"The bottle average is: {round(avg,3)} compared to {picarro_reference:.3f} measuremed by Picarro.")
print(round(avg,3) - picarro_reference)

Uncut average: 811.589655172414
Cut average: 812.2185840707969
The bottle average is: 809.457 compared to 806.691 measuremed by Picarro.
2.7659999999999627
