In [4]:
# Imports & Inits

from datetime import datetime
from datetime import timezone
import polars as pl
import json
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import plotly.express as px
import math
import pyarrow as pa
import numpy as np
import os

# Directory holding the Picarro parquet export. Set the PICARRO_PATH
# environment variable to point at a different location; without it the
# original local default is used, so existing setups keep working.
picarro_path = os.environ.get(
    "PICARRO_PATH",
    r"/Users/patrickaigner/Documents/PROJECTS/ACROPOLIS/Database/PICARRO",
)

# Lazy scan: the queries built on `df` below only produce rows once
# `.collect()` is called on them.
df = pl.scan_parquet(os.path.join(picarro_path, "picarro.parquet"))

def plot_sensor_measurement(df_f, variant: str, col_name: str, sensor_id: int, filter=None):
    """Draw a time-series line plot of one measurement column.

    Args:
        df_f: Frame containing a 'date' column and the column named by
            ``col_name``. NOTE(review): presumably an eager polars DataFrame,
            since the dynamic grouping below is a polars API; confirm.
        variant: Not used by this function; kept so existing calls keep working.
        col_name: Name of the column plotted on the y axis; also used as the
            y-axis label and the plot title.
        sensor_id: Identifier shown as the legend label of the plotted line.
        filter: Optional window size forwarded as ``every`` to the polars
            dynamic grouping. When given, every non-date column is replaced
            by its mean per window before plotting.
    """
    if filter is not None:
        # polars renamed `groupby_dynamic` to `group_by_dynamic`; prefer the
        # new name and fall back to the old one on older installations.
        dynamic_group = getattr(df_f, "group_by_dynamic", None) or df_f.groupby_dynamic
        df_f = dynamic_group("date", every=filter).agg(pl.all().exclude('date').mean())

    # Label the line with the sensor id (previously the builtin `id` function
    # was passed here by mistake, producing a meaningless legend entry).
    sns.lineplot(data=df_f,
                 x='date',
                 y=col_name,
                 label=sensor_id)

    # set axes labels
    plt.xlabel('Time')
    plt.ylabel(col_name)
    plt.xticks(rotation=45)
    plt.legend(title='Systems', bbox_to_anchor=(1, 1))
    plt.title(label=col_name)
    # Tick format: day.month hour:minute
    plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%d.%m %H:%M'))

    # show the plot
    plt.show()
        
def two_point_calibration(measured_values, true_values):
    """Fit the straight line through two (measured, true) calibration points.

    The returned parameters satisfy ``y_true = slope * y_meas + intercept``
    for both supplied points.

    Args:
        measured_values: Two measured readings.
        true_values: The two corresponding reference values, in the same order.

    Returns:
        Tuple ``(slope, intercept)``.

    Raises:
        ValueError: If either input does not contain exactly two entries.
    """
    both_are_pairs = len(measured_values) == 2 and len(true_values) == 2
    if not both_are_pairs:
        raise ValueError("Both measured_values and true_values must have length 2")

    # Rise over run between the two calibration points
    true_span = true_values[1] - true_values[0]
    measured_span = measured_values[1] - measured_values[0]
    slope = true_span / measured_span

    # Anchor the line at the first point: y_true = m * y_meas + t
    intercept = true_values[0] - slope * measured_values[0]

    return slope, intercept

def average_bottle(conc_list):
    """Return the trimmed mean of a sequence of concentration readings.

    The first 30 % and the last 5 % of the readings are discarded and the
    remaining values are averaged. Both the untrimmed and the trimmed average
    are printed.

    Args:
        conc_list: Sequence of numeric readings in measurement order.

    Returns:
        The trimmed average, or 0 when ``conc_list`` is empty. If the sequence
        is too short for the trimmed window to contain any reading, the
        untrimmed average is returned instead.
    """
    count = len(conc_list)
    # Check for emptiness before dividing; the original computed the uncut
    # average first and therefore raised ZeroDivisionError on empty input.
    if count == 0:
        return 0

    uncut_avg = sum(conc_list) / count
    print(f"Uncut average: {uncut_avg}")

    conc_list_cut = conc_list[int(count * 0.3):int(count * 0.95)]
    if len(conc_list_cut) == 0:
        # Very short inputs (e.g. a single reading) leave nothing after
        # trimming; fall back to the untrimmed average rather than dividing
        # by zero.
        cut_avg = uncut_avg
    else:
        cut_avg = sum(conc_list_cut) / len(conc_list_cut)
    print(f"Cut average: {cut_avg}")

    return cut_avg

In [32]:
# Time window (UTC) of the ~600 ppm bottle measurement
start_date = datetime(2023, 10, 23, 13, 6, 0, tzinfo=timezone.utc)
end_date = datetime(2023, 10, 23, 13, 31, 0, tzinfo=timezone.utc)

# Lazy query: microsecond, UTC-tagged timestamp plus dry CO2, sorted by time
# and restricted to the window above.
df_p_600 = (
    df.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)

# Time window (UTC) of the ~400 ppm bottle measurement
start_date = datetime(2023, 10, 23, 13, 31, 30, tzinfo=timezone.utc)
end_date = datetime(2023, 10, 23, 14, 6, 0, tzinfo=timezone.utc)

# Same query as above, restricted to the second window.
df_p_400 = (
    df.select(
        pl.col("datetime").dt.cast_time_unit("us").dt.replace_time_zone("UTC").alias("creation_timestamp"),
        pl.col("CO2_dry"),
    )
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
)

In [33]:
# Execute the lazy query for the 400 ppm window and display the resulting rows
df_p_400.collect()

creation_timestamp,CO2_dry
"datetime[μs, UTC]",f64
2023-10-23 13:31:30.887 UTC,424.830962
2023-10-23 13:31:31.811 UTC,424.830962
2023-10-23 13:31:32.316 UTC,424.749556
2023-10-23 13:31:33.861 UTC,424.749556
2023-10-23 13:31:34.776 UTC,424.749556
2023-10-23 13:31:35.286 UTC,424.679845
2023-10-23 13:31:36.838 UTC,424.679845
2023-10-23 13:31:37.759 UTC,424.679845
2023-10-23 13:31:38.267 UTC,424.684115
2023-10-23 13:31:39.802 UTC,424.684115


In [34]:
# Execute the lazy query for the 600 ppm window and display the resulting rows
df_p_600.collect()

creation_timestamp,CO2_dry
"datetime[μs, UTC]",f64
2023-10-23 13:06:00.611 UTC,607.296466
2023-10-23 13:06:01.529 UTC,607.296466
2023-10-23 13:06:02.036 UTC,607.29622
2023-10-23 13:06:03.581 UTC,607.29622
2023-10-23 13:06:04.503 UTC,607.29622
2023-10-23 13:06:05.006 UTC,607.285509
2023-10-23 13:06:06.553 UTC,607.285509
2023-10-23 13:06:07.479 UTC,607.285509
2023-10-23 13:06:07.986 UTC,607.251496
2023-10-23 13:06:09.532 UTC,607.251496


In [35]:
# Reference ("true") CO2 values of the two bottles, ordered [400 ppm, 600 ppm]
true_values = [427.38, 610.95]

# Trimmed mean of the CO2_dry readings of each bottle window, in the same
# order as true_values. average_bottle() drops the first 30 % and the last
# 5 % of the readings and prints both the uncut and the cut average.
measured_values = [
    average_bottle(window.select(pl.col("CO2_dry")).collect().to_series().to_list())
    for window in (df_p_400, df_p_600)
]

# A non-positive average means a window held no usable readings; stop here
# instead of fitting a calibration to it.
assert all(value > 0 for value in measured_values), "calibration window without usable CO2_dry data"

# Slope and intercept mapping the measured averages onto the reference values
two_point_calibration(measured_values, true_values)

424.6064014685599
424.6023673882955
607.0610295264346
607.0095882593893


(1.006374633215469, 0.0709482571842841)

In [36]:
# Sanity check: cut 600 ppm average * slope + intercept should reproduce the
# 600 ppm reference value (610.95)
607.0095882593893 * 1.006374633215469 + 0.0709482571842841

610.95

In [37]:
# Sanity check: cut 400 ppm average * slope + intercept should reproduce the
# 400 ppm reference value (427.38)
424.6023673882955 * 1.006374633215469 + 0.0709482571842841

427.38