In [20]:
# Imports & Inits

from datetime import datetime
from datetime import timezone
import polars as pl
import os
import sys
import plotly.express as px


# Directory layout, resolved relative to the current working directory
# (two levels up is treated as the project root).
PROJECT_PATH = os.path.abspath(os.path.join(os.pardir, os.pardir))
PIPELINE_PATH = os.path.join(PROJECT_PATH, "pipeline")
DATA_DIRECTORY = os.path.join(PROJECT_PATH, "data")

# Register the pipeline directory for imports; the membership check keeps
# sys.path free of duplicates when this cell is executed more than once.
if PIPELINE_PATH not in sys.path:
    sys.path.append(PIPELINE_PATH)

# These imports resolve only after PIPELINE_PATH has been added to sys.path above.
from utils.paths import PROCESSED_PICARRO_DATA_DIRECTORY
from utils.calibration_processing import process_bottle

# Stop right away if the processed Picarro data directory does not exist.
assert os.path.exists(PROCESSED_PICARRO_DATA_DIRECTORY)

In [21]:
# Outer time window (UTC) applied to the Picarro data before any
# per-measurement slicing.
start_date = datetime(2024, 12, 3, 0, 0, 0, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 21, 0, 0, 0, tzinfo=timezone.utc)

# Lazy query: scan the parquet file, keep rows inside the window and only the
# three columns used below. Data is materialised on .collect().
df_p = (
    pl.scan_parquet(
        os.path.join(
            PROCESSED_PICARRO_DATA_DIRECTORY,
            "Calibrated_Raw_DWD_Picarro_G2301_413.parquet",
        )
    )
    .filter(pl.col("datetime").is_between(start_date, end_date))
    .select(["datetime", "picarro_corrected", "h2o_reported"])
)
df_p.head(1).collect()

datetime,picarro_corrected,h2o_reported
"datetime[μs, UTC]",f64,f64
2024-12-03 12:53:31.812 UTC,427.175463,0.003362


In [22]:
def plot_sensor_measurement(df, col_name: str, filter = None):
    """Show a plotly line chart of ``col_name`` over the ``datetime`` column.

    Args:
        df: Frame containing a ``datetime`` column and ``col_name``. When
            ``filter`` is given it must provide polars' dynamic group-by
            (``group_by_dynamic``, or ``groupby_dynamic`` on older versions).
        col_name: Column plotted on the y-axis; also used as the figure title.
        filter: Optional window length passed as ``every=`` to the dynamic
            group-by (presumably a polars duration string such as "1m" —
            TODO confirm). When set, all non-datetime columns are averaged
            per window before plotting. The name shadows the ``filter``
            builtin but is kept for backward compatibility.

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    if filter is not None:
        # polars renamed `groupby_dynamic` to `group_by_dynamic` (0.19) and later
        # removed the old spelling; support both so the helper works across versions.
        # NOTE(review): polars documents that the index column must be sorted for
        # dynamic grouping — callers are assumed to pass time-ordered data.
        group_by_dynamic = getattr(df, "group_by_dynamic", None) or df.groupby_dynamic
        df = group_by_dynamic("datetime", every=filter).agg(
            [
                pl.all().exclude(["datetime"]).mean(),
            ]
        )

    fig = px.line(
        df,
        x="datetime",
        y=col_name,
        markers=True,
        title=col_name,
    )
    fig.show()
    
def process_measurement(start_date, end_date, plot=False, trim_start=0.3, trim_end=0.95):
    """Evaluate one measurement window of the Picarro data and print the result.

    Filters the module-level lazy frame ``df_p`` to rows whose ``datetime`` lies
    between ``start_date`` and ``end_date``, drops the leading and trailing part
    of the ``picarro_corrected`` samples, and hands the remaining samples to
    ``process_bottle``. The value it returns is printed (labelled as the median
    in the output).

    Args:
        start_date: Start of the window (timezone-aware datetime).
        end_date: End of the window (timezone-aware datetime).
        plot: When True, also plot ``picarro_corrected`` and ``h2o_reported``
            for the full (untrimmed) window.
        trim_start: Fraction of samples skipped at the beginning of the window.
            Defaults to 0.3, the value previously hard-coded.
        trim_end: Fraction of the window (measured from its start) at which
            the evaluated slice ends. Defaults to 0.95, as previously hard-coded.

    Raises:
        ValueError: If the trim fractions do not satisfy
            ``0 <= trim_start < trim_end <= 1``.

    Returns:
        None. The result is printed, not returned.
    """
    if not 0 <= trim_start < trim_end <= 1:
        raise ValueError(
            f"Invalid trim fractions: trim_start={trim_start}, trim_end={trim_end}"
        )

    df_p_bottle = df_p.filter(
        pl.col("datetime").is_between(start_date, end_date)
    ).collect()

    data = df_p_bottle.get_column("picarro_corrected").to_list()
    n_samples = len(data)
    # Keep only the [trim_start, trim_end) share of the samples.
    data_cut = data[int(n_samples * trim_start):int(n_samples * trim_end)]

    median = process_bottle(data_cut, ignore_len=True)

    print(f"Picarro Median Reading: {median}")

    # Plots
    if plot:
        plot_sensor_measurement(df_p_bottle, col_name="picarro_corrected")
        plot_sensor_measurement(df_p_bottle, col_name="h2o_reported")

In [23]:
# 81 12:44 - 13:04
# Reading: 518.9

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 11, 12, 44, 49, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 11, 13, 4, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 522.399596037361


In [24]:
# 82 13:08 - 13:29
# Reading: 518.3

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 11, 13, 8, 0, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 11, 13, 28, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 522.0029551874638


In [25]:
# 83 15:25 - 15:45
# Reading: 518.9

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 3, 15, 25, 0, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 3, 15, 45, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 522.4975732853461


In [26]:
# 84  16:05 - 16:25
# Reading: 516.0

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 3, 16, 5, 6, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 3, 16, 25, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 519.4809280817365


In [27]:
# 85 UTC: 13:31 - 13:51
# Reading: 519.0

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 11, 13, 31, 19, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 11, 13, 51, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 522.6054309448991


In [28]:
# 86 UTC: 13:53 - 14:13
# Reading: 518.7

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 11, 13, 53, 20, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 11, 14, 13, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 522.4460071604951


In [29]:
# 87 UTC: 14:15 - 14:35
# Reading: 518.8

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 11, 14, 15, 0, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 11, 14, 35, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 522.3058339682808


In [30]:
# 88 UTC: 14:38 - 14:58
# Reading: 518.4

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 11, 14, 38, 29, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 11, 14, 58, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 522.0212340787328


In [31]:
# 89 UTC: 15:01 - 15:21
# Reading: 519.6

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 11, 15, 1, 0, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 11, 15, 21, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 523.2034187247424


In [32]:
# 90 UTC: 13:00 - 13:22
# Reading: 519.1

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 18, 13, 2, 46, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 18, 13, 21, 40, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 522.734519152853


In [33]:
# 91 UTC: 13:22 - 13:42
# Reading: 519.1

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 18, 13, 22, 5, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 18, 13, 42, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 522.7434156870997


In [34]:
# 92 UTC: 13:42 - 14:02
# Reading: 516.4

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 18, 13, 43, 29, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 18, 14, 2, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 519.9676870656331


In [35]:
# 93 UTC: 14:02 - 14:22
# Reading: 519.0

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 18, 14, 3, 45, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 18, 14, 22, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 522.5332816091653


In [36]:
# 94 UTC: 14:22 - 14:42
# Reading: 518.3

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 18, 14, 23, 14, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 18, 14, 42, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 521.8665165561736


In [37]:
# 95 UTC: 14:42 - 15:02
# Reading: 518.7

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 18, 14, 43, 54, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 18, 15, 2, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 522.3340173370009


In [38]:
# 96 UTC: 15:02 - 15:22
# Reading: 518.1

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 18, 15, 4, 7, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 18, 15, 22, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 521.8625590800384


In [39]:
# 97 UTC: 15:22 - 15:42
# Reading: 518.8

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 18, 15, 23, 25, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 18, 15, 42, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 522.1986144749345


In [40]:
# 98 UTC: 10:21 - 10:41
# Reading: 519.0

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 19, 10, 21, 52, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 19, 10, 40, 49, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 522.6351899066206


In [41]:
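# 99 UTC: 12:32 - 12:53  (NOTE(review): start_date below is 12:43:30, about 11 min after the noted start — confirm this is intentional)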
# Reading: 519.3

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 19, 12, 43, 30, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 19, 12, 53, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 522.884522933563


In [42]:
# 100 UTC: 12:52 - 13:14
# Reading: 519.0

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 19, 12, 53, 24, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 19, 13, 14, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 522.4917033240507


In [43]:
# 101 UTC: 13:14 - 13:35
# Reading: 399.4

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 19, 13, 14, 37, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 19, 13, 34, 33, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 401.95058691676235


In [44]:
# 102 UTC: 13:35 - 13:55
# Reading: 400.1

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 19, 13, 35, 33, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 19, 13, 55, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 402.8450981061115


In [45]:
# 103 UTC: 13:55 - 14:15
# Reading: 400.1

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 19, 13, 56, 43, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 19, 14, 15, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 402.8695100323505


In [46]:
# 104 UTC: 14:15 - 14:35
# Reading: 399.1

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 19, 14, 15, 31, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 19, 14, 35, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 401.8133091910395


In [47]:
# 105 UTC: 14:55 - 15:15
# Reading: 397.9

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 19, 14, 57, 40, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 19, 15, 15, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 400.6500867493169


In [48]:
# 106 UTC: 15:15 - 15:35
# Reading: 399.0

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 19, 15, 16, 42, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 19, 15, 35, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 401.7771358183205


In [49]:
# 107 UTC: 15:35 - 15:55
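# Reading: 499.3  (NOTE(review): probably a typo for 399.3 — the Picarro median for this window is ~402 and the neighbouring readings are ~399-400; confirm against the measurement log)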

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 19, 15, 35, 41, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 19, 15, 54, 50, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 402.0895296633208


In [50]:
# 108 UTC: 08:45 - 09:05
# Reading: 400.0

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 20, 8, 45, 10, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 20, 9, 5, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 402.81699400731907


In [51]:
# 109 UTC: 09:05 - 09:25
# Reading: 396.9

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 20, 9, 8, 30, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 20, 9, 25, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 399.65400877556243


In [52]:
# 110 UTC: 09:25 - 09:45
# Reading: 399.0

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 20, 9, 27, 0, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 20, 9, 45, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 401.76477491943956


In [53]:
# 111 UTC: 10:06 - 10:26
# Reading: 399.9

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 20, 10, 6, 0, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 20, 10, 26, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 402.6973904653332


In [54]:
# 112 UTC: 10:26 - 10:46
# Reading: 399.9

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 20, 10, 27, 20, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 20, 10, 46, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 402.7038303868714


In [55]:
# 113 UTC: 12:08 - 12:28
# Reading: 399.1

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 20, 12, 8, 30, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 20, 12, 28, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 401.8625188426242


In [56]:
# 114 UTC: 12:28 - 12:49
# Reading: 399.1

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 20, 12, 29, 10, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 20, 12, 49, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 401.98147418130424


In [57]:
# 115 UTC: 12:49 - 13:09
# Reading: 399.9

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 20, 12, 49, 40, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 20, 13, 9, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 402.6424573618839


In [58]:
# 116 UTC: 13:09 - 13:29
# Reading: 399.4

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 20, 13, 10, 10, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 20, 13, 29, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 402.0960448177989


In [59]:
# 117 UTC: 13:29 - 13:49
# Reading: 399.0

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 20, 13, 30, 10, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 20, 13, 49, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 401.6390980113765


In [60]:
# 118 UTC: 13:49 - 14:09
# Reading: 399.2

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 20, 13, 50, 30, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 20, 14, 9, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 401.8435636753806


In [61]:
# 119 UTC: 14:09 - 14:29
# Reading: (missing — TODO: add the reference reading for measurement 119)

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 20, 14, 9, 40, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 20, 14, 29, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 403.0266976175326


In [62]:
# 120 UTC: 14:29 - 14:49
# Reading:  399.9

# Timezone-aware (UTC) evaluation window for this measurement.
start_date = datetime(2024, 12, 20, 14, 29, 40, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 20, 14, 49, 0, tzinfo=timezone.utc)

process_measurement(start_date, end_date)

Picarro Median Reading: 402.66182572136006
