In [None]:
from datetime import datetime
import polars as pl
import os
import sys
import plotly.express as px
import polars.selectors as cs


# Resolve project directories relative to the notebook's working directory:
# the project root is taken to be two levels above it.
PROJECT_PATH = os.path.abspath(os.path.join("..", ".."))
PIPELINE_PATH = os.path.join(PROJECT_PATH, "pipeline")
DATA_DIRECTORY = os.path.join(PROJECT_PATH, "data")

# Make the pipeline directory importable so the `utils` imports below resolve.
if PIPELINE_PATH not in sys.path:
    sys.path.append(PIPELINE_PATH)

from utils.paths import PROCESSED_PICARRO_DATA_DIRECTORY
from utils.calibration_processing import process_bottle

# Fail early, naming the offending path, if the processed data is missing.
assert os.path.exists(PROCESSED_PICARRO_DATA_DIRECTORY), (
    f"Processed Picarro data directory not found: {PROCESSED_PICARRO_DATA_DIRECTORY}"
)

In [8]:
# Time window (inclusive on both ends) of Picarro data to load.
start_date = datetime(2024, 12, 3)
end_date = datetime(2024, 12, 21)

# Load Picarro Data
# DWD Picarro: "Calibrated_1_min_DWD_Picarro_G2301_413.parquet"
picarro_file = os.path.join(
    PROCESSED_PICARRO_DATA_DIRECTORY,
    "Calibrated_1_min_DWD_Picarro_G2301_413.parquet",
)

# Scan lazily, restrict to the window and the needed columns, then materialise.
df_p = (
    pl.scan_parquet(picarro_file)
    .filter(pl.col("datetime").is_between(start_date, end_date))
    .select(["datetime", "picarro_corrected", "h2o_reported"])
    .collect()
)

# Preview the first and last loaded rows to confirm the covered time range.
pl.concat([df_p.head(1), df_p.tail(1)])

datetime,picarro_corrected,h2o_reported
datetime[ms],f64,f64
2024-12-03 12:53:00,427.302241,0.001157
2024-12-20 16:17:00,610.944729,-0.001034


In [9]:
def plot_sensor_measurement(df, col_name: str, filter=None):
    """Show a line chart of one column of ``df`` against its "datetime" column.

    Args:
        df: Polars DataFrame containing a "datetime" column and ``col_name``.
        col_name: Column plotted on the y-axis; also used as the figure title.
        filter: Optional window length passed as ``every`` to the dynamic
            group-by (a Polars duration string such as "5m"). When given,
            all numeric columns are averaged per window before plotting.
            ``None`` (default) plots the rows as they are.

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    if filter is not None:
        # Dynamic grouping expects the index column to be sorted.
        df = df.sort("datetime")
        # `groupby_dynamic` was renamed to `group_by_dynamic` in newer Polars
        # releases; use whichever spelling the installed version provides.
        group_by_dynamic = getattr(df, "group_by_dynamic", None) or df.groupby_dynamic
        df = group_by_dynamic("datetime", every=filter).agg(cs.numeric().mean())

    fig = px.line(
        df,
        x="datetime",
        y=col_name,
        markers=True,
        title=col_name,
    )
    fig.show()
    
def process_measurement(start_date, end_date, plot=False, df=None,
                        trim_start=0.3, trim_end=0.95):
    """Print the processed Picarro reading for one measurement window.

    Rows with "datetime" between ``start_date`` and ``end_date`` are selected,
    the "picarro_corrected" samples are trimmed by index to the
    ``[trim_start, trim_end)`` fraction of the window, and the trimmed samples
    are handed to ``process_bottle``; its result is printed.

    Args:
        start_date: Start of the measurement window (datetime).
        end_date: End of the measurement window (datetime).
        plot: When True, also plot "picarro_corrected" and "h2o_reported"
            for the (untrimmed) window.
        df: Frame to read from. Defaults to the notebook-level ``df_p``.
        trim_start: Fraction of samples dropped from the start of the window.
        trim_end: Fraction of the window at which the kept samples end.

    Returns:
        None. The result is printed.
    """
    if df is None:
        # Fall back to the Picarro frame loaded earlier in the notebook.
        df = df_p

    df_p_bottle = df.filter(pl.col("datetime").is_between(start_date, end_date))

    data = df_p_bottle["picarro_corrected"].to_list()
    # Keep only the middle part of the window
    # (presumably to skip the settling phase — confirm with the lab protocol).
    data_cut = data[int(len(data) * trim_start):int(len(data) * trim_end)]

    if not data_cut:
        # How process_bottle treats an empty list is not visible here, so
        # report the empty window explicitly instead of passing it on.
        print(f"No Picarro samples to process between {start_date} and {end_date}")
        return

    median = process_bottle(data_cut, ignore_len=True)

    print(f"Picarro Median Reading: {median}")

    # Plots
    if plot:
        plot_sensor_measurement(df_p_bottle, col_name="picarro_corrected")
        plot_sensor_measurement(df_p_bottle, col_name="h2o_reported")

In [50]:
# 81 12:44 - 13:04
# Reading: 518.9

start_date = datetime(2024, 12, 11, 12, 44, 49)
end_date = datetime(2024, 12, 11, 13, 4, 0)

process_measurement(start_date, end_date, plot = True)

Picarro Median Reading: 522.2875301643999


In [11]:
# 82 13:08 - 13:29
# Reading: 518.3

start_date = datetime(2024, 12, 11, 13, 8, 0)
end_date = datetime(2024, 12, 11, 13, 28, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 521.8658507946442


In [12]:
# 83 15:25 - 15:45
# Reading: 518.9

start_date = datetime(2024, 12, 3, 15, 25, 0)
end_date = datetime(2024, 12, 3, 15, 45, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 522.4968016109835


In [13]:
# 84  16:05 - 16:25
# Reading: 516.0

start_date = datetime(2024, 12, 3, 16, 5, 6)
end_date = datetime(2024, 12, 3, 16, 25, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 519.4770598989816


In [14]:
# 85 UTC: 13:31 - 13:51
# Reading: 519.0

start_date = datetime(2024, 12, 11, 13, 31, 19)
end_date = datetime(2024, 12, 11, 13, 51, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 522.4849526454174


In [15]:
# 86 UTC: 13:53 - 14:13
# Reading: 518.7

start_date = datetime(2024, 12, 11, 13, 53, 20)
end_date = datetime(2024, 12, 11, 14, 13, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 522.3177838289337


In [16]:
# 87 UTC: 14:15 - 14:35
# Reading: 518.8

start_date = datetime(2024, 12, 11, 14, 15, 0)
end_date = datetime(2024, 12, 11, 14, 35, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 522.1724404766294


In [17]:
# 88 UTC: 14:38 - 14:58
# Reading: 518.4

start_date = datetime(2024, 12, 11, 14, 38, 29)
end_date = datetime(2024, 12, 11, 14, 58, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 521.8874542192472


In [18]:
# 89 UTC: 15:01 - 15:21
# Reading: 519.6

start_date = datetime(2024, 12, 11, 15, 1, 0)
end_date = datetime(2024, 12, 11, 15, 21, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 523.0891391054257


In [19]:
# 90 UTC: 13:00 - 13:22
# Reading: 519.1

start_date = datetime(2024, 12, 18, 13, 2, 46)
end_date = datetime(2024, 12, 18, 13, 21, 40)

process_measurement(start_date, end_date)

Picarro Median Reading: 522.491612062708


In [20]:
# 91 UTC: 13:22 - 13:42
# Reading: 519.1

start_date = datetime(2024, 12, 18, 13, 22, 5)
end_date = datetime(2024, 12, 18, 13, 42, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 522.5101663622424


In [21]:
# 92 UTC: 13:42 - 14:02
# Reading: 516.4

start_date = datetime(2024, 12, 18, 13, 43, 29)
end_date = datetime(2024, 12, 18, 14, 2, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 519.7332957497948


In [22]:
# 93 UTC: 14:02 - 14:22
# Reading: 519.0

start_date = datetime(2024, 12, 18, 14, 3, 45)
end_date = datetime(2024, 12, 18, 14, 22, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 522.3030308847357


In [23]:
# 94 UTC: 14:22 - 14:42
# Reading: 518.3

start_date = datetime(2024, 12, 18, 14, 23, 14)
end_date = datetime(2024, 12, 18, 14, 42, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 521.6356827796312


In [24]:
# 95 UTC: 14:42 - 15:02
# Reading: 518.7

start_date = datetime(2024, 12, 18, 14, 43, 54)
end_date = datetime(2024, 12, 18, 15, 2, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 522.1018738006179


In [25]:
# 96 UTC: 15:02 - 15:22
# Reading: 518.1

start_date = datetime(2024, 12, 18, 15, 4, 7)
end_date = datetime(2024, 12, 18, 15, 22, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 521.6260233154374


In [26]:
# 97 UTC: 15:22 - 15:42
# Reading: 518.8

start_date = datetime(2024, 12, 18, 15, 23, 25)
end_date = datetime(2024, 12, 18, 15, 42, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 521.9528892686263


In [27]:
# 98 UTC: 10:21 - 10:41
# Reading: 519.0

start_date = datetime(2024, 12, 19, 10, 21, 52)
end_date = datetime(2024, 12, 19, 10, 40, 49)

process_measurement(start_date, end_date)

Picarro Median Reading: 522.3799135847399


In [28]:
# 99 UTC: 12:32 - 12:53  (NOTE(review): the window below starts at 12:43:30, not 12:32 — confirm this is intended)
# Reading: 519.3

start_date = datetime(2024, 12, 19, 12, 43, 30)
end_date = datetime(2024, 12, 19, 12, 53, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 522.6305812150305


In [29]:
# 100 UTC: 12:52 - 13:14
# Reading: 519.0

start_date = datetime(2024, 12, 19, 12, 53, 24)
end_date = datetime(2024, 12, 19, 13, 14, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 522.2353233571587


In [30]:
# 101 UTC: 13:14 - 13:35
# Reading: 399.4

start_date = datetime(2024, 12, 19, 13, 14, 37)
end_date = datetime(2024, 12, 19, 13, 34, 33)

process_measurement(start_date, end_date)

Picarro Median Reading: 401.7647699354647


In [31]:
# 102 UTC: 13:35 - 13:55
# Reading: 400.1

start_date = datetime(2024, 12, 19, 13, 35, 33)
end_date = datetime(2024, 12, 19, 13, 55, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 402.66138570405303


In [32]:
# 103 UTC: 13:55 - 14:15
# Reading: 400.1

start_date = datetime(2024, 12, 19, 13, 56, 43)
end_date = datetime(2024, 12, 19, 14, 15, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 402.68248817549687


In [33]:
# 104 UTC: 14:15 - 14:35
# Reading: 399.1

start_date = datetime(2024, 12, 19, 14, 15, 31)
end_date = datetime(2024, 12, 19, 14, 35, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 401.6191689496525


In [34]:
# 105 UTC: 14:55 - 15:15
# Reading: 397.9

start_date = datetime(2024, 12, 19, 14, 57, 40)
end_date = datetime(2024, 12, 19, 15, 15, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 400.4666169738992


In [35]:
# 106 UTC: 15:15 - 15:35
# Reading: 399.0

start_date = datetime(2024, 12, 19, 15, 16, 42)
end_date = datetime(2024, 12, 19, 15, 35, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 401.60491206736805


In [36]:
# 107 UTC: 15:35 - 15:55
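# Reading: 499.3  (NOTE(review): probably a typo for 399.3 — the Picarro median below is ~401.9; verify against the measurement log)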

start_date = datetime(2024, 12, 19, 15, 35, 41)
end_date = datetime(2024, 12, 19, 15, 54, 50)

process_measurement(start_date, end_date)

Picarro Median Reading: 401.90021658762305


In [37]:
# 108 UTC: 08:45 - 09:05
# Reading: 400.0

start_date = datetime(2024, 12, 20, 8, 45, 10)
end_date = datetime(2024, 12, 20, 9, 5, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 402.62355954491454


In [38]:
# 109 UTC: 09:05 - 09:25
# Reading: 396.9

start_date = datetime(2024, 12, 20, 9, 8, 30)
end_date = datetime(2024, 12, 20, 9, 25, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 399.45926835304


In [39]:
# 110 UTC: 09:25 - 09:45
# Reading: 399.0

start_date = datetime(2024, 12, 20, 9, 27, 0)
end_date = datetime(2024, 12, 20, 9, 45, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 401.57076504850534


In [40]:
# 111 UTC: 10:06 - 10:26
# Reading: 399.9

start_date = datetime(2024, 12, 20, 10, 6, 0)
end_date = datetime(2024, 12, 20, 10, 26, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 402.5006864719442


In [41]:
# 112 UTC: 10:26 - 10:46
# Reading: 399.9

start_date = datetime(2024, 12, 20, 10, 27, 20)
end_date = datetime(2024, 12, 20, 10, 46, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 402.504090295343


In [42]:
# 113 UTC: 12:08 - 12:28
# Reading: 399.1

start_date = datetime(2024, 12, 20, 12, 8, 30)
end_date = datetime(2024, 12, 20, 12, 28, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 401.6594670193435


In [43]:
# 114 UTC: 12:28 - 12:49
# Reading: 399.1

start_date = datetime(2024, 12, 20, 12, 29, 10)
end_date = datetime(2024, 12, 20, 12, 49, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 401.778021092574


In [44]:
# 115 UTC: 12:49 - 13:09
# Reading: 399.9

start_date = datetime(2024, 12, 20, 12, 49, 40)
end_date = datetime(2024, 12, 20, 13, 9, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 402.45153795997226


In [45]:
# 116 UTC: 13:09 - 13:29
# Reading: 399.4

start_date = datetime(2024, 12, 20, 13, 10, 10)
end_date = datetime(2024, 12, 20, 13, 29, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 401.8961799160489


In [46]:
# 117 UTC: 13:29 - 13:49
# Reading: 399.0

start_date = datetime(2024, 12, 20, 13, 30, 10)
end_date = datetime(2024, 12, 20, 13, 49, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 401.4432746016526


In [47]:
# 118 UTC: 13:49 - 14:09
# Reading: 399.2

start_date = datetime(2024, 12, 20, 13, 50, 30)
end_date = datetime(2024, 12, 20, 14, 9, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 401.64859394908416


In [48]:
# 119 UTC: 14:09 - 14:29
# Reading: (not recorded — TODO: fill in from the measurement log)

start_date = datetime(2024, 12, 20, 14, 9, 40)
end_date = datetime(2024, 12, 20, 14, 29, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 402.82572380927417


In [49]:
# 120 UTC: 14:29 - 14:49
# Reading:  399.9

start_date = datetime(2024, 12, 20, 14, 29, 40)
end_date = datetime(2024, 12, 20, 14, 49, 0)

process_measurement(start_date, end_date)

Picarro Median Reading: 402.4684831554075
