In [88]:
import os
import polars as pl
import numpy as np
import plotly.express as px

from datetime import datetime, timezone

# Root of the data tree, taken from the DATA_DIRECTORY environment variable.
# The value is None when that variable is not set.
DATA_DIRECTORY = os.getenv("DATA_DIRECTORY")

In [89]:
# Load the parquet file found under <DATA_DIRECTORY>/processed/picarro/.
df_p_dwd = pl.read_parquet(
    os.path.join(
        DATA_DIRECTORY,
        "processed",
        "picarro",
        "Calibrated_Raw_DWD_Picarro_G2301_413.parquet",
    )
)

In [90]:
# Show the last five rows (five is also the default of tail()).
df_p_dwd.tail(5)

DATE,TIME,FRAC_DAYS_SINCE_JAN1,FRAC_HRS_SINCE_JAN1,JULIAN_DAYS,EPOCH_TIME,ALARM_STATUS,INST_STATUS,CavityPressure,CavityTemp,DasTemp,EtalonTemp,species,OutletValve,CH4,CH4_dry,CO2,CO2_dry,h2o_reported,ch4_base,ch4_pzt_std,co2_base,co2_pzt_std,wlm1_offset,wlm2_offset,datetime,__index_level_0__,Picarro ID,Bottle_1_Median,Bottle_2_Median,slope,intercept,slope_interpolated,intercept_interpolated,picarro_corrected
str,str,f64,f64,f64,f64,i64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,"datetime[μs, UTC]",i64,i64,f64,f64,f64,f64,f64,f64,f64
"""2024-12-20""","""16:17:28.036""",354.678797,8512.291121,355.678797,1734700000.0,0,963,139.978611,45.001854,34.682666,45.163353,1.0,23887.427724,1.994084,1.994658,607.15721,607.203268,-0.001037,1178.424698,98.553013,1082.962702,59.50696,-0.073245,-0.043453,2024-12-20 16:17:28.036 UTC,18231138,,,,,,1.00673,0.028601,611.318404
"""2024-12-20""","""16:17:28.542""",354.678803,8512.291262,355.678803,1734700000.0,0,963,139.971558,45.001854,34.6875,45.163353,3.0,23882.675781,1.994084,1.994062,607.15721,607.149367,-0.001117,1178.424698,98.553013,1082.962702,59.50696,-0.073245,-0.043453,2024-12-20 16:17:28.542 UTC,18231139,,,,,,1.00673,0.028601,611.26414
"""2024-12-20""","""16:17:30.088""",354.67882,8512.291691,355.67882,1734700000.0,0,963,139.993316,45.001734,34.646597,45.163353,2.0,23880.213811,1.994109,1.994062,607.15721,607.149367,-0.001117,1178.40625,98.553013,1083.060345,59.50696,-0.073235,-0.043456,2024-12-20 16:17:30.088 UTC,18231140,,,,,,1.00673,0.028601,611.26414
"""2024-12-20""","""16:17:31.007""",354.678831,8512.291947,355.678831,1734700000.0,0,963,140.013272,45.001671,34.660221,45.163353,1.0,23886.199542,1.994109,1.994062,607.174696,607.149367,-0.001117,1178.40625,98.553013,1083.062964,59.50696,-0.073243,-0.043456,2024-12-20 16:17:31.007 UTC,18231141,,,,,,1.00673,0.028601,611.26414
"""2024-12-20""","""16:17:31.513""",354.678837,8512.292087,355.678837,1734700000.0,65536,0,140.028259,45.001671,34.625,45.163353,3.0,23891.722656,1.994109,1.994085,607.174696,607.165968,-0.001279,1178.40625,98.553013,1083.062964,59.50696,-0.073243,-0.043456,2024-12-20 16:17:31.513 UTC,18231142,,,,,,1.00673,0.028601,611.280853


In [91]:
def process_calibration_time(df, start_date, end_date, plot=True,
                             skip_fraction=0.3, keep_until_fraction=0.95):
    """Return the median of ``picarro_corrected`` over the middle part of a time window.

    The rows of ``df`` whose ``datetime`` lies between ``start_date`` and
    ``end_date`` (both bounds inclusive, the polars ``is_between`` default) are
    selected. Of those rows only the positional slice from
    ``skip_fraction`` to ``keep_until_fraction`` is used for the median.
    NOTE(review): the slice is positional, so this assumes the rows are already
    in chronological order; presumably the leading part is dropped to skip a
    settling period - confirm.

    Parameters
    ----------
    df : polars.DataFrame
        Must contain the columns ``datetime``, ``h2o_reported`` and
        ``picarro_corrected``.
    start_date, end_date : datetime
        Bounds of the window; they must be comparable with the ``datetime``
        column (the callers in this notebook pass timezone-aware UTC values).
    plot : bool, default True
        If true, show line plots of ``h2o_reported`` and ``picarro_corrected``
        over the selected window.
    skip_fraction : float, default 0.3
        Fraction of the window's rows dropped at the beginning.
    keep_until_fraction : float, default 0.95
        Fraction of the window's rows after which everything is dropped.

    Returns
    -------
    numpy.float64
        Median of the valid (non-null, non-NaN) values in the slice, or NaN
        when the slice contains no valid value (e.g. an empty window).

    Raises
    ------
    ValueError
        If not ``0 <= skip_fraction < keep_until_fraction <= 1``.
    """
    if not 0 <= skip_fraction < keep_until_fraction <= 1:
        raise ValueError(
            "expected 0 <= skip_fraction < keep_until_fraction <= 1, got "
            f"skip_fraction={skip_fraction!r}, keep_until_fraction={keep_until_fraction!r}"
        )

    df_filtered = df.filter(pl.col("datetime").is_between(start_date, end_date))

    if plot:
        # One figure per column, both over the selected window.
        for column in ("h2o_reported", "picarro_corrected"):
            fig = px.line(df_filtered, x="datetime", y=column)
            fig.show()

    data = df_filtered.get_column("picarro_corrected").to_list()

    # Slice by position first so that the boundaries are the same as before for
    # clean data; invalid samples are only discarded inside the slice.
    n_rows = len(data)
    trimmed = data[int(n_rows * skip_fraction):int(n_rows * keep_until_fraction)]

    # Nulls arrive as None and would make np.median raise; NaN would poison it.
    valid = np.array([value for value in trimmed if value is not None], dtype=float)
    valid = valid[~np.isnan(valid)]

    if valid.size == 0:
        # Explicit NaN instead of numpy's "mean of empty slice" warning.
        return np.float64(np.nan)

    return np.median(valid)

In [92]:
# 81 | 12:44 - 13:04 | Reading: 518.9
# NOTE(review): no "UTC:" label on this note, unlike later cells; the bounds below are built as UTC.

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 11, 12, 44, 49, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 11, 13, 4, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

522.4069026528374

In [93]:
# 82 | 13:08 - 13:29 | Reading: 518.3
# NOTE(review): no "UTC:" label on this note; also the note ends at 13:29 while
# end_date below is 13:28 - confirm which one is intended.

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 11, 13, 8, 0, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 11, 13, 28, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

521.9948774217688

In [94]:
# 83 | 15:25 - 15:45 | Reading: 518.9
# NOTE(review): no "UTC:" label on this note, unlike later cells; the bounds below are built as UTC.

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 3, 15, 25, 0, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 3, 15, 45, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

522.4944370190834

In [95]:
# 84 | 16:05 - 16:25 | Reading: 516.0
# NOTE(review): no "UTC:" label on this note, unlike later cells; the bounds below are built as UTC.

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 3, 16, 5, 6, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 3, 16, 25, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

519.4996164252359

In [96]:
# 85 | UTC: 13:31 - 13:51 | Reading: 519.0

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 11, 13, 31, 19, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 11, 13, 51, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

522.6065096159259

In [97]:
# 86 | UTC: 13:53 - 14:13 | Reading: 518.7

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 11, 13, 53, 20, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 11, 14, 13, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

522.4389257404735

In [98]:
# 87 | UTC: 14:15 - 14:35 | Reading: 518.8

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 11, 14, 15, 0, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 11, 14, 35, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

522.304663100912

In [99]:
# 88 | UTC: 14:38 - 14:58 | Reading: 518.4

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 11, 14, 38, 29, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 11, 14, 58, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

522.0123712598768

In [100]:
# 89 | UTC: 15:01 - 15:21 | Reading: 519.6

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 11, 15, 1, 0, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 11, 15, 21, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

523.1993757923232

In [101]:
# 90 | UTC: 13:00 - 13:22 | Reading: 519.1

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 18, 13, 2, 46, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 18, 13, 21, 40, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

522.7276754822104

In [102]:
# 91 | UTC: 13:22 - 13:42 | Reading: 519.1

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 18, 13, 22, 5, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 18, 13, 42, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

522.7470038342326

In [103]:
# 92 | UTC: 13:42 - 14:02 | Reading: 516.4

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 18, 13, 43, 29, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 18, 14, 2, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

519.9730075536429

In [104]:
# 93 | UTC: 14:02 - 14:22 | Reading: 519.0

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 18, 14, 3, 45, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 18, 14, 22, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

522.5310582054829

In [105]:
# 94 | UTC: 14:22 - 14:42 | Reading: 518.3

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 18, 14, 23, 14, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 18, 14, 42, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

521.878696409748

In [106]:
# 95 | UTC: 14:42 - 15:02 | Reading: 518.7

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 18, 14, 43, 54, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 18, 15, 2, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

522.3377255165628

In [107]:
# 96 | UTC: 15:02 - 15:22 | Reading: 518.1

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 18, 15, 4, 7, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 18, 15, 22, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

521.8621744185366

In [108]:
# 97 | UTC: 15:22 - 15:42 | Reading: 518.8

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 18, 15, 23, 25, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 18, 15, 42, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

522.2172384481117

In [109]:
# 98 | UTC: 10:21 - 10:41 | Reading: 519.0

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 19, 10, 21, 52, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 19, 10, 40, 49, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

522.6360574361446

In [111]:
# 99 | UTC: 12:32 - 12:53 | Reading: 519.3
# NOTE(review): the note starts at 12:32 but start_date below is 12:43:30, so
# only about ten minutes are evaluated - confirm this is intended.

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 19, 12, 43, 30, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 19, 12, 53, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

522.8800788846385

In [112]:
# 100 | UTC: 12:52 - 13:14 | Reading: 519.0

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 19, 12, 53, 24, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 19, 13, 14, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

522.5006704602793

In [113]:
# 101 | UTC: 13:14 - 13:35 | Reading: 399.4

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 19, 13, 14, 37, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 19, 13, 34, 33, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

401.9663457662931

In [114]:
# 102 | UTC: 13:35 - 13:55 | Reading: 400.1

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 19, 13, 35, 33, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 19, 13, 55, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

402.8704677346891

In [115]:
# 103 | UTC: 13:55 - 14:15 | Reading: 400.1

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 19, 13, 56, 43, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 19, 14, 15, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

402.8848475250676

In [116]:
# 104 | UTC: 14:15 - 14:35 | Reading: 399.1

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 19, 14, 15, 31, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 19, 14, 35, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

401.8274274077712

In [117]:
# 105 | UTC: 14:55 - 15:15 | Reading: 397.9

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 19, 14, 57, 40, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 19, 15, 15, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

400.6563701343928

In [118]:
# 106 | UTC: 15:15 - 15:35 | Reading: 399.0

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 19, 15, 16, 42, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 19, 15, 35, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

401.7903043412514

In [119]:
# 107 | UTC: 15:35 - 15:55 | Reading: 499.3
# NOTE(review): 499.3 looks like a typo (possibly 399.3): this cell's result is
# about 402.09 and the neighbouring readings are around 399 - confirm against
# the original record before using this value.

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 19, 15, 35, 41, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 19, 15, 54, 50, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

402.0935771008572

In [120]:
# 108 | UTC: 08:45 - 09:05 | Reading: 400.0

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 20, 8, 45, 10, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 20, 9, 5, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

402.82112465109776

In [121]:
# 109 | UTC: 09:05 - 09:25 | Reading: 396.9

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 20, 9, 8, 30, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 20, 9, 25, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

399.6676137159892

In [122]:
# 110 | UTC: 09:25 - 09:45 | Reading: 399.0

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 20, 9, 27, 0, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 20, 9, 45, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

401.7645314568672

In [123]:
# 111 | UTC: 10:06 - 10:26 | Reading: 399.9

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 20, 10, 6, 0, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 20, 10, 26, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

402.70213401646066

In [124]:
# 112 | UTC: 10:26 - 10:46 | Reading: 399.9

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 20, 10, 27, 20, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 20, 10, 46, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

402.70129592372444

In [125]:
# 113 | UTC: 12:08 - 12:28 | Reading: 399.1

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 20, 12, 8, 30, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 20, 12, 28, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

401.8755007882667

In [126]:
# 114 | UTC: 12:28 - 12:49 | Reading: 399.1

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 20, 12, 29, 10, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 20, 12, 49, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

401.99006017958624

In [127]:
# 115 | UTC: 12:49 - 13:09 | Reading: 399.9

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 20, 12, 49, 40, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 20, 13, 9, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

402.65517458784814

In [128]:
# 116 | UTC: 13:09 - 13:29 | Reading: 399.4

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 20, 13, 10, 10, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 20, 13, 29, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

402.1074731976945

In [129]:
# 117 | UTC: 13:29 - 13:49 | Reading: 399.0

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 20, 13, 30, 10, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 20, 13, 49, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

401.6490928378198

In [130]:
# 118 | UTC: 13:49 - 14:09 | Reading: 399.2

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 20, 13, 50, 30, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 20, 14, 9, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

401.8560315443678

In [131]:
# 119 | UTC: 14:09 - 14:29 | Reading: (not recorded)
# TODO: the reading for this entry is missing in the notes - fill it in.

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 20, 14, 9, 40, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 20, 14, 29, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

403.03981803889957

In [132]:
# 120 | UTC: 14:29 - 14:49 | Reading: 399.9

# Timezone-aware (UTC) bounds of the evaluated window.
start_date = datetime(2024, 12, 20, 14, 29, 40, tzinfo=timezone.utc)
end_date = datetime(2024, 12, 20, 14, 49, 0, tzinfo=timezone.utc)

# Last expression of the cell, so the returned median is displayed.
process_calibration_time(df_p_dwd, start_date, end_date, plot=True)

402.6622909917404