In [19]:
from datetime import datetime
from datetime import timezone
import polars as pl
import os
import plotly.express as px

# Root directory of the measurement data, taken from the environment.
DATA_DIRECTORY = os.environ.get("DATA_DIRECTORY")
if DATA_DIRECTORY is None:
    # Fail fast: os.path.join(None, ...) below would otherwise raise an opaque
    # TypeError that does not mention the missing environment variable.
    raise RuntimeError(
        "Environment variable DATA_DIRECTORY is not set; "
        "point it at the directory that contains the 'processed' folder."
    )

# Processed measurement data (1h file; presumably hourly averages of the
# calibration-corrected data, judging by the file name - verify).
# scan_parquet only builds a lazy query; rows are read when .collect() is called.
#df_1m = pl.scan_parquet(os.path.join(DATA_DIRECTORY, "processed", "1m_cal_corr_acropolis.parquet"))
df_1h = pl.scan_parquet(os.path.join(DATA_DIRECTORY, "processed", "1h_cal_corr_acropolis.parquet"))


In [42]:
# Utility

def extract_site_data(
    dates: dict[str, list[tuple[int, datetime, datetime]]],
    site_name: str,
    data: "pl.LazyFrame | None" = None,
):
    """Plot the corrected CO2 time series of one site over all its periods.

    For every ``(system_id, start, end)`` entry listed under ``site_name`` the
    rows whose ``sys_name_short`` equals ``mid-cost-<system_id>`` and whose
    ``creation_timestamp`` lies between ``start`` and ``end`` (both bounds
    inclusive, the polars ``is_between`` default) are selected. The selections
    are stacked in the order they are listed and shown as one plotly line chart
    of ``gmp343_corrected`` over ``creation_timestamp``.

    Args:
        dates: Mapping of site name to a list of ``(system_id, start, end)``
            tuples.
        site_name: Key of ``dates`` to extract and plot.
        data: Lazy frame to read from. Defaults to the notebook-level
            ``df_1h`` when omitted.

    Raises:
        KeyError: If ``site_name`` is not a key of ``dates``.
        ValueError: If no periods are listed for ``site_name``.
    """
    # Validate up front so that a typo in the site name or an empty entry gives
    # a readable message instead of failing deep inside polars.
    if site_name not in dates:
        raise KeyError(f"Unknown site {site_name!r}; available sites: {list(dates)}")

    periods = dates[site_name]
    if not periods:
        raise ValueError(f"No periods configured for site {site_name!r}")

    source = df_1h if data is None else data

    selections = []
    for system_id, start, end in periods:
        print(system_id, start, end)

        # Keep the selection lazy; everything is materialised once below.
        selections.append(
            source.filter(
                (pl.col("sys_name_short") == f"mid-cost-{system_id}")
                & pl.col("creation_timestamp").is_between(start, end)
            )
        )

    # A vertical concat keeps the periods in the order they were listed.
    df_extracted = pl.concat(selections).collect()

    fig = px.line(
        df_extracted,
        x="creation_timestamp",
        y="gmp343_corrected",
        markers=True,
        title=f"{site_name}: CO2 Corrected [ppm]",
    )
    fig.show()

In [65]:
def _utc(year, month, day, hour=0, minute=0, second=0):
    """Return a timezone-aware UTC datetime (midnight unless a time is given)."""
    return datetime(year, month, day, hour, minute, second, tzinfo=timezone.utc)


def _utc_day_end(year, month, day):
    """Return 23:59:59 UTC on the given day."""
    return _utc(year, month, day, 23, 59, 59)


# Fixed upper bound used for periods without an explicit end.
# Note: despite its name this is a hard-coded date, not the current day.
today = _utc(2024, 7, 1)

# Site name -> list of (system id, period start, period end).
# extract_site_data() matches the system id against "mid-cost-<id>" and keeps
# rows whose timestamp lies between start and end.
dates = {
    "Example": [
        (99, _utc(2023, 1, 1), _utc_day_end(2023, 1, 1)),
        (100, _utc(2024, 1, 1), today),
    ],
    "SMAI": [
        (1, _utc(2023, 9, 8), _utc_day_end(2024, 2, 5)),
        (16, _utc(2024, 2, 8), today),
    ],
    "KLEG": [
        (1, _utc(2024, 2, 29), today),
    ],
    "DLRO": [
        (14, _utc(2023, 11, 22), _utc_day_end(2023, 12, 22)),
        (5, _utc(2024, 2, 28), today),
    ],
    "TUMN": [
        (6, _utc(2024, 2, 21), _utc(2024, 5, 11)),
        (6, _utc(2024, 5, 31), today),
    ],
    "KRDI": [
        (2, _utc(2023, 9, 13), _utc(2023, 12, 22)),
        (8, _utc(2024, 3, 15), today),
    ],
    "SWGG": [
        (10, _utc(2024, 4, 11), today),
    ],
    "RFIN": [
        (15, _utc(2023, 11, 16), _utc(2023, 12, 22)),
        (3, _utc(2024, 2, 22), _utc_day_end(2024, 4, 2)),
        (11, _utc(2024, 4, 11), today),
    ],
}

In [66]:
# Plot every period listed for the RFIN site.
extract_site_data(dates=dates, site_name="RFIN")

15 2023-11-16 00:00:00+00:00 2023-12-22 00:00:00+00:00
3 2024-02-22 00:00:00+00:00 2024-04-02 23:59:59+00:00
11 2024-04-11 00:00:00+00:00 2024-07-01 00:00:00+00:00
