In [35]:
import polars as pl
import os
from datetime import datetime, timezone
import plotly.express as px

from sites_deloyment_times import deployment_times, datetime_format

DATA_DIRECTORY = os.environ.get("DATA_DIRECTORY")

In [36]:
sites_meta = pl.read_csv("sites.csv", separator=";")

df_1m = pl.scan_parquet(os.path.join(DATA_DIRECTORY, "processed", "pipeline", "flagged_1_min_acropolis.parquet"))
df_1h = pl.scan_parquet(os.path.join(DATA_DIRECTORY, "processed", "pipeline", "flagged_1_h_acropolis.parquet"))
#df_1m = pl.scan_parquet(os.path.join(DATA_DIRECTORY, "processed", "pipeline", "flagged_1_min_nsigma_3.0_acropolis.parquet"))
#df_1h = pl.scan_parquet(os.path.join(DATA_DIRECTORY, "processed", "pipeline", "flagged_1_h_nsigma_3.0_acropolis.parquet"))

In [37]:
print(deployment_times["TUMR"])
sites_meta.filter(pl.col("site") == "TUMR")

{'sensors': [{'id': 6, 'start_time': '2024-02-21T00:00:00+0000', 'end_time': '2024-05-11T00:00:00+0000'}, {'id': 19, 'start_time': '2024-05-11T00:00:00+0000', 'end_time': '2024-07-19T00:00:00+0000'}, {'id': 6, 'start_time': '2024-09-04T00:00:00+0000', 'end_time': '2024-11-23T12:20:12+0000'}]}


site,site_name,latitude,longitude,elevation,site_type,installation_classification,height_of_building,responsible_party
str,str,f64,f64,f64,str,str,f64,str
"""TUMR""","""TUM Zentralgel…",48.150733,11.569168,511.39,"""city""","""rooftop""",31.06,"""tum:environmen…"


In [38]:
df_1m.head().collect()

creation_timestamp,system_id,sys_name_short,co2,h2o,ws,wd,OriginalFlag,Flag
"datetime[μs, UTC]",i64,str,f32,f64,f64,f64,i32,str
2024-01-02 12:36:00 UTC,1,"""acropolis-1""",426.914825,0.750941,6.6,221.0,0,"""O"""
2024-01-02 12:37:00 UTC,1,"""acropolis-1""",429.886505,0.756998,6.6,221.0,0,"""O"""
2024-01-02 12:38:00 UTC,1,"""acropolis-1""",430.034119,0.756623,10.8,232.0,0,"""O"""
2024-01-02 12:39:00 UTC,1,"""acropolis-1""",429.856812,0.761341,10.8,232.0,0,"""O"""
2024-01-02 12:40:00 UTC,1,"""acropolis-1""",428.979248,0.765358,7.3,244.0,0,"""O"""


In [39]:
site_name = "TUMR"
sensor = 0

deployment_times[site_name]["sensors"][sensor]["id"]
deployment_times[site_name]["sensors"][sensor]["start_time"]
deployment_times[site_name]["sensors"][sensor]["end_time"]

'2024-05-11T00:00:00+0000'

In [40]:
# Utility

def extract_site_data(df, deployment_times: dict, site_name:str):
    extracted_dates = []
    
    for sensor in deployment_times[site_name]["sensors"]:
        
        id = sensor["id"]
        start_time = datetime.strptime(sensor["start_time"], datetime_format)
        end_time = datetime.strptime(sensor["end_time"], datetime_format)       
        
        df_temp = df.filter(pl.col("system_id") == id)  \
            .filter(pl.col("creation_timestamp") \
            .is_between(start_time, end_time)) \
            .collect()
        
        extracted_dates.append(df_temp)
    
    return pl.concat(extracted_dates)

def plot_extracted_data(df_plot, filter_flag:bool = True):
    
    if filter_flag:
        df_plot= df_plot.filter(pl.col("OriginalFlag") == 0)
        
    df_plot = df_plot.with_columns(
    pl.when(pl.col("OriginalFlag") == 185)
    .then(pl.col("co2"))
    .otherwise(None)  # Assign None (or np.nan if you export to Pandas)
    .alias("co2_185")
    ) \
    .with_columns(
    pl.when(pl.col("OriginalFlag") == 0)
    .then(pl.col("co2"))
    .otherwise(None)  # Assign None (or np.nan if you export to Pandas)
    .alias("co2_0")
    )
    
    fig = px.line(df_plot, 
            x="creation_timestamp", 
            y=["co2_0", "co2_185"], 
            )
    fig.update_traces(connectgaps=False)
    fig.update_layout(
        yaxis_title='CO2 Concentration (ppm)',
        xaxis_title='',
        title='',
    )
    fig.show()

In [41]:
start_date = datetime(2024, 11, 1, 0, 0, 0).replace(tzinfo=timezone.utc)
end_date = datetime(2024, 11, 18, 23, 59, 59).replace(tzinfo=timezone.utc)

df_test = df_1m.filter(pl.col("creation_timestamp").is_between(start_date, end_date))

In [42]:
df_site = extract_site_data(df=df_test, deployment_times = deployment_times, site_name="TUMR")

In [43]:
df_site.head()

creation_timestamp,system_id,sys_name_short,co2,h2o,ws,wd,OriginalFlag,Flag
"datetime[μs, UTC]",i64,str,f32,f64,f64,f64,i32,str
2024-11-01 00:00:00 UTC,6,"""acropolis-6""",458.771606,1.519917,0.9,162.2,0,"""O"""
2024-11-01 00:01:00 UTC,6,"""acropolis-6""",456.615967,1.520186,1.6,130.5,0,"""O"""
2024-11-01 00:02:00 UTC,6,"""acropolis-6""",457.744507,1.517988,1.6,130.5,0,"""O"""
2024-11-01 00:03:00 UTC,6,"""acropolis-6""",457.935303,1.525674,1.3,169.8,0,"""O"""
2024-11-01 00:04:00 UTC,6,"""acropolis-6""",457.387238,1.52285,1.3,169.8,0,"""O"""


In [44]:
plot_extracted_data(df_site, filter_flag=False)