In [25]:
import polars as pl
import os
from datetime import datetime, timezone
import plotly.express as px

from sites_deloyment_times import deployment_times, datetime_format

# Root folder of the data set, taken from the environment; None when the variable is unset.
DATA_DIRECTORY = os.getenv("DATA_DIRECTORY")

In [26]:
# Site metadata table (semicolon-separated), read eagerly from the working directory.
sites_meta = pl.read_csv("sites.csv", separator=";")

# Fail with an actionable message instead of the TypeError that
# os.path.join(None, ...) would raise when the variable is not set.
if DATA_DIRECTORY is None:
    raise RuntimeError(
        "DATA_DIRECTORY is not set; export the DATA_DIRECTORY environment "
        "variable so the processed parquet files can be located."
    )

PROCESSED_DIRECTORY = os.path.join(DATA_DIRECTORY, "processed")

# Lazy scans: nothing is read from disk until a query is collected.
df_1m = pl.scan_parquet(os.path.join(PROCESSED_DIRECTORY, "1m_level_1_cities_portal.parquet"))
df_1h = pl.scan_parquet(os.path.join(PROCESSED_DIRECTORY, "1h_level_1_cities_portal.parquet"))

In [27]:
# Print the deployment record configured for site "TUMR" ...
print(deployment_times["TUMR"])
# ... and display the matching row of the site metadata table as the cell output.
sites_meta.filter(pl.col("site") == "TUMR")

{'sensors': [{'id': 6, 'start_time': '2024-02-21T00:00:00+0000', 'end_time': '2024-05-11T00:00:00+0000'}, {'id': 19, 'start_time': '2024-05-11T00:00:00+0000', 'end_time': '2024-07-19T00:00:00+0000'}, {'id': 6, 'start_time': '2024-09-04T00:00:00+0000', 'end_time': '2024-11-21T10:00:52+0000'}]}


site,site_name,latitude,longitude,elevation,site_type,installation_classification,height_of_building,responsible_party
str,str,f64,f64,f64,str,str,f64,str
"""TUMR""","""TUM Zentralgel…",48.150733,11.569168,511.39,"""city""","""rooftop""",31.06,"""tum:environmen…"


In [28]:
# Materialise only the first rows of the lazy 1-minute frame to inspect its columns and dtypes.
df_1m.head().collect()

creation_timestamp,system_id,sys_name_short,co2,h2o,ws,wd,OriginalFlag,Flag
"datetime[μs, UTC]",i64,str,f32,f64,f64,f64,i32,str
2023-07-06 09:10:00 UTC,1,"""acropolis-1""",486.32901,0.860639,,,0,"""O"""
2023-07-06 09:11:00 UTC,1,"""acropolis-1""",429.80545,0.865939,,,0,"""O"""
2023-07-06 09:12:00 UTC,1,"""acropolis-1""",426.278748,0.870763,,,0,"""O"""
2023-07-06 09:13:00 UTC,1,"""acropolis-1""",420.609375,0.867977,,,0,"""O"""
2023-07-06 09:14:00 UTC,1,"""acropolis-1""",421.917419,0.857573,,,0,"""O"""


In [29]:
# Look up the fields of one deployment entry: `sensor` is the position in the
# site's "sensors" list, not a sensor id.
site_name = "TUMR"
sensor = 0

# NOTE: only the value of the last expression is rendered as the cell output;
# the "id" and "start_time" lookups below are evaluated and discarded.
deployment_times[site_name]["sensors"][sensor]["id"]
deployment_times[site_name]["sensors"][sensor]["start_time"]
deployment_times[site_name]["sensors"][sensor]["end_time"]

'2024-05-11T00:00:00+0000'

In [30]:
# Utility

def extract_site_data(df, deployment_times: dict, site_name: str):
    """Collect the rows recorded by every sensor deployment of one site.

    For each entry in ``deployment_times[site_name]["sensors"]`` the rows whose
    ``system_id`` equals the entry's ``id`` and whose ``creation_timestamp``
    lies between the entry's ``start_time`` and ``end_time`` (both bounds
    inclusive, the ``is_between`` default) are collected. The per-deployment
    results are concatenated in the order the deployments are listed.

    Args:
        df: Polars LazyFrame (or DataFrame) with ``system_id`` and
            ``creation_timestamp`` columns.
        deployment_times: Mapping of site name to ``{"sensors": [...]}``, where
            each sensor entry has ``id``, ``start_time`` and ``end_time`` keys;
            the times are strings parsed with ``datetime_format``.
        site_name: Key of the site to extract.

    Returns:
        A collected ``pl.DataFrame`` holding the rows of all deployments.

    Raises:
        KeyError: If ``site_name`` is not a key of ``deployment_times``.
        ValueError: If the site has no sensor deployments, or a time string
            does not match ``datetime_format``.
    """
    sensors = deployment_times[site_name]["sensors"]
    if not sensors:
        # pl.concat rejects an empty list with a generic message; name the site instead.
        raise ValueError(f"No sensor deployments configured for site {site_name!r}")

    # Accept eager DataFrames as well; .lazy() on a LazyFrame returns the frame itself.
    lazy_df = df.lazy()

    extracted_frames = []
    for sensor in sensors:
        sensor_id = sensor["id"]  # avoid shadowing the builtin `id`
        # NOTE(review): assumes datetime_format yields timezone-aware datetimes that
        # are comparable with the UTC creation_timestamp column — confirm.
        start_time = datetime.strptime(sensor["start_time"], datetime_format)
        end_time = datetime.strptime(sensor["end_time"], datetime_format)

        in_deployment = (pl.col("system_id") == sensor_id) & pl.col(
            "creation_timestamp"
        ).is_between(start_time, end_time)

        extracted_frames.append(lazy_df.filter(in_deployment).collect())

    return pl.concat(extracted_frames)

def plot_extracted_data(df_plot, filter_flag:bool = True):
    """Show a line chart of CO2 over time, split by ``OriginalFlag`` value.

    Two helper columns are added before plotting: ``co2_185`` carries ``co2``
    where ``OriginalFlag`` is 185 and ``co2_0`` carries ``co2`` where it is 0;
    all other rows are null in the respective column. Gaps are not connected.

    Args:
        df_plot: Frame with ``creation_timestamp``, ``co2`` and
            ``OriginalFlag`` columns.
        filter_flag: When true, only rows with ``OriginalFlag == 0`` are kept
            before the helper columns are built.

    Returns:
        None. The figure is rendered through ``fig.show()``.
    """

    def co2_where_flag(flag_value, column_name):
        # co2 for rows carrying the given flag, null everywhere else.
        return (
            pl.when(pl.col("OriginalFlag") == flag_value)
            .then(pl.col("co2"))
            .otherwise(None)
            .alias(column_name)
        )

    frame = df_plot.filter(pl.col("OriginalFlag") == 0) if filter_flag else df_plot
    frame = frame.with_columns(
        co2_where_flag(185, "co2_185"),
        co2_where_flag(0, "co2_0"),
    )

    fig = px.line(frame, x="creation_timestamp", y=["co2_0", "co2_185"])
    # Leave nulls as visible breaks instead of bridging them with a line.
    fig.update_traces(connectgaps=False)
    fig.update_layout(
        yaxis_title='CO2 Concentration (ppm)',
        xaxis_title='',
        title='',
    )
    fig.show()

In [31]:
# Test window in UTC; is_between keeps rows on both bounds.
start_date = datetime(2024, 11, 1, tzinfo=timezone.utc)
end_date = datetime(2024, 11, 18, 23, 59, 59, tzinfo=timezone.utc)

df_test = df_1m.filter(
    pl.col("creation_timestamp").is_between(start_date, end_date)
)

In [53]:
# Extract the rows of every sensor deployment of site "MAIR" from the test window.
df_site = extract_site_data(
    df=df_test,
    deployment_times=deployment_times,
    site_name="MAIR",
)

In [54]:
# Preview the first rows of the extracted site data.
df_site.head()

creation_timestamp,system_id,sys_name_short,co2,h2o,ws,wd,OriginalFlag,Flag
"datetime[μs, UTC]",i64,str,f32,f64,f64,f64,i32,str
2024-11-01 00:00:00 UTC,16,"""acropolis-16""",453.510284,1.560034,1.0,248.6,0,"""O"""
2024-11-01 00:01:00 UTC,16,"""acropolis-16""",455.810364,1.557814,0.8,250.2,0,"""O"""
2024-11-01 00:02:00 UTC,16,"""acropolis-16""",457.467712,1.558616,0.8,250.2,185,"""K"""
2024-11-01 00:03:00 UTC,16,"""acropolis-16""",454.22995,1.55546,0.8,250.2,0,"""O"""
2024-11-01 00:04:00 UTC,16,"""acropolis-16""",453.909332,1.558771,1.4,224.5,0,"""O"""


In [55]:
# filter_flag=False skips the OriginalFlag == 0 filter, so rows flagged 185
# remain available for the co2_185 trace.
plot_extracted_data(df_site, filter_flag=False)