In [None]:
import polars as pl
import os
from datetime import datetime, timezone
import plotly.express as px

from sites_deloyment_times import deployment_times, datetime_format

# Root directory of the dataset, supplied through the environment so that no
# machine-specific path is hardcoded in the notebook.
DATA_DIRECTORY = os.environ.get("DATA_DIRECTORY")
if DATA_DIRECTORY is None:
    # Fail here with a clear message rather than later inside os.path.join,
    # where an unset variable shows up as an opaque TypeError.
    raise RuntimeError(
        "Environment variable DATA_DIRECTORY is not set; "
        "point it at the directory that contains the 'processed' folder."
    )

In [None]:
# Site metadata table; the CSV uses ';' as its field separator.
sites_meta = pl.read_csv(
    "sites.csv",
    separator=";",
)

# Lazily scanned level-1 datasets: nothing is read from disk until .collect().
# (File names suggest 1-minute and 1-hour resolution respectively.)
df_1m = pl.scan_parquet(
    os.path.join(DATA_DIRECTORY, "processed", "1m_level_1_cities_portal.parquet")
)
df_1h = pl.scan_parquet(
    os.path.join(DATA_DIRECTORY, "processed", "1h_level_1_cities_portal.parquet")
)

In [None]:
# Print the raw deployment record for TUMR, then display its metadata row(s).
print(deployment_times["TUMR"])
sites_meta.filter(pl.col("site").eq("TUMR"))

In [None]:
# Materialise only the first five rows of the lazy frame as a quick preview.
df_1m.head(5).collect()

In [None]:
site_name = "TUMR"
sensor = 0

# Only a cell's last expression is rendered, so gather the three fields into a
# single mapping instead of three separate bare expressions (of which only the
# last one would be shown).
{
    field: deployment_times[site_name]["sensors"][sensor][field]
    for field in ("id", "start_time", "end_time")
}

In [None]:
# Utility

def extract_site_data(df, deployment_times: dict, site_name:str):
    """Extract the rows recorded by each sensor of a site during its deployment.

    For every entry in ``deployment_times[site_name]["sensors"]`` the frame is
    restricted to rows whose ``system_id`` equals the sensor's ``id`` and whose
    ``creation_timestamp`` lies between the sensor's ``start_time`` and
    ``end_time`` (``is_between`` with its default bounds handling). The
    per-sensor selections are stacked vertically in the order the sensors are
    listed and materialised with a single ``collect()``.

    Args:
        df: Polars ``LazyFrame`` (or eager ``DataFrame``) holding at least the
            ``system_id`` and ``creation_timestamp`` columns.
        deployment_times: Mapping ``site -> {"sensors": [...]}`` where each
            sensor entry provides ``"id"``, ``"start_time"`` and
            ``"end_time"``; the two times are strings in ``datetime_format``.
        site_name: Key of the site to extract from ``deployment_times``.

    Returns:
        A ``pl.DataFrame`` with the matching rows. If the site lists no
        sensors, an empty frame with the schema of ``df`` is returned.

    Raises:
        KeyError: If the site or one of the expected sensor fields is missing.
        ValueError: If a time string does not match ``datetime_format``.
    """
    # No-op for a LazyFrame; lets an eager DataFrame be passed as well.
    lazy_source = df.lazy()

    per_sensor_queries = []
    for sensor in deployment_times[site_name]["sensors"]:
        # NOTE(review): the parsed bounds are timezone-naive unless
        # `datetime_format` contains %z -- confirm this matches the dtype of
        # the `creation_timestamp` column.
        start_time = datetime.strptime(sensor["start_time"], datetime_format)
        end_time = datetime.strptime(sensor["end_time"], datetime_format)

        per_sensor_queries.append(
            lazy_source.filter(
                (pl.col("system_id") == sensor["id"])
                & pl.col("creation_timestamp").is_between(start_time, end_time)
            )
        )

    if not per_sensor_queries:
        # pl.concat rejects an empty list; hand back an empty, correctly typed frame.
        return lazy_source.head(0).collect()

    # Collect once at the end instead of once per sensor.
    return pl.concat(per_sensor_queries).collect()

def plot_extracted_data(df_plot, filter_flag:bool = True):
    """Show a line plot of ``co2`` over ``creation_timestamp``, split by flag.

    Two helper columns are derived from ``co2``: ``co2_0`` keeps the value only
    where ``OriginalFlag == 0`` and ``co2_185`` only where
    ``OriginalFlag == 185``; everywhere else they are null. Both are drawn as
    separate traces, and gaps are not connected.

    Args:
        df_plot: Polars frame with ``creation_timestamp``, ``co2`` and
            ``OriginalFlag`` columns.
        filter_flag: When true, only rows with ``OriginalFlag == 0`` are kept
            before plotting (``co2_185`` is then entirely null).

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """

    def _co2_where_flag(flag_value, column_name):
        # co2 where OriginalFlag matches, null otherwise, so that the trace
        # breaks instead of interpolating across other flags.
        return (
            pl.when(pl.col("OriginalFlag") == flag_value)
            .then(pl.col("co2"))
            .otherwise(None)
            .alias(column_name)
        )

    frame = df_plot.filter(pl.col("OriginalFlag") == 0) if filter_flag else df_plot
    frame = frame.with_columns(
        [
            _co2_where_flag(185, "co2_185"),
            _co2_where_flag(0, "co2_0"),
        ]
    )

    figure = px.line(frame, x="creation_timestamp", y=["co2_0", "co2_185"])
    figure.update_traces(connectgaps=False)
    figure.update_layout(
        yaxis_title='CO2 Concentration (ppm)',
        xaxis_title='',
        title='',
    )
    figure.show()

In [None]:
# UTC-aware bounds of the test window: 2024-04-01 00:00:00 .. 2024-07-18 23:59:59.
start_date = datetime(2024, 4, 1, tzinfo=timezone.utc)
end_date = datetime(2024, 7, 18, 23, 59, 59, tzinfo=timezone.utc)

# Still lazy: restrict the 1m frame to the window without reading any data yet.
df_test = df_1m.filter(
    pl.col("creation_timestamp").is_between(start_date, end_date)
)

In [None]:
# Gather all rows belonging to the sensors deployed at site RDIR.
df_site = extract_site_data(
    df=df_1m,
    deployment_times=deployment_times,
    site_name="RDIR",
)

In [None]:
# Preview the first five extracted rows.
df_site.head(5)

In [None]:
# Plot every row (no flag filtering) so both the flag-0 and flag-185 traces appear.
plot_extracted_data(df_plot=df_site, filter_flag=False)