In [None]:
from datetime import datetime
from datetime import timezone
import polars as pl
import os
import plotly.express as px

# Root folder of the data sets, taken from the DATA_DIRECTORY environment variable.
DATA_DIRECTORY = os.environ.get("DATA_DIRECTORY")
if DATA_DIRECTORY is None:
    # Fail fast with a readable message; otherwise os.path.join(None, ...) below
    # raises an opaque TypeError that does not mention the missing variable.
    raise RuntimeError(
        "Environment variable DATA_DIRECTORY is not set; "
        "it must point to the folder that contains the 'processed' directory."
    )

# The ACROPOLIS file is scanned lazily (downstream cells call .collect()),
# while the Picarro file is read eagerly into memory.
df = pl.scan_parquet(os.path.join(DATA_DIRECTORY, "processed", "1h_cal_corr_acropolis.parquet"))
df_p = pl.read_parquet(os.path.join(DATA_DIRECTORY, "processed", "1h_cal_corr_picarro.parquet"))

In [None]:
# Lower bound (UTC) of the time window used when extracting data.
start_time = datetime(2024, 4, 1, tzinfo=timezone.utc)

# Until when the systems were on the rooftop: one (system_id, end date) tuple
# per system. The compact rows below are (system_id, month, day) in 2024 and
# are expanded to midnight-UTC datetimes.
dates = [
    (system_id, datetime(2024, month, day, tzinfo=timezone.utc))
    for system_id, month, day in (
        (1, 2, 29),
        (2, 7, 30),
        (3, 6, 27),
        (4, 7, 30),
        (5, 2, 28),
        (6, 8, 30),
        (7, 6, 23),
        (8, 3, 15),
        (9, 6, 26),
        (10, 4, 11),
        (11, 4, 11),
        (12, 2, 14),
        (13, 2, 22),
        (14, 6, 23),
        (15, 5, 20),
        (16, 2, 8),
        # (17, 7, 9),
        (18, 2, 8),
        (20, 2, 14),
    )
]

In [None]:
# Upper bound (UTC) of the Picarro selection window.
end_time = datetime(2024, 8, 1, tzinfo=timezone.utc)

# Keep the Picarro rows between start_time and end_time, rename the corrected
# column to the shared "co2_corrected" name, and keep only three columns.
df_p_filtered = (
    df_p.filter(pl.col("creation_timestamp").is_between(start_time, end_time))
    .rename({"picarro_corrected": "co2_corrected"})
    .select("creation_timestamp", "sys_name_short", "co2_corrected")
)


In [None]:
# Peek at the first row of the lazily scanned ACROPOLIS frame.
df.head(1).collect()

In [None]:
# Show the last row of the Picarro frame.
df_p.tail(1)

In [None]:
# Utility: gather the filtered Picarro frame plus, for every listed system, the
# rows between start_time and that system's end date, then plot a short window.
extracted_dates = [df_p_filtered]

for system_id, rooftop_end in dates:
    system_frame = (
        df.filter(pl.col("system_id") == system_id)
        .filter(pl.col("creation_timestamp").is_between(start_time, rooftop_end))
        # Share one CO2 column name with the Picarro frame.
        .rename({"gmp343_corrected": "co2_corrected"})
        .collect()
    )
    extracted_dates.append(system_frame)

# Window that is actually plotted.
start_date = datetime(2024, 5, 16, tzinfo=timezone.utc)
end_date = datetime(2024, 5, 21, tzinfo=timezone.utc)

# A diagonal concat is used because the frames do not all have the same columns.
df_extracted = pl.concat(extracted_dates, how="diagonal")
df_extracted = df_extracted.filter(
    pl.col("creation_timestamp").is_between(start_date, end_date)
)

# One line chart per quantity, coloured by system name.
for y_column, y_label in (
    ("gmp343_temperature", 'Sensor Temperature (°C)'),
    ("enclosure_bme280_temperature", 'Enclosure Temperature'),
    ("co2_corrected", 'CO2 (ppm)'),
):
    fig = px.line(df_extracted, x="creation_timestamp", y=y_column, color="sys_name_short")
    fig.update_layout(
        yaxis_title=y_label,
        xaxis_title='',
        title='',
    )
    fig.show()

In [None]:
# Utility: same extraction as before, but each system frame is additionally
# joined with the Picarro readings on the timestamp so that the difference
# between the system and the Picarro can be plotted.
extracted_dates = [df_p_filtered]

# Picarro columns needed for the comparison.
picarro_reference = df_p.select("creation_timestamp", "picarro_corrected")

for system_id, rooftop_end in dates:
    system_frame = (
        df.filter(pl.col("system_id") == system_id)
        .filter(pl.col("creation_timestamp").is_between(start_time, rooftop_end))
        .rename({"gmp343_corrected": "co2_corrected"})
        .collect()
        .join(picarro_reference, on="creation_timestamp")
        # diff = system CO2 minus Picarro CO2 at the same timestamp.
        .with_columns((pl.col("co2_corrected") - pl.col("picarro_corrected")).alias("diff"))
    )
    extracted_dates.append(system_frame)

# Window that is actually plotted.
start_date = datetime(2024, 5, 30, tzinfo=timezone.utc)
end_date = datetime(2024, 6, 13, tzinfo=timezone.utc)

# A diagonal concat is used because the frames do not all have the same columns.
df_extracted = pl.concat(extracted_dates, how="diagonal")
df_extracted = df_extracted.filter(
    pl.col("creation_timestamp").is_between(start_date, end_date)
)

# One line chart per quantity, coloured by system name.
for y_column in (
    "gmp343_temperature",
    "enclosure_bme280_temperature",
    "co2_corrected",
    "diff",
):
    fig = px.line(df_extracted, x="creation_timestamp", y=y_column, color="sys_name_short")
    fig.show()



In [None]:
# Display the extracted frame. The name df_extracted is assigned in more than
# one cell above, so the content shown depends on which of them ran last.
df_extracted