In [1]:
from datetime import datetime
from datetime import timezone
import polars as pl
import os
import plotly.express as px

# Analysis window as timezone-aware UTC datetimes.
start_date = datetime(2024, 6, 21, tzinfo=timezone.utc)
end_date = datetime(2024, 7, 4, 23, 59, 59, tzinfo=timezone.utc)

# Base directory for input/output files, taken from the environment.
# NOTE: os.environ.get returns None when DATA_DIRECTORY is not set.
DATA_DIRECTORY = os.environ.get("DATA_DIRECTORY")

In [6]:
# Build the analysis frame from the processed parquet file.
# The whole pipeline stays lazy until the final collect() so polars can push
# the filters and the column selection down into the parquet scan.
df = (
    pl.scan_parquet(os.path.join(DATA_DIRECTORY, "processed", "1m_cal_corr_acropolis.parquet"))
    # Keep rows inside the analysis window (is_between is inclusive on both
    # ends by default) that belong to one of the two systems of interest.
    .filter(
        pl.col("creation_timestamp").is_between(start_date, end_date)
        & pl.col("system_id").is_in([7, 14])
    )
    .select(
        [
            "creation_timestamp",
            "system_id",
            "gmp343_corrected",
            "gmp343_temperature",
            "sht45_humidity",
            "bme280_pressure",
            "h2o_v%",
        ]
    )
    # Tag each row with a measurement height: 48 for system 14, 85 otherwise
    # (after the filter above, "otherwise" can only be system 7).
    .with_columns(
        pl.when(pl.col("system_id") == 14).then(48).otherwise(85).alias("measurement_height")
    )
    # Both systems report on the same timestamps, and sort() does not promise
    # to keep the order of equal keys. The secondary key makes the row order
    # (and therefore the exported CSV) reproducible between runs.
    .sort(["creation_timestamp", "system_id"])
    .collect()
)

In [8]:
# Preview the last five rows of the frame (5 is also the default for tail()).
df.tail(n=5)

creation_timestamp,system_id,gmp343_corrected,gmp343_temperature,sht45_humidity,bme280_pressure,h2o_v%,measurement_height
"datetime[μs, UTC]",i64,f64,f64,f64,f64,f64,i32
2024-07-04 10:34:00 UTC,14,413.617997,37.883333,23.89,936.805,1.680859,48
2024-07-04 10:35:00 UTC,7,413.756269,38.7,24.093333,933.93,1.777029,85
2024-07-04 10:35:00 UTC,14,413.907073,37.883333,23.706667,933.793333,1.67334,48
2024-07-04 10:36:00 UTC,7,416.97728,38.75,24.275,934.14,1.794849,85
2024-07-04 10:36:00 UTC,14,414.53917,37.9,23.465,933.095,1.659018,48


In [9]:
# Export the prepared frame as CSV next to the other processed files.
csv_path = os.path.join(DATA_DIRECTORY, "processed", "ACROPOLIS_Blutenburgstrasse.csv")
df.write_csv(csv_path)

In [10]:
# Line chart of gmp343_corrected over creation_timestamp, colored by
# system_id, with a marker drawn on every data point.
fig = px.line(
    df,
    x="creation_timestamp",
    y="gmp343_corrected",
    color="system_id",
    markers=True,
)
fig.show()