In [6]:
from datetime import datetime
from datetime import timezone
import polars as pl
import os
import plotly.express as px

# Bounds of the analysis window, both timezone-aware (UTC).
start_date = datetime(2024, 6, 21, tzinfo=timezone.utc)
end_date = datetime(2024, 7, 30, 23, 59, 59, tzinfo=timezone.utc)

# Root folder for input/output files, taken from the environment.
# NOTE: this is None when the DATA_DIRECTORY variable is not set.
DATA_DIRECTORY = os.getenv("DATA_DIRECTORY")

In [7]:
# Fail early with a readable message: without this guard a missing environment
# variable only surfaces as an opaque TypeError from os.path.join(None, ...).
if DATA_DIRECTORY is None:
    raise RuntimeError("Environment variable DATA_DIRECTORY is not set")

# Build the whole query lazily and collect once at the end, so the derived
# column and the sort are part of the optimised plan instead of running eagerly.
df = (
    pl.scan_parquet(os.path.join(DATA_DIRECTORY, "processed", "1m_cal_corr_acropolis.parquet"))
    # Keep only rows inside the [start_date, end_date] window ...
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
    # ... and only the two systems of interest.
    .filter(pl.col("system_id").is_in([7, 14]))
    .select(
        [
            "creation_timestamp",
            "system_id",
            "gmp343_corrected",
            "gmp343_temperature",
            "sht45_humidity",
            "bme280_pressure",
            "h2o_v%",
        ]
    )
    # System 14 is tagged with height 48; every other remaining system (i.e. 7,
    # given the filter above) with 85. Units are not stated here - presumably
    # metres, TODO confirm.
    .with_columns(
        pl.when(pl.col("system_id") == 14).then(48).otherwise(85).alias("measurement_height")
    )
    # Both systems report on the same timestamps, so sorting on the timestamp
    # alone leaves the order of ties unspecified; the secondary key makes the
    # row order (and therefore the CSV export) reproducible.
    .sort(["creation_timestamp", "system_id"])
    .collect()
)

In [8]:
# Show the last five rows as a quick sanity check of the loaded frame.
df.tail(n=5)

creation_timestamp,system_id,gmp343_corrected,gmp343_temperature,sht45_humidity,bme280_pressure,h2o_v%,measurement_height
"datetime[μs, UTC]",i64,f64,f64,f64,f64,f64,i32
2024-07-11 07:12:00 UTC,14,453.302583,37.566667,40.568333,945.568333,2.779734,48
2024-07-11 07:13:00 UTC,7,454.340131,38.7,40.14,941.626667,2.936369,85
2024-07-11 07:13:00 UTC,14,452.521242,37.566667,40.473333,943.808333,2.778396,48
2024-07-11 07:14:00 UTC,7,456.929294,38.74,40.202,942.268,2.945236,85
2024-07-11 07:14:00 UTC,14,452.031122,37.58,40.504,941.946,2.788015,48


In [9]:
# Export the filtered frame as CSV into the "processed" folder.
output_csv_path = os.path.join(DATA_DIRECTORY, "processed", "ACROPOLIS_Blutenburgstrasse.csv")
df.write_csv(output_csv_path)

In [10]:
# Time series of the corrected GMP343 reading, one line (with markers) per system.
fig = px.line(
    df,
    x="creation_timestamp",
    y="gmp343_corrected",
    color="system_id",
    markers=True,
)
fig.show()