In [1]:
from datetime import datetime
from datetime import timezone
import polars as pl
import os
import plotly.express as px

# Analysis window, expressed as timezone-aware UTC datetimes.
start_date = datetime(2024, 6, 21, tzinfo=timezone.utc)
end_date = datetime(2024, 7, 30, 23, 59, 59, tzinfo=timezone.utc)

# Root folder for input and output files, taken from the environment.
# NOTE: this is None when DATA_DIRECTORY is not set; the os.path.join calls in
# the following cells will then fail with a TypeError.
DATA_DIRECTORY = os.environ.get("DATA_DIRECTORY")

In [2]:
# Scan the processed parquet file lazily so the time-window filter, the system
# filter and the column projection are all applied before collect()
# materialises the result.
df = (
    pl.scan_parquet(os.path.join(DATA_DIRECTORY, "processed", "1m_cal_corr_acropolis.parquet"))
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
    .filter(pl.col("system_id").is_in([7, 14]))
    .select(
        [
            "creation_timestamp",
            "system_id",
            "gmp343_corrected",
            "gmp343_temperature",
            "sht45_humidity",
            "bme280_pressure",
            "h2o_v%",
        ]
    )
    .collect()
    # System 14 is tagged with height 48; every other row (here only system 7)
    # gets 85. NOTE(review): unit is not stated in this notebook - confirm.
    .with_columns(
        pl.when(pl.col("system_id") == 14).then(48).otherwise(85).alias("measurement_height")
    )
    # Both systems report on the same timestamps, and sort() does not guarantee
    # the relative order of equal keys. The secondary key makes the row order
    # (and therefore the exported CSV) reproducible between runs.
    .sort(["creation_timestamp", "system_id"])
)

In [3]:
# Show the last five rows as a quick sanity check of the loaded frame.
df.tail(n=5)

creation_timestamp,system_id,gmp343_corrected,gmp343_temperature,sht45_humidity,bme280_pressure,h2o_v%,measurement_height
"datetime[μs, UTC]",i64,f64,f64,f64,f64,f64,i32
2024-07-17 07:10:00 UTC,14,424.353418,36.1,27.863333,945.436667,1.762593,48
2024-07-17 07:11:00 UTC,7,422.409795,36.8,28.278333,943.698333,1.86212,85
2024-07-17 07:11:00 UTC,14,423.338166,36.1,27.87,943.628333,1.766393,48
2024-07-17 07:12:00 UTC,7,422.579177,36.8,28.27,943.396667,1.862166,85
2024-07-17 07:12:00 UTC,14,423.727586,36.1,27.91,942.9675,1.770168,48


In [4]:
# Export the filtered frame as CSV into the "processed" folder under DATA_DIRECTORY.
export_path = os.path.join(DATA_DIRECTORY, "processed", "ACROPOLIS_Blutenburgstrasse.csv")
df.write_csv(export_path)

In [5]:
# Line chart of gmp343_corrected over time, one coloured trace per system_id,
# with a marker drawn at every sample.
fig = px.line(
    df,
    x="creation_timestamp",
    y="gmp343_corrected",
    color="system_id",
    markers=True,
)
fig.show()