In [13]:
from datetime import datetime
from datetime import timezone
import polars as pl
import os
import plotly.express as px

# Analysis window: both endpoints are timezone-aware (UTC) datetimes.
start_date = datetime(2024, 6, 21, tzinfo=timezone.utc)
end_date = datetime(2024, 7, 3, 23, 59, 59, tzinfo=timezone.utc)

# Root folder of the data files, taken from the environment (None when unset).
DATA_DIRECTORY = os.getenv("DATA_DIRECTORY")

In [31]:
# Lazily scan the processed parquet file, keep only rows inside the analysis
# window for systems 7 and 14, pick the columns of interest, then materialise.
df = (
    pl.scan_parquet(
        os.path.join(DATA_DIRECTORY, "processed", "1m_cal_corr_acropolis.parquet")
    )
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
    .filter(pl.col("system_id").is_in([7, 14]))
    .select(
        [
            "creation_timestamp",
            "system_id",
            "gmp343_corrected",
            "gmp343_temperature",
            "sht45_humidity",
            "bme280_pressure",
            "h2o_v%",
        ]
    )
    .collect()
    # Tag every row with a height value: 48 for system 14, 85 for all other
    # rows (only system 7 remains after the filter above).
    .with_columns(
        pl.when(pl.col("system_id") == 14)
        .then(48)
        .otherwise(85)
        .alias("measurement_height")
    )
)

In [33]:
# Preview the first five rows of the filtered frame.
df.head(5)

creation_timestamp,system_id,gmp343_corrected,gmp343_temperature,sht45_humidity,bme280_pressure,h2o_v%,measurement_height
"datetime[μs, UTC]",i64,f64,f64,f64,f64,f64,i32
2024-06-21 12:05:00 UTC,7,520.957028,29.72,45.924,940.004,2.041718,85
2024-06-21 12:06:00 UTC,7,491.366954,29.9,46.155,940.633333,2.071946,85
2024-06-21 12:07:00 UTC,7,492.748623,30.1,46.3,943.54,2.095979,85
2024-06-21 12:12:00 UTC,7,424.679485,30.06,39.988,941.192,1.810594,85
2024-06-21 12:13:00 UTC,7,424.32191,30.266667,40.088333,940.205,1.838702,85


In [34]:
# Export the filtered frame as CSV into the "processed" folder.
csv_export_path = os.path.join(
    DATA_DIRECTORY, "processed", "ACROPOLIS_Blutenburgstrasse.csv"
)
df.write_csv(csv_export_path)

In [22]:
# Line chart of gmp343_corrected over time, one coloured trace per system_id,
# with a marker drawn at every data point.
fig = px.line(
    df,
    x="creation_timestamp",
    y="gmp343_corrected",
    color="system_id",
    markers=True,
)
fig.show()