In [12]:
import polars as pl
import os
import plotly.express as px
from datetime import datetime
from datetime import timezone

sensor_id = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]

DATA_DIRECTORY = os.environ.get("DATA_DIRECTORY")

In [13]:
df_raw = pl.scan_parquet(os.path.join(DATA_DIRECTORY, "download", "acropolis.parquet"))

In [15]:
df_raw.sort("creation_timestamp").tail().collect()

In [5]:
def plot_sensor_measurement(
    df,
    sensor_id,
    col_name: str,
    filter="1h",
    cut_below: float | None = None,
    cut_above: float | None = None,
):
    df = df.select("creation_timestamp", "system_name", col_name).sort(
        "creation_timestamp"
    )

    l_df = []

    for id in sensor_id:
        df_t = df.filter(pl.col("system_name") == f"tum-esm-midcost-raspi-{id}").filter(
            pl.col(col_name) > 0
        )
        # additional filters < and >
        if cut_below != None:
            df_t = df_t.filter(pl.col(col_name) > cut_below)

        if cut_above != None:
            df_t = df_t.filter(pl.col(col_name) < cut_above)

        # time averaging
        if filter != None:
            df_t = (
                df_t.groupby_dynamic("creation_timestamp", every=filter)
                .agg(
                    [
                        pl.all().exclude(["creation_timestamp"]).mean(),
                    ]
                )
                .with_columns(
                    pl.lit(f"tum-esm-midcost-raspi-{id}").alias("system_name")
                )
            )

        l_df.append(df_t)

    df_agg = pl.concat(l_df, how="vertical")

    fig = px.line(
        df_agg,
        x="creation_timestamp",
        y=col_name,
        markers=True,
        title=col_name,
        color="system_name",
    )
    fig.show()


In [20]:
#2024-07-04 00:07:16.110000+00:00

start_date = datetime(2024, 7, 4, 0, 7, 10).replace(tzinfo=timezone.utc)
end_date = datetime(2024, 7, 4, 0, 7, 20).replace(tzinfo=timezone.utc)

df_plot = df_raw.filter(pl.col("creation_timestamp").is_between(start_date, end_date)).collect()
#.filter(pl.col("system_name") == "tum-esm-midcost-raspi-2")

In [18]:
plot_sensor_measurement(df_plot,sensor_id=sensor_id,col_name="sht45_humidity",filter='10s')


`groupby_dynamic` is deprecated. It has been renamed to `group_by_dynamic`.



