In [1]:
from lmu_meteo_api import interface
from datetime import datetime
from datetime import timezone
import polars as pl
import os

# Analysis window for the sensor data, as timezone-aware UTC datetimes.
# Both bounds are inclusive (default behaviour of `is_between` below).
start_date = datetime(2023, 12, 22, 0, 0, 0, tzinfo=timezone.utc)
end_date = datetime(2024, 1, 29, 23, 59, 59, tzinfo=timezone.utc)

# Numeric suffixes of the "tum-esm-midcost-raspi-<id>" systems to include.
system_ids = [3, 6, 10, 11, 12, 13, 16, 18, 20]

# Root folder of the local data, taken from the environment so that no
# machine-specific path is hardcoded in the notebook.
DATA_DIRECTORY = os.environ.get("DATA_DIRECTORY")
if not DATA_DIRECTORY:
    # Fail early with a readable message instead of the TypeError that
    # os.path.join would raise on None.
    raise RuntimeError(
        "Environment variable DATA_DIRECTORY is not set; "
        "it must point to the folder containing 'download/acropolis.parquet'."
    )

# Scan lazily so the time filter is applied while reading, then materialise
# only the rows inside the analysis window.
df = (
    pl.scan_parquet(os.path.join(DATA_DIRECTORY, "download", "acropolis.parquet"))
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
    .collect()
)

In [2]:
# Sensor column to compare against the outside temperature.
col_name = "gmp343_temperature"
# Width of the averaging window passed to the dynamic group-by; set to None
# to keep the raw samples. NOTE: this name shadows the builtin `filter`, but
# it is kept because the LMU aggregation cell further down reads it too.
filter = '1h'

# One frame per system, each with the columns
# (creation_timestamp, system_name, temperature).
l_df = []

for system_id in system_ids:
    system_name = f"tum-esm-midcost-raspi-{system_id}"

    # Restrict to this system first so that only its rows need sorting.
    df_t = (
        df.select("creation_timestamp", "system_name", col_name)
        .filter(pl.col("system_name") == system_name)
        # Keep strictly positive readings only (this also drops nulls).
        # NOTE(review): presumably removes invalid/zero sensor values - confirm.
        .filter(pl.col(col_name) > 0)
        .sort("creation_timestamp")
        .rename({col_name: "temperature"})
    )

    # time averaging
    if filter is not None:
        # NOTE(review): newer polars releases call this `group_by_dynamic`;
        # the original spelling is kept to match the installed version.
        df_t = (
            df_t.groupby_dynamic("creation_timestamp", every=filter)
            # Average only the numeric column; the constant system name is
            # re-attached below instead of being "averaged" as a string.
            .agg(pl.col("temperature").mean())
            .with_columns(pl.lit(system_name).alias("system_name"))
            # Same column order as the un-averaged branch.
            .select("creation_timestamp", "system_name", "temperature")
        )

    l_df.append(df_t)

# Stack all per-system frames into one long frame.
df_agg = pl.concat(l_df, how="vertical")


In [3]:
# Request window for the LMU meteo API, written as "YYYY-MM-DDTHH-MM-SS".
# NOTE(review): this window ends on 2024-01-17 while the sensor window in the
# first cell ends on 2024-01-29 - confirm the shorter range is intentional.
start_time, end_time = "2023-12-22T00-00-00", "2024-01-17T00-00-00"

station_id = "MIM01"  # city station

# Quantities requested from the station.
parameters = [
    "air_temperature_2m",
    "air_temperature_30m",
    "relative_humidity_2m",
    "relative_humidity_30m",
    "wind_speed_30m",
    "wind_from_direction_30m",
    "air_pressure",
]

# Fetch the measurements through the lmu_meteo_api client.
lmu_api = interface.meteo_data()
data = lmu_api.get_meteo_data(
    parameters=parameters,
    station_id=station_id,
    start_time=start_time,
    end_time=end_time,
)

In [4]:
# Display the object returned by get_meteo_data as the cell output.
data

Unnamed: 0_level_0,air_temperature_2m,air_temperature_30m,relative_humidity_2m,relative_humidity_30m,wind_speed_30m,wind_from_direction_30m,air_pressure
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-12-22 00:00:00,278.31,278.54,94.302,90.137,3.86,304.0,94160.0
2023-12-22 00:01:00,278.32,278.56,94.361,90.267,3.63,319.0,94160.0
2023-12-22 00:02:00,278.33,278.57,94.373,90.365,3.87,319.0,94190.0
2023-12-22 00:03:00,278.31,278.58,94.398,90.385,3.84,316.0,94210.0
2023-12-22 00:04:00,278.33,278.59,94.478,90.512,3.73,308.0,94210.0
...,...,...,...,...,...,...,...
2024-01-16 23:56:00,268.71,268.66,84.374,85.418,1.10,95.0,94460.0
2024-01-16 23:57:00,268.71,268.69,84.440,85.094,1.01,171.0,94460.0
2024-01-16 23:58:00,268.73,268.71,84.443,85.088,0.93,213.0,94460.0
2024-01-16 23:59:00,268.72,268.77,84.422,85.258,0.74,203.0,94460.0


In [5]:
# Convert the downloaded pandas result into a polars frame that lines up with
# the sensor data: the index becomes a regular "creation_timestamp" column,
# timestamps get microsecond precision and a UTC time zone, and both air
# temperature columns are shifted by -273.15 (Kelvin -> degrees Celsius).
# NOTE(review): replace_time_zone only labels the values as UTC without
# converting them - this assumes the API already delivers UTC; confirm.
df_lmu = (
    pl.from_pandas(data, include_index=True)
    .rename({"time": "creation_timestamp"})
    .with_columns(
        pl.col("creation_timestamp")
        .dt.cast_time_unit("us")
        .dt.replace_time_zone("UTC")
    )
    .with_columns(
        pl.col(["air_temperature_30m", "air_temperature_2m"]) - 273.15
    )
)


In [6]:
# Display the converted polars frame as the cell output.
df_lmu

creation_timestamp,air_temperature_2m,air_temperature_30m,relative_humidity_2m,relative_humidity_30m,wind_speed_30m,wind_from_direction_30m,air_pressure
"datetime[μs, UTC]",f64,f64,f64,f64,f64,f64,f64
2023-12-22 00:00:00 UTC,5.16,5.39,94.302,90.137,3.86,304.0,94160.0
2023-12-22 00:01:00 UTC,5.17,5.41,94.361,90.267,3.63,319.0,94160.0
2023-12-22 00:02:00 UTC,5.18,5.42,94.373,90.365,3.87,319.0,94190.0
2023-12-22 00:03:00 UTC,5.16,5.43,94.398,90.385,3.84,316.0,94210.0
2023-12-22 00:04:00 UTC,5.18,5.44,94.478,90.512,3.73,308.0,94210.0
2023-12-22 00:05:00 UTC,5.19,5.45,94.519,90.61,5.21,308.0,94220.0
2023-12-22 00:06:00 UTC,5.2,5.47,94.597,90.747,4.37,320.0,94230.0
2023-12-22 00:07:00 UTC,5.2,5.46,94.627,90.698,5.13,331.0,94230.0
2023-12-22 00:08:00 UTC,5.19,5.44,94.673,90.771,6.97,331.0,94240.0
2023-12-22 00:09:00 UTC,5.25,5.41,94.751,90.766,5.75,342.0,94240.0


In [7]:
# LMU column that is compared against the sensor temperatures.
col_name = "air_temperature_30m"

# Average the LMU series over the same `filter` window used for the sensors,
# label every row with a fixed source name and rename the value column to
# "temperature".
# NOTE: `df_lmu` is rebound here, so re-running this cell on its own fails
# (the source column is gone) until the conversion cell has been run again.
df_lmu = (
    df_lmu.select("creation_timestamp", col_name)
    .sort("creation_timestamp")
    .groupby_dynamic("creation_timestamp", every=filter)
    .agg(pl.col(col_name).mean())
    .with_columns(pl.lit("lmu meteo").alias("system_name"))
    .rename({col_name: "temperature"})
)


In [8]:
# Combine the sensor averages and the LMU reference into one long frame.
# "diagonal" matches columns by name, so the differing column order of the
# two inputs does not matter.
df_temp = pl.concat(
    [df_agg, df_lmu],
    how="diagonal",
)

In [9]:
import plotly.express as px


# One line per system_name over time, with a marker on every data point.
fig = px.line(
    df_temp,
    x="creation_timestamp",
    y="temperature",
    color="system_name",
    markers=True,
    title="Outside Temperature vs GMP343 Sensor Temperature",
)
fig.show()