In [1]:
import os
from datetime import datetime
from datetime import timezone
import polars as pl
import polars.selectors as cs
import plotly.express as px
from lmu_meteo_api import interface
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Root folders for the input data, read from environment variables.
# os.environ.get returns None when the corresponding variable is not set.
DATA_DIRECTORY = os.environ.get("DATA_DIRECTORY")
PICARRO_DATA_DIRECTORY = os.environ.get("PICARRO_DATA_DIRECTORY")
# Backward-compatible alias: the constant was originally spelled with a
# lowercase trailing "y"; keep that name so existing references still resolve.
PICARRO_DATA_DIRECTORy = PICARRO_DATA_DIRECTORY

In [2]:
# Water-corrected measurement data, opened lazily: scan_parquet only builds a
# query plan, nothing is read until .collect() is called on a derived frame.
df_10m = pl.scan_parquet(
    os.path.join(
        DATA_DIRECTORY,
        "processed",
        "10m_cal_corr_acropolis.parquet",
    )
)

In [3]:
def get_lmu_data(start_time="2024-01-01T00-00-00", end_time= "2024-01-02T00-00-00", station_id = 'MIM01', every="10m"):
    """Download LMU meteo station data and average it into fixed time bins.

    Args:
        start_time: Start of the requested period, passed unchanged to the
            LMU API (string such as "2024-01-01T00-00-00").
        end_time: End of the requested period, same format as ``start_time``.
        station_id: Identifier of the station to query.
        every: Bin width handed to ``group_by_dynamic`` (polars duration
            string). Defaults to "10m".

    Returns:
        A polars DataFrame with one row per time bin. It contains
        "creation_timestamp" (microsecond precision, UTC), a constant
        "sys_name_short" column with the value "lmu meteo", and the per-bin
        mean of every downloaded parameter. The 30 m temperature, 30 m
        relative humidity and pressure columns are renamed to
        "temperature (°C)", "relative humidity (%)" and "pressure (mbar)".
    """
    parameters = [
        "air_temperature_2m",
        "air_temperature_30m",
        "relative_humidity_2m",
        "relative_humidity_30m",
        "wind_speed_30m",
        "wind_from_direction_30m",
        "air_pressure",
    ]

    # download from the API
    lmu_api = interface.meteo_data()
    data = lmu_api.get_meteo_data(
        parameters=parameters,
        station_id=station_id,
        start_time=start_time,
        end_time=end_time,
    )

    # NOTE(review): "wind_from_direction_30m" is averaged with a plain
    # arithmetic mean below, which is not meaningful for an angle that wraps
    # around (e.g. 350 and 10 average to 180). Behaviour is kept as-is here;
    # confirm before using the averaged wind direction.
    df_lmu = (
        pl.from_pandas(data, include_index=True)
        .rename({"time": "creation_timestamp"})
        # The timestamps are stamped as UTC with microsecond precision.
        .with_columns(
            pl.col("creation_timestamp")
            .dt.cast_time_unit("us")
            .dt.replace_time_zone("UTC")
        )
        .with_columns(
            [
                # Presumably Kelvin -> °C (the 30 m column is renamed to
                # "temperature (°C)" below) -- TODO confirm API units.
                pl.col("air_temperature_30m") - 273.15,
                pl.col("air_temperature_2m") - 273.15,
                # Presumably Pa -> mbar (renamed to "pressure (mbar)" below).
                pl.col("air_pressure") / 100,
            ]
        )
        .rename(
            {
                "air_temperature_30m": "temperature (°C)",
                "relative_humidity_30m": "relative humidity (%)",
                "air_pressure": "pressure (mbar)",
            }
        )
        .with_columns(pl.lit("lmu meteo").alias("sys_name_short"))
        # group_by_dynamic requires the index column to be sorted.
        .sort("creation_timestamp")
        .group_by_dynamic("creation_timestamp", every=every, by="sys_name_short")
        .agg(pl.all().exclude(["creation_timestamp", "sys_name_short"]).mean())
    )

    return df_lmu

In [4]:
def plot_with_lmu_data(start_date, end_date, system_ids, df_lmu):
    """Plot pressure, humidity and temperature of selected systems next to LMU data.

    Reads the module-level lazy frame ``df_10m``, keeps the rows whose
    "creation_timestamp" lies between ``start_date`` and ``end_date`` and whose
    "system_id" is in ``system_ids``, renames the sensor columns to the shared
    plot labels, appends the result to ``df_lmu`` and shows one plotly line
    chart per quantity, coloured by "sys_name_short".

    Args:
        start_date: Lower bound for "creation_timestamp".
        end_date: Upper bound for "creation_timestamp".
        system_ids: Collection of "system_id" values to include.
        df_lmu: Reference DataFrame that uses the same column labels
            (as produced by ``get_lmu_data``).

    Returns:
        None. The figures are displayed via ``fig.show()``.
    """
    df_systems = (
        df_10m
        .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
        .filter(pl.col("system_id").is_in(system_ids))
        .rename(
            {
                "gmp343_temperature": "temperature (°C)",
                "sht45_humidity": "relative humidity (%)",
                "bme280_pressure": "pressure (mbar)",
            }
        )
        # Sort on both keys at once: sorting by time and then re-sorting by
        # system name alone does not guarantee that rows stay in time order
        # within a system, and px.line connects points in row order.
        .sort(["sys_name_short", "creation_timestamp"])
        .collect()
    )

    # "diagonal" keeps the union of columns and fills missing ones with null.
    df_plot = pl.concat([df_lmu, df_systems], how="diagonal")

    for quantity in ("pressure (mbar)", "relative humidity (%)", "temperature (°C)"):
        fig = px.line(df_plot, x="creation_timestamp", y=quantity, color="sys_name_short")
        fig.show()

In [5]:
# Comparison window (UTC) and the systems to include in the plots.
start_date = datetime(2024, 2, 22, 0, 0, 0, tzinfo=timezone.utc)
end_date = datetime(2024, 2, 28, 23, 59, 59, tzinfo=timezone.utc)

system_ids = [3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 18, 20]

# Derive the API time strings from the datetimes above so both data sources
# always cover the same window. The pattern is zero-padded, matching the
# default arguments of get_lmu_data (e.g. "2024-01-01T00-00-00").
LMU_TIME_FORMAT = "%Y-%m-%dT%H-%M-%S"

df_lmu = get_lmu_data(
    start_time=start_date.strftime(LMU_TIME_FORMAT),
    end_time=end_date.strftime(LMU_TIME_FORMAT),
)
plot_with_lmu_data(start_date, end_date, system_ids, df_lmu)