In [4]:
import os
from datetime import datetime
from datetime import timezone
import polars as pl
import polars.selectors as cs
import plotly.express as px
from lmu_meteo_api import interface
import sys

from datetime import datetime

# Project layout: the root is taken to be two directory levels above the
# current working directory, with "pipeline" and "data" directly beneath it.
PROJECT_PATH = os.path.abspath(os.path.join("..", ".."))
PIPELINE_PATH = os.path.join(PROJECT_PATH, "pipeline")
DATA_DIRECTORY = os.path.join(PROJECT_PATH, "data")

# NOTE(review): not referenced by any cell visible in this notebook section.
unflagged_data = False

# The pipeline folder must be on sys.path before its `utils` package can be
# imported; the membership check keeps re-runs of this cell from appending
# the same entry again.
if PIPELINE_PATH not in sys.path:
    sys.path.append(PIPELINE_PATH)

from utils.paths import (
    PROCESSED_PICARRO_DATA_DIRECTORY,
    POSTPROCESSED_DATA_DIRECTORY,
)
from utils.import_data import import_acropolis_system_data

# Fail fast if the expected data directories are missing.
assert os.path.exists(POSTPROCESSED_DATA_DIRECTORY)
assert os.path.exists(PROCESSED_PICARRO_DATA_DIRECTORY)

In [16]:
# Analysis window as naive datetimes: from midnight on the first day up to
# and including the last second of the final day.
start_date = datetime(year=2024, month=2, day=22)
end_date = datetime(year=2024, month=4, day=17, hour=23, minute=59, second=59)

# System ids 1 through 20, used later to select which systems are plotted.
system_ids = list(range(1, 21))

# Aggregation interval handed to polars `group_by_dynamic(every=...)`.
# NOTE: this name shadows the builtin filter(); it is kept because other
# cells in this notebook reference it under this name.
filter = '1h'

In [None]:
from datetime import datetime

# Create datetime object
dt = datetime(2024, 2, 22, 0, 0, 0)

# Format to desired string: "YYYY-MM-DDTHH-MM-SS"
# (this assignment was missing, so the print below raised NameError on a fresh kernel)
formatted_dt = dt.strftime("%Y-%m-%dT%H-%M-%S")

print(formatted_dt)  # Output: "2024-02-22T00-00-00"

2024-02-22T00-00-00


In [6]:
# Load ACROPOLIS data.
# For every system id 1..20 the 1-minute files are read lazily, averaged per
# `filter` interval (numeric columns only, grouped by system id and name) and
# clipped to the [start_date, end_date] window.
#
# A comprehension is used instead of `for id in ...` so that no loop variable
# is left behind in the notebook namespace: the old loop rebound the builtin
# `id` to the integer 20 for every cell executed afterwards.
all_systems = [
    import_acropolis_system_data(
        years=[2024, 2025],
        target_directory=POSTPROCESSED_DATA_DIRECTORY,
        id=system_id,
        prefix="1min",
    )
    .group_by_dynamic("datetime", every=filter, group_by=["system_id", "system_name"])
    .agg(cs.numeric().mean())
    .filter(pl.col("datetime").is_between(start_date, end_date))
    for system_id in range(1, 21)
]

# "diagonal" concatenation takes the union of all columns, so systems that
# lack a column get nulls for it; the lazy plan is only executed here.
df = pl.concat(all_systems, how="diagonal").collect()

# Quick sanity check: first and last row of the combined frame.
df.head(1).vstack(df.tail(1))

system_id,system_name,datetime,ts,gmp343_raw,gmp343_compensated,gmp343_filtered,gmp343_temperature,bme280_temperature,bme280_humidity,bme280_pressure,sht45_temperature,sht45_humidity,gmp343_edge_corrected,gmp343_edge_dry,h2o_ah,h2o_v%,bme280_h2o_v%,gmp343_dry,slope,intercept,slope_interpolated,intercept_interpolated,gmp343_corrected,wxt532_direction_min,wxt532_direction_avg,wxt532_direction_max,wxt532_speed_min,wxt532_speed_avg,wxt532_speed_max,wxt532_last_update_time,wxt532_temperature,wxt532_heating_voltage,wxt532_supply_voltage,wxt532_reference_voltage,enclosure_bme280_temperature,enclosure_bme280_humidity,enclosure_bme280_pressure,raspi_cpu_temperature,raspi_disk_usage,raspi_cpu_usage,raspi_memory_usage,ups_powered_by_grid,ups_battery_is_fully_charged,ups_battery_error_detected,ups_battery_above_voltage_threshold,cal_gmp343_slope,cal_gmp343_intercept,cal_sht_45_offset,cal_bottle_id,cal_gmp343_raw,cal_gmp343_compensated,cal_gmp343_filtered,cal_gmp343_temperature,cal_bme280_temperature,cal_bme280_humidity,cal_bme280_pressure,cal_sht45_temperature,cal_sht45_humidity
i32,str,datetime[ms],f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
1,"""tum-esm-midcost-raspi-1""",2024-02-22 00:00:00,1708600000000.0,416.660556,476.849444,476.908889,29.248611,25.84375,17.163833,937.38475,25.806528,20.52475,,,5.980747,0.890436,0.744615,481.192991,0.984238,-28.565691,0.986985,-29.976044,444.953956,,,,,,,,,,,,32.1915,12.073,949.096,50.875,0.493,0.019417,0.116867,1.0,1.0,0.0,1.0,,,,,,,,,,,,,
20,,2024-04-16 03:00:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,61.291209,429.846703,499.931868,500.085714,33.137912,31.86456,0.127582,937.104505,32.540055,1.713352


In [18]:
# Preview start_date rendered as "YYYY-MM-DDTHH-MM-SS" (dashes in the time part).
f"{start_date:%Y-%m-%dT%H-%M-%S}"

'2024-02-22T00-00-00'

In [28]:
#functions
def get_lmu_data(start_date:datetime, end_date:datetime, station_id = 'MIM01', filter=filter):
    """Download LMU meteo measurements and average them per time interval.

    Args:
        start_date: Start of the requested period; sent to the API formatted
            as "%Y-%m-%dT%H-%M-%S".
        end_date: End of the requested period, formatted the same way.
        station_id: Station identifier forwarded to the LMU meteo API.
        filter: Interval passed to polars `group_by_dynamic(every=...)`.
            The default is whatever the notebook-level `filter` was bound to
            when this function was defined.

    Returns:
        A polars DataFrame with one row per interval containing "datetime",
        a constant "system_name" of "lmu meteo", and the mean of every other
        column. The 30 m temperature, 30 m relative humidity and pressure
        columns are renamed to "temperature (°C)", "relative humidity (%)"
        and "pressure (mbar)".
    """
    timestamp_format = "%Y-%m-%dT%H-%M-%S"
    start_str = start_date.strftime(timestamp_format)
    end_str = end_date.strftime(timestamp_format)

    requested_parameters = [
        "air_temperature_2m",
        "air_temperature_30m",
        "relative_humidity_2m",
        "relative_humidity_30m",
        'wind_speed_30m',
        'wind_from_direction_30m',
        "air_pressure",
    ]

    # Download from the API
    raw = interface.meteo_data().get_meteo_data(
        parameters=requested_parameters,
        station_id=station_id,
        start_time=start_str,
        end_time=end_str,
    )

    plot_labels = {
        "air_temperature_30m": "temperature (°C)",
        "relative_humidity_30m": "relative humidity (%)",
        "air_pressure": "pressure (mbar)",
    }

    # The pandas index is kept as a column; it is expected to be named "time".
    converted = (
        pl.from_pandas(raw, include_index=True)
        .rename({"time": "datetime"})
        .with_columns(
            pl.col("datetime").dt.cast_time_unit("ms"),
            # presumably Kelvin -> °C and Pa -> mbar; confirm against the API docs
            pl.col("air_temperature_30m") - 273.15,
            pl.col("air_temperature_2m") - 273.15,
            pl.col("air_pressure") / 100,
        )
        .rename(plot_labels)
        .with_columns(pl.lit("lmu meteo").alias("system_name"))
    )

    # NOTE(review): wind_from_direction_30m is averaged arithmetically like
    # every other column; confirm that is acceptable for an angular quantity.
    return (
        converted.sort("datetime")
        .group_by_dynamic("datetime", every=filter, group_by="system_name")
        .agg(pl.all().exclude(["datetime", "system_name"]).mean())
    )

def plot_with_lmu_data(start_date, end_date, system_ids, df_lmu):
    """Plot pressure, relative humidity and temperature of the ACROPOLIS
    systems together with the LMU reference series.

    Reads the notebook-level frame `df` (the loaded ACROPOLIS data), so the
    loading cell must have been executed before this function is called.

    Args:
        start_date: Lower bound (inclusive) for the "datetime" column of `df`.
        end_date: Upper bound (inclusive) for the "datetime" column of `df`.
        system_ids: Values of "system_id" to keep from `df`.
        df_lmu: LMU frame as returned by `get_lmu_data`; it is concatenated
            as-is and is not filtered by the date bounds here.

    Returns:
        None. Three plotly line figures are shown, one per quantity, with one
        trace per "system_name".
    """
    acropolis = (
        df.filter(pl.col("datetime").is_between(start_date, end_date))
        .filter(pl.col("system_id").is_in(system_ids))
        .rename(
            {
                "gmp343_temperature": "temperature (°C)",
                "sht45_humidity": "relative humidity (%)",
                "bme280_pressure": "pressure (mbar)",
            }
        )
        # Sort on both keys in one pass: a separate sort by "system_name"
        # after a sort by "datetime" does not guarantee chronological order
        # within a system (polars sort does not maintain order by default),
        # and px.line connects the points in row order.
        .sort(["system_name", "datetime"])
    )

    # Columns present in only one of the two frames are filled with nulls.
    combined = pl.concat([df_lmu, acropolis], how="diagonal")

    for quantity in ("pressure (mbar)", "relative humidity (%)", "temperature (°C)"):
        fig = px.line(combined, x="datetime", y=quantity, color="system_name")
        fig.show()

In [None]:
# Fetch the LMU series for the current analysis window and plot it together
# with the selected ACROPOLIS systems.
df_lmu = get_lmu_data(
    start_date=start_date,
    end_date=end_date,
    station_id='MIM01',
    filter=filter,
)
plot_with_lmu_data(
    start_date=start_date,
    end_date=end_date,
    system_ids=system_ids,
    df_lmu=df_lmu,
)

In [30]:
# One-year LMU download (2023-08-01 to 2024-08-01, both at midnight).
# NOTE: this rebinds the notebook-level start_date / end_date and replaces
# df_lmu, so cells executed after this one see the new window.
start_date = datetime(2023, 8, 1)
end_date = datetime(2024, 8, 1)
df_lmu = get_lmu_data(start_date, end_date)

In [31]:
# Hourly median of the LMU temperature, reduced to the rows whose timestamp
# falls in hour 3 of the day (one point per day).
df_plot = (
    df_lmu.sort("datetime")
    .group_by_dynamic("datetime", every='1h')
    .agg(pl.col("temperature (°C)").median())
    .with_columns(hour=pl.col("datetime").dt.hour())
    .filter(pl.col("hour") == 3)
)

fig = px.line(df_plot, x="datetime", y="temperature (°C)")
fig.update_layout(title='', xaxis_title='03:00 UTC for each day')
fig.show()