In [4]:
import os
import sys
from datetime import datetime

import matplotlib.pyplot as plt
import polars as pl
import polars.selectors as cs

# Project layout, resolved relative to the notebook's working directory.
PROJECT_PATH = os.path.abspath("..")
PIPELINE_PATH = os.path.join(PROJECT_PATH, "pipeline")
DATA_DIRECTORY = os.path.join(PROJECT_PATH, "data")

# NOTE(review): not referenced by any cell visible in this notebook — confirm
# whether it is still needed.
unflagged_data = False

# The `utils` package is imported from the pipeline directory, so that
# directory has to be on sys.path before the imports below run.
if PIPELINE_PATH not in sys.path:
    sys.path.append(PIPELINE_PATH)

from utils.paths import PROCESSED_PICARRO_DATA_DIRECTORY, POSTPROCESSED_DATA_DIRECTORY
from utils.import_data import import_acropolis_system_data
from utils.plot_dataframes import plot_sensor_measurement, plot_wind_rose, plot_co2_rose

# Fail early, and say which directory is missing, before any loading cell runs.
assert os.path.exists(POSTPROCESSED_DATA_DIRECTORY), (
    f"Postprocessed data directory not found: {POSTPROCESSED_DATA_DIRECTORY}"
)
assert os.path.exists(PROCESSED_PICARRO_DATA_DIRECTORY), (
    f"Processed Picarro data directory not found: {PROCESSED_PICARRO_DATA_DIRECTORY}"
)

In [5]:
# Filters
# Analysis window; the loading cells pass both bounds to `is_between`.
start_date = datetime(year=2025, month=6, day=1)
end_date = datetime(year=2025, month=6, day=30)

# Bucket width handed to `group_by_dynamic(every=...)` when loading the data.
# NOTE(review): this name shadows the builtin `filter`; it is read by the
# ACROPOLIS loading cell, so renaming it has to touch both cells together.
filter = "1m"

# Full network (systems 1..20), then narrowed to the systems under study.
ids = list(range(1, 21))
ids = [3, 6]

In [6]:
# Load ACROPOLIS Data
# Each system is read lazily, averaged into `filter`-wide time buckets per
# (system_id, system_name), and then restricted to [start_date, end_date].
if not ids:
    raise ValueError("`ids` must contain at least one system id")

# A comprehension keeps the loop variable out of the kernel namespace, so the
# builtin `id` is not shadowed afterwards and no `del` clean-up is required.
all_systems = [
    import_acropolis_system_data(
        years=[2024, 2025],
        target_directory=POSTPROCESSED_DATA_DIRECTORY,
        id=system_id,
        prefix="1min",
    )
    .group_by_dynamic("datetime", every=filter, group_by=["system_id", "system_name"])
    .agg(cs.numeric().mean())
    .filter(pl.col("datetime").is_between(start_date, end_date))
    for system_id in ids
]

# how="diagonal" unions the columns of all systems; the plan is executed once here.
df = pl.concat(all_systems, how="diagonal").collect()

# Preview: first and last row of the combined frame.
df.head(1).vstack(df.tail(1))

system_id,system_name,datetime,ts,gmp343_raw,gmp343_compensated,gmp343_filtered,gmp343_temperature,bme280_temperature,bme280_humidity,bme280_pressure,sht45_temperature,sht45_humidity,gmp343_edge_corrected,gmp343_edge_dry,h2o_v%,gmp343_dry,slope,intercept,slope_interpolated,intercept_interpolated,gmp343_corrected,wxt532_direction_min,wxt532_direction_avg,wxt532_direction_max,wxt532_speed_min,wxt532_speed_avg,wxt532_speed_max,wxt532_last_update_time,wxt532_temperature,wxt532_heating_voltage,wxt532_supply_voltage,wxt532_reference_voltage,enclosure_bme280_temperature,enclosure_bme280_humidity,enclosure_bme280_pressure,raspi_cpu_temperature,raspi_disk_usage,raspi_cpu_usage,raspi_memory_usage,ups_powered_by_grid,ups_battery_is_fully_charged,ups_battery_error_detected,ups_battery_above_voltage_threshold,cal_gmp343_slope,cal_gmp343_intercept,cal_sht_45_offset
i32,str,datetime[ms],f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
3,"""acropolis-3""",2025-06-01 00:00:00,1748700000000.0,378.766667,453.966667,454.566667,42.6,39.64,15.698333,939.326667,39.822083,21.081317,440.966667,463.416667,1.901414,463.377389,0.993677,-19.948737,0.986146,-15.406682,441.55091,,,,,,,,,,,,29.12,30.44,956.88,50.1,0.323,0.02,0.082,1.0,1.0,0.0,1.0,0.9903,-17.92,1.07
6,"""acropolis-6""",2025-06-16 09:20:00,1750100000000.0,358.3,403.35,405.15,30.8,26.845,35.26,950.625,27.0188,37.99785,430.65,412.35,1.777091,412.480148,1.035123,2.809571,1.035123,2.809571,429.777194,20.0,19.0,353.0,0.4,1.6,5.2,1750100000.0,,,,,34.45,24.14,964.57,55.0,0.327,0.017,0.074,1.0,1.0,0.0,1.0,1.0401,1.77,1.66


In [None]:
# NOTE(review): this assertion always fails. It presumably acts as a deliberate
# stop so that "Run All" halts after the ACROPOLIS data is loaded — confirm, and
# remove this cell if the notebook is meant to run top to bottom unattended.
assert(False)

In [10]:
# Load Picarro Data
# Both calibrated 1-minute Picarro files (DWD first, then ICOS) are scanned
# lazily, limited to [start_date, end_date], collected, and stacked with a
# diagonal concat. No per-instrument frame is left behind in the namespace.
df_p = pl.concat(
    [
        pl.scan_parquet(os.path.join(PROCESSED_PICARRO_DATA_DIRECTORY, parquet_name))
        .filter(pl.col("datetime").is_between(start_date, end_date))
        .collect()
        for parquet_name in (
            "Calibrated_1_min_DWD_Picarro_G2301_413.parquet",   # DWD Picarro
            "Calibrated_1_min_ICOS_Picarro_G2401_529.parquet",  # ICOS Picarro
        )
    ],
    how="diagonal",
)

# Preview: first and last row of the combined frame.
df_p.head(1).vstack(df_p.tail(1))

datetime,system_name,system_id,picarro_corrected,h2o_reported
datetime[ms],str,i32,f64,f64
2025-06-01 00:00:00,"""Picarro_G2401""",529,441.658023,1.830506
2025-06-03 23:59:00,"""Picarro_G2401""",529,450.425353,1.914866


In [11]:
# Load CAL ACROPOLIS Data (no aggregation)
# Calibration records are kept at their native timestamps and only restricted
# to [start_date, end_date].
if not ids:
    raise ValueError("`ids` must contain at least one system id")

# A comprehension keeps the loop variable out of the kernel namespace, so the
# builtin `id` is not shadowed afterwards and no `del` clean-up is required.
all_systems = [
    import_acropolis_system_data(
        years=[2024, 2025],
        target_directory=POSTPROCESSED_DATA_DIRECTORY,
        id=system_id,
        prefix="Cal_1min",
    ).filter(pl.col("datetime").is_between(start_date, end_date))
    for system_id in ids
]

# how="diagonal" unions the columns of all systems; the plan is executed once here.
df_cal = pl.concat(all_systems, how="diagonal").collect()

# Preview: first and last row of the combined frame.
df_cal.head(1).vstack(df_cal.tail(1))

datetime,system_id,cal_bottle_id,cal_gmp343_raw,cal_gmp343_compensated,cal_gmp343_filtered,cal_gmp343_temperature,cal_bme280_temperature,cal_bme280_humidity,cal_bme280_pressure,cal_sht45_temperature,cal_sht45_humidity,cal_gmp343_slope,cal_gmp343_intercept,cal_sht_45_offset
datetime[ms],i32,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2025-06-02 01:01:39.658,3,120.0,324.6,388.8,438.2,42.6,39.54,12.3,947.88,39.7616,18.1761,,,
2025-06-16 01:32:03.037,6,119.0,342.7,386.5,386.3,31.6,34.67,0.0,957.94,34.8936,0.1691,,,


In [None]:
# Sort ascending by `gmp343_temperature` and keep the last 10000 rows, carrying
# along the other two temperature columns, then plot the GMP343 temperature.
df_plot = (
    df.sort("gmp343_temperature")
    .select(
        "datetime",
        "system_id",
        "gmp343_temperature",
        "sht45_temperature",
        "enclosure_bme280_temperature",
    )
    .tail(10000)
)

plot_sensor_measurement(df_plot, ids, col_name="gmp343_temperature")

In [12]:
# Plot `enclosure_bme280_humidity` from the aggregated frame for the systems in `ids`.
plot_sensor_measurement(df, ids, col_name="enclosure_bme280_humidity")

In [7]:
# Plot `gmp343_temperature` from the aggregated frame for the systems in `ids`.
plot_sensor_measurement(df, ids, col_name="gmp343_temperature")

In [9]:
# Plot `gmp343_corrected` from the aggregated frame for the systems in `ids`.
plot_sensor_measurement(df, ids, col_name="gmp343_corrected")

In [14]:
# Plot `picarro_corrected` from the Picarro frame. 529 is the `system_id` shown
# for the Picarro_G2401 rows in the preview of `df_p`.
# NOTE(review): a bare int is passed here where other cells pass a list — confirm
# that `plot_sensor_measurement` accepts both.
plot_sensor_measurement(df_p, 529, col_name="picarro_corrected")

In [16]:
# Plot `wxt532_speed_avg` from the aggregated frame for the systems in `ids`.
plot_sensor_measurement(df, ids, col_name="wxt532_speed_avg")

In [17]:
# Plot `sht45_humidity` from the aggregated frame for the systems in `ids`.
plot_sensor_measurement(df, ids, col_name="sht45_humidity")

In [15]:
# Plot `h2o_v%` from the aggregated frame for the systems in `ids`.
plot_sensor_measurement(df, ids, col_name="h2o_v%")

In [None]:
# Plot `bme280_humidity` from the aggregated frame for the systems in `ids`.
plot_sensor_measurement(df, ids, col_name="bme280_humidity")

In [None]:
# Plot `ups_powered_by_grid` from the aggregated frame for the systems in `ids`.
plot_sensor_measurement(df, ids, col_name="ups_powered_by_grid")

In [None]:
# `bme280_h2o_v%` is not part of the schema shown when `df` was previewed, so
# only plot it when the loaded data actually provides the column instead of
# letting the cell fail during a full run.
if "bme280_h2o_v%" in df.columns:
    plot_sensor_measurement(df, ids, col_name="bme280_h2o_v%")
else:
    print("Skipping plot: column 'bme280_h2o_v%' is not present in df")

In [None]:
# Per-row difference `sht45_humidity - bme280_humidity`; rows for which
# `is_not_nan()` does not hold are dropped before plotting.
df_plot = (
    df.with_columns(
        diff_inflow_humidity=pl.col("sht45_humidity") - pl.col("bme280_humidity")
    )
    .filter(pl.col("diff_inflow_humidity").is_not_nan())
    .select("datetime", "diff_inflow_humidity", "system_id")
    .sort("system_id")
)

plot_sensor_measurement(df_plot, ids, col_name="diff_inflow_humidity")

In [None]:
# Per-row difference `h2o_v% - bme280_h2o_v%`.
# `bme280_h2o_v%` is not part of the schema shown when `df` was previewed;
# referencing a missing column makes `with_columns` raise, so the computation
# is only attempted when the column is available.
if "bme280_h2o_v%" in df.columns:
    df_plot = (
        df.with_columns(
            diff_inflow_humidity=pl.col("h2o_v%") - pl.col("bme280_h2o_v%")
        )
        .filter(pl.col("diff_inflow_humidity").is_not_nan())
        .select("datetime", "diff_inflow_humidity", "system_id")
        .sort("system_id")
    )

    plot_sensor_measurement(df_plot, ids, col_name="diff_inflow_humidity")
else:
    print("Skipping plot: column 'bme280_h2o_v%' is not present in df")

In [None]:
# Plot `sht45_temperature` from the aggregated frame for the systems in `ids`.
plot_sensor_measurement(df, ids, col_name="sht45_temperature")

In [None]:
# Plot `bme280_temperature` from the aggregated frame for the systems in `ids`.
plot_sensor_measurement(df, ids, col_name="bme280_temperature")

In [None]:
# Per-row difference `sht45_temperature - bme280_temperature`; rows for which
# `is_not_nan()` does not hold are dropped before plotting.
df_plot = (
    df.with_columns(
        diff_inflow_temperature=pl.col("sht45_temperature") - pl.col("bme280_temperature")
    )
    .filter(pl.col("diff_inflow_temperature").is_not_nan())
    .select("datetime", "diff_inflow_temperature", "system_id")
    .sort("system_id")
)

plot_sensor_measurement(df_plot, ids, col_name="diff_inflow_temperature")

In [None]:
# Plot `bme280_pressure` from the aggregated frame for the systems in `ids`.
plot_sensor_measurement(df, ids, col_name="bme280_pressure")

In [8]:
# Plot `enclosure_bme280_temperature` from the aggregated frame for the systems in `ids`.
plot_sensor_measurement(df, ids, col_name="enclosure_bme280_temperature")

In [None]:
# Plot `gmp343_raw` from the aggregated frame for the systems in `ids`.
# The system ids are passed positionally, like in the other plotting cells, so
# the call does not depend on the keyword name of that parameter.
plot_sensor_measurement(df, ids, col_name="gmp343_raw")

In [None]:
# Plot `gmp343_compensated` from the aggregated frame for the systems in `ids`.
plot_sensor_measurement(df, ids, col_name="gmp343_compensated")

In [None]:
# Plot `wxt532_temperature` from the aggregated frame for the systems in `ids`.
plot_sensor_measurement(df, ids, col_name="wxt532_temperature")

In [None]:
# CO2 roses, one per site. Each entry is (system_id, site label); the trailing
# comment is the site code. Commented-out entries are sites that are currently
# not plotted — uncomment an entry to enable it.
# NOTE(review): `df` only holds the systems listed in `ids`; confirm how
# `plot_co2_rose` behaves for a system id without rows in `df`.
co2_rose_sites = [
    (1, 'Klenze Gymnasium'),                      # KLEG
    # (2, 'X'),                                   # X
    # (3, 'X'),                                   # X
    # (4, 'X'),                                   # X
    (5, 'DLR'),                                   # DLRO
    (6, 'TUM'),                                   # TUMN
    # (7, 'Blutenburgturm 85m'),                  # FB85
    (8, 'Klinikum Rechts-der-Isar'),              # KRDI
    (9, 'Klinikum Neuperlach'),                   # KNPL
    (10, 'Städtisches Willi Graf Gymnasium'),     # SWGG
    (11, 'Rathaus Finsing'),                      # RFIN
    (12, 'Walter-Klingenbeck-Realschule'),        # WKRT
    (13, 'Rathaus Feldkirchen'),                  # RFEL
    # (14, 'Blutenburgturm 48m'),                 # FB48
    (15, 'Stadtwerke München Zentrale'),          # SWMZ
    (16, 'Schule Maisach'),                       # SMAI
    # (17, 'Klinikum Bogenhausen'),               # KBOG
    (18, 'Städtisches Bertolt Brecht Gymnasium'), # SBBG
    # (19, 'X'),                                  # X
    (20, 'LMU Großhadern'),                       # LGRO
]

# Same calls, in the same order, as listing every site by hand.
for site_system_id, site_label in co2_rose_sites:
    plot_co2_rose(df, site_system_id, site_label)



In [None]:
# Wind roses, one per site. Each entry is (system_id, site label); the trailing
# comment is the site code. Commented-out entries are sites that are currently
# not plotted — uncomment an entry to enable it.
# NOTE(review): `df` only holds the systems listed in `ids`; confirm how
# `plot_wind_rose` behaves for a system id without rows in `df`.
wind_rose_sites = [
    (1, 'Klenze Gymnasium'),                      # KLEG
    # (2, 'X'),                                   # X
    # (3, 'X'),                                   # X
    # (4, 'X'),                                   # X
    (5, 'DLR'),                                   # DLRO
    (6, 'TUM'),                                   # TUMN
    # (7, 'Blutenburgturm 85m'),                  # FB85
    (8, 'Klinikum Rechts-der-Isar'),              # KRDI
    (9, 'Klinikum Neuperlach'),                   # KNPL
    (10, 'Städtisches Willi Graf Gymnasium'),     # SWGG
    (11, 'Rathaus Finsing'),                      # RFIN
    (12, 'Walter-Klingenbeck-Realschule'),        # WKRT
    (13, 'Rathaus Feldkirchen'),                  # RFEL
    # (14, 'Blutenburgturm 48m'),                 # FB48
    (15, 'Stadtwerke München Zentrale'),          # SWMZ
    (16, 'Schule Maisach'),                       # SMAI
    # (17, 'Klinikum Bogenhausen'),               # KBOG
    (18, 'Städtisches Bertolt Brecht Gymnasium'), # SBBG
    # (19, 'X'),                                  # X
    (20, 'LMU Großhadern'),                       # LGRO
]

# Same calls, in the same order, as listing every site by hand.
for site_system_id, site_label in wind_rose_sites:
    plot_wind_rose(df, site_system_id, site_label)



In [None]:
# Plot `wxt532_direction_avg` from the aggregated frame for the systems in `ids`.
plot_sensor_measurement(df, ids, col_name="wxt532_direction_avg")

In [None]:
# Plot `raspi_cpu_usage` from the aggregated frame for the systems in `ids`.
plot_sensor_measurement(df, ids, col_name="raspi_cpu_usage")

In [None]:
# Plot `raspi_memory_usage` from the aggregated frame for the systems in `ids`.
plot_sensor_measurement(df, ids, col_name="raspi_memory_usage")


In [None]:
# Plot `raspi_disk_usage` from the aggregated frame for the systems in `ids`.
plot_sensor_measurement(df, ids, col_name="raspi_disk_usage")


In [None]:
# Plot `raspi_cpu_temperature` from the aggregated frame for the systems in `ids`.
plot_sensor_measurement(df, ids, col_name="raspi_cpu_temperature")

In [None]:
# Plot `enclosure_bme280_pressure` from the aggregated frame for the systems in `ids`.
plot_sensor_measurement(df, ids, col_name="enclosure_bme280_pressure")

In [None]:
# Pressure difference `bme280_pressure - enclosure_bme280_pressure` per row.
df_plot = (
    df.with_columns(
        # `df` stacks all systems in one frame, so gaps are filled within each
        # system only; otherwise one system's enclosure pressure would be
        # carried over into the neighbouring system's rows.
        pl.col("enclosure_bme280_pressure")
        .forward_fill()
        .backward_fill()
        .over("system_id")
    )
    .with_columns(
        measurement_pressure=pl.col("bme280_pressure") - pl.col("enclosure_bme280_pressure")
    )
    .filter(pl.col("measurement_pressure").is_not_nan())
    .select("datetime", "measurement_pressure", "system_id")
    # Sorting by system alone does not promise to keep rows in time order;
    # the line plot below needs chronological points.
    .sort("system_id", "datetime")
)

# One figure per selected system.
for system_id in ids:
    system_df = df_plot.filter(pl.col("system_id") == system_id)

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(
        system_df["datetime"],
        system_df["measurement_pressure"],
        marker='o',
        label=f"System {system_id}",
    )

    # Labels and cosmetics
    ax.set_title(f"Pressure Drop During Measurement for System {system_id}", fontsize=14)
    ax.set_xlabel("Timestamp", fontsize=12)
    ax.set_ylabel("measurement_pressure", fontsize=12)
    ax.tick_params(axis="x", rotation=45)
    ax.grid(True, linestyle='--', alpha=0.6)
    ax.legend(fontsize=12)

    fig.tight_layout()
    plt.show()

# Calibration Data

In [None]:
# Plot `cal_bottle_id` from the calibration frame for the systems in `ids`.
# The frame and the system ids are passed positionally, like in the other
# plotting cells, so the call does not depend on the parameters' keyword names.
plot_sensor_measurement(df_cal, ids, col_name="cal_bottle_id")

In [12]:
# Plot `cal_gmp343_filtered` from the calibration frame for the systems in `ids`.
plot_sensor_measurement(df_cal, ids, col_name="cal_gmp343_filtered")

In [None]:
# Plot `cal_sht45_humidity` from the calibration frame for system 17 only.
# NOTE(review): the system is hard-coded instead of using `ids`, and `df_cal`
# is loaded only for the systems in `ids` — confirm this cell is still wanted.
plot_sensor_measurement(df_cal, [17], col_name="cal_sht45_humidity")

In [None]:
# Plot `cal_sht45_temperature` from the calibration frame for the systems in `ids`.
plot_sensor_measurement(df_cal, ids, col_name="cal_sht45_temperature")

In [None]:
# Plot `cal_gmp343_temperature` from the calibration frame for the systems in `ids`.
plot_sensor_measurement(df_cal, ids, col_name="cal_gmp343_temperature")

In [None]:
# Plot `cal_bme280_temperature` from the calibration frame for the systems in `ids`.
plot_sensor_measurement(df_cal, ids, col_name="cal_bme280_temperature")

In [None]:
# Plot `cal_bme280_humidity` from the calibration frame for the systems in `ids`.
plot_sensor_measurement(df_cal, ids, col_name="cal_bme280_humidity")

In [None]:
# Plot `cal_sht45_humidity` from the calibration frame for the systems in `ids`.
plot_sensor_measurement(df_cal, ids, col_name="cal_sht45_humidity")

In [None]:
# Plot `cal_bme280_pressure` from the calibration frame for the systems in `ids`.
plot_sensor_measurement(df_cal, ids, col_name="cal_bme280_pressure")

In [None]:
# Plot `cal_gmp343_filtered` from the calibration frame with the 350 / 470 cut-offs.
# NOTE(review): the meaning of `cut_above` / `cut_below` is defined inside
# `plot_sensor_measurement` — presumably they bound the plotted value range; confirm.
plot_sensor_measurement(
    df_cal,
    ids,
    col_name="cal_gmp343_filtered",
    cut_above=350,
    cut_below=470,
)

In [None]:
# Plot `cal_gmp343_filtered` from the calibration frame with the 470 / 800 cut-offs.
# NOTE(review): the meaning of `cut_above` / `cut_below` is defined inside
# `plot_sensor_measurement` — presumably they bound the plotted value range; confirm.
plot_sensor_measurement(
    df_cal,
    ids,
    col_name="cal_gmp343_filtered",
    cut_above=470,
    cut_below=800,
)