In [229]:
from datetime import datetime
from datetime import timezone
import polars as pl
import os
import math
import numpy as np
from sklearn.metrics import r2_score
from plot_data import plot_sensor_measurement, plot_wind_rose, rmse, calc_r2


# Analysis window, as timezone-aware UTC datetimes.
start_date = datetime(2023, 10, 31, tzinfo=timezone.utc)
end_date = datetime(2023, 11, 5, 23, 59, 59, tzinfo=timezone.utc)

# Value handed to plot_sensor_measurement(..., filter=filter) in the cells below.
# NOTE(review): this name shadows the builtin `filter`; it is kept because later cells refer to it.
filter = '1h'

# System ids to analyse: 1 through 16, plus 18 and 20 (17 and 19 are not in the list).
sensor_id = [*range(1, 17), 18, 20]

# Root folder of the parquet datasets. This is None when the environment
# variable is unset, in which case the os.path.join calls below will fail.
DATA_DIRECTORY = os.environ.get("DATA_DIRECTORY")


# Load Datasets

In [230]:
# Local network measurements dataset.
# Parquet file created by notebook: download_from_hermes.ipynb
# The timestamp column is cast to microsecond resolution, then the rows are
# sorted by time and restricted to [start_date, end_date] before collecting.
df = (
    pl.scan_parquet(os.path.join(DATA_DIRECTORY, "download", "acropolis.parquet"))
    .with_columns(pl.col("creation_timestamp").dt.cast_time_unit("us"))
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
    .collect()
)

# Calibration-corrected dataset.
# Parquet file created by notebook: processing_pipeline.ipynb
# Same sort + time-window restriction as above (no time-unit cast is applied here).
df_cor = (
    pl.scan_parquet(os.path.join(DATA_DIRECTORY, "processed", "1h_cal_corr_acropolis.parquet"))
    .sort("creation_timestamp")
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date))
    .collect()
)

In [231]:
import plotly.express as px

# One line chart per column of the calibration-corrected dataset, with one
# trace per system (colour = system_name). Each entry is
# (column of df_cor to plot, figure title).
# The "diff" figure was previously titled "CO2", identical to the CO2_corr
# figure; it is now titled after its column like the slope/intercept figures.
for y_column, fig_title in (
    ("CO2_corr", "CO2"),
    ("diff", "diff"),
    ("slope", "slope"),
    ("intercept", "intercept"),
):
    fig = px.line(
        df_cor,
        x="creation_timestamp",
        y=y_column,
        markers=True,
        title=fig_title,
        color="system_name",
    )
    fig.show()


In [232]:
# Reference series: the "Picarro" rows of the corrected dataset. It does not
# depend on the system being analysed, so it is built once, outside the loop.
df_picarro = (
    df_cor.filter(pl.col("system_name") == "Picarro")
    .select(["creation_timestamp", "CO2_corr"])
    .rename({"CO2_corr": "CO2_picarro"})
)

# For every configured system: pair its corrected CO2 values with the Picarro
# values sharing the same timestamp, then print the mean of the system's
# "diff" column together with RMSE and R2 of the paired values.
for temp_id in sensor_id:

    print(f"\nAnalysis for System {temp_id}:")

    acropolis_col = f"CO2_acropolis_{temp_id}"

    # All corrected rows of this system; reused for the join and the mean offset.
    df_system = df_cor.filter(pl.col("system_name") == f"tum-esm-midcost-raspi-{temp_id}")

    df_acropolis = (
        df_system
        .select(["creation_timestamp", "CO2_corr"])
        .rename({"CO2_corr": acropolis_col})
    )

    # Join on identical timestamps and keep only strictly positive readings
    # on both sides.
    df_analysis = (
        df_picarro.join(df_acropolis, on="creation_timestamp")
        .filter(pl.col("CO2_picarro") > 0)
        .filter(pl.col(acropolis_col) > 0)
    )

    l_picarro = df_analysis.get_column("CO2_picarro").to_list()
    l_acropolis = df_analysis.get_column(acropolis_col).to_list()

    # Systems without any paired reading in the window only get the header line.
    if len(l_acropolis) > 0:

        # The offset is averaged over all rows of the system inside the time
        # window, not only over the rows that survived the join above.
        mean_offset = df_system.select("diff").mean().item()
        print(f"Offset {mean_offset}")

        # rmse / calc_r2 are imported from plot_data; the Picarro list is
        # passed first, the system's list second.
        rmse_result = rmse(l_picarro, l_acropolis)
        print(f"RMSE: {rmse_result}")

        r2_result = calc_r2(l_picarro, l_acropolis)
        print(f"R2: {r2_result}")


Analysis for System 1:
Offset -0.2815297437845719
RMSE: 8.178600529742212
R2: 0.07189219801086155

Analysis for System 2:
Offset 5.938454714989806
RMSE: 8.631313903760184
R2: -0.033699374655023595

Analysis for System 3:

Analysis for System 4:

Analysis for System 5:

Analysis for System 6:
Offset 2.2209021886036506
RMSE: 4.8375491497271
R2: 0.6752933284545024

Analysis for System 7:

Analysis for System 8:
Offset -1.1743136161607053
RMSE: 6.617564599725036
R2: 0.39237335487085545

Analysis for System 9:

Analysis for System 10:
Offset 4.631626842304231
RMSE: 4.977008669448046
R2: 0.6563018255625674

Analysis for System 11:
Offset 7.683889280272096
RMSE: 7.8635214022656035
R2: 0.14202511103237758

Analysis for System 12:
Offset 11.09871778140105
RMSE: 11.142172278799789
R2: -0.7225830490187284

Analysis for System 13:
Offset -0.5220282893769294
RMSE: 1.1237913003828972
R2: 0.9824768667915368

Analysis for System 14:
Offset 7.544542633568739
RMSE: 7.591092316521768
R2: 0.2004438339044

# Plot other data directly from the local network measurements dataset (acropolis.parquet)

In [233]:
# Plot column "gmp343_raw" of the measurements frame for the systems in
# `sensor_id`, using the notebook-wide `filter` setting.
plot_sensor_measurement(
    df,
    sensor_id=sensor_id,
    col_name="gmp343_raw",
    filter=filter,
)

In [234]:
# Plot column "gmp343_compensated" of the measurements frame for the systems
# in `sensor_id`, using the notebook-wide `filter` setting.
plot_sensor_measurement(
    df,
    sensor_id=sensor_id,
    col_name="gmp343_compensated",
    filter=filter,
)

In [235]:
# Plot column "gmp343_filtered" of the measurements frame for the systems in
# `sensor_id`, using the notebook-wide `filter` setting.
plot_sensor_measurement(
    df,
    sensor_id=sensor_id,
    col_name="gmp343_filtered",
    filter=filter,
)

In [236]:
# Plot wind at 3m pole inlet: one wind rose per site.
# NOTE(review): the second argument is presumably the system id installed at
# the site and the third the site label — confirm against plot_data.plot_wind_rose.
# TUM
plot_wind_rose(df,6, 'TUM')
# Maisach
plot_wind_rose(df,1, 'Maisach')
# Rechts-der-Isar
plot_wind_rose(df,2, 'Rechts-der-Isar')
# Finsing
plot_wind_rose(df,3, 'Finsing')
# Großhadern
plot_wind_rose(df,4, 'Großhadern')
# Pasing
plot_wind_rose(df,5, 'Pasing')
# Feldkirchen
plot_wind_rose(df,7, 'Feldkirchen')
# Taufkirchen
plot_wind_rose(df,8, 'Taufkirchen')

In [237]:
# Plot column "wxt532_speed_avg" of the measurements frame for the systems in
# `sensor_id`, using the notebook-wide `filter` setting.
plot_sensor_measurement(
    df,
    sensor_id=sensor_id,
    col_name="wxt532_speed_avg",
    filter=filter,
)

In [238]:
# Plot column "wxt532_direction_avg" of the measurements frame for the systems
# in `sensor_id`, using the notebook-wide `filter` setting.
plot_sensor_measurement(
    df,
    sensor_id=sensor_id,
    col_name="wxt532_direction_avg",
    filter=filter,
)

In [239]:
# Plot auxiliary data: column "sht45_humidity" for the systems in `sensor_id`,
# using the notebook-wide `filter` setting.
plot_sensor_measurement(
    df,
    sensor_id=sensor_id,
    col_name="sht45_humidity",
    filter=filter,
)

In [240]:
# Plot column "bme280_humidity" of the measurements frame for the systems in
# `sensor_id`, using the notebook-wide `filter` setting.
plot_sensor_measurement(
    df,
    sensor_id=sensor_id,
    col_name="bme280_humidity",
    filter=filter,
)

In [241]:
# Plot column "sht45_temperature" of the measurements frame for the systems in
# `sensor_id`, using the notebook-wide `filter` setting.
plot_sensor_measurement(
    df,
    sensor_id=sensor_id,
    col_name="sht45_temperature",
    filter=filter,
)

In [242]:
# Plot column "bme280_temperature" of the measurements frame for the systems
# in `sensor_id`, using the notebook-wide `filter` setting.
plot_sensor_measurement(
    df,
    sensor_id=sensor_id,
    col_name="bme280_temperature",
    filter=filter,
)

In [243]:
# Plot column "bme280_pressure" of the measurements frame for the systems in
# `sensor_id`, using the notebook-wide `filter` setting.
plot_sensor_measurement(
    df,
    sensor_id=sensor_id,
    col_name="bme280_pressure",
    filter=filter,
)

In [244]:
# Plot column "gmp343_temperature" of the measurements frame for the systems
# in `sensor_id`, using the notebook-wide `filter` setting.
plot_sensor_measurement(
    df,
    sensor_id=sensor_id,
    col_name="gmp343_temperature",
    filter=filter,
)

In [245]:
# Plot column "raspi_cpu_usage" of the measurements frame for the systems in
# `sensor_id`, using the notebook-wide `filter` setting.
plot_sensor_measurement(
    df,
    sensor_id=sensor_id,
    col_name="raspi_cpu_usage",
    filter=filter,
)

In [246]:
# Plot column "raspi_disk_usage" of the measurements frame for the systems in
# `sensor_id`, using the notebook-wide `filter` setting.
plot_sensor_measurement(
    df,
    sensor_id=sensor_id,
    col_name="raspi_disk_usage",
    filter=filter,
)


In [247]:
# Plot column "raspi_cpu_temperature" of the measurements frame for the
# systems in `sensor_id`, using the notebook-wide `filter` setting.
plot_sensor_measurement(
    df,
    sensor_id=sensor_id,
    col_name="raspi_cpu_temperature",
    filter=filter,
)

In [248]:
# Plot column "enclosure_bme280_humidity" of the measurements frame for the
# systems in `sensor_id`, using the notebook-wide `filter` setting.
plot_sensor_measurement(
    df,
    sensor_id=sensor_id,
    col_name="enclosure_bme280_humidity",
    filter=filter,
)

In [249]:
# Plot column "enclosure_bme280_pressure" of the measurements frame for the
# systems in `sensor_id`, using the notebook-wide `filter` setting.
plot_sensor_measurement(
    df,
    sensor_id=sensor_id,
    col_name="enclosure_bme280_pressure",
    filter=filter,
)

In [250]:
# Plot column "enclosure_bme280_temperature" of the measurements frame for the
# systems in `sensor_id`, using the notebook-wide `filter` setting.
plot_sensor_measurement(
    df,
    sensor_id=sensor_id,
    col_name="enclosure_bme280_temperature",
    filter=filter,
)

# Calibration Data

In [251]:
# Plot column "cal_bottle_id" for the systems in `sensor_id`. This cell uses
# its own filter value ('30s') rather than the notebook-wide `filter` setting.
plot_sensor_measurement(
    df,
    sensor_id=sensor_id,
    col_name="cal_bottle_id",
    filter='30s',
)

In [252]:
# Plot column "cal_gmp343_filtered" for the systems in `sensor_id`, with the
# calibration-specific filter value '30s'.
plot_sensor_measurement(
    df,
    sensor_id=sensor_id,
    col_name="cal_gmp343_filtered",
    filter='30s',
)

In [253]:
# Plot column "cal_sht45_humidity" for the systems in `sensor_id`, with the
# calibration-specific filter value '30s'.
plot_sensor_measurement(
    df,
    sensor_id=sensor_id,
    col_name="cal_sht45_humidity",
    filter='30s',
)

In [254]:
# Plot column "cal_sht45_temperature" for the systems in `sensor_id`, with the
# calibration-specific filter value '30s'.
plot_sensor_measurement(
    df,
    sensor_id=sensor_id,
    col_name="cal_sht45_temperature",
    filter='30s',
)

In [255]:
# Plot column "cal_gmp343_temperature" for the systems in `sensor_id`, with
# the calibration-specific filter value '30s'.
plot_sensor_measurement(
    df,
    sensor_id=sensor_id,
    col_name="cal_gmp343_temperature",
    filter='30s',
)

In [256]:
# Plot column "cal_bme280_temperature" for the systems in `sensor_id`, with
# the calibration-specific filter value '30s'.
plot_sensor_measurement(
    df,
    sensor_id=sensor_id,
    col_name="cal_bme280_temperature",
    filter='30s',
)

In [257]:
# Plot column "cal_bme280_humidity" for the systems in `sensor_id`, with the
# calibration-specific filter value '30s'.
plot_sensor_measurement(
    df,
    sensor_id=sensor_id,
    col_name="cal_bme280_humidity",
    filter='30s',
)

In [258]:
# Plot column "cal_bme280_pressure" for the systems in `sensor_id`, with the
# calibration-specific filter value '30s'.
plot_sensor_measurement(
    df,
    sensor_id=sensor_id,
    col_name="cal_bme280_pressure",
    filter='30s',
)

In [259]:
# Plot 400ppm calibration bottle: column "cal_gmp343_filtered" with filter '1m'.
# NOTE(review): cut_above=450 / cut_below=350 presumably restrict the plot to
# readings between 350 and 450 — confirm in plot_data.plot_sensor_measurement.
plot_sensor_measurement(
    df,
    sensor_id=sensor_id,
    col_name="cal_gmp343_filtered",
    filter='1m',
    cut_above=450,
    cut_below=350,
)

In [260]:
# Plot 800ppm calibration bottle: column "cal_gmp343_filtered" with filter '30s'.
# NOTE(review): cut_below=700 presumably drops readings below 700 — confirm in
# plot_data.plot_sensor_measurement.
plot_sensor_measurement(
    df,
    sensor_id=sensor_id,
    col_name="cal_gmp343_filtered",
    filter='30s',
    cut_below=700,
)