In [170]:
from datetime import datetime
from datetime import timezone
import polars as pl
import os
import math
import numpy as np
from sklearn.metrics import r2_score
from utils.plot_data import plot_sensor_measurement, plot_wind_rose, rmse, calc_r2


start_date = datetime(2023, 11, 23, 0, 0, 0).replace(tzinfo=timezone.utc)
end_date = datetime(2023, 11, 26, 23, 59, 59).replace(tzinfo=timezone.utc)

filter = '1h'

sensor_id = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,18,20]
DATA_DIRECTORY = os.environ.get("DATA_DIRECTORY")


# Load Datasets

In [171]:
# Local network measurements dataset
# parquet created by notebook: download_from_hermes.ipynb
# parquet created by notebook: download_from_hermes.ipynb
df = pl.scan_parquet(os.path.join(DATA_DIRECTORY, "download", "acropolis.parquet")) \
.with_columns(pl.col("creation_timestamp").dt.cast_time_unit("us")) \
    .sort("creation_timestamp") \
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date)).collect()
    
# Calibration Correcteed dataset
# parquet created by notebook: processing_pipeline.ipynb
df_cor = pl.scan_parquet(os.path.join(DATA_DIRECTORY,"processed", "1h_cal_corr_acropolis.parquet"))
df_cor = df_cor.sort("creation_timestamp").filter(pl.col("creation_timestamp").is_between(start_date, end_date)).collect()

In [172]:
import plotly.express as px

fig = px.line(df_cor, x="creation_timestamp", y=f"CO2_corr", markers=True, title = "CO2", color="system_name")
fig.show()

fig = px.line(df_cor, x="creation_timestamp", y=f"diff", markers=True, title = "CO2", color="system_name")
fig.show()

fig = px.line(df_cor, x="creation_timestamp", y=f"slope", markers=True, title = "slope", color="system_name")
fig.show()

fig = px.line(df_cor, x="creation_timestamp", y=f"intercept", markers=True, title = "intercept", color="system_name")
fig.show()


In [173]:
for temp_id in sensor_id:

    print(f"\nAnalysis for System {temp_id}:")

    #----
    df_acropolis = df_cor.filter(pl.col("system_name")==f"tum-esm-midcost-raspi-{temp_id}") \
        .select(["creation_timestamp", "CO2_corr"]) \
        .rename({"CO2_corr": f"CO2_acropolis_{temp_id}"})
        
    df_picarro = df_cor.filter(pl.col("system_name")=="Picarro") \
        .select(["creation_timestamp", "CO2_corr"]) \
        .rename({"CO2_corr": f"CO2_picarro"})
        
    df_analysis = df_picarro.join(df_acropolis, on="creation_timestamp")  \
        .filter(pl.col("CO2_picarro") > 0)  \
        .filter(pl.col(f"CO2_acropolis_{temp_id}") > 0)
        
    l_picarro = df_analysis.select("CO2_picarro").to_series().to_list()
    l_acropolis = df_analysis.select(f"CO2_acropolis_{temp_id}").to_series().to_list()
    
    #----
    if len(l_acropolis) > 0:
        
        mean_offset = df_cor.filter(pl.col("system_name")==f"tum-esm-midcost-raspi-{temp_id}").select("diff").mean().item()
        print(f"Offset {mean_offset}")
        
        rmse_result = rmse(l_picarro,l_acropolis)
        print(f"RMSE: {rmse_result}")
        
        r2_result = calc_r2(l_picarro,l_acropolis)
        print(f"R2: {r2_result}")


Analysis for System 1:
Offset -3.7566323576884697
RMSE: 4.749297440067659
R2: 0.3666661147432482

Analysis for System 2:
Offset 9.823416653979484
RMSE: 10.043617697677158
R2: -1.856920651517834

Analysis for System 3:

Analysis for System 4:

Analysis for System 5:
Offset 6.498705386050525
RMSE: 11.222883371587804
R2: -2.5671940202504655

Analysis for System 6:
Offset -2.9117727199379946
RMSE: 4.868925397849479
R2: 0.3285957710041093

Analysis for System 7:
Offset 376.8785764121319
RMSE: 380.0383724950179
R2: -4089.465470872487

Analysis for System 8:
Offset 1.005221955569791
RMSE: 5.119586518371827
R2: 0.25768607873502225

Analysis for System 9:

Analysis for System 10:
Offset 5.654947467477226
RMSE: 5.67736401060406
R2: 0.08712500213216634

Analysis for System 11:
Offset 11.894711575847149
RMSE: 14.021285933542908
R2: -4.567926883593437

Analysis for System 12:
Offset 12.14444943763315
RMSE: 12.159150639181785
R2: -3.187205827649347

Analysis for System 13:
Offset -0.31439822500077


# Plot with corrected offset

In [174]:
for temp_id in sensor_id:

    print(f"\nAnalysis for System {temp_id}:")

    #----
    mean_offset = 0
    if len(l_acropolis) > 0:
        mean_offset = df_cor.filter(pl.col("system_name")==f"tum-esm-midcost-raspi-{temp_id}").select("diff").mean().item()
    
    df_acropolis = df_cor.filter(pl.col("system_name")==f"tum-esm-midcost-raspi-{temp_id}") \
        .select(["creation_timestamp", "CO2_corr"]) \
        .with_columns(pl.col("CO2_corr") - mean_offset)  \
        .rename({"CO2_corr": f"CO2_acropolis_{temp_id}"})
        
    df_picarro = df_cor.filter(pl.col("system_name")=="Picarro") \
        .select(["creation_timestamp", "CO2_corr"]) \
        .rename({"CO2_corr": f"CO2_picarro"})
        
    df_analysis = df_picarro.join(df_acropolis, on="creation_timestamp")  \
        .filter(pl.col("CO2_picarro") > 0)  \
        .filter(pl.col(f"CO2_acropolis_{temp_id}") > 0)
        
    l_picarro = df_analysis.select("CO2_picarro").to_series().to_list()
    l_acropolis = df_analysis.select(f"CO2_acropolis_{temp_id}").to_series().to_list()
    
    #----
    if len(l_acropolis) > 0:
        
        rmse_result = rmse(l_picarro,l_acropolis)
        print(f"RMSE: {rmse_result}")
        
        r2_result = calc_r2(l_picarro,l_acropolis)
        print(f"R2: {r2_result}")


Analysis for System 1:
RMSE: 2.9057769190702154
R2: 0.7629180311241811

Analysis for System 2:
RMSE: 2.0915883197733542
R2: 0.876100302048587

Analysis for System 3:

Analysis for System 4:

Analysis for System 5:
RMSE: 11.222883371587804
R2: -2.5671940202504655

Analysis for System 6:
RMSE: 3.9023088239206025
R2: 0.5687182274701166

Analysis for System 7:
RMSE: 48.90504278933614
R2: -66.73686898504944

Analysis for System 8:
RMSE: 5.019929774323045
R2: 0.28630424240910035

Analysis for System 9:

Analysis for System 10:
RMSE: 5.67736401060406
R2: 0.08712500213216634

Analysis for System 11:
RMSE: 7.42376560497272
R2: -0.5608685648091836

Analysis for System 12:
RMSE: 0.5977391764696474
R2: 0.9898809143093414

Analysis for System 13:
RMSE: 0.574989111164671
R2: 0.9906365246287662

Analysis for System 14:
RMSE: 2.6206142759364273
R2: 0.8054980154455078

Analysis for System 15:
RMSE: 2.871612439017309
R2: 0.7664555862608226

Analysis for System 16:
RMSE: 0.5896737759784018
R2: 0.9901521

# Plot other data directly from local database measurements.parquet

In [175]:
plot_sensor_measurement(df,sensor_id=sensor_id,col_name="gmp343_raw",filter=filter)

In [176]:
plot_sensor_measurement(df,sensor_id,col_name="gmp343_compensated",filter=filter)

In [177]:
plot_sensor_measurement(df,sensor_id,col_name="gmp343_filtered",filter=filter)

In [178]:
# plot wind at 3m pole inlet
plot_wind_rose(df,6, 'TUM')
# Maisach
plot_wind_rose(df,1, 'Maisach')
# Rechts-der-Isar
plot_wind_rose(df,2, 'Rechts-der-Isar')
# Finsing
plot_wind_rose(df,15, 'Finsing')
# Großhadern
plot_wind_rose(df,4, 'Großhadern')
# Großhadern
plot_wind_rose(df,5, 'Pasing')
# Feldkirchen
plot_wind_rose(df,7, 'Feldkirchen')
# Taufkirchen
plot_wind_rose(df,8, 'Taufkirchen')
# Finsing
plot_wind_rose(df,14, 'DLR')

In [179]:
plot_sensor_measurement(df,sensor_id,col_name="wxt532_speed_avg",filter=filter)

In [180]:
plot_sensor_measurement(df,sensor_id,col_name="wxt532_direction_avg",filter=filter)

In [181]:
# Plot auxilliary data
plot_sensor_measurement(df,sensor_id,col_name="sht45_humidity",filter=filter)

In [182]:
plot_sensor_measurement(df,sensor_id,col_name="bme280_humidity",filter=filter)

In [183]:
plot_sensor_measurement(df,sensor_id,col_name="sht45_temperature",filter=filter)

In [184]:
plot_sensor_measurement(df,sensor_id,col_name="bme280_temperature",filter=filter)

In [185]:
plot_sensor_measurement(df,sensor_id,col_name="bme280_pressure",filter=filter)

In [186]:
plot_sensor_measurement(df,sensor_id,col_name="gmp343_temperature",filter=filter)

In [187]:
plot_sensor_measurement(df,sensor_id,col_name="raspi_cpu_usage",filter=filter)

In [188]:
plot_sensor_measurement(df,sensor_id,col_name="raspi_memory_usage",filter=filter)


In [189]:
plot_sensor_measurement(df,sensor_id,col_name="raspi_disk_usage",filter=filter)


In [190]:
plot_sensor_measurement(df,sensor_id,col_name="raspi_cpu_temperature",filter=filter)

In [191]:
plot_sensor_measurement(df,sensor_id,col_name="enclosure_bme280_humidity",filter=filter)

In [192]:
plot_sensor_measurement(df,sensor_id,col_name="enclosure_bme280_pressure",filter=filter)

In [193]:
plot_sensor_measurement(df,sensor_id,col_name="enclosure_bme280_temperature",filter=filter)

# Calibration Data

In [194]:
plot_sensor_measurement(df,sensor_id,col_name="cal_bottle_id",filter= '30s')

In [195]:
plot_sensor_measurement(df,sensor_id,col_name="cal_gmp343_filtered",filter= '30s')

In [196]:
plot_sensor_measurement(df,sensor_id,col_name="cal_sht45_humidity",filter= '30s')

In [197]:
plot_sensor_measurement(df,sensor_id,col_name="cal_sht45_temperature",filter= '30s')

In [198]:
plot_sensor_measurement(df,sensor_id,col_name="cal_gmp343_temperature",filter= '30s')

In [199]:
plot_sensor_measurement(df,sensor_id,col_name="cal_bme280_temperature",filter= '30s')

In [200]:
plot_sensor_measurement(df,sensor_id,col_name="cal_bme280_humidity",filter= '30s')

In [201]:
plot_sensor_measurement(df,sensor_id,col_name="cal_bme280_pressure",filter= '30s')

In [202]:
# Plot 400ppm Calibration Bottle 
plot_sensor_measurement(df,sensor_id,col_name="cal_gmp343_filtered",filter= '1m',cut_above= 450,cut_below=350)

In [203]:
# Plot 800ppm Calibration Bottle 
plot_sensor_measurement(df,sensor_id,col_name="cal_gmp343_filtered",filter= '30s',cut_below=700)