In [478]:
from datetime import datetime
from datetime import timezone
import polars as pl
import os
import math
import numpy as np
from sklearn.metrics import r2_score
from utils.plot_data import plot_sensor_measurement, plot_wind_rose, rmse, calc_r2


start_date = datetime(2023, 11, 5, 0, 0, 0).replace(tzinfo=timezone.utc)
end_date = datetime(2023, 11, 19, 23, 59, 59).replace(tzinfo=timezone.utc)

filter = '1h'

sensor_id = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,18,20]

DATA_DIRECTORY = os.environ.get("DATA_DIRECTORY")


# Load Datasets

In [479]:
# Local network measurements dataset
# parquet created by notebook: download_from_hermes.ipynb
# parquet created by notebook: download_from_hermes.ipynb
df = pl.scan_parquet(os.path.join(DATA_DIRECTORY, "download", "acropolis.parquet")) \
.with_columns(pl.col("creation_timestamp").dt.cast_time_unit("us")) \
    .sort("creation_timestamp") \
    .filter(pl.col("creation_timestamp").is_between(start_date, end_date)).collect()
    
# Calibration Correcteed dataset
# parquet created by notebook: processing_pipeline.ipynb
df_cor = pl.scan_parquet(os.path.join(DATA_DIRECTORY,"processed", "1h_cal_corr_acropolis.parquet"))
df_cor = df_cor.sort("creation_timestamp").filter(pl.col("creation_timestamp").is_between(start_date, end_date)).collect()

In [480]:
import plotly.express as px

fig = px.line(df_cor, x="creation_timestamp", y=f"CO2_corr", markers=True, title = "CO2", color="system_name")
fig.show()

fig = px.line(df_cor, x="creation_timestamp", y=f"diff", markers=True, title = "CO2", color="system_name")
fig.show()

fig = px.line(df_cor, x="creation_timestamp", y=f"slope", markers=True, title = "slope", color="system_name")
fig.show()

fig = px.line(df_cor, x="creation_timestamp", y=f"intercept", markers=True, title = "intercept", color="system_name")
fig.show()
sensor_id = [6,10,11,12,13,18]

In [481]:
for temp_id in sensor_id:

    print(f"\nAnalysis for System {temp_id}:")

    #----
    df_acropolis = df_cor.filter(pl.col("system_name")==f"tum-esm-midcost-raspi-{temp_id}") \
        .select(["creation_timestamp", "CO2_corr"]) \
        .rename({"CO2_corr": f"CO2_acropolis_{temp_id}"})
        
    df_picarro = df_cor.filter(pl.col("system_name")=="Picarro") \
        .select(["creation_timestamp", "CO2_corr"]) \
        .rename({"CO2_corr": f"CO2_picarro"})
        
    df_analysis = df_picarro.join(df_acropolis, on="creation_timestamp")  \
        .filter(pl.col("CO2_picarro") > 0)  \
        .filter(pl.col(f"CO2_acropolis_{temp_id}") > 0)
        
    l_picarro = df_analysis.select("CO2_picarro").to_series().to_list()
    l_acropolis = df_analysis.select(f"CO2_acropolis_{temp_id}").to_series().to_list()
    
    #----
    if len(l_acropolis) > 0:
        
        mean_offset = df_cor.filter(pl.col("system_name")==f"tum-esm-midcost-raspi-{temp_id}").select("diff").mean().item()
        print(f"Offset {mean_offset}")
        
        rmse_result = rmse(l_picarro,l_acropolis)
        print(f"RMSE: {rmse_result}")
        
        r2_result = calc_r2(l_picarro,l_acropolis)
        print(f"R2: {r2_result}")


Analysis for System 6:
Offset -0.917680925103319
RMSE: 2.533485269427385
R2: 0.9431981712293575

Analysis for System 10:

Analysis for System 11:
Offset 7.7895598472344885
RMSE: 8.310167012099946
R2: 0.3888538849249683

Analysis for System 12:
Offset 10.593732653279961
RMSE: 10.71420017462871
R2: -0.01588620627832027

Analysis for System 13:
Offset -0.9874691161014512
RMSE: 2.247559506886168
R2: 0.9554079241930202

Analysis for System 18:
Offset -0.327617091343368
RMSE: 1.7195202683400366
R2: 0.9738338305243966


# Plot with corrected offset

In [482]:
for temp_id in sensor_id:

    print(f"\nAnalysis for System {temp_id}:")

    #----
    mean_offset = 0
    if len(l_acropolis) > 0:
        mean_offset = df_cor.filter(pl.col("system_name")==f"tum-esm-midcost-raspi-{temp_id}").select("diff").mean().item()
    
    df_acropolis = df_cor.filter(pl.col("system_name")==f"tum-esm-midcost-raspi-{temp_id}") \
        .select(["creation_timestamp", "CO2_corr"]) \
        .with_columns(pl.col("CO2_corr") - mean_offset)  \
        .rename({"CO2_corr": f"CO2_acropolis_{temp_id}"})
        
    df_picarro = df_cor.filter(pl.col("system_name")=="Picarro") \
        .select(["creation_timestamp", "CO2_corr"]) \
        .rename({"CO2_corr": f"CO2_picarro"})
        
    df_analysis = df_picarro.join(df_acropolis, on="creation_timestamp")  \
        .filter(pl.col("CO2_picarro") > 0)  \
        .filter(pl.col(f"CO2_acropolis_{temp_id}") > 0)
        
    l_picarro = df_analysis.select("CO2_picarro").to_series().to_list()
    l_acropolis = df_analysis.select(f"CO2_acropolis_{temp_id}").to_series().to_list()
    
    #----
    if len(l_acropolis) > 0:
        
        rmse_result = rmse(l_picarro,l_acropolis)
        print(f"RMSE: {rmse_result}")
        
        r2_result = calc_r2(l_picarro,l_acropolis)
        print(f"R2: {r2_result}")


Analysis for System 6:
RMSE: 2.361442214009707
R2: 0.9506507909343463

Analysis for System 10:

Analysis for System 11:
RMSE: 8.310167012099946
R2: 0.3888538849249683

Analysis for System 12:
RMSE: 1.6021591221984814
R2: 0.9772837392524987

Analysis for System 13:
RMSE: 2.0190167116049866
R2: 0.9640155293048972

Analysis for System 18:
RMSE: 1.688021621511971
R2: 0.9747836884521572


# Plot other data directly from local database measurements.parquet

In [483]:
plot_sensor_measurement(df,sensor_id=sensor_id,col_name="gmp343_raw",filter=filter)

In [484]:
plot_sensor_measurement(df,sensor_id,col_name="gmp343_compensated",filter=filter)

In [485]:
plot_sensor_measurement(df,sensor_id,col_name="gmp343_filtered",filter=filter)

In [486]:
# plot wind at 3m pole inlet
plot_wind_rose(df,6, 'TUM')
# Maisach
plot_wind_rose(df,1, 'Maisach')
# Rechts-der-Isar
plot_wind_rose(df,2, 'Rechts-der-Isar')
# Finsing
plot_wind_rose(df,15, 'Finsing')
# Großhadern
plot_wind_rose(df,4, 'Großhadern')
# Großhadern
plot_wind_rose(df,5, 'Pasing')
# Feldkirchen
plot_wind_rose(df,7, 'Feldkirchen')
# Taufkirchen
plot_wind_rose(df,8, 'Taufkirchen')
# Finsing
plot_wind_rose(df,14, 'DLR')

In [487]:
plot_sensor_measurement(df,sensor_id,col_name="wxt532_speed_avg",filter=filter)

In [488]:
plot_sensor_measurement(df,sensor_id,col_name="wxt532_direction_avg",filter=filter)

In [489]:
# Plot auxilliary data
plot_sensor_measurement(df,sensor_id,col_name="sht45_humidity",filter=filter)

In [490]:
plot_sensor_measurement(df,sensor_id,col_name="bme280_humidity",filter=filter)

In [491]:
plot_sensor_measurement(df,sensor_id,col_name="sht45_temperature",filter=filter)

In [492]:
plot_sensor_measurement(df,sensor_id,col_name="bme280_temperature",filter=filter)

In [493]:
plot_sensor_measurement(df,sensor_id,col_name="bme280_pressure",filter=filter)

In [494]:
plot_sensor_measurement(df,sensor_id,col_name="gmp343_temperature",filter=filter)

In [495]:
plot_sensor_measurement(df,sensor_id,col_name="raspi_cpu_usage",filter=filter)

In [496]:
plot_sensor_measurement(df,sensor_id,col_name="raspi_memory_usage",filter=filter)


In [497]:
plot_sensor_measurement(df,sensor_id,col_name="raspi_disk_usage",filter=filter)


In [498]:
plot_sensor_measurement(df,sensor_id,col_name="raspi_cpu_temperature",filter=filter)

In [499]:
plot_sensor_measurement(df,sensor_id,col_name="enclosure_bme280_humidity",filter=filter)

In [500]:
plot_sensor_measurement(df,sensor_id,col_name="enclosure_bme280_pressure",filter=filter)

In [501]:
plot_sensor_measurement(df,sensor_id,col_name="enclosure_bme280_temperature",filter=filter)

# Calibration Data

In [502]:
plot_sensor_measurement(df,sensor_id,col_name="cal_bottle_id",filter= '30s')

In [503]:
plot_sensor_measurement(df,sensor_id,col_name="cal_gmp343_filtered",filter= '30s')

In [504]:
plot_sensor_measurement(df,sensor_id,col_name="cal_sht45_humidity",filter= '30s')

In [505]:
plot_sensor_measurement(df,sensor_id,col_name="cal_sht45_temperature",filter= '30s')

In [506]:
plot_sensor_measurement(df,sensor_id,col_name="cal_gmp343_temperature",filter= '30s')

In [507]:
plot_sensor_measurement(df,sensor_id,col_name="cal_bme280_temperature",filter= '30s')

In [508]:
plot_sensor_measurement(df,sensor_id,col_name="cal_bme280_humidity",filter= '30s')

In [509]:
plot_sensor_measurement(df,sensor_id,col_name="cal_bme280_pressure",filter= '30s')

In [510]:
# Plot 400ppm Calibration Bottle 
plot_sensor_measurement(df,sensor_id,col_name="cal_gmp343_filtered",filter= '1m',cut_above= 450,cut_below=350)

In [511]:
# Plot 800ppm Calibration Bottle 
plot_sensor_measurement(df,sensor_id,col_name="cal_gmp343_filtered",filter= '30s',cut_below=700)