In [1]:
import polars as pl
import os
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px

import math

# Root folders for the input data, taken from the environment.
# os.environ.get returns None for a variable that is not set.
DATA_DIRECTORY = os.environ.get("DATA_DIRECTORY")
PICARRO_DATA_DIRECTORY = os.environ.get("PICARRO_DATA_DIRECTORY")
# Backward-compatible alias: this constant was originally spelled with a
# trailing lowercase "y"; keep that spelling so existing references still work.
PICARRO_DATA_DIRECTORy = PICARRO_DATA_DIRECTORY

In [5]:
# Preprocessed ACROPOLIS side-by-side data. The "1_h" file name suggests an
# hourly (not 10-minute) aggregation — TODO confirm.
sbs_parquet_path = os.path.join(
    DATA_DIRECTORY,
    "processed",
    "side-by-side",
    "1_h_sbs_period_acropolis.parquet",
)
df_acropolis = pl.read_parquet(sbs_parquet_path)

In [6]:
# Distinct system ids, sorted so that the subplot order is stable between runs
# (unique() does not guarantee the order of its result unless asked to).
system_ids = df_acropolis["system_id"].unique().sort().to_list()

In [7]:
def plot_acropolis_scatter(x_col_name, y_col_name="diff",max_col = 3, x_title="x_name", y_title="ACROPOLIS - Picarro (ppm)", title_text="title_name"):
    """Show a grid of scatter plots, one subplot per entry of ``system_ids``.

    Reads the module-level ``df_acropolis`` frame and ``system_ids`` list.
    For every system id the rows with a matching ``system_id`` are plotted as
    markers, ``x_col_name`` on the x axis and ``y_col_name`` on the y axis.
    All subplots share one y axis range.

    Args:
        x_col_name: Name of the ``df_acropolis`` column used for x values.
        y_col_name: Name of the ``df_acropolis`` column used for y values.
        max_col: Number of subplot columns; rows are added as needed.
        x_title: Common x axis title for the whole figure.
        y_title: Common y axis title for the whole figure.
        title_text: Figure title.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    subplot_titles = [f"System: {int(system_id)}" for system_id in system_ids]
    n_rows = math.ceil(len(system_ids) / max_col)

    fig = make_subplots(
        rows=n_rows,
        cols=max_col,
        shared_yaxes='all',
        subplot_titles=subplot_titles,
        x_title=x_title,
        y_title=y_title,
    )

    for index, system_id in enumerate(system_ids):
        # Fill the grid left to right, then top to bottom. divmod yields
        # 0-based positions, while plotly subplot positions are 1-based.
        grid_row, grid_col = divmod(index, max_col)

        # Only the two plotted columns are read, so no other column has to
        # exist in the frame.
        df_system = df_acropolis.filter(pl.col("system_id") == system_id)
        x_values = df_system[x_col_name].to_list()
        y_values = df_system[y_col_name].to_list()

        fig.add_trace(
            go.Scatter(x=x_values, y=y_values, name=f"MC-{int(system_id)}", mode='markers'),
            row=grid_row + 1,
            col=grid_col + 1,
        )

    fig.update_layout(title_text=title_text, height=1000, width=1000, showlegend=False)

    fig.show()

In [8]:
# Preview the first three rows to inspect the available columns and dtypes.
df_acropolis.head(3)

system_id,sys_name_short,creation_timestamp,co2,h2o,ws,wd,OriginalFlag,Flag,picarro_corrected,diff,std,date
i64,str,"datetime[μs, UTC]",f32,f64,f64,f64,f64,str,f64,f64,f32,date
1,"""acropolis-1""",2024-02-07 00:00:00 UTC,434.259674,0.866428,,,0.0,,434.098514,0.161072,1.000123,2024-02-07
1,"""acropolis-1""",2024-02-07 01:00:00 UTC,433.796417,0.86577,,,0.0,,433.887309,-0.090965,1.227941,2024-02-07
1,"""acropolis-1""",2024-02-07 02:00:00 UTC,432.608551,0.863911,,,0.0,,432.907572,-0.298997,0.87169,2024-02-07


In [7]:
# Picarro reference CO2 (y) against the ACROPOLIS CO2 reading (x), coloured per system.
# NOTE(review): the recorded df_acropolis.head(3) preview does not list a
# "gmp343_corrected" column — confirm it exists in the loaded file.
fig = px.scatter(
    df_acropolis,
    x="gmp343_corrected",
    y="picarro_corrected",
    color="sys_name_short",
).update_layout(
    xaxis_title='ACROPOLIS Systems CO2 (ppm)',
    yaxis_title='Reference PICARRO CO2 (ppm)',
    title='',
)
fig.show()

In [8]:
# ACROPOLIS-minus-Picarro difference (y) against the "h2o_ah" column (x), coloured per system.
# NOTE(review): the recorded df_acropolis.head(3) preview does not list an
# "h2o_ah" column — confirm it exists in the loaded file.
fig = px.scatter(
    df_acropolis,
    x="h2o_ah",
    y="diff",
    color="sys_name_short",
).update_layout(
    xaxis_title='Absolute Humidity (g/m^3)',
    yaxis_title='ACROPOLIS - PICARRO in CO2 (ppm)',
    title='',
)
fig.show()

In [9]:
# ACROPOLIS-minus-Picarro difference (y) against the sensor temperature column (x),
# coloured per system.
# NOTE(review): the recorded run of this cell raised a ValueError because
# "gmp343_temperature" was not found in the data frame — confirm the column
# exists in the loaded file before relying on this plot.
fig = px.scatter(
    df_acropolis,
    x="gmp343_temperature",
    y="diff",
    color="sys_name_short",
).update_layout(
    xaxis_title='Sensor Temperature (°C)',
    yaxis_title='ACROPOLIS - PICARRO in CO2 (ppm)',
    title='',
)
fig.show()

ValueError: Value of 'x' is not the name of a column in 'data_frame'. Expected one of ['sys_name_short', 'diff'] but received: gmp343_temperature

In [10]:
# ACROPOLIS-minus-Picarro difference (y) against the pressure column (x), coloured per system.
# NOTE(review): the recorded df_acropolis.head(3) preview does not list a
# "bme280_pressure" column — confirm it exists in the loaded file.
fig = px.scatter(
    df_acropolis,
    x="bme280_pressure",
    y="diff",
    color="sys_name_short",
).update_layout(
    xaxis_title='Pressure (hPa)',
    yaxis_title='ACROPOLIS - PICARRO in CO2 (ppm)',
    title='',
)
fig.show()

In [11]:
# ACROPOLIS-minus-Picarro difference (y) against the Picarro CO2 value (x), coloured per system.
fig = px.scatter(
    df_acropolis,
    x="picarro_corrected",
    y="diff",
    color="sys_name_short",
).update_layout(
    xaxis_title='Picarro CO2 (ppm)',
    yaxis_title='ACROPOLIS - PICARRO in CO2 (ppm)',
    title='',
)
fig.show()

In [6]:
# parameters
x_col_name = "gmp343_corrected"
y_col_name = "picarro_corrected"
max_col = 3
x_title = "ACROPOLIS (ppm)"
y_title = "PICARRO (ppm)"
title_text = "Measured CO2 Concentration" 

plot_acropolis_scatter(x_col_name=x_col_name,y_col_name=y_col_name,max_col=max_col, x_title=x_title, y_title = y_title, title_text=title_text)

In [7]:
# parameters
x_col_name = "h2o_v%"
max_col = 3
x_title = "Absolute Humidity (%)"
title_text = "Scatter Plots: Delta CO2 vs Absolute Humidity" 

plot_acropolis_scatter(x_col_name=x_col_name,max_col=max_col, x_title=x_title, title_text=title_text)

In [8]:
# parameters
x_col_name = "gmp343_temperature"
max_col = 3
x_title = "Temperature (°C)"
title_text = "Scatter Plots: Delta CO2 vs Temperature" 

plot_acropolis_scatter(x_col_name=x_col_name,max_col=max_col, x_title=x_title, title_text=title_text)

In [9]:
# parameters
x_col_name = "bme280_pressure"
max_col = 3
x_title = "Pressure (mbar)"
title_text = "Scatter Plots: Delta CO2 vs Pressure" 

plot_acropolis_scatter(x_col_name=x_col_name,max_col=max_col, x_title=x_title, title_text=title_text)