In [1]:
import os
import polars as pl
import plotly.express as px
import plotly.graph_objs as go
from hampel import hampel
from datetime import datetime, timezone, timedelta
from typing import Literal
import glob

from utils import ambient_parameter_conversion as apc
from utils import calibration_processing as cp

DATA_DIRECTORY = os.environ.get("DATA_DIRECTORY")

sensor_id = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]

# customize pipeline
outlier_removal = True

# Import data files

In [None]:
# load calibration bottle concentrations (preprocessed)
df_gas = pl.read_csv(os.path.join(DATA_DIRECTORY,"input", "averaged_gases.csv"))

measurement_months = []
paths = []

for year in [2024,2025]:
    paths += sorted(glob.glob(os.path.join(DATA_DIRECTORY, "download", "measurements", str(year), "*.parquet")), key=os.path.getmtime)

for path in paths:
    measurement_months.append(pl.scan_parquet(path))

df_raw = pl.concat(measurement_months, how="diagonal") \
    .filter(pl.col("system_name") != "test-sensor") \
    

In [26]:
#extract wind data from df_raw
df_wind = df_raw.select(pl.col("creation_timestamp", "system_id", "^(wxt532_.*)$")) \
    .filter(pl.col('wxt532_direction_avg') > 0) \
    .collect()
    
df_aux = df_raw.select(pl.col("creation_timestamp", "system_id", "^(enclosure_.*)$", "^(raspi_.*)$", "^ups_.*$")) \
    .filter(pl.col('enclosure_bme280_temperature') > 0) \
    .collect()

# extract measurement data from df_raw and aggregate to 1m 
df_1_m = df_raw.sort("creation_timestamp") \
    .select(pl.all().exclude('^wxt532_.*$', '^cal_.*$', '^enclosure_.*$', '^raspi_.*$', '^ups_.*$')) \
    .filter(pl.col('gmp343_filtered') > 0) \
    .filter(pl.col('gmp343_temperature') > 0) \
    .filter(pl.col('sht45_humidity') > 0) \
    .filter(pl.col('bme280_pressure') > 0) \
    .group_by_dynamic("creation_timestamp", every='1m', by= "system_id") \
    .agg(pl.all().exclude(["creation_timestamp","system_id"]).mean()) \
    .collect()
    
# extract calibration data from df_raw
df_dry_calibration = df_raw.filter(pl.col("cal_gmp343_filtered") > 0) \
    .filter(pl.col("cal_gmp343_temperature") > 0) \
    .filter(pl.col("cal_bme280_pressure") > 0) \
    .with_columns(pl.col("cal_sht45_humidity").fill_null(0.0)) \
    .with_columns(pl.struct(['cal_gmp343_temperature','cal_sht45_humidity','cal_bme280_pressure'])
    .map_elements(lambda x: apc.rh_to_molar_mixing(x['cal_sht45_humidity'],apc.absolute_temperature(x['cal_gmp343_temperature']),x['cal_bme280_pressure']*100), return_dtype=pl.Float64) \
    .alias("cal_h2o_v%")) \
    .with_columns(pl.struct(['cal_gmp343_filtered','cal_gmp343_temperature','cal_sht45_humidity','cal_bme280_pressure']) \
    .map_elements(lambda x: apc.calculate_co2dry(x['cal_gmp343_filtered'],x['cal_gmp343_temperature'],x['cal_sht45_humidity'],x['cal_bme280_pressure']*100), return_dtype=pl.Float64)
    .alias("cal_gmp343_dry")) \
    .select("creation_timestamp","system_id", '^cal_.*$') \
    .filter((pl.col("cal_bottle_id") > 0) & (pl.col("cal_bottle_id") <= df_gas["cal_bottle_id"].max())) \
    .collect()
    



The argument `by` for `group_by_dynamic` is deprecated. It has been renamed to `group_by`.



In [27]:
df_raw = None

# Perform Dry-Wet Conversion

### Measurement Data

In [28]:
# perform dry conversion for measurement data                
df_1_m = df_1_m.with_columns(pl.struct(['gmp343_temperature','sht45_humidity']) \
    .map_elements(lambda x: apc.rh_to_ah(x['sht45_humidity'],apc.absolute_temperature(x['gmp343_temperature'])), return_dtype=pl.Float64)
    .alias("h2o_ah")) \
    .with_columns(pl.struct(['gmp343_temperature','sht45_humidity','bme280_pressure'])
    .map_elements(lambda x: (apc.rh_to_molar_mixing(x['sht45_humidity'],apc.absolute_temperature(x['gmp343_temperature']),x['bme280_pressure']*100))*100, return_dtype=pl.Float64) \
    .alias("h2o_v%")) \
    .with_columns(pl.struct(['gmp343_temperature','bme280_humidity','bme280_pressure'])
    .map_elements(lambda x: (apc.rh_to_molar_mixing(x['bme280_humidity'],apc.absolute_temperature(x['gmp343_temperature']),x['bme280_pressure']*100))*100, return_dtype=pl.Float64) \
    .alias("bme280_h2o_v%")) \
    .with_columns(pl.struct(['gmp343_filtered','gmp343_temperature','sht45_humidity','bme280_pressure']) \
    .map_elements(lambda x: apc.calculate_co2dry(x['gmp343_filtered'],x['gmp343_temperature'],x['sht45_humidity'],x['bme280_pressure']*100), return_dtype=pl.Float64)
    .alias("gmp343_dry"))

In [29]:
df_1_m.tail(3).select("creation_timestamp","system_id","gmp343_filtered", "h2o_ah", "h2o_v%" ,"gmp343_dry")

creation_timestamp,system_id,gmp343_filtered,h2o_ah,h2o_v%,gmp343_dry
"datetime[μs, UTC]",i64,f64,f64,f64,f64
2024-12-04 10:07:00 UTC,17,463.4,7.947065,1.187228,468.967715
2024-12-04 10:08:00 UTC,17,451.016667,7.939766,1.179518,456.399985
2024-12-04 10:09:00 UTC,17,447.9,7.905772,1.174796,453.224465


In [30]:
df_1_m.write_parquet(os.path.join(DATA_DIRECTORY, "processed", "pipeline", "dry_1_min_acropolis.parquet"))

# Calculate Calibration Correction

In [31]:
df_slope_intercept = df_dry_calibration.join(df_gas.cast({"cal_bottle_id": pl.Float64}), on = ["cal_bottle_id"], how= "left", coalesce=True) \
    .with_columns((pl.col("creation_timestamp").dt.date()).alias("date")) \
    .sort("date") \
    .group_by([pl.col("date"), pl.col("system_id"), pl.col("cal_bottle_id")]) \
    .agg([
        pl.col("cal_gmp343_dry"),
        pl.col("cal_bottle_CO2").last(),
        pl.col("creation_timestamp").last(),
        ]) \
    .with_columns([pl.col("cal_gmp343_dry").map_elements(lambda x: cp.process_bottle(x), return_dtype=pl.Float64)]) \
    .filter(pl.col("cal_gmp343_dry") > 0) \
    .sort(pl.col("cal_gmp343_dry")) \
    .group_by(["date", "system_id"]) \
    .agg([
        pl.col("cal_gmp343_dry"),
        pl.col("cal_bottle_CO2"),
        pl.col("creation_timestamp").last()
        ]) \
    .filter(pl.col("cal_gmp343_dry").list.len() == 2) \
    .with_columns(pl.struct(['cal_gmp343_dry','cal_bottle_CO2']) \
    .map_elements(lambda x: cp.two_point_calibration(x['cal_gmp343_dry'],x['cal_bottle_CO2'])) \
    .alias('slope, intercept')) \
    .with_columns([(pl.col("slope, intercept").list.first()).alias("slope"),
                   (pl.col("slope, intercept").list.last()).alias("intercept")
                   ]) \
    .select("creation_timestamp", "system_id", "slope", "intercept") \
    .filter(pl.col("slope") > 0) \
    .sort("creation_timestamp")





In [32]:
df_slope_intercept.tail(3)

creation_timestamp,system_id,slope,intercept
"datetime[μs, UTC]",i64,f64,f64
2024-12-04 03:31:34.790 UTC,7,0.999528,-23.173282
2024-12-04 03:31:37.640 UTC,1,0.991006,-48.23984
2024-12-04 03:31:54.870 UTC,14,1.004126,-26.639127


In [33]:
# safe results to parquet
df_slope_intercept.write_parquet(os.path.join(DATA_DIRECTORY, "processed", "pipeline", "slope_intercept_acropolis.parquet"))

In [34]:
fig = px.line(df_slope_intercept.sort("creation_timestamp").filter((pl.col("slope") > 0.7) & (pl.col("slope") < 1.1)), x="creation_timestamp", y = "slope", color = "system_id")
fig.show()
fig = px.histogram(df_slope_intercept.filter((pl.col("slope") > 0.7) & (pl.col("slope") < 1.1)), x="slope", color = "system_id")
fig.show()
fig = px.histogram(df_slope_intercept.filter((pl.col("intercept") < 100) & (pl.col("intercept") > -100)), x="intercept", color = "system_id")
fig.show()

# Perform Calibration Correction

## 1m aggregated data

In [35]:
df_systems = []

for id in sensor_id:
    df_slope_intercept_id = df_slope_intercept.filter(pl.col("system_id") == id) \
        .sort("creation_timestamp") \
        .drop("system_id")
        
    df_wind_id = df_wind.filter(pl.col("system_id") == id) \
        .sort("creation_timestamp") \
        .drop("system_id", "system_name", "date")
        
    df_aux_id = df_aux.filter(pl.col("system_id") == id) \
        .sort("creation_timestamp") \
        .drop("system_id", "system_name", "date")
    
    df_system = df_1_m.filter(pl.col("system_id") == id) \
        .sort("creation_timestamp") \
        .join_asof(df_slope_intercept_id, on="creation_timestamp", strategy="nearest", tolerance="10m") \
        .join_asof(df_wind_id, on="creation_timestamp", strategy="nearest", tolerance="10m") \
        .join_asof(df_aux_id, on="creation_timestamp", strategy="nearest", tolerance="10m") \
        .with_columns([
            pl.col("slope").interpolate().alias("slope_interpolated"),
            pl.col("intercept").interpolate().alias("intercept_interpolated")
            ]) \
        .with_columns([
            pl.col("slope_interpolated").forward_fill(),
            pl.col("intercept_interpolated").forward_fill()
            ]) \
        .with_columns(((pl.col("gmp343_dry")) * pl.col("slope_interpolated") + pl.col("intercept_interpolated")).alias("gmp343_corrected")) \
        .with_columns((pl.col("creation_timestamp").dt.date()).alias("date"))
            
    
    df_systems.append(df_system)
        

df_1_m = pl.concat(df_systems, how="vertical") \
    .with_columns(pl.struct(["system_id"]) \
    .map_elements(lambda x: f"acropolis-{x['system_id']}", return_dtype=pl.String) \
    .alias("sys_name_short"))

In [36]:
df_1_m.tail(1)

system_id,creation_timestamp,system_name,gmp343_raw,gmp343_compensated,gmp343_filtered,gmp343_temperature,sht45_humidity,sht45_temperature,bme280_humidity,bme280_temperature,bme280_pressure,revision,receipt_timestamp,h2o_ah,h2o_v%,bme280_h2o_v%,gmp343_dry,slope,intercept,wxt532_speed_avg,wxt532_speed_min,wxt532_speed_max,wxt532_direction_avg,wxt532_direction_min,wxt532_direction_max,wxt532_last_update_time,wxt532_temperature,wxt532_heating_voltage,wxt532_supply_voltage,wxt532_reference_voltage,enclosure_bme280_humidity,enclosure_bme280_pressure,enclosure_bme280_temperature,raspi_cpu_usage,raspi_cpu_temperature,raspi_disk_usage,raspi_memory_usage,ups_battery_error_detected,ups_battery_above_voltage_threshold,ups_battery_is_fully_charged,ups_powered_by_grid,slope_interpolated,intercept_interpolated,gmp343_corrected,date,sys_name_short
i64,"datetime[μs, UTC]",str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,"datetime[ns, UTC]",f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,date,str
20,2024-12-04 10:09:00 UTC,,404.1,456.0,456.35,27.9,24.035,24.11,19.02,23.47,940.675,29.0,2024-12-04 10:09:12.195924480 UTC,6.506148,0.960962,0.760453,460.7779,,,0.9,0.0,2.2,269.8,10.0,353.0,1733300000.0,,,,,15.58,957.99,30.75,0.019,51.1,0.512,0.115,0.0,1.0,1.0,1.0,0.989252,-12.684538,443.141145,2024-12-04,"""acropolis-20"""


In [37]:
df_1_m.write_parquet(os.path.join(DATA_DIRECTORY, "processed", "pipeline", "calibrated_1_min_acropolis.parquet"))

## 10m aggregated data

In [38]:
df_1_m.sort("creation_timestamp") \
        .group_by_dynamic("creation_timestamp", every='10m', by=["system_id", "sys_name_short"]) \
        .agg(pl.all().exclude(["creation_timestamp","sys_name_short"]).mean(),
                pl.col("gmp343_corrected").std().alias("std"),
                pl.col("gmp343_corrected").var().alias("var")) \
        .write_parquet(os.path.join(DATA_DIRECTORY, "processed", "pipeline", "calibrated_10_min_acropolis.parquet"))


The argument `by` for `group_by_dynamic` is deprecated. It has been renamed to `group_by`.



## 1h aggregated data

In [39]:
df_1_m.sort("creation_timestamp") \
        .group_by_dynamic("creation_timestamp", every='1h', by=["system_id", "sys_name_short"]) \
        .agg(pl.all().exclude(["creation_timestamp","sys_name_short"]).mean(),
             pl.col("gmp343_corrected").std().alias("std"),
             (pl.col("gmp343_temperature").max() - pl.col("gmp343_temperature").min()).alias("gmp343_temperature_change")) \
        .write_parquet(os.path.join(DATA_DIRECTORY, "processed", "pipeline", "calibrated_1_h_acropolis.parquet"))


The argument `by` for `group_by_dynamic` is deprecated. It has been renamed to `group_by`.



# Renaming and column selection for ICOS cities portal

In [17]:
assert(outlier_removal)

In [18]:
#df_1_m = pl.read_parquet(os.path.join(DATA_DIRECTORY, "processed", "1m_cal_corr_acropolis.parquet"))

In [40]:
# Select the columns to be present in the ICOS cities portal product
selected_columns = ["creation_timestamp", "system_id", "sys_name_short", "gmp343_corrected", "gmp343_temperature", "h2o_v%", "bme280_pressure", "enclosure_bme280_pressure", "wxt532_speed_avg", "wxt532_direction_avg"]

df_1_m = df_1_m.select(selected_columns) 

In [41]:
df_1_m.tail(1)

creation_timestamp,system_id,sys_name_short,gmp343_corrected,gmp343_temperature,h2o_v%,bme280_pressure,enclosure_bme280_pressure,wxt532_speed_avg,wxt532_direction_avg
"datetime[μs, UTC]",i64,str,f64,f64,f64,f64,f64,f64,f64
2024-12-04 10:09:00 UTC,20,"""acropolis-20""",443.141145,27.9,0.960962,940.675,957.99,0.9,269.8


# Outlier Removal

In [42]:
df_systems = []
n_sigma = 3.0

for id in sensor_id:
    df_filtered = df_1_m.filter(pl.col("system_id")==id) \
        .cast({"gmp343_corrected": pl.Float32}) \
        .filter(pl.col("gmp343_corrected") > 0)
        #co2 is cast to f32 to match the hampel filter output, else comparison fails
    
    # Convert CO2 column to pandas series 
    data = df_filtered.get_column("gmp343_corrected").to_pandas()
      
    # Apply the Hampel filter  
    result = hampel(data, window_size=120, n_sigma=n_sigma)
    
    # Print share of detected spikes
    print(f"System ID: {id}, Detected spikes: {(len(result.outlier_indices) / len(data)):.4f}")
    
    # Create column "OriginalFlag" = 389 indicating local contamination
    df_system = df_filtered.with_columns((pl.from_pandas(result.filtered_data)).alias("co2_hampel_filtered")) \
        .with_columns(pl.when(pl.col("gmp343_corrected").ne(pl.col("co2_hampel_filtered"))).then(pl.lit(185)).otherwise(pl.lit(0)).alias("OriginalFlag")) \
        .drop("co2_hampel_filtered") \
        .cast({"gmp343_corrected": pl.Float64})
    
    df_systems.append(df_system)
    
df_1_m_spike_detected = pl.concat(df_systems, how="vertical")

# Option to add additional OriginalFlags

System ID: 1, Detected spikes: 0.0070
System ID: 2, Detected spikes: 0.0050
System ID: 3, Detected spikes: 0.0034
System ID: 4, Detected spikes: 0.0061
System ID: 5, Detected spikes: 0.0068
System ID: 6, Detected spikes: 0.0052
System ID: 7, Detected spikes: 0.0062
System ID: 8, Detected spikes: 0.0077
System ID: 9, Detected spikes: 0.0110
System ID: 10, Detected spikes: 0.0048
System ID: 11, Detected spikes: 0.0042
System ID: 12, Detected spikes: 0.0073
System ID: 13, Detected spikes: 0.0061
System ID: 14, Detected spikes: 0.0053
System ID: 15, Detected spikes: 0.0047
System ID: 16, Detected spikes: 0.0194
System ID: 17, Detected spikes: 0.0102
System ID: 18, Detected spikes: 0.0037
System ID: 19, Detected spikes: 0.0112
System ID: 20, Detected spikes: 0.0038


In [43]:
# Sensor temperature specific accurcy flag
df_1_m_spike_detected.with_columns((pl.when((pl.col("gmp343_temperature") < 26.0) | (pl.col("gmp343_temperature") > 32.0)) \
                                    .then(pl.lit(644)) \
                                    .otherwise(pl.lit(0)) \
                                    .alias("OriginalFlag"))).filter(pl.col("OriginalFlag") > 0)

creation_timestamp,system_id,sys_name_short,gmp343_corrected,gmp343_temperature,h2o_v%,bme280_pressure,enclosure_bme280_pressure,wxt532_speed_avg,wxt532_direction_avg,OriginalFlag
"datetime[μs, UTC]",i64,str,f64,f64,f64,f64,f64,f64,f64,i32
2024-02-06 14:33:00 UTC,1,"""acropolis-1""",666.747864,21.425,0.84198,946.8725,958.17,,,644
2024-02-06 14:34:00 UTC,1,"""acropolis-1""",661.960938,21.683333,0.864929,948.215,958.17,,,644
2024-02-06 14:35:00 UTC,1,"""acropolis-1""",697.487061,22.083333,0.926776,946.978333,958.18,,,644
2024-02-06 14:36:00 UTC,1,"""acropolis-1""",704.314392,22.466667,0.98847,947.06,958.18,,,644
2024-02-06 14:37:00 UTC,1,"""acropolis-1""",660.725525,22.816667,0.936552,948.055,958.05,,,644
…,…,…,…,…,…,…,…,…,…,…
2024-10-31 04:32:00 UTC,20,"""acropolis-20""",457.243622,32.066667,1.468037,946.695,961.88,1.8,81.6,644
2024-10-31 04:33:00 UTC,20,"""acropolis-20""",457.899811,32.083333,1.473286,947.186667,961.88,1.8,81.6,644
2024-10-31 04:34:00 UTC,20,"""acropolis-20""",455.257965,32.033333,1.473258,946.796667,961.8,0.4,153.7,644
2024-10-31 04:35:00 UTC,20,"""acropolis-20""",456.884094,32.033333,1.47119,946.615,961.8,0.4,153.7,644


In [44]:
df_1_m_spike_detected = df_1_m_spike_detected.with_columns(pl.when(pl.col("OriginalFlag") > 0).then(pl.lit('K')).otherwise(pl.lit('O')).alias("Flag"))

# save a 1m product for ICOS cities portal
df_1_m_spike_detected.write_parquet(os.path.join(DATA_DIRECTORY, "processed", "pipeline", "flagged_1_min_acropolis.parquet"))

In [45]:
df_1_m_spike_detected.tail(1)

creation_timestamp,system_id,sys_name_short,gmp343_corrected,gmp343_temperature,h2o_v%,bme280_pressure,enclosure_bme280_pressure,wxt532_speed_avg,wxt532_direction_avg,OriginalFlag,Flag
"datetime[μs, UTC]",i64,str,f64,f64,f64,f64,f64,f64,f64,i32,str
2024-12-04 10:09:00 UTC,20,"""acropolis-20""",443.141144,27.9,0.960962,940.675,957.99,0.9,269.8,0,"""O"""


In [46]:
# save a 1h product for ICOS cities portal
df_1_h_despiked = df_1_m_spike_detected.sort("creation_timestamp") \
        .filter(pl.col("Flag") == 'O') \
        .drop("Flag", "OriginalFlag") \
        .group_by_dynamic("creation_timestamp", every='1h', by=["system_id", "sys_name_short"]) \
        .agg(pl.all().exclude(["creation_timestamp","sys_name_short"]).mean(),
             pl.col("gmp343_corrected").std().alias("Stdev"),
             pl.col("gmp343_corrected").count().alias("NbPoints")) \
        .with_columns(pl.when(pl.col("NbPoints") < 40).then(pl.lit(389)).otherwise(pl.lit(0)).alias("OriginalFlag")) \
        .with_columns(
                pl.when(pl.col("OriginalFlag") > 0).then(pl.lit('K')).otherwise(pl.lit('O')).alias("Flag"),
                (pl.col("creation_timestamp") + timedelta(minutes=30)))
             
df_1_h_despiked.write_parquet(os.path.join(DATA_DIRECTORY, "processed", "pipeline", "flagged_1_h_acropolis.parquet"))


The argument `by` for `group_by_dynamic` is deprecated. It has been renamed to `group_by`.



In [47]:
df_1_h_despiked.tail(1)

system_id,sys_name_short,creation_timestamp,gmp343_corrected,gmp343_temperature,h2o_v%,bme280_pressure,enclosure_bme280_pressure,wxt532_speed_avg,wxt532_direction_avg,Stdev,NbPoints,OriginalFlag,Flag
i64,str,"datetime[μs, UTC]",f64,f64,f64,f64,f64,f64,f64,f64,u32,i32,str
17,"""acropolis-17""",2024-12-04 10:30:00 UTC,450.003189,29.265,1.182824,938.018,960.343,1.08,237.06,4.707614,10,389,"""K"""


# Plot Despiked Data with Continous Error Bars

In [48]:
#df_1_h_despiked= pl.read_parquet(os.path.join(DATA_DIRECTORY, "processed", "1h_level_1_cities_portal.parquet"))

In [49]:
start_date = datetime(2024, 7, 1, 0, 0, 0).replace(tzinfo=timezone.utc)
end_date = datetime(2024, 11, 30, 23, 59, 59).replace(tzinfo=timezone.utc)

def create_figure(df, system_name:str, start_date, end_date, color:Literal["red", "blue", "green"]):
    
    df_plot = df.filter(pl.col("creation_timestamp").is_between(start_date, end_date)) \
        .filter(pl.col("sys_name_short")==system_name) \
        .with_columns(upper = pl.col("gmp343_corrected") + pl.col("Stdev"),
                    lower = pl.col("gmp343_corrected") - pl.col("Stdev"))
    
    
    if color=='red':
        color_set = ('#b91c1c', 'rgba(239, 68, 68, 0.3)')
    if color=='blue':
        color_set = ('#1d4ed8','rgba(59, 131, 246, 0.3)')
    if color=='green':
        color_set = ('#15803d','rgba(34, 197, 94, 0.3)')
    
    return [
        go.Scatter(
            name=system_name,
            x=df_plot["creation_timestamp"],
            y=df_plot["gmp343_corrected"],
            mode='lines',
            line=dict(color=color_set[0]),
        ),
        go.Scatter(
            name='Upper Bound',
            x=df_plot["creation_timestamp"],
            y=df_plot["upper"],
            mode='lines',
            marker=dict(color=color_set[1]),
            line=dict(width=0),
            showlegend=False
        ),
        go.Scatter(
            name='Lower Bound',
            x=df_plot["creation_timestamp"],
            y=df_plot["lower"],
            marker=dict(color=color_set[1]),
            line=dict(width=0),
            mode='lines',
            fillcolor=color_set[1],
            fill='tonexty',
            showlegend=False
        )
    ]
  
figures = create_figure(df_1_h_despiked, "acropolis-14", start_date, end_date, color="red") \
    + create_figure(df_1_h_despiked, "acropolis-7", start_date, end_date, color="green") \
    #+ create_figure(df_1_h_despiked, "acropolis-6", start_date, end_date, color="blue")

fig = go.Figure(figures)
fig.update_layout(
    yaxis_title='CO2 (ppm)',
    xaxis_title='UTC Time (hourly aggregated)',
    title='Continuous, variable value error bars',
    hovermode="x"
)
fig.show()