In [None]:
import polars as pl
import os
from datetime import datetime, timezone
import plotly.express as px
import numpy as np

# Root folder of the data tree, read from the environment (None when the variable is unset).
DATA_DIRECTORY = os.getenv("DATA_DIRECTORY")

In [None]:
# Fail early with a readable message: os.path.join(None, ...) would otherwise
# raise an opaque TypeError when the environment variable is missing.
if DATA_DIRECTORY is None:
    raise RuntimeError(
        "Environment variable DATA_DIRECTORY is not set; cannot locate the input parquet file."
    )

# Lazy scan of the flagged level-1 1-minute product.
df_1_min = pl.scan_parquet(
    os.path.join(DATA_DIRECTORY, "processed", "pipeline", "flagged_L1_1_min_acropolis.parquet")
)

In [None]:
# Flags

# 1 minute level 1 product
#   - Flag 'U' = data correct before manual quality control
#   - Flag 'H' = Potential local contamination detected by the Hampel filter (auto)
#   - Flag 'W' = Possible local contamination indicated by wind direction or velocity (auto)
#   - Flag 'T' = Sensor temperature instability (auto)

# 1 minute level 2 product
#   - Flag 'O' = data correct after manual quality control
#   - Flag 'H' = Potential local contamination detected by the Hampel filter (auto)
#   - Flag 'W' = Possible local contamination indicated by wind direction or velocity (auto)
#   - Flag 'T' = Sensor temperature instability (auto)
#   - Flag 'C' = Potentially locally contaminated (manual quality control)
#   - Flag 'I' = Sub-optimal inlet placement


# 1 hour aggregated product
#   - Flag 'O' = data correct after manual quality control
#   - Flag 'K' = data incorrect after manual quality control

In [None]:
# Preview the first five rows of the lazy frame.
df_1_min.head(n=5).collect()

In [None]:
# Utility

def plot_1_h_co2(df_1_h):
    """Show a line plot of the hourly CO2 values that carry flag 'O'.

    Parameters
    ----------
    df_1_h
        Frame with the columns ``creation_timestamp``, ``gmp343_corrected``
        and ``Flag``. It is handed to ``plotly.express.line`` after masking.

    Returns
    -------
    None
        The figure is displayed as a side effect.
    """
    # Mask values that are not flagged 'O' instead of dropping their rows.
    # ``connectgaps=False`` only breaks the line at missing values, so with the
    # rows removed plotly would draw a straight segment across every excluded
    # period. This mirrors the masking used in plot_1_min_co2.
    df_plot = df_1_h.with_columns(
        pl.when(pl.col("Flag") == 'O')
        .then(pl.col("gmp343_corrected"))
        .otherwise(np.nan)
        .alias("gmp343_corrected")
    )

    fig = px.line(df_plot,
            x="creation_timestamp",
            y="gmp343_corrected",
            markers=True)
    fig.update_traces(connectgaps=False)
    fig.update_layout(
        yaxis_title='CO2 Concentration (ppm)',
        xaxis_title='',
        title='',
    )
    fig.show()
    
def plot_1_min_co2(df_1_min):
    """Scatter the 1-minute CO2 readings, split by whether the flag is 'U'.

    Two helper columns are derived from ``gmp343_corrected``: ``"U"`` keeps
    the value where ``Flag`` equals 'U', ``"Not U"`` keeps it where ``Flag``
    differs from 'U'; everywhere else the helper column is NaN. Both are
    plotted against ``creation_timestamp`` on a y-axis fixed to 400-1000.

    Parameters
    ----------
    df_1_min
        Frame with the columns ``creation_timestamp``, ``gmp343_corrected``
        and ``Flag``.

    Returns
    -------
    None
        The figure is displayed as a side effect.
    """
    co2 = pl.col("gmp343_corrected")

    not_u_series = (
        pl.when(pl.col("Flag") != 'U').then(co2).otherwise(np.nan).alias("Not U")
    )
    u_series = (
        pl.when(pl.col("Flag") == 'U').then(co2).otherwise(np.nan).alias("U")
    )
    # Same column order as before: "Not U" is appended first, then "U".
    df_plot = df_1_min.with_columns([not_u_series, u_series])

    layout = {
        "yaxis_title": 'CO2 Concentration (ppm)',
        "xaxis_title": '',
        "title": '',
    }

    fig = px.scatter(df_plot, x="creation_timestamp", y=["U", "Not U"])
    fig.update_traces(connectgaps=False)
    fig.update_layout(**layout)
    fig.update_yaxes(range=[400, 1000])
    fig.show()
    
def plot_1_min_wind(df_plot):
    """Scatter the wind direction over time and display the figure.

    Parameters
    ----------
    df_plot
        Data handed unchanged to ``plotly.express.scatter``; the columns
        ``creation_timestamp`` and ``wxt532_direction_avg`` are referenced.

    Returns
    -------
    None
        The figure is displayed as a side effect.
    """
    layout = {
        "yaxis_title": 'Wind Direction (°)',
        "xaxis_title": '',
        "title": '',
    }

    fig = px.scatter(df_plot, x="creation_timestamp", y="wxt532_direction_avg")
    fig.update_traces(connectgaps=False)
    fig.update_layout(**layout)
    # Fixed y-axis from 0 to 360 degrees.
    fig.update_yaxes(range=[0, 360])
    fig.show()

def flag_and_plot(df, system_id: int, start_date, end_date):
    """Plot one system's readings inside a time window, then flag that window 'C'.

    Parameters
    ----------
    df
        Lazy frame (``.collect()`` is called on the filtered selection) with
        the columns ``creation_timestamp``, ``system_id`` and ``Flag``.
    system_id
        Identifier of the system whose readings are flagged.
    start_date, end_date
        Window bounds passed to ``is_between`` (inclusive on both ends by
        polars' default).

    Returns
    -------
    The input frame with ``Flag`` replaced by ``"C"`` on the selected rows;
    all other rows keep their existing flag. Any flag already present on a
    selected row is overwritten.

    Warns
    -----
    UserWarning
        When the window selects no rows (for example a mistyped date or
        system id). The plot is skipped in that case because there is
        nothing to show, and the flagging is a no-op.
    """
    import warnings

    # Built once so the plotted selection and the flagged selection cannot drift apart.
    in_window = (
        pl.col("creation_timestamp").is_between(start_date, end_date)
        & (pl.col("system_id") == system_id)
    )

    df_temp = df.filter(in_window).collect()

    if df_temp.height == 0:
        warnings.warn(
            f"flag_and_plot: no rows for system_id={system_id} between "
            f"{start_date} and {end_date}; nothing will be flagged.",
            stacklevel=2,
        )
    else:
        plot_1_min_co2(df_temp)

    return df.with_columns(
        pl.when(in_window)
        .then(pl.lit("C"))
        .otherwise(pl.col("Flag"))  # Keep original value for others
        .alias("Flag")
    )

# Change Flag to 'C' = Potentially locally contaminated (manual quality control)

In [None]:
# Working frame for manual quality control: starts as the level-1 lazy frame and is
# rebound in the cells below to the frame returned by each flag_and_plot call.
df_manual_check = df_1_min

In [44]:
# Manually update flags for NPLR

# Periods to remove, as (start, end) pairs in UTC.
# NOTE(review): the second window lies entirely inside the third one - confirm both are intended.
nplr_windows = [
    (datetime(2024, 7, 3, 5, 39, 0, tzinfo=timezone.utc), datetime(2024, 7, 3, 7, 34, 0, tzinfo=timezone.utc)),
    (datetime(2024, 12, 10, 14, 30, 0, tzinfo=timezone.utc), datetime(2024, 12, 10, 17, 0, 0, tzinfo=timezone.utc)),
    (datetime(2024, 12, 10, 8, 30, 0, tzinfo=timezone.utc), datetime(2024, 12, 11, 0, 30, 0, tzinfo=timezone.utc)),
]

for start_date, end_date in nplr_windows:
    df_manual_check = flag_and_plot(
        df=df_manual_check,
        system_id=9,
        start_date=start_date,
        end_date=end_date,
    )

In [None]:
# Manually update flags for TUMR

# Period to remove (UTC)
start_date = datetime(2024, 10, 14, 7, 22, 0, tzinfo=timezone.utc)
end_date = datetime(2024, 10, 14, 12, 53, 0, tzinfo=timezone.utc)

df_manual_check = flag_and_plot(
    df=df_manual_check,
    system_id=6,
    start_date=start_date,
    end_date=end_date,
)

In [None]:
# Manually update flags for MAIR

# Periods to remove, as (start, end) pairs in UTC.
mair_windows = [
    (datetime(2024, 3, 7, 15, 0, 0, tzinfo=timezone.utc), datetime(2024, 3, 8, 5, 8, 0, tzinfo=timezone.utc)),
    (datetime(2024, 3, 8, 19, 30, 0, tzinfo=timezone.utc), datetime(2024, 3, 9, 1, 20, 0, tzinfo=timezone.utc)),
    (datetime(2024, 4, 22, 6, 10, 0, tzinfo=timezone.utc), datetime(2024, 4, 22, 8, 10, 0, tzinfo=timezone.utc)),
    (datetime(2024, 4, 22, 16, 20, 0, tzinfo=timezone.utc), datetime(2024, 4, 23, 11, 0, 0, tzinfo=timezone.utc)),
    (datetime(2024, 12, 8, 18, 0, 0, tzinfo=timezone.utc), datetime(2024, 12, 11, 11, 0, 0, tzinfo=timezone.utc)),
]

for start_date, end_date in mair_windows:
    df_manual_check = flag_and_plot(
        df=df_manual_check,
        system_id=16,
        start_date=start_date,
        end_date=end_date,
    )

In [None]:
# Manually update flags for PASR
# Readings might be related to a summer party on the roof-top

# Period to remove (UTC)
start_date = datetime(2024, 8, 13, 17, 0, 0, tzinfo=timezone.utc)
end_date = datetime(2024, 8, 13, 19, 59, 59, tzinfo=timezone.utc)

df_manual_check = flag_and_plot(
    df=df_manual_check,
    system_id=18,
    start_date=start_date,
    end_date=end_date,
)

In [None]:
# Manually update flags for BOGR

# Period to remove (UTC)
start_date = datetime(2024, 10, 4, 16, 0, 0, tzinfo=timezone.utc)
end_date = datetime(2024, 10, 5, 1, 0, 0, tzinfo=timezone.utc)

df_manual_check = flag_and_plot(
    df=df_manual_check,
    system_id=17,
    start_date=start_date,
    end_date=end_date,
)

In [None]:
# Manually update flags for BALR

# Periods to remove, as (start, end) pairs in UTC, kept in their original order.
balr_windows = [
    (datetime(2024, 11, 2, 1, 30, 0, tzinfo=timezone.utc), datetime(2024, 11, 2, 4, 30, 0, tzinfo=timezone.utc)),
    (datetime(2024, 11, 2, 6, 0, 0, tzinfo=timezone.utc), datetime(2024, 11, 2, 7, 30, 0, tzinfo=timezone.utc)),
    (datetime(2024, 11, 21, 20, 50, 0, tzinfo=timezone.utc), datetime(2024, 11, 22, 2, 20, 0, tzinfo=timezone.utc)),
    (datetime(2024, 11, 28, 6, 30, 0, tzinfo=timezone.utc), datetime(2024, 11, 28, 8, 30, 0, tzinfo=timezone.utc)),
    (datetime(2024, 11, 13, 9, 0, 0, tzinfo=timezone.utc), datetime(2024, 11, 13, 20, 0, 0, tzinfo=timezone.utc)),
    (datetime(2024, 12, 4, 0, 0, 0, tzinfo=timezone.utc), datetime(2024, 12, 4, 2, 0, 0, tzinfo=timezone.utc)),
    (datetime(2024, 12, 8, 15, 0, 0, tzinfo=timezone.utc), datetime(2024, 12, 8, 17, 0, 0, tzinfo=timezone.utc)),
]

for start_date, end_date in balr_windows:
    df_manual_check = flag_and_plot(
        df=df_manual_check,
        system_id=19,
        start_date=start_date,
        end_date=end_date,
    )

In [None]:
# TODO: set U to O for level 2 1-minute product
#   - Flag 'U' = data correct before manual quality control
#   - Flag 'O' = data correct after manual quality control

# df.with_columns(
#         pl.when(pl.col("Flag") == 'U')
#         .then(pl.lit("O")) 
#         .otherwise(pl.col("Flag"))  # Keep original value for others
#         .alias("Flag")
#     )