In [1]:
from datetime import datetime
from datetime import timezone
import polars as pl
import os
import plotly.express as px

DATA_DIRECTORY = os.environ.get("DATA_DIRECTORY")

# processed average measurement data
df_1h = pl.scan_parquet(os.path.join(DATA_DIRECTORY, "processed/acropolis_sites", "1h_acropolis_site_data.parquet"))

In [2]:
df_1h.columns

['system_id',
 'sys_name_short',
 'creation_timestamp',
 'co2',
 'h2o',
 'ws',
 'wd',
 'Stdev',
 'NbPoints',
 'OriginalFlag',
 'Flag',
 'site_name']

In [3]:
df_1h.head().collect()

system_id,sys_name_short,creation_timestamp,co2,h2o,ws,wd,Stdev,NbPoints,OriginalFlag,Flag,site_name
i64,str,"datetime[μs, UTC]",f32,f64,f64,f64,f32,u32,i32,str,str
1,"""acropolis-1""",2024-02-29 00:30:00 UTC,451.317993,0.989681,0.951667,111.996667,1.371983,60,0,"""O""","""SENR"""
1,"""acropolis-1""",2024-02-29 01:30:00 UTC,451.69223,0.994055,1.023333,111.54,1.587086,60,0,"""O""","""SENR"""
1,"""acropolis-1""",2024-02-29 02:30:00 UTC,452.962433,0.986591,0.691667,117.578333,0.996243,60,0,"""O""","""SENR"""
1,"""acropolis-1""",2024-02-29 03:30:00 UTC,453.715302,0.755575,0.930769,117.303846,1.770944,26,389,"""K""","""SENR"""
1,"""acropolis-1""",2024-02-29 04:30:00 UTC,455.956268,0.900768,1.07,118.681667,1.205888,60,0,"""O""","""SENR"""


In [4]:
def extract_site_data(df, site_name:str, wind:list=[180,270], plot=False):
        
    df_extracted = df.filter(pl.col("site_name") == site_name) \
        .filter(pl.col("creation_timestamp") > datetime(2024, 2, 1).replace(tzinfo=timezone.utc)) \
        .filter((pl.col("wd") > wind[0]) & (pl.col("wd") < wind[1])) \
        .filter((pl.col("ws") > 3)) \
        .filter(pl.col("Flag") == "O") \
        .collect()
        
    df_extracted = df_extracted.with_columns(
        (pl.col("creation_timestamp").dt.hour()).alias("hour of the day")) \
        .group_by("hour of the day", by=["site_name"]) \
        .agg(pl.col("co2").mean()) \
        .sort("hour of the day")
    
    if plot:
        fig = px.line(df_extracted, x="hour of the day", y="co2", markers=True, title=f"{site_name}: CO2 [ppm], Filter for wind between {wind}°")
        fig.show()
    else:
        return df_extracted

In [5]:
sites = ["DLRR", "TUMR", "RDIR"]
df_sites = []

for site in sites:
    
    df_sites.append(extract_site_data(df=df_1h, site_name=site, wind=[250,290]))
    
df_plot = pl.concat(df_sites)

fig = px.line(df_plot, x="hour of the day", y="co2", color="site_name", markers=True)
fig.show()

  .group_by("hour of the day", by=["site_name"]) \


In [6]:
sites = ["FINR", "TUMR", "PASR"]
df_sites = []

for site in sites:
    
    df_sites.append(extract_site_data(df=df_1h, site_name=site, wind=[75,105]))
    
df_plot = pl.concat(df_sites)

fig = px.line(df_plot, x="hour of the day", y="co2", color="site_name", markers=True)
fig.show()


named `by` param is deprecated; use positional `*args` instead.



In [7]:
def extract_monthly_site_data(df, site_name:str, wind:list=[], plot=False):
      
    if wind != []:  
        df_extracted = df.filter(pl.col("site_name") == site_name) \
            .filter(pl.col("creation_timestamp") > datetime(2024, 2, 1).replace(tzinfo=timezone.utc)) \
            .filter((pl.col("wd") > wind[0]) & (pl.col("wd") < wind[1])) \
            .filter((pl.col("ws") > 1)) \
            .filter(pl.col("Flag") == "O") \
            .collect()
    else:
        df_extracted = df.filter(pl.col("site_name") == site_name) \
        .filter(pl.col("creation_timestamp") > datetime(2024, 2, 1).replace(tzinfo=timezone.utc)) \
        .filter(pl.col("Flag") == "O") \
        .collect()
        
    df_extracted = df_extracted.with_columns(
        (pl.col("creation_timestamp").dt.month()).alias("month"))\
        .group_by("month", by=["site_name"]) \
        .agg(pl.col("co2").mean()) \
        .sort("month")
    
    if plot:
        fig = px.line(df_extracted, x="month", y="co2", markers=True, title=f"{site_name}: CO2 [ppm], Filter for wind between {wind}°")
        fig.show()
    else:
        return df_extracted

In [14]:
# east wind
sites = ["FINR", "TUMR", "PASR", "MAIR"]
df_sites = []

for site in sites:
    
    df_sites.append(extract_monthly_site_data(df=df_1h, site_name=site, wind=[70,110]))
    
df_plot = pl.concat(df_sites)

fig = px.line(df_plot, x="month", y="co2", color="site_name", markers=True)
fig.show()


named `by` param is deprecated; use positional `*args` instead.



In [13]:
# east wind back ground
sites = ["FINR", "FELR", "NPLR", "TAUR"]
df_sites = []

for site in sites:
    
    df_sites.append(extract_monthly_site_data(df=df_1h, site_name=site, wind=[70,110]))
    
df_plot = pl.concat(df_sites)

fig = px.line(df_plot, x="month", y="co2", color="site_name", markers=True)
fig.show()


named `by` param is deprecated; use positional `*args` instead.



In [15]:
# west wind downwind
sites = ["FINR", "FELR", "NPLR", "TAUR"]
df_sites = []

for site in sites:
    
    df_sites.append(extract_monthly_site_data(df=df_1h, site_name=site, wind=[250,290]))
    
df_plot = pl.concat(df_sites)

fig = px.line(df_plot, x="month", y="co2", color="site_name", markers=True)
fig.show()


named `by` param is deprecated; use positional `*args` instead.



In [18]:
# south-east wind back ground
sites = ["MAIR", "DLRR", "GROR", "TAUR"]
df_sites = []

for site in sites:
    
    df_sites.append(extract_monthly_site_data(df=df_1h, site_name=site, wind=[200,250]))
    
df_plot = pl.concat(df_sites)

fig = px.line(df_plot, x="month", y="co2", color="site_name", markers=True)
fig.show()


named `by` param is deprecated; use positional `*args` instead.



In [19]:
# west wind downwind
sites = ["MAIR", "DLRR", "SWMR", "TUMR", "PASR", "GROR"]
df_sites = []

for site in sites:
    
    df_sites.append(extract_monthly_site_data(df=df_1h, site_name=site, wind=[80,110]))
    
df_plot = pl.concat(df_sites)

fig = px.line(df_plot, x="month", y="co2", color="site_name", markers=True)
fig.show()


named `by` param is deprecated; use positional `*args` instead.



In [None]:
# inner city
sites = ["BLUT_48", "TUMR", "SWMR", "SCHR", "RDIR", "SENR", "BOGR"]
df_sites = []

for site in sites:
    
    df_sites.append(extract_monthly_site_data(df=df_1h, site_name=site, wind=[]))
    
df_plot = pl.concat(df_sites)

fig = px.line(df_plot, x="month", y="co2", color="site_name", markers=True)
fig.show()


named `by` param is deprecated; use positional `*args` instead.



In [20]:
# sorounded by green areas
sites = ["MAIR", "DLRR", "TAUR", "FINR", "FELR"]
df_sites = []

for site in sites:
    
    df_sites.append(extract_monthly_site_data(df=df_1h, site_name=site, wind=[]))
    
df_plot = pl.concat(df_sites)

fig = px.line(df_plot, x="month", y="co2", color="site_name", markers=True)
fig.show()


named `by` param is deprecated; use positional `*args` instead.



In [21]:
# since feb
sites = ["TUMR", "TAUR", "MAIR", "DLRR", "FINR", "SENR", "FELR"]
df_sites = []

for site in sites:
    
    df_sites.append(extract_monthly_site_data(df=df_1h, site_name=site, wind=[]))
    
df_plot = pl.concat(df_sites)

fig = px.line(df_plot, x="month", y="co2", color="site_name", markers=True)
fig.show()


named `by` param is deprecated; use positional `*args` instead.

