In [14]:
from datetime import datetime
from datetime import timezone
import polars as pl
import os
import plotly.express as px

DATA_DIRECTORY = os.environ.get("DATA_DIRECTORY")

# processed 10min average measurement data
df_1m = pl.scan_parquet(os.path.join(DATA_DIRECTORY, "processed", "1m_level_1_cities_portal.parquet"))
df_1h = pl.scan_parquet(os.path.join(DATA_DIRECTORY, "processed", "1h_level_1_cities_portal.parquet"))


In [15]:
df_1m.tail(2).collect()

#Datetime,system_id,sys_name_short,co2,h2o,ws,wd,OriginalFlag,Flag,Year,Month,Day,Hour,Minute,Second
"datetime[μs, UTC]",i64,str,f32,f64,f64,f64,i32,str,i32,i8,i8,i8,i8,i8
2024-07-18 07:30:00 UTC,20,"""acropolis-20""",432.470215,2.223626,3.9,95.6,0,"""O""",2024,7,18,7,30,0
2024-07-18 07:31:00 UTC,20,"""acropolis-20""",431.552277,2.219629,3.9,95.6,0,"""O""",2024,7,18,7,31,0


In [16]:
df_1h.tail(2).collect()

system_id,sys_name_short,#Datetime,co2,h2o,ws,wd,Stdev,NbPoints,OriginalFlag,Flag,Year,Month,Day,Hour,Minute,Second
i64,str,"datetime[μs, UTC]",f32,f64,f64,f64,f32,u32,i32,str,i32,i8,i8,i8,i8,i8
17,"""acropolis-17""",2024-07-18 06:30:00 UTC,448.151337,2.535515,2.306667,67.828333,11.806256,60,0,"""O""",2024,7,18,6,30,0
17,"""acropolis-17""",2024-07-18 07:30:00 UTC,428.596436,2.524838,2.9125,62.096875,3.190628,32,389,"""K""",2024,7,18,7,30,0


In [17]:
# Utility

def extract_site_data(df, dates:dict[list[tuple]], site_name:str, plot=False):
    extracted_dates = []
    
    for date in dates[site_name]:
        #print(date[0], date[1], date[2])
        
        df_temp = df.filter(pl.col("system_id") == date[0]).filter(pl.col("#Datetime").is_between(date[1], date[2])).collect()
        
        extracted_dates.append(df_temp)
    
    df_extracted = pl.concat(extracted_dates)
    
    if plot:
        fig = px.line(df_extracted, x="#Datetime", y="co2", markers=True, title=f"{site_name}: CO2 Corrected [ppm]")
        fig.show()
    else:
        return df_extracted

In [18]:
today = datetime(2024, 7, 17, 0, 0, 0).replace(tzinfo=timezone.utc)

# INFO: First two days of deployment are cut due to system warming up and adjusting to new environment

dates = {
      "KLEG": [
            (1, datetime(2024, 2, 29, 0, 0, 0).replace(tzinfo=timezone.utc), today)
            ], 
      "DLRO": [
            (14, datetime(2023, 11, 22, 0, 0, 0).replace(tzinfo=timezone.utc), datetime(2023, 12, 22, 23, 59, 59).replace(tzinfo=timezone.utc)),
            (5, datetime(2024, 2, 28, 0, 0, 0).replace(tzinfo=timezone.utc), today)
            ],
      "TUMN": [
            (6, datetime(2024, 2, 21, 0, 0, 0).replace(tzinfo=timezone.utc), datetime(2024, 5, 11, 0, 0, 0).replace(tzinfo=timezone.utc)),
            (6, datetime(2024, 5, 31, 0, 0, 0).replace(tzinfo=timezone.utc), today)
            ],
      "KRDI": [
            (2, datetime(2023, 9, 13, 0, 0, 0).replace(tzinfo=timezone.utc), datetime(2023, 12, 22, 0, 0, 0).replace(tzinfo=timezone.utc)),
            (8, datetime(2024, 3, 15, 0, 0, 0).replace(tzinfo=timezone.utc), today)
            ],
      "SWGG": [
            (10, datetime(2024, 4, 11, 0, 0, 0).replace(tzinfo=timezone.utc), today)
            ],
      "RFIN": [
            (15, datetime(2023, 11, 16, 0, 0, 0).replace(tzinfo=timezone.utc), datetime(2023, 12, 22, 0, 0, 0).replace(tzinfo=timezone.utc)),
            (3, datetime(2024, 2, 22, 0, 0, 0).replace(tzinfo=timezone.utc), datetime(2024, 4, 2, 23, 59, 59).replace(tzinfo=timezone.utc)),
            (11, datetime(2024, 4, 11, 0, 0, 0).replace(tzinfo=timezone.utc), today)
            ],
      "WKRT": [
            (8, datetime(2023, 10, 27, 0, 0, 0).replace(tzinfo=timezone.utc), datetime(2023, 12, 22, 0, 0, 0).replace(tzinfo=timezone.utc)),
            (12, datetime(2024, 2, 14, 0, 0, 0).replace(tzinfo=timezone.utc), today)
            ],
      "RFEL": [
            (7, datetime(2023, 12, 14, 0, 0, 0).replace(tzinfo=timezone.utc), datetime(2023, 12, 22, 0, 0, 0).replace(tzinfo=timezone.utc)),
            (13, datetime(2024, 2, 22, 0, 0, 0).replace(tzinfo=timezone.utc), today)
            ],
      "SWMZ": [
            (15, datetime(2024, 6, 14, 0, 0, 0).replace(tzinfo=timezone.utc), today)
            ],
      "SMAI": [
            (1, datetime(2023, 9, 8, 0, 0, 0).replace(tzinfo=timezone.utc), datetime(2023, 12, 22, 0, 0, 0).replace(tzinfo=timezone.utc)),
            (16, datetime(2024, 2, 8, 0, 0, 0).replace(tzinfo=timezone.utc), today)
            ],
      "SBBG": [
            (5, datetime(2023, 11, 16, 0, 0, 0).replace(tzinfo=timezone.utc), datetime(2024, 2, 6, 0, 0, 0).replace(tzinfo=timezone.utc)),
            (18, datetime(2024, 2, 8, 0, 0, 0).replace(tzinfo=timezone.utc), today)
            ],
      "LGRO": [
            (4, datetime(2023, 9, 22, 0, 0, 0).replace(tzinfo=timezone.utc), datetime(2024, 2, 12, 0, 0, 0).replace(tzinfo=timezone.utc)),
            (20, datetime(2024, 2, 14, 0, 0, 0).replace(tzinfo=timezone.utc), today)
            ],
      "FB48": [
            (14, datetime(2024, 6, 23, 0, 0, 0).replace(tzinfo=timezone.utc), today)
            ], 
      "FB85": [
            (7, datetime(2024, 6, 23, 0, 0, 0).replace(tzinfo=timezone.utc), today)
            ], 
      "KNPL": [
            (9, datetime(2024, 6, 26, 0, 0, 0).replace(tzinfo=timezone.utc), today)
            ], 
      "KBOG": [
            (17, datetime(2024, 7, 9, 0, 0, 0).replace(tzinfo=timezone.utc), today)
            ], 
 }


In [19]:
extract_site_data(df=df_1h, dates = dates, site_name="SMAI", plot=True)

In [20]:
extracted_dates = []

for key, site in dates.items():
    
    df_temp = extract_site_data(df=df_1m, dates = dates, site_name=key)
    df_temp = df_temp.with_columns(site_name = pl.lit(key))
    df_temp.write_parquet(os.path.join(DATA_DIRECTORY,"processed/acropolis_sites", f"1m_acropolis_{key}.parquet"))
    
    df_temp = extract_site_data(df=df_1h, dates = dates, site_name=key)
    df_temp = df_temp.with_columns(site_name = pl.lit(key))
    df_temp.write_parquet(os.path.join(DATA_DIRECTORY,"processed/acropolis_sites", f"1h_acropolis_{key}.parquet"))
    extracted_dates.append(df_temp)

In [124]:
df_temp.with_columns((pl.col("#Datetime").dt.total_seconds()))

SchemaError: invalid series dtype: expected `Duration`, got `datetime[μs, UTC]`

In [107]:
# construct icos cities portal head
import csv

file_name = "example_site.csv"
file_lines = len(df_temp)
data_level = 1
site_short_name = 'SMAI'
site_long_name = 'Mittelschule Maisach'
latitude = 12.345678
longitude = 12.3456789
altitude = 523.1234234
sampling_height = 24.234
start_date = df_temp.select("#Datetime").row(0)[0].strftime("%Y-%m-%d %H:%M:%S")
stop_date = df_temp.select("#Datetime").row(-1)[0].strftime("%Y-%m-%d %H:%M:%S")

with open(os.path.join(DATA_DIRECTORY,"processed/acropolis_sites",file_name), 'w', newline='') as file:
    writer = csv.writer(file, delimiter=';', lineterminator='\n')
    field = ["#Datetime","Year","Month","Day","Hour","Minute","Second","DecimalDate","co2","NbPoints","Stdev","Flag","OriginalFlag"]
    
    writer.writerow(["# TITLE: co2 - continuous time series from low and mid cost sensors"])
    writer.writerow([f"# FILE NAME: {file_name}"])
    writer.writerow(["# DATA FORMAT: see the last line of this header for column description"])
    writer.writerow([f'# TOTAL LINES: {file_lines}'])
    writer.writerow(['# HEADER LINES: 39'])
    writer.writerow(['# PROJECT: PAUL/ICOS CITIES'])
    writer.writerow([f'# DATA VERSION: L{data_level}'])
    writer.writerow([f'# STATION CODE: {site_short_name}'])
    writer.writerow([f'# STATION NAME: {site_long_name} ({site_short_name})'])
    writer.writerow(['# OBSERVATION CATEGORY: Air sampling observation at a stationary platform'])
    writer.writerow(['# COUNTRY/TERRITORY: DE'])
    writer.writerow([r'# RESPONSIBLE INSTITUTE: TUM, Technial University Munich'])
    writer.writerow(['# CONTRIBUTOR:  Patrick Aigner, Klaus Kürzinger, Jia Chen'])
    writer.writerow(['# CONTACT POINT: Patrick Aigner <patrick.aigner@tum.de>, Jia Chen <jia.chen@tum.de>'])
    writer.writerow(["# FUNDING: European Union's Horizon 2020 Research and Innovation Programme, Grant Agreement No. 101037319"])
    writer.writerow([f'# LATITUDE: {latitude:.6f}'])
    writer.writerow([f'# LONGITUDE: {longitude:.6f}'])
    writer.writerow([f'# ALTITUDE: {altitude:.1f} m asl'])
    writer.writerow([f'# SAMPLING HEIGHTS: {sampling_height:.1f} m agl'])
    writer.writerow(['# PARAMETER: co2'])
    writer.writerow([f'# COVERING PERIOD: {start_date} - {stop_date}'])
    writer.writerow(['# TIME INTERVAL: hourly'])
    writer.writerow(['# MEASUREMENT UNIT: µmol/mol'])
    writer.writerow(['# MEASUREMENT METHOD: NDIR'])
    writer.writerow(['# INSTRUMENT: Vaisala GMP343'])
    writer.writerow(['# SAMPLING TYPE: continuous'])
    writer.writerow(['# TIME ZONE: UTC +0'])
    writer.writerow(['# MEASUREMENT SCALE: WMO-CO2-X2019'])
    writer.writerow(['# DATA POLICY: ICOS CITIES DATA is licensed under a Creative Commons Attribution 4.0 international licence (http://creativecommons.org/licenses/by/4.0/.The ICOS CITIES data licence is described at https://data.icos-cities.eu/licence.'])
    writer.writerow(['# COMMENT:'])
    writer.writerow(['#'])
    writer.writerow(['#   - Times are UTC'])
    writer.writerow(['#   - Time-averaged values are reported at the middle of the averaging interval.'])
    writer.writerow(["#   - Flag 'O' = data correct after manual quality control"])
    writer.writerow(["#   - Flag 'K' = data incorrect after manual quality control"])
    writer.writerow(['#   - In case of gaps between instruments, the timeseries are filled with empty string'])
    writer.writerow(['#   - Release notes: '])
    writer.writerow(['#'])
    writer.writerow(field)
    
    for row in df_temp.iter_rows():
        writer.writerow([''])
    



In [119]:
for row in df_temp.iter_rows():
    print(len(row))
    print(row[1])
    #print([' '.join(item) for item in row])

18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acropolis-17
18
acrop