In [1]:
import atmospy
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings

# disable warnings for demo purposes
# NOTE(review): no category is given, so every warning is suppressed from here on,
# including deprecation and syntax warnings raised by later cells.
warnings.filterwarnings("ignore")

# Presumably applies atmospy's default plot styling to the figures below — TODO confirm.
atmospy.set_theme()

# this tutorial was completed using version:
# (bare last expression, so the notebook displays the version string as the cell output)
atmospy.__version__

'0.1.2'

In [2]:
# Initialize an empty DataFrame
# It will hold the combined sensor-node data produced by the loading loop further down.
df_lcs = pd.DataFrame()

# Define the start and end months and years
# Both bounds are inclusive; they are turned into "YYYY_MM" folder names by
# generate_month_year_strings.
start_month = 3
start_year = 2024
end_month = 7
end_year = 2024

# Function to generate month-year strings
def generate_month_year_strings(start_month, start_year, end_month, end_year):
    """Return the "YYYY_MM" labels for every month in an inclusive range.

    Parameters
    ----------
    start_month, start_year : int
        First month of the range.
    end_month, end_year : int
        Last month of the range (included in the result).

    Returns
    -------
    list of str
        Labels such as ``"2024_03"``, in chronological order. The list is
        empty when the start lies after the end.
    """
    labels = []
    current = (start_year, start_month)
    last = (end_year, end_month)
    # Tuples compare year first, then month, which is exactly the range test we need.
    while current <= last:
        yr, mo = current
        labels.append(f"{yr}_{str(mo).zfill(2)}")
        # Roll over into January of the next year after December.
        current = (yr + 1, 1) if mo + 1 > 12 else (yr, mo + 1)
    return labels

# Generate the list of month-year strings
month_year_strings = generate_month_year_strings(start_month, start_year, end_month, end_year)

# One hourly-averaged frame per month is collected here and concatenated once
# after the loop, instead of re-copying a growing DataFrame on every iteration.
monthly_frames = []

# Loop through each month-year string and load the CSV files
for month_year_string in month_year_strings:
    year, month = month_year_string.split('_')
    filename_tail = f"@devnode_{year}-{month}.csv"

    # Every sensor has its own file inside the month's folder. A missing file
    # raises FileNotFoundError on purpose: silently skipping a month would
    # leave an unnoticed gap in the comparison.
    # The NH3-B1, NO2-B43F and OX-B431 files are currently not loaded; to
    # re-enable one, read its raw_<sensor> file here, merge it on "Čas" and
    # compute WE - AE for it as is done for CO below.
    df_co = pd.read_csv(f"{month_year_string}/raw_CO-B4{filename_tail}", sep=',')
    df_pm_opcn3 = pd.read_csv(f"{month_year_string}/raw_PM-OPCN3{filename_tail}", sep=',')
    df_pm_sps30 = pd.read_csv(f"{month_year_string}/raw_PM-SPS30{filename_tail}", sep=',')
    df_trh = pd.read_csv(f"{month_year_string}/raw_T_RH_P_BME280{filename_tail}", sep=',')

    # Inner-join all sensor tables on the shared timestamp column "Čas"
    df_temp = (
        df_co
        .merge(df_trh, on="Čas")
        .merge(df_pm_opcn3, on="Čas")
        .merge(df_pm_sps30, on="Čas")
    )

    # CO signal = working electrode (WE) minus auxiliary electrode (AE)
    df_temp["CO"] = df_temp["WE_CO"] - df_temp["AE_CO"]

    # Drop the raw electrode readings and the PM_4 column
    df_temp = df_temp.drop(["WE_CO", "AE_CO", "PM_4"], axis=1)

    # Rename the "Čas" column and parse it as datetime
    df_temp = df_temp.rename(columns={'Čas': 'Time'})
    df_temp['Time'] = pd.to_datetime(df_temp['Time'])

    # 1h shift of the CSV data (CSV seems to be UTC+1, reference data UTC)
    # NOTE(review): converting UTC+1 to UTC would normally subtract an hour;
    # the +1h was kept as the author found it — confirm the intended alignment.
    df_temp['Time'] += pd.Timedelta(hours=1)

    # Resample to 1-hour intervals (mean of each hour), then turn the
    # timestamp index back into a regular column
    df_temp_1h = df_temp.set_index('Time').resample('1h').mean()
    df_temp = df_temp_1h.reset_index()

    monthly_frames.append(df_temp)

# Build the final frame in one step (raises ValueError if no month was loaded),
# rename "Time" to "GMT" and use it as the index
df_lcs = pd.concat(monthly_frames, ignore_index=True)
df_lcs = df_lcs.rename(columns={'Time': 'GMT'}).set_index('GMT')

# Print the data types of the final DataFrame
print(df_lcs.dtypes)

FileNotFoundError: [Errno 2] No such file or directory: '2024_04/raw_PM-OPCN3@devnode_2024-04.csv'

In [None]:
# Reference-station data for the colocation period, read from a
# semicolon-separated CSV export.
# Earlier variants of this cell (now removed) read an edited .xlsx export with
# the trailing "Celkem" rows removed, or 'gmt_data/Nov-Feb_GMT_MarHory.csv'
# for the previous period.
df_ref = pd.read_csv('gmt_data/Mar-Jul_GMT_MarHory.csv', sep=';')

# Parse the timestamps, then mark them as UTC to synchronize the datatype of
# the time column between the two datasets
df_ref['GMT'] = pd.to_datetime(df_ref['GMT'])
df_ref['GMT'] = df_ref['GMT'].dt.tz_localize('UTC')

# Drop the separate "TIME" column; only "GMT" is used from here on
df_ref = df_ref.drop(["TIME"], axis=1)

# Only these columns are converted to float64; all other columns keep their dtype
ref_numeric_columns = [
    'WDIR [deg]',
    'HUMI [%]',
    'PRES [hPa]',
    'TEMP [°C]',
    'PM10 [ug/m3]',
    'PM25 [ug/m3]',
    'PM01 [ug/m3]',
    'O3 [ug/m3]',
    'SO2 [ug/m3]',
    'NO [ug/m3]',
    'NO2 [ug/m3]',
    'CO [ug/m3]',
]
df_ref[ref_numeric_columns] = df_ref[ref_numeric_columns].astype('float64')

print(df_ref.dtypes)

In [None]:
# Merge the sensor-node and reference DataFrames on the shared 'GMT' timestamp.
# pd.merge defaults to an inner join, so only timestamps present in both are kept.
df = pd.merge(df_lcs, df_ref, on='GMT')

# sort_values returns a new frame; assign it back so the chronological order
# actually takes effect (previously the sorted result was discarded)
df = df.sort_values(by="GMT")

# Show the first rows as the cell output
df.head()

In [None]:
# Bounds of the comparison window; both ends are inclusive
start_date = '2024-03-29'
end_date = '2024-04-03'

# Keep only the rows whose timestamp lies inside the window
in_window = (df['GMT'] >= start_date) & (df['GMT'] <= end_date)
sel_df = df[in_window]

# From here on `df` is the selected window in chronological order
df = sel_df.sort_values(by="GMT")

# Summary statistics of the reference meteorological columns
print(df[['TEMP [°C]','HUMI [%]','PRES [hPa]']].describe())

In [None]:
# Sensor-node temperature ("T") plotted against the reference temperature.
# fit_reg=False can be added to the call (left disabled by the author).
t = atmospy.regplot(data=df, x="TEMP [°C]", y="T", marker='^', color="orange")
t.set_axis_labels(
    xlabel="Reference $T$ [$°C$]",
    ylabel="LCS node $T$ [$°C$]",
)

In [None]:
# Time series of both temperature signals on one explicit Axes
fig, ax = plt.subplots(figsize=(14, 7))

ax.plot(df['GMT'], df['TEMP [°C]'], label='Temperature (Reference)', color="grey")
ax.plot(df['GMT'], df['T'], label='Temperature (LCS node)', color="orange", linestyle=":")

ax.set_title('Temperature during colocation period')
ax.set_xlabel('Time [Year-Month-Day]')
ax.set_ylabel('Temperature [$°C$]')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Sensor-node relative humidity ("RH") plotted against the reference humidity.
# fit_reg=False can be added to the call (left disabled by the author).
rh = atmospy.regplot(
    data=df,
    x="HUMI [%]",
    y="RH",
    color="b",
    marker='v',
)
# Label the humidity plot itself: the labels used to be applied to `t`, the
# temperature plot from an earlier cell. Raw strings keep the mathtext "\%"
# without relying on an invalid escape sequence.
rh.set_axis_labels(xlabel=r"Reference $RH$ [$\%$]", ylabel=r"LCS node $RH$ [$\%$]")

In [None]:
# Time series of both relative-humidity signals on one explicit Axes
fig, ax = plt.subplots(figsize=(14, 7))

ax.plot(df['GMT'], df['HUMI [%]'], color="grey", label='Relative humidity [%]  (Reference)')
ax.plot(df['GMT'], df['RH'], color="b", linestyle=":", label='Relative humidity [%] (LCS node)')

ax.set_title('Relative humidity during colocation period')
ax.set_xlabel('Time [Year-Month-Day]')
# Raw string: "\%" is mathtext for a percent sign, but an invalid escape
# sequence in a normal Python string literal
ax.set_ylabel(r'Relative humidity [$\%$]')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Keep untouched copies of the sensor readings the first time this cell runs.
# Without them, every re-run would add 273.15 to "T" again and re-scale the
# already scaled "CO" column.
if "T_raw" not in df.columns:
    df["T_raw"] = pd.to_numeric(df["T"], errors='coerce')
if "CO_raw" not in df.columns:
    df["CO_raw"] = pd.to_numeric(df["CO"], errors='coerce')

# Temperature with the 273.15 offset applied (°C -> K, assuming "T" is in °C as
# the earlier plot labels state)
df["T"] = df["T_raw"] + 273.15

# Linear conversion of the WE - AE difference with a temperature term.
# NOTE(review): 4394 and -0.693 are presumably calibration coefficients; their
# origin is not shown in this notebook — confirm and document.
df["CO"] = df["CO_raw"] * 4394 + (-0.693) * df["T"]
df["CO [ug/m3]"] = pd.to_numeric(df["CO [ug/m3]"], errors='coerce')

#df_filter = df[df["CO [ug/m3]"] > 200]
#df_subfilter = df_filter[df_filter["CO [ug/m3]"] < 400]

In [None]:
# Converted sensor-node CO plotted against the reference CO concentration.
# Alternatives left disabled by the author: data=df_filter, data=df_subfilter,
# and fit_reg=False.
co = atmospy.regplot(data=df, x="CO [ug/m3]", y="CO", marker='o', color="g")
co.set_axis_labels(
    xlabel="Reference $CO$ [$µg/m^{3}$]",
    ylabel="LCS node $CO$ [$µg/m^{3}$]",
)

In [None]:
# Time series of both CO signals on one explicit Axes
fig, ax = plt.subplots(figsize=(14, 7))

ax.plot(df['GMT'], df['CO [ug/m3]'], label='CO concentration (Reference)', color="grey")
ax.plot(df['GMT'], df['CO'], label='CO concentration (LCS node)', color="g", linestyle=":")

ax.set_title('Concentration of carbon monoxide during colocation period')
ax.set_xlabel('Time [Year-Month-Day]')
ax.set_ylabel('CO [$µg/m^3$]')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Force the "PM_2" column to numeric; unparseable entries become NaN
df["PM_2"] = pd.to_numeric(df["PM_2"], errors="coerce")

# "PM_2" (labelled SPS30) plotted against the reference PM2.5.
# fit_reg=False can be added to the call (left disabled by the author).
pm = atmospy.regplot(data=df, x="PM25 [ug/m3]", y="PM_2", marker='x', color="brown")
pm.set_axis_labels(
    xlabel="Reference $PM_{2.5}$ [$µg/m^{3}$]",
    ylabel="SPS30 $PM_{2}$ [$µg/m^{3}$]",
)

In [None]:
# Time series of the reference PM2.5 and the "PM_2" column on one explicit Axes
fig, ax = plt.subplots(figsize=(14, 7))

ax.plot(df['GMT'], df['PM25 [ug/m3]'], label='PM2.5 (Reference)', color="grey")
ax.plot(df['GMT'], df['PM_2'], label='PM2 (SPS30)', color="brown", linestyle=":")

ax.set_title('Concentration of fine particulate matter during colocation period')
ax.set_xlabel('Time [Year-Month-Day]')
ax.set_ylabel('PM2.5 [$µg/m^3$]')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Force the "PM_2.5" column to numeric; unparseable entries become NaN
df["PM_2.5"] = pd.to_numeric(df["PM_2.5"], errors="coerce")

# "PM_2.5" (labelled OPC-N3) plotted against the reference PM2.5.
# fit_reg=False can be added to the call (left disabled by the author).
pm = atmospy.regplot(data=df, x="PM25 [ug/m3]", y="PM_2.5", marker='x', color="pink")
pm.set_axis_labels(
    xlabel="Reference $PM_{2.5}$ [$µg/m^{3}$]",
    ylabel="OPC-N3 $PM_{2.5}$ [$µg/m^{3}$]",
)

In [None]:
# Time series of the reference PM2.5 and the "PM_2.5" column on one explicit Axes
fig, ax = plt.subplots(figsize=(14, 7))

ax.plot(df['GMT'], df['PM25 [ug/m3]'], label='PM2.5 (Reference)', color="grey")
ax.plot(df['GMT'], df['PM_2.5'], label='PM2.5 (OPC-N3)', color="pink", linestyle=":")

ax.set_title('Concentration of fine particulate matter during colocation period')
ax.set_xlabel('Time [Year-Month-Day]')
ax.set_ylabel('PM2.5 [$µg/m^3$]')
ax.legend()
ax.grid(True)
plt.show()