In [3]:
import pandas as pd
import numpy as np

# Input: row-level surface predictions. Output: the per-hexagon, per-season
# error summary that is written with to_csv further down in this cell.
input_path = r"Z:\Thesis\Data\test\DustCast\SFC\DC_v0063\sfc_row_level_predictions_2023_v0063_H3res4.csv"
output_path = r"Z:\Thesis\Data\test\DustCast\SFC\DC_v0063\aggregated_quarterly_µg_prediction_errors.csv"

# Load the table and attach a datetime version of 'Month' in one chain.
# 'Month' is assumed to hold 'YYYY-MM' strings; values that do not parse
# become NaT (errors='coerce') instead of raising.
df = pd.read_csv(input_path).assign(
    Month_dt=lambda frame: pd.to_datetime(frame['Month'], format="%Y-%m", errors='coerce')
)

# Define a function to map month to the custom quarter
def map_month_to_quarter(dt):
    """Return the meteorological season label for a timestamp.

    Parameters
    ----------
    dt : datetime-like or null
        Any object exposing a ``month`` attribute, or a null value
        (None / NaT / NaN).

    Returns
    -------
    str or None
        'DJF' for months 12, 1, 2; 'MAM' for 3-5; 'JJA' for 6-8;
        'SON' for 9-11. None when ``dt`` is null or the month matches
        none of these.
    """
    if pd.isnull(dt):
        return None

    # Month number -> season label; December is grouped with Jan/Feb (DJF).
    season_by_month = {
        12: 'DJF', 1: 'DJF', 2: 'DJF',
        3: 'MAM', 4: 'MAM', 5: 'MAM',
        6: 'JJA', 7: 'JJA', 8: 'JJA',
        9: 'SON', 10: 'SON', 11: 'SON',
    }
    # .get() yields None for any month outside the table.
    return season_by_month.get(dt.month)

# Add, in this order: the season label, µg/m³ versions of the observed and
# predicted columns (inputs taken to be kg/m³; 1 kg = 1e9 µg), and the
# per-row error defined as observed minus predicted.
df = df.assign(
    Quarter=lambda frame: frame['Month_dt'].apply(map_month_to_quarter),
    DUSMASS_mean_ugm3=lambda frame: frame['DUSMASS_mean'] * 1e9,
    ensemble_predictions_ugm3=lambda frame: frame['ensemble_predictions'] * 1e9,
    Diff_error=lambda frame: frame['DUSMASS_mean_ugm3'] - frame['ensemble_predictions_ugm3'],
)

# One group per (hexagon, season) pair: spatial and temporal aggregation.
grouped = df.groupby(['h3_res_4', 'Quarter'])

# Define a function to aggregate error metrics for each group
def aggregate_errors(group):
    """Summarise prediction error for one group of rows.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows belonging to a single group. Must contain the columns
        'DUSMASS_mean_ugm3' (observed) and 'ensemble_predictions_ugm3'
        (predicted).

    Returns
    -------
    pandas.Series
        Mean observed value, mean predicted value, mean signed error
        (observed - predicted), MSE, MAE, RMSE and the number of rows
        in the group, under the labels used in the dict below.
    """
    observed = group['DUSMASS_mean_ugm3']
    predicted = group['ensemble_predictions_ugm3']
    error = observed - predicted

    # RMSE is derived from the MSE, so compute the MSE once up front.
    mean_squared = (error ** 2).mean()

    metrics = {
        'Mean_DUSMASS_mean_ugm3': observed.mean(),
        'Mean_ensemble_predictions_ugm3': predicted.mean(),
        'Diff_error': error.mean(),
        'MSE': mean_squared,
        'MAE': error.abs().mean(),
        'RMSE': np.sqrt(mean_squared),
        'sample_size': len(group.index),  # row count of the group
    }
    return pd.Series(metrics)

# Compute the error metrics for every (hexagon, season) group.
# Only the two value columns are handed to the aggregator: it reads nothing
# else, and selecting them explicitly keeps the grouping keys out of the
# frames passed to `apply`, which pandas >= 2.2 otherwise flags with a
# DeprecationWarning.
agg_df = (
    grouped[['DUSMASS_mean_ugm3', 'ensemble_predictions_ugm3']]
    .apply(aggregate_errors)
    .reset_index()
)

# The aggregator returns one float-typed Series per group, so the row count
# arrives as a float (e.g. 12.0). Store it as the integer it is so the CSV
# shows plain counts.
agg_df['sample_size'] = agg_df['sample_size'].astype('int64')

# Keys and sample size first, then the group means, then the error metrics.
agg_df = agg_df[['h3_res_4', 'Quarter', 'sample_size',
                 'Mean_DUSMASS_mean_ugm3', 'Mean_ensemble_predictions_ugm3',
                 'Diff_error', 'MSE', 'MAE', 'RMSE']]

# Save the aggregated results to a new CSV file
agg_df.to_csv(output_path, index=False)
print("Aggregated quarterly predictions saved to:", output_path)


Aggregated quarterly predictions saved to: Z:\Thesis\Data\test\DustCast\SFC\DC_v0063\aggregated_quarterly_µg_prediction_errors.csv


In [8]:
#Surface

import pandas as pd
import numpy as np

# Input: row-level surface predictions. Output: the same rows with the
# per-(hexagon, season) error metrics attached, written at the end of the cell.
input_path = r"Z:\Thesis\Data\test\DustCast\SFC\DC_v0063\sfc_row_level_predictions_2023_v0063_H3res4.csv"
output_path = r"Z:\Thesis\Data\test\DustCast\SFC\DC_v0063\SFC_aggregated_quarterly_µg_prediction_errors2.csv"

# Load the table.
df = pd.read_csv(input_path)

# Parse 'Month' (assumed 'YYYY-MM') into a datetime column; unparseable
# entries are coerced to NaT rather than raising.
df = df.assign(
    Month_dt=pd.to_datetime(df['Month'], format="%Y-%m", errors='coerce')
)

# Define a function to map month to the custom quarter
def map_month_to_quarter(dt):
    """Map a timestamp to its three-month season code.

    Parameters
    ----------
    dt : datetime-like or null
        Object with a ``month`` attribute, or a null value.

    Returns
    -------
    str or None
        'DJF' (Dec, Jan, Feb), 'MAM' (Mar-May), 'JJA' (Jun-Aug) or
        'SON' (Sep-Nov); None for a null ``dt`` or an unmatched month.
    """
    if pd.isnull(dt):
        return None

    # Seasons are checked in the order DJF, MAM, JJA, SON.
    seasons = (
        ('DJF', (12, 1, 2)),
        ('MAM', (3, 4, 5)),
        ('JJA', (6, 7, 8)),
        ('SON', (9, 10, 11)),
    )
    month_number = dt.month
    for label, months in seasons:
        if month_number in months:
            return label
    return None

# Season label for every row, derived from the parsed month.
df['Quarter'] = df['Month_dt'].apply(map_month_to_quarter)

# Rescale observed and predicted values by 1e9
# (kg/m³ -> µg/m³, since 1 kg = 1e9 µg).
df['DUSMASS_mean_ugm3'] = df['DUSMASS_mean'].mul(1e9)
df['ensemble_predictions_ugm3'] = df['ensemble_predictions'].mul(1e9)

# Row-level error: observed minus predicted, in the rescaled units.
df['Diff_error'] = df['DUSMASS_mean_ugm3'].sub(df['ensemble_predictions_ugm3'])

# Bucket the rows by (hexagon, season) for the aggregation step.
grouped = df.groupby(['h3_res_4', 'Quarter'])

# Define a function to aggregate error metrics for each group.
# Aggregated metrics get the prefix "agg_" to distinguish them from row-level values.
def aggregate_errors(group):
    """Compute group-level error metrics, labelled with an ``agg_`` prefix.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows of one group, with columns 'DUSMASS_mean_ugm3' (observed)
        and 'ensemble_predictions_ugm3' (predicted).

    Returns
    -------
    pandas.Series
        Group means of observed and predicted values, mean signed error
        (observed - predicted), MSE, MAE, RMSE, and 'sample_size' (the
        number of rows in the group).
    """
    observed = group['DUSMASS_mean_ugm3']
    predicted = group['ensemble_predictions_ugm3']
    residual = observed - predicted

    squared_mean = np.mean(residual ** 2)
    absolute_mean = residual.abs().mean()

    result = {}
    result['agg_Mean_DUSMASS_mean_ugm3'] = observed.mean()
    result['agg_Mean_ensemble_predictions_ugm3'] = predicted.mean()
    result['agg_Diff_error'] = residual.mean()
    result['agg_MSE'] = squared_mean
    result['agg_MAE'] = absolute_mean
    result['agg_RMSE'] = np.sqrt(squared_mean)  # root of the MSE above
    result['sample_size'] = group.shape[0]
    return pd.Series(result)

# Compute the error metrics for every (hexagon, season) group.
# Only the two value columns are handed to the aggregator: it reads nothing
# else, and selecting them explicitly keeps the grouping keys out of the
# frames passed to `apply`, which pandas >= 2.2 otherwise flags with a
# DeprecationWarning.
agg_df = (
    grouped[['DUSMASS_mean_ugm3', 'ensemble_predictions_ugm3']]
    .apply(aggregate_errors)
    .reset_index()
)

# The aggregator returns one float-typed Series per group, so the row count
# arrives as a float (e.g. 12.0). Store it as an integer. The guard covers
# the case where no group was formed and the column is absent.
if 'sample_size' in agg_df.columns:
    agg_df['sample_size'] = agg_df['sample_size'].astype('int64')

# Merge the aggregated metrics back to the original DataFrame.
# Every row gets the metrics of its (hexagon, season) group. Rows without a
# matching group (e.g. a null Quarter, which groupby drops) get NaN, which
# also turns 'sample_size' back into a float column.
merged_df = pd.merge(df, agg_df, on=['h3_res_4', 'Quarter'], how='left')

# Original columns first, derived row-level columns next, aggregated metrics
# last. Names that are absent from merged_df are skipped; columns not listed
# here are dropped from the output.
cols_order = [
    'lon', 'lat', 'time', 'DUSMASS_mean', 'Month', 'lag_1', 'ensemble_predictions',
    'Country', 'h3_res_3', 'h3_res_4', 'Month_dt', 'Quarter',
    'DUSMASS_mean_ugm3', 'ensemble_predictions_ugm3', 'Diff_error',
    'agg_Mean_DUSMASS_mean_ugm3', 'agg_Mean_ensemble_predictions_ugm3',
    'agg_Diff_error', 'agg_MSE', 'agg_MAE', 'agg_RMSE', 'sample_size'
]
cols_order = [col for col in cols_order if col in merged_df.columns]
merged_df = merged_df[cols_order]

# Save the merged results to a new CSV file
merged_df.to_csv(output_path, index=False)
print("Aggregated quarterly predictions with original columns saved to:", output_path)


Aggregated quarterly predictions with original columns saved to: Z:\Thesis\Data\test\DustCast\SFC\DC_v0063\SFC_aggregated_quarterly_µg_prediction_errors2.csv


In [6]:
# Upper atmosphere

import pandas as pd
import numpy as np

# Input: upper-atmosphere level averages. Output: the same rows with the
# per-(hexagon, season) error metrics attached, written at the end of the cell.
input_path = r"Z:\Thesis\Data\test\DustCast\UA\DC_v0063\ua_level_averages_2023_v0063_H3res4_test.csv"
output_path = r"Z:\Thesis\Data\test\DustCast\UA\DC_v0063\UA_aggregated_quarterly_µg_prediction_errors2.csv"

# Read the CSV and add 'Month_dt', the datetime form of 'Month'
# (assumed 'YYYY-MM'); entries that fail to parse become NaT.
df = pd.read_csv(input_path).pipe(
    lambda frame: frame.assign(
        Month_dt=pd.to_datetime(frame['Month'], format="%Y-%m", errors='coerce')
    )
)

# Define a function to map month to the custom quarter
def map_month_to_quarter(dt):
    """Classify a timestamp into one of four three-month seasons.

    Parameters
    ----------
    dt : datetime-like or null
        Object with a ``month`` attribute, or a null value.

    Returns
    -------
    str or None
        'DJF', 'MAM', 'JJA' or 'SON' according to ``dt.month``;
        None if ``dt`` is null or the month falls in no season.
    """
    if pd.isnull(dt):
        return None

    m = dt.month
    # December belongs with January and February.
    if m in (12, 1, 2):
        return 'DJF'
    if m in (3, 4, 5):
        return 'MAM'
    if m in (6, 7, 8):
        return 'JJA'
    if m in (9, 10, 11):
        return 'SON'
    return None

# Add, in this order: the season label, the observed and predicted values
# scaled by 1e9 (kg -> µg), and the per-row error (observed - predicted).
# NOTE(review): the '_ugm3' suffix assumes the inputs are kg/m³. If
# DUCMASS is a column-integrated mass (kg/m²), the scaled values are
# µg/m², not µg/m³ — confirm the source units before reporting.
df = df.assign(
    Quarter=lambda frame: frame['Month_dt'].apply(map_month_to_quarter),
    DUCMASS_mean_ugm3=lambda frame: frame['DUCMASS_mean'] * 1e9,
    ensemble_predictions_ugm3=lambda frame: frame['ensemble_predictions'] * 1e9,
    Diff_error=lambda frame: frame['DUCMASS_mean_ugm3'] - frame['ensemble_predictions_ugm3'],
)

# One group per (hexagon, season) pair: spatial and temporal aggregation.
grouped = df.groupby(['h3_res_4', 'Quarter'])

# Define a function to aggregate error metrics for each group.
# Aggregated metrics get the prefix "agg_" to distinguish them from row-level values.
def aggregate_errors(group):
    """Compute group-level error metrics, labelled with an ``agg_`` prefix.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows of one group, with columns 'DUCMASS_mean_ugm3' (observed)
        and 'ensemble_predictions_ugm3' (predicted).

    Returns
    -------
    pandas.Series
        Group means of observed and predicted values, mean signed error
        (observed - predicted), MSE, MAE, RMSE, and 'sample_size' (the
        number of rows in the group).
    """
    observed = group['DUCMASS_mean_ugm3']
    predicted = group['ensemble_predictions_ugm3']
    delta = observed - predicted

    # MSE is computed first because RMSE is its square root.
    mse_value = (delta ** 2).mean()

    labels = [
        'agg_Mean_DUCMASS_mean_ugm3',
        'agg_Mean_ensemble_predictions_ugm3',
        'agg_Diff_error',
        'agg_MSE',
        'agg_MAE',
        'agg_RMSE',
        'sample_size',
    ]
    values = [
        observed.mean(),
        predicted.mean(),
        delta.mean(),
        mse_value,
        np.mean(np.abs(delta)),
        np.sqrt(mse_value),
        group.shape[0],
    ]
    return pd.Series(dict(zip(labels, values)))

# Compute the error metrics for every (hexagon, season) group.
# Only the two value columns are handed to the aggregator: it reads nothing
# else, and selecting them explicitly keeps the grouping keys out of the
# frames passed to `apply`, which pandas >= 2.2 otherwise flags with a
# DeprecationWarning.
agg_df = (
    grouped[['DUCMASS_mean_ugm3', 'ensemble_predictions_ugm3']]
    .apply(aggregate_errors)
    .reset_index()
)

# The aggregator returns one float-typed Series per group, so the row count
# arrives as a float (e.g. 12.0). Store it as an integer. The guard covers
# the case where no group was formed and the column is absent.
if 'sample_size' in agg_df.columns:
    agg_df['sample_size'] = agg_df['sample_size'].astype('int64')

# Merge the aggregated metrics back to the original DataFrame.
# Every row gets the metrics of its (hexagon, season) group. Rows without a
# matching group (e.g. a null Quarter, which groupby drops) get NaN, which
# also turns 'sample_size' back into a float column.
merged_df = pd.merge(df, agg_df, on=['h3_res_4', 'Quarter'], how='left')

# Original columns first, derived row-level columns next, aggregated metrics
# last. Names that are absent from merged_df are skipped; columns not listed
# here are dropped from the output.
cols_order = [
    'lon', 'lat', 'time', 'DUCMASS_mean', 'Month', 'lag_1', 'ensemble_predictions',
    'h3_res_3', 'h3_res_4', 'Month_dt', 'Quarter', #'Country',
    'DUCMASS_mean_ugm3', 'ensemble_predictions_ugm3', 'Diff_error',
    'agg_Mean_DUCMASS_mean_ugm3', 'agg_Mean_ensemble_predictions_ugm3',
    'agg_Diff_error', 'agg_MSE', 'agg_MAE', 'agg_RMSE', 'sample_size'
]
cols_order = [col for col in cols_order if col in merged_df.columns]
merged_df = merged_df[cols_order]

# Save the merged results to a new CSV file
merged_df.to_csv(output_path, index=False)
print("Aggregated quarterly predictions with original columns saved to:", output_path)


Aggregated quarterly predictions with original columns saved to: Z:\Thesis\Data\test\DustCast\UA\DC_v0063\UA_aggregated_quarterly_µg_prediction_errors2.csv
