In [1]:
import pandas as pd
import numpy as np

In [2]:
import xarray as xr

In [3]:
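# Threshold suffixes used in the file names, with their values in mm per day:
# 0p1 = 2.54 mm per day
# 1p0 = 25.4 mm per day
# 2p5 = 63.5 mm per day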

## Read METplus output

In [4]:
# Build one 12-month summary Dataset per (experiment, threshold) text file.
# `list_ds` and `list_exp_name` are filled in lock-step: entry i of one
# describes entry i of the other.
list_ds = []
list_exp_name = []

base_dir = '/glade/campaign/ral/hap/ksha/GWC_results/MET/'

exp_names = ['B3H', 'B6H', 'GDAS', 'C404']
thres_names = ['_0p1.txt', '_1p0.txt', '_2p5.txt']

# Not used by the loop below; kept in case another cell still refers to it.
fn_lists = ['B3H_1p0.txt', 'C404_1p0.txt']

# Columns that are averaged per calendar month.
mean_cols = ['total_object_area', 'mean_object_area', 'mean_intensity90']
# Every column that must be numeric after reading.
num_cols = ["object_count", "total_object_area", "mean_object_area", "mean_intensity90"]


def _read_object_stats(fn):
    """Read one object-statistics text file into a typed DataFrame.

    Parameters
    ----------
    fn : str
        Path of a comma-separated file containing a ``valid_time`` column
        formatted as ``YYYYMMDD_HHMMSS`` and the columns listed in
        ``num_cols``. The literal string ``NA`` is read as missing.

    Returns
    -------
    pandas.DataFrame
        The file contents with ``valid_time`` parsed to datetimes, the
        ``num_cols`` columns numeric (unparseable entries become NaN),
        ``object_count`` stored as nullable ``Int64``, and an added
        ``month`` column holding the calendar month (1–12) of ``valid_time``.
    """
    df = pd.read_csv(fn, na_values=["NA"])

    # Timestamps are written as YYYYMMDD_HHMMSS.
    df["valid_time"] = pd.to_datetime(df["valid_time"], format="%Y%m%d_%H%M%S")

    # "NA" is already NaN; coerce anything else that is not a number to NaN too.
    df[num_cols] = df[num_cols].apply(pd.to_numeric, errors="coerce")

    # Nullable integer keeps missing counts as <NA> instead of forcing floats.
    df["object_count"] = df["object_count"].astype("Int64")

    df["month"] = df["valid_time"].dt.month  # 1–12
    return df


def _monthly_climatology(df):
    """Collapse a typed object-statistics frame to one row per calendar month.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``month`` column, the ``mean_cols`` columns and
        ``object_count`` (as produced by ``_read_object_stats``).

    Returns
    -------
    pandas.DataFrame
        Indexed by month 1..12 (months absent from ``df`` are filled with
        missing values). Holds the per-month mean of each ``mean_cols``
        column and ``total_objects``, the per-month sum of ``object_count``.
    """
    by_month = df.groupby("month")
    all_months = range(1, 13)

    # Mean over all rows of each calendar month (NaNs ignored).
    monthly_mean = by_month[mean_cols].mean(numeric_only=True).reindex(all_months)

    # Total object count per calendar month; min_count=1 keeps a month whose
    # counts are all missing as missing rather than reporting 0.
    monthly_total = (
        by_month["object_count"]
        .sum(min_count=1)
        .reindex(all_months)
        .rename("total_objects")
    )

    return pd.concat([monthly_mean, monthly_total], axis=1)


for exp in exp_names:
    for thres in thres_names:
        fn = base_dir + exp + thres

        df = _read_object_stats(fn)
        df_month = _monthly_climatology(df)

        # NOTE(review): total_objects is still a pandas nullable Int64 column
        # when it reaches xarray; confirm the xarray/zarr versions in use
        # handle that dtype as intended.
        ds = xr.Dataset.from_dataframe(df_month)

        # Append data and label together so a failed read cannot leave the two
        # lists misaligned. The label keeps the '.txt' suffix, e.g. 'B3H_0p1.txt'.
        list_ds.append(ds)
        list_exp_name.append(exp + thres)

In [5]:
# Stack the per-file monthly datasets along a new 'experiment' dimension and
# label each slice with the matching entry of list_exp_name.
ds_all = (
    xr.concat(list_ds, dim='experiment')
      .assign_coords(experiment=list_exp_name)
)

In [6]:
# Bring every variable of ds_all into memory (Dataset.load).
# NOTE(review): the inputs were built from in-memory pandas frames, so this is
# presumably a no-op here — confirm before removing.
ds_all = ds_all.load()

In [8]:
# The output store sits next to the input text files under base_dir.
save_name = f"{base_dir}object_verif_all.zarr"

# Chunk size 12 along both dimensions (12 months x 12 experiment labels),
# i.e. a single chunk per variable.
ds_all = ds_all.chunk(dict(month=12, experiment=12))

# Writing is currently disabled; uncomment to (over)write the store.
# ds_all.to_zarr(save_name, mode='w', consolidated=True, compute=True)
print(save_name)

/glade/campaign/ral/hap/ksha/GWC_results/MET/object_verif_all.zarr




In [11]:
# xr.open_zarr(save_name)['total_objects'].values