Calculate Monthly Mean

In [None]:
"""
Calculate Monthly Mean from NetCDF Collections
=============================================

This script:
    1. Locates NetCDF files that match a month-based filename pattern.
    2. Opens them with xarray.open_mfdataset (combine="by_coords").
    3. Computes the mean of VAR_NAME over the "time" dimension.
    4. Saves each monthly mean to OUTPUT_DIR as <VAR>_mean_<MM>.nc (see OUTPUT_NAME).

"""

from pathlib import Path
import xarray as xr

# ========= USER CONFIGURATION ==========================================
# Edit the values below before running; save_monthly_mean() reads them directly.
INPUT_DIR        = r"Path of Input Directory"                       # source folder searched for NetCDF files
OUTPUT_DIR       = r"Path of Output Directory"                      # where results go (created if missing)
VAR_NAME         = "Variable Name"                                  # variable to average over "time"
FILE_TEMPLATE    = "*-{month}*.nc"                                  # glob pattern; {month} is replaced by each entry of MONTHS — adjust to your file names
MONTHS           = [f"{m:02d}" for m in range(1, 13)]               # zero-padded month codes "01".."12" to process
OVERWRITE        = False                                            # True → replace existing output files; False → skip them
COMBINE_METHOD   = "by_coords"                                      # value passed as combine= to xarray.open_mfdataset
OUTPUT_NAME      = "{var}_mean_{month}.nc"                          # output filename template; {var} and {month} are filled in per month
# =======================================================================


def monthly_mean(src_dir: Path,
                 month_code: str,
                 var_name: str,
                 pattern: str,
                 combine_method: str):
    """Return the mean of *var_name* over "time" for all files of *month_code*.

    Parameters
    ----------
    src_dir : Path
        Directory searched (non-recursively, unless *pattern* says otherwise).
    month_code : str
        Value substituted for ``{month}`` in *pattern*.
    var_name : str
        Name of the variable to average.
    pattern : str
        Glob template containing a ``{month}`` placeholder.
    combine_method : str
        Passed as ``combine=`` to ``xarray.open_mfdataset``.

    Returns
    -------
    xarray.DataArray
        The mean over the "time" dimension (NaNs skipped), already loaded
        into memory so it stays usable after the source files are closed.

    Raises
    ------
    FileNotFoundError
        If no file in *src_dir* matches the resolved pattern.
    KeyError
        If *var_name* is not present in the combined dataset.
    """
    glob_pattern = pattern.format(month=month_code)
    # Glob order is not guaranteed; sort so the file list (and therefore any
    # order-sensitive combine mode) is reproducible across runs and platforms.
    files = sorted(src_dir.glob(glob_pattern))
    if not files:
        # Report the pattern actually searched, not the unformatted template.
        raise FileNotFoundError(
            f"No files match pattern '{glob_pattern}' in {src_dir} for month {month_code}"
        )

    # The context manager closes every underlying file handle on exit, on
    # both the success and the error path.
    with xr.open_mfdataset(files, combine=combine_method) as ds:
        if var_name not in ds:
            raise KeyError(f"'{var_name}' not found in dataset variables: {list(ds.data_vars)}")

        # Materialise the reduced result while the files are still open;
        # a lazy array would be unreadable once the dataset is closed.
        return ds[var_name].mean(dim="time", skipna=True).load()


def save_monthly_mean():
    """Compute and write the monthly mean for every entry of MONTHS.

    Reads the module-level configuration (INPUT_DIR, OUTPUT_DIR, VAR_NAME,
    FILE_TEMPLATE, MONTHS, OVERWRITE, COMBINE_METHOD, OUTPUT_NAME), creates
    the output directory if needed, and writes one NetCDF file per month.

    Months whose output file already exists are skipped unless OVERWRITE is
    true. A failure in one month is printed and does not stop the remaining
    months from being processed.
    """
    in_dir  = Path(INPUT_DIR).expanduser().resolve()
    out_dir = Path(OUTPUT_DIR).expanduser().resolve()
    out_dir.mkdir(parents=True, exist_ok=True)

    for mon in MONTHS:
        try:
            out_path = out_dir / OUTPUT_NAME.format(var=VAR_NAME, month=mon)

            # Check for an existing result before touching the inputs, so a
            # skipped month does not pay for globbing and opening its files.
            if out_path.exists() and not OVERWRITE:
                print(f"⚠️  {out_path.name} exists — skipping (set OVERWRITE=True to replace).")
                continue

            mean_da = monthly_mean(in_dir, mon, VAR_NAME, FILE_TEMPLATE, COMBINE_METHOD)
            mean_da.to_netcdf(out_path, mode="w", engine="netcdf4")
            print(f"✔  Saved monthly mean for {mon} → {out_path.name}")

        except Exception as e:
            # Deliberate best-effort: report the failing month and carry on.
            print(f"🚫  {mon}: {e}")


# Run the batch only when executed as a script, not when the module is imported.
if __name__ == "__main__":
    save_monthly_mean()
