In [1]:
import sys
import xarray as xr
from pathlib import Path
import pandas as pd
import numpy as np
from datetime import datetime

In [2]:
import cftime


def fix_time(ds):
    """Pad gaps in a monthly time axis with all-NaN time steps.

    Every calendar year that occurs on the ``time`` coordinate is expected to
    contain all twelve months. For each (year, month) pair that is absent, an
    all-NaN slice of the first data variable is appended, stamped at 12:00 on
    the 15th of that month, and the result is sorted by time. Gaps are
    detected per year, so a month that is missing in only some years of a
    multi-year file is padded as well (this includes partial first or last
    years).

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset with a decoded, indexed ``time`` dimension coordinate and
        ``lon`` / ``lat`` coordinates. Only the first data variable is
        padded. NOTE(review): this presumes the file holds a single data
        variable spanning exactly lon, lat and time - confirm for new inputs.

    Returns
    -------
    xarray.Dataset
        ``ds`` itself when no month is missing; otherwise a new dataset that
        includes the NaN-filled time steps, sorted along ``time``.
    """
    var = list(ds.data_vars)[0]
    # "source" is only used for the log message; do not fail when it is absent.
    src = ds[var].encoding.get("source", "<unknown source>")

    # Read year/month from the index itself rather than casting to
    # datetime64[ns], which cannot represent dates outside ~1678-2262.
    time_index = ds.indexes["time"]
    present = {(int(y), int(m)) for y, m in zip(time_index.year, time_index.month)}
    years = sorted({y for y, _ in present})

    # Compare per year: pooling months across years hides a gap whenever the
    # same month is present in any other year.
    missing = [(y, m) for y in years for m in range(1, 13) if (y, m) not in present]
    if not missing:
        return ds

    missing_months = sorted({m for _, m in missing})
    print(
        f"Missing months in {src}: {missing_months} .... attempting to add all NaN array for these months."
    )

    # Derive the new timestamps from an existing one so they share its type
    # and calendar (e.g. cftime noleap); mixing timestamp types on one axis
    # breaks sorting. The 15th at noon is a common stamp for monthly means.
    template = time_index[0]
    new_times = [
        template.replace(
            year=y, month=m, day=15, hour=12, minute=0, second=0, microsecond=0
        )
        for y, m in missing
    ]

    # All-NaN block for the missing time steps; concat reconciles its
    # dimension order with the existing variable.
    nan_data = xr.DataArray(
        np.full((len(ds.lon), len(ds.lat), len(new_times)), np.nan),
        dims=("lon", "lat", "time"),
        coords={"lon": ds.lon, "lat": ds.lat, "time": new_times},
    )
    combined = xr.concat([ds, xr.Dataset({var: nan_data})], dim="time")
    return combined.sortby("time")

In [3]:
# root of the regridded CMIP6 tree; it is searched recursively for monthly
# files and used as the base for mirroring relative paths under output_dir
# NOTE(review): this name shadows the builtin dir() for the rest of the notebook
dir = "/beegfs/CMIP6/jdpaul3/CMIP6_common_regrid/regrid/"
# output where the fixed files will be saved
output_dir = Path("/beegfs/CMIP6/jdpaul3/cmip6_regrid_timefix")

In [4]:
# List all monthly *.nc files in the directory and any subdirectories.
# rglob yields paths in filesystem order, so sort them to keep the processing
# order (and the printed log) reproducible between runs.
files = sorted(Path(dir).rglob("*mon_*.nc"))

In [5]:
# For each file: open it, add any missing months, and save the result under
# output_dir with the same relative directory structure it has under dir.
for file in files:
    try:
        # The context manager closes the source file handle even when
        # fix_time returns a new dataset or any step below raises.
        with xr.open_dataset(file, decode_times=True) as ds:
            fixed = fix_time(ds)
            output_file = output_dir / file.relative_to(dir)
            # Create parent directories if they don't exist
            output_file.parent.mkdir(parents=True, exist_ok=True)
            fixed.to_netcdf(output_file, mode="w", format="NETCDF4")
    except Exception as e:
        # Best effort: report the failure and continue with the remaining files.
        print(f"Error processing {file}: {e}", file=sys.stderr)

Missing months in /beegfs/CMIP6/jdpaul3/CMIP6_common_regrid/regrid/TaiESM1/ssp370/LImon/snw/snw_LImon_TaiESM1_ssp370_regrid_201502-201512.nc: [1] .... attempting to add all NaN array for these months.
Missing months in /beegfs/CMIP6/jdpaul3/CMIP6_common_regrid/regrid/TaiESM1/ssp585/LImon/snw/snw_LImon_TaiESM1_ssp585_regrid_201502-201512.nc: [1] .... attempting to add all NaN array for these months.
Missing months in /beegfs/CMIP6/jdpaul3/CMIP6_common_regrid/regrid/NorESM2-MM/ssp245/LImon/snw/snw_LImon_NorESM2-MM_ssp245_regrid_201502-201512.nc: [1] .... attempting to add all NaN array for these months.
Missing months in /beegfs/CMIP6/jdpaul3/CMIP6_common_regrid/regrid/NorESM2-MM/ssp370/LImon/snw/snw_LImon_NorESM2-MM_ssp370_regrid_201502-201512.nc: [1] .... attempting to add all NaN array for these months.
Missing months in /beegfs/CMIP6/jdpaul3/CMIP6_common_regrid/regrid/NorESM2-MM/ssp126/LImon/snw/snw_LImon_NorESM2-MM_ssp126_regrid_201502-201512.nc: [1] .... attempting to add all NaN a