In [1]:
import sys
import xarray as xr
from pathlib import Path
import pandas as pd

In [2]:
def convert_time(ds):
    """Swap a 'year' coordinate for a CF-style numeric 'time' coordinate.

    Every value of the 'year' coordinate (expected in YYYY form) is mapped to
    January 1st of that year and expressed as a whole number of days elapsed
    since 1950-01-01. The coordinate is then renamed from 'year' to 'time' and
    tagged with the 'units', 'calendar', 'long_name' and 'standard_name'
    attributes that describe that encoding.

    Parameters
    ----------
    ds
        Dataset-like object exposing ``coords``, item access,
        ``assign_coords`` and ``rename`` (presumably an ``xarray.Dataset``).

    Returns
    -------
    The converted object, or ``ds`` itself (after printing a notice) when it
    has no 'year' coordinate.
    """
    # Guard clause: nothing to convert, hand the input back untouched.
    if "year" not in ds.coords:
        print(
            "Dataset does not contain 'year' coordinate. Cannot convert to 'time' coordinate."
        )
        return ds

    # Reference date that the CF 'units' attribute below refers to.
    epoch = pd.Timestamp("1950-01-01")

    # Years -> timestamps for January 1st -> integer day offsets from the epoch.
    year_values = ds["year"].values.astype(int)
    jan_firsts = pd.to_datetime(year_values, format="%Y")
    day_offsets = (jan_firsts - epoch).days

    # Overwrite the coordinate values first, then give it its CF name.
    converted = ds.assign_coords(year=("year", day_offsets)).rename({"year": "time"})
    converted["time"].attrs.update(
        {
            "units": "days since 1950-01-01 00:00:00",
            "calendar": "standard",
            "long_name": "time",
            "standard_name": "time",
        }
    )

    return converted

In [3]:
# Root of the input tree that holds the indicator NetCDF files.
# NOTE: `dir` shadows the Python builtin of the same name; the name is kept
# because the file-listing and conversion cells refer to it.
dir = "/beegfs/CMIP6/jdpaul3/cmip6_indicators_test/cmip6_indicators/netcdf"

# Root of the output tree that receives the time-fixed copies.
_timefix_root = "/beegfs/CMIP6/jdpaul3/cmip6_indicators_test/cmip6_indicators/netcdf_timefix"
output_dir = Path(_timefix_root)
del _timefix_root  # keep the notebook namespace limited to `dir` and `output_dir`

In [4]:
# Recursively gather every *.nc file below the input directory
# (the directory itself and all of its subdirectories).
files = [*Path(dir).rglob("*.nc")]

In [5]:
# For each input file: open it, convert the 'year' coordinate to 'time', and
# save the result under output_dir using the same relative directory structure.
# A failure on one file is reported on stderr and the loop moves on to the next.
for file in files:
    try:
        # The context manager guarantees the source file handle is released
        # even when the conversion or the write raises. Previously close() was
        # skipped on any error, and was called on the object returned by
        # convert_time() rather than on the dataset that opened the file.
        with xr.open_dataset(file, decode_times=False) as source_ds:
            ds = convert_time(source_ds)
            output_file = output_dir / file.relative_to(dir)
            # Create parent directories if they don't exist
            output_file.parent.mkdir(parents=True, exist_ok=True)
            # Write while the source is still open so its data can be read.
            ds.to_netcdf(output_file, mode="w", format="NETCDF4")
        print(f"Converted {file} successfully.")
    except Exception as e:
        print(f"Error processing {file}: {e}", file=sys.stderr)

Converted /beegfs/CMIP6/jdpaul3/cmip6_indicators_test/cmip6_indicators/netcdf/MPI-ESM1-2-HR/historical/ftc/ftc_MPI-ESM1-2-HR_historical_indicator.nc successfully.
Converted /beegfs/CMIP6/jdpaul3/cmip6_indicators_test/cmip6_indicators/netcdf/MPI-ESM1-2-HR/historical/cwd/cwd_MPI-ESM1-2-HR_historical_indicator.nc successfully.
Converted /beegfs/CMIP6/jdpaul3/cmip6_indicators_test/cmip6_indicators/netcdf/MPI-ESM1-2-HR/historical/r10mm/r10mm_MPI-ESM1-2-HR_historical_indicator.nc successfully.
Converted /beegfs/CMIP6/jdpaul3/cmip6_indicators_test/cmip6_indicators/netcdf/MPI-ESM1-2-HR/historical/hd/hd_MPI-ESM1-2-HR_historical_indicator.nc successfully.
Converted /beegfs/CMIP6/jdpaul3/cmip6_indicators_test/cmip6_indicators/netcdf/MPI-ESM1-2-HR/historical/su/su_MPI-ESM1-2-HR_historical_indicator.nc successfully.
Converted /beegfs/CMIP6/jdpaul3/cmip6_indicators_test/cmip6_indicators/netcdf/MPI-ESM1-2-HR/historical/rx1day/rx1day_MPI-ESM1-2-HR_historical_indicator.nc successfully.
Converted /beegf