pydata · spencerkclark · Mar 6, 2025 · Mar 6, 2025 · Mar 7, 2025 · Mar 7, 2025
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -21,7 +21,15 @@ v2025.03.1 (unreleased)
 
 New Features
 ~~~~~~~~~~~~
-
+- By default xarray now encodes :py:class:`numpy.timedelta64` values by
+  converting to :py:class:`numpy.int64` values and storing ``"dtype"`` and
+  ``"units"`` attributes consistent with the dtype of the in-memory
+  :py:class:`numpy.timedelta64` values, e.g. ``"timedelta64[s]"`` and
+  ``"seconds"`` for second-resolution timedeltas. These values will always be
+  decoded to timedeltas without a warning moving forward. Timedeltas encoded
+  via the previous approach can still be roundtripped exactly, but in the
+  future will not be decoded by default (:issue:`1621`, :issue:`10099`,
+  :pull:`10101`). By `Spencer Clark <https://github.com/spencerkclark>`_.
 
 Breaking changes
 ~~~~~~~~~~~~~~~~

diff --git a/xarray/coding/times.py b/xarray/coding/times.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import re
+import typing
 import warnings
 from collections.abc import Callable, Hashable
 from datetime import datetime, timedelta
@@ -92,6 +93,14 @@
 )
 
 
+_INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS = [
+    "_FillValue",
+    "missing_value",
+    "add_offset",
+    "scale_factor",
+]
+
+
 def _is_standard_calendar(calendar: str) -> bool:
     return calendar.lower() in _STANDARD_CALENDARS
 
@@ -1394,62 +1403,140 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable:
             return variable
 
 
+def has_timedelta64_encoding_dtype(attrs_or_encoding: dict) -> bool:
+    dtype = attrs_or_encoding.get("dtype", None)
+    return isinstance(dtype, str) and dtype.startswith("timedelta64")
+
+
 class CFTimedeltaCoder(VariableCoder):
     """Coder for CF Timedelta coding.
 
     Parameters
     ----------
     time_unit : PDDatetimeUnitOptions
-          Target resolution when decoding timedeltas. Defaults to "ns".
+        Target resolution when decoding timedeltas via units. Defaults to "ns".
+        When decoding via dtype, the resolution is specified in the dtype
+        attribute, so this parameter is ignored.
+    decode_via_units : bool
+        Whether to decode timedeltas based on the presence of a timedelta-like
+        units attribute, e.g. "seconds". Defaults to True, but in the future
+        will default to False.
+    decode_via_dtype : bool
+        Whether to decode timedeltas based on the presence of a np.timedelta64
+        dtype attribute, e.g. "timedelta64[s]". Defaults to True.
     """
 
     def __init__(
         self,
         time_unit: PDDatetimeUnitOptions = "ns",
+        decode_via_units: bool = True,
+        decode_via_dtype: bool = True,
     ) -> None:
         self.time_unit = time_unit
+        self.decode_via_units = decode_via_units
+        self.decode_via_dtype = decode_via_dtype
         self._emit_decode_timedelta_future_warning = False
 
     def encode(self, variable: Variable, name: T_Name = None) -> Variable:
         if np.issubdtype(variable.data.dtype, np.timedelta64):
             dims, data, attrs, encoding = unpack_for_encoding(variable)
+            has_timedelta_dtype = has_timedelta64_encoding_dtype(encoding)
+            if ("units" in encoding or "dtype" in encoding) and not has_timedelta_dtype:
+                dtype = encoding.get("dtype", None)
+                units = encoding.pop("units", None)
 
-            dtype = encoding.get("dtype", None)
+                # in the case of packed data we need to encode into
+                # float first, the correct dtype will be established
+                # via CFScaleOffsetCoder/CFMaskCoder
+                if "add_offset" in encoding or "scale_factor" in encoding:
+                    dtype = data.dtype if data.dtype.kind == "f" else "float64"
 
-            # in the case of packed data we need to encode into
-            # float first, the correct dtype will be established
-            # via CFScaleOffsetCoder/CFMaskCoder
-            if "add_offset" in encoding or "scale_factor" in encoding:
-                dtype = data.dtype if data.dtype.kind == "f" else "float64"
-
-            data, units = encode_cf_timedelta(data, encoding.pop("units", None), dtype)
+            else:
+                resolution, _ = np.datetime_data(variable.dtype)
+                dtype = np.int64
+                attrs_dtype = f"timedelta64[{resolution}]"
+                units = _numpy_dtype_to_netcdf_timeunit(variable.dtype)
+                safe_setitem(attrs, "dtype", attrs_dtype, name=name)
+                # Remove dtype encoding if it exists to prevent it from
+                # interfering downstream in NonStringCoder.
+                encoding.pop("dtype", None)
+
+                if any(
+                    k in encoding for k in _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS
+                ):
+                    raise ValueError(
+                        f"Specifying '_FillValue', 'missing_value', "
+                        f"'add_offset', or 'scale_factor' is not supported "
+                        f"when literally encoding the np.timedelta64 values "
+                        f"of variable {name!r}. To encode {name!r} with such "
+                        f"encoding parameters, additionally set "
+                        f"encoding['units'] to a unit of time, e.g. "
+                        f"'seconds'. To proceed with literal np.timedelta64 "
+                        f"encoding of {name!r}, remove any encoding entries "
+                        f"for '_FillValue', 'missing_value', 'add_offset', "
+                        f"or 'scale_factor'."
+                    )
 
+            data, units = encode_cf_timedelta(data, units, dtype)
             safe_setitem(attrs, "units", units, name=name)
-
             return Variable(dims, data, attrs, encoding, fastpath=True)
         else:
             return variable
 
     def decode(self, variable: Variable, name: T_Name = None) -> Variable:
         units = variable.attrs.get("units", None)
-        if isinstance(units, str) and units in TIME_UNITS:
-            if self._emit_decode_timedelta_future_warning:
-                emit_user_level_warning(
-                    "In a future version of xarray decode_timedelta will "
-                    "default to False rather than None. To silence this "
-                    "warning, set decode_timedelta to True, False, or a "
-                    "'CFTimedeltaCoder' instance.",
-                    FutureWarning,
-                )
+        has_timedelta_units = isinstance(units, str) and units in TIME_UNITS
+        has_timedelta_dtype = has_timedelta64_encoding_dtype(variable.attrs)
+        is_dtype_decodable = has_timedelta_units and has_timedelta_dtype
+        is_units_decodable = has_timedelta_units
+        if (is_dtype_decodable and self.decode_via_dtype) or (
+            is_units_decodable and self.decode_via_units
+        ):
             dims, data, attrs, encoding = unpack_for_decoding(variable)
-
             units = pop_to(attrs, encoding, "units")
-            dtype = np.dtype(f"timedelta64[{self.time_unit}]")
-            transform = partial(
-                decode_cf_timedelta, units=units, time_unit=self.time_unit
-            )
+            if is_dtype_decodable and self.decode_via_dtype:
+                if any(
+                    k in encoding for k in _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS
+                ):
+                    raise ValueError(
+                        "Decoding np.timedelta64 values via dtype is not "
+                        "supported when '_FillValue', 'missing_value', "
+                        "'add_offset', or 'scale_factor' are present in "
+                        "encoding."
+                    )
+                dtype = pop_to(attrs, encoding, "dtype", name=name)
+                dtype = np.dtype(dtype)
+                resolution, _ = np.datetime_data(dtype)
+                if resolution not in typing.get_args(PDDatetimeUnitOptions):
+                    raise ValueError(
+                        f"Following pandas, xarray only supports decoding to "
+                        f"timedelta64 values with a resolution of 's', 'ms', "
+                        f"'us', or 'ns'. Encoded values have a resolution of "
+                        f"{resolution!r}."
+                    )
+                time_unit = cast(PDDatetimeUnitOptions, resolution)
+            elif self.decode_via_units:
+                if self._emit_decode_timedelta_future_warning:
+                    emit_user_level_warning(
+                        "In a future version, xarray will not decode "
+                        "timedelta values based on the presence of a "
+                        "timedelta-like units attribute by default. Instead "
+                        "it will rely on the presence of a np.timedelta64 "
+                        "dtype attribute, which is now xarray's default way "
+                        "of encoding np.timedelta64 values. To continue "
+                        "decoding timedeltas based on the presence of a "
+                        "timedelta-like units attribute, users will need to "
+                        "explicitly opt-in by passing True or "
+                        "CFTimedeltaCoder(decode_via_units=True) to "
+                        "decode_timedelta. To silence this warning, set "
+                        "decode_timedelta to True, False, or a "
+                        "'CFTimedeltaCoder' instance.",
+                        FutureWarning,
+                    )
+                dtype = np.dtype(f"timedelta64[{self.time_unit}]")
+                time_unit = self.time_unit
+            transform = partial(decode_cf_timedelta, units=units, time_unit=time_unit)
             data = lazy_elemwise_func(data, transform, dtype=dtype)
-
             return Variable(dims, data, attrs, encoding, fastpath=True)
         else:
             return variable
diff --git a/xarray/conventions.py b/xarray/conventions.py
@@ -204,8 +204,10 @@ def decode_cf_variable(
             var = coder.decode(var, name=name)
 
     if decode_timedelta:
-        if not isinstance(decode_timedelta, CFTimedeltaCoder):
-            decode_timedelta = CFTimedeltaCoder()
+        if isinstance(decode_timedelta, bool):
+            decode_timedelta = CFTimedeltaCoder(
+                decode_via_units=decode_timedelta, decode_via_dtype=decode_timedelta
+            )
         decode_timedelta._emit_decode_timedelta_future_warning = (
             decode_timedelta_was_none
         )

diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
@@ -635,7 +635,10 @@ def test_roundtrip_timedelta_data(self) -> None:
         #  though we cannot test that until we fix the timedelta decoding
         #  to support large ranges
         time_deltas = pd.to_timedelta(["1h", "2h", "NaT"]).as_unit("s")  # type: ignore[arg-type, unused-ignore]
+        encoding = {"units": "seconds"}
         expected = Dataset({"td": ("td", time_deltas), "td0": time_deltas[0]})
+        expected["td"].encoding = encoding
+        expected["td0"].encoding = encoding
         with self.roundtrip(
             expected, open_kwargs={"decode_timedelta": CFTimedeltaCoder(time_unit="ns")}
         ) as actual:

diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
@@ -20,6 +20,7 @@
 )
 from xarray.coders import CFDatetimeCoder, CFTimedeltaCoder
 from xarray.coding.times import (
+    _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS,
     _encode_datetime_with_cftime,
     _netcdf_to_numpy_timeunit,
     _numpy_to_netcdf_timeunit,
@@ -1515,7 +1516,7 @@ def test_roundtrip_timedelta64_nanosecond_precision(
     timedelta_values[2] = nat
     timedelta_values[4] = nat
 
-    encoding = dict(dtype=dtype, _FillValue=fill_value)
+    encoding = dict(dtype=dtype, _FillValue=fill_value, units="nanoseconds")
     var = Variable(["time"], timedelta_values, encoding=encoding)
 
     encoded_var = conventions.encode_cf_variable(var)
@@ -1867,7 +1868,8 @@ def test_decode_timedelta(
     decode_times, decode_timedelta, expected_dtype, warns
 ) -> None:
     timedeltas = pd.timedelta_range(0, freq="D", periods=3)
-    var = Variable(["time"], timedeltas)
+    encoding = {"units": "days"}
+    var = Variable(["time"], timedeltas, encoding=encoding)
     encoded = conventions.encode_cf_variable(var)
     if warns:
         with pytest.warns(FutureWarning, match="decode_timedelta"):
@@ -1959,3 +1961,125 @@ def test_decode_floating_point_timedelta_no_serialization_warning() -> None:
     decoded = conventions.decode_cf_variable("foo", encoded, decode_timedelta=True)
     with assert_no_warnings():
         decoded.load()
+
+
+def test_literal_timedelta64_coding(time_unit: PDDatetimeUnitOptions) -> None:
+    timedeltas = pd.timedelta_range(0, freq="D", periods=3, unit=time_unit)  # type: ignore[call-arg]
+    variable = Variable(["time"], timedeltas)
+    expected_dtype = f"timedelta64[{time_unit}]"
+    expected_units = _numpy_to_netcdf_timeunit(time_unit)
+
+    encoded = conventions.encode_cf_variable(variable)
+    assert encoded.attrs["dtype"] == expected_dtype
+    assert encoded.attrs["units"] == expected_units
+
+    decoded = conventions.decode_cf_variable("timedeltas", encoded)
+    assert decoded.encoding["dtype"] == expected_dtype
+    assert decoded.encoding["units"] == expected_units
+
+    assert_identical(decoded, variable)
+    assert decoded.dtype == variable.dtype
+
+    reencoded = conventions.encode_cf_variable(decoded)
+    assert_identical(reencoded, encoded)
+    assert reencoded.dtype == encoded.dtype
+
+
+def test_literal_timedelta_coding_resolution_error() -> None:
+    attrs = {"dtype": "timedelta64[D]", "units": "days"}
+    encoded = Variable(["time"], [0, 1, 2], attrs=attrs)
+    with pytest.raises(ValueError, match="xarray only supports"):
+        conventions.decode_cf_variable("timedeltas", encoded)
+
+
+@pytest.mark.parametrize("attribute", ["dtype", "units"])
+def test_literal_timedelta_decode_invalid_encoding(attribute) -> None:
+    attrs = {"dtype": "timedelta64[s]", "units": "seconds"}
+    encoding = {attribute: "foo"}
+    encoded = Variable(["time"], [0, 1, 2], attrs=attrs, encoding=encoding)
+    with pytest.raises(ValueError, match="failed to prevent"):
+        conventions.decode_cf_variable("timedeltas", encoded)
+
+
+@pytest.mark.parametrize("attribute", ["dtype", "units"])
+def test_literal_timedelta_encode_invalid_attribute(attribute) -> None:
+    timedeltas = pd.timedelta_range(0, freq="D", periods=3)
+    attrs = {attribute: "foo"}
+    variable = Variable(["time"], timedeltas, attrs=attrs)
+    with pytest.raises(ValueError, match="failed to prevent"):
+        conventions.encode_cf_variable(variable)
+
+
+@pytest.mark.parametrize("invalid_key", _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS)
+def test_literal_timedelta_encoding_mask_and_scale_error(invalid_key) -> None:
+    encoding = {invalid_key: 1.0}
+    timedeltas = pd.timedelta_range(0, freq="D", periods=3)
+    variable = Variable(["time"], timedeltas, encoding=encoding)
+    with pytest.raises(ValueError, match=invalid_key):
+        conventions.encode_cf_variable(variable)
+
+
+@pytest.mark.parametrize("invalid_key", _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS)
+def test_literal_timedelta_decoding_mask_and_scale_error(invalid_key) -> None:
+    attrs = {invalid_key: 1.0, "dtype": "timedelta64[s]", "units": "seconds"}
+    variable = Variable(["time"], [0, 1, 2], attrs=attrs)
+    with pytest.raises(ValueError, match=invalid_key):
+        conventions.decode_cf_variable("foo", variable)
+
+
+@pytest.mark.parametrize(
+    ("decode_via_units", "decode_via_dtype", "attrs", "expect_timedelta64"),
+    [
+        (True, True, {"units": "seconds"}, True),
+        (True, False, {"units": "seconds"}, True),
+        (False, True, {"units": "seconds"}, False),
+        (False, False, {"units": "seconds"}, False),
+        (True, True, {"dtype": "timedelta64[s]", "units": "seconds"}, True),
+        (True, False, {"dtype": "timedelta64[s]", "units": "seconds"}, True),
+        (False, True, {"dtype": "timedelta64[s]", "units": "seconds"}, True),
+        (False, False, {"dtype": "timedelta64[s]", "units": "seconds"}, False),
+    ],
+    ids=lambda x: f"{x!r}",
+)
+def test_timedelta_decoding_options(
+    decode_via_units, decode_via_dtype, attrs, expect_timedelta64
+) -> None:
+    array = np.array([0, 1, 2], dtype=np.dtype("int64"))
+    encoded = Variable(["time"], array, attrs=attrs)
+
+    # Confirm we decode to the expected dtype.
+    decode_timedelta = CFTimedeltaCoder(
+        time_unit="s",
+        decode_via_units=decode_via_units,
+        decode_via_dtype=decode_via_dtype,
+    )
+    decoded = conventions.decode_cf_variable(
+        "foo", encoded, decode_timedelta=decode_timedelta
+    )
+    if expect_timedelta64:
+        assert decoded.dtype == np.dtype("timedelta64[s]")
+    else:
+        assert decoded.dtype == np.dtype("int64")
+
+    # Confirm we exactly roundtrip.
+    reencoded = conventions.encode_cf_variable(decoded)
+    assert_identical(reencoded, encoded)
+
+
+def test_timedelta_encoding_explicit_non_timedelta64_dtype() -> None:
+    encoding = {"dtype": np.dtype("int32")}
+    timedeltas = pd.timedelta_range(0, freq="D", periods=3)
+    variable = Variable(["time"], timedeltas, encoding=encoding)
+
+    encoded = conventions.encode_cf_variable(variable)
+    assert encoded.attrs["units"] == "days"
+    assert encoded.dtype == np.dtype("int32")
+
+    with pytest.warns(FutureWarning, match="timedelta"):
+        decoded = conventions.decode_cf_variable("foo", encoded)
+    assert_identical(decoded, variable)
+
+    reencoded = conventions.encode_cf_variable(decoded)
+    assert_identical(reencoded, encoded)
+    assert encoded.attrs["units"] == "days"
+    assert encoded.dtype == np.dtype("int32")