Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

better handling of non-cf-compliant time data #263

Merged
merged 3 commits into from
Jun 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 38 additions & 4 deletions tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,15 @@
"standard_name": "time",
},
)

# Time coordinate fixture whose units ("year A.D.") are not CF-compliant and
# are also not decodable by xcdat — used to exercise the "leave time encoded"
# fallback path (see test_non_cf_compliant_and_unsupported_time_is_not_decoded).
# NOTE(review): the values appear to be monthly midpoints expressed as year
# fractions (1850 + 1/24, stepping by 1/12) — confirm against the matching
# bounds fixture below.
time_non_cf_unsupported = xr.DataArray(
    data=np.arange(1850 + 1 / 24.0, 1851 + 3 / 12.0, 1 / 12.0),
    dims=["time"],
    attrs={
        "units": "year A.D.",
        "long_name": "time",
        "standard_name": "time",
    },
)
time_bnds = xr.DataArray(
name="time_bnds",
data=np.array(
Expand Down Expand Up @@ -104,6 +112,16 @@
dims=["time", "bnds"],
attrs={"xcdat_bounds": "True"},
)
# Bounds for the unsupported time axis: each cell spans half a step (1/24 of a
# year) on either side of its coordinate value. A comprehension replaces the
# original for/append loop (same values, idiomatic construction).
tb = [[t - 1 / 24.0, t + 1 / 24.0] for t in time_non_cf_unsupported]
time_bnds_non_cf_unsupported = xr.DataArray(
    name="time_bnds",
    data=tb,
    coords={"time": time_non_cf_unsupported},
    dims=["time", "bnds"],
    attrs={"is_generated": "True"},
)
Comment on lines +115 to +124
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I created test bounds for a dataset that is non-cf-compliant (but not handled by xcdat), but I didn't end up using these in the unit test I created (see below).


# LATITUDE
# ========
Expand Down Expand Up @@ -159,7 +177,9 @@
)


def generate_dataset(cf_compliant: bool, has_bounds: bool) -> xr.Dataset:
def generate_dataset(
cf_compliant: bool, has_bounds: bool, unsupported: bool = False
) -> xr.Dataset:
"""Generates a dataset using coordinate and data variable fixtures.

Parameters
Expand All @@ -169,12 +189,22 @@ def generate_dataset(cf_compliant: bool, has_bounds: bool) -> xr.Dataset:
has_bounds : bool, optional
Include bounds for coordinates. This also adds the "bounds" attribute
to existing coordinates to link them to their respective bounds.
unsupported : bool, optional
Create time units that are unsupported and cannot be decoded.
Note that cf_compliant must be set to False.

Returns
-------
xr.Dataset
Test dataset.
"""

if unsupported & cf_compliant:
raise ValueError(
"Cannot set cf_compliant=True and unsupported=True. \n"
"Set cf_compliant=False."
)

if has_bounds:
ds = xr.Dataset(
data_vars={
Expand All @@ -189,8 +219,12 @@ def generate_dataset(cf_compliant: bool, has_bounds: bool) -> xr.Dataset:
ds.coords["time"] = time_cf.copy()
ds["time_bnds"] = time_bnds.copy()
elif not cf_compliant:
ds.coords["time"] = time_non_cf.copy()
ds["time_bnds"] = time_bnds_non_cf.copy()
if unsupported:
ds.coords["time"] = time_non_cf_unsupported.copy()
ds["time_bnds"] = time_bnds_non_cf_unsupported.copy()
else:
ds.coords["time"] = time_non_cf.copy()
ds["time_bnds"] = time_bnds_non_cf.copy()

# If the "bounds" attribute is included in an existing DataArray and
# added to a new Dataset, it will get dropped. Therefore, it needs to be
Expand Down
10 changes: 10 additions & 0 deletions tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,16 @@ def test_non_cf_compliant_time_is_not_decoded(self):
expected = generate_dataset(cf_compliant=False, has_bounds=True)
assert result.identical(expected)

def test_non_cf_compliant_and_unsupported_time_is_not_decoded(self):
    # A dataset whose time units xcdat cannot decode should round-trip
    # unchanged even when decoding is requested.
    expected = generate_dataset(cf_compliant=False, has_bounds=True, unsupported=True)
    expected.to_netcdf(self.file_path)

    # decode_times=True, but the unsupported time axis must stay encoded.
    result = open_dataset(self.file_path, decode_times=True)

    assert result.identical(expected)

def test_non_cf_compliant_time_is_decoded(self):
ds = generate_dataset(cf_compliant=False, has_bounds=False)
ds.to_netcdf(self.file_path)
Expand Down
49 changes: 39 additions & 10 deletions xcdat/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,10 @@ def open_dataset(
the Dataset, by default True. Bounds are required for many xCDAT
features.
decode_times: bool, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
This keyword may not be supported by all the backends, by default True.
If True, attempt to decode times encoded in the standard NetCDF
datetime format into datetime objects. Otherwise, leave them encoded
as numbers. This keyword may not be supported by all the backends,
by default True.
center_times: bool, optional
If True, center time coordinates using the midpoint between its upper
and lower bounds. Otherwise, use the provided time coordinates, by
Expand Down Expand Up @@ -84,6 +85,7 @@ def open_dataset(
if cf_compliant_time is False:
# XCDAT handles decoding time values with non-CF units.
ds = xr.open_dataset(path, decode_times=False, **kwargs)
# attempt to decode non-cf-compliant time axis
ds = decode_non_cf_time(ds)
else:
ds = xr.open_dataset(path, decode_times=True, **kwargs)
Expand Down Expand Up @@ -225,13 +227,14 @@ def decode_non_cf_time(dataset: xr.Dataset) -> xr.Dataset:
numerically encoded time values (representing the offset from the reference
date) to pandas DateOffset objects. These offset values are added to the
reference date, forming DataArrays of datetime objects that replace the time
coordinate and time bounds (if they exist) values in the Dataset.
coordinate and time bounds (if they exist) in the Dataset.

Parameters
----------
dataset : xr.Dataset
Dataset with numerically encoded time coordinates and time bounds (if
they exist).
they exist). If the time coordinates cannot be decoded then the original
dataset is returned.

Returns
-------
Expand Down Expand Up @@ -304,7 +307,14 @@ def decode_non_cf_time(dataset: xr.Dataset) -> xr.Dataset:
time = ds.cf["T"]
time_bounds = ds.get(time.attrs.get("bounds"), None)
units_attr = time.attrs.get("units")
units, ref_date = _split_time_units_attr(units_attr)

# If the time units cannot be split into a unit and reference date, it
# cannot be decoded so the original dateset is returned.
try:
units, ref_date = _split_time_units_attr(units_attr)
except ValueError:
return ds

ref_date = pd.to_datetime(ref_date)

data = [ref_date + pd.DateOffset(**{units: offset}) for offset in time.data]
Expand Down Expand Up @@ -403,7 +413,13 @@ def _has_cf_compliant_time(
return None

time = ds.cf["T"]
units = _split_time_units_attr(time.attrs.get("units"))[0]

# If the time units attr cannot be split, it is not cf_compliant.
try:
units = _split_time_units_attr(time.attrs.get("units"))[0]
except ValueError:
return False

cf_compliant = units not in NON_CF_TIME_UNITS

return cf_compliant
Expand Down Expand Up @@ -589,6 +605,7 @@ def _preprocess_non_cf_dataset(
if callable:
ds_new = callable(ds)

# Attempt to decode non-cf-compliant time axis.
ds_new = decode_non_cf_time(ds_new)

return ds_new
Expand All @@ -606,11 +623,23 @@ def _split_time_units_attr(units_attr: str) -> Tuple[str, str]:
-------
Tuple[str, str]
The units (e.g, "months") and the reference date (e.g., "1800-01-01").
If the units attribute doesn't exist for the time coordinates.

Raises
------
KeyError
If the time units attribute was not found.

ValueError
If the time units attribute is not of the form `X since Y`.
"""
if units_attr is None:
raise KeyError("No 'units' attribute found for the dataset's time coordinates.")
raise KeyError("The dataset's time coordinates does not have a 'units' attr.")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated a preexisting KeyError message


units, reference_date = units_attr.split(" since ")
if "since" in units_attr:
units, reference_date = units_attr.split(" since ")
else:
raise ValueError(
"This dataset does not have time coordinates of the form 'X since Y'."
)

return units, reference_date