Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix add_bounds() breaking when time coords are cftime objects #241

Merged
merged 4 commits on May 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 62 additions & 16 deletions tests/test_bounds.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import cftime
import numpy as np
import pytest
import xarray as xr
Expand Down Expand Up @@ -115,7 +116,7 @@ def test_add_bounds_raises_error_if_bounds_exist(self):
with pytest.raises(ValueError):
ds.bounds.add_bounds("lat")

def test__add_bounds_raises_errors_for_data_dim_and_length(self):
def test_add_bounds_raises_errors_for_data_dim_and_length(self):
# Multidimensional
lat = xr.DataArray(
data=np.array([[0, 1, 2], [3, 4, 5]]),
Expand All @@ -132,23 +133,23 @@ def test__add_bounds_raises_errors_for_data_dim_and_length(self):

# If coords dimensions does not equal 1.
with pytest.raises(ValueError):
ds.bounds._add_bounds("lat")
ds.bounds.add_bounds("lat")
# If coords are length of <=1.
with pytest.raises(ValueError):
ds.bounds._add_bounds("lon")
ds.bounds.add_bounds("lon")

def test__add_bounds_returns_dataset_with_bounds_added(self):
def test_add_bounds_for_dataset_with_coords_as_datetime_objects(self):
ds = self.ds.copy()

ds = ds.bounds._add_bounds("lat")
assert ds.lat_bnds.equals(lat_bnds)
assert ds.lat_bnds.is_generated == "True"
result = ds.bounds.add_bounds("lat")
assert result.lat_bnds.equals(lat_bnds)
assert result.lat_bnds.is_generated == "True"

ds = ds.bounds._add_bounds("lon")
assert ds.lon_bnds.equals(lon_bnds)
assert ds.lon_bnds.is_generated == "True"
result = result.bounds.add_bounds("lon")
assert result.lon_bnds.equals(lon_bnds)
assert result.lon_bnds.is_generated == "True"

ds = ds.bounds._add_bounds("time")
result = ds.bounds.add_bounds("time")
# NOTE: The algorithm for generating time bounds doesn't extend the
# upper bound into the next month.
expected_time_bnds = xr.DataArray(
Expand All @@ -173,16 +174,61 @@ def test__add_bounds_returns_dataset_with_bounds_added(self):
],
dtype="datetime64[ns]",
),
coords={"time": ds.time},
coords={"time": ds.time.assign_attrs({"bounds": "time_bnds"})},
dims=["time", "bnds"],
attrs={"is_generated": "True"},
)

assert result.time_bnds.identical(expected_time_bnds)

def test_returns_bounds_for_dataset_with_coords_as_cftime_objects(self):
ds = self.ds.copy()
ds = ds.drop_dims("time")
ds["time"] = xr.DataArray(
name="time",
data=np.array(
[
cftime.DatetimeNoLeap(1850, 1, 1),
cftime.DatetimeNoLeap(1850, 2, 1),
cftime.DatetimeNoLeap(1850, 3, 1),
],
),
dims=["time"],
attrs={
"axis": "T",
"long_name": "time",
"standard_name": "time",
},
)

result = ds.bounds.add_bounds("time")
expected_time_bnds = xr.DataArray(
name="time_bnds",
data=np.array(
[
[
cftime.DatetimeNoLeap(1849, 12, 16, 12),
cftime.DatetimeNoLeap(1850, 1, 16, 12),
],
[
cftime.DatetimeNoLeap(1850, 1, 16, 12),
cftime.DatetimeNoLeap(1850, 2, 15, 0),
],
[
cftime.DatetimeNoLeap(1850, 2, 15, 0),
cftime.DatetimeNoLeap(1850, 3, 15, 0),
],
],
),
coords={"time": ds.time.assign_attrs({"bounds": "time_bnds"})},
dims=["time", "bnds"],
attrs=ds.time_bnds.attrs,
attrs={"is_generated": "True"},
)

assert ds.time_bnds.equals(expected_time_bnds)
assert ds.time_bnds.is_generated == "True"
assert result.time_bnds.identical(expected_time_bnds)


class TestGetCoord:
class Test_GetCoord:
@pytest.fixture(autouse=True)
def setup(self):
self.ds = generate_dataset(cf_compliant=True, has_bounds=False)
Expand Down
10 changes: 5 additions & 5 deletions tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ def test_keeps_specified_var(self):
assert result.identical(expected)


class TestHasCFCompliantTime:
class Test_HasCFCompliantTime:
@pytest.fixture(autouse=True)
def setUp(self, tmp_path):
# Create temporary directory to save files.
Expand Down Expand Up @@ -668,7 +668,7 @@ def test_decodes_years_with_a_reference_date_on_a_leap_year(self):
assert result.time_bnds.encoding == expected.time_bnds.encoding


class TestPostProcessDataset:
class Test_PostProcessDataset:
@pytest.fixture(autouse=True)
def setup(self):
self.ds = generate_dataset(cf_compliant=True, has_bounds=True)
Expand Down Expand Up @@ -868,7 +868,7 @@ def test_raises_error_if_dataset_has_no_longitude_coords_but_lon_orient_is_speci
_postprocess_dataset(ds, lon_orient=(0, 360))


class TestKeepSingleVar:
class Test_KeepSingleVar:
@pytest.fixture(autouse=True)
def setup(self):
self.ds = generate_dataset(cf_compliant=True, has_bounds=True)
Expand Down Expand Up @@ -909,7 +909,7 @@ def test_bounds_always_persist(self):
assert ds.get("time_bnds") is not None


class TestPreProcessNonCFDataset:
class Test_PreProcessNonCFDataset:
@pytest.fixture(autouse=True)
def setup(self):
self.ds = generate_dataset(cf_compliant=False, has_bounds=True)
Expand Down Expand Up @@ -944,7 +944,7 @@ def callable(ds):
assert result.identical(expected)


class TestSplitTimeUnitsAttr:
class Test_SplitTimeUnitsAttr:
def test_raises_error_if_units_attr_is_none(self):
with pytest.raises(KeyError):
_split_time_units_attr(None) # type: ignore
Expand Down
75 changes: 63 additions & 12 deletions tests/test_temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def test_averages_for_yearly_time_series(self):
attrs={"is_generated": "True"},
)
ds["ts"] = xr.DataArray(
data=np.array([[[2]], [[1]], [[1]], [[1]], [[2]]]),
data=np.array([[[2]], [[np.nan]], [[1]], [[1]], [[2]]]),
coords={"lat": ds.lat, "lon": ds.lon, "time": ds.time},
dims=["time", "lat", "lon"],
)
Expand All @@ -74,7 +74,7 @@ def test_averages_for_yearly_time_series(self):
expected = ds.copy()
expected = expected.drop_dims("time")
expected["ts"] = xr.DataArray(
data=np.array([[1.4]]),
data=np.array([[1.5]]),
coords={"lat": expected.lat, "lon": expected.lon},
dims=["lat", "lon"],
attrs={
Expand All @@ -93,7 +93,7 @@ def test_averages_for_yearly_time_series(self):
expected = ds.copy()
expected = expected.drop_dims("time")
expected["ts"] = xr.DataArray(
data=np.array([[1.4]]),
data=np.array([[1.5]]),
coords={"lat": expected.lat, "lon": expected.lon},
dims=["lat", "lon"],
attrs={
Expand All @@ -120,7 +120,7 @@ def test_averages_for_monthly_time_series(self):
"2000-02-01T00:00:00.000000000",
"2000-03-01T00:00:00.000000000",
"2000-04-01T00:00:00.000000000",
"2000-05-01T00:00:00.000000000",
"2001-02-01T00:00:00.000000000",
],
dtype="datetime64[ns]",
),
Expand All @@ -142,7 +142,7 @@ def test_averages_for_monthly_time_series(self):
["2000-02-01T00:00:00.000000000", "2000-03-01T00:00:00.000000000"],
["2000-03-01T00:00:00.000000000", "2000-04-01T00:00:00.000000000"],
["2000-04-01T00:00:00.000000000", "2000-05-01T00:00:00.000000000"],
["2000-05-01T00:00:00.000000000", "2000-06-01T00:00:00.000000000"],
["2001-01-01T00:00:00.000000000", "2000-03-01T00:00:00.000000000"],
],
dtype="datetime64[ns]",
),
Expand All @@ -151,7 +151,7 @@ def test_averages_for_monthly_time_series(self):
attrs={"is_generated": "True"},
)
ds["ts"] = xr.DataArray(
data=np.array([[[2]], [[1]], [[1]], [[1]], [[1]]]),
data=np.array([[[2]], [[np.nan]], [[1]], [[1]], [[1]]]),
coords={"lat": ds.lat, "lon": ds.lon, "time": ds.time},
dims=["time", "lat", "lon"],
)
Expand All @@ -161,7 +161,7 @@ def test_averages_for_monthly_time_series(self):
expected = ds.copy()
expected = expected.drop_dims("time")
expected["ts"] = xr.DataArray(
data=np.array([[1.2]]),
data=np.array([[1.24362357]]),
coords={"lat": expected.lat, "lon": expected.lon},
dims=["lat", "lon"],
attrs={
Expand All @@ -173,14 +173,14 @@ def test_averages_for_monthly_time_series(self):
},
)

assert result.identical(expected)
xr.testing.assert_allclose(result, expected)

# Test unweighted averages
result = ds.temporal.average("ts", weighted=False)
expected = ds.copy()
expected = expected.drop_dims("time")
expected["ts"] = xr.DataArray(
data=np.array([[1.2]]),
data=np.array([[1.25]]),
coords={"lat": expected.lat, "lon": expected.lon},
dims=["lat", "lon"],
attrs={
Expand All @@ -191,7 +191,7 @@ def test_averages_for_monthly_time_series(self):
"center_times": "False",
},
)
assert result.identical(expected)
xr.testing.assert_allclose(result, expected)

def test_averages_for_daily_time_series(self):
ds = xr.Dataset(
Expand Down Expand Up @@ -826,6 +826,57 @@ def test_weighted_monthly_averages(self):

assert result.identical(expected)

def test_weighted_monthly_averages_with_masked_data(self):
ds = self.ds.copy()
ds["ts"] = xr.DataArray(
data=np.array(
[[[2.0]], [[np.nan]], [[1.0]], [[1.0]], [[2.0]]], dtype="float64"
),
coords={"time": self.ds.time, "lat": self.ds.lat, "lon": self.ds.lon},
dims=["time", "lat", "lon"],
)

result = ds.temporal.group_average("ts", "month")
expected = ds.copy()
expected = expected.drop_dims("time")
expected["ts"] = xr.DataArray(
name="ts",
data=np.array([[[2.0]], [[0.0]], [[1.0]], [[1.0]], [[2.0]]]),
coords={
"lat": expected.lat,
"lon": expected.lon,
"time": xr.DataArray(
data=np.array(
[
"2000-01-01T00:00:00.000000000",
"2000-03-01T00:00:00.000000000",
"2000-06-01T00:00:00.000000000",
"2000-09-01T00:00:00.000000000",
"2001-02-01T00:00:00.000000000",
],
dtype="datetime64[ns]",
),
dims=["time"],
attrs={
"axis": "T",
"long_name": "time",
"standard_name": "time",
"bounds": "time_bnds",
},
),
},
dims=["time", "lat", "lon"],
attrs={
"operation": "temporal_avg",
"mode": "group_average",
"freq": "month",
"weighted": "True",
"center_times": "False",
},
)

assert result.identical(expected)

def test_weighted_daily_averages(self):
ds = self.ds.copy()

Expand Down Expand Up @@ -1584,7 +1635,7 @@ def test_raises_error_if_time_dimension_does_not_exist_in_dataset(self):
ds = ds.drop_dims("time")

with pytest.raises(KeyError):
ds.temporal.center_times(ds)
ds.temporal.center_times()

def test_gets_time_as_the_midpoint_between_time_bounds(self):
ds = self.ds.copy()
Expand Down Expand Up @@ -1658,7 +1709,7 @@ def test_gets_time_as_the_midpoint_between_time_bounds(self):
time_bounds["time"] = expected.time
expected["time_bnds"] = time_bounds

result = ds.temporal.center_times(ds)
result = ds.temporal.center_times()
assert result.identical(expected)


Expand Down
28 changes: 25 additions & 3 deletions xcdat/bounds.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
"""Bounds module for functions related to coordinate bounds."""
import collections
import warnings
from typing import Dict, List, Literal, Optional

import cf_xarray as cfxr # noqa: F401
import cftime
import numpy as np
import pandas as pd
import xarray as xr

from xcdat.axis import GENERIC_AXIS_MAP
Expand Down Expand Up @@ -253,13 +256,32 @@ def _add_bounds(self, axis: BoundsAxis, width: float = 0.5) -> xr.Dataset:
diffs = da_coord.diff(dim).values

# Add beginning and end points to account for lower and upper bounds.
# np.array of string values with `dtype="timedelta64[ns]"`
diffs = np.insert(diffs, 0, diffs[0])
diffs = np.append(diffs, diffs[-1])

# Get lower and upper bounds by using the width relative to nearest point.
# In xarray and xCDAT, time coordinates with non-CF compliant calendars
# (360-day, noleap) and/or units ("months", "years") are decoded using
# `cftime` objects instead of `datetime` objects. `cftime` objects only
# support arithmetic using `timedelta` objects, so the values of `diffs`
# must be casted from `dtype="timedelta64[ns]"` to `timedelta`.
if da_coord.name in ("T", "time") and issubclass(
type(da_coord.values[0]), cftime.datetime
):
diffs = pd.to_timedelta(diffs)

# FIXME: These lines produces the warning: `PerformanceWarning:
# Adding/subtracting object-dtype array to TimedeltaArray not
# vectorized` after converting diffs to `timedelta`. I (Tom) was not
# able to find an alternative, vectorized solution at the time of this
# implementation.
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=pd.errors.PerformanceWarning)
# Get lower and upper bounds by using the width relative to nearest point.
lower_bounds = da_coord - diffs[:-1] * width
upper_bounds = da_coord + diffs[1:] * (1 - width)
(Review thread by tomvothecoder on the lines above — marked as resolved.)

# Transpose both bound arrays into a 2D array.
lower_bounds = da_coord - diffs[:-1] * width
upper_bounds = da_coord + diffs[1:] * (1 - width)
bounds = np.array([lower_bounds, upper_bounds]).transpose()
(Review thread by tomvothecoder on the lines above — marked as resolved.)

# Clip latitude bounds at (-90, 90)
Expand Down
2 changes: 1 addition & 1 deletion xcdat/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,7 +457,7 @@ def _postprocess_dataset(

if center_times:
if dataset.cf.dims.get("T") is not None:
dataset = dataset.temporal.center_times(dataset)
dataset = dataset.temporal.center_times()
else:
raise ValueError("This dataset does not have a time coordinates to center.")

Expand Down
Loading