From 8fee9527b08355587f5fa217ebc94a5191c2e762 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Wed, 27 May 2026 11:00:05 -0700 Subject: [PATCH] zonal: rename crop(zones_ids=) to crop(zone_ids=) with deprecation shim (#2521) stats() and crosstab() use zone_ids, but crop() historically used zones_ids (extra 's'). Switching between sibling zonal functions raised TypeError on the typo. Fix accepts both kwargs, emits DeprecationWarning on the legacy zones_ids, raises if both are passed, and raises if neither is supplied. Internal call sites migrated to the canonical zone_ids; new regression tests cover both paths plus error cases. --- .claude/sweep-api-consistency-state.csv | 1 + xrspatial/tests/test_dask_cupy_gaps.py | 8 +- xrspatial/tests/test_validation.py | 2 +- xrspatial/tests/test_zonal.py | 103 +++++++++++++++++++++++- xrspatial/zonal.py | 43 ++++++++-- 5 files changed, 142 insertions(+), 15 deletions(-) diff --git a/.claude/sweep-api-consistency-state.csv b/.claude/sweep-api-consistency-state.csv index ed1c2bc83..5d8c16920 100644 --- a/.claude/sweep-api-consistency-state.csv +++ b/.claude/sweep-api-consistency-state.csv @@ -3,3 +3,4 @@ geotiff,2026-05-18,2106,MEDIUM,3,"Sweep 2026-05-18 (deep-sweep-api-consistency-g polygonize,2026-05-19,2148,HIGH,1;3,"Sweep 2026-05-19 (deep-sweep-api-consistency-polygonize-2026-05-19). 1 MEDIUM Cat 3 finding fixed in this branch (#2148): polygonize() was the only public vector/raster conversion function without a return type annotation. Sieve/contours/rasterize/clip_polygon all declare one. Fix adds a Union return annotation (numpy tuple | awkward tuple | geopandas GeoDataFrame | spatialpandas GeoDataFrame | geojson dict) using TYPE_CHECKING forward refs for optional deps, and expands the docstring Returns section to enumerate the per-return_type shapes. 1 HIGH Cat 1 finding NOT fixed in this PR -- cross-module rename: polygonize uses `connectivity` (int 4|8) while sieve uses `neighborhood` (int 4|8) for the identical rook/queen pixel-connectivity concept. Industry convention (GDAL, rasterio.features.sieve) favours `connectivity`; the deprecation shim belongs in sieve.py, not polygonize, so this is out of scope for the polygonize-scoped sweep branch. Documented here for the next sieve sweep pass. 1 LOW Cat 1 cross-cutting: polygonize/sieve/clip_polygon use `raster` while contours and many older modules use `agg` for the input DataArray -- library-wide drift, not filed per-module per sweep template. Cat 2 return-shape: polygonize returns tuple/GeoDataFrame/dict by return_type; consistent with contours' tuple/GeoDataFrame dispatch. No Cat 4 (no mutable defaults; connectivity=4 default matches sieve neighborhood=4 default). No Cat 5 (polygonize re-exported in xrspatial/__init__.py; no orphan API; no __all__ but consistent with module convention). cuda-validated: cupy backend accepts identical kwargs, smoke-tested with cupy DataArray on host with CUDA_AVAILABLE." rasterize,2026-05-21,2250,MEDIUM,3,"Sweep 2026-05-21 (deep-sweep-api-consistency-rasterize-2026-05-21). 1 MEDIUM Cat 3 finding fixed in this branch (#2250): rasterize() was missing type annotations on geometries, columns, and merge (3 of 16 public params); the other 13 plus the return type were annotated. The docstring already declared the intended types so this was a doc-vs-signature drift. Fix annotates geometries: Any (because the accepted GeoDataFrame / dask_geopandas / iterable union spans optional deps), columns: Optional[Sequence[str]], merge: Union[str, Callable]. Regression test in test_rasterize_signature_annot_2250.py pins every param + the return annotation so a future contributor can't silently drop annotations again. Cross-module drift documented but not filed per template: clip_polygon(nodata) vs rasterize(fill) same concept different name; clip_polygon(name: Optional[str]=None) vs rasterize(name: str='rasterize') default convention; polygonize(column_name) vs rasterize(column) column selector. No Cat 1 in-module rename, no Cat 2 return drift (returns xr.DataArray as documented), no Cat 4 mutable defaults, no Cat 5 orphan API (rasterize is the only public symbol from the module and is re-exported in __init__). cuda-validated: cupy backend accepts identical kwargs, smoke-tested with use_cuda=True on host with CUDA_AVAILABLE." reproject,2026-05-10,1570,HIGH,2;5,"Filed cross-module attrs['vertical_crs'] type collision (string vs EPSG int) vs xrspatial.geotiff. Fixed in PR (TBD): reproject now writes EPSG int and preserves friendly token under vertical_datum. MEDIUM kwarg-order drift (transform_precision vs chunk_size) and missing type hints vs geotiff documented but not fixed (cosmetic, kwarg-only)." +zonal,2026-05-27,2521,HIGH,1;3;5,"Sweep 2026-05-27 (deep-sweep-api-consistency-zonal-2026-05-27). 1 HIGH Cat 1 finding fixed in this branch (#2521): crop() used zones_ids while stats/crosstab use zone_ids -- pure typo creating a TypeError trap when switching between sibling zonal functions. Fix accepts both, deprecates zones_ids with DeprecationWarning, raises if both supplied, raises if neither. All call sites in tests migrated to canonical zone_ids; legacy zones_ids paths covered by new regression tests. Other findings not fixed in this PR: (HIGH Cat 1+4) nodata vs nodata_values drift across stats/crosstab (nodata_values=None) vs apply/hypsometric_integral (nodata=0) -- different name AND different default, breaks substitutability; cross-function scope, needs a design issue. (MEDIUM Cat 3) crosstab docstring says 'layer: int, default=0' but signature is 'Optional[int] = None'. (MEDIUM Cat 3) hypsometric_integral lacks all type annotations; apply and crop lack return type annotations (siblings have them). (MEDIUM Cat 5) get_full_extent has public-style docstring with 'from xrspatial.zonal import get_full_extent' example but is not in __init__.py -- borderline orphan, but minor utility. (LOW Cat 3) apply() docstring mixes 'values' parameter name with 'agg' prose; example returns np.array shape (not DataArray) while function actually returns a DataArray. Cross-cutting: zones/raster as first-arg name varies (zonal.stats uses zones; zonal.regions/trim use raster). Regions/trim are single-array operations on the zone raster itself, so the rename arguably matches the role. Documented, not filed. cuda-validated: CUDA_AVAILABLE=True on this host." diff --git a/xrspatial/tests/test_dask_cupy_gaps.py b/xrspatial/tests/test_dask_cupy_gaps.py index 1290a8222..eb0f0d15e 100644 --- a/xrspatial/tests/test_dask_cupy_gaps.py +++ b/xrspatial/tests/test_dask_cupy_gaps.py @@ -246,7 +246,7 @@ def test_crop_dask(): raster = xr.DataArray( da.from_array(_CROP_ARR, chunks=(3, 2)), dims=['y', 'x'], ) - result = crop(raster, raster, zones_ids=(1, 3)) + result = crop(raster, raster, zone_ids=(1, 3)) assert result.shape == _CROP_EXPECTED_SHAPE np.testing.assert_array_equal(result.data.compute(), _CROP_EXPECTED) @@ -259,7 +259,7 @@ def test_crop_dask_lazy(): raster = xr.DataArray( da.from_array(_CROP_ARR, chunks=(3, 2)), dims=['y', 'x'], ) - result = crop(raster, raster, zones_ids=(1, 3)) + result = crop(raster, raster, zone_ids=(1, 3)) assert isinstance(result.data, da.Array) @@ -268,7 +268,7 @@ def test_crop_cupy(): import cupy raster = xr.DataArray(cupy.asarray(_CROP_ARR), dims=['y', 'x']) - result = crop(raster, raster, zones_ids=(1, 3)) + result = crop(raster, raster, zone_ids=(1, 3)) assert result.shape == _CROP_EXPECTED_SHAPE np.testing.assert_array_equal(result.data.get(), _CROP_EXPECTED) @@ -281,7 +281,7 @@ def test_crop_dask_cupy(): gpu = cupy.asarray(_CROP_ARR) raster = xr.DataArray(da.from_array(gpu, chunks=(3, 2)), dims=['y', 'x']) - result = crop(raster, raster, zones_ids=(1, 3)) + result = crop(raster, raster, zone_ids=(1, 3)) assert result.shape == _CROP_EXPECTED_SHAPE computed = result.data.compute() assert isinstance(computed, cupy.ndarray) diff --git a/xrspatial/tests/test_validation.py b/xrspatial/tests/test_validation.py index ff50b7e73..9af58d56d 100644 --- a/xrspatial/tests/test_validation.py +++ b/xrspatial/tests/test_validation.py @@ -142,7 +142,7 @@ def test_trim_rejects_ndarray(self): def test_crop_rejects_ndarray(self): with pytest.raises(TypeError, match="xarray.DataArray"): - crop(np.zeros((5, 5)), _raster_2d, zones_ids=[1]) + crop(np.zeros((5, 5)), _raster_2d, zone_ids=[1]) # Classify — all 10 functions @pytest.mark.parametrize('func,args', [ diff --git a/xrspatial/tests/test_zonal.py b/xrspatial/tests/test_zonal.py index f790d2b6c..a5b5fcf01 100644 --- a/xrspatial/tests/test_zonal.py +++ b/xrspatial/tests/test_zonal.py @@ -1673,7 +1673,7 @@ def test_crop(): [0, 0, 0, 0]], dtype=np.int64) raster = create_test_arr(arr) - result = crop(raster, raster, zones_ids=(1, 3)) + result = crop(raster, raster, zone_ids=(1, 3)) assert result.shape == (4, 3) trimmed_arr = np.array([[4, 0, 3], @@ -1731,12 +1731,107 @@ def test_crop_nothing_to_crop(): [0, 0, 0, 0]], dtype=np.int64) raster = create_test_arr(arr) - result = crop(raster, raster, zones_ids=(0,)) + result = crop(raster, raster, zone_ids=(0,)) assert result.shape == arr.shape compare = arr == result.data assert compare.all() +# --------------------------------------------------------------------------- +# Regression tests for #2521: crop() should accept the canonical zone_ids +# kwarg (matching stats() and crosstab()), with zones_ids kept as a +# deprecated alias. +# --------------------------------------------------------------------------- + +def test_crop_zone_ids_canonical_matches_zones_ids_legacy(): + """crop(..., zone_ids=...) produces the same result as the legacy alias.""" + import warnings + + arr = np.array([[0, 4, 0, 3], + [0, 4, 4, 3], + [0, 1, 1, 3], + [0, 1, 1, 3], + [0, 0, 0, 0]], dtype=np.int64) + raster = create_test_arr(arr) + + new = crop(raster, raster, zone_ids=(1, 3)) + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + legacy = crop(raster, raster, zones_ids=(1, 3)) + + assert new.shape == legacy.shape + assert (new.data == legacy.data).all() + + +def test_crop_zones_ids_emits_deprecation_warning(): + """Passing zones_ids must emit a DeprecationWarning.""" + import warnings + + arr = np.array([[0, 4, 0, 3], + [0, 4, 4, 3], + [0, 1, 1, 3], + [0, 1, 1, 3], + [0, 0, 0, 0]], dtype=np.int64) + raster = create_test_arr(arr) + + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + crop(raster, raster, zones_ids=(1, 3)) + + dep = [w for w in caught if issubclass(w.category, DeprecationWarning) + and "zones_ids" in str(w.message)] + assert len(dep) >= 1, ( + f"Expected a DeprecationWarning mentioning zones_ids, got: " + f"{[str(w.message) for w in caught]}" + ) + + +def test_crop_zone_ids_does_not_warn(): + """Passing zone_ids (canonical) must not emit a DeprecationWarning.""" + import warnings + + arr = np.array([[0, 4, 0, 3], + [0, 4, 4, 3], + [0, 1, 1, 3], + [0, 1, 1, 3], + [0, 0, 0, 0]], dtype=np.int64) + raster = create_test_arr(arr) + + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + crop(raster, raster, zone_ids=(1, 3)) + + dep = [w for w in caught if issubclass(w.category, DeprecationWarning) + and "zones_ids" in str(w.message)] + assert dep == [], ( + f"Expected no DeprecationWarning for zone_ids, got: " + f"{[str(w.message) for w in dep]}" + ) + + +def test_crop_both_aliases_raises(): + """Passing both zone_ids and zones_ids must raise TypeError.""" + arr = np.array([[0, 4, 0, 3], + [0, 4, 4, 3], + [0, 1, 1, 3], + [0, 1, 1, 3], + [0, 0, 0, 0]], dtype=np.int64) + raster = create_test_arr(arr) + + with pytest.raises(TypeError, match="zone_ids.*zones_ids|zones_ids.*zone_ids"): + crop(raster, raster, zone_ids=(1,), zones_ids=(3,)) + + +def test_crop_missing_zone_ids_raises(): + """crop() with neither zone_ids nor zones_ids must raise TypeError.""" + arr = np.array([[0, 4, 0, 3], + [0, 4, 4, 3]], dtype=np.int64) + raster = create_test_arr(arr) + + with pytest.raises(TypeError, match="zone_ids"): + crop(raster, raster) + + # --------------------------------------------------------------------------- # Regression tests for #881: np.unique / np.isfinite must not materialise # the full dask array. @@ -1925,8 +2020,8 @@ def test_apply_gdf(self): def test_crop_gdf(self): values, gdf, zones_raster = self._zones_raster_and_gdf() - expected = crop(zones_raster, values, zones_ids=[1.0]) - result = crop(gdf, values, zones_ids=[1.0], column='zone_id') + expected = crop(zones_raster, values, zone_ids=[1.0]) + result = crop(gdf, values, zone_ids=[1.0], column='zone_id') xr.testing.assert_identical(result, expected) # -- rasterize_kw forwarding -- diff --git a/xrspatial/zonal.py b/xrspatial/zonal.py index 9db5e3d6f..706a8f13e 100644 --- a/xrspatial/zonal.py +++ b/xrspatial/zonal.py @@ -2556,10 +2556,11 @@ def _crop_bounds_dask(data, target_values): def crop( zones, values: xr.DataArray, - zones_ids: Union[list, tuple], + zone_ids: Optional[Union[list, tuple]] = None, name: str = "crop", column: Optional[str] = None, rasterize_kw: Optional[dict] = None, + zones_ids: Optional[Union[list, tuple]] = None, ): """ Crop scans from edges and eliminates rows / cols until one of the @@ -2574,8 +2575,9 @@ def crop( values: xr.DataArray Input values raster. - zones_ids : list or tuple - List of zone ids to crop raster. + zone_ids : list or tuple + List of zone ids to crop raster. Matches the ``zone_ids`` parameter + of :func:`stats` and :func:`crosstab`. name: str, default='crop' Output xr.DataArray.name property. @@ -2588,6 +2590,10 @@ def crop( Extra keyword arguments forwarded to ``rasterize()`` when *zones* is vector input. + zones_ids : list or tuple, optional + Deprecated alias for ``zone_ids``. Will emit a + ``DeprecationWarning`` and be removed in a future release. + Returns ------- crop_agg : xarray.DataArray @@ -2644,7 +2650,7 @@ def crop( cropped_agg = crop( zones=zones_sub, values=values_agg, - zones_ids=[1], + zone_ids=[1], ) # Edit Attributes @@ -2686,6 +2692,31 @@ def crop( 'Max Elevation': '4000', } """ + # Backwards-compatible alias: stats() and crosstab() use `zone_ids`, + # crop() historically used `zones_ids` (extra 's'). Accept both, + # emit a DeprecationWarning on the old name, raise if both are passed. + if zones_ids is not None: + import warnings + if zone_ids is not None: + raise TypeError( + "crop() received both `zone_ids` and `zones_ids`; pass " + "only `zone_ids` (the canonical name)." + ) + warnings.warn( + "crop(zones_ids=...) is deprecated and will be removed in a " + "future release; use `zone_ids=...` to match stats() and " + "crosstab().", + DeprecationWarning, + stacklevel=2, + ) + zone_ids = zones_ids + + if zone_ids is None: + raise TypeError( + "crop() missing required argument `zone_ids` (list or tuple " + "of zone ids to crop to)." + ) + zones = _maybe_rasterize_zones(zones, values, column=column, rasterize_kw=rasterize_kw) @@ -2694,11 +2725,11 @@ def crop( data = zones.data if has_dask_array() and isinstance(data, da.Array): - top, bottom, left, right = _crop_bounds_dask(data, zones_ids) + top, bottom, left, right = _crop_bounds_dask(data, zone_ids) else: if is_cupy_array(data): data = data.get() - top, bottom, left, right = _crop(data, np.asarray(zones_ids)) + top, bottom, left, right = _crop(data, np.asarray(zone_ids)) arr = values[top: bottom + 1, left: right + 1] arr.name = name