From 7a6b0f2b44b1d1b88ea5b69511f1b02d7f2e0189 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Thu, 21 May 2026 07:19:25 -0700 Subject: [PATCH] rasterize: annotate geometries, columns, merge params (#2250) The signature was annotated on 13 of 16 public parameters plus the return type. The remaining three -- geometries, columns, merge -- were left bare even though the docstring already declared the intended types. IDE hints, mypy, and sphinx autodoc all see a partial picture when annotations stop mid-signature. - geometries: Any (the docstring's "GeoDataFrame or iterable of (geometry, value)" spans the geopandas / dask_geopandas / list-of- pairs cases; both geopandas backends are optional deps, so a precise Union would require runtime imports of optional packages in the type expression. Any keeps the public contract honest without forcing the imports.) - columns: Optional[Sequence[str]] (matches the docstring's "list of str, optional") - merge: Union[str, Callable] (matches "str or callable, default 'last'"; the precise Callable[[float, np.ndarray, int], float] signature is documented in the body of the docstring under merge_fn) Regression test pins every public-signature param plus the return annotation so a future contributor can't silently drop annotations again. The 2250-specific test names the three offending params directly so a regression message points at the right gap. Found by deep-sweep api-consistency on rasterize. Cross-module drifts the sweep also noted (nodata vs fill, name default convention, column_name vs column) are out of scope per the sweep template -- they get filed against the cross-cutting module pair, not against rasterize alone. --- .claude/sweep-api-consistency-state.csv | 1 + xrspatial/rasterize.py | 8 ++-- .../test_rasterize_signature_annot_2250.py | 45 +++++++++++++++++++ 3 files changed, 50 insertions(+), 4 deletions(-) create mode 100644 xrspatial/tests/test_rasterize_signature_annot_2250.py diff --git a/.claude/sweep-api-consistency-state.csv b/.claude/sweep-api-consistency-state.csv index 1509a4748..ed1c2bc83 100644 --- a/.claude/sweep-api-consistency-state.csv +++ b/.claude/sweep-api-consistency-state.csv @@ -1,4 +1,5 @@ module,last_inspected,issue,severity_max,categories_found,notes geotiff,2026-05-18,2106,MEDIUM,3,"Sweep 2026-05-18 (deep-sweep-api-consistency-geotiff-2026-05-18-1779164255). 1 MEDIUM Cat 3 finding fixed in this branch: open_geotiff(max_cloud_bytes=...) was the only kwarg on the public reader/writer surface without a Python type annotation. Docstring already declared ``int or None``; the surface and the docs disagreed. Fix adds ``int | None`` to the annotation; default stays the module-internal _MAX_CLOUD_BYTES_SENTINEL. Regression test in test_open_geotiff_max_cloud_bytes_annot_2106.py pins the immediate gap and parametrises over every public reader/writer to catch future ungenerated annotations. Prior sweep findings (#1922/#1935 kwarg ordering, #2052 mask_nodata parity, #2097 GPU MinIsWhite, #2095 zero-band 3D writes, #1946 write_vrt path/vrt_path shim) all confirmed fixed. Cross-sibling return-type drift (Cat 2): write_vrt returns str while to_geotiff and write_geotiff_gpu return path which is str | BinaryIO -- inspected and still LOW (callers do not substitute writers; the return-type drift is documented in each writer's docstring). Cross-cutting cross-module drift (chunk_size in reproject vs chunks in geotiff; target_crs vs crs) documented but not filed per sweep template (cross-cutting). cuda-validated." polygonize,2026-05-19,2148,HIGH,1;3,"Sweep 2026-05-19 (deep-sweep-api-consistency-polygonize-2026-05-19). 1 MEDIUM Cat 3 finding fixed in this branch (#2148): polygonize() was the only public vector/raster conversion function without a return type annotation. Sieve/contours/rasterize/clip_polygon all declare one. Fix adds a Union return annotation (numpy tuple | awkward tuple | geopandas GeoDataFrame | spatialpandas GeoDataFrame | geojson dict) using TYPE_CHECKING forward refs for optional deps, and expands the docstring Returns section to enumerate the per-return_type shapes. 1 HIGH Cat 1 finding NOT fixed in this PR -- cross-module rename: polygonize uses `connectivity` (int 4|8) while sieve uses `neighborhood` (int 4|8) for the identical rook/queen pixel-connectivity concept. Industry convention (GDAL, rasterio.features.sieve) favours `connectivity`; the deprecation shim belongs in sieve.py, not polygonize, so this is out of scope for the polygonize-scoped sweep branch. Documented here for the next sieve sweep pass. 1 LOW Cat 1 cross-cutting: polygonize/sieve/clip_polygon use `raster` while contours and many older modules use `agg` for the input DataArray -- library-wide drift, not filed per-module per sweep template. Cat 2 return-shape: polygonize returns tuple/GeoDataFrame/dict by return_type; consistent with contours' tuple/GeoDataFrame dispatch. No Cat 4 (no mutable defaults; connectivity=4 default matches sieve neighborhood=4 default). No Cat 5 (polygonize re-exported in xrspatial/__init__.py; no orphan API; no __all__ but consistent with module convention). cuda-validated: cupy backend accepts identical kwargs, smoke-tested with cupy DataArray on host with CUDA_AVAILABLE." +rasterize,2026-05-21,2250,MEDIUM,3,"Sweep 2026-05-21 (deep-sweep-api-consistency-rasterize-2026-05-21). 1 MEDIUM Cat 3 finding fixed in this branch (#2250): rasterize() was missing type annotations on geometries, columns, and merge (3 of 16 public params); the other 13 plus the return type were annotated. The docstring already declared the intended types so this was a doc-vs-signature drift. Fix annotates geometries: Any (because the accepted GeoDataFrame / dask_geopandas / iterable union spans optional deps), columns: Optional[Sequence[str]], merge: Union[str, Callable]. Regression test in test_rasterize_signature_annot_2250.py pins every param + the return annotation so a future contributor can't silently drop annotations again. Cross-module drift documented but not filed per template: clip_polygon(nodata) vs rasterize(fill) same concept different name; clip_polygon(name: Optional[str]=None) vs rasterize(name: str='rasterize') default convention; polygonize(column_name) vs rasterize(column) column selector. No Cat 1 in-module rename, no Cat 2 return drift (returns xr.DataArray as documented), no Cat 4 mutable defaults, no Cat 5 orphan API (rasterize is the only public symbol from the module and is re-exported in __init__). cuda-validated: cupy backend accepts identical kwargs, smoke-tested with use_cuda=True on host with CUDA_AVAILABLE." reproject,2026-05-10,1570,HIGH,2;5,"Filed cross-module attrs['vertical_crs'] type collision (string vs EPSG int) vs xrspatial.geotiff. Fixed in PR (TBD): reproject now writes EPSG int and preserves friendly token under vertical_datum. MEDIUM kwarg-order drift (transform_precision vs chunk_size) and missing type hints vs geotiff documented but not fixed (cosmetic, kwarg-only)." diff --git a/xrspatial/rasterize.py b/xrspatial/rasterize.py index cfb9f930c..e0d7a67ea 100644 --- a/xrspatial/rasterize.py +++ b/xrspatial/rasterize.py @@ -13,7 +13,7 @@ import math import warnings -from typing import NamedTuple, Optional, Tuple, Union +from typing import Any, Callable, NamedTuple, Optional, Sequence, Tuple, Union import numpy as np import shapely @@ -2853,12 +2853,12 @@ def _extract_grid_from_like(like): # --------------------------------------------------------------------------- def rasterize( - geometries, + geometries: Any, width: Optional[int] = None, height: Optional[int] = None, bounds: Optional[Tuple[float, float, float, float]] = None, column: Optional[str] = None, - columns=None, + columns: Optional[Sequence[str]] = None, fill: float = np.nan, dtype: Optional[np.dtype] = None, all_touched: bool = False, @@ -2866,7 +2866,7 @@ def rasterize( name: str = 'rasterize', resolution: Optional[Union[float, Tuple[float, float]]] = None, like: Optional[xr.DataArray] = None, - merge='last', + merge: Union[str, Callable] = 'last', chunks: Optional[Union[int, Tuple[int, int]]] = None, max_pixels: int = MAX_PIXELS_DEFAULT, ) -> xr.DataArray: diff --git a/xrspatial/tests/test_rasterize_signature_annot_2250.py b/xrspatial/tests/test_rasterize_signature_annot_2250.py new file mode 100644 index 000000000..4b615f615 --- /dev/null +++ b/xrspatial/tests/test_rasterize_signature_annot_2250.py @@ -0,0 +1,45 @@ +"""Regression: rasterize() public signature carries type annotations on every +parameter and on the return type. + +Pins the fix for issue #2250 (api-consistency sweep, Cat 3 MEDIUM). Without +this test, future contributors can drop annotations on ``geometries``, +``columns``, or ``merge`` without surfacing the drift between docstring and +signature -- IDE hints, mypy, and sphinx autodoc all silently lose +information. +""" +from __future__ import annotations + +import inspect + +from xrspatial.rasterize import rasterize + + +def test_rasterize_every_param_is_annotated(): + sig = inspect.signature(rasterize) + unannotated = [ + name for name, p in sig.parameters.items() + if p.annotation is inspect.Parameter.empty + ] + assert not unannotated, ( + f"rasterize() must annotate every public parameter; " + f"missing annotation(s): {unannotated}" + ) + + +def test_rasterize_return_is_annotated(): + sig = inspect.signature(rasterize) + assert sig.return_annotation is not inspect.Signature.empty, ( + "rasterize() must declare a return annotation" + ) + + +def test_rasterize_2250_specific_params_annotated(): + """Pin the three params the sweep called out so a future regression names + them directly in the failure message. + """ + sig = inspect.signature(rasterize) + for name in ('geometries', 'columns', 'merge'): + assert sig.parameters[name].annotation is not inspect.Parameter.empty, ( + f"rasterize({name}=...) lost its type annotation; " + f"this regressed issue #2250" + )