From 8e2685ebba32ccf1048cfe5f22f8f4f4c597f1fe Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Thu, 21 May 2026 07:22:47 -0700 Subject: [PATCH] rasterize: drop stale transform/res from like.attrs when grid is overridden Closes #2251 When rasterize(geoms, like=template, ...) is called with an explicit bounds/width/height/resolution that reshapes the output relative to the template, the inherited attrs['res'] and attrs['transform'] from ``like`` describe the template's grid rather than the actual output. ``get_dataarray_resolution`` prefers attrs['res'] over computing from coords, so a stale res silently poisoned downstream slope/aspect/ proximity callers with the template's cellsize. Same class as the sky_view_factor cellsize bug. The fix strips res/transform from the propagated like.attrs when the output grid no longer matches the template (the existing reuse_like_coords flag already captures exactly this condition). Crs, spatial_ref, and other non-grid-shape attrs still propagate. When the output grid matches the template (no override), res and transform stay so chained pipelines see the unchanged spatial metadata. 9 new tests in TestLikeStaleGridAttrs2251 cover bounds / width-height / resolution overrides, matching-size keeps the attrs, get_dataarray_ resolution consistency end-to-end, and parity across numpy / cupy / dask+numpy / dask+cupy backends. State CSV updated to record the re-audit on 2026-05-21. --- .claude/sweep-metadata-state.csv | 2 +- xrspatial/rasterize.py | 12 +++ xrspatial/tests/test_rasterize.py | 136 ++++++++++++++++++++++++++++++ 3 files changed, 149 insertions(+), 1 deletion(-) diff --git a/.claude/sweep-metadata-state.csv b/.claude/sweep-metadata-state.csv index 5047db05f..c6bdd0d9a 100644 --- a/.claude/sweep-metadata-state.csv +++ b/.claude/sweep-metadata-state.csv @@ -1,5 +1,5 @@ module,last_inspected,issue,severity_max,categories_found,notes geotiff,2026-05-18,1909,HIGH,4;5,"Re-audit 2026-05-15 (agent-a55b69cec1ef2a092 worktree, branch deep-sweep-metadata-geotiff-2026-05-15). 4-backend (numpy/cupy/dask+numpy/dask+cupy) parity reverified after the #1813 modular refactor: full reads, windowed reads, multi-band, band=N selection, no-georef integer pixel coords, crs/crs_wkt/transform/nodata/x_resolution/y_resolution/resolution_unit/image_description/gdal_metadata all agree across backends. DataArray .name and dims agree (y, x for 2D; y, x, band for 3D). NEW HIGH finding #1909: GDS chunked GPU path (_read_geotiff_gpu_chunked_gds) declared the dask graph dtype as float64 when source had an in-range integer nodata sentinel, matching the CPU dask path's #1597 contract, but the per-chunk _chunk_task did not cast its returned cupy array to declared_dtype -- chunks with no sentinel hit returned the raw uint16/int16 source dtype, producing a silent declared/actual dtype mismatch. Fix mirrors the #1597 + #1624 CPU dask pattern: compute declared_dtype before defining _chunk_task, cast inside the task only when arr.dtype != declared_dtype to skip the no-op astype(copy=True). 6 regression tests added in test_chunked_gpu_declared_dtype_1909.py covering declared vs computed parity, CPU/GPU dask declared-dtype agreement, eager paths preserve source dtype, no-nodata round-trip, explicit dtype= kwarg, and sentinel-hit float64 promotion. Pre-existing test failures in test_predictor2_big_endian_gpu_1517.py and test_size_param_validation_gpu_vrt_1776.py exist on main (read_to_array AttributeError after #1813 refactor, tile_size=4 rejected by stricter _validate_tile_size_arg) and are unrelated to this audit. | Re-audited 2026-05-18 (agent-a59a61958f181c31a worktree, branch deep-sweep-metadata-geotiff-2026-05-18). 4-backend (numpy / cupy / dask+numpy / dask+cupy) metadata parity reverified end-to-end: open_geotiff over a tiled uint16 fixture with crs + transform + GDAL_NODATA sentinel emits identical attrs across all 4 backends (crs=32633, crs_wkt, transform 6-tuple, nodata=5, masked_nodata=True, _xrspatial_geotiff_contract=2, extra_tags, image_description, resolution_unit, x_resolution, y_resolution). Multi-band 3D (y, x, band) with band coord, no-georef int64 pixel coords, windowed reads with transform origin shift, and mask_nodata=False keeping integer dtype all agree across the 4 backends. Write round-trip via to_geotiff (numpy, cupy, dask streaming) re-emits crs / transform / nodata / masked_nodata / contract version with byte-stable transform. Band-first (band, y, x) input correctly remaps to (y, x, band) on disk. _populate_attrs_from_geo_info, _set_nodata_attrs, and _extract_rich_tags centralise attrs emission across all read paths (_init_, _backends/dask, _backends/gpu, _backends/vrt) and write paths (_writers/eager, _writers/gpu, _writers/vrt). _ATTRS_CONTRACT_VERSION=2 is stamped on every path including the chunked GPU GDS and chunked VRT inline-attrs branches. No new CRITICAL/HIGH/MEDIUM/LOW findings." polygonize,2026-05-19,2149,MEDIUM,1,"Audited 2026-05-19 (agent-ad1070530d37a4fdf worktree, branch deep-sweep-metadata-polygonize-2026-05-19). Output is vector (column, polygon_points / GeoDataFrame / GeoJSON dict / awkward) so Cat 2/3 do not apply in the DataArray sense. Cat 1 MEDIUM finding #2149: GeoDataFrame output drops raster.attrs['crs'] (and crs_wkt and rioxarray rio.crs); GeoDataFrame.crs is always None even when input is georeferenced. Fix: new _detect_raster_crs helper + crs= kwarg threaded into _to_geopandas; df.set_crs is called when a CRS is detected. spatialpandas has no CRS slot and GeoJSON RFC 7946 is WGS84-only, so propagation lives only on the geopandas path. CRS propagation runs at the public API level so all 4 backends (numpy / cupy / dask+numpy / dask+cupy) propagate consistently -- verified end-to-end with EPSG:4326 attrs across all 4 backends. 8 new tests in TestPolygonizeCRSPropagation cover EPSG string/int, crs_wkt, no CRS, unparseable CRS, attrs-vs-rioxarray preference, rioxarray-only path, and simplify interaction. Cat 2 LOW (not fixed): output coords are pixel-space when input has georeferenced x/y or attrs['transform']; user must pass transform= explicitly. Documented behavior, leave as-is. Cat 4 LOW (not fixed): nodatavals from input attrs is not auto-applied as a mask; documented behavior (explicit mask= kwarg)." -rasterize,2026-05-17,2018,HIGH,1;2;4,"rasterize() drops like.attrs, rebuilds like.coords via linspace (not bit-identical), and never emits _FillValue/nodatavals even when fill is non-NaN. Cat 1 HIGH: chained pipelines like slope(rasterize(gdf, like=elevation)) silently lose crs/res/transform. Cat 2 MEDIUM: linspace round-trip from re-derived bounds breaks xr.align with like. Cat 4 MEDIUM: rasterize(..., fill=-9999, dtype=int32) emits no _FillValue. All 4 backends share the same final return so the fix is one place. Fixed in deep-sweep-metadata-rasterize-2026-05-17-01 (worktree agent-ab7a9aee97c1e4cdf): _extract_grid_from_like now returns coords/attrs; rasterize() reuses like.coords directly when grid matches, copies like.attrs, and emits _FillValue + nodatavals when fill is not NaN. 9 new tests in TestMetadataPropagation cover attrs propagation, bit-identical coord reuse, fill-value emission, isolation from template attrs, and parity across numpy/cupy/dask+numpy/dask+cupy backends. Full test suite (193 passing) clean." +rasterize,2026-05-21,2251,HIGH,1,"rasterize() drops like.attrs, rebuilds like.coords via linspace (not bit-identical), and never emits _FillValue/nodatavals even when fill is non-NaN. Cat 1 HIGH: chained pipelines like slope(rasterize(gdf, like=elevation)) silently lose crs/res/transform. Cat 2 MEDIUM: linspace round-trip from re-derived bounds breaks xr.align with like. Cat 4 MEDIUM: rasterize(..., fill=-9999, dtype=int32) emits no _FillValue. All 4 backends share the same final return so the fix is one place. Fixed in deep-sweep-metadata-rasterize-2026-05-17-01 (worktree agent-ab7a9aee97c1e4cdf): _extract_grid_from_like now returns coords/attrs; rasterize() reuses like.coords directly when grid matches, copies like.attrs, and emits _FillValue + nodatavals when fill is not NaN. 9 new tests in TestMetadataPropagation cover attrs propagation, bit-identical coord reuse, fill-value emission, isolation from template attrs, and parity across numpy/cupy/dask+numpy/dask+cupy backends. Full test suite (193 passing) clean. | Re-audited 2026-05-21 (agent-a645dc07f847ae8ae worktree, branch deep-sweep-metadata-rasterize-2026-05-21). 4-backend (numpy/cupy/dask+numpy/dask+cupy) metadata parity reverified: all 4 backends route through the same final xr.DataArray constructor in rasterize(); crs / spatial_ref non-dim coord / coords / dims agree across backends. NEW HIGH finding #2251 (Cat 1): when rasterize(geoms, like=template, bounds=..., width=..., height=..., resolution=...) overrides the grid relative to like, the inherited attrs['transform'] and attrs['res'] from like are propagated unchanged so they describe the template's grid, not the actual output. get_dataarray_resolution() prefers attrs['res'] over calc_res from coords, so downstream slope/aspect/proximity see the wrong cellsize. Same class as #1407 sky_view_factor bug. Fix in rasterize(): out_attrs.pop('res') / out_attrs.pop('transform') when like_attrs is present but reuse_like_coords is False (output grid != template grid). Preserves crs / nodata triplet / spatial_ref handling. 9 new tests in TestLikeStaleGridAttrs2251 cover bounds override, width/height override, resolution override, matching width/height preserves attrs, get_dataarray_resolution consistency, and parity across all 4 backends. Full rasterize test suite (224 passed, 2 skipped) clean." reproject,2026-05-10,1572;1573,HIGH,1;3;4,geoid_height_raster dropped input attrs and used dims[-2:] for 3D inputs (#1572). reproject/merge ignored nodatavals (rasterio convention) when rioxarray absent (#1573). Fixed in same branch. diff --git a/xrspatial/rasterize.py b/xrspatial/rasterize.py index cfb9f930c..d7e04a824 100644 --- a/xrspatial/rasterize.py +++ b/xrspatial/rasterize.py @@ -3213,6 +3213,18 @@ def rasterize( out_attrs = like_attrs if like_attrs is not None else {} for k in ('nodata', '_FillValue', 'nodatavals'): out_attrs.pop(k, None) + # Grid-shape attrs (res, transform) describe the template's grid. + # When the caller overrides bounds/width/height/resolution so the + # output grid no longer matches ``like``, leaving the inherited + # values in place lies to downstream consumers: get_dataarray_ + # resolution() prefers ``attrs['res']`` over computing from coords, + # so a stale res silently poisons slope/aspect/proximity callers + # with the template's cellsize instead of the actual one. Drop them + # when the grid was reshaped; keep them when the output reuses the + # template's coords bit-identically. + if like_attrs is not None and not reuse_like_coords: + for k in ('res', 'transform'): + out_attrs.pop(k, None) try: fill_as_float = float(fill) fill_is_nan = np.isnan(fill_as_float) diff --git a/xrspatial/tests/test_rasterize.py b/xrspatial/tests/test_rasterize.py index f14adbb15..b9927214e 100644 --- a/xrspatial/tests/test_rasterize.py +++ b/xrspatial/tests/test_rasterize.py @@ -2046,6 +2046,142 @@ def test_geotiff_round_trip_preserves_fill(self, tmp_path): assert float(nodata) == -9999.0 +class TestLikeStaleGridAttrs2251: + """Issue #2251 -- ``like.attrs['res']`` and ``attrs['transform']`` describe + the template's grid. When the caller overrides ``bounds``, + ``width``/``height``, or ``resolution`` so the output grid is no longer + identical to ``like``, those keys have to be dropped so downstream + consumers (``get_dataarray_resolution`` prefers ``attrs['res']`` over + coords) don't see a stale cellsize. + """ + + @staticmethod + def _template(): + x = np.linspace(0.5, 9.5, 10) + y = np.linspace(9.5, 0.5, 10) + return xr.DataArray( + np.zeros((10, 10), dtype=np.float64), + dims=['y', 'x'], + coords={'y': y, 'x': x}, + attrs={ + 'crs': 'EPSG:32610', + 'res': (1.0, 1.0), + 'transform': (1.0, 0.0, 0.0, 0.0, -1.0, 10.0), + }, + ) + + def test_bounds_override_strips_stale_grid_attrs(self): + like = self._template() + result = rasterize( + [(box(20, 20, 80, 80), 1.0)], + like=like, bounds=(0, 0, 100, 100), + width=10, height=10, fill=0, + ) + # Grid shape moved from 1.0/pixel to 10.0/pixel; stale attrs gone. + assert 'res' not in result.attrs + assert 'transform' not in result.attrs + # Non-grid-shape attrs (crs) still propagate. + assert result.attrs.get('crs') == 'EPSG:32610' + + def test_width_height_override_strips_stale_grid_attrs(self): + like = self._template() + result = rasterize( + [(box(2, 2, 8, 8), 1.0)], + like=like, width=5, height=5, fill=0, + ) + assert 'res' not in result.attrs + assert 'transform' not in result.attrs + assert result.attrs.get('crs') == 'EPSG:32610' + + def test_resolution_override_strips_stale_grid_attrs(self): + like = self._template() + result = rasterize( + [(box(2, 2, 8, 8), 1.0)], + like=like, resolution=0.5, fill=0, + ) + assert 'res' not in result.attrs + assert 'transform' not in result.attrs + assert result.attrs.get('crs') == 'EPSG:32610' + + def test_identical_grid_preserves_grid_attrs(self): + """No override -> output grid == template grid -> keep res/transform.""" + like = self._template() + result = rasterize( + [(box(2, 2, 8, 8), 1.0)], like=like, fill=0, + ) + assert result.attrs.get('res') == (1.0, 1.0) + assert result.attrs.get('transform') == \ + (1.0, 0.0, 0.0, 0.0, -1.0, 10.0) + + def test_matching_width_height_preserves_grid_attrs(self): + """Passing width/height that match the template's size still + reuses ``like.coords`` (so the grid is identical), so the + template's grid-shape attrs are still correct. + """ + like = self._template() + result = rasterize( + [(box(2, 2, 8, 8), 1.0)], + like=like, width=10, height=10, fill=0, + ) + assert result.attrs.get('res') == (1.0, 1.0) + assert result.attrs.get('transform') == \ + (1.0, 0.0, 0.0, 0.0, -1.0, 10.0) + + def test_get_dataarray_resolution_consistent_after_override(self): + """The user-visible symptom: ``get_dataarray_resolution`` must + report the actual cellsize, not the stale template value. + """ + from xrspatial.utils import get_dataarray_resolution + like = self._template() + result = rasterize( + [(box(20, 20, 80, 80), 1.0)], + like=like, bounds=(0, 0, 100, 100), + width=10, height=10, fill=0, + ) + # With res stripped, get_dataarray_resolution falls back to + # computing from coords; both axes should be 10.0. + cx, cy = get_dataarray_resolution(result) + assert abs(cx - 10.0) < 1e-9 + assert abs(cy - 10.0) < 1e-9 + + @skip_no_dask + def test_bounds_override_strips_stale_grid_attrs_dask(self): + like = self._template() + result = rasterize( + [(box(20, 20, 80, 80), 1.0)], + like=like, bounds=(0, 0, 100, 100), + width=10, height=10, fill=0, chunks=5, + ) + assert 'res' not in result.attrs + assert 'transform' not in result.attrs + assert result.attrs.get('crs') == 'EPSG:32610' + + @skip_no_cuda + def test_bounds_override_strips_stale_grid_attrs_cupy(self): + like = self._template() + result = rasterize( + [(box(20, 20, 80, 80), 1.0)], + like=like, bounds=(0, 0, 100, 100), + width=10, height=10, fill=0, use_cuda=True, + ) + assert 'res' not in result.attrs + assert 'transform' not in result.attrs + assert result.attrs.get('crs') == 'EPSG:32610' + + @skip_no_cuda + @skip_no_dask + def test_bounds_override_strips_stale_grid_attrs_dask_cupy(self): + like = self._template() + result = rasterize( + [(box(20, 20, 80, 80), 1.0)], + like=like, bounds=(0, 0, 100, 100), + width=10, height=10, fill=0, use_cuda=True, chunks=5, + ) + assert 'res' not in result.attrs + assert 'transform' not in result.attrs + assert result.attrs.get('crs') == 'EPSG:32610' + + class TestPolysToWkb: """Tests for the _polys_to_wkb helper (issue #2059)."""