From fe49e483e5736678c9a17aa2d254c488409b38dd Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Sun, 17 May 2026 05:20:55 -0700 Subject: [PATCH 1/2] geotiff: stamp _xrspatial_geotiff_contract=1 on every read (#1984) PR 3 of 7 on issue #1984. Adds a contract-version marker attr (``_xrspatial_geotiff_contract``) to every DataArray returned by an xrspatial geotiff read path so downstream code can identify which attrs-contract revision produced an array. The value lives as a module-level constant ``_ATTRS_CONTRACT_VERSION`` in ``_attrs.py``. The eager numpy, dask+numpy, GPU, dask+GPU, and the COG/HTTP path all funnel through ``_populate_attrs_from_geo_info``, so one stamp there covers four backends. The VRT backends in ``_backends/vrt.py`` build their attrs dict directly and stamp the version inline; both the eager and chunked VRT paths reuse the same constant so the value stays in lockstep when it is later bumped. Adds ``test_attrs_contract_version_1984.py`` with one assertion per read path (eager, dask, GPU, dask+GPU, VRT eager, VRT chunked) plus a pin on the constant value. --- xrspatial/geotiff/_attrs.py | 14 ++ xrspatial/geotiff/_backends/vrt.py | 5 +- .../tests/test_attrs_contract_version_1984.py | 141 ++++++++++++++++++ 3 files changed, 158 insertions(+), 2 deletions(-) create mode 100644 xrspatial/geotiff/tests/test_attrs_contract_version_1984.py diff --git a/xrspatial/geotiff/_attrs.py b/xrspatial/geotiff/_attrs.py index 37f8615c1..3806cfc28 100644 --- a/xrspatial/geotiff/_attrs.py +++ b/xrspatial/geotiff/_attrs.py @@ -52,6 +52,13 @@ _TIFF_SHORT = 3 +# Contract version emitted on every read; bumped when the attrs contract +# changes. Downstream code reads ``attrs['_xrspatial_geotiff_contract']`` +# to learn which attrs-contract revision produced the array. See issue +# #1984 and ``docs/source/user_guide/attrs_contract.rst``. +_ATTRS_CONTRACT_VERSION = 1 + + # String identifiers (used in xrspatial attrs) -> TIFF ResolutionUnit tag ids. _RESOLUTION_UNIT_IDS = {'none': 1, 'inch': 2, 'centimeter': 3} @@ -101,6 +108,13 @@ def _populate_attrs_from_geo_info(attrs: dict, geo_info, *, window=None) -> None advertises the windowed transform. The GPU path does not currently expose a windowed read, so it passes ``window=None``. """ + # Stamp the contract version first so every read path that funnels + # through this helper carries the marker. The VRT backends build + # their attrs dict directly and stamp the version there (see + # ``_backends/vrt.py``); keep both sites in sync via the constant + # rather than the bare literal. + attrs['_xrspatial_geotiff_contract'] = _ATTRS_CONTRACT_VERSION + if geo_info.crs_epsg is not None: attrs['crs'] = geo_info.crs_epsg if geo_info.crs_wkt is not None: diff --git a/xrspatial/geotiff/_backends/vrt.py b/xrspatial/geotiff/_backends/vrt.py index 14016c6ca..dd87566a7 100644 --- a/xrspatial/geotiff/_backends/vrt.py +++ b/xrspatial/geotiff/_backends/vrt.py @@ -13,6 +13,7 @@ import numpy as np import xarray as xr +from .._attrs import _ATTRS_CONTRACT_VERSION from .._coords import ( coords_from_pixel_geometry as _coords_from_pixel_geometry, transform_tuple_from_pixel_geometry as _transform_tuple_from_pixel_geometry, @@ -193,7 +194,7 @@ def read_vrt(source: str, *, else: coords = {} - attrs = {} + attrs = {'_xrspatial_geotiff_contract': _ATTRS_CONTRACT_VERSION} if vrt.crs_wkt: epsg = _wkt_to_epsg(vrt.crs_wkt) if epsg is not None: @@ -562,7 +563,7 @@ def _read_vrt_chunked(source, *, window, band, name, chunks, gpu, dtype, # eager reads share the same x/y arrays. gt = vrt.geo_transform coords = {} - attrs = {} + attrs = {'_xrspatial_geotiff_contract': _ATTRS_CONTRACT_VERSION} if gt is not None: origin_x, res_x, _, origin_y, _, res_y = gt coord_window = (win_r0, win_c0, win_r0 + full_h, win_c0 + full_w) diff --git a/xrspatial/geotiff/tests/test_attrs_contract_version_1984.py b/xrspatial/geotiff/tests/test_attrs_contract_version_1984.py new file mode 100644 index 000000000..6950f4080 --- /dev/null +++ b/xrspatial/geotiff/tests/test_attrs_contract_version_1984.py @@ -0,0 +1,141 @@ +"""Contract-version marker tests for issue #1984. + +PR 3 of the 7-PR plan attached to issue #1984 stamps every DataArray +returned by an xrspatial geotiff read path with +``attrs['_xrspatial_geotiff_contract'] = 1``. Downstream code reads +this marker to learn which attrs-contract revision produced the array. + +The stamp must appear on every backend: + +* eager numpy (``open_geotiff``) +* dask + numpy (``open_geotiff(chunks=...)`` / ``read_geotiff_dask``) +* cupy / GPU (``open_geotiff(gpu=True)`` / ``read_geotiff_gpu``) +* dask + cupy (``open_geotiff(gpu=True, chunks=...)``) +* VRT eager (``read_vrt``) +* VRT dask chunked (``read_vrt(chunks=...)``) + +The fixture style mirrors ``test_attrs_parity_1548.py``: build a small +on-disk TIFF (and a small VRT pointing at one) inside ``tmp_path``, +open it through each backend, and assert on the resulting attrs. +""" +from __future__ import annotations + +import importlib.util +import os + +import numpy as np +import pytest + +from xrspatial.geotiff import open_geotiff, read_vrt +from xrspatial.geotiff._attrs import _ATTRS_CONTRACT_VERSION + +tifffile = pytest.importorskip("tifffile") + + +_CONTRACT_KEY = '_xrspatial_geotiff_contract' + + +def _gpu_available() -> bool: + if importlib.util.find_spec("cupy") is None: + return False + try: + import cupy + return bool(cupy.cuda.is_available()) + except Exception: + return False + + +_HAS_GPU = _gpu_available() +_gpu_only = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required") + + +def _write_small_tiff(path): + """Write a small tiled float32 TIFF used by every read-path assertion.""" + arr = np.arange(64 * 64, dtype=np.float32).reshape(64, 64) + tifffile.imwrite( + path, arr, photometric='minisblack', planarconfig='contig', + tile=(32, 32), compression='deflate', metadata=None, + ) + return arr + + +def _write_minimal_vrt(vrt_path, source_name, *, height, width): + """Write a VRT that references ``source_name`` as a single-band source.""" + vrt_path.write_text( + f'\n' + ' \n' + ' \n' + f' {source_name}' + '\n' + ' 1\n' + f' \n' + f' \n' + ' \n' + ' \n' + '\n' + ) + + +def test_attrs_contract_version_constant_is_one(): + """Pin the integer value so a careless bump shows up here first.""" + assert _ATTRS_CONTRACT_VERSION == 1 + + +def test_eager_numpy_stamps_contract_version(tmp_path): + path = str(tmp_path / "contract_v1_eager.tif") + _write_small_tiff(path) + + da = open_geotiff(path) + + assert da.attrs[_CONTRACT_KEY] == 1 + + +def test_dask_numpy_stamps_contract_version(tmp_path): + path = str(tmp_path / "contract_v1_dask.tif") + _write_small_tiff(path) + + da = open_geotiff(path, chunks=32) + + assert da.attrs[_CONTRACT_KEY] == 1 + + +@_gpu_only +def test_gpu_stamps_contract_version(tmp_path): + path = str(tmp_path / "contract_v1_gpu.tif") + _write_small_tiff(path) + + da = open_geotiff(path, gpu=True) + + assert da.attrs[_CONTRACT_KEY] == 1 + + +@_gpu_only +def test_dask_gpu_stamps_contract_version(tmp_path): + path = str(tmp_path / "contract_v1_dask_gpu.tif") + _write_small_tiff(path) + + da = open_geotiff(path, gpu=True, chunks=32) + + assert da.attrs[_CONTRACT_KEY] == 1 + + +def test_vrt_eager_stamps_contract_version(tmp_path): + src = tmp_path / "contract_v1_vrt_source.tif" + _write_small_tiff(str(src)) + vrt = tmp_path / "contract_v1_vrt_eager.vrt" + _write_minimal_vrt(vrt, os.path.basename(src), height=64, width=64) + + da = read_vrt(str(vrt)) + + assert da.attrs[_CONTRACT_KEY] == 1 + + +def test_vrt_chunked_stamps_contract_version(tmp_path): + src = tmp_path / "contract_v1_vrt_chunked_source.tif" + _write_small_tiff(str(src)) + vrt = tmp_path / "contract_v1_vrt_chunked.vrt" + _write_minimal_vrt(vrt, os.path.basename(src), height=64, width=64) + + da = read_vrt(str(vrt), chunks=32) + + assert da.attrs[_CONTRACT_KEY] == 1 From 7f8c753a00afbc144d5b20a1db4ddd98c4cdb42f Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Sun, 17 May 2026 10:16:37 -0700 Subject: [PATCH 2/2] geotiff: address #2003 review nits on contract version stamp - _attrs.py docstring: noted that the stamp overwrites any pre-existing value on the passed-in attrs dict; callers pass freshly built dicts. - _backends/vrt.py (eager + chunked): added inline comments pointing at _populate_attrs_from_geo_info as the canonical stamp site, so future maintainers know why the helper is bypassed in the VRT path. --- xrspatial/geotiff/_attrs.py | 5 +++++ xrspatial/geotiff/_backends/vrt.py | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/xrspatial/geotiff/_attrs.py b/xrspatial/geotiff/_attrs.py index 3806cfc28..4db832c0c 100644 --- a/xrspatial/geotiff/_attrs.py +++ b/xrspatial/geotiff/_attrs.py @@ -107,6 +107,11 @@ def _populate_attrs_from_geo_info(attrs: dict, geo_info, *, window=None) -> None the outer window through this helper so the resulting DataArray advertises the windowed transform. The GPU path does not currently expose a windowed read, so it passes ``window=None``. + + ``attrs['_xrspatial_geotiff_contract']`` is stamped unconditionally + as the first step. Any pre-existing value on the passed-in dict is + overwritten with the current ``_ATTRS_CONTRACT_VERSION``; callers + pass freshly built dicts, so this is the intended behaviour. """ # Stamp the contract version first so every read path that funnels # through this helper carries the marker. The VRT backends build diff --git a/xrspatial/geotiff/_backends/vrt.py b/xrspatial/geotiff/_backends/vrt.py index dd87566a7..706dbde0d 100644 --- a/xrspatial/geotiff/_backends/vrt.py +++ b/xrspatial/geotiff/_backends/vrt.py @@ -194,6 +194,9 @@ def read_vrt(source: str, *, else: coords = {} + # VRT builds its attrs dict inline rather than going through + # ``_populate_attrs_from_geo_info``; stamp the contract version here + # so both code paths emit the same marker. attrs = {'_xrspatial_geotiff_contract': _ATTRS_CONTRACT_VERSION} if vrt.crs_wkt: epsg = _wkt_to_epsg(vrt.crs_wkt) @@ -563,6 +566,9 @@ def _read_vrt_chunked(source, *, window, band, name, chunks, gpu, dtype, # eager reads share the same x/y arrays. gt = vrt.geo_transform coords = {} + # Mirrors the eager VRT branch: this code path bypasses + # ``_populate_attrs_from_geo_info``, so the contract version is + # stamped inline using the shared constant to stay in lockstep. attrs = {'_xrspatial_geotiff_contract': _ATTRS_CONTRACT_VERSION} if gt is not None: origin_x, res_x, _, origin_y, _, res_y = gt