From 8982b7ff8b644e04c5e2585a90f0824683a23923 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Mon, 25 May 2026 20:21:06 -0700 Subject: [PATCH 1/2] geotiff tests: consolidate attrs-contract cluster (#2397) Folds the four ``test_attrs_contract_*_1984.py`` files into one parametrised ``attrs/test_contract.py`` under the new ``attrs/`` package. Tests are organised by contract dimension (canonical / aliases / passthrough / version) with descriptive parametrise IDs (``canonical[GDAL_NODATA]``, ``alias[nodatavals->nodata]``, etc.) rather than per-issue filenames. - 4 source files deleted, 1 new file added (-3 net). - 40 test cases retained; coverage parity verified in ``CLUSTER_AUDIT_PR5.md`` (audit file deleted before merge). - Release-gate checklist (release_gate_geotiff.rst) updated to point at the new path; the cited-files audit gate stays green. - Markers come from ``_helpers/markers.py`` (no per-file gpu probe). - Tests-only restructure; no source changes. Part of epic #2390 (GeoTIFF test consolidation, PR 5 of 11). --- .../source/reference/release_gate_geotiff.rst | 9 +- xrspatial/geotiff/tests/CLUSTER_AUDIT_PR5.md | 114 ++ xrspatial/geotiff/tests/attrs/__init__.py | 0 .../geotiff/tests/attrs/test_contract.py | 976 ++++++++++++++++++ .../tests/test_attrs_contract_aliases_1984.py | 182 ---- .../test_attrs_contract_canonical_1984.py | 407 -------- .../test_attrs_contract_passthrough_1984.py | 325 ------ .../tests/test_attrs_contract_version_1984.py | 196 ---- 8 files changed, 1094 insertions(+), 1115 deletions(-) create mode 100644 xrspatial/geotiff/tests/CLUSTER_AUDIT_PR5.md create mode 100644 xrspatial/geotiff/tests/attrs/__init__.py create mode 100644 xrspatial/geotiff/tests/attrs/test_contract.py delete mode 100644 xrspatial/geotiff/tests/test_attrs_contract_aliases_1984.py delete mode 100644 xrspatial/geotiff/tests/test_attrs_contract_canonical_1984.py delete mode 100644 xrspatial/geotiff/tests/test_attrs_contract_passthrough_1984.py delete mode 100644 xrspatial/geotiff/tests/test_attrs_contract_version_1984.py diff --git a/docs/source/reference/release_gate_geotiff.rst b/docs/source/reference/release_gate_geotiff.rst index b616af390..2461baba5 100644 --- a/docs/source/reference/release_gate_geotiff.rst +++ b/docs/source/reference/release_gate_geotiff.rst @@ -424,27 +424,26 @@ attrs contract - stable - Every read stamps ``attrs['_xrspatial_geotiff_contract']`` so downstream callers can branch on the version. - - ``xrspatial/geotiff/tests/test_attrs_contract_version_1984.py`` + - ``xrspatial/geotiff/tests/attrs/test_contract.py`` - `#2341`_ * - Canonical attrs after read - stable - ``transform``, ``crs``, ``crs_wkt``, ``nodata``, ``georef_status``, ``raster_type`` appear in canonical form on every backend. - - ``xrspatial/geotiff/tests/test_attrs_contract_canonical_1984.py``, + - ``xrspatial/geotiff/tests/attrs/test_contract.py``, ``xrspatial/geotiff/tests/test_attrs_parity_1548.py`` - `#2341`_ * - Attrs pass-through on write - stable - User-supplied attrs survive write round-trips; reserved keys are not silently dropped. - - ``xrspatial/geotiff/tests/test_attrs_contract_passthrough_1984.py``, - ``xrspatial/geotiff/tests/test_attrs_contract_aliases_1984.py`` + - ``xrspatial/geotiff/tests/attrs/test_contract.py`` - `#2341`_ * - ``georef_status`` canonical signal - stable - ``attrs['georef_status']`` reports whether CRS and transform were both parsed, partially parsed, or absent. - - ``xrspatial/geotiff/tests/test_attrs_contract_canonical_1984.py`` + - ``xrspatial/geotiff/tests/attrs/test_contract.py`` - `#2341`_ * - ``reader.allow_rotated`` (``allow_rotated=True`` drops ``crs``) - experimental diff --git a/xrspatial/geotiff/tests/CLUSTER_AUDIT_PR5.md b/xrspatial/geotiff/tests/CLUSTER_AUDIT_PR5.md new file mode 100644 index 000000000..debb7ca49 --- /dev/null +++ b/xrspatial/geotiff/tests/CLUSTER_AUDIT_PR5.md @@ -0,0 +1,114 @@ +# CLUSTER_AUDIT_PR5.md — Attrs contract consolidation + +Temporary audit table tracking every old `file::test` and where it +lands in `attrs/test_contract.py`. Deleted on the final commit on the +branch before approval (epic #2390 contract). + +## Source files folded + +- `test_attrs_contract_canonical_1984.py` +- `test_attrs_contract_aliases_1984.py` +- `test_attrs_contract_passthrough_1984.py` +- `test_attrs_contract_version_1984.py` + +All four deleted in the same commit that added `attrs/test_contract.py`. +File count delta: `-4 +1 = -3`. + +## Canonical tier mapping + +| Old `file::test` | New `file::test_id` | Notes | +|---|---|---| +| `test_attrs_contract_canonical_1984.py::test_every_canonical_key_present` | `attrs/test_contract.py::test_canonical_every_key_present` | Renamed for the new prefix scheme; logic identical. | +| `test_attrs_contract_canonical_1984.py::test_crs_roundtrip` | `attrs/test_contract.py::test_canonical_value_roundtrip[canonical[crs]]` | Folded into the per-key parametrize; assertion unchanged. | +| `test_attrs_contract_canonical_1984.py::test_crs_wkt_roundtrip` | `attrs/test_contract.py::test_canonical_value_roundtrip[canonical[crs_wkt]]` | Folded. | +| `test_attrs_contract_canonical_1984.py::test_transform_roundtrip` | `attrs/test_contract.py::test_canonical_value_roundtrip[canonical[transform]]` | Folded. | +| `test_attrs_contract_canonical_1984.py::test_nodata_roundtrip` | `attrs/test_contract.py::test_canonical_value_roundtrip[canonical[GDAL_NODATA]]` | Folded; id renamed to surface the underlying TIFF tag. | +| `test_attrs_contract_canonical_1984.py::test_extra_tags_roundtrip` | `attrs/test_contract.py::test_canonical_value_roundtrip[canonical[extra_tags]]` | Folded. | +| `test_attrs_contract_canonical_1984.py::test_gdal_metadata_roundtrip` | `attrs/test_contract.py::test_canonical_value_roundtrip[canonical[gdal_metadata]]` | Folded. | +| `test_attrs_contract_canonical_1984.py::test_gdal_metadata_xml_roundtrip` | `attrs/test_contract.py::test_canonical_value_roundtrip[canonical[gdal_metadata_xml]]` | Folded. | +| `test_attrs_contract_canonical_1984.py::test_resolution_group_roundtrip` | `attrs/test_contract.py::test_canonical_value_roundtrip[canonical[resolution_group]]` | Folded. | +| `test_attrs_contract_canonical_1984.py::test_contract_version_roundtrip` | `attrs/test_contract.py::test_canonical_value_roundtrip[canonical[contract_version]]` | Folded. | +| `test_attrs_contract_canonical_1984.py::test_georef_status_roundtrip` | `attrs/test_contract.py::test_canonical_value_roundtrip[canonical[georef_status]]` | Folded. | +| `test_attrs_contract_canonical_1984.py::test_canonical_keys_present_per_backend[eager-numpy]` | `attrs/test_contract.py::test_canonical_keys_present_per_backend[canonical[eager-numpy]]` | Param IDs renamed to the dimensional scheme; marker now `requires_gpu` from `_helpers/markers.py`. | +| `test_attrs_contract_canonical_1984.py::test_canonical_keys_present_per_backend[dask-numpy]` | `attrs/test_contract.py::test_canonical_keys_present_per_backend[canonical[dask-numpy]]` | Same. | +| `test_attrs_contract_canonical_1984.py::test_canonical_keys_present_per_backend[gpu]` | `attrs/test_contract.py::test_canonical_keys_present_per_backend[canonical[gpu]]` | Same; marker swapped to `requires_gpu`. | +| `test_attrs_contract_canonical_1984.py::test_canonical_keys_present_per_backend[dask-gpu]` | `attrs/test_contract.py::test_canonical_keys_present_per_backend[canonical[dask-gpu]]` | Same. | +| `test_attrs_contract_canonical_1984.py::test_raster_type_area_omitted_on_roundtrip` | `attrs/test_contract.py::test_canonical_raster_type_area_omitted_on_roundtrip` | Renamed for prefix consistency. | +| `test_attrs_contract_canonical_1984.py::test_raster_type_point_roundtrip` | `attrs/test_contract.py::test_canonical_raster_type_point_roundtrip` | Renamed. | + +## Aliases tier mapping + +| Old `file::test` | New `file::test_id` | Notes | +|---|---|---| +| `test_attrs_contract_aliases_1984.py::test_read_uses_nodatavals_when_nodata_absent` | `attrs/test_contract.py::test_alias_read_used_when_nodata_absent[alias[nodatavals->nodata]]` | Folded into the two-alias parametrize. | +| `test_attrs_contract_aliases_1984.py::test_read_uses_fill_value_when_nodata_absent` | `attrs/test_contract.py::test_alias_read_used_when_nodata_absent[alias[_FillValue->nodata]]` | Folded. | +| `test_attrs_contract_aliases_1984.py::test_canonical_nodata_wins_over_aliases_at_resolver` | `attrs/test_contract.py::test_alias_canonical_nodata_wins_at_resolver` | Renamed for prefix. | +| `test_attrs_contract_aliases_1984.py::test_canonical_nodata_wins_over_aliases_at_write` | `attrs/test_contract.py::test_alias_canonical_nodata_wins_at_write` | Renamed; `ConflictingNodataError` import moved to module top. | +| `test_attrs_contract_aliases_1984.py::test_write_does_not_emit_aliases_when_canonical_present` | `attrs/test_contract.py::test_alias_write_does_not_emit_aliases_when_canonical_present` | Renamed. | +| `test_attrs_contract_aliases_1984.py::test_nan_in_nodatavals_is_skipped` | `attrs/test_contract.py::test_alias_nan_in_nodatavals_is_skipped` | Renamed. | + +## Passthrough tier mapping + +| Old `file::test` | New `file::test_id` | Notes | +|---|---|---| +| `test_attrs_contract_passthrough_1984.py::test_passthrough_cases_cover_all_keys` | `attrs/test_contract.py::test_passthrough_cases_cover_all_keys` | Verbatim. | +| `test_attrs_contract_passthrough_1984.py::test_passthrough_key_roundtrip[image_description]` | `attrs/test_contract.py::test_passthrough_key_roundtrip[passthrough[image_description]]` | Param ID prefixed with the tier. | +| `test_attrs_contract_passthrough_1984.py::test_passthrough_key_roundtrip[extra_samples]` | `attrs/test_contract.py::test_passthrough_key_roundtrip[passthrough[extra_samples]]` | Same. | +| `test_attrs_contract_passthrough_1984.py::test_passthrough_key_roundtrip[colormap]` | `attrs/test_contract.py::test_passthrough_key_roundtrip[passthrough[colormap]]` | Same. | +| `test_attrs_contract_passthrough_1984.py::test_passthrough_dropped_when_no_crs` | `attrs/test_contract.py::test_passthrough_dropped_when_no_crs` | Verbatim. | +| `test_attrs_contract_passthrough_1984.py::test_passthrough_does_not_promote_to_canonical` | `attrs/test_contract.py::test_passthrough_does_not_promote_to_canonical` | Verbatim. | +| `test_attrs_contract_passthrough_1984.py::test_removed_attrs_not_emitted` | `attrs/test_contract.py::test_passthrough_removed_attrs_not_emitted` | Renamed for prefix. | +| `test_attrs_contract_passthrough_1984.py::test_removed_attrs_absent_after_roundtrip` | `attrs/test_contract.py::test_passthrough_removed_attrs_absent_after_roundtrip` | Renamed. | +| `test_attrs_contract_passthrough_1984.py::test_contract_version_is_current` | `attrs/test_contract.py::test_passthrough_contract_version_is_current` | Renamed; assertion unchanged. | + +## Version tier mapping + +| Old `file::test` | New `file::test_id` | Notes | +|---|---|---| +| `test_attrs_contract_version_1984.py::test_attrs_contract_version_constant_is_current` | `attrs/test_contract.py::test_version_constant_is_current` | Renamed for prefix. | +| `test_attrs_contract_version_1984.py::test_attrs_module_docstring_version_matches_constant` | `attrs/test_contract.py::test_version_module_docstring_matches_constant` | Renamed. | +| `test_attrs_contract_version_1984.py::test_eager_numpy_stamps_contract_version` | `attrs/test_contract.py::test_version_stamp_present_per_tiff_backend[version[eager-numpy]]` | Folded into TIFF-backend parametrize. | +| `test_attrs_contract_version_1984.py::test_dask_numpy_stamps_contract_version` | `attrs/test_contract.py::test_version_stamp_present_per_tiff_backend[version[dask-numpy]]` | Folded. | +| `test_attrs_contract_version_1984.py::test_gpu_stamps_contract_version` | `attrs/test_contract.py::test_version_stamp_present_per_tiff_backend[version[gpu]]` | Folded; marker swapped to `requires_gpu`. | +| `test_attrs_contract_version_1984.py::test_dask_gpu_stamps_contract_version` | `attrs/test_contract.py::test_version_stamp_present_per_tiff_backend[version[dask-gpu]]` | Folded. | +| `test_attrs_contract_version_1984.py::test_vrt_eager_stamps_contract_version` | `attrs/test_contract.py::test_version_stamp_present_per_vrt_backend[version[vrt-eager]]` | Folded into VRT-backend parametrize. | +| `test_attrs_contract_version_1984.py::test_vrt_chunked_stamps_contract_version` | `attrs/test_contract.py::test_version_stamp_present_per_vrt_backend[version[vrt-chunked]]` | Folded. | + +## Coverage parity check + +Old surface (deduplicated functions/params): +- Canonical: 1 presence test + 10 per-key value tests + 4 per-backend + params + 2 raster_type tests = 17 cases. +- Aliases: 6 tests (2 read-side, 2 precedence, 1 write-side, 1 NaN). +- Passthrough: 1 covers-all + 3 param cases + 5 misc = 9 cases. +- Version: 2 constant/docstring + 4 TIFF backends + 2 VRT backends = 8. +- Total old: 40 cases. + +New surface: +- Canonical: 1 + 10 (parametrize) + 4 (parametrize) + 2 = 17 cases. +- Aliases: 2 (parametrize) + 4 = 6 cases. +- Passthrough: 1 + 3 (parametrize) + 5 = 9 cases. +- Version: 2 + 4 (parametrize) + 2 (parametrize) = 8 cases. +- Total new: 40 cases. **No coverage drop.** + +## Out of scope (intentionally untouched) + +- `test_attrs_finalization_parity_2211.py` — parity-flavoured (PR 4). +- `test_attrs_parity_1548.py` — parity (PR 4). +- `test_attrs_kwarg_parity_1561.py` — parity (PR 4). +- `test_release_gate_attrs_contract.py` — release-gate registry (PR 10). +- `test_nodata_attr_aliases_1582.py` — finer-grained alias regression + tests; the original aliases file noted these as a sibling, not as + contract scope. Stays for now; can fold into a later cluster if it + fits. + +## Roundtrip slice deferral + +The epic mentions a future `attrs/test_roundtrip.py`. The four source +files do not contain a clean roundtrip slice that would naturally fall +out during this consolidation: every roundtrip case here is bound to a +specific tier (canonical key presence, alias promotion, passthrough +reconstruction) and is exercised inside that tier's section. Adding a +standalone `test_roundtrip.py` now would duplicate the canonical +fixture without adding coverage. Leaving for a follow-up so it can be +designed against a real coverage gap. diff --git a/xrspatial/geotiff/tests/attrs/__init__.py b/xrspatial/geotiff/tests/attrs/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/xrspatial/geotiff/tests/attrs/test_contract.py b/xrspatial/geotiff/tests/attrs/test_contract.py new file mode 100644 index 000000000..dfb4adfae --- /dev/null +++ b/xrspatial/geotiff/tests/attrs/test_contract.py @@ -0,0 +1,976 @@ +"""Consolidated attrs-contract tests for the GeoTIFF module. + +This file folds in the four ``test_attrs_contract_*_1984.py`` files +that previously lived at the top of ``xrspatial/geotiff/tests/``: + +* ``canonical`` -- keys xrspatial owns and guarantees round-trip + stable through ``to_geotiff`` -> ``open_geotiff``. +* ``aliases`` -- compatibility aliases (rioxarray ``nodatavals``, + CF ``_FillValue``) honoured on read when canonical + ``nodata`` is absent. +* ``passthrough`` -- best-effort tier: writer does not consume, reader + rebuilds the value from a TIFF tag if one survived. +* ``version`` -- the ``attrs['_xrspatial_geotiff_contract']`` stamp, + emitted by every read backend (eager numpy, dask, + GPU, dask+GPU, VRT eager, VRT chunked). + +The four dimensions stay separate top-level sections in this file. Tests +are parametrised within each dimension and given descriptive IDs +(``canonical[GDAL_NODATA]``, ``alias[nodatavals->nodata]``, etc.) rather +than per-issue filenames. Git history remains the audit trail for which +issue introduced each assertion. + +The release-gate sibling (``test_release_gate_attrs_contract.py``) is +intentionally not folded here; it lives on its own under PR 10 of the +epic. Parity-flavoured siblings (``test_attrs_parity_1548.py``, +``test_attrs_finalization_parity_2211.py``, +``test_attrs_kwarg_parity_1561.py``) belong with PR 4's parity cluster. +""" +from __future__ import annotations + +import os +import re +import warnings + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import ( + ConflictingNodataError, + open_geotiff, + read_vrt, + to_geotiff, +) +from xrspatial.geotiff import _attrs as _attrs_module +from xrspatial.geotiff._attrs import _ATTRS_CONTRACT_VERSION, _resolve_nodata_attr + +from .._helpers.markers import requires_gpu + +tifffile = pytest.importorskip("tifffile") + + +_CONTRACT_KEY = '_xrspatial_geotiff_contract' + + +# =========================================================================== +# Canonical tier +# +# Keys xrspatial owns; round-trip is guaranteed through write -> read. +# Sourced from ``test_attrs_contract_canonical_1984.py``. +# =========================================================================== + +# Every key the canonical tier guarantees round-trip stable. Order +# mirrors ``docs/source/user_guide/attrs_contract.rst`` so a diff here +# lines up with a diff in the contract docs. +# +# ``raster_type`` is canonical but absent from this constant: the +# implicit default 'area' is encoded as *absence* of the attr, so the +# "must be present after round-trip" check below cannot express it. +# The dedicated ``test_raster_type_*`` tests below lock both branches. +_CANONICAL_KEYS = ( + 'crs', + 'crs_wkt', + 'transform', + 'nodata', + 'extra_tags', + 'gdal_metadata', + 'gdal_metadata_xml', + 'x_resolution', + 'y_resolution', + 'resolution_unit', + # Added in contract v3 (issue #2136). The fixture is georef + CRS + # so the round-tripped value is the ``'full'`` literal; value + # equality lives on ``test_canonical[georef_status]`` below. + 'georef_status', + _CONTRACT_KEY, +) + +# Fixture values, written into ``attrs`` on the synthetic DataArray and +# compared to the read-back attrs after round-trip. ``transform`` is +# pinned by ``y`` / ``x`` coords so we expect that exact 6-tuple back. +_NODATA_SENTINEL = -9999.0 +_X_RES = 300.0 +_Y_RES = 300.0 +_RES_UNIT = 'inch' +# Software tag (305, ASCII). Benign (no spatial interpretation, no +# security filter) and tifffile decodes it as-is. Count includes the +# trailing NUL byte. The Software identity also appears as +# ``gdal_metadata['TIFFTAG_SOFTWARE']``; the two are independent +# channels (raw TIFF tag vs an entry in the GDAL_METADATA XML payload) +# and the writer does not synchronise them. +_GDAL_META = {'TIFFTAG_SOFTWARE': 'xrspatial-1984'} +_SOFTWARE_STR = 'xrspatial-canonical-1984' +_EXTRA_TAGS = [(305, 2, len(_SOFTWARE_STR) + 1, _SOFTWARE_STR)] + + +def _make_canonical_da(): + """Build a synthetic DataArray exercising every canonical attr. + + Returns the DataArray plus the expected ``transform`` tuple so the + test can assert on the round-tripped value without recomputing it. + """ + h, w = 4, 4 + data = np.arange(h * w, dtype=np.float32).reshape(h, w) + # Pin a non-identity transform so the round-trip check catches a + # writer that drops the tiepoint / pixel-scale tags. Coords are + # interpreted as pixel centres; the emitted transform's origin is + # the top-left corner, so origin_x = 100 - 10/2 = 95 and + # origin_y = 240 - (-10)/2 = 245. + x = np.array([100.0, 110.0, 120.0, 130.0], dtype=np.float64) + y = np.array([240.0, 230.0, 220.0, 210.0], dtype=np.float64) + expected_transform = (10.0, 0.0, 95.0, 0.0, -10.0, 245.0) + + da = xr.DataArray( + data, dims=('y', 'x'), coords={'y': y, 'x': x}, + attrs={ + 'crs': 4326, + 'nodata': _NODATA_SENTINEL, + 'extra_tags': list(_EXTRA_TAGS), + 'gdal_metadata': dict(_GDAL_META), + 'x_resolution': _X_RES, + 'y_resolution': _Y_RES, + 'resolution_unit': _RES_UNIT, + }, + ) + return da, expected_transform + + +@pytest.fixture +def canonical_roundtrip(tmp_path): + """Round-trip the canonical fixture through write -> read. + + Returns ``(read_da, expected_transform)``. Scoped to one round-trip + per test so per-key assertions stay independent and a single failure + points at one key rather than cascading. + """ + da, expected_transform = _make_canonical_da() + path = str(tmp_path / 'attrs_contract_canonical.tif') + # The canonical fixture carries ``extra_tags``; the rich-tag gate + # (#2340) exempts attrs carrying ``_xrspatial_geotiff_contract`` + # (set by the readers), but this fixture builds a fresh DataArray + # without the marker, so the initial write must opt in. + to_geotiff(da, path, allow_experimental_codecs=True) + rd = open_geotiff(path) + return rd, expected_transform + + +def test_canonical_every_key_present(canonical_roundtrip): + """Pin the canonical key set after a round-trip. + + A writer that drops one canonical key (e.g. forgets to emit + GDAL_METADATA) shows up here as one missing key rather than as a + later equality failure with a less obvious cause. + """ + rd, _ = canonical_roundtrip + missing = sorted(k for k in _CANONICAL_KEYS if k not in rd.attrs) + assert missing == [], ( + f"canonical attrs missing after round-trip: {missing}. " + f"attrs keys present: {sorted(rd.attrs.keys())}" + ) + + +def _check_canonical_crs(rd, _expected_transform): + assert rd.attrs['crs'] == 4326 + + +def _check_canonical_crs_wkt(rd, _expected_transform): + """``crs_wkt`` is reader-emitted from the EPSG code. Pin presence + and the CRS-identity substring callers rely on. The exact WKT is + PROJ-version dependent, so match ``WGS 84`` / ``WGS_1984`` / + ``WGS-84`` with one regex rather than a single literal.""" + wkt = rd.attrs['crs_wkt'] + assert isinstance(wkt, str) and len(wkt) > 0 + assert re.search(r'WGS[\s_-]?84|WGS_1984', wkt), ( + f"crs_wkt round-trip lost the CRS identity: {wkt!r}" + ) + + +def _check_canonical_transform(rd, expected_transform): + t = tuple(rd.attrs['transform']) + assert t == pytest.approx(expected_transform), ( + f"transform round-trip mismatch.\n expected: {expected_transform}\n" + f" got : {t}" + ) + + +def _check_canonical_nodata(rd, _expected_transform): + assert rd.attrs['nodata'] == _NODATA_SENTINEL + + +def _check_canonical_extra_tags(rd, _expected_transform): + """A non-friendly extra_tags entry (Software, 305) round-trips + intact. The writer must preserve unknown tags so users can attach + arbitrary metadata.""" + got = rd.attrs['extra_tags'] + # Look up tag 305 specifically; ordering and any reader-added + # entries are not part of the contract for this assertion. + by_id = {t[0]: t for t in got} + assert 305 in by_id, ( + f"Software tag (305) missing from read-back extra_tags: {got}" + ) + tag_id, type_id, _count, value = by_id[305] + assert tag_id == 305 + assert type_id == 2 # TIFF ASCII + assert value == _SOFTWARE_STR + + +def _check_canonical_gdal_metadata(rd, _expected_transform): + """The parsed dict survives the round-trip key-by-key. Allow extra + entries the reader might inject (e.g. ``STATISTICS_*``) so this + test is not a tripwire for unrelated reader changes.""" + got = rd.attrs['gdal_metadata'] + assert isinstance(got, dict), f"gdal_metadata is not a dict: {got!r}" + for k, v in _GDAL_META.items(): + assert got.get(k) == v, ( + f"gdal_metadata[{k!r}] mismatch.\n expected: {v!r}\n" + f" got : {got.get(k)!r}\n full read-back: {got!r}" + ) + + +def _check_canonical_gdal_metadata_xml(rd, _expected_transform): + """The raw XML string is reconstructed by the writer from the + ``gdal_metadata`` dict. Pin presence + the substring that proves + our fixture survived; the exact XML formatting is writer-dependent. + """ + xml = rd.attrs['gdal_metadata_xml'] + assert isinstance(xml, str) and xml.startswith('') + assert 'xrspatial-1984' in xml, ( + f"gdal_metadata_xml lost the fixture marker: {xml!r}" + ) + + +def _check_canonical_resolution_group(rd, _expected_transform): + """``x_resolution`` / ``y_resolution`` / ``resolution_unit`` are + written and read as one logical unit -- pin them together so a + writer that drops one but keeps the others fails here.""" + assert rd.attrs['x_resolution'] == pytest.approx(_X_RES) + assert rd.attrs['y_resolution'] == pytest.approx(_Y_RES) + assert rd.attrs['resolution_unit'] == _RES_UNIT + + +def _check_canonical_contract_version(rd, _expected_transform): + """``_xrspatial_geotiff_contract`` is stamped on every read; pin + that the canonical fixture sees the current version. Per-backend + coverage lives in the version section below.""" + assert rd.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION + + +def _check_canonical_georef_status(rd, _expected_transform): + """``georef_status`` (issue #2136) is canonical from contract v3. + The fixture sets ``crs`` + axis-aligned transform-from-coords, so + the round-tripped value is the ``'full'`` literal.""" + assert rd.attrs['georef_status'] == 'full' + + +# (id_suffix, check_callable). One row per canonical key with a +# value-level assertion. Parametrised so a single failure names the +# offending key in the test ID rather than firing inside a per-key +# helper buried in a generic test. +_CANONICAL_VALUE_CHECKS = [ + ('crs', _check_canonical_crs), + ('crs_wkt', _check_canonical_crs_wkt), + ('transform', _check_canonical_transform), + ('GDAL_NODATA', _check_canonical_nodata), + ('extra_tags', _check_canonical_extra_tags), + ('gdal_metadata', _check_canonical_gdal_metadata), + ('gdal_metadata_xml', _check_canonical_gdal_metadata_xml), + ('resolution_group', _check_canonical_resolution_group), + ('contract_version', _check_canonical_contract_version), + ('georef_status', _check_canonical_georef_status), +] + + +@pytest.mark.parametrize( + 'check', [c[1] for c in _CANONICAL_VALUE_CHECKS], + ids=[f'canonical[{c[0]}]' for c in _CANONICAL_VALUE_CHECKS], +) +def test_canonical_value_roundtrip(canonical_roundtrip, check): + """Per-key value-equality check after a write -> read round-trip.""" + rd, expected_transform = canonical_roundtrip + check(rd, expected_transform) + + +# Per-backend coverage for canonical-key *presence*. +# +# Read-time backends share ``_populate_attrs_from_geo_info``, so per-key +# value round-trips are pinned once (above) on the eager numpy path. +# What the per-backend check guards against is a backend skipping the +# shared helper or building its attrs dict independently. The version +# stamp test does this for one canonical key, and the loop below does +# it for the rest. + + +def _open_eager(path): + return open_geotiff(path) + + +def _open_dask(path): + return open_geotiff(path, chunks=2) + + +def _open_gpu(path): + return open_geotiff(path, gpu=True) + + +def _open_dask_gpu(path): + return open_geotiff(path, gpu=True, chunks=2) + + +_BACKEND_OPENERS = [ + pytest.param(_open_eager, id='canonical[eager-numpy]'), + pytest.param(_open_dask, id='canonical[dask-numpy]'), + pytest.param(_open_gpu, id='canonical[gpu]', marks=requires_gpu), + pytest.param(_open_dask_gpu, id='canonical[dask-gpu]', marks=requires_gpu), +] + + +@pytest.mark.parametrize('opener', _BACKEND_OPENERS) +def test_canonical_keys_present_per_backend(tmp_path, opener): + """Each read backend emits the full canonical key set. + + Writes the canonical fixture once with the eager writer (only the + eager + dask writers exist; the GPU writer is exercised in its own + parity tests), then re-reads through every supported backend and + asserts presence. Value-level round-trip is checked by the per-key + cases above; this loop guards against a backend that bypasses + ``_populate_attrs_from_geo_info``. + """ + da, _ = _make_canonical_da() + path = str(tmp_path / f'canonical_{opener.__name__}.tif') + # Fresh DataArray with ``extra_tags`` -> rich-tag opt-in required + # (see ``canonical_roundtrip`` fixture for details). + to_geotiff(da, path, allow_experimental_codecs=True) + + rd = opener(path) + missing = sorted(k for k in _CANONICAL_KEYS if k not in rd.attrs) + assert missing == [], ( + f"{opener.__name__}: canonical attrs missing after round-trip: " + f"{missing}. attrs keys present: {sorted(rd.attrs.keys())}" + ) + + +# ``raster_type`` lives outside the shared fixture because the canonical +# default ('area') is encoded as *absence* in attrs. The two branches +# need different fixtures. + + +def test_canonical_raster_type_area_omitted_on_roundtrip(tmp_path): + """RasterPixelIsArea is the implicit default and is encoded as + *absence* of ``attrs['raster_type']``. A DataArray with no + ``raster_type`` attr round-trips to a DataArray that still has no + ``raster_type`` attr.""" + da, _ = _make_canonical_da() + assert 'raster_type' not in da.attrs + path = str(tmp_path / 'raster_type_area.tif') + to_geotiff(da, path, allow_experimental_codecs=True) + rd = open_geotiff(path) + assert 'raster_type' not in rd.attrs, ( + f"area is the implicit default but the reader emitted " + f"raster_type={rd.attrs['raster_type']!r}" + ) + + +def test_canonical_raster_type_point_roundtrip(tmp_path): + """``raster_type='point'`` is the only value the writer accepts via + attrs; the reader emits it back on a round-trip.""" + da, _ = _make_canonical_da() + da.attrs['raster_type'] = 'point' + path = str(tmp_path / 'raster_type_point.tif') + to_geotiff(da, path, allow_experimental_codecs=True) + rd = open_geotiff(path) + assert rd.attrs.get('raster_type') == 'point' + + +# =========================================================================== +# Aliases tier +# +# Compatibility aliases (rioxarray ``nodatavals``, CF ``_FillValue``) +# are honoured on read when canonical ``nodata`` is absent. The write +# side emits only ``nodata``, never the aliases. Sourced from +# ``test_attrs_contract_aliases_1984.py``. +# =========================================================================== + +# Arbitrary float-castable sentinel distinct from any data value below. +# The value itself does not matter; the tests assert it survives the +# alias resolver and the round-trip. +_ALIAS_SENTINEL = -9999.0 + + +def _da_float(arr, **attrs): + return xr.DataArray( + arr, dims=["y", "x"], + coords={"y": np.arange(arr.shape[0], dtype=np.float64), + "x": np.arange(arr.shape[1], dtype=np.float64)}, + attrs=attrs, + ) + + +@pytest.fixture +def _arr_with_sentinel(): + return np.array( + [[1.0, 2.0, _ALIAS_SENTINEL], [3.0, _ALIAS_SENTINEL, 5.0]], + dtype=np.float32, + ) + + +# (id_suffix, alias_key). The two read-side aliases the resolver +# accepts when ``attrs['nodata']`` is absent. +_READ_ALIASES = [ + ('nodatavals->nodata', 'nodatavals'), + ('_FillValue->nodata', '_FillValue'), +] + + +@pytest.mark.parametrize( + 'alias_key', [a[1] for a in _READ_ALIASES], + ids=[f'alias[{a[0]}]' for a in _READ_ALIASES], +) +def test_alias_read_used_when_nodata_absent( + tmp_path, _arr_with_sentinel, alias_key): + """Pin: a foreign DataArray carrying only an alias round-trips + through ``to_geotiff`` with the sentinel preserved. + + Covers both rioxarray ``nodatavals`` (tuple of one) and CF + ``_FillValue`` (scalar) in one parametrised case. + """ + if alias_key == 'nodatavals': + alias_value = (_ALIAS_SENTINEL,) + else: + alias_value = _ALIAS_SENTINEL + da = _da_float(_arr_with_sentinel, crs=4326, **{alias_key: alias_value}) + assert "nodata" not in da.attrs # precondition: only the alias is set + out = str(tmp_path / f"alias_{alias_key}.tif") + to_geotiff(da, out) + + rd = open_geotiff(out) + assert rd.attrs.get("nodata") == _ALIAS_SENTINEL + + +def test_alias_canonical_nodata_wins_at_resolver(_arr_with_sentinel): + """``_resolve_nodata_attr`` (the read-side chokepoint) still prefers + canonical ``nodata`` over both aliases. This is the resolver-layer + contract that downstream code consumes; the write-side fail-closed + check is exercised below in + ``test_alias_canonical_nodata_wins_at_write``.""" + canonical = -8888.0 + da = _da_float( + _arr_with_sentinel, crs=4326, + nodata=canonical, + nodatavals=(_ALIAS_SENTINEL,), + **{"_FillValue": -7777.0}, + ) + assert _resolve_nodata_attr(dict(da.attrs)) == canonical + + +def test_alias_canonical_nodata_wins_at_write( + tmp_path, _arr_with_sentinel): + """Pin: the legacy "canonical nodata silently wins on write" was + replaced by a fail-closed raise (issue #1987). A DataArray with + disagreeing ``nodata`` and ``nodatavals`` attrs refuses to write + instead of dropping the alias. The opt-out is the explicit + ``nodata=`` kwarg, which overrides both attrs.""" + da = _da_float( + _arr_with_sentinel, crs=4326, + nodata=-8888.0, + nodatavals=(_ALIAS_SENTINEL,), + **{"_FillValue": -7777.0}, + ) + + out = str(tmp_path / "canonical_wins.tif") + with pytest.raises(ConflictingNodataError): + to_geotiff(da, out) + + # Explicit ``nodata=`` bypasses the check. + to_geotiff(da, out, nodata=-8888.0) + rd = open_geotiff(out) + assert rd.attrs.get("nodata") == -8888.0 + + +def test_alias_write_does_not_emit_aliases_when_canonical_present( + tmp_path, _arr_with_sentinel): + """Pin: round-tripping a DataArray that has only ``attrs['nodata']`` + set returns attrs containing ``nodata`` and not the two aliases. + + Reader paths emit a single canonical key; this guards against a + future change that copies the resolved value into the rioxarray or + CF alias slot as well (which would silently double-write the + sentinel and confuse downstream consumers that special-case one + alias over another).""" + da = _da_float(_arr_with_sentinel, crs=4326, nodata=_ALIAS_SENTINEL) + out = str(tmp_path / "canonical_only.tif") + to_geotiff(da, out) + + rd = open_geotiff(out) + assert rd.attrs.get("nodata") == _ALIAS_SENTINEL + assert "nodatavals" not in rd.attrs, ( + f"reader emitted rioxarray alias alongside canonical nodata: " + f"attrs={sorted(rd.attrs)}" + ) + assert "_FillValue" not in rd.attrs, ( + f"reader emitted CF alias alongside canonical nodata: " + f"attrs={sorted(rd.attrs)}" + ) + + +def test_alias_nan_in_nodatavals_is_skipped(): + """Pin: NaN entries in ``attrs['nodatavals']`` are not a sentinel. + + NaN means the float NaN is already the implicit sentinel, which + doesn't need a GDAL_NODATA tag. The resolver skips NaN entries and + (in a mixed tuple) returns the first concrete numeric value.""" + # Pure-NaN tuple resolves to None. + assert _resolve_nodata_attr({"nodatavals": (float("nan"),)}) is None + # NaN followed by a concrete sentinel: the resolver skips NaN and + # returns the concrete value. + assert _resolve_nodata_attr( + {"nodatavals": (float("nan"), _ALIAS_SENTINEL)}) == _ALIAS_SENTINEL + # None entries are also skipped (rioxarray uses None for "no + # sentinel on this band"). + assert _resolve_nodata_attr( + {"nodatavals": (None, _ALIAS_SENTINEL)}) == _ALIAS_SENTINEL + + +# =========================================================================== +# Passthrough tier +# +# Writer does not consume; reader rebuilds the value from a TIFF tag +# if one survived. Sourced from +# ``test_attrs_contract_passthrough_1984.py``. +# =========================================================================== + +# Full set of pass-through keys defined by the contract. Contract v2 +# (issue #2016) trimmed this set to the three TIFF-tag-derived keys +# that actually round-trip via ``_merge_friendly_extra_tags``. +_ALL_PASSTHROUGH_KEYS = ( + 'image_description', + 'extra_samples', + 'colormap', +) + + +# Attrs that the reader emitted under a ``DeprecationWarning`` in +# contract v1 and that contract v2 (issue #2016) removed entirely. +_REMOVED_IN_V2_ATTRS = ( + 'crs_name', + 'geog_citation', + 'datum_code', + 'angular_units', + 'linear_units', + 'semi_major_axis', + 'inv_flattening', + 'projection_code', + 'vertical_crs', + 'vertical_citation', + 'vertical_units', + 'colormap_rgba', + 'cmap', +) + + +def _make_passthrough_da(crs=None, attrs=None, shape=(4, 4), dtype=np.float32): + """Build a minimal georeferenced DataArray with optional CRS and attrs.""" + data = np.ones(shape, dtype=dtype) + h, w = shape + coords = { + 'y': np.arange(h, 0, -1, dtype=np.float64), + 'x': np.arange(w, dtype=np.float64), + } + a = dict(attrs) if attrs else {} + if crs is not None: + a['crs'] = crs + return xr.DataArray(data, dims=('y', 'x'), coords=coords, attrs=a) + + +def _passthrough_roundtrip(tmp_path, da, name='roundtrip.tif'): + """Write ``da`` to ``tmp_path/name`` and read it back.""" + path = str(tmp_path / name) + to_geotiff(da, path) + return open_geotiff(path) + + +# (key, crs_to_use, value_set_on_write_or_None, expected_outcome) +# +# ``crs_to_use``: the CRS attached to the test DataArray; 4326 is fine +# for every remaining key. +# ``value_set_on_write``: the value the test sets on ``da.attrs`` +# before write. +# ``expected``: ``'reconstructible'`` (key must be present in the +# read-back attrs and equal to the written value) or ``'dropped'`` (key +# must be absent). After contract v2 (issue #2016), every row carries +# ``'reconstructible'``; the ``'dropped'`` arm is kept so a future +# addition can use it without restructuring the test. +_PASSTHROUGH_CASES = [ + # Non-GeoKey tag passthroughs. The writer folds these into + # extra_tags via _merge_friendly_extra_tags, so the reader can + # rebuild them. + ('image_description', 4326, 'pr1984 fixture', 'reconstructible'), + ('extra_samples', 4326, (1,), 'reconstructible'), + # ``colormap`` round-trips as the raw uint16 triple list. The TIFF + # ColorMap tag (320) stores RGB triples as uint16 values in the + # 0-65535 range. Values below are written as-is and compared by + # equality after the round-trip; if the writer ever rescales 8-bit + # input to 16-bit (or vice versa), update this fixture rather than + # the contract. + ( + 'colormap', 4326, + tuple([0] * 256 + [128] * 256 + [255] * 256), + 'reconstructible', + ), +] + + +def test_passthrough_cases_cover_all_keys(): + """``_PASSTHROUGH_CASES`` and ``_ALL_PASSTHROUGH_KEYS`` carry the + same set in two forms. Pin them so a key added to one list and + forgotten on the other fails here rather than silently skipping + coverage in ``test_passthrough_dropped_when_no_crs``.""" + case_keys = {c[0] for c in _PASSTHROUGH_CASES} + assert case_keys == set(_ALL_PASSTHROUGH_KEYS), ( + f"_PASSTHROUGH_CASES and _ALL_PASSTHROUGH_KEYS diverge.\n" + f" only in cases: {sorted(case_keys - set(_ALL_PASSTHROUGH_KEYS))}\n" + f" only in keys : {sorted(set(_ALL_PASSTHROUGH_KEYS) - case_keys)}" + ) + + +@pytest.mark.parametrize( + 'key,crs,value,expected', + _PASSTHROUGH_CASES, + ids=[f'passthrough[{c[0]}]' for c in _PASSTHROUGH_CASES], +) +def test_passthrough_key_roundtrip(tmp_path, key, crs, value, expected): + """Lock per-key round-trip outcome for the pass-through attr tier.""" + attrs = {} + if value is not None: + attrs[key] = value + da = _make_passthrough_da(crs=crs, attrs=attrs) + # Single-band uint8 needed for the colormap tag to be valid in + # TIFF. + if key == 'colormap': + da = _make_passthrough_da(crs=crs, attrs=attrs, dtype=np.uint8) + + rd = _passthrough_roundtrip(tmp_path, da, name=f'{key}.tif') + + if expected == 'reconstructible': + assert key in rd.attrs, ( + f"pass-through key {key!r} was expected to round-trip but is " + f"absent. attrs keys present: {sorted(rd.attrs.keys())}" + ) + if value is not None: + got = rd.attrs[key] + if isinstance(value, tuple): + assert tuple(got) == value, ( + f"{key!r}: value mismatch on round-trip\n" + f" written: {value}\n" + f" read : {got}" + ) + else: + assert got == value, ( + f"{key!r}: value mismatch on round-trip\n" + f" written: {value!r}\n" + f" read : {got!r}" + ) + else: # 'dropped' + assert key not in rd.attrs, ( + f"pass-through key {key!r} was expected to drop on round-trip " + f"(writer does not emit a tag the reader can rebuild it from) " + f"but it is present with value {rd.attrs[key]!r}. If a writer " + f"change started emitting this key, decide whether to promote " + f"the key to canonical (issue #1984) and update this test." + ) + + +def test_passthrough_dropped_when_no_crs(tmp_path): + """Files without a CRS do not surface any pass-through attrs.""" + da = _make_passthrough_da(crs=None) + rd = _passthrough_roundtrip(tmp_path, da, name='no_crs.tif') + + present = sorted(k for k in _ALL_PASSTHROUGH_KEYS if k in rd.attrs) + assert present == [], ( + f"pass-through keys leaked into a no-CRS round-trip: {present}. " + f"All keys present: {sorted(rd.attrs.keys())}" + ) + # Sanity: no CRS attrs either. + assert 'crs' not in rd.attrs + assert 'crs_wkt' not in rd.attrs + + +def test_passthrough_does_not_promote_to_canonical(tmp_path): + """Setting legacy GeoKey-derived attrs without a CRS does not + inject one. + + Contract v2 (issue #2016) removed these keys from the reader's + emission set, but a user with a hand-built ``DataArray`` may still + set them. The writer treats them as advisory and never synthesises + a CRS from them; this test pins that invariant. + """ + # Mix of GeoKey-derived keys, but no ``crs`` / ``crs_wkt``. If the + # writer ever started inferring a CRS from these (e.g. picking 4326 + # because angular_units == 'degree') this test would fail. + attrs = { + 'crs_name': 'WGS 84', + 'geog_citation': 'WGS 84', + 'angular_units': 'degree', + 'linear_units': 'metre', + 'semi_major_axis': 6378137.0, + 'inv_flattening': 298.257223563, + 'datum_code': 6326, + } + da = _make_passthrough_da(crs=None, attrs=attrs) + + with warnings.catch_warnings(): + # The writer warns on user-defined-CRS WKT writes; we are not + # on that path here, but suppress generously so a future + # warning tweak does not turn this test into a warning + # regression test. + warnings.simplefilter('ignore') + rd = _passthrough_roundtrip( + tmp_path, da, name='no_crs_with_attrs.tif') + + assert 'crs' not in rd.attrs, ( + f"pass-through attrs caused the writer to synthesise a CRS: " + f"crs={rd.attrs.get('crs')!r}. The contract says pass-through " + f"attrs are advisory only; the writer must rely on attrs['crs'] " + f"or attrs['crs_wkt'] to emit georeferencing." + ) + assert 'crs_wkt' not in rd.attrs, ( + f"pass-through attrs caused the writer to synthesise a CRS WKT: " + f"crs_wkt={rd.attrs.get('crs_wkt')!r}." + ) + + +def test_passthrough_removed_attrs_not_emitted(tmp_path): + """Contract v2 (issue #2016) removed 13 deprecated reader attrs. + + A freshly read DataArray does not carry any of them, even when the + underlying GeoTIFF's GeoKey directory advertises the values. Pins + the removal so a regression that re-adds an emit site fails here + rather than silently leaking the attr back into the public surface. + """ + da = _make_passthrough_da(crs=4326) + rd = _passthrough_roundtrip( + tmp_path, da, name='removed_attrs_no_emit.tif') + + leaked = sorted(k for k in _REMOVED_IN_V2_ATTRS if k in rd.attrs) + assert leaked == [], ( + f"contract v2 attrs leaked into a fresh read: {leaked}. " + f"All attrs keys present: {sorted(rd.attrs.keys())}. Issue " + f"#2016 dropped these from the reader; re-emitting them is a " + f"behaviour regression." + ) + + +def test_passthrough_removed_attrs_absent_after_roundtrip(tmp_path): + """A write -> read cycle does not resurrect any v2-removed attr. + + Even when the input ``DataArray`` carries every removed attr as a + legacy value, the writer ignores them and the reader never adds + them back. The reopened DataArray's attrs is the public surface + callers rely on. + """ + legacy_payload = { + 'crs_name': 'WGS 84', + 'geog_citation': 'WGS 84', + 'datum_code': 6326, + 'angular_units': 'degree', + 'linear_units': 'metre', + 'semi_major_axis': 6378137.0, + 'inv_flattening': 298.257223563, + 'projection_code': 16033, + 'vertical_crs': 5703, + 'vertical_citation': 'NAVD88', + 'vertical_units': 'metre', + 'colormap_rgba': ((1.0, 0.0, 0.0, 1.0),), + 'cmap': 'tiff_palette_placeholder', + } + da = _make_passthrough_da(crs=4326, attrs=legacy_payload) + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + rd = _passthrough_roundtrip( + tmp_path, da, name='removed_attrs_roundtrip.tif') + + resurrected = sorted(k for k in _REMOVED_IN_V2_ATTRS if k in rd.attrs) + assert resurrected == [], ( + f"removed attrs survived a write -> read cycle: {resurrected}. " + f"All attrs keys present: {sorted(rd.attrs.keys())}. The " + f"reader drops these per contract v2 (issue #2016)." + ) + + +def test_passthrough_contract_version_is_current(tmp_path): + """``attrs['_xrspatial_geotiff_contract']`` matches the constant on + every read. + + The contract version is the user-visible signal that a tier change + landed. Issue #2016 bumped it to 2 (removal of deprecated GeoKey + attrs); issue #2136 bumped it to 3 (addition of + ``attrs['georef_status']``). Pinning against + ``_ATTRS_CONTRACT_VERSION`` means the next bump only has to touch + the constant and the bump-specific tests, not every "is the stamp + set" assertion. + """ + da = _make_passthrough_da(crs=4326) + rd = _passthrough_roundtrip( + tmp_path, da, name='contract_version_signal.tif') + + assert rd.attrs.get(_CONTRACT_KEY) == _ATTRS_CONTRACT_VERSION, ( + f"contract version stamp on a fresh read is " + f"{rd.attrs.get(_CONTRACT_KEY)!r}; expected " + f"{_ATTRS_CONTRACT_VERSION}." + ) + + +# =========================================================================== +# Version tier +# +# Per-backend coverage of the ``_xrspatial_geotiff_contract`` stamp. +# Sourced from ``test_attrs_contract_version_1984.py``. +# =========================================================================== + + +def _write_small_tiff(path): + """Write a small tiled float32 TIFF used by every read-path + assertion.""" + arr = np.arange(64 * 64, dtype=np.float32).reshape(64, 64) + tifffile.imwrite( + path, arr, photometric='minisblack', planarconfig='contig', + tile=(32, 32), compression='deflate', metadata=None, + ) + return arr + + +def _write_minimal_vrt(vrt_path, source_name, *, height, width): + """Write a VRT that references ``source_name`` as a single-band + source.""" + vrt_path.write_text( + f'\n' + ' \n' + ' \n' + f' {source_name}' + '\n' + ' 1\n' + f' \n' + f' \n' + ' \n' + ' \n' + '\n' + ) + + +def test_version_constant_is_current(): + """Pin the integer value so a careless bump shows up here first. + + Contract v3 (issue #2136) added ``attrs['georef_status']`` to the + canonical tier. Contract v4 (issue #2129) added + ``attrs['rotated_affine']`` for the ``allow_rotated=True`` opt-in + path. Bumping past 4 should be paired with a docs update and a + sibling test for the new key. + """ + assert _ATTRS_CONTRACT_VERSION == 4 + + +def test_version_module_docstring_matches_constant(): + """Guard against the docstring and the constant drifting apart + (#2237). + + The ``_attrs.py`` module docstring spells out the current contract + version inline (``The contract version is recorded in + ``attrs['_xrspatial_geotiff_contract']`` (currently ````)``). + A previous bump (v3 -> v4 for issue #2129's ``rotated_affine`` + attr) updated the constant but left the docstring at ``3``. This + test parses the documented number out of the docstring and asserts + it equals ``_ATTRS_CONTRACT_VERSION`` so the next drift gets caught + in CI rather than in code review. + """ + docstring = _attrs_module.__doc__ + assert docstring is not None, ( + "xrspatial.geotiff._attrs lost its module docstring; the contract " + "documentation lives in that docstring and must be restored." + ) + + # Match the canonical phrasing + # ``... ``attrs['_xrspatial_geotiff_contract']`` (currently + # ````)`` while staying tolerant of trivial whitespace changes + # around the parenthetical. + match = re.search( + r"attrs\['_xrspatial_geotiff_contract'\]``\s*\(currently\s*``(\d+)``\)", + docstring, + ) + assert match is not None, ( + "Could not find the documented contract version in the " + "_attrs.py module docstring. Expected a phrase of the form " + "``attrs['_xrspatial_geotiff_contract']`` (currently ````). " + "Update the docstring or this test if the phrasing changed." + ) + + documented_version = int(match.group(1)) + assert documented_version == _ATTRS_CONTRACT_VERSION, ( + f"_attrs.py module docstring says the contract version is " + f"{documented_version}, but _ATTRS_CONTRACT_VERSION is " + f"{_ATTRS_CONTRACT_VERSION}. Update the docstring " + f"'(currently ``{documented_version}``)' to " + f"'(currently ``{_ATTRS_CONTRACT_VERSION}``)' so the two stay " + f"in lockstep." + ) + + +# (id_suffix, opener_callable). Each opener takes a path and returns a +# DataArray; the stamp assertion is shared. +def _v_eager(path): + return open_geotiff(path) + + +def _v_dask(path): + return open_geotiff(path, chunks=32) + + +def _v_gpu(path): + return open_geotiff(path, gpu=True) + + +def _v_dask_gpu(path): + return open_geotiff(path, gpu=True, chunks=32) + + +_VERSION_TIFF_OPENERS = [ + pytest.param(_v_eager, id='version[eager-numpy]'), + pytest.param(_v_dask, id='version[dask-numpy]'), + pytest.param(_v_gpu, id='version[gpu]', marks=requires_gpu), + pytest.param(_v_dask_gpu, id='version[dask-gpu]', marks=requires_gpu), +] + + +@pytest.mark.parametrize('opener', _VERSION_TIFF_OPENERS) +def test_version_stamp_present_per_tiff_backend(tmp_path, opener): + """Each TIFF read backend stamps the contract version.""" + path = str(tmp_path / f"contract_{opener.__name__}.tif") + _write_small_tiff(path) + + da = opener(path) + assert da.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION + + +def _v_vrt_eager(path): + return read_vrt(path) + + +def _v_vrt_chunked(path): + return read_vrt(path, chunks=32) + + +_VERSION_VRT_OPENERS = [ + pytest.param(_v_vrt_eager, id='version[vrt-eager]'), + pytest.param(_v_vrt_chunked, id='version[vrt-chunked]'), +] + + +@pytest.mark.parametrize('opener', _VERSION_VRT_OPENERS) +def test_version_stamp_present_per_vrt_backend(tmp_path, opener): + """Both VRT read paths stamp the contract version.""" + src = tmp_path / f"contract_{opener.__name__}_source.tif" + _write_small_tiff(str(src)) + vrt = tmp_path / f"contract_{opener.__name__}.vrt" + _write_minimal_vrt(vrt, os.path.basename(src), height=64, width=64) + + da = opener(str(vrt)) + assert da.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION diff --git a/xrspatial/geotiff/tests/test_attrs_contract_aliases_1984.py b/xrspatial/geotiff/tests/test_attrs_contract_aliases_1984.py deleted file mode 100644 index 0b15f7d0f..000000000 --- a/xrspatial/geotiff/tests/test_attrs_contract_aliases_1984.py +++ /dev/null @@ -1,182 +0,0 @@ -"""Locking test for the compatibility-alias tier of the attrs contract -(issue #1984, PR 5 of 7). - -xrspatial's canonical NoData attr is ``attrs['nodata']``. Two -ecosystem aliases are tolerated on *read* paths so foreign DataArrays -(rioxarray-style ``attrs['nodatavals']`` tuples, CF-style -``attrs['_FillValue']``) survive a round-trip through ``to_geotiff``. - -This module pins both halves of that contract in one place so future -drift surfaces here: - -* Read paths must accept the two aliases when ``attrs['nodata']`` is - absent, and must prefer ``nodata`` when present. -* Write paths must not emit either alias into the resulting attrs - after a round-trip; only ``nodata`` should appear. - -A finer-grained set of regression tests for the same aliases lives in -``test_nodata_attr_aliases_1582.py``. This file is intentionally -contract-shaped: it does not duplicate those cases but locks the -high-level rules. -""" -from __future__ import annotations - -import numpy as np -import pytest -import xarray as xr - -from xrspatial.geotiff import open_geotiff, to_geotiff -from xrspatial.geotiff._attrs import _resolve_nodata_attr - -# Arbitrary float-castable sentinel that is distinct from any data value -# used below. The value itself does not matter; the tests assert it -# survives the alias resolver and the round-trip. -_SENTINEL = -9999.0 - - -def _da_float(arr, **attrs): - return xr.DataArray( - arr, dims=["y", "x"], - coords={"y": np.arange(arr.shape[0], dtype=np.float64), - "x": np.arange(arr.shape[1], dtype=np.float64)}, - attrs=attrs, - ) - - -@pytest.fixture -def _arr_with_sentinel(): - return np.array( - [[1.0, 2.0, _SENTINEL], [3.0, _SENTINEL, 5.0]], - dtype=np.float32, - ) - - -# --------------------------------------------------------------------------- -# Read-side: aliases are honoured when ``attrs['nodata']`` is absent. -# --------------------------------------------------------------------------- - - -def test_read_uses_nodatavals_when_nodata_absent(tmp_path, _arr_with_sentinel): - """Pin: a foreign DataArray carrying only ``nodatavals`` round-trips - through ``to_geotiff`` with the sentinel preserved.""" - da = _da_float(_arr_with_sentinel, crs=4326, nodatavals=(_SENTINEL,)) - assert "nodata" not in da.attrs # precondition: only the alias is set - out = str(tmp_path / "alias_nodatavals.tif") - to_geotiff(da, out) - - rd = open_geotiff(out) - assert rd.attrs.get("nodata") == _SENTINEL - - -def test_read_uses_fill_value_when_nodata_absent(tmp_path, _arr_with_sentinel): - """Pin: a foreign DataArray carrying only CF ``_FillValue`` round-trips - through ``to_geotiff`` with the sentinel preserved.""" - da = _da_float(_arr_with_sentinel, crs=4326, - **{"_FillValue": _SENTINEL}) - assert "nodata" not in da.attrs - out = str(tmp_path / "alias_fillvalue.tif") - to_geotiff(da, out) - - rd = open_geotiff(out) - assert rd.attrs.get("nodata") == _SENTINEL - - -# --------------------------------------------------------------------------- -# Precedence: canonical ``nodata`` wins over the two aliases. -# --------------------------------------------------------------------------- - - -def test_canonical_nodata_wins_over_aliases_at_resolver(_arr_with_sentinel): - """``_resolve_nodata_attr`` (the read-side chokepoint) still prefers - canonical ``nodata`` over both aliases. This is the resolver-layer - contract that downstream code consumes; the write-side fail-closed - check is exercised in ``test_canonical_nodata_wins_over_aliases_at_write`` - below.""" - canonical = -8888.0 - da = _da_float( - _arr_with_sentinel, crs=4326, - nodata=canonical, - nodatavals=(_SENTINEL,), - **{"_FillValue": -7777.0}, - ) - assert _resolve_nodata_attr(dict(da.attrs)) == canonical - - -def test_canonical_nodata_wins_over_aliases_at_write( - tmp_path, _arr_with_sentinel): - """Pin: the legacy "canonical nodata silently wins on write" was - replaced by a fail-closed raise (issue #1987 PR 7). A DataArray with - disagreeing ``nodata`` and ``nodatavals`` attrs now refuses to write - instead of dropping the alias. The opt-out is the explicit - ``nodata=`` kwarg, which overrides both attrs.""" - from xrspatial.geotiff import ConflictingNodataError - - da = _da_float( - _arr_with_sentinel, crs=4326, - nodata=-8888.0, - nodatavals=(_SENTINEL,), - **{"_FillValue": -7777.0}, - ) - - out = str(tmp_path / "canonical_wins.tif") - with pytest.raises(ConflictingNodataError): - to_geotiff(da, out) - - # Explicit ``nodata=`` bypasses the check. - to_geotiff(da, out, nodata=-8888.0) - rd = open_geotiff(out) - assert rd.attrs.get("nodata") == -8888.0 - - -# --------------------------------------------------------------------------- -# Write-side: a round-trip must emit only ``nodata``, not the aliases. -# --------------------------------------------------------------------------- - - -def test_write_does_not_emit_aliases_when_canonical_present( - tmp_path, _arr_with_sentinel): - """Pin: round-tripping a DataArray that has only ``attrs['nodata']`` - set returns attrs containing ``nodata`` and *not* the two aliases. - - Reader paths emit a single canonical key; this guards against a - future change that copies the resolved value into the rioxarray or - CF alias slot as well (which would silently double-write the - sentinel and confuse downstream consumers that special-case one - alias over another).""" - da = _da_float(_arr_with_sentinel, crs=4326, nodata=_SENTINEL) - out = str(tmp_path / "canonical_only.tif") - to_geotiff(da, out) - - rd = open_geotiff(out) - assert rd.attrs.get("nodata") == _SENTINEL - assert "nodatavals" not in rd.attrs, ( - f"reader emitted rioxarray alias alongside canonical nodata: " - f"attrs={sorted(rd.attrs)}" - ) - assert "_FillValue" not in rd.attrs, ( - f"reader emitted CF alias alongside canonical nodata: " - f"attrs={sorted(rd.attrs)}" - ) - - -# --------------------------------------------------------------------------- -# NaN-in-nodatavals quirk: NaN entries are skipped, not treated as sentinel. -# --------------------------------------------------------------------------- - - -def test_nan_in_nodatavals_is_skipped(): - """Pin: NaN entries in ``attrs['nodatavals']`` are not a sentinel. - - NaN means the float NaN is already the implicit sentinel, which - doesn't need a GDAL_NODATA tag. The resolver must skip NaN entries - and (in a mixed tuple) return the first concrete numeric value.""" - # Pure-NaN tuple resolves to None. - assert _resolve_nodata_attr({"nodatavals": (float("nan"),)}) is None - # NaN followed by a concrete sentinel: the resolver skips NaN and - # returns the concrete value. - assert _resolve_nodata_attr( - {"nodatavals": (float("nan"), _SENTINEL)}) == _SENTINEL - # None entries are also skipped (rioxarray uses None for "no sentinel - # on this band"). - assert _resolve_nodata_attr( - {"nodatavals": (None, _SENTINEL)}) == _SENTINEL diff --git a/xrspatial/geotiff/tests/test_attrs_contract_canonical_1984.py b/xrspatial/geotiff/tests/test_attrs_contract_canonical_1984.py deleted file mode 100644 index 59f314f62..000000000 --- a/xrspatial/geotiff/tests/test_attrs_contract_canonical_1984.py +++ /dev/null @@ -1,407 +0,0 @@ -"""Locking test for the canonical tier of the attrs contract. - -Issue #1984, PR 4 of 7. - -The attrs contract (see ``xrspatial/geotiff/_attrs.py`` and -``docs/source/user_guide/attrs_contract.rst``) splits every key the -read paths emit into three tiers. This file pins the *canonical* tier: -keys xrspatial owns and guarantees round-trip stable through -``to_geotiff`` -> ``open_geotiff``. - -Sibling files cover the other tiers: - -* ``test_attrs_contract_aliases_1984.py`` -- compatibility aliases. -* ``test_attrs_contract_passthrough_1984.py`` -- best-effort - pass-through. -* ``test_attrs_contract_version_1984.py`` -- per-backend stamping of - ``attrs['_xrspatial_geotiff_contract']`` (also canonical, kept in its - own file because the assertion is per read path rather than - per round-trip). - -The canonical keys locked here: - -* ``crs`` -- EPSG integer code. -* ``crs_wkt`` -- horizontal CRS WKT string. -* ``transform`` -- rasterio-style 6-tuple. -* ``nodata`` -- declared file sentinel (GDAL_NODATA). -* ``raster_type`` -- 'point' (set explicitly) or absent - (= 'area', the implicit default). -* ``extra_tags`` -- list of (id, type, count, value) - tuples for out-of-band TIFF tags. -* ``gdal_metadata`` -- dict parsed from GDAL_METADATA XML. -* ``gdal_metadata_xml`` -- raw GDAL_METADATA XML string. -* ``x_resolution``, ``y_resolution``, - ``resolution_unit`` -- TIFF XResolution / YResolution / - ResolutionUnit. -* ``_xrspatial_geotiff_contract`` -- integer contract version. Stamped - on every read. - -The fixture below sets every canonical key on a synthetic DataArray, -round-trips it through ``to_geotiff`` -> ``open_geotiff``, and the test -suite below asserts both presence and value equality per key. The -single-fixture shape is intentional: a future writer change that drops -one canonical key shows up here as one failing assertion rather than -being lost in a larger diff. - -Issues #1985 (parity matrix) and #1986 (round-trip invariants) consume -this assertion list. If you add a key here, update the canonical block -in the contract page and the ``_attrs.py`` module docstring as well. -""" -from __future__ import annotations - -import importlib.util -import re - -import numpy as np -import pytest -import xarray as xr - -from xrspatial.geotiff import open_geotiff, to_geotiff -from xrspatial.geotiff._attrs import _ATTRS_CONTRACT_VERSION - -_CONTRACT_KEY = '_xrspatial_geotiff_contract' - -# Every key the canonical tier guarantees round-trip stable. Keep the -# order consistent with the contract docs so a diff here lines up with -# a diff in ``attrs_contract.rst``. -# -# ``raster_type`` is canonical but absent from this constant: the -# implicit default 'area' is encoded as *absence* of the attr, so the -# "must be present after round-trip" check below cannot express it. -# The dedicated ``test_raster_type_*`` tests at the bottom of this -# file lock both branches. -_CANONICAL_KEYS = ( - 'crs', - 'crs_wkt', - 'transform', - 'nodata', - 'extra_tags', - 'gdal_metadata', - 'gdal_metadata_xml', - 'x_resolution', - 'y_resolution', - 'resolution_unit', - # Added in contract v3 (issue #2136). The fixture is georef + CRS - # so the round-tripped value is the ``'full'`` literal; the - # value-equality check lives on ``test_georef_status_roundtrip`` - # below to keep the membership and value assertions independent. - 'georef_status', - _CONTRACT_KEY, -) - - -def _gpu_available() -> bool: - if importlib.util.find_spec("cupy") is None: - return False - try: - import cupy - return bool(cupy.cuda.is_available()) - except Exception: - return False - - -_HAS_GPU = _gpu_available() -_gpu_only = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required") - -# Fixture values, written into ``attrs`` on the synthetic DataArray and -# compared to the read-back attrs after round-trip. ``transform`` is -# pinned by ``y`` / ``x`` coords so we expect that exact 6-tuple back. -_NODATA_SENTINEL = -9999.0 -_X_RES = 300.0 -_Y_RES = 300.0 -_RES_UNIT = 'inch' -# The shared fixture leaves raster_type unset (= implicit 'area'). The -# point-specific test rebuilds the attrs dict so the two raster_type -# branches do not share state. Keep ``_GDAL_META`` free of an -# ``AREA_OR_POINT`` entry so the fixture stays consistent under both -# branches. -_GDAL_META = {'TIFFTAG_SOFTWARE': 'xrspatial-1984'} -# Software tag (305, ASCII). Picked because it is benign (no spatial -# interpretation, no security filter) and tifffile decodes it as-is. -# Count must include the trailing NUL byte. The Software identity also -# appears as ``gdal_metadata['TIFFTAG_SOFTWARE']`` above; the two are -# independent channels (a raw TIFF tag vs an entry in the GDAL_METADATA -# XML payload) and the writer does not synchronise them. -_SOFTWARE_STR = 'xrspatial-canonical-1984' -_EXTRA_TAGS = [(305, 2, len(_SOFTWARE_STR) + 1, _SOFTWARE_STR)] -# ``crs_wkt`` is round-tripped via attrs['crs'] (the EPSG code drives -# the writer), so we leave it for the reader to emit. Setting it on -# write is allowed but not required; the read-back value comes from the -# PROJ database. - - -def _make_canonical_da(): - """Build a synthetic DataArray exercising every canonical attr. - - Returns the DataArray plus the expected ``transform`` tuple so the - test can assert on the round-tripped value without recomputing it. - """ - h, w = 4, 4 - data = np.arange(h * w, dtype=np.float32).reshape(h, w) - # Pin a non-identity transform so the round-trip check catches a - # writer that drops the tiepoint / pixel-scale tags. Coords are - # interpreted as pixel centres; the emitted transform's origin is - # the top-left corner, so origin_x = 100 - 10/2 = 95 and - # origin_y = 240 - (-10)/2 = 245. - x = np.array([100.0, 110.0, 120.0, 130.0], dtype=np.float64) - y = np.array([240.0, 230.0, 220.0, 210.0], dtype=np.float64) - expected_transform = (10.0, 0.0, 95.0, 0.0, -10.0, 245.0) - - da = xr.DataArray( - data, dims=('y', 'x'), coords={'y': y, 'x': x}, - attrs={ - 'crs': 4326, - 'nodata': _NODATA_SENTINEL, - 'extra_tags': list(_EXTRA_TAGS), - 'gdal_metadata': dict(_GDAL_META), - 'x_resolution': _X_RES, - 'y_resolution': _Y_RES, - 'resolution_unit': _RES_UNIT, - }, - ) - return da, expected_transform - - -@pytest.fixture -def canonical_roundtrip(tmp_path): - """Round-trip the canonical fixture through write -> read. - - Returns ``(read_da, expected_transform)``. Scoped to one round-trip - per test so per-key assertions stay independent and a single failure - points at one key rather than cascading. - """ - da, expected_transform = _make_canonical_da() - path = str(tmp_path / 'attrs_contract_canonical.tif') - # The canonical fixture carries ``extra_tags``; the PR 4 rich-tag - # gate (#2340) exempts attrs carrying ``_xrspatial_geotiff_contract`` - # (set by the readers), but this fixture builds a fresh DataArray - # without the marker, so the initial write must opt in. The - # round-trip's subsequent open->write would NOT need the flag -- - # this fixture exercises the initial write only. - to_geotiff(da, path, allow_experimental_codecs=True) - rd = open_geotiff(path) - return rd, expected_transform - - -# --------------------------------------------------------------------------- -# Single-fixture coverage: every canonical key is present on read-back. -# --------------------------------------------------------------------------- - - -def test_every_canonical_key_present(canonical_roundtrip): - """Pin the canonical key set after a round-trip. - - A writer that drops one canonical key (e.g. forgets to emit - GDAL_METADATA) shows up here as one missing key rather than as a - later equality failure with a less obvious cause. - """ - rd, _ = canonical_roundtrip - missing = sorted(k for k in _CANONICAL_KEYS if k not in rd.attrs) - assert missing == [], ( - f"canonical attrs missing after round-trip: {missing}. " - f"attrs keys present: {sorted(rd.attrs.keys())}" - ) - - -# --------------------------------------------------------------------------- -# Per-key value assertions: each canonical key round-trips by value. -# --------------------------------------------------------------------------- - - -def test_crs_roundtrip(canonical_roundtrip): - rd, _ = canonical_roundtrip - assert rd.attrs['crs'] == 4326 - - -def test_crs_wkt_roundtrip(canonical_roundtrip): - """``crs_wkt`` is reader-emitted from the EPSG code. Pin presence - and the CRS-identity substring callers rely on. The exact WKT is - PROJ-version dependent, so match ``WGS 84`` / ``WGS_1984`` / ``WGS-84`` - with one regex rather than a single literal.""" - rd, _ = canonical_roundtrip - wkt = rd.attrs['crs_wkt'] - assert isinstance(wkt, str) and len(wkt) > 0 - assert re.search(r'WGS[\s_-]?84|WGS_1984', wkt), ( - f"crs_wkt round-trip lost the CRS identity: {wkt!r}" - ) - - -def test_transform_roundtrip(canonical_roundtrip): - rd, expected_transform = canonical_roundtrip - t = tuple(rd.attrs['transform']) - assert t == pytest.approx(expected_transform), ( - f"transform round-trip mismatch.\n expected: {expected_transform}\n" - f" got : {t}" - ) - - -def test_nodata_roundtrip(canonical_roundtrip): - rd, _ = canonical_roundtrip - assert rd.attrs['nodata'] == _NODATA_SENTINEL - - -def test_extra_tags_roundtrip(canonical_roundtrip): - """A non-friendly extra_tags entry (Software, 305) round-trips - intact. The writer must preserve unknown tags so users can attach - arbitrary metadata.""" - rd, _ = canonical_roundtrip - got = rd.attrs['extra_tags'] - # Look up tag 305 specifically; ordering and any reader-added - # entries are not part of the contract for this assertion. - by_id = {t[0]: t for t in got} - assert 305 in by_id, ( - f"Software tag (305) missing from read-back extra_tags: {got}" - ) - tag_id, type_id, count, value = by_id[305] - assert tag_id == 305 - assert type_id == 2 # TIFF ASCII - assert value == _SOFTWARE_STR - - -def test_gdal_metadata_roundtrip(canonical_roundtrip): - """The parsed dict survives the round-trip key-by-key. Allow extra - entries the reader might inject (e.g. ``STATISTICS_*``) so this - test is not a tripwire for unrelated reader changes.""" - rd, _ = canonical_roundtrip - got = rd.attrs['gdal_metadata'] - assert isinstance(got, dict), f"gdal_metadata is not a dict: {got!r}" - for k, v in _GDAL_META.items(): - assert got.get(k) == v, ( - f"gdal_metadata[{k!r}] mismatch.\n expected: {v!r}\n" - f" got : {got.get(k)!r}\n full read-back: {got!r}" - ) - - -def test_gdal_metadata_xml_roundtrip(canonical_roundtrip): - """The raw XML string is reconstructed by the writer from the - ``gdal_metadata`` dict. Pin presence + the substring that proves - our fixture survived; the exact XML formatting is writer-dependent.""" - rd, _ = canonical_roundtrip - xml = rd.attrs['gdal_metadata_xml'] - assert isinstance(xml, str) and xml.startswith('') - assert 'xrspatial-1984' in xml, ( - f"gdal_metadata_xml lost the fixture marker: {xml!r}" - ) - - -def test_resolution_group_roundtrip(canonical_roundtrip): - """``x_resolution`` / ``y_resolution`` / ``resolution_unit`` are - written and read as one logical unit -- pin them together so a - writer that drops one but keeps the others fails here.""" - rd, _ = canonical_roundtrip - assert rd.attrs['x_resolution'] == pytest.approx(_X_RES) - assert rd.attrs['y_resolution'] == pytest.approx(_Y_RES) - assert rd.attrs['resolution_unit'] == _RES_UNIT - - -def test_contract_version_roundtrip(canonical_roundtrip): - """``_xrspatial_geotiff_contract`` is stamped on every read; pin - that the canonical fixture sees the current version. Per-backend - coverage lives in ``test_attrs_contract_version_1984.py``.""" - rd, _ = canonical_roundtrip - assert rd.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION - - -def test_georef_status_roundtrip(canonical_roundtrip): - """``georef_status`` (issue #2136) is canonical from contract v3. - The fixture sets ``crs`` + axis-aligned transform-from-coords, so - the round-tripped value must be the ``'full'`` literal.""" - rd, _ = canonical_roundtrip - assert rd.attrs['georef_status'] == 'full' - - -# --------------------------------------------------------------------------- -# Per-backend coverage for canonical-key *presence*. -# -# The 7-PR plan in issue #1984 asked for "one fixture per backend with -# explicit assertions that every canonical key is present and round-trips -# byte-equivalent through write -> read". Read-time backends share -# ``_populate_attrs_from_geo_info``, so per-key value round-trips are -# pinned once (above) on the eager numpy path. What the per-backend -# check guards against is a backend skipping the shared helper or -# building its attrs dict independently; the version-stamp test does -# this for one canonical key, and the loop below does it for the rest. -# --------------------------------------------------------------------------- - - -def _open_eager(path): - return open_geotiff(path) - - -def _open_dask(path): - return open_geotiff(path, chunks=2) - - -def _open_gpu(path): - return open_geotiff(path, gpu=True) - - -def _open_dask_gpu(path): - return open_geotiff(path, gpu=True, chunks=2) - - -_BACKEND_OPENERS = [ - pytest.param(_open_eager, id='eager-numpy'), - pytest.param(_open_dask, id='dask-numpy'), - pytest.param(_open_gpu, id='gpu', marks=_gpu_only), - pytest.param(_open_dask_gpu, id='dask-gpu', marks=_gpu_only), -] - - -@pytest.mark.parametrize('opener', _BACKEND_OPENERS) -def test_canonical_keys_present_per_backend(tmp_path, opener): - """Each read backend emits the full canonical key set. - - Writes the canonical fixture once with the eager writer (only the - eager + dask writers exist; the GPU writer is exercised in its own - parity tests), then re-reads it through every supported backend and - asserts presence. Value-level round-trip is checked by the per-key - tests above; this loop guards against a backend that bypasses - ``_populate_attrs_from_geo_info``. - """ - da, _ = _make_canonical_da() - path = str(tmp_path / f'canonical_{opener.__name__}.tif') - # Fresh DataArray with ``extra_tags`` -> rich-tag opt-in required - # (PR 4 of epic #2340; see ``canonical_roundtrip`` for details). - to_geotiff(da, path, allow_experimental_codecs=True) - - rd = opener(path) - missing = sorted(k for k in _CANONICAL_KEYS if k not in rd.attrs) - assert missing == [], ( - f"{opener.__name__}: canonical attrs missing after round-trip: " - f"{missing}. attrs keys present: {sorted(rd.attrs.keys())}" - ) - - -# --------------------------------------------------------------------------- -# ``raster_type`` lives outside the shared fixture because the canonical -# default ('area') is encoded as *absence* in attrs. The two branches need -# different fixtures. -# --------------------------------------------------------------------------- - - -def test_raster_type_area_omitted_on_roundtrip(tmp_path): - """RasterPixelIsArea is the implicit default and is encoded as - *absence* of ``attrs['raster_type']``. A DataArray with no - ``raster_type`` attr must round-trip to a DataArray that still has - no ``raster_type`` attr.""" - da, _ = _make_canonical_da() - assert 'raster_type' not in da.attrs - path = str(tmp_path / 'raster_type_area.tif') - to_geotiff(da, path, allow_experimental_codecs=True) - rd = open_geotiff(path) - assert 'raster_type' not in rd.attrs, ( - f"area is the implicit default but the reader emitted " - f"raster_type={rd.attrs['raster_type']!r}" - ) - - -def test_raster_type_point_roundtrip(tmp_path): - """``raster_type='point'`` is the only value the writer accepts via - attrs; the reader emits it back on a round-trip.""" - da, _ = _make_canonical_da() - da.attrs['raster_type'] = 'point' - path = str(tmp_path / 'raster_type_point.tif') - to_geotiff(da, path, allow_experimental_codecs=True) - rd = open_geotiff(path) - assert rd.attrs.get('raster_type') == 'point' diff --git a/xrspatial/geotiff/tests/test_attrs_contract_passthrough_1984.py b/xrspatial/geotiff/tests/test_attrs_contract_passthrough_1984.py deleted file mode 100644 index 41ef1a7ea..000000000 --- a/xrspatial/geotiff/tests/test_attrs_contract_passthrough_1984.py +++ /dev/null @@ -1,325 +0,0 @@ -"""Locking test for the best-effort pass-through tier of the attrs contract. - -Issue #1984, PR 6 of 7. - -The attrs contract has three tiers: - -1. Canonical: writers consume the attr; round-trip is guaranteed. -2. Pass-through: writers do not consume the attr. The reader rebuilds - the value from the GeoKey directory (or another TIFF tag) on read. - Round-trip is best-effort: it works only when the writer happens to - emit a tag the reader can rebuild the attr from. -3. Ignored: writer never touches; attr is dropped silently. - -This file pins the *current* behaviour of every key in the pass-through -tier so future writer changes have to decide whether to promote a key to -canonical or to keep it dropping. The split between "reconstructible" -and "dropped on round-trip" is captured in the parametrisation below and -mirrored in PR 6's body so the next PR (canonical promotion) has a -shopping list. - -Current state of every pass-through key, as locked here: - -* Reconstructible (writer puts a TIFF tag the reader rebuilds from): - - ``image_description`` -> tag 270 (ImageDescription) - - ``extra_samples`` -> tag 338 (ExtraSamples) - - ``colormap`` -> tag 320 (ColorMap, raw uint16 triples) - -Contract v2 (issue #2016) removed the 13 GeoKey-derived and -matplotlib-colormap keys that v1 emitted on read under a -``DeprecationWarning`` (``crs_name``, ``geog_citation``, -``datum_code``, ``angular_units``, ``linear_units``, -``semi_major_axis``, ``inv_flattening``, ``projection_code``, -``vertical_crs``, ``vertical_citation``, ``vertical_units``, -``colormap_rgba``, ``cmap``). The reader no longer surfaces them as -attrs; ``test_removed_attrs_not_emitted`` and -``test_removed_attrs_absent_after_roundtrip`` lock that absence. -""" -from __future__ import annotations - -import warnings - -import numpy as np -import pytest -import xarray as xr - -from xrspatial.geotiff import open_geotiff, to_geotiff - -# Full set of pass-through keys defined by the contract. Contract v2 -# (issue #2016) trimmed this set to the three TIFF-tag-derived keys -# that actually round-trip via ``_merge_friendly_extra_tags``. -_ALL_PASSTHROUGH_KEYS = ( - 'image_description', - 'extra_samples', - 'colormap', -) - - -# Attrs that the reader emitted under a ``DeprecationWarning`` in -# contract v1 and that contract v2 (issue #2016) removed entirely. -# ``test_removed_attrs_not_emitted`` pins their absence after a fresh -# read; ``test_removed_attrs_absent_after_roundtrip`` pins their -# absence after a write -> read cycle. -_REMOVED_IN_V2_ATTRS = ( - 'crs_name', - 'geog_citation', - 'datum_code', - 'angular_units', - 'linear_units', - 'semi_major_axis', - 'inv_flattening', - 'projection_code', - 'vertical_crs', - 'vertical_citation', - 'vertical_units', - 'colormap_rgba', - 'cmap', -) - - -def _make_da(crs=None, attrs=None, shape=(4, 4), dtype=np.float32): - """Build a minimal georeferenced DataArray with optional CRS and attrs.""" - data = np.ones(shape, dtype=dtype) - h, w = shape - coords = { - 'y': np.arange(h, 0, -1, dtype=np.float64), - 'x': np.arange(w, dtype=np.float64), - } - a = dict(attrs) if attrs else {} - if crs is not None: - a['crs'] = crs - return xr.DataArray(data, dims=('y', 'x'), coords=coords, attrs=a) - - -def _roundtrip(tmp_path, da, name='roundtrip.tif'): - """Write ``da`` to ``tmp_path/name`` and read it back.""" - path = str(tmp_path / name) - to_geotiff(da, path) - return open_geotiff(path) - - -# (key, crs_to_use, value_set_on_write_or_None, expected_outcome) -# -# ``crs_to_use``: the CRS attached to the test DataArray; 4326 is -# fine for every remaining key. -# -# ``value_set_on_write``: the value the test sets on ``da.attrs`` before -# write. -# -# ``expected``: ``'reconstructible'`` (key must be present in the -# read-back attrs and equal to the written value) or ``'dropped'`` -# (key must be absent). After contract v2 (issue #2016), every -# row carries ``'reconstructible'``; the ``'dropped'`` arm is kept -# so a future addition can use it without restructuring the test. -_PASSTHROUGH_CASES = [ - # Non-GeoKey tag passthroughs. The writer folds these into extra_tags - # via _merge_friendly_extra_tags, so the reader can rebuild them. - ('image_description', 4326, 'pr1984 fixture', 'reconstructible'), - ('extra_samples', 4326, (1,), 'reconstructible'), - # ``colormap`` round-trips as the raw uint16 triple list. - # The TIFF ColorMap tag (320) stores RGB triples as uint16 values in - # the 0-65535 range. Values below are written as-is and compared - # by-equality after the round-trip; if the writer ever rescales 8-bit - # input to 16-bit (or vice versa), update this fixture rather than - # the contract. - ( - 'colormap', 4326, - tuple([0] * 256 + [128] * 256 + [255] * 256), - 'reconstructible', - ), -] - - -def test_passthrough_cases_cover_all_keys(): - """``_PASSTHROUGH_CASES`` and ``_ALL_PASSTHROUGH_KEYS`` carry the - same set in two forms. Pin them so a key added to one list and - forgotten on the other fails here rather than silently skipping - coverage in ``test_passthrough_dropped_when_no_crs``.""" - case_keys = {c[0] for c in _PASSTHROUGH_CASES} - assert case_keys == set(_ALL_PASSTHROUGH_KEYS), ( - f"_PASSTHROUGH_CASES and _ALL_PASSTHROUGH_KEYS diverge.\n" - f" only in cases: {sorted(case_keys - set(_ALL_PASSTHROUGH_KEYS))}\n" - f" only in keys : {sorted(set(_ALL_PASSTHROUGH_KEYS) - case_keys)}" - ) - - -@pytest.mark.parametrize( - 'key,crs,value,expected', - _PASSTHROUGH_CASES, - ids=[c[0] for c in _PASSTHROUGH_CASES], -) -def test_passthrough_key_roundtrip(tmp_path, key, crs, value, expected): - """Lock per-key round-trip outcome for the pass-through attr tier.""" - attrs = {} - if value is not None: - attrs[key] = value - da = _make_da(crs=crs, attrs=attrs) - # Single-band uint8 needed for the colormap tag to be valid in TIFF. - if key == 'colormap': - da = _make_da(crs=crs, attrs=attrs, dtype=np.uint8) - - rd = _roundtrip(tmp_path, da, name=f'{key}.tif') - - if expected == 'reconstructible': - assert key in rd.attrs, ( - f"pass-through key {key!r} was expected to round-trip but is " - f"absent. attrs keys present: {sorted(rd.attrs.keys())}" - ) - if value is not None: - got = rd.attrs[key] - if isinstance(value, tuple): - assert tuple(got) == value, ( - f"{key!r}: value mismatch on round-trip\n" - f" written: {value}\n" - f" read : {got}" - ) - else: - assert got == value, ( - f"{key!r}: value mismatch on round-trip\n" - f" written: {value!r}\n" - f" read : {got!r}" - ) - else: # 'dropped' - assert key not in rd.attrs, ( - f"pass-through key {key!r} was expected to drop on round-trip " - f"(writer does not emit a tag the reader can rebuild it from) " - f"but it is present with value {rd.attrs[key]!r}. If a writer " - f"change started emitting this key, decide whether to promote " - f"the key to canonical (issue #1984) and update this test." - ) - - -def test_passthrough_dropped_when_no_crs(tmp_path): - """Files without a CRS do not surface any pass-through attrs.""" - da = _make_da(crs=None) - rd = _roundtrip(tmp_path, da, name='no_crs.tif') - - present = sorted(k for k in _ALL_PASSTHROUGH_KEYS if k in rd.attrs) - assert present == [], ( - f"pass-through keys leaked into a no-CRS round-trip: {present}. " - f"All keys present: {sorted(rd.attrs.keys())}" - ) - # Sanity: no CRS attrs either. - assert 'crs' not in rd.attrs - assert 'crs_wkt' not in rd.attrs - - -def test_passthrough_does_not_promote_to_canonical(tmp_path): - """Setting legacy GeoKey-derived attrs without a CRS must not inject one. - - Contract v2 (issue #2016) removed these keys from the reader's - emission set, but a user with a hand-built ``DataArray`` may still - set them. The writer must treat them as advisory and never synthesise - a CRS from them; this test pins that invariant. - """ - # Mix of GeoKey-derived keys, but no ``crs`` / ``crs_wkt``. If the - # writer ever started inferring a CRS from these (e.g. picking 4326 - # because angular_units == 'degree') this test would fail. - attrs = { - 'crs_name': 'WGS 84', - 'geog_citation': 'WGS 84', - 'angular_units': 'degree', - 'linear_units': 'metre', - 'semi_major_axis': 6378137.0, - 'inv_flattening': 298.257223563, - 'datum_code': 6326, - } - da = _make_da(crs=None, attrs=attrs) - - with warnings.catch_warnings(): - # The writer warns on user-defined-CRS WKT writes; we are not on - # that path here, but suppress generously so a future warning - # tweak does not turn this test into a warning regression test. - warnings.simplefilter('ignore') - rd = _roundtrip(tmp_path, da, name='no_crs_with_attrs.tif') - - assert 'crs' not in rd.attrs, ( - f"pass-through attrs caused the writer to synthesise a CRS: " - f"crs={rd.attrs.get('crs')!r}. The contract says pass-through " - f"attrs are advisory only; the writer must rely on attrs['crs'] " - f"or attrs['crs_wkt'] to emit georeferencing." - ) - assert 'crs_wkt' not in rd.attrs, ( - f"pass-through attrs caused the writer to synthesise a CRS WKT: " - f"crs_wkt={rd.attrs.get('crs_wkt')!r}." - ) - - -def test_removed_attrs_not_emitted(tmp_path): - """Contract v2 (issue #2016) removed 13 deprecated reader attrs. - - A freshly read DataArray must not carry any of them, even when the - underlying GeoTIFF's GeoKey directory advertises the values. This - test pins the removal so a regression that re-adds an emit site - fails here rather than silently leaking the attr back into the - public surface. - """ - da = _make_da(crs=4326) - rd = _roundtrip(tmp_path, da, name='removed_attrs_no_emit.tif') - - leaked = sorted(k for k in _REMOVED_IN_V2_ATTRS if k in rd.attrs) - assert leaked == [], ( - f"contract v2 attrs leaked into a fresh read: {leaked}. " - f"All attrs keys present: {sorted(rd.attrs.keys())}. Issue " - f"#2016 dropped these from the reader; re-emitting them is a " - f"behaviour regression." - ) - - -def test_removed_attrs_absent_after_roundtrip(tmp_path): - """A write -> read cycle must not resurrect any v2-removed attr. - - Even when the input ``DataArray`` carries every removed attr as a - legacy value, the writer ignores them and the reader never adds - them back. The reopened DataArray's attrs is the public surface - callers rely on. - """ - legacy_payload = { - 'crs_name': 'WGS 84', - 'geog_citation': 'WGS 84', - 'datum_code': 6326, - 'angular_units': 'degree', - 'linear_units': 'metre', - 'semi_major_axis': 6378137.0, - 'inv_flattening': 298.257223563, - 'projection_code': 16033, - 'vertical_crs': 5703, - 'vertical_citation': 'NAVD88', - 'vertical_units': 'metre', - 'colormap_rgba': ((1.0, 0.0, 0.0, 1.0),), - 'cmap': 'tiff_palette_placeholder', - } - da = _make_da(crs=4326, attrs=legacy_payload) - with warnings.catch_warnings(): - warnings.simplefilter('ignore') - rd = _roundtrip(tmp_path, da, name='removed_attrs_roundtrip.tif') - - resurrected = sorted(k for k in _REMOVED_IN_V2_ATTRS if k in rd.attrs) - assert resurrected == [], ( - f"removed attrs survived a write -> read cycle: {resurrected}. " - f"All attrs keys present: {sorted(rd.attrs.keys())}. The " - f"reader must drop these per contract v2 (issue #2016)." - ) - - -def test_contract_version_is_current(tmp_path): - """``attrs['_xrspatial_geotiff_contract']`` matches the constant on - every read. - - The contract version is the user-visible signal that a tier change - landed. Issue #2016 bumped it to 2 (removal of deprecated GeoKey - attrs); issue #2136 bumped it to 3 (addition of - ``attrs['georef_status']``). Pinning against ``_ATTRS_CONTRACT_VERSION`` - means the next bump only has to touch the constant and the - bump-specific tests, not every "is the stamp set" assertion. - """ - from xrspatial.geotiff._attrs import _ATTRS_CONTRACT_VERSION - - da = _make_da(crs=4326) - rd = _roundtrip(tmp_path, da, name='contract_version_signal.tif') - - assert rd.attrs.get('_xrspatial_geotiff_contract') == _ATTRS_CONTRACT_VERSION, ( - f"contract version stamp on a fresh read is " - f"{rd.attrs.get('_xrspatial_geotiff_contract')!r}; expected " - f"{_ATTRS_CONTRACT_VERSION}." - ) diff --git a/xrspatial/geotiff/tests/test_attrs_contract_version_1984.py b/xrspatial/geotiff/tests/test_attrs_contract_version_1984.py deleted file mode 100644 index 348954302..000000000 --- a/xrspatial/geotiff/tests/test_attrs_contract_version_1984.py +++ /dev/null @@ -1,196 +0,0 @@ -"""Contract-version marker tests for issue #1984 / #2016. - -PR 3 of issue #1984 stamped every DataArray returned by an xrspatial -geotiff read path with ``attrs['_xrspatial_geotiff_contract']``. Issue -#2016 (removal phase of #1984) bumped the version to ``2`` and dropped -the 13 deprecated GeoKey-derived and matplotlib-colormap attrs. -Downstream code reads this marker to learn which attrs-contract -revision produced the array. - -The stamp must appear on every backend: - -* eager numpy (``open_geotiff``) -* dask + numpy (``open_geotiff(chunks=...)`` / ``read_geotiff_dask``) -* cupy / GPU (``open_geotiff(gpu=True)`` / ``read_geotiff_gpu``) -* dask + cupy (``open_geotiff(gpu=True, chunks=...)``) -* VRT eager (``read_vrt``) -* VRT dask chunked (``read_vrt(chunks=...)``) - -The fixture style mirrors ``test_attrs_parity_1548.py``: build a small -on-disk TIFF (and a small VRT pointing at one) inside ``tmp_path``, -open it through each backend, and assert on the resulting attrs. -""" -from __future__ import annotations - -import importlib.util -import os -import re - -import numpy as np -import pytest - -from xrspatial.geotiff import _attrs as _attrs_module -from xrspatial.geotiff import open_geotiff, read_vrt -from xrspatial.geotiff._attrs import _ATTRS_CONTRACT_VERSION - -tifffile = pytest.importorskip("tifffile") - - -_CONTRACT_KEY = '_xrspatial_geotiff_contract' - - -def _gpu_available() -> bool: - if importlib.util.find_spec("cupy") is None: - return False - try: - import cupy - return bool(cupy.cuda.is_available()) - except Exception: - return False - - -_HAS_GPU = _gpu_available() -_gpu_only = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required") - - -def _write_small_tiff(path): - """Write a small tiled float32 TIFF used by every read-path assertion.""" - arr = np.arange(64 * 64, dtype=np.float32).reshape(64, 64) - tifffile.imwrite( - path, arr, photometric='minisblack', planarconfig='contig', - tile=(32, 32), compression='deflate', metadata=None, - ) - return arr - - -def _write_minimal_vrt(vrt_path, source_name, *, height, width): - """Write a VRT that references ``source_name`` as a single-band source.""" - vrt_path.write_text( - f'\n' - ' \n' - ' \n' - f' {source_name}' - '\n' - ' 1\n' - f' \n' - f' \n' - ' \n' - ' \n' - '\n' - ) - - -def test_attrs_contract_version_constant_is_current(): - """Pin the integer value so a careless bump shows up here first. - - Contract v3 (issue #2136) added ``attrs['georef_status']`` to the - canonical tier. Contract v4 (issue #2129) added - ``attrs['rotated_affine']`` for the ``allow_rotated=True`` opt-in - path. Bumping past 4 should be paired with a docs update and a - sibling test for the new key. - """ - assert _ATTRS_CONTRACT_VERSION == 4 - - -def test_attrs_module_docstring_version_matches_constant(): - """Guard against the docstring and the constant drifting apart (#2237). - - The ``_attrs.py`` module docstring spells out the current contract - version inline (``The contract version is recorded in - ``attrs['_xrspatial_geotiff_contract']`` (currently ````)``). - A previous bump (v3 -> v4 for issue #2129's ``rotated_affine`` attr) - updated the constant but left the docstring at ``3``. This test - parses the documented number out of the docstring and asserts it - equals ``_ATTRS_CONTRACT_VERSION`` so the next drift gets caught - in CI rather than in code review. - """ - docstring = _attrs_module.__doc__ - assert docstring is not None, ( - "xrspatial.geotiff._attrs lost its module docstring; the contract " - "documentation lives in that docstring and must be restored." - ) - - # Match the canonical phrasing - # ``... ``attrs['_xrspatial_geotiff_contract']`` (currently ````)`` - # while staying tolerant of trivial whitespace changes around the - # parenthetical. - match = re.search( - r"attrs\['_xrspatial_geotiff_contract'\]``\s*\(currently\s*``(\d+)``\)", - docstring, - ) - assert match is not None, ( - "Could not find the documented contract version in the " - "_attrs.py module docstring. Expected a phrase of the form " - "``attrs['_xrspatial_geotiff_contract']`` (currently ````). " - "Update the docstring or this test if the phrasing changed." - ) - - documented_version = int(match.group(1)) - assert documented_version == _ATTRS_CONTRACT_VERSION, ( - f"_attrs.py module docstring says the contract version is " - f"{documented_version}, but _ATTRS_CONTRACT_VERSION is " - f"{_ATTRS_CONTRACT_VERSION}. Update the docstring " - f"'(currently ``{documented_version}``)' to " - f"'(currently ``{_ATTRS_CONTRACT_VERSION}``)' so the two stay in " - f"lockstep." - ) - - -def test_eager_numpy_stamps_contract_version(tmp_path): - path = str(tmp_path / "contract_v1_eager.tif") - _write_small_tiff(path) - - da = open_geotiff(path) - - assert da.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION - - -def test_dask_numpy_stamps_contract_version(tmp_path): - path = str(tmp_path / "contract_v1_dask.tif") - _write_small_tiff(path) - - da = open_geotiff(path, chunks=32) - - assert da.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION - - -@_gpu_only -def test_gpu_stamps_contract_version(tmp_path): - path = str(tmp_path / "contract_v1_gpu.tif") - _write_small_tiff(path) - - da = open_geotiff(path, gpu=True) - - assert da.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION - - -@_gpu_only -def test_dask_gpu_stamps_contract_version(tmp_path): - path = str(tmp_path / "contract_v1_dask_gpu.tif") - _write_small_tiff(path) - - da = open_geotiff(path, gpu=True, chunks=32) - - assert da.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION - - -def test_vrt_eager_stamps_contract_version(tmp_path): - src = tmp_path / "contract_v1_vrt_source.tif" - _write_small_tiff(str(src)) - vrt = tmp_path / "contract_v1_vrt_eager.vrt" - _write_minimal_vrt(vrt, os.path.basename(src), height=64, width=64) - - da = read_vrt(str(vrt)) - - assert da.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION - - -def test_vrt_chunked_stamps_contract_version(tmp_path): - src = tmp_path / "contract_v1_vrt_chunked_source.tif" - _write_small_tiff(str(src)) - vrt = tmp_path / "contract_v1_vrt_chunked.vrt" - _write_minimal_vrt(vrt, os.path.basename(src), height=64, width=64) - - da = read_vrt(str(vrt), chunks=32) - - assert da.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION From f5546017c3e125bc552dcfed25a0bcfa936669b0 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Mon, 25 May 2026 20:27:00 -0700 Subject: [PATCH 2/2] Address review nits: drop redundant stamp test, tidy backend labels (#2397) - Drop ``test_passthrough_contract_version_is_current``: the same stamp value is asserted by ``canonical[contract_version]`` on a richer fixture and by the per-backend ``version[*]`` tests. - Switch ``_BACKEND_OPENERS`` and the version-section opener lists to ``(opener, label)`` tuples so tmp_path filenames use the parametrize label (``canonical_eager-numpy.tif``) instead of the leading-underscore function name (``canonical__open_eager.tif``). - Expand the docstring above ``_BACKEND_OPENERS`` to spell out that value-equality on the canonical fixture is eager-only by design (the per-backend loop is presence-only). - ``CLUSTER_AUDIT_PR5.md`` now records the intentional drop, updates the coverage parity count (40 -> 39), and adds a sentence describing what the deferred ``attrs/test_roundtrip.py`` slice should look like. Part of epic #2390 (GeoTIFF test consolidation, PR 5 of 11). --- xrspatial/geotiff/tests/CLUSTER_AUDIT_PR5.md | 18 +++- .../geotiff/tests/attrs/test_contract.py | 90 +++++++++---------- 2 files changed, 58 insertions(+), 50 deletions(-) diff --git a/xrspatial/geotiff/tests/CLUSTER_AUDIT_PR5.md b/xrspatial/geotiff/tests/CLUSTER_AUDIT_PR5.md index debb7ca49..c7f6c9be3 100644 --- a/xrspatial/geotiff/tests/CLUSTER_AUDIT_PR5.md +++ b/xrspatial/geotiff/tests/CLUSTER_AUDIT_PR5.md @@ -59,7 +59,7 @@ File count delta: `-4 +1 = -3`. | `test_attrs_contract_passthrough_1984.py::test_passthrough_does_not_promote_to_canonical` | `attrs/test_contract.py::test_passthrough_does_not_promote_to_canonical` | Verbatim. | | `test_attrs_contract_passthrough_1984.py::test_removed_attrs_not_emitted` | `attrs/test_contract.py::test_passthrough_removed_attrs_not_emitted` | Renamed for prefix. | | `test_attrs_contract_passthrough_1984.py::test_removed_attrs_absent_after_roundtrip` | `attrs/test_contract.py::test_passthrough_removed_attrs_absent_after_roundtrip` | Renamed. | -| `test_attrs_contract_passthrough_1984.py::test_contract_version_is_current` | `attrs/test_contract.py::test_passthrough_contract_version_is_current` | Renamed; assertion unchanged. | +| `test_attrs_contract_passthrough_1984.py::test_contract_version_is_current` | dropped | Redundant with `canonical[contract_version]` (which exercises the same stamp on a richer fixture) and the per-backend version-section tests. Removed during review-round fixes. | ## Version tier mapping @@ -87,9 +87,12 @@ Old surface (deduplicated functions/params): New surface: - Canonical: 1 + 10 (parametrize) + 4 (parametrize) + 2 = 17 cases. - Aliases: 2 (parametrize) + 4 = 6 cases. -- Passthrough: 1 + 3 (parametrize) + 5 = 9 cases. +- Passthrough: 1 + 3 (parametrize) + 4 = 8 cases. (The redundant + contract-version stamp test was dropped during review; the same + assertion is exercised by `canonical[contract_version]` and the + per-backend version section.) - Version: 2 + 4 (parametrize) + 2 (parametrize) = 8 cases. -- Total new: 40 cases. **No coverage drop.** +- Total new: 39 cases. One intentional drop documented above. ## Out of scope (intentionally untouched) @@ -112,3 +115,12 @@ reconstruction) and is exercised inside that tier's section. Adding a standalone `test_roundtrip.py` now would duplicate the canonical fixture without adding coverage. Leaving for a follow-up so it can be designed against a real coverage gap. + +The natural shape for the deferred file would be a single fixture that +writes every canonical key (plus the two aliases and the three +reconstructible passthrough keys), reads it back, and asserts +structural equality of the read-back attrs dict against a golden +expected dict. The current per-key value checks would stay where they +are (one failure points at one key); the roundtrip file would catch +dict-shape drift (extra keys leaking in, ordering issues in tuple-valued +attrs, etc.). Worth picking up alongside any future contract bump. diff --git a/xrspatial/geotiff/tests/attrs/test_contract.py b/xrspatial/geotiff/tests/attrs/test_contract.py index dfb4adfae..cb4a37f6f 100644 --- a/xrspatial/geotiff/tests/attrs/test_contract.py +++ b/xrspatial/geotiff/tests/attrs/test_contract.py @@ -294,11 +294,13 @@ def test_canonical_value_roundtrip(canonical_roundtrip, check): # Per-backend coverage for canonical-key *presence*. # # Read-time backends share ``_populate_attrs_from_geo_info``, so per-key -# value round-trips are pinned once (above) on the eager numpy path. -# What the per-backend check guards against is a backend skipping the -# shared helper or building its attrs dict independently. The version -# stamp test does this for one canonical key, and the loop below does -# it for the rest. +# value round-trips are pinned once (above) on the eager numpy path +# only -- value equality on the canonical fixture (which carries +# ``extra_tags`` + the experimental-codec opt-in) is exercised by the +# eager parametrize case at ``canonical[contract_version]``. The +# per-backend loop below guards against a backend that bypasses the +# shared attrs helper, not against a backend that emits the wrong +# value. def _open_eager(path): @@ -317,16 +319,22 @@ def _open_dask_gpu(path): return open_geotiff(path, gpu=True, chunks=2) +# Each entry: (opener, label). The label is the suffix used in the +# parametrize id AND in the tmp_path filename, so a failing case names +# its backend in both places without leaking the leading-underscore +# function name into the filename. _BACKEND_OPENERS = [ - pytest.param(_open_eager, id='canonical[eager-numpy]'), - pytest.param(_open_dask, id='canonical[dask-numpy]'), - pytest.param(_open_gpu, id='canonical[gpu]', marks=requires_gpu), - pytest.param(_open_dask_gpu, id='canonical[dask-gpu]', marks=requires_gpu), + pytest.param(_open_eager, 'eager-numpy', id='canonical[eager-numpy]'), + pytest.param(_open_dask, 'dask-numpy', id='canonical[dask-numpy]'), + pytest.param(_open_gpu, 'gpu', id='canonical[gpu]', marks=requires_gpu), + pytest.param( + _open_dask_gpu, 'dask-gpu', + id='canonical[dask-gpu]', marks=requires_gpu), ] -@pytest.mark.parametrize('opener', _BACKEND_OPENERS) -def test_canonical_keys_present_per_backend(tmp_path, opener): +@pytest.mark.parametrize('opener,label', _BACKEND_OPENERS) +def test_canonical_keys_present_per_backend(tmp_path, opener, label): """Each read backend emits the full canonical key set. Writes the canonical fixture once with the eager writer (only the @@ -337,7 +345,7 @@ def test_canonical_keys_present_per_backend(tmp_path, opener): ``_populate_attrs_from_geo_info``. """ da, _ = _make_canonical_da() - path = str(tmp_path / f'canonical_{opener.__name__}.tif') + path = str(tmp_path / f'canonical_{label}.tif') # Fresh DataArray with ``extra_tags`` -> rich-tag opt-in required # (see ``canonical_roundtrip`` fixture for details). to_geotiff(da, path, allow_experimental_codecs=True) @@ -345,7 +353,7 @@ def test_canonical_keys_present_per_backend(tmp_path, opener): rd = opener(path) missing = sorted(k for k in _CANONICAL_KEYS if k not in rd.attrs) assert missing == [], ( - f"{opener.__name__}: canonical attrs missing after round-trip: " + f"{label}: canonical attrs missing after round-trip: " f"{missing}. attrs keys present: {sorted(rd.attrs.keys())}" ) @@ -797,27 +805,11 @@ def test_passthrough_removed_attrs_absent_after_roundtrip(tmp_path): ) -def test_passthrough_contract_version_is_current(tmp_path): - """``attrs['_xrspatial_geotiff_contract']`` matches the constant on - every read. - - The contract version is the user-visible signal that a tier change - landed. Issue #2016 bumped it to 2 (removal of deprecated GeoKey - attrs); issue #2136 bumped it to 3 (addition of - ``attrs['georef_status']``). Pinning against - ``_ATTRS_CONTRACT_VERSION`` means the next bump only has to touch - the constant and the bump-specific tests, not every "is the stamp - set" assertion. - """ - da = _make_passthrough_da(crs=4326) - rd = _passthrough_roundtrip( - tmp_path, da, name='contract_version_signal.tif') - - assert rd.attrs.get(_CONTRACT_KEY) == _ATTRS_CONTRACT_VERSION, ( - f"contract version stamp on a fresh read is " - f"{rd.attrs.get(_CONTRACT_KEY)!r}; expected " - f"{_ATTRS_CONTRACT_VERSION}." - ) +# Note: the ``passthrough[contract_version]`` stamp check that the old +# source file held is intentionally dropped here. The canonical fixture +# already exercises a stamped round-trip +# (``test_canonical_value_roundtrip[canonical[contract_version]]``) and +# the version section below owns per-backend stamp coverage. # =========================================================================== @@ -932,18 +924,22 @@ def _v_dask_gpu(path): return open_geotiff(path, gpu=True, chunks=32) +# Each entry: (opener, label). Same shape as ``_BACKEND_OPENERS`` above +# so the filename matches the parametrize id. _VERSION_TIFF_OPENERS = [ - pytest.param(_v_eager, id='version[eager-numpy]'), - pytest.param(_v_dask, id='version[dask-numpy]'), - pytest.param(_v_gpu, id='version[gpu]', marks=requires_gpu), - pytest.param(_v_dask_gpu, id='version[dask-gpu]', marks=requires_gpu), + pytest.param(_v_eager, 'eager-numpy', id='version[eager-numpy]'), + pytest.param(_v_dask, 'dask-numpy', id='version[dask-numpy]'), + pytest.param(_v_gpu, 'gpu', id='version[gpu]', marks=requires_gpu), + pytest.param( + _v_dask_gpu, 'dask-gpu', + id='version[dask-gpu]', marks=requires_gpu), ] -@pytest.mark.parametrize('opener', _VERSION_TIFF_OPENERS) -def test_version_stamp_present_per_tiff_backend(tmp_path, opener): +@pytest.mark.parametrize('opener,label', _VERSION_TIFF_OPENERS) +def test_version_stamp_present_per_tiff_backend(tmp_path, opener, label): """Each TIFF read backend stamps the contract version.""" - path = str(tmp_path / f"contract_{opener.__name__}.tif") + path = str(tmp_path / f"contract_{label}.tif") _write_small_tiff(path) da = opener(path) @@ -959,17 +955,17 @@ def _v_vrt_chunked(path): _VERSION_VRT_OPENERS = [ - pytest.param(_v_vrt_eager, id='version[vrt-eager]'), - pytest.param(_v_vrt_chunked, id='version[vrt-chunked]'), + pytest.param(_v_vrt_eager, 'vrt-eager', id='version[vrt-eager]'), + pytest.param(_v_vrt_chunked, 'vrt-chunked', id='version[vrt-chunked]'), ] -@pytest.mark.parametrize('opener', _VERSION_VRT_OPENERS) -def test_version_stamp_present_per_vrt_backend(tmp_path, opener): +@pytest.mark.parametrize('opener,label', _VERSION_VRT_OPENERS) +def test_version_stamp_present_per_vrt_backend(tmp_path, opener, label): """Both VRT read paths stamp the contract version.""" - src = tmp_path / f"contract_{opener.__name__}_source.tif" + src = tmp_path / f"contract_{label}_source.tif" _write_small_tiff(str(src)) - vrt = tmp_path / f"contract_{opener.__name__}.vrt" + vrt = tmp_path / f"contract_{label}.vrt" _write_minimal_vrt(vrt, os.path.basename(src), height=64, width=64) da = opener(str(vrt))