Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions xrspatial/geotiff/_attrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,13 @@
_TIFF_SHORT = 3


# Contract version emitted on every read; bumped when the attrs contract
# changes. Downstream code reads ``attrs['_xrspatial_geotiff_contract']``
# to learn which attrs-contract revision produced the array. See issue
# #1984 and ``docs/source/user_guide/attrs_contract.rst``.
_ATTRS_CONTRACT_VERSION = 1


# String identifiers (used in xrspatial attrs) -> TIFF ResolutionUnit tag ids.
_RESOLUTION_UNIT_IDS = {'none': 1, 'inch': 2, 'centimeter': 3}

Expand Down Expand Up @@ -100,7 +107,19 @@ def _populate_attrs_from_geo_info(attrs: dict, geo_info, *, window=None) -> None
the outer window through this helper so the resulting DataArray
advertises the windowed transform. The GPU path does not currently
expose a windowed read, so it passes ``window=None``.

``attrs['_xrspatial_geotiff_contract']`` is stamped unconditionally
as the first step. Any pre-existing value on the passed-in dict is
overwritten with the current ``_ATTRS_CONTRACT_VERSION``; callers
pass freshly built dicts, so this is the intended behaviour.
"""
# Stamp the contract version first so every read path that funnels
# through this helper carries the marker. The VRT backends build
# their attrs dicts directly and stamp the version there (see
# ``_backends/vrt.py``); keep every stamping site in sync via the
# constant rather than a bare literal.
attrs['_xrspatial_geotiff_contract'] = _ATTRS_CONTRACT_VERSION

if geo_info.crs_epsg is not None:
attrs['crs'] = geo_info.crs_epsg
if geo_info.crs_wkt is not None:
Expand Down
11 changes: 9 additions & 2 deletions xrspatial/geotiff/_backends/vrt.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import numpy as np
import xarray as xr

from .._attrs import _ATTRS_CONTRACT_VERSION
from .._coords import (
coords_from_pixel_geometry as _coords_from_pixel_geometry,
transform_tuple_from_pixel_geometry as _transform_tuple_from_pixel_geometry,
Expand Down Expand Up @@ -193,7 +194,10 @@ def read_vrt(source: str, *,
else:
coords = {}

attrs = {}
# VRT builds its attrs dict inline rather than going through
# ``_populate_attrs_from_geo_info``; stamp the contract version here
# so both code paths emit the same marker.
attrs = {'_xrspatial_geotiff_contract': _ATTRS_CONTRACT_VERSION}
if vrt.crs_wkt:
epsg = _wkt_to_epsg(vrt.crs_wkt)
if epsg is not None:
Expand Down Expand Up @@ -562,7 +566,10 @@ def _read_vrt_chunked(source, *, window, band, name, chunks, gpu, dtype,
# eager reads share the same x/y arrays.
gt = vrt.geo_transform
coords = {}
attrs = {}
# Mirrors the eager VRT branch: this code path bypasses
# ``_populate_attrs_from_geo_info``, so the contract version is
# stamped inline using the shared constant to stay in lockstep.
attrs = {'_xrspatial_geotiff_contract': _ATTRS_CONTRACT_VERSION}
if gt is not None:
origin_x, res_x, _, origin_y, _, res_y = gt
coord_window = (win_r0, win_c0, win_r0 + full_h, win_c0 + full_w)
Expand Down
141 changes: 141 additions & 0 deletions xrspatial/geotiff/tests/test_attrs_contract_version_1984.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
"""Contract-version marker tests for issue #1984.

PR 3 of the 7-PR plan attached to issue #1984 stamps every DataArray
returned by an xrspatial geotiff read path with
``attrs['_xrspatial_geotiff_contract'] = 1``. Downstream code reads
this marker to learn which attrs-contract revision produced the array.

The stamp must appear on every backend:

* eager numpy (``open_geotiff``)
* dask + numpy (``open_geotiff(chunks=...)`` / ``read_geotiff_dask``)
* cupy / GPU (``open_geotiff(gpu=True)`` / ``read_geotiff_gpu``)
* dask + cupy (``open_geotiff(gpu=True, chunks=...)``)
* VRT eager (``read_vrt``)
* VRT dask chunked (``read_vrt(chunks=...)``)

The fixture style mirrors ``test_attrs_parity_1548.py``: build a small
on-disk TIFF (and a small VRT pointing at one) inside ``tmp_path``,
open it through each backend, and assert on the resulting attrs.
"""
from __future__ import annotations

import importlib.util
import os

import numpy as np
import pytest

from xrspatial.geotiff import open_geotiff, read_vrt
from xrspatial.geotiff._attrs import _ATTRS_CONTRACT_VERSION

# Skip this whole module when tifffile is not installed; the fixture
# writer below uses it to produce the on-disk TIFF.
tifffile = pytest.importorskip("tifffile")


# Attrs key that every read path is expected to stamp with the
# contract version; shared by all assertions in this module.
_CONTRACT_KEY = '_xrspatial_geotiff_contract'


def _gpu_available() -> bool:
if importlib.util.find_spec("cupy") is None:
return False
try:
import cupy
return bool(cupy.cuda.is_available())
except Exception:
return False


# Probe once at import time; the result feeds the skip marker below.
_HAS_GPU = _gpu_available()
# Decorator for tests that need cupy plus a working CUDA runtime; they
# are skipped (not failed) when the probe above returned False.
_gpu_only = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required")


def _write_small_tiff(path):
    """Create the shared 64x64 float32 test TIFF at ``path``.

    The file is written with 32x32 tiles and deflate compression. The
    pixel array that was written is returned to the caller.
    """
    side = 64
    pixels = np.arange(side * side, dtype=np.float32).reshape(side, side)
    write_options = {
        'photometric': 'minisblack',
        'planarconfig': 'contig',
        'tile': (32, 32),
        'compression': 'deflate',
        'metadata': None,
    }
    tifffile.imwrite(path, pixels, **write_options)
    return pixels


def _write_minimal_vrt(vrt_path, source_name, *, height, width):
"""Write a VRT that references ``source_name`` as a single-band source."""
vrt_path.write_text(
f'<VRTDataset rasterXSize="{width}" rasterYSize="{height}">\n'
' <VRTRasterBand dataType="Float32" band="1">\n'
' <SimpleSource>\n'
f' <SourceFilename relativeToVRT="1">{source_name}'
'</SourceFilename>\n'
' <SourceBand>1</SourceBand>\n'
f' <SrcRect xOff="0" yOff="0" xSize="{width}" ySize="{height}"/>\n'
f' <DstRect xOff="0" yOff="0" xSize="{width}" ySize="{height}"/>\n'
' </SimpleSource>\n'
' </VRTRasterBand>\n'
'</VRTDataset>\n'
)


def test_attrs_contract_version_constant_is_one():
    """Guard the literal value: a version bump must be made here on purpose."""
    pinned_version = 1
    assert _ATTRS_CONTRACT_VERSION == pinned_version


def test_eager_numpy_stamps_contract_version(tmp_path):
    """A plain ``open_geotiff`` read carries the contract marker."""
    tiff_path = tmp_path / "contract_v1_eager.tif"
    _write_small_tiff(str(tiff_path))

    attrs = open_geotiff(str(tiff_path)).attrs

    assert attrs[_CONTRACT_KEY] == 1


def test_dask_numpy_stamps_contract_version(tmp_path):
    """A chunked ``open_geotiff(chunks=...)`` read carries the contract marker."""
    tiff_path = tmp_path / "contract_v1_dask.tif"
    _write_small_tiff(str(tiff_path))

    attrs = open_geotiff(str(tiff_path), chunks=32).attrs

    assert attrs[_CONTRACT_KEY] == 1


@_gpu_only
def test_gpu_stamps_contract_version(tmp_path):
    """An ``open_geotiff(gpu=True)`` read carries the contract marker."""
    tiff_path = tmp_path / "contract_v1_gpu.tif"
    _write_small_tiff(str(tiff_path))

    attrs = open_geotiff(str(tiff_path), gpu=True).attrs

    assert attrs[_CONTRACT_KEY] == 1


@_gpu_only
def test_dask_gpu_stamps_contract_version(tmp_path):
    """An ``open_geotiff(gpu=True, chunks=...)`` read carries the contract marker."""
    tiff_path = tmp_path / "contract_v1_dask_gpu.tif"
    _write_small_tiff(str(tiff_path))

    attrs = open_geotiff(str(tiff_path), gpu=True, chunks=32).attrs

    assert attrs[_CONTRACT_KEY] == 1


def test_vrt_eager_stamps_contract_version(tmp_path):
    """An eager ``read_vrt`` read carries the contract marker."""
    source_tiff = tmp_path / "contract_v1_vrt_source.tif"
    vrt_file = tmp_path / "contract_v1_vrt_eager.vrt"
    _write_small_tiff(str(source_tiff))
    # The VRT references its source by bare file name (relative to the VRT).
    _write_minimal_vrt(
        vrt_file, os.path.basename(source_tiff), height=64, width=64,
    )

    attrs = read_vrt(str(vrt_file)).attrs

    assert attrs[_CONTRACT_KEY] == 1


def test_vrt_chunked_stamps_contract_version(tmp_path):
    """A chunked ``read_vrt(chunks=...)`` read carries the contract marker."""
    source_tiff = tmp_path / "contract_v1_vrt_chunked_source.tif"
    vrt_file = tmp_path / "contract_v1_vrt_chunked.vrt"
    _write_small_tiff(str(source_tiff))
    # The VRT references its source by bare file name (relative to the VRT).
    _write_minimal_vrt(
        vrt_file, os.path.basename(source_tiff), height=64, width=64,
    )

    attrs = read_vrt(str(vrt_file), chunks=32).attrs

    assert attrs[_CONTRACT_KEY] == 1
Loading