From deb6eae158dcbe9ac597c96541388fb8a2e378cb Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 19 May 2026 11:58:13 -0700 Subject: [PATCH 1/4] geotiff: push down byte-affecting validation into array-level writers (#2138) Hardens the lower-level write entry points so direct callers cannot silently bypass the checks that ``to_geotiff`` runs upstream. Six byte-affecting gaps now fire at ``_write`` / ``_write_streaming``: compression-name validation against the canonical list, JPEG opt-in gate (#1845), ``max_z_error`` sign + LERC pairing, ``crs_epsg`` bool rejection, defensive copy on the NaN-to-sentinel rewrite, and ``float16`` / ``bool_`` auto-promotion. Renames ``write`` / ``write_streaming`` / ``read_to_array`` to underscore-prefixed canonical names, with the old names kept as aliases to avoid breaking the ~50 internal call sites that import them. Adds 26 push-down + byte-parity tests and updates module docstrings to spell out the private contract. --- xrspatial/geotiff/_reader.py | 29 +- xrspatial/geotiff/_writer.py | 262 +++++++++++++- xrspatial/geotiff/_writers/eager.py | 13 + xrspatial/geotiff/tests/test_jpeg.py | 18 +- .../test_lowlevel_write_pushdown_2138.py | 320 ++++++++++++++++++ .../tests/test_to_geotiff_zero_bands_2095.py | 10 +- xrspatial/geotiff/tests/test_writer.py | 7 +- 7 files changed, 636 insertions(+), 23 deletions(-) create mode 100644 xrspatial/geotiff/tests/test_lowlevel_write_pushdown_2138.py diff --git a/xrspatial/geotiff/_reader.py b/xrspatial/geotiff/_reader.py index b17cd13ef..3b7474ad9 100644 --- a/xrspatial/geotiff/_reader.py +++ b/xrspatial/geotiff/_reader.py @@ -1,4 +1,20 @@ -"""TIFF/COG reader: tile/strip assembly, windowed reads, HTTP range requests.""" +"""TIFF/COG reader: tile/strip assembly, windowed reads, HTTP range requests. + +This module is private to :mod:`xrspatial.geotiff`. The supported public +read entry points are :func:`xrspatial.geotiff.open_geotiff`, +:func:`xrspatial.geotiff.read_geotiff_gpu`, +:func:`xrspatial.geotiff.read_geotiff_dask`, and +:func:`xrspatial.geotiff.read_vrt`. Direct callers of the helpers +defined here bypass the DataArray-level work that the public wrappers +perform (ambiguous-metadata fail-closed, nodata-to-NaN promotion, +``masked_nodata`` attr, ``transform`` / ``crs`` attrs population) and +have to replicate those steps by hand. See issue #2138. + +For source modules inside :mod:`xrspatial.geotiff`, the canonical +internal name for the array-level reader is :func:`_read_to_array`. +The non-underscored :func:`read_to_array` is kept as an alias for +internal call sites that pre-date the rename. +""" from __future__ import annotations import math @@ -3185,13 +3201,13 @@ def _miniswhite_inverted_nodata(nodata, ifd: IFD, dtype: np.dtype): return nodata -def read_to_array(source, *, window=None, overview_level: int | None = None, +def _read_to_array(source, *, window=None, overview_level: int | None = None, band: int | None = None, max_pixels: int = MAX_PIXELS_DEFAULT, max_cloud_bytes=_MAX_CLOUD_BYTES_SENTINEL, allow_rotated: bool = False, ) -> tuple[np.ndarray, GeoInfo]: - """Read a GeoTIFF/COG to a numpy array. + """Read a GeoTIFF/COG to a numpy array (module-private). Parameters ---------- @@ -3428,3 +3444,10 @@ def read_to_array(source, *, window=None, overview_level: int | None = None, close_sidecar(sidecar) return arr, geo_info + + +# Backward-compatible alias for internal call sites that pre-date the +# rename to :func:`_read_to_array`. New code inside +# ``xrspatial.geotiff`` should import :func:`_read_to_array` directly. +# See issue #2138. +read_to_array = _read_to_array diff --git a/xrspatial/geotiff/_writer.py b/xrspatial/geotiff/_writer.py index 97df27044..8d8b63b02 100644 --- a/xrspatial/geotiff/_writer.py +++ b/xrspatial/geotiff/_writer.py @@ -1,4 +1,21 @@ -"""GeoTIFF/COG writer.""" +"""GeoTIFF/COG writer (private). + +This module is private to :mod:`xrspatial.geotiff`. The supported public +write entry points are :func:`xrspatial.geotiff.to_geotiff`, +:func:`xrspatial.geotiff.write_geotiff_gpu`, and +:func:`xrspatial.geotiff.write_vrt`. Direct callers of the helpers +defined here bypass the DataArray-level validation that the public +wrappers run (``transform`` derivation, ``masked_nodata`` handling, +``band``-first dim reordering, ...) and must accept the resulting byte +divergence. See issue #2138. + +For source modules inside :mod:`xrspatial.geotiff`, the canonical +internal names for the array-level write functions are +:func:`_write` and :func:`_write_streaming`. The non-underscored +:func:`write` and :func:`write_streaming` are kept as aliases for +internal call sites that pre-date the rename and may be removed in a +future release. +""" from __future__ import annotations import math @@ -1632,10 +1649,125 @@ def _assemble_cog_layout(header_size: int, # --------------------------------------------------------------------------- -# Public write function +# Array-level write entry points (module-private; see module docstring) # --------------------------------------------------------------------------- -def write(data: np.ndarray, path: str, *, + +def _validate_lowlevel_write_kwargs(*, + compression, + allow_internal_only_jpeg: bool, + max_z_error, + crs_epsg, + crs_wkt, + allow_unparseable_crs: bool, + entry_point: str) -> None: + """Push-down byte-affecting validation for ``_write`` / ``_write_streaming``. + + Centralises the checks that the public :func:`to_geotiff` wrapper + runs but that the array-level entry points used to skip, so a + direct caller cannot quietly produce a different file. See issue + #2138 for the gap inventory. The checks are byte-affecting: each + of them changes the bytes on disk (or refuses to write garbage), + so they belong with the array-level writer rather than the + DataArray wrapper. + + Parameters + ---------- + compression : str or other + Codec name. Validated against :data:`_VALID_COMPRESSIONS` and + the JPEG-in-TIFF opt-in gate. Non-string values are not + rejected here; ``_compression_tag`` raises downstream. + allow_internal_only_jpeg : bool + If False (the default), ``compression='jpeg'`` is rejected + because the encoder writes JFIF tiles without the + ``JPEGTables`` tag (issue #1845). + max_z_error : float + Per-pixel LERC error budget. Must be ``>= 0`` and is only + meaningful with ``compression='lerc'``. + crs_epsg : int or None + EPSG kwarg. ``bool`` is rejected because it would otherwise + be silently written as ``EPSG=1`` / ``EPSG=0``. + crs_wkt : str or None + WKT fallback. Validated against the structural ``_looks_like_wkt`` + gate unless ``allow_unparseable_crs=True``. + allow_unparseable_crs : bool + Opt-in to keep pre-#1929 behaviour for unparseable CRS strings. + entry_point : str + Name of the calling function ('_write' or '_write_streaming'), + used in error messages so the source of the rejection is clear. + """ + from ._attrs import _VALID_COMPRESSIONS + from ._crs import _validate_crs_fallback + + # Gap #1: compression name validation against the canonical list. + # ``_compression_tag`` already raises on unknown names with a list, + # but only ``str`` inputs reach it; non-string values would land in + # ``compression_name.lower()`` and surface as ``AttributeError``. + if isinstance(compression, str): + if compression.lower() not in _VALID_COMPRESSIONS: + raise ValueError( + f"Unknown compression {compression!r} (in {entry_point}). " + f"Valid options: {list(_VALID_COMPRESSIONS)}.") + elif compression is not None: + raise TypeError( + f"compression must be a str (in {entry_point}); " + f"got {type(compression).__name__}.") + + # Gap #2: JPEG opt-in gate. The encoder writes self-contained JFIF + # streams without the JPEGTables tag (347), so the file decodes + # through xrspatial but not through libtiff / GDAL / rasterio. + # ``to_geotiff`` already enforces this; surface the same gate here + # so direct callers cannot silently produce a non-interop file. + if (isinstance(compression, str) + and compression.lower() == 'jpeg' + and not allow_internal_only_jpeg): + raise ValueError( + "compression='jpeg' is not supported: the encoder writes " + "self-contained JFIF streams without the required " + "JPEGTables tag (347), so other readers (libtiff, GDAL, " + "rasterio) reject the file. Use 'deflate', 'zstd', or " + "'lzw' instead. Pass allow_internal_only_jpeg=True to " + "opt in to the experimental internal-reader-only path " + "(issue #1845).") + + # Gap #3: max_z_error must be >= 0 and only applies with LERC. + # ``_write_tiled`` / ``_write_stripped`` silently ignore the value + # on non-LERC codecs, which lets a typo or a stale arg slip past. + if max_z_error is not None and max_z_error < 0: + raise ValueError( + f"max_z_error must be >= 0, got {max_z_error} " + f"(in {entry_point})") + if (max_z_error is not None and max_z_error != 0 + and (not isinstance(compression, str) + or compression.lower() != 'lerc')): + raise ValueError( + f"max_z_error is only valid with compression='lerc' " + f"(in {entry_point}); got compression={compression!r}.") + + # Gap #4: ``crs_epsg`` must not be ``bool``. ``bool`` is an ``int`` + # subclass in Python, so ``crs_epsg=True`` would otherwise be + # written as ``EPSG=1`` (and ``False`` as ``EPSG=0``) -- neither + # resolves with any CRS database. Mirrors the public + # ``_validate_crs_arg`` gate. + if isinstance(crs_epsg, bool): + raise ValueError( + f"crs_epsg must be an int (EPSG code) or None (in " + f"{entry_point}); got bool ({crs_epsg!r}). bool is an int " + f"subclass in Python, so True/False would otherwise be " + f"written as EPSG=1 / EPSG=0.") + + # Gap #9: refuse to land an unvalidatable string in + # GTCitationGeoKey unless the caller opts in. ``to_geotiff`` runs + # this against the post-``_wkt_to_epsg`` fallback; ``_write`` only + # sees the raw ``crs_wkt`` kwarg, so apply the same structural + # check here. This is a strict subset of the upstream gate -- the + # upstream call already rejected the same input, so this is a + # no-op when ``to_geotiff`` is the caller. + if crs_wkt is not None and crs_epsg is None: + _validate_crs_fallback(crs_wkt, allow_unparseable_crs) + + +def _write(data: np.ndarray, path: str, *, geo_transform: GeoTransform | None = None, crs_epsg: int | None = None, crs_wkt: str | None = None, @@ -1657,7 +1789,9 @@ def write(data: np.ndarray, path: str, *, bigtiff: bool | None = None, max_z_error: float = 0.0, photometric='auto', - restore_sentinel: bool = True) -> None: + restore_sentinel: bool = True, + allow_internal_only_jpeg: bool = False, + allow_unparseable_crs: bool = False) -> None: """Write a numpy array as a GeoTIFF or COG. Parameters @@ -1729,17 +1863,82 @@ def write(data: np.ndarray, path: str, *, max_z_error : float Per-pixel error budget for LERC compression. ``0.0`` (default) is lossless. Only valid with ``compression='lerc'``. + allow_internal_only_jpeg : bool + Opt in to the experimental JPEG-in-TIFF path. The encoder + writes self-contained JFIF tiles without the ``JPEGTables`` + tag (347), so external readers (libtiff, GDAL, rasterio) + reject the file. ``False`` (default) rejects + ``compression='jpeg'`` with a clear error. See issue #1845. + allow_unparseable_crs : bool + Opt in to writing a ``crs_wkt`` string that does not parse as + WKT and does not resolve via pyproj into ``GTCitationGeoKey`` + (pre-#1929 behaviour). Default ``False`` refuses to land + unvalidatable strings in the citation field. + + Notes + ----- + This is a module-private array-level entry point. The supported + public surface is :func:`xrspatial.geotiff.to_geotiff`. Several + DataArray-level checks (3D dim-order rejection, ``band``-first + reorder, fail-closed georeferenced-transform, ``masked_nodata`` + handling) are deliberately *not* performed here -- they need + ``data.dims`` / ``data.attrs`` state that the array-level entry + point does not have. Direct callers that need those checks should + use :func:`xrspatial.geotiff.to_geotiff` instead. See issue #2138. """ # Issue #2075: reject empty spatial shapes before any IFD layout # math runs. ``to_geotiff`` already guards this for DataArray inputs, - # but ``write`` is also called directly by tests and by the GPU - # path, so guard here too. ``write`` always receives band-last + # but ``_write`` is also called directly by tests and by the GPU + # path, so guard here too. ``_write`` always receives band-last # arrays (eager moveaxis ran upstream), so the ndim-based pair # picked by ``_validate_writer_spatial_shape`` without ``dims`` is # correct. from ._validation import _validate_writer_spatial_shape _validate_writer_spatial_shape( - getattr(data, 'shape', None), entry_point="write") + getattr(data, 'shape', None), entry_point="_write") + + # Issue #2138: push down byte-affecting validation that the public + # ``to_geotiff`` wrapper used to perform on its own. The wrapper + # still runs the same checks upstream, so this is a no-op when + # ``to_geotiff`` is the caller; the gate matters for direct callers + # of ``_write`` (the GPU CPU-fallback path, internal tests, and + # downstream code that imports the array-level entry point). + _validate_lowlevel_write_kwargs( + compression=compression, + allow_internal_only_jpeg=allow_internal_only_jpeg, + max_z_error=max_z_error, + crs_epsg=crs_epsg, + crs_wkt=crs_wkt, + allow_unparseable_crs=allow_unparseable_crs, + entry_point="_write", + ) + + # Issue #2138 gap #7: auto-promote ``float16`` and ``bool_`` before + # the dtype mapper. ``to_geotiff`` already does this upstream; the + # push-down here lets direct callers feed unsupported dtypes without + # tripping ``numpy_to_tiff_dtype`` with a less actionable error. + if isinstance(data, np.ndarray): + if data.dtype == np.float16: + data = data.astype(np.float32) + elif data.dtype == np.bool_: + data = data.astype(np.uint8) + + # Issue #2138 gap #5: defensive copy on the NaN-to-sentinel rewrite. + # ``to_geotiff`` already copies for DataArray inputs, but a direct + # caller passing a NaN-containing float array would otherwise have + # their buffer mutated (numpy arrays are passed by reference). The + # rewrite is gated on ``restore_sentinel`` so DataArrays carrying + # ``masked_nodata=False`` (read with ``mask_nodata=False``) keep + # their literal sentinel bytes untouched (#1988). + if (isinstance(data, np.ndarray) + and nodata is not None + and data.dtype.kind == 'f' + and not np.isnan(nodata) + and restore_sentinel): + nan_mask = np.isnan(data) + if nan_mask.any(): + data = data.copy() + data[nan_mask] = data.dtype.type(nodata) comp_tag = _compression_tag(compression) pred_int = normalize_predictor(predictor, data.dtype, comp_tag) @@ -1891,6 +2090,13 @@ def write(data: np.ndarray, path: str, *, _write_bytes(file_bytes, path) +# Backward-compatible alias for internal call sites that pre-date the +# rename to :func:`_write`. New code inside ``xrspatial.geotiff`` should +# import :func:`_write` directly. See issue #2138 and the module +# docstring. +write = _write + + def _compress_block(arr, block_w, block_h, samples, dtype, bytes_per_sample, predictor: int, compression, compression_level=None, max_z_error: float = 0.0, gil_friendly: bool = False): @@ -1996,7 +2202,7 @@ def _should_use_bigtiff_streaming(uncompressed_bytes: int, return uncompressed_bytes + reserved_overhead > UINT32_MAX -def write_streaming(dask_data, path: str, *, +def _write_streaming(dask_data, path: str, *, geo_transform: 'GeoTransform | None' = None, crs_epsg: int | None = None, crs_wkt: str | None = None, @@ -2016,7 +2222,9 @@ def write_streaming(dask_data, path: str, *, streaming_buffer_bytes: int = 256 * 1024 * 1024, max_z_error: float = 0.0, photometric='auto', - restore_sentinel: bool = True) -> None: + restore_sentinel: bool = True, + allow_internal_only_jpeg: bool = False, + allow_unparseable_crs: bool = False) -> None: """Write a dask array as a GeoTIFF by streaming pixel data. For tiled output, each tile-row is computed in horizontal segments @@ -2040,6 +2248,18 @@ def write_streaming(dask_data, path: str, *, Soft cap on bytes materialised per dask compute call when writing tiles. Defaults to 256 MB. Values smaller than one tile column are clamped up to one tile column. + allow_internal_only_jpeg : bool + Opt in to the experimental JPEG-in-TIFF path. See ``_write`` for + the rationale. Default ``False``. + allow_unparseable_crs : bool + Opt in to writing an unparseable ``crs_wkt`` string into + ``GTCitationGeoKey``. Default ``False``. + + Notes + ----- + This is a module-private array-level entry point. The supported + public surface is :func:`xrspatial.geotiff.to_geotiff`. See + :func:`_write` and issue #2138 for the full rationale. """ import os import tempfile @@ -2053,11 +2273,24 @@ def write_streaming(dask_data, path: str, *, # Issue #2075: reject empty spatial shapes before tile/strip count # math (``math.ceil(width / tw)`` etc. below at the layout block) # silently produces zero entries. ``to_geotiff`` already validates - # this upstream, but direct callers of ``write_streaming`` go + # this upstream, but direct callers of ``_write_streaming`` go # through here too. from ._validation import _validate_writer_spatial_shape _validate_writer_spatial_shape( - getattr(dask_data, 'shape', None), entry_point="write_streaming") + getattr(dask_data, 'shape', None), entry_point="_write_streaming") + + # Issue #2138: push-down validation for byte-affecting kwargs. + # ``to_geotiff`` runs these upstream as well, so this is a no-op + # on that path; the gate matters for direct callers. + _validate_lowlevel_write_kwargs( + compression=compression, + allow_internal_only_jpeg=allow_internal_only_jpeg, + max_z_error=max_z_error, + crs_epsg=crs_epsg, + crs_wkt=crs_wkt, + allow_unparseable_crs=allow_unparseable_crs, + entry_point="_write_streaming", + ) height, width = dask_data.shape[:2] samples = dask_data.shape[2] if dask_data.ndim == 3 else 1 @@ -2508,6 +2741,13 @@ def write_streaming(dask_data, path: str, *, raise +# Backward-compatible alias for internal call sites that pre-date the +# rename to :func:`_write_streaming`. New code inside +# ``xrspatial.geotiff`` should import :func:`_write_streaming` directly. +# See issue #2138. +write_streaming = _write_streaming + + def _is_fsspec_uri(path) -> bool: """Check if a path is a fsspec-compatible URI (string only).""" if not isinstance(path, str): diff --git a/xrspatial/geotiff/_writers/eager.py b/xrspatial/geotiff/_writers/eager.py index 012ef683a..e945f4bd1 100644 --- a/xrspatial/geotiff/_writers/eager.py +++ b/xrspatial/geotiff/_writers/eager.py @@ -668,6 +668,13 @@ def to_geotiff(data: xr.DataArray | np.ndarray, max_z_error=max_z_error, photometric=photometric, restore_sentinel=restore_sentinel, + # ``to_geotiff`` ran the JPEG opt-in and the CRS + # fallback gates upstream; forwarding the kwargs lets + # ``_write_streaming``'s push-down check stay aligned + # rather than rejecting input the wrapper accepted. + # Issue #2138. + allow_internal_only_jpeg=allow_internal_only_jpeg, + allow_unparseable_crs=allow_unparseable_crs, ) return path @@ -756,6 +763,12 @@ def to_geotiff(data: xr.DataArray | np.ndarray, max_z_error=max_z_error, photometric=photometric, restore_sentinel=restore_sentinel, + # ``to_geotiff`` ran the JPEG opt-in and the CRS fallback + # gates upstream; forwarding the kwargs keeps ``_write``'s + # push-down check from rejecting input the wrapper accepted. + # Issue #2138. + allow_internal_only_jpeg=allow_internal_only_jpeg, + allow_unparseable_crs=allow_unparseable_crs, ) return path diff --git a/xrspatial/geotiff/tests/test_jpeg.py b/xrspatial/geotiff/tests/test_jpeg.py index 66309ecf7..55c447033 100644 --- a/xrspatial/geotiff/tests/test_jpeg.py +++ b/xrspatial/geotiff/tests/test_jpeg.py @@ -77,7 +77,8 @@ def test_grayscale_tiled(self, tmp_path): rng = np.random.RandomState(1050) expected = rng.randint(50, 200, (32, 32), dtype=np.uint8) path = str(tmp_path / 'gray_1050_tiled.tif') - write(expected, path, compression='jpeg', tiled=True, tile_size=16) + write(expected, path, compression='jpeg', tiled=True, tile_size=16, + allow_internal_only_jpeg=True) arr, geo = read_to_array(path) assert arr.shape == expected.shape @@ -89,7 +90,8 @@ def test_grayscale_stripped(self, tmp_path): rng = np.random.RandomState(1050) expected = rng.randint(50, 200, (32, 32), dtype=np.uint8) path = str(tmp_path / 'gray_1050_stripped.tif') - write(expected, path, compression='jpeg', tiled=False) + write(expected, path, compression='jpeg', tiled=False, + allow_internal_only_jpeg=True) arr, geo = read_to_array(path) assert arr.shape == expected.shape @@ -104,7 +106,8 @@ def test_rgb_tiled(self, tmp_path): b = np.full((32, 32), 128, dtype=np.uint8) expected = np.stack([r, g, b], axis=2) path = str(tmp_path / 'rgb_1050_tiled.tif') - write(expected, path, compression='jpeg', tiled=True, tile_size=16) + write(expected, path, compression='jpeg', tiled=True, tile_size=16, + allow_internal_only_jpeg=True) arr, geo = read_to_array(path) assert arr.shape == expected.shape @@ -118,19 +121,22 @@ def test_float_data_rejected(self, tmp_path): arr = np.zeros((8, 8), dtype=np.float32) path = str(tmp_path / 'bad_1050.tif') with pytest.raises(ValueError, match="uint8"): - write(arr, path, compression='jpeg') + write(arr, path, compression='jpeg', + allow_internal_only_jpeg=True) def test_uint16_data_rejected(self, tmp_path): arr = np.zeros((8, 8), dtype=np.uint16) path = str(tmp_path / 'bad16_1050.tif') with pytest.raises(ValueError, match="uint8"): - write(arr, path, compression='jpeg') + write(arr, path, compression='jpeg', + allow_internal_only_jpeg=True) def test_4band_rejected(self, tmp_path): arr = np.zeros((8, 8, 4), dtype=np.uint8) path = str(tmp_path / 'bad4b_1050.tif') with pytest.raises(ValueError, match="1 or 3 bands"): - write(arr, path, compression='jpeg') + write(arr, path, compression='jpeg', + allow_internal_only_jpeg=True) class TestWriteGeotiffJpeg: diff --git a/xrspatial/geotiff/tests/test_lowlevel_write_pushdown_2138.py b/xrspatial/geotiff/tests/test_lowlevel_write_pushdown_2138.py new file mode 100644 index 000000000..9bcf185eb --- /dev/null +++ b/xrspatial/geotiff/tests/test_lowlevel_write_pushdown_2138.py @@ -0,0 +1,320 @@ +"""Issue #2138: push-down byte-affecting validation for the +array-level write entry points (``_write`` / ``_write_streaming``) +and byte-parity with ``to_geotiff``. + +The public ``to_geotiff`` wrapper runs several checks before its +array-level callees: + +* compression-name validation against ``_VALID_COMPRESSIONS`` +* JPEG-in-TIFF opt-in gate (issue #1845) +* ``max_z_error`` sign + LERC-only pairing +* ``crs_epsg`` bool rejection +* unparseable-CRS fail-closed (issue #1929) +* NaN-to-sentinel rewrite with a defensive copy +* ``float16`` / ``bool_`` auto-promotion + +This file covers the push-down: each of those gaps must now fire +inside ``_write`` / ``_write_streaming`` so a direct caller cannot +bypass them. It also covers byte parity between ``_write`` and the +matching ``to_geotiff(xr.DataArray(...))`` call for every entry in +``_VALID_COMPRESSIONS`` -- if the wrapper and the lower-level +function disagree on a single byte, a direct caller silently +produces a different file. +""" +from __future__ import annotations + +import os + +import dask.array as dsk +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import to_geotiff +from xrspatial.geotiff._writer import _write, _write_streaming +from xrspatial.geotiff._reader import _read_to_array + + +def _make_uint8_band(seed: int = 2138, shape=(32, 32)) -> np.ndarray: + """Deterministic 2D uint8 array used by the byte-parity tests.""" + rng = np.random.RandomState(seed) + return rng.randint(0, 256, shape, dtype=np.uint8) + + +def _make_float32_band(seed: int = 2138, shape=(32, 32)) -> np.ndarray: + """Deterministic 2D float32 array for codecs that require floats (LERC).""" + rng = np.random.RandomState(seed) + return rng.rand(*shape).astype(np.float32) + + +def _bytes(path: str) -> bytes: + with open(path, "rb") as f: + return f.read() + + +# --------------------------------------------------------------------------- +# Gap #1: compression name validation +# --------------------------------------------------------------------------- + + +class TestCompressionNamePushdown: + """``_write`` must reject unknown compression names with the canonical + list, the same way ``to_geotiff`` does. Before #2138 the array-level + entry point relied on ``_compression_tag`` which raised but without + the canonical list.""" + + def test_write_rejects_unknown_compression(self, tmp_path): + arr = _make_uint8_band() + out = str(tmp_path / "tmp_2138_unknown_comp.tif") + with pytest.raises(ValueError) as excinfo: + _write(arr, out, compression="zstandard") + msg = str(excinfo.value) + assert "zstandard" in msg + # Canonical list is part of the new wording. + assert "zstd" in msg + + def test_write_streaming_rejects_unknown_compression(self, tmp_path): + arr = dsk.from_array(_make_uint8_band(), chunks=(16, 16)) + out = str(tmp_path / "tmp_2138_unknown_comp_streaming.tif") + with pytest.raises(ValueError, match="zstandard"): + _write_streaming(arr, out, compression="zstandard") + + +# --------------------------------------------------------------------------- +# Gap #2: JPEG opt-in gate +# --------------------------------------------------------------------------- + + +class TestJpegOptInPushdown: + """``_write`` must refuse ``compression='jpeg'`` unless the caller + opts in, mirroring ``to_geotiff``'s gate. Before #2138 direct + callers could silently produce a JFIF-tile file that other readers + reject.""" + + def test_write_rejects_jpeg_without_opt_in(self, tmp_path): + arr = _make_uint8_band() + out = str(tmp_path / "tmp_2138_jpeg_no_optin.tif") + with pytest.raises(ValueError, match="allow_internal_only_jpeg"): + _write(arr, out, compression="jpeg") + + def test_write_accepts_jpeg_with_opt_in(self, tmp_path): + arr = _make_uint8_band() + out = str(tmp_path / "tmp_2138_jpeg_optin.tif") + _write(arr, out, compression="jpeg", + allow_internal_only_jpeg=True) + assert os.path.exists(out) and os.path.getsize(out) > 0 + + def test_write_streaming_rejects_jpeg_without_opt_in(self, tmp_path): + arr = dsk.from_array(_make_uint8_band(), chunks=(16, 16)) + out = str(tmp_path / "tmp_2138_jpeg_streaming.tif") + with pytest.raises(ValueError, match="allow_internal_only_jpeg"): + _write_streaming(arr, out, compression="jpeg") + + +# --------------------------------------------------------------------------- +# Gap #3: max_z_error sign + codec pairing +# --------------------------------------------------------------------------- + + +class TestMaxZErrorPushdown: + def test_write_rejects_negative_max_z_error(self, tmp_path): + arr = _make_float32_band() + out = str(tmp_path / "tmp_2138_negative_mze.tif") + with pytest.raises(ValueError, match="max_z_error"): + _write(arr, out, compression="lerc", max_z_error=-0.01) + + def test_write_rejects_max_z_error_on_non_lerc(self, tmp_path): + arr = _make_float32_band() + out = str(tmp_path / "tmp_2138_mze_zstd.tif") + with pytest.raises(ValueError, match="max_z_error"): + _write(arr, out, compression="zstd", max_z_error=0.05) + + def test_write_streaming_rejects_negative_max_z_error(self, tmp_path): + arr = dsk.from_array(_make_float32_band(), chunks=(16, 16)) + out = str(tmp_path / "tmp_2138_streaming_neg_mze.tif") + with pytest.raises(ValueError, match="max_z_error"): + _write_streaming(arr, out, compression="lerc", + max_z_error=-0.01) + + +# --------------------------------------------------------------------------- +# Gap #4: crs_epsg bool rejection +# --------------------------------------------------------------------------- + + +class TestCrsEpsgBoolPushdown: + """``crs_epsg=True`` would otherwise be written as ``EPSG=1`` because + ``bool`` is an ``int`` subclass in Python. Both the public wrapper + and the array-level entry points must reject it.""" + + def test_write_rejects_bool_crs_epsg(self, tmp_path): + arr = _make_uint8_band() + out = str(tmp_path / "tmp_2138_bool_crs.tif") + with pytest.raises(ValueError, match="bool"): + _write(arr, out, crs_epsg=True) + + def test_write_rejects_false_crs_epsg(self, tmp_path): + arr = _make_uint8_band() + out = str(tmp_path / "tmp_2138_false_crs.tif") + with pytest.raises(ValueError, match="bool"): + _write(arr, out, crs_epsg=False) + + def test_write_streaming_rejects_bool_crs_epsg(self, tmp_path): + arr = dsk.from_array(_make_uint8_band(), chunks=(16, 16)) + out = str(tmp_path / "tmp_2138_streaming_bool_crs.tif") + with pytest.raises(ValueError, match="bool"): + _write_streaming(arr, out, crs_epsg=True) + + +# --------------------------------------------------------------------------- +# Gap #5: defensive copy on NaN-to-sentinel rewrite +# --------------------------------------------------------------------------- + + +class TestNanToSentinelDefensiveCopy: + """``to_geotiff`` rewrites NaN pixels to the nodata sentinel via + ``arr.copy()`` so the caller's buffer is never mutated. Direct + callers of ``_write`` used to skip this and write NaN bytes to + disk. Push the rewrite (and the defensive copy) down so the + invariant holds at every entry point.""" + + def test_write_does_not_mutate_caller_buffer(self, tmp_path): + # Float32 array with a real NaN and a non-NaN nodata sentinel. + arr = np.full((8, 8), 1.5, dtype=np.float32) + arr[2, 3] = np.nan + original = arr.copy() + out = str(tmp_path / "tmp_2138_no_mutate.tif") + _write(arr, out, nodata=-9999.0, compression="zstd") + # Caller's buffer must still carry the NaN it started with. + np.testing.assert_array_equal(np.isnan(arr), np.isnan(original)) + # And the non-NaN positions must be untouched. + finite = ~np.isnan(original) + np.testing.assert_array_equal(arr[finite], original[finite]) + + def test_write_writes_sentinel_in_file(self, tmp_path): + arr = np.full((8, 8), 1.5, dtype=np.float32) + arr[2, 3] = np.nan + out = str(tmp_path / "tmp_2138_sentinel.tif") + _write(arr, out, nodata=-9999.0, compression="zstd") + # ``mask_nodata`` defaults to True on ``open_geotiff`` so the + # sentinel comes back as NaN. Use ``_read_to_array`` (the raw + # buffer) to confirm the sentinel actually hit disk. + decoded, _ = _read_to_array(out) + assert decoded[2, 3] == np.float32(-9999.0) + + +# --------------------------------------------------------------------------- +# Gap #7: float16 / bool_ auto-promotion +# --------------------------------------------------------------------------- + + +class TestDtypePromotionPushdown: + def test_write_promotes_float16(self, tmp_path): + # Float16 is not a TIFF SampleFormat; the wrapper promotes to + # float32 before encode, and the push-down means a direct + # caller gets the same behaviour rather than a dtype-mapper + # ``ValueError``. + arr = (np.linspace(0, 1, 64, dtype=np.float16).reshape(8, 8)) + out = str(tmp_path / "tmp_2138_float16.tif") + _write(arr, out, compression="zstd") + decoded, _ = _read_to_array(out) + assert decoded.dtype == np.float32 + np.testing.assert_allclose(decoded, arr.astype(np.float32)) + + def test_write_promotes_bool(self, tmp_path): + arr = np.array([[True, False], [False, True]], dtype=np.bool_) + out = str(tmp_path / "tmp_2138_bool.tif") + _write(arr, out, compression="zstd") + decoded, _ = _read_to_array(out) + assert decoded.dtype == np.uint8 + np.testing.assert_array_equal(decoded, arr.astype(np.uint8)) + + +# --------------------------------------------------------------------------- +# Byte-parity: _write vs to_geotiff +# --------------------------------------------------------------------------- + + +# JPEG omitted from the byte-parity sweep on purpose: it requires the +# opt-in, which the wrapper emits a runtime warning for, and JPEG is +# lossy so trivial seed changes can shift bytes. ``_write`` is exercised +# elsewhere; the parity sweep covers the lossless codec set that direct +# callers reach for first. +_PARITY_CODECS = ( + "none", + "deflate", + "lzw", + "packbits", + "zstd", + "lz4", +) + + +@pytest.mark.parametrize("compression", _PARITY_CODECS) +def test_write_vs_to_geotiff_byte_parity_uint8(compression, tmp_path): + """``_write(arr, ...)`` and ``to_geotiff(xr.DataArray(arr), ...)`` + must produce byte-identical files for every entry in + ``_VALID_COMPRESSIONS`` that round-trips losslessly. A divergence + here is exactly the silent-different-file footgun #2138 names. + """ + arr = _make_uint8_band(seed=2138 + hash(compression) % 1000) + out_direct = str(tmp_path / f"tmp_2138_direct_{compression}.tif") + out_wrapper = str(tmp_path / f"tmp_2138_wrapper_{compression}.tif") + _write(arr, out_direct, compression=compression, tiled=True, + tile_size=16) + to_geotiff(xr.DataArray(arr, dims=("y", "x")), out_wrapper, + compression=compression, tiled=True, tile_size=16) + assert _bytes(out_direct) == _bytes(out_wrapper), ( + f"byte-parity violated for compression={compression!r}: " + f"_write and to_geotiff produced different output files." + ) + + +@pytest.mark.parametrize("compression", ("zstd", "deflate", "lzw")) +def test_write_streaming_vs_to_geotiff_byte_parity_uint8( + compression, tmp_path): + """Same idea for the dask streaming path. ``to_geotiff`` on a + dask-backed DataArray dispatches into ``_write_streaming``; feed + ``_write_streaming`` and the wrapper the same dask source and a + matching tile geometry and they must agree byte-for-byte.""" + raw = _make_uint8_band(seed=4276 + hash(compression) % 1000, + shape=(48, 48)) + chunks = (16, 16) + dask_arr = dsk.from_array(raw, chunks=chunks) + + out_direct = str( + tmp_path / f"tmp_2138_direct_streaming_{compression}.tif" + ) + out_wrapper = str( + tmp_path / f"tmp_2138_wrapper_streaming_{compression}.tif" + ) + + _write_streaming(dask_arr, out_direct, compression=compression, + tiled=True, tile_size=16) + to_geotiff(xr.DataArray(dask_arr, dims=("y", "x")), out_wrapper, + compression=compression, tiled=True, tile_size=16) + assert _bytes(out_direct) == _bytes(out_wrapper), ( + f"byte-parity violated for streaming compression={compression!r}" + ) + + +def test_write_lerc_lossless_round_trip(tmp_path): + """LERC with ``max_z_error=0`` is lossless. Confirm the codec + survives the push-down and still round-trips bit-exactly when the + pairing check passes.""" + arr = _make_float32_band() + out = str(tmp_path / "tmp_2138_lerc_lossless.tif") + _write(arr, out, compression="lerc", max_z_error=0.0) + decoded, _ = _read_to_array(out) + np.testing.assert_array_equal(decoded, arr) + + +def test_aliases_match_underscore_names(): + """``write`` / ``write_streaming`` / ``read_to_array`` must be the + exact same objects as their underscore-prefixed canonical names so + backward-compatible internal callers do not silently dispatch + into stale copies.""" + from xrspatial.geotiff import _reader, _writer + assert _writer.write is _writer._write + assert _writer.write_streaming is _writer._write_streaming + assert _reader.read_to_array is _reader._read_to_array diff --git a/xrspatial/geotiff/tests/test_to_geotiff_zero_bands_2095.py b/xrspatial/geotiff/tests/test_to_geotiff_zero_bands_2095.py index 4a8e16d07..9fc9380f8 100644 --- a/xrspatial/geotiff/tests/test_to_geotiff_zero_bands_2095.py +++ b/xrspatial/geotiff/tests/test_to_geotiff_zero_bands_2095.py @@ -99,7 +99,11 @@ def test_write_band_last_zero_bands_direct(tmp_path): # also contains the substring "write" further on, so an `in` # check would not distinguish ``write`` from ``write_streaming`` # or ``write_geotiff_gpu``). - assert msg.startswith("write cannot write") + # The array-level entry point was renamed from ``write`` to + # ``_write`` in #2138 to mark it as module-private. ``write`` is + # kept as a backward-compatible alias, so the entry-point token in + # the error message reflects the underlying function name. + assert msg.startswith("_write cannot write") assert "0 bands" in msg or "no bands" in msg.lower() assert not out.exists() @@ -115,7 +119,9 @@ def test_write_streaming_zero_bands_direct(tmp_path): with pytest.raises(ValueError) as excinfo: write_streaming(arr, str(out)) msg = str(excinfo.value) - assert msg.startswith("write_streaming cannot write") + # Renamed to ``_write_streaming`` in #2138; ``write_streaming`` + # remains a backward-compatible alias. + assert msg.startswith("_write_streaming cannot write") assert "0 bands" in msg or "no bands" in msg.lower() assert not out.exists() diff --git a/xrspatial/geotiff/tests/test_writer.py b/xrspatial/geotiff/tests/test_writer.py index 8a33a375b..b5033e618 100644 --- a/xrspatial/geotiff/tests/test_writer.py +++ b/xrspatial/geotiff/tests/test_writer.py @@ -100,5 +100,10 @@ def test_predictor_deflate(self, tmp_path): class TestWriteInvalidInput: def test_unsupported_compression(self, tmp_path): arr = np.zeros((4, 4), dtype=np.float32) - with pytest.raises(ValueError, match="Unsupported compression"): + # Issue #2138 pushed the canonical compression-list check from + # ``to_geotiff`` down into ``_write`` so direct callers get the + # same actionable error as the public wrapper. The wording + # shifted from ``_compression_tag``'s "Unsupported compression" + # to the wrapper's "Unknown compression" + canonical list. + with pytest.raises(ValueError, match="(Unsupported|Unknown) compression"): write(arr, str(tmp_path / 'bad.tif'), compression='bzip2') From b81d298fadede2fa210d76c8fb77dd7ad309ebbe Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 19 May 2026 12:04:43 -0700 Subject: [PATCH 2/4] geotiff: thread JPEG opt-in through VRT tile path and drop write re-export (#2138) Two follow-ups uncovered by self-review of the push-down work: * ``_write_vrt_tiled`` / ``_write_single_tile`` now forward ``allow_internal_only_jpeg`` and ``allow_unparseable_crs`` to the per-tile ``_write`` call. Without this, ``to_geotiff(da, '...vrt', compression='jpeg', allow_internal_only_jpeg=True)`` would clear the upstream gate then trip the new push-down gate inside ``_write``. * ``xrspatial/geotiff/__init__.py`` no longer does ``from ._writer import write``. The import bound ``write`` as an attribute of ``xrspatial.geotiff`` even though the name was not in ``__all__`` and not part of the documented public API. Mirrors the ``read_to_array`` cleanup from #1708. Adds a namespace-no-leak regression test alongside the existing ``read_to_array`` pin. --- xrspatial/geotiff/__init__.py | 6 ++++- xrspatial/geotiff/_writers/eager.py | 25 ++++++++++++++----- .../test_lowlevel_write_pushdown_2138.py | 16 ++++++++++++ 3 files changed, 40 insertions(+), 7 deletions(-) diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index 342811f00..7ac437cfc 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -109,7 +109,11 @@ _validate_dtype_cast, _validate_tile_size_arg, ) -from ._writer import write +# ``_writer.write`` (alias for ``_writer._write``) is module-private; +# see ``_writer.py`` docstring and issue #2138. The public eager write +# surface is :func:`to_geotiff`; do not re-export the array-level +# entry point here. The dotted path ``xrspatial.geotiff._writer._write`` +# still works for the handful of internal call sites that need it. from ._writers.eager import to_geotiff # Re-export only; called by xrspatial/geotiff/tests/test_nodata_no_extra_copy_1553.py. from ._writers.eager import _write_single_tile # noqa: F401 diff --git a/xrspatial/geotiff/_writers/eager.py b/xrspatial/geotiff/_writers/eager.py index e945f4bd1..c61240588 100644 --- a/xrspatial/geotiff/_writers/eager.py +++ b/xrspatial/geotiff/_writers/eager.py @@ -439,7 +439,8 @@ def to_geotiff(data: xr.DataArray | np.ndarray, bigtiff=bigtiff, max_z_error=max_z_error, photometric=photometric, - allow_unparseable_crs=allow_unparseable_crs) + allow_unparseable_crs=allow_unparseable_crs, + allow_internal_only_jpeg=allow_internal_only_jpeg) return path # Dispatch to write_geotiff_gpu when GPU was selected (explicit @@ -784,7 +785,9 @@ def _write_single_tile(chunk_data, path, geo_transform, epsg, wkt, gdal_metadata_xml=None, extra_tags=None, photometric: str | int = 'auto', - restore_sentinel: bool = True): + restore_sentinel: bool = True, + allow_internal_only_jpeg: bool = False, + allow_unparseable_crs: bool = False): """Write a single tile GeoTIFF. Used by _write_vrt_tiled. Forwards the same rich-tag set that ``to_geotiff`` passes through to @@ -839,7 +842,12 @@ def _write_single_tile(chunk_data, path, geo_transform, epsg, wkt, bigtiff=bigtiff, max_z_error=max_z_error, photometric=photometric, - restore_sentinel=restore_sentinel) + restore_sentinel=restore_sentinel, + # Forward the JPEG / CRS-fallback opt-ins so the per-tile + # write does not re-trip the push-down gate ``to_geotiff`` + # / ``_write_vrt_tiled`` already cleared upstream (#2138). + allow_internal_only_jpeg=allow_internal_only_jpeg, + allow_unparseable_crs=allow_unparseable_crs) def _write_vrt_tiled(data, vrt_path, *, crs=None, nodata=None, @@ -847,7 +855,8 @@ def _write_vrt_tiled(data, vrt_path, *, crs=None, nodata=None, tile_size=256, predictor: bool | int = False, bigtiff=None, max_z_error: float = 0.0, photometric: str | int = 'auto', - allow_unparseable_crs: bool = False): + allow_unparseable_crs: bool = False, + allow_internal_only_jpeg: bool = False): """Write a DataArray as a directory of tiled GeoTIFFs with a VRT index. This enables streaming dask arrays to disk without materializing the @@ -1063,7 +1072,9 @@ def _write_vrt_tiled(data, vrt_path, *, crs=None, nodata=None, gdal_metadata_xml=gdal_meta_xml, extra_tags=extra_tags_list, photometric=photometric, - restore_sentinel=restore_sentinel) + restore_sentinel=restore_sentinel, + allow_internal_only_jpeg=allow_internal_only_jpeg, + allow_unparseable_crs=allow_unparseable_crs) delayed_tasks.append(task) else: # Numpy: slice and write directly @@ -1080,7 +1091,9 @@ def _write_vrt_tiled(data, vrt_path, *, crs=None, nodata=None, gdal_metadata_xml=gdal_meta_xml, extra_tags=extra_tags_list, photometric=photometric, - restore_sentinel=restore_sentinel) + restore_sentinel=restore_sentinel, + allow_internal_only_jpeg=allow_internal_only_jpeg, + allow_unparseable_crs=allow_unparseable_crs) col_offset += chunk_w row_offset += chunk_h diff --git a/xrspatial/geotiff/tests/test_lowlevel_write_pushdown_2138.py b/xrspatial/geotiff/tests/test_lowlevel_write_pushdown_2138.py index 9bcf185eb..2d3dde6e5 100644 --- a/xrspatial/geotiff/tests/test_lowlevel_write_pushdown_2138.py +++ b/xrspatial/geotiff/tests/test_lowlevel_write_pushdown_2138.py @@ -318,3 +318,19 @@ def test_aliases_match_underscore_names(): assert _writer.write is _writer._write assert _writer.write_streaming is _writer._write_streaming assert _reader.read_to_array is _reader._read_to_array + + +def test_write_not_leaked_into_public_namespace(): + """The array-level write entry points are module-private. They + must not appear as attributes of ``xrspatial.geotiff`` (the + documented public surface is ``to_geotiff``). Mirrors the #1708 + contract for ``read_to_array``.""" + import xrspatial.geotiff as g + + for name in ('write', 'write_streaming', '_write', '_write_streaming'): + assert not hasattr(g, name), ( + f"{name!r} leaked into xrspatial.geotiff's public namespace. " + "The supported public eager-write entry point is to_geotiff. " + "Internal callers should import the array-level function " + "from xrspatial.geotiff._writer directly. See issue #2138." + ) From adbfd38606579bd2dcfccc662fdc7bfa4704ba95 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 19 May 2026 12:05:57 -0700 Subject: [PATCH 3/4] geotiff: address review nits with explanatory comments on push-down guards (#2138) Two comments clarifying the reasoning for guards that are intentionally narrower than they look: * The ``TypeError`` branch on non-string ``compression`` is unreachable from ``to_geotiff`` (which only forwards ``str``); it exists so direct callers get a typed error instead of an ``AttributeError`` from ``compression.lower()``. * The ``isinstance(data, np.ndarray)`` gate on the float16 / bool_ promotion intentionally skips dask arrays because ``_write_streaming`` handles dtype promotion separately on the dask path. --- xrspatial/geotiff/_writer.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/xrspatial/geotiff/_writer.py b/xrspatial/geotiff/_writer.py index 8d8b63b02..6e335a31c 100644 --- a/xrspatial/geotiff/_writer.py +++ b/xrspatial/geotiff/_writer.py @@ -1709,6 +1709,10 @@ def _validate_lowlevel_write_kwargs(*, f"Unknown compression {compression!r} (in {entry_point}). " f"Valid options: {list(_VALID_COMPRESSIONS)}.") elif compression is not None: + # Unreachable from ``to_geotiff`` (which only forwards ``str`` + # or the default), but direct callers can hit this. Without the + # explicit guard the downstream ``compression.lower()`` would + # surface as ``AttributeError`` instead of a typed error. raise TypeError( f"compression must be a str (in {entry_point}); " f"got {type(compression).__name__}.") @@ -1917,6 +1921,12 @@ def _write(data: np.ndarray, path: str, *, # the dtype mapper. ``to_geotiff`` already does this upstream; the # push-down here lets direct callers feed unsupported dtypes without # tripping ``numpy_to_tiff_dtype`` with a less actionable error. + # The guard on ``isinstance(data, np.ndarray)`` is intentional: + # ``_write`` is the numpy-array entry point, so the dask streaming + # path goes through ``_write_streaming`` (which handles ``out_dtype`` + # promotion separately). A bare ``dask.array`` reaching ``_write`` + # is already a misuse; the dtype mapper below will surface the + # mismatch. if isinstance(data, np.ndarray): if data.dtype == np.float16: data = data.astype(np.float32) From d0d39d02c00b1e3760aafa111f3009075c70f2f6 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 19 May 2026 12:19:34 -0700 Subject: [PATCH 4/4] geotiff: skip lz4 / lerc byte-parity tests when codec is missing (#2138) CI on Ubuntu / Windows / macOS does not include the optional ``lz4`` and ``lerc`` codec packages in every matrix slot. The byte-parity sweep and the LERC lossless round-trip test now probe the codec import the way ``_compression`` itself does and skip cleanly instead of raising ``ImportError``. The push-down validation tests (reject negative ``max_z_error``, reject ``max_z_error`` with non-LERC codecs) do not need the codecs because they raise before any encode step runs. --- .../test_lowlevel_write_pushdown_2138.py | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/xrspatial/geotiff/tests/test_lowlevel_write_pushdown_2138.py b/xrspatial/geotiff/tests/test_lowlevel_write_pushdown_2138.py index 2d3dde6e5..57c604c3e 100644 --- a/xrspatial/geotiff/tests/test_lowlevel_write_pushdown_2138.py +++ b/xrspatial/geotiff/tests/test_lowlevel_write_pushdown_2138.py @@ -35,6 +35,35 @@ from xrspatial.geotiff._reader import _read_to_array +def _codec_available(name: str) -> bool: + """Optional codecs (``lz4``, ``lerc``, ``imagecodecs``-backed JPEG2000) + are not installed in every CI matrix slot. Probe the import the way + ``_compression`` itself does so tests skip cleanly rather than + failing on a missing dependency.""" + if name in ("none", "deflate", "lzw", "packbits", "zstd"): + # Built into the bundled compression module; always present. + return True + if name == "lz4": + try: + import lz4 # noqa: F401 + except ImportError: + return False + return True + if name == "lerc": + try: + import lerc # noqa: F401 + except ImportError: + return False + return True + if name in ("jpeg", "jpeg2000", "j2k"): + try: + import imagecodecs # noqa: F401 + except ImportError: + return False + return True + return True + + def _make_uint8_band(seed: int = 2138, shape=(32, 32)) -> np.ndarray: """Deterministic 2D uint8 array used by the byte-parity tests.""" rng = np.random.RandomState(seed) @@ -257,6 +286,8 @@ def test_write_vs_to_geotiff_byte_parity_uint8(compression, tmp_path): ``_VALID_COMPRESSIONS`` that round-trips losslessly. A divergence here is exactly the silent-different-file footgun #2138 names. """ + if not _codec_available(compression): + pytest.skip(f"{compression} codec not installed") arr = _make_uint8_band(seed=2138 + hash(compression) % 1000) out_direct = str(tmp_path / f"tmp_2138_direct_{compression}.tif") out_wrapper = str(tmp_path / f"tmp_2138_wrapper_{compression}.tif") @@ -302,6 +333,8 @@ def test_write_lerc_lossless_round_trip(tmp_path): """LERC with ``max_z_error=0`` is lossless. Confirm the codec survives the push-down and still round-trips bit-exactly when the pairing check passes.""" + if not _codec_available("lerc"): + pytest.skip("lerc codec not installed") arr = _make_float32_band() out = str(tmp_path / "tmp_2138_lerc_lossless.tif") _write(arr, out, compression="lerc", max_z_error=0.0)