Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#### Bug fixes and improvements
- Refresh the geotiff mmap cache when a file is replaced under the same path so re-reads after an atomic-rename overwrite no longer return stale bytes
- Decode TIFF predictor=3 un-transpose by file byte order so big-endian floating-point TIFFs read back exactly
- Default internal `_vrt.read_vrt` `missing_sources` to `'raise'` so an unreadable VRT source no longer produces a silent zero-fill hole on integer rasters; pass `missing_sources='warn'` to opt back into the previous lenient behaviour (#1843)


### Version 0.9.9 - 2026-05-05
Expand Down
65 changes: 39 additions & 26 deletions xrspatial/geotiff/_vrt.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,13 +252,14 @@ class VRTDataset:
# coords must be emitted without the shift. Parsed from
# ``<Metadata><MDI key="AREA_OR_POINT">Point</MDI></Metadata>``.
raster_type: str = 'area' # 'area' or 'point'
# Per-load record of sources skipped by ``read_vrt`` under the
# lenient default (not strict mode). Each entry is a dict with
# ``source``, ``band`` (1-based), ``dst_rect`` (xoff, yoff,
# xsize, ysize), and ``error`` keys. Empty when no sources failed
# to read. Populated by :func:`read_vrt` so callers can detect
# holes by attribute lookup instead of parsing a UserWarning
# message (issue #1734).
# Per-load record of sources skipped by ``read_vrt`` when called
# with ``missing_sources='warn'`` (the lenient opt-in; the default
# since #1843 is ``'raise'``, and strict mode always raises). Each
# entry is a dict with ``source``, ``band`` (1-based), ``dst_rect``
# (xoff, yoff, xsize, ysize), and ``error`` keys. Empty when no
# sources failed to read. Populated by :func:`read_vrt` so callers
# on the warn path can detect holes by attribute lookup instead of
# parsing the ``GeoTIFFFallbackWarning`` message (issue #1734).
holes: list[dict] = field(default_factory=list)


Expand Down Expand Up @@ -852,7 +853,7 @@ def _apply_integer_sentinel_mask(arr, vrt, band):
def read_vrt(vrt_path: str, *, window=None,
band: int | None = None,
max_pixels: int | None = None,
missing_sources: str = 'warn',
missing_sources: str = 'raise',
parsed: VRTDataset | None = None,
) -> tuple[np.ndarray, VRTDataset]:
"""Read a VRT file by assembling pixel data from its source files.
Expand All @@ -868,11 +869,22 @@ def read_vrt(vrt_path: str, *, window=None,
max_pixels : int or None
Maximum allowed pixel count (width * height * samples) for the
assembled VRT region. None uses the reader default.
missing_sources : {'warn', 'raise'}
missing_sources : {'raise', 'warn'}, default 'raise'
Policy for unreadable source files referenced by the VRT.
``'warn'`` emits ``GeoTIFFFallbackWarning`` and records
``vrt.holes`` unless ``XRSPATIAL_GEOTIFF_STRICT=1`` is set.
``'raise'`` fails immediately.
``'raise'`` (the default) fails immediately on an unreadable
source so a partial mosaic never surfaces silently. This matches
the rest of the geotiff module's up-front rejection of malformed
input and avoids the ambiguity of a zero-fill hole on an integer
raster without a nodata sentinel (see issue #1843). Prior to
#1843 the default was ``'warn'``; callers that relied on the
lenient behaviour should pass ``missing_sources='warn'``
explicitly.
``'warn'`` is the opt-in escape hatch for partial mosaics: it
emits ``GeoTIFFFallbackWarning`` and records the skipped source
on ``vrt.holes`` (surfaced as ``attrs['vrt_holes']`` on the
public DataArray).
``XRSPATIAL_GEOTIFF_STRICT=1`` forces a raise across the whole
module regardless of this kwarg (see issue #1662).
parsed : VRTDataset or None
Pre-parsed VRT structure. When supplied, ``vrt_path`` is not
re-read or re-parsed and the source-path containment check is
Expand Down Expand Up @@ -1160,21 +1172,22 @@ def read_vrt(vrt_path: str, *, window=None,
) + _CODEC_DECODE_EXCEPTIONS as e:
if isinstance(e, PixelSafetyLimitError):
raise
# Under XRSPATIAL_GEOTIFF_STRICT=1, surface the read failure
# so partial mosaics are caught in CI. Default mode warns
# once per missing source then continues, preserving the
# historical behaviour. See issue #1662.
# Default (``missing_sources='raise'``) surfaces the read
# failure immediately so a partial mosaic never ships
# silently. ``XRSPATIAL_GEOTIFF_STRICT=1`` forces the same
# raise module-wide regardless of the kwarg (#1662).
#
# The lenient default leaves zero-filled holes in
# integer VRTs that downstream code cannot tell from
# real data unless the band has a nodata sentinel set.
# Recording the skipped source on ``vrt.holes`` lets the
# public ``read_vrt`` surface a machine-readable
# ``attrs['vrt_holes']`` entry on the returned
# DataArray, alongside the warning. Callers that need
# to fail loudly can either inspect that attr or set
# ``XRSPATIAL_GEOTIFF_STRICT=1`` to raise here.
# See issue #1734.
# The ``missing_sources='warn'`` opt-in keeps the legacy
# lenient path: emit ``GeoTIFFFallbackWarning`` once per
# unreadable source and record the skip on ``vrt.holes``,
# which the public ``read_vrt`` surfaces as
# ``attrs['vrt_holes']`` on the returned DataArray. That
# path is for callers that explicitly want a holey mosaic
# plus a machine-readable list of which sources failed --
# zero-filled holes in an integer VRT are otherwise
# indistinguishable from real data without a nodata
# sentinel, which is why the default was flipped to raise
# in #1843. See also issues #1734 and #1843.
import warnings
from . import _geotiff_strict_mode, GeoTIFFFallbackWarning
if missing_sources == 'raise' or _geotiff_strict_mode():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,14 +117,19 @@ def test_negative_srcrect_message_names_bad_values():
def test_missing_source_still_takes_lenient_warning_path():
"""A *valid* SrcRect with a missing source file must still hit the
lenient warning path -- the new SrcRect check must not swallow the
missing-file case that PR #1675 narrowed."""
missing-file case that PR #1675 narrowed.

Issue #1843 flipped the default to ``missing_sources='raise'`` so
this test now passes ``'warn'`` explicitly to exercise the opt-in
lenient branch.
"""
from xrspatial.geotiff import GeoTIFFFallbackWarning
with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td:
# No source file written; SrcRect itself is well-formed.
vrt_path = _write_vrt(td, src_filename='does_not_exist.tif')
with warnings.catch_warnings(record=True) as caught:
warnings.simplefilter('always')
arr, _ = read_vrt(vrt_path)
arr, _ = read_vrt(vrt_path, missing_sources='warn')
# The lenient path must produce a fallback warning and a result
# array (zero-filled at the hole), not raise.
fallback = [w for w in caught
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"""Regression test for #1843: the internal ``read_vrt`` in ``_vrt.py``
defaults to ``missing_sources='raise'`` so an unreadable source halts
the call instead of leaving a silent zero-fill hole on integer rasters.

Callers wanting the historical lenient behaviour pass
``missing_sources='warn'`` explicitly. The strict-mode env var
``XRSPATIAL_GEOTIFF_STRICT=1`` continues to force-raise across the
whole module (orthogonal axis, not affected by this change).
"""
from __future__ import annotations

import pytest

from xrspatial.geotiff._vrt import read_vrt as _read_vrt_internal
from xrspatial.geotiff import GeoTIFFFallbackWarning


def _write_missing_source_vrt(path):
path.write_text(
'<VRTDataset rasterXSize="2" rasterYSize="2">\n'
' <VRTRasterBand dataType="Byte" band="1">\n'
' <SimpleSource>\n'
' <SourceFilename relativeToVRT="1">missing_1843.tif'
'</SourceFilename>\n'
' <SourceBand>1</SourceBand>\n'
' <SrcRect xOff="0" yOff="0" xSize="2" ySize="2"/>\n'
' <DstRect xOff="0" yOff="0" xSize="2" ySize="2"/>\n'
' </SimpleSource>\n'
' </VRTRasterBand>\n'
'</VRTDataset>\n'
)


def test_read_vrt_default_raises_on_unreadable_source(tmp_path):
"""Without an explicit ``missing_sources`` kwarg, an unreadable
backing source must raise rather than silently zero-fill.

This is the behaviour change from #1843. Before this commit the
default was ``'warn'`` and a missing ``Byte`` tile produced a hole
of zero pixels that was indistinguishable from real data unless
the caller checked ``attrs['vrt_holes']``.
"""
vrt = tmp_path / "tmp_1843_default_raise.vrt"
_write_missing_source_vrt(vrt)

with pytest.raises((OSError, ValueError)):
_read_vrt_internal(str(vrt))


def test_read_vrt_explicit_warn_preserves_lenient_behaviour(tmp_path):
"""``missing_sources='warn'`` is still the escape hatch for callers
that want partial mosaics with ``vrt.holes`` populated.

Pinning the lenient path here keeps the historical contract
available to callers who opt in. The warning and the hole record
must both still surface.
"""
vrt = tmp_path / "tmp_1843_explicit_warn.vrt"
_write_missing_source_vrt(vrt)

with pytest.warns(GeoTIFFFallbackWarning, match="could not be read"):
arr, parsed = _read_vrt_internal(str(vrt), missing_sources='warn')

assert arr.shape == (2, 2)
assert len(parsed.holes) == 1
assert parsed.holes[0]['source'].endswith('missing_1843.tif')


def test_read_vrt_strict_env_still_raises_under_warn(monkeypatch, tmp_path):
"""``XRSPATIAL_GEOTIFF_STRICT=1`` continues to force-raise even
when the caller explicitly asks for the lenient ``'warn'`` policy.

The strict env var is a module-wide override (see #1662); it must
still win over per-call ``missing_sources='warn'`` so CI runs with
strict mode catch partial mosaics regardless of caller settings.
"""
vrt = tmp_path / "tmp_1843_strict_env.vrt"
_write_missing_source_vrt(vrt)

monkeypatch.setenv("XRSPATIAL_GEOTIFF_STRICT", "1")

with pytest.raises((OSError, ValueError)):
_read_vrt_internal(str(vrt), missing_sources='warn')
Loading