From a6f94357a3805b7dd97126ae9e9cba3109d2b11e Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 26 May 2026 07:06:25 -0700 Subject: [PATCH 1/2] Reject contradictory GeoKey directories on read (#2417) The reader took ProjectedCSTypeGeoKey first, fell back to GeographicTypeGeoKey, and never cross-checked either against ModelTypeGeoKey. A TIFF declaring ModelType=geographic with an EPSG under ProjectedCSTypeGeoKey would publish trustworthy-looking attrs['crs'] / attrs['crs_wkt'] fabricated from contradictory inputs. Add InconsistentGeoKeysError (subclass of GeoTIFFAmbiguousMetadataError) and a registered read-side check that refuses three structural combinations: ModelType=projected with only GeographicType set, ModelType=geographic with ProjectedCSType set, and both type keys populated with different non-user-defined EPSG codes. Pass allow_inconsistent_geokeys=True on the public read entry points to keep the legacy permissive behaviour for known-quirky historical files. --- docs/source/user_guide/geotiff_safe_io.rst | 7 + xrspatial/geotiff/__init__.py | 27 +- xrspatial/geotiff/_attrs.py | 20 + xrspatial/geotiff/_backends/dask.py | 11 + xrspatial/geotiff/_backends/gpu.py | 19 + xrspatial/geotiff/_backends/vrt.py | 13 + xrspatial/geotiff/_errors.py | 29 +- xrspatial/geotiff/_validation.py | 136 +++++- xrspatial/geotiff/tests/test_features.py | 4 + .../tests/test_inconsistent_geokeys_2417.py | 391 ++++++++++++++++++ .../tests/test_reader_kwarg_order_1935.py | 4 + 11 files changed, 655 insertions(+), 6 deletions(-) create mode 100644 xrspatial/geotiff/tests/test_inconsistent_geokeys_2417.py diff --git a/docs/source/user_guide/geotiff_safe_io.rst b/docs/source/user_guide/geotiff_safe_io.rst index 4d842e85..a62db188 100644 --- a/docs/source/user_guide/geotiff_safe_io.rst +++ b/docs/source/user_guide/geotiff_safe_io.rst @@ -222,6 +222,13 @@ the ambiguous-metadata family at once. - ``attrs['crs']`` and ``attrs['crs_wkt']`` do not canonicalise to the same WKT on write. - Resolve the conflict in caller code before writing. + * - :class:`~xrspatial.geotiff.InconsistentGeoKeysError` + - The source's GeoKey directory is internally contradictory: + ``ModelTypeGeoKey`` disagrees with the type-specific keys + actually populated, or ``ProjectedCSTypeGeoKey`` and + ``GeographicTypeGeoKey`` resolve to different EPSG codes. + - ``allow_inconsistent_geokeys=True`` to keep the legacy silent + acceptance for known-quirky historical files. * - :class:`~xrspatial.geotiff.ConflictingNodataError` - ``attrs['nodata']`` and ``attrs['nodatavals']`` disagree on write. diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index 38574c89..ea565daf 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -72,9 +72,9 @@ transform_tuple_from_pixel_geometry as _transform_tuple_from_pixel_geometry # noqa: F401 from ._crs import _resolve_crs_to_wkt, _wkt_to_epsg # noqa: F401 from ._errors import (ConflictingCRSError, ConflictingNodataError, GeoTIFFAmbiguousMetadataError, - InvalidCRSCodeError, MixedBandMetadataError, NonUniformCoordsError, - RotatedTransformError, UnknownCRSModelTypeError, UnparseableCRSError, - UnsupportedGeoTIFFFeatureError) + InconsistentGeoKeysError, InvalidCRSCodeError, MixedBandMetadataError, + NonUniformCoordsError, RotatedTransformError, UnknownCRSModelTypeError, + UnparseableCRSError, UnsupportedGeoTIFFFeatureError) from ._geotags import RASTER_PIXEL_IS_AREA, RASTER_PIXEL_IS_POINT, GeoTransform # noqa: F401 from ._reader import _MAX_CLOUD_BYTES_SENTINEL, UnsafeURLError from ._reader import read_to_array as _read_to_array @@ -108,6 +108,7 @@ 'GEOREF_STATUS_ROTATED_DROPPED', 'GEOREF_STATUS_TRANSFORM_ONLY', 'GEOREF_STATUS_VALUES', + 'InconsistentGeoKeysError', 'InvalidCRSCodeError', 'MixedBandMetadataError', 'NonUniformCoordsError', @@ -345,6 +346,7 @@ def open_geotiff(source: str | BinaryIO, *, missing_sources: str = _MISSING_SOURCES_SENTINEL, allow_rotated: bool = False, allow_unparseable_crs: bool = False, + allow_inconsistent_geokeys: bool = False, allow_experimental_codecs: bool = False, allow_internal_only_jpeg: bool = False, band_nodata: str | None = None, @@ -543,6 +545,18 @@ def open_geotiff(source: str | BinaryIO, *, behaviour where the citation field passes through unchanged. Matches the same kwarg on ``to_geotiff`` / ``write_geotiff_gpu`` so a value the reader accepted can survive a round-trip. + allow_inconsistent_geokeys : bool, default False + [advanced] Read-side opt-in for GeoTIFF sources whose GeoKey + directory is internally contradictory: ``ModelTypeGeoKey`` + disagrees with the type-specific keys actually populated, or + ``ProjectedCSTypeGeoKey`` and ``GeographicTypeGeoKey`` resolve + to different EPSG codes. The legacy reader took the projected + code first and silently fabricated an + ``attrs['crs']`` / ``attrs['crs_wkt']`` from contradictory + inputs (issue #2417). When ``False`` (the default), the read + raises ``InconsistentGeoKeysError``. Set to ``True`` to keep + the legacy permissive behaviour for files known to carry + quirky-but-trusted GeoKey layouts. allow_experimental_codecs : bool, default False Read-side opt-in for sources compressed with the Tier 3 experimental codecs (``lerc``, ``jpeg2000`` / ``j2k``, ``lz4``). @@ -690,6 +704,8 @@ def open_geotiff(source: str | BinaryIO, *, max_pixels=max_pixels, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + allow_inconsistent_geokeys=( + allow_inconsistent_geokeys), allow_experimental_codecs=allow_experimental_codecs, allow_internal_only_jpeg=allow_internal_only_jpeg, band_nodata=band_nodata, @@ -712,6 +728,8 @@ def open_geotiff(source: str | BinaryIO, *, max_pixels=max_pixels, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + allow_inconsistent_geokeys=( + allow_inconsistent_geokeys), allow_experimental_codecs=( allow_experimental_codecs), allow_internal_only_jpeg=( @@ -727,6 +745,8 @@ def open_geotiff(source: str | BinaryIO, *, max_pixels=max_pixels, name=name, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + allow_inconsistent_geokeys=( + allow_inconsistent_geokeys), allow_experimental_codecs=( allow_experimental_codecs), allow_internal_only_jpeg=( @@ -783,6 +803,7 @@ def open_geotiff(source: str | BinaryIO, *, name=name, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + allow_inconsistent_geokeys=allow_inconsistent_geokeys, ) diff --git a/xrspatial/geotiff/_attrs.py b/xrspatial/geotiff/_attrs.py index f9f9c7d7..7041dec1 100644 --- a/xrspatial/geotiff/_attrs.py +++ b/xrspatial/geotiff/_attrs.py @@ -1051,6 +1051,7 @@ def _validate_read_geo_info( window=None, allow_rotated: bool = False, allow_unparseable_crs: bool = False, + allow_inconsistent_geokeys: bool = False, band_nodata: str | None = None, band_nodata_values: list | None = None, ) -> None: @@ -1102,11 +1103,26 @@ def _validate_read_geo_info( and getattr(geo_info, 'has_georef', True)) else None ) + # Pull the GeoKey shape onto the context so + # ``_check_read_inconsistent_geokeys`` (issue #2417) can audit + # ModelType / ProjectedCSType / GeographicType for contradictions. + # ``geo_info.geokeys`` is the parsed dict; missing keys map to + # ``None`` so the check treats the slot as "not declared" rather + # than as a default-zero, which would otherwise look like + # ``ModelType = undefined`` instead of "no model type tag at all". + raw_geokeys = getattr(geo_info, 'geokeys', None) or {} + model_type_ctx = raw_geokeys.get(1024) # GEOKEY_MODEL_TYPE + proj_cs_ctx = raw_geokeys.get(3072) # GEOKEY_PROJECTED_CS_TYPE + geog_ctx = raw_geokeys.get(2048) # GEOKEY_GEOGRAPHIC_TYPE validate_read_metadata({ 'allow_rotated': allow_rotated, 'allow_unparseable_crs': allow_unparseable_crs, + 'allow_inconsistent_geokeys': allow_inconsistent_geokeys, 'transform': transform_for_check, 'crs_wkt': geo_info.crs_wkt, + 'model_type': model_type_ctx, + 'projected_cs_type': proj_cs_ctx, + 'geographic_type': geog_ctx, 'band_nodata': band_nodata, 'band_nodata_values': band_nodata_values, }) @@ -1500,6 +1516,7 @@ def _finalize_eager_read( name, allow_rotated: bool = False, allow_unparseable_crs: bool = False, + allow_inconsistent_geokeys: bool = False, attrs_in: dict | None = None, ): """Validate, populate attrs, mask, cast, and build an eager DataArray. @@ -1546,6 +1563,7 @@ def _finalize_eager_read( geo_info, window=window, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + allow_inconsistent_geokeys=allow_inconsistent_geokeys, ) # Step 2: populate attrs from geo_info onto a fresh dict (or onto a @@ -1613,6 +1631,7 @@ def _finalize_lazy_read_attrs( window, allow_rotated: bool = False, allow_unparseable_crs: bool = False, + allow_inconsistent_geokeys: bool = False, band_nodata: str | None = None, band_nodata_values: list | None = None, attrs_in: dict | None = None, @@ -1685,6 +1704,7 @@ def _finalize_lazy_read_attrs( geo_info, window=window, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + allow_inconsistent_geokeys=allow_inconsistent_geokeys, band_nodata=band_nodata, band_nodata_values=band_nodata_values, ) diff --git a/xrspatial/geotiff/_backends/dask.py b/xrspatial/geotiff/_backends/dask.py index 61881109..b1bd18a9 100644 --- a/xrspatial/geotiff/_backends/dask.py +++ b/xrspatial/geotiff/_backends/dask.py @@ -42,6 +42,7 @@ def read_geotiff_dask(source: str, *, missing_sources: str = _MISSING_SOURCES_SENTINEL, allow_rotated: bool = False, allow_unparseable_crs: bool = False, + allow_inconsistent_geokeys: bool = False, allow_experimental_codecs: bool = False, allow_internal_only_jpeg: bool = False, band_nodata: str | None = None, @@ -126,6 +127,14 @@ def read_geotiff_dask(source: str, *, instead of carrying the unrecognised payload through ``attrs['crs_wkt']``. See ``open_geotiff`` for the full description. + allow_inconsistent_geokeys : bool, default False + [advanced] Read-side opt-in for sources whose GeoKey directory + is internally contradictory (``ModelTypeGeoKey`` disagrees + with the populated type-specific keys, or + ``ProjectedCSTypeGeoKey`` and ``GeographicTypeGeoKey`` resolve + to different EPSG codes). The default raises + ``InconsistentGeoKeysError``. See ``open_geotiff`` for the + full description (issue #2417). allow_experimental_codecs : bool, default False [advanced] Read-side opt-in for Tier 3 experimental codecs (``lerc``, ``jpeg2000`` / ``j2k``, ``lz4``). Fires at graph @@ -209,6 +218,7 @@ def read_geotiff_dask(source: str, *, chunks=chunks, max_pixels=max_pixels, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + allow_inconsistent_geokeys=allow_inconsistent_geokeys, band_nodata=band_nodata, mask_nodata=mask_nodata, **vrt_kwargs, @@ -499,6 +509,7 @@ def read_geotiff_dask(source: str, *, window=window, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + allow_inconsistent_geokeys=allow_inconsistent_geokeys, ) if isinstance(chunks, int): diff --git a/xrspatial/geotiff/_backends/gpu.py b/xrspatial/geotiff/_backends/gpu.py index 72fb99d4..1078180a 100644 --- a/xrspatial/geotiff/_backends/gpu.py +++ b/xrspatial/geotiff/_backends/gpu.py @@ -75,6 +75,7 @@ def read_geotiff_gpu(source: str, *, missing_sources: str = _MISSING_SOURCES_SENTINEL, allow_rotated: bool = False, allow_unparseable_crs: bool = False, + allow_inconsistent_geokeys: bool = False, allow_experimental_codecs: bool = False, allow_internal_only_jpeg: bool = False, band_nodata: str | None = None, @@ -203,6 +204,12 @@ def read_geotiff_gpu(source: str, *, since #1929) raises ``UnparseableCRSError``; ``True`` keeps the pre-#1929 permissive behaviour. See ``open_geotiff`` for the full description. + allow_inconsistent_geokeys : bool, default False + [experimental] Read-side opt-in for sources whose GeoKey + directory is internally contradictory. ``False`` (the default) + raises ``InconsistentGeoKeysError``; ``True`` restores the + legacy silent acceptance. See ``open_geotiff`` for the full + description (issue #2417). allow_experimental_codecs : bool, default False [experimental] Read-side opt-in for Tier 3 experimental codecs (``lerc``, ``jpeg2000`` / ``j2k``, ``lz4``). The GPU read path @@ -330,6 +337,7 @@ def read_geotiff_gpu(source: str, *, name=name, max_pixels=max_pixels, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + allow_inconsistent_geokeys=allow_inconsistent_geokeys, allow_experimental_codecs=allow_experimental_codecs, allow_internal_only_jpeg=allow_internal_only_jpeg, mask_nodata=mask_nodata, @@ -381,6 +389,7 @@ def read_geotiff_gpu(source: str, *, overview_level=overview_level, band=band, name=name, max_pixels=max_pixels, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + allow_inconsistent_geokeys=allow_inconsistent_geokeys, allow_experimental_codecs=allow_experimental_codecs, allow_internal_only_jpeg=allow_internal_only_jpeg, mask_nodata=mask_nodata, @@ -586,6 +595,7 @@ def read_geotiff_gpu(source: str, *, name=name, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + allow_inconsistent_geokeys=allow_inconsistent_geokeys, ) # ``chunks`` was previously honoured only on the tiled path, # so stripped TIFFs returned an unchunked DataArray even when @@ -993,6 +1003,7 @@ def _read_once(): name=name, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + allow_inconsistent_geokeys=allow_inconsistent_geokeys, ) # ``chunks=`` is handled at function entry via @@ -1018,6 +1029,7 @@ def _read_geotiff_gpu_eager_via_cpu(source, *, dtype, window, overview_level, band, name, max_pixels, allow_rotated: bool = False, allow_unparseable_crs: bool = False, + allow_inconsistent_geokeys: bool = False, allow_experimental_codecs: bool = False, allow_internal_only_jpeg: bool = False, mask_nodata: bool = True): @@ -1101,6 +1113,7 @@ def _read_geotiff_gpu_eager_via_cpu(source, *, dtype, window, overview_level, name=name, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + allow_inconsistent_geokeys=allow_inconsistent_geokeys, ) @@ -1250,6 +1263,7 @@ def _read_geotiff_gpu_chunked(source, *, dtype, chunks, overview_level, window, band, name, max_pixels, allow_rotated: bool = False, allow_unparseable_crs: bool = False, + allow_inconsistent_geokeys: bool = False, allow_experimental_codecs: bool = False, allow_internal_only_jpeg: bool = False, mask_nodata: bool = True): @@ -1365,6 +1379,8 @@ def _read_geotiff_gpu_chunked(source, *, dtype, chunks, overview_level, name=name, max_pixels=max_pixels, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + allow_inconsistent_geokeys=( + allow_inconsistent_geokeys), mask_nodata=mask_nodata, ) except Exception: @@ -1379,6 +1395,7 @@ def _read_geotiff_gpu_chunked(source, *, dtype, chunks, overview_level, max_pixels=max_pixels, name=name, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + allow_inconsistent_geokeys=allow_inconsistent_geokeys, allow_experimental_codecs=allow_experimental_codecs, allow_internal_only_jpeg=allow_internal_only_jpeg, mask_nodata=mask_nodata, @@ -1406,6 +1423,7 @@ def _read_geotiff_gpu_chunked_gds(source, ifd, geo_info, header, *, max_pixels, allow_rotated: bool = False, allow_unparseable_crs: bool = False, + allow_inconsistent_geokeys: bool = False, mask_nodata: bool = True): """Build a Dask+CuPy graph that decodes each chunk disk->GPU. @@ -1634,6 +1652,7 @@ def _chunk_task(meta, r0, c0, r1, c1): window=window, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + allow_inconsistent_geokeys=allow_inconsistent_geokeys, ) if name is None: diff --git a/xrspatial/geotiff/_backends/vrt.py b/xrspatial/geotiff/_backends/vrt.py index c570d5cc..ac56523a 100644 --- a/xrspatial/geotiff/_backends/vrt.py +++ b/xrspatial/geotiff/_backends/vrt.py @@ -128,6 +128,7 @@ def read_vrt(source: str, *, missing_sources: str = 'raise', allow_rotated: bool = False, allow_unparseable_crs: bool = False, + allow_inconsistent_geokeys: bool = False, allow_experimental_codecs: bool = False, allow_internal_only_jpeg: bool = False, band_nodata: str | None = None, @@ -259,6 +260,14 @@ def read_vrt(source: str, *, #1929) raises ``UnparseableCRSError`` rather than carrying the unrecognised payload through. See ``open_geotiff`` for the full description. + allow_inconsistent_geokeys : bool, default False + [advanced] Read-side opt-in for per-source GeoTIFF files + referenced by the VRT whose GeoKey directory is internally + contradictory. Forwarded to the per-source reader. VRT + capability validation itself does not parse GeoKeys (it + consumes the GDAL ```` field), so this kwarg only affects + the per-source decode. See ``open_geotiff`` for the full + description (issue #2417). allow_experimental_codecs : bool, default False [advanced] Read-side opt-in for Tier 3 experimental codecs in any source file referenced by the VRT. Forwarded to the @@ -440,6 +449,7 @@ def read_vrt(source: str, *, missing_sources=missing_sources, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + allow_inconsistent_geokeys=allow_inconsistent_geokeys, allow_experimental_codecs=allow_experimental_codecs, allow_internal_only_jpeg=allow_internal_only_jpeg, band_nodata=band_nodata, @@ -702,6 +712,7 @@ def read_vrt(source: str, *, window=window, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + allow_inconsistent_geokeys=allow_inconsistent_geokeys, band_nodata=band_nodata, band_nodata_values=_band_nodata_values, attrs_in=attrs_seed, @@ -789,6 +800,7 @@ def _read_vrt_chunked(source, *, window, band, name, chunks, gpu, dtype, max_pixels, missing_sources, allow_rotated: bool = False, allow_unparseable_crs: bool = False, + allow_inconsistent_geokeys: bool = False, allow_experimental_codecs: bool = False, allow_internal_only_jpeg: bool = False, band_nodata: str | None = None, @@ -1221,6 +1233,7 @@ def _read_vrt_chunked(source, *, window, band, name, chunks, gpu, dtype, window=helper_window, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + allow_inconsistent_geokeys=allow_inconsistent_geokeys, band_nodata=band_nodata, band_nodata_values=band_nodata_values, attrs_in=attrs_seed, diff --git a/xrspatial/geotiff/_errors.py b/xrspatial/geotiff/_errors.py index 8e6ae11d..bc8a52ce 100644 --- a/xrspatial/geotiff/_errors.py +++ b/xrspatial/geotiff/_errors.py @@ -21,7 +21,8 @@ ├── NonUniformCoordsError (PR 4) ├── MixedBandMetadataError (PR 5) ├── ConflictingCRSError (PR 6, blocked on #1984) - └── ConflictingNodataError (PR 7, blocked on #1988) + ├── ConflictingNodataError (PR 7, blocked on #1988) + └── InconsistentGeoKeysError (#2417) """ from __future__ import annotations @@ -121,6 +122,31 @@ class VRTUnsupportedError(GeoTIFFAmbiguousMetadataError): """ +class InconsistentGeoKeysError(GeoTIFFAmbiguousMetadataError): + """GeoKey set is internally contradictory on read (#2417). + + Raised when a GeoTIFF source ships a GeoKey directory whose + ``ModelTypeGeoKey`` does not agree with the type-specific keys + actually populated, or whose ``ProjectedCSTypeGeoKey`` and + ``GeographicTypeGeoKey`` resolve to different EPSG codes. + + The legacy reader took ``ProjectedCSTypeGeoKey`` first, then fell + back to ``GeographicTypeGeoKey``, without ever cross-checking + either against ``ModelTypeGeoKey``. A malformed or hostile input + could declare ``ModelTypeGeoKey = geographic`` while stashing an + EPSG under the projected key (or vice versa) and the reader would + publish a trustworthy-looking ``attrs['crs']`` / ``attrs['crs_wkt']`` + built from inconsistent inputs. Silent acceptance of contradictory + geospatial metadata is the failure mode the rest of the #1987 + series already rejects; this check extends the same contract to + the GeoKey shape itself. + + Pass ``allow_inconsistent_geokeys=True`` on the public read entry + points to keep the legacy permissive behaviour for files known to + carry quirky-but-trusted GeoKey layouts. + """ + + class UnknownCRSModelTypeError(GeoTIFFAmbiguousMetadataError): """Can't classify an EPSG as geographic or projected on write (#2277). @@ -154,6 +180,7 @@ class UnsupportedGeoTIFFFeatureError(ValueError): __all__ = [ "GeoTIFFAmbiguousMetadataError", + "InconsistentGeoKeysError", "InvalidCRSCodeError", "UnparseableCRSError", "RotatedTransformError", diff --git a/xrspatial/geotiff/_validation.py b/xrspatial/geotiff/_validation.py index 7a15a4fc..54bbcfa6 100644 --- a/xrspatial/geotiff/_validation.py +++ b/xrspatial/geotiff/_validation.py @@ -26,8 +26,9 @@ import numpy as np from ._coords import _BAND_DIM_NAMES -from ._errors import (ConflictingCRSError, ConflictingNodataError, MixedBandMetadataError, - NonUniformCoordsError, RotatedTransformError, UnparseableCRSError) +from ._errors import (ConflictingCRSError, ConflictingNodataError, InconsistentGeoKeysError, + MixedBandMetadataError, NonUniformCoordsError, RotatedTransformError, + UnparseableCRSError) from ._runtime import (_MISSING_SOURCES_SENTINEL, _ON_GPU_FAILURE_SENTINEL, _TIME_DIM_NAMES, _X_DIM_NAMES, _Y_DIM_NAMES) @@ -1213,3 +1214,134 @@ def _same_nodata(a: float, b: float) -> bool: # ``read_geotiff_dask``; the explicit opt-in surfaces the per-band # ambiguity at the call site instead of papering over it silently. register_read_metadata_check(_check_read_mixed_band_metadata) + + +# --------------------------------------------------------------------------- +# Inconsistent GeoKey read check (issue #2417) +# --------------------------------------------------------------------------- + + +# Mirrors the GeoTIFF spec ModelType enum values. Re-declared here so the +# validator does not pull a runtime import from ``_geotags`` (which would +# otherwise create a circular dep at module load time -- ``_geotags`` +# already lives below ``_validation`` in the import order). +_MODEL_TYPE_UNDEFINED = 0 +_MODEL_TYPE_PROJECTED = 1 +_MODEL_TYPE_GEOGRAPHIC = 2 +_MODEL_TYPE_GEOCENTRIC = 3 +_GEOKEY_USER_DEFINED = 32767 + + +def _check_read_inconsistent_geokeys(context: Mapping[str, Any]) -> None: + """Refuse reads whose GeoKey shape is internally contradictory (#2417). + + The legacy reader took ``ProjectedCSTypeGeoKey`` first, fell back to + ``GeographicTypeGeoKey``, and never cross-checked either against + ``ModelTypeGeoKey``. A TIFF declaring:: + + ModelTypeGeoKey = 2 (geographic) + ProjectedCSTypeGeoKey = 4326 + GeographicTypeGeoKey = absent + + is internally contradictory -- 4326 is a geographic CRS but appears + under the projected key while ``ModelTypeGeoKey`` declares the + model is geographic. The legacy path published the EPSG verbatim + in ``attrs['crs']`` and synthesised a matching ``attrs['crs_wkt']``, + handing downstream callers trustworthy-looking spatial metadata + fabricated from inconsistent inputs. + + The check rejects three structurally bad combinations: + + 1. ``ModelTypeGeoKey = projected`` with ``GeographicTypeGeoKey`` + populated but ``ProjectedCSTypeGeoKey`` absent. + 2. ``ModelTypeGeoKey = geographic`` with ``ProjectedCSTypeGeoKey`` + populated (the geographic model may not stash a projected code). + 3. Both ``ProjectedCSTypeGeoKey`` and ``GeographicTypeGeoKey`` + populated with different non-user-defined EPSG codes. + + ``32767`` (user-defined) is treated as "no resolved code" because + the actual CRS is defined by sibling keys (``GeogGeodeticDatumGeoKey`` + et al.) rather than the type-code slot itself. + + Context keys consumed: + + * ``allow_inconsistent_geokeys`` -- caller opt-out kwarg. + * ``model_type`` -- value of ``ModelTypeGeoKey`` (0 if absent). + * ``projected_cs_type`` -- value of ``ProjectedCSTypeGeoKey`` (None + if absent). + * ``geographic_type`` -- value of ``GeographicTypeGeoKey`` (None + if absent). + + All context keys are optional. With no GeoKey context the check is + a no-op, so callers that did not (or could not) parse a GeoKey + directory at all still pass through unchanged. + """ + if context.get('allow_inconsistent_geokeys'): + return + proj_cs = context.get('projected_cs_type') + geog = context.get('geographic_type') + model_type = context.get('model_type') + if model_type is None and proj_cs is None and geog is None: + return + + # Coerce ints. The reader stashes raw GeoKey ints, but a caller could + # plausibly pass floats; tolerate both and ignore the rest. + def _as_int(v): + if isinstance(v, bool): + return None + if isinstance(v, (int, float)): + return int(v) + return None + + model_type_i = _as_int(model_type) + proj_cs_i = _as_int(proj_cs) + geog_i = _as_int(geog) + + # ``32767`` (user-defined) is treated as "no resolved code" for + # consistency checks; the EPSG slot is intentionally a placeholder + # in that case. + proj_resolved = ( + proj_cs_i is not None and proj_cs_i != _GEOKEY_USER_DEFINED) + geog_resolved = ( + geog_i is not None and geog_i != _GEOKEY_USER_DEFINED) + + # Case 1: ModelType = projected but only GeographicTypeGeoKey set. + if (model_type_i == _MODEL_TYPE_PROJECTED + and not proj_resolved and geog_resolved): + raise InconsistentGeoKeysError( + f"GeoTIFF source declares ModelTypeGeoKey=projected (1) " + f"but carries only GeographicTypeGeoKey={geog_i!r} with no " + f"resolved ProjectedCSTypeGeoKey. A projected model must " + f"declare the projected CRS code in ProjectedCSTypeGeoKey; " + f"the geographic key on its own cannot identify the " + f"projection. Pass allow_inconsistent_geokeys=True to keep " + f"the legacy behaviour of treating the geographic code as " + f"the file's CRS. See issue #2417." + ) + + # Case 2: ModelType = geographic but ProjectedCSTypeGeoKey set. + if model_type_i == _MODEL_TYPE_GEOGRAPHIC and proj_resolved: + raise InconsistentGeoKeysError( + f"GeoTIFF source declares ModelTypeGeoKey=geographic (2) " + f"but populates ProjectedCSTypeGeoKey={proj_cs_i!r}. The " + f"geographic model may not carry a projected CRS code; the " + f"legacy reader would publish EPSG {proj_cs_i!r} verbatim " + f"in attrs['crs'] from contradictory inputs. Pass " + f"allow_inconsistent_geokeys=True to keep the legacy " + f"behaviour. See issue #2417." + ) + + # Case 3: both type keys resolved to different EPSG codes. + if proj_resolved and geog_resolved and proj_cs_i != geog_i: + raise InconsistentGeoKeysError( + f"GeoTIFF source populates both ProjectedCSTypeGeoKey=" + f"{proj_cs_i!r} and GeographicTypeGeoKey={geog_i!r} with " + f"different EPSG codes. The legacy reader took the " + f"projected code first and silently dropped the geographic " + f"one. Pass allow_inconsistent_geokeys=True to keep that " + f"behaviour, or fix the source's GeoKey directory. See " + f"issue #2417." + ) + + +register_read_metadata_check(_check_read_inconsistent_geokeys) diff --git a/xrspatial/geotiff/tests/test_features.py b/xrspatial/geotiff/tests/test_features.py index 272ee146..66018fa6 100644 --- a/xrspatial/geotiff/tests/test_features.py +++ b/xrspatial/geotiff/tests/test_features.py @@ -2779,6 +2779,10 @@ def test_all_lists_supported_functions(self): 'ConflictingCRSError', 'ConflictingNodataError', 'GeoTIFFAmbiguousMetadataError', + # Issue #2417: read-side fail-closed on contradictory + # ModelType / ProjectedCSType / GeographicType GeoKey + # combinations. + 'InconsistentGeoKeysError', 'InvalidCRSCodeError', 'MixedBandMetadataError', 'NonUniformCoordsError', diff --git a/xrspatial/geotiff/tests/test_inconsistent_geokeys_2417.py b/xrspatial/geotiff/tests/test_inconsistent_geokeys_2417.py new file mode 100644 index 00000000..8340fabb --- /dev/null +++ b/xrspatial/geotiff/tests/test_inconsistent_geokeys_2417.py @@ -0,0 +1,391 @@ +"""Fail-closed read check for internally contradictory GeoKeys (issue #2417). + +The legacy reader took ``ProjectedCSTypeGeoKey`` first, fell back to +``GeographicTypeGeoKey``, and never cross-checked either against +``ModelTypeGeoKey``. A malformed file declaring +``ModelTypeGeoKey = geographic (2)`` with an EPSG stashed under +``ProjectedCSTypeGeoKey`` would publish a trustworthy-looking +``attrs['crs']`` / ``attrs['crs_wkt']`` fabricated from contradictory +inputs. ``_check_read_inconsistent_geokeys`` rejects three structural +combinations: + +1. ``ModelType=projected`` with only ``GeographicTypeGeoKey`` set. +2. ``ModelType=geographic`` with ``ProjectedCSTypeGeoKey`` set. +3. Both type keys populated with different non-user-defined EPSG codes. + +Pass ``allow_inconsistent_geokeys=True`` on the public read entry +points to keep the legacy permissive behaviour. +""" +from __future__ import annotations + +import struct +from pathlib import Path + +import numpy as np +import pytest + +from xrspatial.geotiff import (GeoTIFFAmbiguousMetadataError, InconsistentGeoKeysError, + open_geotiff) +from xrspatial.geotiff._validation import (_check_read_inconsistent_geokeys, + _registered_read_metadata_checks, + validate_read_metadata) + + +# --------------------------------------------------------------------------- +# Registry sanity. +# --------------------------------------------------------------------------- + + +def test_inconsistent_geokeys_check_registered(): + names = {c.__name__ for c in _registered_read_metadata_checks()} + assert _check_read_inconsistent_geokeys.__name__ in names + + +def test_error_class_hierarchy(): + assert issubclass(InconsistentGeoKeysError, GeoTIFFAmbiguousMetadataError) + assert issubclass(InconsistentGeoKeysError, ValueError) + + +# --------------------------------------------------------------------------- +# Unit-level checks driven through ``validate_read_metadata``. +# --------------------------------------------------------------------------- + + +def test_validate_rejects_model_geographic_with_projected_key(): + """The exact scenario from issue #2417: ModelType=geographic but + ProjectedCSTypeGeoKey=4326 set, GeographicTypeGeoKey absent.""" + with pytest.raises(InconsistentGeoKeysError) as excinfo: + validate_read_metadata({ + 'model_type': 2, + 'projected_cs_type': 4326, + 'geographic_type': None, + }) + msg = str(excinfo.value) + assert 'ModelTypeGeoKey=geographic' in msg + assert '4326' in msg + assert '#2417' in msg + + +def test_validate_rejects_model_projected_with_only_geographic_key(): + """ModelType=projected but only GeographicTypeGeoKey is populated.""" + with pytest.raises(InconsistentGeoKeysError) as excinfo: + validate_read_metadata({ + 'model_type': 1, + 'projected_cs_type': None, + 'geographic_type': 4326, + }) + msg = str(excinfo.value) + assert 'ModelTypeGeoKey=projected' in msg + assert '4326' in msg + + +def test_validate_rejects_both_type_keys_with_different_epsg(): + """Both ProjectedCSTypeGeoKey and GeographicTypeGeoKey set to + different EPSG codes.""" + with pytest.raises(InconsistentGeoKeysError) as excinfo: + validate_read_metadata({ + 'model_type': 1, + 'projected_cs_type': 32633, + 'geographic_type': 4326, + }) + msg = str(excinfo.value) + assert '32633' in msg + assert '4326' in msg + + +def test_validate_passes_consistent_projected(): + """ModelType=projected with a projected EPSG and no geographic key.""" + validate_read_metadata({ + 'model_type': 1, + 'projected_cs_type': 32633, + 'geographic_type': None, + }) + + +def test_validate_passes_consistent_geographic(): + """ModelType=geographic with a geographic EPSG and no projected key.""" + validate_read_metadata({ + 'model_type': 2, + 'projected_cs_type': None, + 'geographic_type': 4326, + }) + + +def test_validate_passes_both_keys_with_same_epsg(): + """Some legitimate writers stash the same code under both slots; the + check only rejects when the codes disagree.""" + validate_read_metadata({ + 'model_type': 1, + 'projected_cs_type': 32633, + 'geographic_type': 32633, + }) + + +def test_validate_passes_user_defined_projected_with_geographic(): + """``32767`` (user-defined) under the projected slot means the + projected code is not actually resolved -- the geographic key is + then the only resolved CRS source, which is not a contradiction.""" + validate_read_metadata({ + 'model_type': 2, + 'projected_cs_type': 32767, + 'geographic_type': 4326, + }) + + +def test_validate_passes_user_defined_geographic_with_projected(): + validate_read_metadata({ + 'model_type': 1, + 'projected_cs_type': 32633, + 'geographic_type': 32767, + }) + + +def test_validate_passes_no_geokey_context(): + """A caller without any GeoKey context (e.g. VRT) short-circuits. + The check is opt-in to the context it consumes.""" + validate_read_metadata({}) + + +def test_validate_passes_when_model_type_undefined(): + """Some legitimate writers omit ModelTypeGeoKey entirely. The check + is not strict about that on its own; only the explicit conflicts + above raise.""" + validate_read_metadata({ + 'model_type': 0, + 'projected_cs_type': 4326, + 'geographic_type': None, + }) + + +def test_opt_out_short_circuits_each_case(): + """``allow_inconsistent_geokeys=True`` keeps the legacy permissive + behaviour across all three rejection cases.""" + # Case 2 (the issue's exact reproducer). + validate_read_metadata({ + 'model_type': 2, + 'projected_cs_type': 4326, + 'allow_inconsistent_geokeys': True, + }) + # Case 1. + validate_read_metadata({ + 'model_type': 1, + 'geographic_type': 4326, + 'allow_inconsistent_geokeys': True, + }) + # Case 3. + validate_read_metadata({ + 'model_type': 1, + 'projected_cs_type': 32633, + 'geographic_type': 4326, + 'allow_inconsistent_geokeys': True, + }) + + +def test_validate_tolerates_float_geokey_values(): + """``_parse_geokeys`` can stash float values for keys whose entries + point at the doubles section; the check coerces to int.""" + with pytest.raises(InconsistentGeoKeysError): + validate_read_metadata({ + 'model_type': 2.0, + 'projected_cs_type': 4326.0, + 'geographic_type': None, + }) + + +def test_validate_ignores_non_numeric_geokey_values(): + """A string value in the model_type slot (e.g. a corrupt parse) is + treated as 'not declared' rather than crashing the validator.""" + # Should not raise even though projected_cs_type is set. + validate_read_metadata({ + 'model_type': 'garbage', + 'projected_cs_type': 4326, + 'geographic_type': None, + }) + + +# --------------------------------------------------------------------------- +# Full-stack integration: the bug's exact reproducer through open_geotiff(). +# --------------------------------------------------------------------------- + + +def _write_tiff_with_geokeys( + path: str, + *, + model_type: int, + projected_cs_type: int | None, + geographic_type: int | None, +) -> None: + """Hand-build a 4x4 float32 TIFF with a tiny GeoKey directory. + + Only ``ModelTypeGeoKey`` plus optional ``ProjectedCSTypeGeoKey`` and + ``GeographicTypeGeoKey`` are written; the rest of the directory is + minimal. Pass ``None`` to either type-key argument to omit it. + + Unique fixture for issue #2417 -- name carries the issue number so + parallel test runs and other worktrees do not collide on tmp paths. + """ + bo = '<' + pixels = np.zeros((4, 4), dtype=np.float32).tobytes() + + # GeoKeyDirectory header: (version, rev_maj, rev_min, n_keys) + n_keys = 1 + int(projected_cs_type is not None) + int(geographic_type is not None) + gkd = [1, 1, 0, n_keys] + # ModelTypeGeoKey: id 1024, location 0 (immediate value), count 1, value. + gkd.extend([1024, 0, 1, model_type]) + if projected_cs_type is not None: + gkd.extend([3072, 0, 1, projected_cs_type]) + if geographic_type is not None: + gkd.extend([2048, 0, 1, geographic_type]) + + tag_list = [] + + def add_short(tag, val): + tag_list.append((tag, 3, 1, struct.pack(f'{bo}H', val))) + + def add_long(tag, val): + tag_list.append((tag, 4, 1, struct.pack(f'{bo}I', val))) + + def add_shorts(tag, vals): + tag_list.append((tag, 3, len(vals), + struct.pack(f'{bo}{len(vals)}H', *vals))) + + def add_doubles(tag, vals): + tag_list.append((tag, 12, len(vals), + struct.pack(f'{bo}{len(vals)}d', *vals))) + + add_short(256, 4) + add_short(257, 4) + add_short(258, 32) + add_short(259, 1) + add_short(262, 1) + add_short(277, 1) + add_short(339, 3) + add_short(278, 4) + add_long(273, 0) + add_long(279, len(pixels)) + add_doubles(33550, [1.0, 1.0, 0.0]) + add_doubles(33922, [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]) + add_shorts(34735, gkd) + tag_list.sort(key=lambda t: t[0]) + + n = len(tag_list) + ifd_start = 8 + ifd_size = 2 + 12 * n + 4 + overflow_start = ifd_start + ifd_size + overflow_buf = bytearray() + tag_offsets: dict[int, int | None] = {} + for tag, _typ, _count, raw in tag_list: + if len(raw) > 4: + tag_offsets[tag] = len(overflow_buf) + overflow_buf.extend(raw) + if len(overflow_buf) % 2: + overflow_buf.append(0) + else: + tag_offsets[tag] = None + pixel_data_start = overflow_start + len(overflow_buf) + patched = [] + for tag, typ, count, raw in tag_list: + if tag == 273: + raw = struct.pack(f'{bo}I', pixel_data_start) + patched.append((tag, typ, count, raw)) + tag_list = patched + out = bytearray(b'II') + out.extend(struct.pack(f'{bo}H', 42)) + out.extend(struct.pack(f'{bo}I', ifd_start)) + out.extend(struct.pack(f'{bo}H', n)) + for tag, typ, count, raw in tag_list: + out.extend(struct.pack(f'{bo}HHI', tag, typ, count)) + if len(raw) <= 4: + out.extend(raw.ljust(4, b'\x00')) + else: + out.extend(struct.pack(f'{bo}I', overflow_start + tag_offsets[tag])) + out.extend(struct.pack(f'{bo}I', 0)) + out.extend(overflow_buf) + out.extend(pixels) + Path(path).write_bytes(bytes(out)) + + +def test_open_geotiff_rejects_model_geographic_with_projected_key(tmp_path): + """The exact reproducer from issue #2417. A TIFF declaring + ``ModelTypeGeoKey=geographic`` with ``ProjectedCSTypeGeoKey=4326`` + used to surface ``attrs['crs']=4326`` silently; now it raises.""" + path = tmp_path / "tmp_2417_model_geographic.tif" + _write_tiff_with_geokeys( + str(path), + model_type=2, # geographic + projected_cs_type=4326, # but projected key populated + geographic_type=None, + ) + with pytest.raises(InconsistentGeoKeysError): + open_geotiff(str(path)) + + +def test_open_geotiff_rejects_model_projected_with_only_geographic_key(tmp_path): + path = tmp_path / "tmp_2417_model_projected.tif" + _write_tiff_with_geokeys( + str(path), + model_type=1, # projected + projected_cs_type=None, # but only geographic key populated + geographic_type=4326, + ) + with pytest.raises(InconsistentGeoKeysError): + open_geotiff(str(path)) + + +def test_open_geotiff_rejects_both_keys_with_different_epsg(tmp_path): + path = tmp_path / "tmp_2417_both_keys_disagree.tif" + _write_tiff_with_geokeys( + str(path), + model_type=1, + projected_cs_type=32633, + geographic_type=4326, + ) + with pytest.raises(InconsistentGeoKeysError): + open_geotiff(str(path)) + + +def test_open_geotiff_accepts_consistent_projected(tmp_path): + """Control: a TIFF with a consistent projected ModelType + projected + EPSG should still read cleanly.""" + path = tmp_path / "tmp_2417_consistent_projected.tif" + _write_tiff_with_geokeys( + str(path), + model_type=1, + projected_cs_type=32633, + geographic_type=None, + ) + da = open_geotiff(str(path)) + assert da.shape == (4, 4) + # The reader publishes the projected EPSG verbatim. + assert da.attrs.get('crs') == 32633 + + +def test_open_geotiff_accepts_consistent_geographic(tmp_path): + path = tmp_path / "tmp_2417_consistent_geographic.tif" + _write_tiff_with_geokeys( + str(path), + model_type=2, + projected_cs_type=None, + geographic_type=4326, + ) + da = open_geotiff(str(path)) + assert da.shape == (4, 4) + assert da.attrs.get('crs') == 4326 + + +def test_open_geotiff_opt_out_restores_legacy_behaviour(tmp_path): + """``allow_inconsistent_geokeys=True`` keeps the pre-#2417 silent + acceptance for callers with known-quirky historical files.""" + path = tmp_path / "tmp_2417_opt_out.tif" + _write_tiff_with_geokeys( + str(path), + model_type=2, + projected_cs_type=4326, + geographic_type=None, + ) + da = open_geotiff(str(path), allow_inconsistent_geokeys=True) + # The legacy reader takes ProjectedCSTypeGeoKey first, so the EPSG + # surfaces in attrs['crs'] verbatim. The bug is that the surface + # looks trustworthy; the opt-in is the documented way to keep that + # behaviour for callers who need the legacy contract. + assert da.attrs.get('crs') == 4326 diff --git a/xrspatial/geotiff/tests/test_reader_kwarg_order_1935.py b/xrspatial/geotiff/tests/test_reader_kwarg_order_1935.py index b880d866..58c8aa24 100644 --- a/xrspatial/geotiff/tests/test_reader_kwarg_order_1935.py +++ b/xrspatial/geotiff/tests/test_reader_kwarg_order_1935.py @@ -38,6 +38,10 @@ "missing_sources", "allow_rotated", "allow_unparseable_crs", + # Issue #2417 added the GeoKey-shape fail-closed opt-out. Sits + # alongside the other ambiguous-metadata opt-outs so the canonical + # order keeps the typed-error gates grouped. + "allow_inconsistent_geokeys", # PR 4 of epic #2340 added the experimental / internal-only codec # opt-ins on the read side, mirroring the writer surface from #2137 # / #1845. They sit after the other ``allow_*`` flags so the From 23aa9e8baa7558dd615106bff655ffa019c1c89d Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 26 May 2026 07:13:36 -0700 Subject: [PATCH 2/2] Address review follow-ups on GeoKey consistency check (#2417) - Use the named GEOKEY_MODEL_TYPE / GEOKEY_PROJECTED_CS_TYPE / GEOKEY_GEOGRAPHIC_TYPE constants from _geotags in _attrs instead of raw integer literals, so the IDs live in one place. - Guard the validator's int coercion against NaN / inf floats. Pure defensive belt for callers that hand-build a context dict; validate_read_metadata is public-ish and should not crash on garbage input. Adds a parametrised fuzz test covering nan / inf / -inf across all three GeoKey slots. - Reference the unused _MODEL_TYPE_UNDEFINED / _MODEL_TYPE_GEOCENTRIC enum constants in the check's docstring so they document the full spec rather than dangling. - VRT docstring no longer claims the kwarg is forwarded to the per-source reader. _read_vrt_internal does not currently thread per-GeoTIFF-source allow_* kwargs (same pattern as allow_unparseable_crs), so the kwarg is documented as a no-op on the VRT path until that VRT-internal change happens separately. Follow-up issue #2423 tracks extracting the duplicated hand-built TIFF writer in tests/test_inconsistent_geokeys_2417.py and tests/test_remaining_fail_closed_1987.py into a shared tests/_geotiff_fixtures.py helper. --- xrspatial/geotiff/_attrs.py | 9 +++-- xrspatial/geotiff/_backends/vrt.py | 13 +++--- xrspatial/geotiff/_validation.py | 19 ++++++++- .../tests/test_inconsistent_geokeys_2417.py | 40 +++++++++++++++++++ 4 files changed, 69 insertions(+), 12 deletions(-) diff --git a/xrspatial/geotiff/_attrs.py b/xrspatial/geotiff/_attrs.py index 7041dec1..6d1cc22b 100644 --- a/xrspatial/geotiff/_attrs.py +++ b/xrspatial/geotiff/_attrs.py @@ -164,7 +164,8 @@ from ._coords import coords_from_geo_info as _coords_from_geo_info from ._coords import resolve_georef as _resolve_georef from ._coords import transform_tuple_from_pixel_geometry as _transform_tuple_from_pixel_geometry -from ._geotags import _NO_GEOREF_KEY, RASTER_PIXEL_IS_AREA, RASTER_PIXEL_IS_POINT +from ._geotags import (_NO_GEOREF_KEY, GEOKEY_GEOGRAPHIC_TYPE, GEOKEY_MODEL_TYPE, + GEOKEY_PROJECTED_CS_TYPE, RASTER_PIXEL_IS_AREA, RASTER_PIXEL_IS_POINT) # Per-codec valid compression-level ranges, used by ``to_geotiff`` for # friendly up-front validation. Codecs not listed here either reject any @@ -1111,9 +1112,9 @@ def _validate_read_geo_info( # than as a default-zero, which would otherwise look like # ``ModelType = undefined`` instead of "no model type tag at all". raw_geokeys = getattr(geo_info, 'geokeys', None) or {} - model_type_ctx = raw_geokeys.get(1024) # GEOKEY_MODEL_TYPE - proj_cs_ctx = raw_geokeys.get(3072) # GEOKEY_PROJECTED_CS_TYPE - geog_ctx = raw_geokeys.get(2048) # GEOKEY_GEOGRAPHIC_TYPE + model_type_ctx = raw_geokeys.get(GEOKEY_MODEL_TYPE) + proj_cs_ctx = raw_geokeys.get(GEOKEY_PROJECTED_CS_TYPE) + geog_ctx = raw_geokeys.get(GEOKEY_GEOGRAPHIC_TYPE) validate_read_metadata({ 'allow_rotated': allow_rotated, 'allow_unparseable_crs': allow_unparseable_crs, diff --git a/xrspatial/geotiff/_backends/vrt.py b/xrspatial/geotiff/_backends/vrt.py index ac56523a..d1ac359f 100644 --- a/xrspatial/geotiff/_backends/vrt.py +++ b/xrspatial/geotiff/_backends/vrt.py @@ -261,12 +261,13 @@ def read_vrt(source: str, *, unrecognised payload through. See ``open_geotiff`` for the full description. allow_inconsistent_geokeys : bool, default False - [advanced] Read-side opt-in for per-source GeoTIFF files - referenced by the VRT whose GeoKey directory is internally - contradictory. Forwarded to the per-source reader. VRT - capability validation itself does not parse GeoKeys (it - consumes the GDAL ```` field), so this kwarg only affects - the per-source decode. See ``open_geotiff`` for the full + [advanced] Read-side opt-in for sources whose GeoKey directory + is internally contradictory. Accepted for signature symmetry + with ``open_geotiff`` and the GeoTIFF readers; VRT capability + validation itself does not parse GeoKeys (it consumes the GDAL + ```` field), and the legacy VRT internal reader does not + thread per-GeoTIFF-source kwargs, so this kwarg is currently a + no-op on the VRT path. See ``open_geotiff`` for the full description (issue #2417). allow_experimental_codecs : bool, default False [advanced] Read-side opt-in for Tier 3 experimental codecs in diff --git a/xrspatial/geotiff/_validation.py b/xrspatial/geotiff/_validation.py index 54bbcfa6..0fc35b9f 100644 --- a/xrspatial/geotiff/_validation.py +++ b/xrspatial/geotiff/_validation.py @@ -1263,6 +1263,15 @@ def _check_read_inconsistent_geokeys(context: Mapping[str, Any]) -> None: the actual CRS is defined by sibling keys (``GeogGeodeticDatumGeoKey`` et al.) rather than the type-code slot itself. + ModelType enum values per the GeoTIFF spec: + + * ``_MODEL_TYPE_UNDEFINED`` (0): no ModelTypeGeoKey present. + * ``_MODEL_TYPE_PROJECTED`` (1): projected CRS. + * ``_MODEL_TYPE_GEOGRAPHIC`` (2): geographic CRS. + * ``_MODEL_TYPE_GEOCENTRIC`` (3): geocentric CRS (not used by + xrspatial readers today; passes through with no contradiction + against either type-code slot). + Context keys consumed: * ``allow_inconsistent_geokeys`` -- caller opt-out kwarg. @@ -1285,12 +1294,18 @@ def _check_read_inconsistent_geokeys(context: Mapping[str, Any]) -> None: return # Coerce ints. The reader stashes raw GeoKey ints, but a caller could - # plausibly pass floats; tolerate both and ignore the rest. + # plausibly pass floats; tolerate both and ignore the rest. NaN and + # inf would make ``int()`` raise (ValueError / OverflowError) on a + # caller hand-building a garbage context dict; catch them and treat + # as "not declared" rather than crash the validator. def _as_int(v): if isinstance(v, bool): return None if isinstance(v, (int, float)): - return int(v) + try: + return int(v) + except (ValueError, OverflowError): + return None return None model_type_i = _as_int(model_type) diff --git a/xrspatial/geotiff/tests/test_inconsistent_geokeys_2417.py b/xrspatial/geotiff/tests/test_inconsistent_geokeys_2417.py index 8340fabb..2be6b6f0 100644 --- a/xrspatial/geotiff/tests/test_inconsistent_geokeys_2417.py +++ b/xrspatial/geotiff/tests/test_inconsistent_geokeys_2417.py @@ -203,6 +203,46 @@ def test_validate_ignores_non_numeric_geokey_values(): }) +@pytest.mark.parametrize( + "bad_value", + [ + float('nan'), + float('inf'), + float('-inf'), + ], + ids=["nan", "inf", "-inf"], +) +def test_validate_tolerates_nan_or_inf_geokey_values(bad_value): + """Hand-built context dicts with NaN / inf floats in a GeoKey slot + must not crash the validator. ``int(float('nan'))`` raises + ValueError and ``int(float('inf'))`` raises OverflowError; the + check catches both and treats the slot as 'not declared'. + + The reader never produces these for type-code GeoKeys (those parse + as TIFF SHORT ints), but ``validate_read_metadata`` is a public-ish + helper that takes an arbitrary dict, so the validator stays robust + against garbage callers. See PR review follow-up on issue #2417. + """ + # bad model_type with an otherwise-conflict-looking projected slot. + validate_read_metadata({ + 'model_type': bad_value, + 'projected_cs_type': 4326, + 'geographic_type': None, + }) + # bad projected_cs_type alongside a valid geographic slot. + validate_read_metadata({ + 'model_type': 2, + 'projected_cs_type': bad_value, + 'geographic_type': 4326, + }) + # bad geographic_type. + validate_read_metadata({ + 'model_type': 1, + 'projected_cs_type': 32633, + 'geographic_type': bad_value, + }) + + # --------------------------------------------------------------------------- # Full-stack integration: the bug's exact reproducer through open_geotiff(). # ---------------------------------------------------------------------------