diff --git a/.github/workflows/test-cog-validator.yml b/.github/workflows/test-cog-validator.yml index 78e0fc718..d2f59cacc 100644 --- a/.github/workflows/test-cog-validator.yml +++ b/.github/workflows/test-cog-validator.yml @@ -101,4 +101,4 @@ jobs: # missing rio-cogeo / GDAL install fail the suite instead of # skipping it -- the whole point of this gate. run: | - pytest xrspatial/geotiff/tests/test_cog_writer_compliance.py xrspatial/geotiff/tests/test_cog_parity_2286.py -x + pytest xrspatial/geotiff/tests/write/test_cog.py -x diff --git a/docs/source/reference/geotiff.rst b/docs/source/reference/geotiff.rst index 4fc70be73..414450c69 100644 --- a/docs/source/reference/geotiff.rst +++ b/docs/source/reference/geotiff.rst @@ -207,7 +207,7 @@ COG validator CI gate on every PR. A dedicated Linux job (``pytest-cog-validator``) installs rio-cogeo and the GDAL Python bindings from conda-forge, sets ``XRSPATIAL_REQUIRE_COG_VALIDATOR=1``, and runs the compliance -suite in ``xrspatial/geotiff/tests/test_cog_writer_compliance.py``. +suite in ``xrspatial/geotiff/tests/write/test_cog.py``. With the env var set, a missing validator dependency is a hard failure instead of a silent skip, so a misconfigured install step cannot quietly let the gate pass. Contributors without rio-cogeo @@ -493,7 +493,7 @@ is a plain GeoTIFF or a COG. ``SUPPORTED_FEATURES['writer.bigtiff_cog']`` is currently ``advanced``. The external-interop gate lives in -``xrspatial/geotiff/tests/test_bigtiff_cog_compliance_2286.py`` and +``xrspatial/geotiff/tests/write/test_bigtiff.py`` and covers the BigTIFF-specific layout (header, IFDs, tile and overview offset tables), one lossless integer codec, one lossless float codec, single-band and 3-band, one overview level, plus an auto-promotion row @@ -515,9 +515,9 @@ regression test that locks the behaviour. 
* - Combination - Regression test * - ``to_geotiff(cog=True, tiled=False)`` - - ``xrspatial/geotiff/tests/test_cog_requires_tiled_2312.py`` + - ``xrspatial/geotiff/tests/write/test_cog.py`` * - ``to_geotiff(cog=True, tile_size <= 0)`` - - ``xrspatial/geotiff/tests/test_cog_tile_size_hang_2311.py`` + - ``xrspatial/geotiff/tests/write/test_cog.py`` * - Warped VRT (```` or ````) diff --git a/docs/source/reference/release_gate_geotiff.rst b/docs/source/reference/release_gate_geotiff.rst index f5b851354..b801496ae 100644 --- a/docs/source/reference/release_gate_geotiff.rst +++ b/docs/source/reference/release_gate_geotiff.rst @@ -208,14 +208,14 @@ Local GeoTIFF read and write - stable - ``to_geotiff`` writes a file that ``open_geotiff`` reads back bit-exact for every stable codec. - - ``xrspatial/geotiff/tests/test_cog_writer_compliance.py``, + - ``xrspatial/geotiff/tests/write/test_cog.py``, ``xrspatial/geotiff/tests/parity/test_backend_matrix.py`` - `#2341`_ * - ``writer.overviews`` - advanced - Internal overview IFDs round-trip; the reader can pick a level. - ``xrspatial/geotiff/tests/test_dask_overview_level.py``, - ``xrspatial/geotiff/tests/test_cog_overview_nodata_1613.py`` + ``xrspatial/geotiff/tests/write/test_overview.py`` - `#2286`_ * - ``writer.bigtiff`` - advanced @@ -283,14 +283,13 @@ Cloud-optimized GeoTIFF (COG) - ``to_geotiff(cog=True)`` writes an IFD-first tiled file with internal overviews that ``rio-cogeo`` accepts (CI-gated by ``XRSPATIAL_REQUIRE_COG_VALIDATOR=1``). - - ``xrspatial/geotiff/tests/test_cog_writer_compliance.py``, - ``xrspatial/geotiff/tests/test_cog_parity_2286.py`` + - ``xrspatial/geotiff/tests/write/test_cog.py`` - `#2286`_ * - ``reader.local_cog`` - stable - Local COG with overview IFDs decodes byte-for-byte through eager and dask paths. 
- - ``xrspatial/geotiff/tests/test_cog.py``, + - ``xrspatial/geotiff/tests/write/test_cog.py``, ``xrspatial/geotiff/tests/test_golden_corpus_overview_cog_1930.py`` - `#2286`_ * - ``reader.http_cog`` @@ -305,19 +304,19 @@ Cloud-optimized GeoTIFF (COG) - advanced - BigTIFF + COG combination passes the dedicated compliance suite (header magic, IFDs, tile and overview offset tables). - - ``xrspatial/geotiff/tests/test_bigtiff_cog_compliance_2286.py`` + - ``xrspatial/geotiff/tests/write/test_bigtiff.py`` - `#2286`_ * - ``writer.cog`` -- tile-layout pre-flight (``cog=True, tiled=False``) - stable - Raises ``ValueError`` at the writer entry point regardless of dtype or codec. - - ``xrspatial/geotiff/tests/test_cog_requires_tiled_2312.py`` + - ``xrspatial/geotiff/tests/write/test_cog.py`` - `#2286`_ * - ``writer.cog`` -- tile-size pre-flight (non-positive ``tile_size``) - stable - Non-positive tile sizes raise ``ValueError`` regardless of the ``tiled`` flag. - - ``xrspatial/geotiff/tests/test_cog_tile_size_hang_2311.py`` + - ``xrspatial/geotiff/tests/write/test_cog.py`` - `#2286`_ HTTP / fsspec reads diff --git a/xrspatial/geotiff/_attrs.py b/xrspatial/geotiff/_attrs.py index 783bed646..f9f9c7d79 100644 --- a/xrspatial/geotiff/_attrs.py +++ b/xrspatial/geotiff/_attrs.py @@ -336,7 +336,7 @@ # the BigTIFF + COG combination has its own external-interop surface # (8-byte offsets in tile/overview tables, BigTIFF-form IFDs, COG # layout invariants). Stays ``advanced`` even when every row of - # ``tests/test_bigtiff_cog_compliance_2286.py`` passes -- promotion + # ``tests/write/test_bigtiff.py`` passes -- promotion # to ``stable`` happens after the gate has lived in CI for a release # cycle. See the BigTIFF COG section in # ``docs/source/reference/geotiff.rst``. 
diff --git a/xrspatial/geotiff/_writer.py b/xrspatial/geotiff/_writer.py index 96bf710f3..926d94023 100644 --- a/xrspatial/geotiff/_writer.py +++ b/xrspatial/geotiff/_writer.py @@ -137,7 +137,7 @@ # the array-level ``_write`` defense-in-depth gate. Keeping the message # string in one place stops the two raise sites from drifting if one # ever gets reworded. The substring assertions in -# ``test_cog_requires_tiled_2312.py`` pin the actionable tokens +# ``tests/write/test_cog.py`` pin the actionable tokens # (``tiled=True``, ``cog=False``, ``COG``) so a future rewrite still # has to satisfy the same contract. _COG_REQUIRES_TILED_MSG = ( diff --git a/xrspatial/geotiff/tests/test_cog.py b/xrspatial/geotiff/tests/test_cog.py deleted file mode 100644 index bab8fa5dc..000000000 --- a/xrspatial/geotiff/tests/test_cog.py +++ /dev/null @@ -1,378 +0,0 @@ -"""Tests for COG writing and the public API.""" -from __future__ import annotations - -import numpy as np -import pytest -import xarray as xr - -from xrspatial.geotiff import open_geotiff, to_geotiff -from xrspatial.geotiff._geotags import GeoTransform -from xrspatial.geotiff._header import parse_all_ifds, parse_header -from xrspatial.geotiff._writer import write - -from .conftest import gpu_available - - -class TestCOGWriter: - def test_cog_layout_ifds_before_data(self, tmp_path): - """COG spec: all IFDs should come before pixel data.""" - arr = np.arange(256, dtype=np.float32).reshape(16, 16) - path = str(tmp_path / 'cog.tif') - write(arr, path, compression='deflate', tiled=True, tile_size=8, - cog=True, overview_levels=[2]) - - with open(path, 'rb') as f: - data = f.read() - - header = parse_header(data) - ifds = parse_all_ifds(data, header) - - assert len(ifds) >= 2 # full res + at least 1 overview - - # All IFD offsets should be < the first tile data offset - all_tile_offsets = [] - for ifd in ifds: - tile_off = ifd.tile_offsets - if tile_off: - all_tile_offsets.extend(tile_off) - - if all_tile_offsets: - 
first_data_offset = min(all_tile_offsets) - # The last IFD byte should be before the first tile data - # (This is the COG layout requirement) - assert header.first_ifd_offset < first_data_offset - - def test_cog_round_trip(self, tmp_path): - arr = np.arange(256, dtype=np.float32).reshape(16, 16) - gt = GeoTransform(-120.0, 45.0, 0.001, -0.001) - path = str(tmp_path / 'cog_rt.tif') - write(arr, path, geo_transform=gt, crs_epsg=4326, - compression='deflate', tiled=True, tile_size=8, - cog=True, overview_levels=[2]) - - result, geo = read_to_array_local(path) - np.testing.assert_array_equal(result, arr) - assert geo.crs_epsg == 4326 - - def test_cog_auto_overviews(self, tmp_path): - """Auto-generate overviews when none specified.""" - arr = np.arange(1024, dtype=np.float32).reshape(32, 32) - path = str(tmp_path / 'cog_auto.tif') - write(arr, path, compression='deflate', tiled=True, tile_size=8, - cog=True) - - with open(path, 'rb') as f: - data = f.read() - - header = parse_header(data) - ifds = parse_all_ifds(data, header) - # Should have at least 2 IFDs (full res + overviews) - assert len(ifds) >= 2 - - -class TestPublicAPI: - def test_read_write_round_trip(self, tmp_path): - """Write a DataArray, read it back, verify values and coords.""" - y = np.linspace(45.0, 44.0, 10) - x = np.linspace(-120.0, -119.0, 12) - data = np.random.RandomState(42).rand(10, 12).astype(np.float32) - - da = xr.DataArray( - data, dims=['y', 'x'], - coords={'y': y, 'x': x}, - attrs={'crs': 4326}, - name='test', - ) - - path = str(tmp_path / 'round_trip.tif') - to_geotiff(da, path, compression='deflate', tiled=False) - - result = open_geotiff(path) - np.testing.assert_array_almost_equal(result.values, data, decimal=5) - assert result.attrs.get('crs') == 4326 - - def test_open_geotiff_name(self, tmp_path): - """DataArray name defaults to filename stem.""" - arr = np.zeros((4, 4), dtype=np.float32) - path = str(tmp_path / 'myfile.tif') - write(arr, path, compression='none', tiled=False) - - da 
= open_geotiff(path) - assert da.name == 'myfile' - - def test_open_geotiff_custom_name(self, tmp_path): - arr = np.zeros((4, 4), dtype=np.float32) - path = str(tmp_path / 'test.tif') - write(arr, path, compression='none', tiled=False) - - da = open_geotiff(path, name='custom') - assert da.name == 'custom' - - def test_write_numpy_array(self, tmp_path): - """to_geotiff should accept raw numpy arrays too.""" - arr = np.arange(16, dtype=np.float32).reshape(4, 4) - path = str(tmp_path / 'numpy.tif') - to_geotiff(arr, path, compression='none') - - result = open_geotiff(path) - np.testing.assert_array_equal(result.values, arr) - - def test_write_3d_rgb(self, tmp_path): - """3D arrays (height, width, bands) should write multi-band.""" - arr = np.zeros((4, 4, 3), dtype=np.uint8) - arr[:, :, 0] = 255 # red channel - path = str(tmp_path / 'rgb.tif') - to_geotiff(arr, path, compression='none') - - result = open_geotiff(path) - np.testing.assert_array_equal(result.values, arr) - - def test_write_rejects_4d(self, tmp_path): - arr = np.zeros((2, 3, 4, 4), dtype=np.float32) - with pytest.raises(ValueError, match="Expected 2D or 3D"): - to_geotiff(arr, str(tmp_path / 'bad.tif')) - - -class TestCOGOverviewResampling: - """Test overview resampling methods produce correct results.""" - - def test_overview_mean(self, tmp_path): - arr = np.array([[1, 3, 5, 7], - [2, 4, 6, 8], - [9, 11, 13, 15], - [10, 12, 14, 16]], dtype=np.float32) - path = str(tmp_path / 'cog_1150_mean.tif') - write(arr, path, compression='deflate', tiled=True, tile_size=4, - cog=True, overview_levels=[2], overview_resampling='mean') - - with open(path, 'rb') as f: - data = f.read() - header = parse_header(data) - ifds = parse_all_ifds(data, header) - assert len(ifds) == 2 - # Overview should be 2x2 - ov_ifd = ifds[1] - assert ov_ifd.width == 2 - assert ov_ifd.height == 2 - - def test_overview_nearest(self, tmp_path): - arr = np.arange(64, dtype=np.float32).reshape(8, 8) - path = str(tmp_path / 
'cog_1150_nearest.tif') - write(arr, path, compression='deflate', tiled=True, tile_size=4, - cog=True, overview_levels=[2], overview_resampling='nearest') - - result, _ = read_to_array_local(path) - np.testing.assert_array_equal(result, arr) - - def test_overview_mode(self, tmp_path): - # Categorical data: mode should pick the most common value - arr = np.array([[1, 1, 2, 2], - [1, 1, 2, 2], - [3, 3, 4, 4], - [3, 3, 4, 4]], dtype=np.int32) - path = str(tmp_path / 'cog_1150_mode.tif') - write(arr, path, compression='deflate', tiled=True, tile_size=4, - cog=True, overview_levels=[2], overview_resampling='mode') - - with open(path, 'rb') as f: - data = f.read() - header = parse_header(data) - ifds = parse_all_ifds(data, header) - assert len(ifds) == 2 - - @pytest.mark.parametrize('method', ['min', 'max', 'median']) - def test_overview_other_methods(self, tmp_path, method): - arr = np.arange(256, dtype=np.float32).reshape(16, 16) - path = str(tmp_path / f'cog_1150_{method}.tif') - write(arr, path, compression='deflate', tiled=True, tile_size=8, - cog=True, overview_levels=[2], overview_resampling=method) - - with open(path, 'rb') as f: - data = f.read() - header = parse_header(data) - ifds = parse_all_ifds(data, header) - assert len(ifds) >= 2 - - -class TestCOGMultipleOverviews: - def test_multiple_overview_levels(self, tmp_path): - """Multiple explicit overview levels produce correct number of IFDs.""" - arr = np.arange(4096, dtype=np.float32).reshape(64, 64) - path = str(tmp_path / 'cog_1150_multi.tif') - write(arr, path, compression='deflate', tiled=True, tile_size=8, - cog=True, overview_levels=[2, 4, 8]) - - with open(path, 'rb') as f: - data = f.read() - header = parse_header(data) - ifds = parse_all_ifds(data, header) - # Full res + 3 overviews - assert len(ifds) == 4 - - def test_auto_overviews_large_raster(self, tmp_path): - """Auto-generation on a larger raster produces multiple levels.""" - arr = np.random.RandomState(42).rand(512, 512).astype(np.float32) - 
path = str(tmp_path / 'cog_1150_auto_large.tif') - write(arr, path, compression='deflate', tiled=True, tile_size=64, - cog=True) - - with open(path, 'rb') as f: - data = f.read() - header = parse_header(data) - ifds = parse_all_ifds(data, header) - # 512 -> 256 -> 128 -> 64: should stop, so 3 overview levels + full = 4 - assert len(ifds) >= 3 - - def test_cog_overview_round_trip_values(self, tmp_path): - """Full-res values are preserved through COG write with overviews.""" - arr = np.random.RandomState(99).rand(32, 32).astype(np.float32) - gt = GeoTransform(-120.0, 45.0, 0.001, -0.001) - path = str(tmp_path / 'cog_1150_rt_values.tif') - write(arr, path, geo_transform=gt, crs_epsg=4326, - compression='deflate', tiled=True, tile_size=16, - cog=True, overview_levels=[2, 4]) - - result, geo = read_to_array_local(path) - np.testing.assert_array_equal(result, arr) - assert geo.crs_epsg == 4326 - - -class TestCOGPublicAPIOverviews: - def test_to_geotiff_cog_with_overviews(self, tmp_path): - """Public to_geotiff() with cog=True writes overviews.""" - y = np.linspace(45.0, 44.0, 32) - x = np.linspace(-120.0, -119.0, 32) - data = np.random.RandomState(42).rand(32, 32).astype(np.float32) - - da = xr.DataArray( - data, dims=['y', 'x'], - coords={'y': y, 'x': x}, - attrs={'crs': 4326}, - ) - - path = str(tmp_path / 'cog_1150_api.tif') - to_geotiff(da, path, compression='deflate', cog=True, - tile_size=16, overview_levels=[2]) - - result = open_geotiff(path) - np.testing.assert_array_almost_equal(result.values, data, decimal=5) - - # Verify COG structure - with open(path, 'rb') as f: - raw = f.read() - header = parse_header(raw) - ifds = parse_all_ifds(raw, header) - assert len(ifds) >= 2 - - def test_to_geotiff_cog_auto_overviews(self, tmp_path): - """Public API auto-generates overviews when only cog=True.""" - data = np.random.RandomState(7).rand(64, 64).astype(np.float32) - da = xr.DataArray(data, dims=['y', 'x']) - - path = str(tmp_path / 'cog_1150_api_auto.tif') - 
to_geotiff(da, path, compression='deflate', cog=True, tile_size=16) - - with open(path, 'rb') as f: - raw = f.read() - header = parse_header(raw) - ifds = parse_all_ifds(raw, header) - assert len(ifds) >= 2 - - -_HAS_GPU = gpu_available() - - -@pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required") -class TestGPUCOGOverviews: - """GPU-specific COG overview tests (require CuPy + CUDA).""" - - def test_gpu_cog_round_trip(self, tmp_path): - import cupy - arr = np.random.RandomState(42).rand(32, 32).astype(np.float32) - gpu_arr = cupy.asarray(arr) - - path = str(tmp_path / 'cog_1150_gpu_rt.tif') - from xrspatial.geotiff import write_geotiff_gpu - write_geotiff_gpu(gpu_arr, path, crs=4326, compression='deflate', - cog=True, overview_levels=[2]) - - result = open_geotiff(path) - np.testing.assert_array_almost_equal(result.values, arr, decimal=5) - - with open(path, 'rb') as f: - raw = f.read() - header = parse_header(raw) - ifds = parse_all_ifds(raw, header) - assert len(ifds) >= 2 - - def test_gpu_cog_auto_overviews(self, tmp_path): - import cupy - arr = np.random.RandomState(7).rand(64, 64).astype(np.float32) - gpu_arr = cupy.asarray(arr) - - path = str(tmp_path / 'cog_1150_gpu_auto.tif') - from xrspatial.geotiff import write_geotiff_gpu - write_geotiff_gpu(gpu_arr, path, compression='deflate', - cog=True, tile_size=16) - - with open(path, 'rb') as f: - raw = f.read() - header = parse_header(raw) - ifds = parse_all_ifds(raw, header) - assert len(ifds) >= 2 - - def test_gpu_overview_resampling_nearest(self, tmp_path): - import cupy - arr = np.arange(64, dtype=np.float32).reshape(8, 8) - gpu_arr = cupy.asarray(arr) - - path = str(tmp_path / 'cog_1150_gpu_nearest.tif') - from xrspatial.geotiff import write_geotiff_gpu - write_geotiff_gpu(gpu_arr, path, compression='deflate', - cog=True, overview_levels=[2], - overview_resampling='nearest') - - result = open_geotiff(path) - np.testing.assert_array_equal(result.values, arr) - - def 
test_gpu_make_overview_values(self): - """GPU overview block-reduce matches CPU for simple case.""" - import cupy - - from xrspatial.geotiff._gpu_decode import make_overview_gpu - from xrspatial.geotiff._writer import _make_overview - - arr = np.random.RandomState(42).rand(16, 16).astype(np.float32) - gpu_arr = cupy.asarray(arr) - - for method in ('mean', 'nearest', 'min', 'max'): - cpu_ov = _make_overview(arr, method=method) - gpu_ov = make_overview_gpu(gpu_arr, method=method).get() - np.testing.assert_allclose(gpu_ov, cpu_ov, rtol=1e-5, - err_msg=f"Mismatch for method={method}") - - def test_gpu_to_geotiff_dispatches_with_overviews(self, tmp_path): - """to_geotiff auto-dispatches CuPy data with overview params.""" - import cupy - arr = np.random.RandomState(11).rand(32, 32).astype(np.float32) - da = xr.DataArray(cupy.asarray(arr), dims=['y', 'x'], - attrs={'crs': 4326}) - - path = str(tmp_path / 'cog_1150_gpu_dispatch.tif') - to_geotiff(da, path, compression='deflate', cog=True, - overview_levels=[2]) - - result = open_geotiff(path) - np.testing.assert_array_almost_equal(result.values, arr, decimal=5) - - with open(path, 'rb') as f: - raw = f.read() - header = parse_header(raw) - ifds = parse_all_ifds(raw, header) - assert len(ifds) >= 2 - - -def read_to_array_local(path): - """Helper to call read_to_array for local files.""" - from xrspatial.geotiff._reader import read_to_array - return read_to_array(path) diff --git a/xrspatial/geotiff/tests/test_cog_cubic_int_overview_nodata_1975.py b/xrspatial/geotiff/tests/test_cog_cubic_int_overview_nodata_1975.py deleted file mode 100644 index 2207998d3..000000000 --- a/xrspatial/geotiff/tests/test_cog_cubic_int_overview_nodata_1975.py +++ /dev/null @@ -1,239 +0,0 @@ -"""Regression tests for issue #1975. - -``to_geotiff(data, cog=True, overview_resampling='cubic', nodata=N)`` on -**integer** rasters with a finite nodata sentinel used to produce severe -ringing artifacts in the overview pyramid near the nodata border. 
- -Root cause: ``_block_reduce_2d``'s cubic branch masked the sentinel to -NaN only when the input dtype was float (``arr2d.dtype.kind == 'f'``). -For integer rasters the function fell through to an unmasked -``zoom(arr2d, 0.5, order=3)``, and the bicubic spline blended the -sentinel value (e.g. -9999) into neighbouring valid cells. Cast back -to the integer dtype, the boundary pixels surfaced as silent garbage -(values like 1082 / 1134 / -11104 against actual data of 100 with -sentinel -9999). - -The fix mirrors the float branch: - -1. Promote the cropped block to float64 so NaN can survive the spline. -2. Mask the sentinel to NaN before ``zoom(... prefilter=False)`` so the - interpolation does not treat it as signal and a single NaN does not - poison the entire row/column. -3. Rewrite NaN back to the sentinel after the spline. -4. ``np.round(...).astype(arr2d.dtype)`` so the integer cast is - well-defined (mirrors the mean/min/max/median integer tail). -""" -from __future__ import annotations - -import numpy as np -import pytest -import xarray as xr - -from xrspatial.geotiff import open_geotiff, to_geotiff -from xrspatial.geotiff._writer import _block_reduce_2d - -# --------------------------------------------------------------------------- -# Helper-level: _block_reduce_2d cubic + integer + sentinel -# --------------------------------------------------------------------------- - - -def _make_block_with_nodata_corner(dtype, nodata_value, size=64, - corner=16, fill=100): - """Return a (size, size) ``dtype`` array with a corner of nodata.""" - arr = np.full((size, size), fill, dtype=dtype) - arr[:corner, :corner] = nodata_value - return arr - - -def test_cubic_int16_with_nodata_does_not_poison_overview(): - """int16 + finite sentinel: cubic overview must not blend sentinel.""" - arr = _make_block_with_nodata_corner(np.int16, -9999) - result = _block_reduce_2d(arr, method='cubic', nodata=-9999) - # Finite (non-sentinel) values must lie within the source data range. 
- # Pre-fix the boundary surfaced values like 1082 / 1134 / -11104. - finite_non_sentinel = result[result != -9999] - assert finite_non_sentinel.size > 0 - assert finite_non_sentinel.max() <= 100 - assert finite_non_sentinel.min() >= 100 # only valid data value is 100 - # The output dtype is the input dtype. - assert result.dtype == np.int16 - # Result shape is half (size/2, size/2). - assert result.shape == (32, 32) - - -def test_cubic_uint16_with_nodata_does_not_poison_overview(): - """uint16 + finite sentinel: same guarantee as int16.""" - arr = _make_block_with_nodata_corner(np.uint16, 65535, fill=200) - result = _block_reduce_2d(arr, method='cubic', nodata=65535) - finite = result[result != 65535] - assert finite.size > 0 - assert finite.min() >= 200 - assert finite.max() <= 200 - assert result.dtype == np.uint16 - - -def test_cubic_int32_with_nodata_does_not_poison_overview(): - """int32 + negative sentinel: same guarantee.""" - arr = _make_block_with_nodata_corner(np.int32, -2147483648, fill=42) - result = _block_reduce_2d(arr, method='cubic', nodata=-2147483648) - finite = result[result != -2147483648] - assert finite.size > 0 - assert finite.min() >= 42 - assert finite.max() <= 42 - assert result.dtype == np.int32 - - -def test_cubic_int_no_nodata_unchanged(): - """Cubic on integer without nodata still runs the plain zoom path.""" - arr = np.arange(64 * 64, dtype=np.int16).reshape(64, 64) - result_no_nd = _block_reduce_2d(arr, method='cubic', nodata=None) - # Plain zoom path: dtype preserved, shape halved. - assert result_no_nd.dtype == np.int16 - assert result_no_nd.shape == (32, 32) - - -def test_cubic_int_nodata_out_of_range_noop(): - """Sentinel outside the dtype range cannot equal any pixel — no-op.""" - arr = np.full((64, 64), 100, dtype=np.uint16) - # -1 cannot exist in uint16; the guard skips the masking branch. - result = _block_reduce_2d(arr, method='cubic', nodata=-1) - # Falls through to plain zoom path; values stay 100 (cubic on constant). 
- assert result.dtype == np.uint16 - # Cubic of a constant grid is the same constant. - assert np.all(result == 100) - - -def test_cubic_int_nodata_fractional_noop(): - """Fractional sentinel on integer dtype: no-op (cannot match any pixel).""" - arr = np.full((64, 64), 100, dtype=np.int16) - result = _block_reduce_2d(arr, method='cubic', nodata=1.5) - assert result.dtype == np.int16 - assert np.all(result == 100) - - -def test_cubic_int_all_sentinel_block_becomes_sentinel(): - """A 2x2 block that is entirely the sentinel rounds back to the sentinel.""" - arr = np.full((4, 4), -9999, dtype=np.int16) - result = _block_reduce_2d(arr, method='cubic', nodata=-9999) - assert result.dtype == np.int16 - assert np.all(result == -9999) - - -def test_cubic_float_branch_still_works(): - """Float regression guard: the existing #1623 path must still work.""" - arr = np.full((64, 64), 100.0, dtype=np.float32) - arr[:16, :16] = -9999.0 - result = _block_reduce_2d(arr, method='cubic', nodata=-9999.0) - assert result.dtype == np.float32 - finite = result[result != -9999.0] - assert finite.size > 0 - # No ringing: all valid output pixels are 100 (constant input region). - np.testing.assert_allclose(finite, 100.0, atol=1e-3) - - -# --------------------------------------------------------------------------- -# End-to-end: to_geotiff cubic + integer + nodata round-trip -# --------------------------------------------------------------------------- - -def test_to_geotiff_int_cubic_overview_round_trip(tmp_path): - """1024x1024 int16 + cog + cubic + nodata round-trips without poisoning.""" - data = np.full((1024, 1024), 100, dtype=np.int16) - data[:256, :256] = -9999 - da = xr.DataArray( - data, dims=('y', 'x'), - coords={'y': np.arange(1024.0), 'x': np.arange(1024.0)}, - ) - path = tmp_path / "cubic_int_1975.tif" - to_geotiff(da, str(path), cog=True, overview_resampling='cubic', - nodata=-9999, crs=4326) - # Level 0: full resolution. 
- r0 = open_geotiff(str(path), overview_level=0) - uniq_0 = set(np.unique(r0.values[~np.isnan(r0.values)])) - assert uniq_0 == {100.0} - # Level 1: the historically poisoned level. - r1 = open_geotiff(str(path), overview_level=1) - finite_1 = r1.values[~np.isnan(r1.values)] - # All finite values must be 100 (the only valid data value); no ringing. - np.testing.assert_array_equal(finite_1, 100.0) - - -def test_to_geotiff_int_cubic_no_nodata_regression(tmp_path): - """int16 + cog + cubic without nodata: cubic still runs (regression).""" - rng = np.random.default_rng(0) - data = rng.integers(0, 1000, size=(1024, 1024), dtype=np.int16) - da = xr.DataArray( - data, dims=('y', 'x'), - coords={'y': np.arange(1024.0), 'x': np.arange(1024.0)}, - ) - path = tmp_path / "cubic_int_no_nd_1975.tif" - to_geotiff(da, str(path), cog=True, overview_resampling='cubic', - crs=4326) - r1 = open_geotiff(str(path), overview_level=1) - # Output dtype is the source integer dtype. - assert r1.values.dtype == np.int16 - assert r1.shape == (512, 512) - - -def test_to_geotiff_int_cubic_overview_matches_mean_finite_range(tmp_path): - """Cubic must agree with mean on which pixels are finite vs nodata.""" - data = np.full((512, 512), 50, dtype=np.uint16) - data[:128, :128] = 65535 - da = xr.DataArray( - data, dims=('y', 'x'), - coords={'y': np.arange(512.0), 'x': np.arange(512.0)}, - ) - cubic_path = tmp_path / "cubic.tif" - mean_path = tmp_path / "mean.tif" - to_geotiff(da, str(cubic_path), cog=True, overview_resampling='cubic', - nodata=65535, crs=4326) - to_geotiff(da, str(mean_path), cog=True, overview_resampling='mean', - nodata=65535, crs=4326) - r_cubic = open_geotiff(str(cubic_path), overview_level=0) - r_mean = open_geotiff(str(mean_path), overview_level=0) - # Sentinel masks should land on the same pixels for both methods on a - # constant valid region with a constant nodata corner. 
- np.testing.assert_array_equal( - np.isnan(r_cubic.values), np.isnan(r_mean.values), - ) - finite_cubic = r_cubic.values[~np.isnan(r_cubic.values)] - finite_mean = r_mean.values[~np.isnan(r_mean.values)] - # All valid pixels are 50 in both. - np.testing.assert_array_equal(finite_cubic, 50.0) - np.testing.assert_array_equal(finite_mean, 50.0) - - -def test_gpu_int_cubic_overview_matches_cpu(tmp_path): - """GPU writer cubic falls back to CPU; bytes must match CPU writer.""" - cupy = pytest.importorskip("cupy") - if not cupy.cuda.is_available(): - pytest.skip("CUDA not available") - - data = np.full((1024, 1024), 100, dtype=np.int16) - data[:256, :256] = -9999 - cpu_da = xr.DataArray( - data, dims=('y', 'x'), - coords={'y': np.arange(1024.0), 'x': np.arange(1024.0)}, - ) - gpu_da = xr.DataArray( - cupy.asarray(data), dims=('y', 'x'), - coords={'y': np.arange(1024.0), 'x': np.arange(1024.0)}, - ) - cpu_path = tmp_path / "cpu.tif" - gpu_path = tmp_path / "gpu.tif" - to_geotiff(cpu_da, str(cpu_path), cog=True, overview_resampling='cubic', - nodata=-9999, crs=4326) - to_geotiff(gpu_da, str(gpu_path), cog=True, overview_resampling='cubic', - nodata=-9999, crs=4326) - cpu_r1 = open_geotiff(str(cpu_path), overview_level=1) - gpu_r1 = open_geotiff(str(gpu_path), overview_level=1) - # Both paths route cubic through the same CPU helper; results must agree - # bit-for-bit on this constant input. 
- cpu_arr = cpu_r1.values - gpu_arr = gpu_r1.values - assert cpu_arr.shape == gpu_arr.shape - np.testing.assert_array_equal( - np.isnan(cpu_arr), np.isnan(gpu_arr), - ) - np.testing.assert_array_equal( - cpu_arr[~np.isnan(cpu_arr)], gpu_arr[~np.isnan(gpu_arr)], - ) diff --git a/xrspatial/geotiff/tests/test_cog_cubic_overview_nodata_1623.py b/xrspatial/geotiff/tests/test_cog_cubic_overview_nodata_1623.py deleted file mode 100644 index 7ec2728c0..000000000 --- a/xrspatial/geotiff/tests/test_cog_cubic_overview_nodata_1623.py +++ /dev/null @@ -1,286 +0,0 @@ -"""COG cubic overview respects the nodata sentinel (issue #1623). - -Before the fix, ``to_geotiff(..., cog=True, nodata=, -overview_resampling='cubic')`` produced wrong overview pixels near -nodata borders on float rasters. The writer rewrote NaN to the -sentinel before reduction; ``_block_reduce_2d(method='cubic')`` then -ignored ``nodata`` and handed the sentinel-poisoned array straight to -``scipy.ndimage.zoom(order=3)``. The cubic spline blended the sentinel -into neighbouring cells (values like ``1133`` and ``-10290`` appeared -where the data was a constant 100). - -The fix masks the sentinel to NaN, runs cubic with -``prefilter=False`` so a single NaN does not poison the entire -row/column, and rewrites any NaN in the output back to the sentinel. -The GPU helper falls back to the CPU cubic path the same way it does -for ``mode``. - -These tests pin: - -* the helper produces no ringing near a sentinel border, -* the round-trip through ``to_geotiff`` writes a clean overview, -* the no-nodata cubic path is unchanged, -* the GPU writer routes cubic through the CPU helper and produces - byte-identical overview tiles. 
-""" -from __future__ import annotations - -import importlib.util - -import numpy as np -import pytest -import xarray as xr - - -def _gpu_available() -> bool: - if importlib.util.find_spec("cupy") is None: - return False - try: - import cupy - return bool(cupy.cuda.is_available()) - except Exception: - return False - - -_HAS_GPU = _gpu_available() -_gpu_only = pytest.mark.skipif( - not _HAS_GPU, - reason="cupy + CUDA required", -) - - -def _flat_with_corner_nan(side: int = 16, nan_side: int = 4): - """``side x side`` float32 ones with a ``nan_side x nan_side`` NaN corner.""" - arr = np.ones((side, side), dtype=np.float32) * 100.0 - arr[:nan_side, :nan_side] = np.nan - return arr - - -def test_block_reduce_cubic_nodata_helper_no_ringing(): - """Helper: cubic with nodata must not leak the sentinel into neighbours.""" - pytest.importorskip("scipy") - from xrspatial.geotiff._writer import _block_reduce_2d - - # Mimic what to_geotiff does: rewrite NaN to the sentinel before - # handing the array to the reducer. - arr = _flat_with_corner_nan() - arr[np.isnan(arr)] = -9999.0 - - out = _block_reduce_2d(arr, 'cubic', nodata=-9999.0) - - # The valid region must still read ~100. Without the fix the cells - # adjacent to the sentinel corner returned values like 1196.28 and - # -19.00 from the cubic blend. - valid = out != -9999.0 - assert np.all(np.abs(out[valid] - 100.0) < 1e-3), ( - f"ringing leaked into cubic output: {out[valid]}") - - # Sentinel cells still mark the nodata region. - assert (out == -9999.0).any() - - -def test_block_reduce_cubic_nodata_poisoning_repro(): - """Without the fix the sentinel poisoned the cubic output. - - Pin the failure mode by running cubic on the same array *without* - a nodata argument and confirming the documented buggy values - appear. This guards against a regression where ``nodata`` silently - stops being honoured. 
- """ - pytest.importorskip("scipy") - from xrspatial.geotiff._writer import _block_reduce_2d - - arr = _flat_with_corner_nan() - arr[np.isnan(arr)] = -9999.0 - - # nodata=None reproduces the pre-fix behaviour. - poisoned = _block_reduce_2d(arr, 'cubic') - # At least one cell outside the corner has a wildly wrong value. - valid_no_sentinel = (poisoned != -9999.0) - drift = np.abs(poisoned[valid_no_sentinel] - 100.0) - assert drift.max() > 50.0, ( - "expected the no-nodata cubic path to ring; got a clean output " - f"with max drift {drift.max()}") - - -def test_block_reduce_cubic_no_nodata_unchanged(): - """Cubic on data without nodata stays at order=3 with prefilter.""" - pytest.importorskip("scipy") - from xrspatial.geotiff._writer import _block_reduce_2d - - arr = np.arange(256, dtype=np.float32).reshape(16, 16) - out_default = _block_reduce_2d(arr, 'cubic') - # The same array round-tripped through scipy zoom directly should - # match (since no sentinel is present the fix path is not taken). 
- from scipy.ndimage import zoom - expected = zoom(arr, 0.5, order=3).astype(arr.dtype) - np.testing.assert_array_equal(out_default, expected) - - -def test_block_reduce_cubic_nodata_unset_is_zoom(): - """nodata=None goes through the original zoom path, no prefilter change.""" - pytest.importorskip("scipy") - from scipy.ndimage import zoom - - from xrspatial.geotiff._writer import _block_reduce_2d - - arr = np.linspace(0.0, 1.0, 64, dtype=np.float32).reshape(8, 8) - out = _block_reduce_2d(arr, 'cubic', nodata=None) - expected = zoom(arr, 0.5, order=3).astype(arr.dtype) - np.testing.assert_array_equal(out, expected) - - -def test_to_geotiff_cog_cubic_nodata_round_trip(tmp_path): - """End-to-end: writing a COG with cubic + nodata produces a clean overview.""" - pytest.importorskip("scipy") - from xrspatial.geotiff import open_geotiff, to_geotiff - - arr = _flat_with_corner_nan() - da = xr.DataArray(arr, dims=['y', 'x']) - p = str(tmp_path / 'cog_cubic_nodata.tif') - to_geotiff(da, p, nodata=-9999.0, cog=True, compression='deflate', - tiled=True, tile_size=16, overview_levels=[2], - overview_resampling='cubic') - - ov = open_geotiff(p, overview_level=1) - data = np.asarray(ov.data) - - # No polluted pixels: every cell is either NaN (reader unmasked the - # sentinel back to NaN), the literal sentinel value (reader kept it), - # or ~100 (the source value). 
- polluted = ( - (~np.isnan(data)) - & (data != -9999.0) - & (np.abs(data - 100.0) > 1e-3) - ) - assert not polluted.any(), ( - f"polluted overview cells: {data[polluted]}") - - -def test_to_geotiff_cog_cubic_no_nodata_round_trip(tmp_path): - """Regression guard: cubic without nodata still produces the same overview.""" - pytest.importorskip("scipy") - from xrspatial.geotiff import open_geotiff, to_geotiff - - arr = np.arange(256, dtype=np.float32).reshape(16, 16) - da = xr.DataArray(arr, dims=['y', 'x']) - p = str(tmp_path / 'cog_cubic_no_nodata.tif') - to_geotiff(da, p, cog=True, compression='deflate', - tiled=True, tile_size=16, overview_levels=[2], - overview_resampling='cubic') - - ov = open_geotiff(p, overview_level=1) - assert ov.shape == (8, 8) - assert ov.dtype == np.float32 - # Cubic on a monotonic ramp stays bounded by source range. - assert float(np.asarray(ov.data).min()) >= float(arr.min()) - 1.0 - assert float(np.asarray(ov.data).max()) <= float(arr.max()) + 1.0 - - -def test_block_reduce_cubic_inf_nodata_is_masked(): - """nodata=+/-inf must be masked just like a finite sentinel.""" - pytest.importorskip("scipy") - from xrspatial.geotiff._writer import _block_reduce_2d - - arr = np.ones((16, 16), dtype=np.float32) * 5.0 - arr[:4, :4] = np.inf # treat +inf as sentinel - out = _block_reduce_2d(arr, 'cubic', nodata=np.inf) - valid = ~np.isinf(out) - # Outside the masked region we still read ~5.0. 
- np.testing.assert_allclose(out[valid], 5.0, atol=1e-4) - - -def test_block_reduce_cubic_nan_sentinel_skips_mask(): - """nodata=NaN is a no-op (matches the existing nan-pass-through gate).""" - pytest.importorskip("scipy") - from scipy.ndimage import zoom - - from xrspatial.geotiff._writer import _block_reduce_2d - - arr = np.linspace(0.0, 1.0, 64, dtype=np.float32).reshape(8, 8) - out = _block_reduce_2d(arr, 'cubic', nodata=np.nan) - expected = zoom(arr, 0.5, order=3).astype(arr.dtype) - np.testing.assert_array_equal(out, expected) - - -def test_gpu_overview_methods_includes_cubic(): - """The GPU constant must list ``cubic`` so callers do not pre-validate - against the smaller pre-#1623 set.""" - from xrspatial.geotiff._gpu_decode import GPU_OVERVIEW_METHODS - assert 'cubic' in GPU_OVERVIEW_METHODS - - -@_gpu_only -def test_gpu_block_reduce_cubic_falls_back_to_cpu(): - """GPU cubic must route through the CPU helper and return cupy data.""" - pytest.importorskip("scipy") - import cupy - - from xrspatial.geotiff._gpu_decode import _block_reduce_2d_gpu - from xrspatial.geotiff._writer import _block_reduce_2d - - arr = _flat_with_corner_nan() - arr[np.isnan(arr)] = -9999.0 - - gpu_arr = cupy.asarray(arr) - gpu_out = _block_reduce_2d_gpu(gpu_arr, 'cubic', nodata=-9999.0) - assert isinstance(gpu_out, cupy.ndarray) - - cpu_out = _block_reduce_2d(arr, 'cubic', nodata=-9999.0) - np.testing.assert_array_equal(cupy.asnumpy(gpu_out), cpu_out) - - -@_gpu_only -def test_to_geotiff_cog_cubic_nodata_gpu_round_trip(tmp_path): - """End-to-end GPU writer: cubic + nodata produces a clean overview.""" - pytest.importorskip("scipy") - import cupy - - from xrspatial.geotiff import open_geotiff, to_geotiff - - arr = _flat_with_corner_nan() - da = xr.DataArray(cupy.asarray(arr), dims=['y', 'x']) - p = str(tmp_path / 'cog_cubic_nodata_gpu.tif') - to_geotiff(da, p, nodata=-9999.0, cog=True, compression='deflate', - tiled=True, tile_size=16, overview_levels=[2], - 
overview_resampling='cubic') - - ov = open_geotiff(p, overview_level=1) - data = np.asarray(ov.data) - polluted = ( - (~np.isnan(data)) - & (data != -9999.0) - & (np.abs(data - 100.0) > 1e-3) - ) - assert not polluted.any(), ( - f"GPU cubic overview leaked sentinel into neighbours: " - f"{data[polluted]}") - - -@_gpu_only -def test_gpu_cpu_cubic_overview_bytes_match(tmp_path): - """CPU and GPU writers produce the same cubic overview pixels.""" - pytest.importorskip("scipy") - import cupy - - from xrspatial.geotiff import open_geotiff, to_geotiff - - arr = _flat_with_corner_nan() - cpu_da = xr.DataArray(arr, dims=['y', 'x']) - gpu_da = xr.DataArray(cupy.asarray(arr), dims=['y', 'x']) - - cpu_path = str(tmp_path / 'cpu_cubic.tif') - gpu_path = str(tmp_path / 'gpu_cubic.tif') - to_geotiff(cpu_da, cpu_path, nodata=-9999.0, cog=True, - compression='deflate', tiled=True, tile_size=16, - overview_levels=[2], overview_resampling='cubic') - to_geotiff(gpu_da, gpu_path, nodata=-9999.0, cog=True, - compression='deflate', tiled=True, tile_size=16, - overview_levels=[2], overview_resampling='cubic') - - cpu_ov = np.asarray(open_geotiff(cpu_path, overview_level=1).data) - gpu_ov = np.asarray(open_geotiff(gpu_path, overview_level=1).data) - # NaN-aware compare since the reader unmasks the sentinel. - np.testing.assert_array_equal(np.isnan(cpu_ov), np.isnan(gpu_ov)) - finite = ~np.isnan(cpu_ov) - np.testing.assert_allclose(cpu_ov[finite], gpu_ov[finite], atol=1e-3) diff --git a/xrspatial/geotiff/tests/test_cog_int_overview_nodata_2026_05_12.py b/xrspatial/geotiff/tests/test_cog_int_overview_nodata_2026_05_12.py deleted file mode 100644 index c8a62c4f5..000000000 --- a/xrspatial/geotiff/tests/test_cog_int_overview_nodata_2026_05_12.py +++ /dev/null @@ -1,317 +0,0 @@ -"""COG overview generation respects the nodata sentinel for integer rasters. - -Companion to issue #1613 (float COG overview poisoning). 
Before this fix, -``to_geotiff(int_data, cog=True, nodata=N)`` ran the overview reduction with -the sentinel still present in the integer-cast float64 block. The nan-aware -reduction (``np.nanmean`` / nanmin / nanmax / nanmedian) averaged the -sentinel into surrounding valid pixels and produced overview values that -the reader could not mask -- they did not equal the sentinel, so the -int-to-NaN mask in ``open_geotiff`` left them as silent garbage. - -These tests pin the contract that the CPU writer (and the GPU mirror in -``_block_reduce_2d_gpu``) skip the integer sentinel during overview -reduction, so the resulting pyramid only contains real measurements and -the sentinel value. -""" -from __future__ import annotations - -import importlib.util - -import numpy as np -import pytest -import xarray as xr - - -def _gpu_available() -> bool: - if importlib.util.find_spec("cupy") is None: - return False - try: - import cupy - return bool(cupy.cuda.is_available()) - except Exception: - return False - - -_HAS_GPU = _gpu_available() -_gpu_only = pytest.mark.skipif( - not _HAS_GPU, - reason="cupy + CUDA required", -) - - -# --------------------------------------------------------------------------- -# Unit-level: _block_reduce_2d on integer dtypes -# --------------------------------------------------------------------------- - -def _int_block_partial_sentinel(sentinel, dtype): - """4x4 integer raster where the right two columns of each row pair - mix valid and sentinel cells. 
Block (0, 1) has (100, 100, sentinel, - sentinel); block (1, 1) has (200, 200, sentinel, sentinel).""" - arr = np.array([ - [100, 100, 100, 100], - [100, 100, sentinel, sentinel], - [200, 200, 200, 200], - [200, 200, sentinel, sentinel], - ], dtype=dtype) - return arr - - -@pytest.mark.parametrize('method', ['mean', 'min', 'max', 'median']) -@pytest.mark.parametrize('dtype,sentinel', [ - (np.uint8, 255), - (np.uint16, 65535), - (np.int16, -9999), - (np.int32, -2_000_000_000), -]) -def test_block_reduce_int_sentinel_masked(method, dtype, sentinel): - """Integer overview reductions must skip sentinel cells. - - Before the fix, mean produced averages like ``(100+sentinel)/2`` cast - back to the integer dtype -- a non-sentinel value that the reader - leaves untouched. The fix masks the sentinel to NaN before the - reduction so nan-aware aggregation skips it. - """ - from xrspatial.geotiff._writer import _block_reduce_2d - - arr = _int_block_partial_sentinel(sentinel, dtype) - out = _block_reduce_2d(arr, method, nodata=sentinel) - - # Every block now has at least one valid 100/200; result should equal - # the valid value (since for mean/min/max/median over {100, 100} is - # 100, and over {200, 200} is 200). Neither block has any cell that - # isn't 100, 200, or sentinel, so the output must be a subset of - # {100, 200}. - assert out.dtype == arr.dtype - out_vals = set(out.flatten().tolist()) - assert out_vals.issubset({100, 200}), ( - f"method={method} dtype={dtype} sentinel={sentinel} " - f"produced poisoned values: {out_vals - {100, 200}}" - ) - - -@pytest.mark.parametrize('dtype,sentinel', [ - (np.uint16, 65535), - (np.int16, -9999), -]) -def test_block_reduce_int_all_sentinel_block(dtype, sentinel): - """A 2x2 block that's entirely sentinel reduces to the sentinel. - - Without the post-reduction NaN-to-sentinel rewrite in the integer - branch, the all-NaN block from nanmean would cast to undefined - integer behaviour (zero or INT_MIN depending on platform). 
- """ - from xrspatial.geotiff._writer import _block_reduce_2d - - arr = np.array([ - [100, 100, sentinel, sentinel], - [100, 100, sentinel, sentinel], - [200, 200, 200, 200], - [200, 200, 200, 200], - ], dtype=dtype) - - out = _block_reduce_2d(arr, 'mean', nodata=sentinel) - assert out.dtype == arr.dtype - # Top-right block is all-sentinel; output must be the sentinel - assert out[0, 1] == sentinel - # Other blocks contain only valid values - assert out[0, 0] == 100 - assert out[1, 0] == 200 - assert out[1, 1] == 200 - - -def test_block_reduce_int_no_nodata_unchanged(): - """Without ``nodata``, the integer reduction code path stays unchanged. - - Regression check: the fix must not alter the no-sentinel case. - """ - from xrspatial.geotiff._writer import _block_reduce_2d - - arr = np.array([ - [1, 2, 3, 4], - [5, 6, 7, 8], - [9, 10, 11, 12], - [13, 14, 15, 16], - ], dtype=np.int16) - - out = _block_reduce_2d(arr, 'mean') - # Block (0,0) = mean(1,2,5,6) = 3.5 -> round -> 4 - # Block (0,1) = mean(3,4,7,8) = 5.5 -> round -> 6 - # Block (1,0) = mean(9,10,13,14) = 11.5 -> round -> 12 - # Block (1,1) = mean(11,12,15,16) = 13.5 -> round -> 14 - expected = np.array([[4, 6], [12, 14]], dtype=np.int16) - np.testing.assert_array_equal(out, expected) - - -def test_block_reduce_int_out_of_range_sentinel_noop(): - """A sentinel outside the dtype's range is a no-op (no mask applied). - - Mirrors the ``_int_nodata_in_range`` gating in ``_reader.py``: a - uint16 file with GDAL_NODATA="-9999" cannot match any decoded pixel, - so the reduction proceeds without the mask. This keeps the fix from - raising OverflowError on the dtype cast. 
- """ - from xrspatial.geotiff._writer import _block_reduce_2d - - # uint16 with nodata=-9999: out of range, no-op - arr = np.array([ - [1, 2, 3, 4], - [5, 6, 7, 8], - ], dtype=np.uint16) - out = _block_reduce_2d(arr, 'mean', nodata=-9999) - # Should produce the same result as without the kwarg - expected = _block_reduce_2d(arr, 'mean') - np.testing.assert_array_equal(out, expected) - - -# --------------------------------------------------------------------------- -# End-to-end: to_geotiff + open_geotiff round trip -# --------------------------------------------------------------------------- - -@pytest.fixture -def _int_cog_inputs(tmp_path): - """uint16 raster, full of 100 with a 65x65 sentinel patch.""" - H, W = 256, 256 - data = np.full((H, W), 100, dtype=np.uint16) - data[64:129, 64:129] = 65535 - da = xr.DataArray( - data, - dims=('y', 'x'), - coords={'y': np.arange(H, dtype=np.float64), - 'x': np.arange(W, dtype=np.float64)}, - attrs={'crs': 4326}, - ) - return da, tmp_path - - -@pytest.mark.parametrize('method', ['mean', 'min', 'max', 'median']) -def test_cpu_int_cog_overview_not_poisoned(_int_cog_inputs, method): - """End-to-end: integer COG overview pyramid contains only valid values. - - Before the fix, the level-1 read contained values like 16459 and - 32818 -- nan-aware-mean of (sentinel, 100, 100, 100) and (sentinel, - sentinel, 100, 100) cast back to uint16. The reader can't mask them - because they don't equal 65535. 
- """ - from xrspatial.geotiff import open_geotiff, to_geotiff - - da, tmp_path = _int_cog_inputs - path = str(tmp_path / f'int_overview_{method}_2026_05_12.tif') - to_geotiff(da, path, nodata=65535, cog=True, - overview_levels=[2], overview_resampling=method) - - ov = open_geotiff(path, overview_level=1) - arr = np.asarray(ov.data) - unique = set(int(v) for v in np.unique(arr) if not np.isnan(v)) - poisoned = unique - {100, 65535} - assert not poisoned, ( - f"method={method} produced poisoned overview values: {poisoned}" - ) - - -def test_cpu_int_cog_overview_3band_not_poisoned(tmp_path): - """3-band integer COG: same fix applies via the 3D _make_overview branch.""" - from xrspatial.geotiff import open_geotiff, to_geotiff - - H, W = 256, 256 - data = np.full((H, W, 3), 100, dtype=np.uint16) - data[64:129, 64:129, :] = 65535 - da = xr.DataArray( - data, - dims=('y', 'x', 'band'), - coords={'y': np.arange(H, dtype=np.float64), - 'x': np.arange(W, dtype=np.float64), - 'band': [0, 1, 2]}, - attrs={'crs': 4326}, - ) - - path = str(tmp_path / 'int_overview_3band_2026_05_12.tif') - to_geotiff(da, path, nodata=65535, cog=True, - overview_levels=[2], overview_resampling='mean') - - ov = open_geotiff(path, overview_level=1) - arr = np.asarray(ov.data) - unique = set(int(v) for v in np.unique(arr) if not np.isnan(v)) - poisoned = unique - {100, 65535} - assert not poisoned, ( - f"3-band integer overview produced poisoned values: {poisoned}" - ) - - -def test_cpu_int_cog_no_nodata_unchanged(tmp_path): - """No nodata kwarg: integer overview path stays as it was.""" - from xrspatial.geotiff import open_geotiff, to_geotiff - - H, W = 256, 256 - data = np.full((H, W), 100, dtype=np.uint16) - data[100:200, 100:200] = 50 - da = xr.DataArray( - data, - dims=('y', 'x'), - coords={'y': np.arange(H, dtype=np.float64), - 'x': np.arange(W, dtype=np.float64)}, - attrs={'crs': 4326}, - ) - - path = str(tmp_path / 'int_overview_no_nodata_2026_05_12.tif') - to_geotiff(da, path, cog=True, - 
overview_levels=[2], overview_resampling='mean') - - ov = open_geotiff(path, overview_level=1) - arr = np.asarray(ov.data) - # No sentinel, so every overview pixel is a real average of 50 / 100. - # Block-boundary pixels are weighted means: (50,50,50,100)/4 = 62.5 -> 63 - unique = set(int(v) for v in np.unique(arr)) - # Must contain at least 50 and 100; boundary-mixing averages allowed. - assert 50 in unique - assert 100 in unique - - -# --------------------------------------------------------------------------- -# GPU mirror -# --------------------------------------------------------------------------- - -@_gpu_only -@pytest.mark.parametrize('method', ['mean', 'min', 'max', 'median']) -@pytest.mark.parametrize('dtype,sentinel', [ - (np.uint16, 65535), - (np.int16, -9999), -]) -def test_gpu_block_reduce_int_sentinel_masked(method, dtype, sentinel): - """GPU mirror of the CPU integer sentinel-mask fix.""" - import cupy - - from xrspatial.geotiff._gpu_decode import _block_reduce_2d_gpu - - arr = _int_block_partial_sentinel(sentinel, dtype) - cpu_arr = cupy.asarray(arr) - out_gpu = _block_reduce_2d_gpu(cpu_arr, method, nodata=sentinel) - out = out_gpu.get() - - assert out.dtype == arr.dtype - out_vals = set(out.flatten().tolist()) - assert out_vals.issubset({100, 200}), ( - f"GPU method={method} dtype={dtype} produced poisoned values: " - f"{out_vals - {100, 200}}" - ) - - -@_gpu_only -@pytest.mark.parametrize('method', ['mean', 'min', 'max', 'median']) -def test_gpu_cpu_int_overview_byte_match(method): - """CPU and GPU integer overview reductions agree byte-for-byte. - - Same parity contract as #1623 (cubic). Without the GPU fix, the GPU - pyramid would carry poisoned values while the CPU pyramid carried - sentinels -- two backends disagreeing on identical input. 
- """ - import cupy - - from xrspatial.geotiff._gpu_decode import _block_reduce_2d_gpu - from xrspatial.geotiff._writer import _block_reduce_2d - - arr = _int_block_partial_sentinel(-9999, np.int16) - cpu_out = _block_reduce_2d(arr, method, nodata=-9999) - gpu_out = _block_reduce_2d_gpu( - cupy.asarray(arr), method, nodata=-9999).get() - - np.testing.assert_array_equal(cpu_out, gpu_out) diff --git a/xrspatial/geotiff/tests/test_cog_invalid_input_errors_2286.py b/xrspatial/geotiff/tests/test_cog_invalid_input_errors_2286.py deleted file mode 100644 index bc5a016bb..000000000 --- a/xrspatial/geotiff/tests/test_cog_invalid_input_errors_2286.py +++ /dev/null @@ -1,375 +0,0 @@ -"""COG writer rejects unsupported inputs with typed, actionable errors (#2301). - -Production-ready means predictable failure modes. The rows below cover the -input combinations the parent issue (#2286) flagged as ambiguous on the -``to_geotiff(..., cog=True)`` surface: experimental codecs without the -opt-in, internal-only JPEG without the opt-in, rotated transforms, file-like -destinations, object-dtype arrays, and conflicting CRS attrs. Each row -asserts the exception type AND a substring of the message that names the -violated constraint, so a message rewrite cannot silently turn an actionable -error into a vague one. - -Most rows pin behaviour the writer already enforced. The rotated -``attrs['transform']`` Affine row is the one writer-side change in this PR: -a rasterio ``Affine`` iterates as a 9-element augmented matrix and used to -slip past the 6-tuple rotation gate in ``transform_from_attr``, silently -producing an axis-aligned GeoTIFF that dropped the rotation. ``to_geotiff`` -now detects that shape via the ``Affine.b`` / ``Affine.d`` attrs and raises -the same diagnostic the 6-tuple branch already produced. - -The CuPy + ``cog=True`` row is intentionally a no-op pin: the GPU writer -currently produces a valid COG and is already documented as Experimental -in the docstring tier map. 
Promoting that to a typed rejection is a tier -decision tracked under the parent issue, not a #2301 deliverable. -""" -from __future__ import annotations - -import importlib.util -import io - -import numpy as np -import pytest -import xarray as xr - -from xrspatial.geotiff import to_geotiff -from xrspatial.geotiff._errors import ConflictingCRSError - - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - -def _float_da(shape=(8, 8)): - """A small float32 DataArray suitable for COG writes.""" - return xr.DataArray( - np.zeros(shape, dtype=np.float32), dims=('y', 'x') - ) - - -def _uint8_da(shape=(8, 8)): - """A small uint8 DataArray (JPEG is uint8-only).""" - return xr.DataArray( - np.zeros(shape, dtype=np.uint8), dims=('y', 'x') - ) - - -# --------------------------------------------------------------------------- -# Row 1: Experimental codec without ``allow_experimental_codecs=True`` -# --------------------------------------------------------------------------- - -@pytest.mark.parametrize('codec', ['lerc', 'lz4', 'jpeg2000', 'j2k']) -def test_experimental_codec_without_opt_in_raises(tmp_path, codec): - """Experimental codecs are gated; the message names the codec and - the opt-in flag, and mentions the experimental tier so the caller - knows why the default refuses the input.""" - da = _float_da() - p = tmp_path / f'cog_exp_codec_{codec}_2301.tif' - - with pytest.raises(ValueError) as exc: - to_geotiff(da, str(p), cog=True, compression=codec) - - msg = str(exc.value) - assert codec in msg, msg - assert 'allow_experimental_codecs' in msg, msg - assert 'experimental' in msg.lower(), msg - - -# --------------------------------------------------------------------------- -# Row 2: Internal-only JPEG without ``allow_internal_only_jpeg=True`` -# --------------------------------------------------------------------------- - -def 
test_internal_only_jpeg_without_opt_in_raises(tmp_path): - """``compression='jpeg'`` is rejected by default; the message names - the codec, the opt-in flag, and explains the interop break.""" - da = _uint8_da() - p = tmp_path / 'cog_jpeg_no_optin_2301.tif' - - with pytest.raises(ValueError) as exc: - to_geotiff(da, str(p), cog=True, compression='jpeg') - - msg = str(exc.value) - assert 'jpeg' in msg.lower(), msg - assert 'allow_internal_only_jpeg' in msg, msg - - -def test_internal_only_jpeg_not_covered_by_experimental_flag(tmp_path): - """``allow_experimental_codecs=True`` does not cover JPEG. The two - flags are deliberately separate (internal-only is stricter than - experimental) so a caller cannot reach the JFIF path by toggling - only the experimental switch.""" - da = _uint8_da() - p = tmp_path / 'cog_jpeg_exp_flag_only_2301.tif' - - with pytest.raises(ValueError) as exc: - to_geotiff(da, str(p), cog=True, - compression='jpeg', - allow_experimental_codecs=True) - - msg = str(exc.value) - assert 'jpeg' in msg.lower(), msg - assert 'allow_internal_only_jpeg' in msg, msg - - -# --------------------------------------------------------------------------- -# Row 3: Rotated transform on input DataArray -# --------------------------------------------------------------------------- - -def test_rotated_affine_attr_without_drop_rotation_raises(tmp_path): - """The reader stamps ``attrs['rotated_affine']`` when called with - ``allow_rotated=True``. 
Writing such a DataArray without - ``drop_rotation=True`` would silently produce an identity-affine - output (#2216), so the entry point refuses up front.""" - da = _float_da() - da.attrs['rotated_affine'] = (1.0, 0.5, 0.0, 0.0, 0.5, 1.0) - p = tmp_path / 'cog_rotated_affine_2301.tif' - - with pytest.raises(ValueError) as exc: - to_geotiff(da, str(p), cog=True) - - msg = str(exc.value) - assert 'rotated_affine' in msg, msg - assert 'drop_rotation' in msg, msg - - -def test_rotated_affine_attr_drop_rotation_opt_in_succeeds(tmp_path): - """The opt-in path lets the write proceed (lossy but explicit). - Pinned here so the rejection-message test cannot be 'fixed' by - removing the opt-in entirely.""" - da = _float_da() - da.attrs['rotated_affine'] = (1.0, 0.5, 0.0, 0.0, 0.5, 1.0) - p = tmp_path / 'cog_rotated_affine_optin_2301.tif' - - to_geotiff(da, str(p), cog=True, drop_rotation=True) - assert p.exists() - assert p.stat().st_size > 0 - - -def test_rotated_transform_tuple_attr_raises(tmp_path): - """``attrs['transform']`` as a 6-tuple ``(a, b, c, d, e, f)`` with - non-zero rotation/shear (``b`` or ``d``) is refused by - ``transform_from_attr``. The message names the rotation/shear - constraint and the axis-aligned requirement.""" - da = _float_da() - da.attrs['transform'] = (1.0, 0.5, 0.0, 0.0, -1.0, 4.0) # b = 0.5 - p = tmp_path / 'cog_rotated_tuple_2301.tif' - - with pytest.raises(ValueError) as exc: - to_geotiff(da, str(p), cog=True) - - msg = str(exc.value) - assert 'rotation/shear' in msg, msg - assert 'axis-aligned' in msg, msg - - -def test_rotated_transform_affine_attr_raises(tmp_path): - """``attrs['transform']`` as a rasterio ``Affine`` with non-zero - rotation/shear used to slip past the 6-tuple gate because - ``Affine`` iterates as a 9-element augmented matrix. 
The #2301 - validation hook detects the Affine duck-type and raises the same - diagnostic the 6-tuple branch already produced.""" - Affine = pytest.importorskip('affine').Affine - da = _float_da() - da.attrs['transform'] = Affine(1.0, 0.5, 0.0, 0.0, -1.0, 4.0) # b = 0.5 - p = tmp_path / 'cog_rotated_affine_obj_2301.tif' - - with pytest.raises(ValueError) as exc: - to_geotiff(da, str(p), cog=True) - - msg = str(exc.value) - assert 'rotation/shear' in msg, msg - assert 'axis-aligned' in msg, msg - - -def test_skewed_transform_affine_attr_raises(tmp_path): - """The ``d`` shear term (Affine's third row, first column) is also - rejected. Same validator path as ``b != 0``; pinned separately so a - refactor that only covers ``b`` is caught.""" - Affine = pytest.importorskip('affine').Affine - da = _float_da() - da.attrs['transform'] = Affine(1.0, 0.0, 0.0, 0.3, -1.0, 4.0) # d = 0.3 - p = tmp_path / 'cog_skewed_affine_obj_2301.tif' - - with pytest.raises(ValueError) as exc: - to_geotiff(da, str(p), cog=True) - - msg = str(exc.value) - assert 'rotation/shear' in msg, msg - - -def test_affine_attr_with_unconvertable_b_d_raises(tmp_path): - """An attrs['transform'] object that quacks like an Affine (has - ``.b`` and ``.d``) but carries non-numeric values for them is - refused with a clear ``ValueError``. The fail-closed branch - prevents a malformed input from bypassing the rotation/shear gate - and falling through to the no-georef path.""" - class _BogusAffine: - b = "not a number" - d = 0.0 - da = _float_da() - da.attrs['transform'] = _BogusAffine() - p = tmp_path / 'cog_bogus_affine_2301.tif' - - with pytest.raises(ValueError) as exc: - to_geotiff(da, str(p), cog=True) - - msg = str(exc.value) - assert 'unconvertable' in msg or 'rotation/shear' in msg, msg - - -def test_axis_aligned_affine_attr_still_writes(tmp_path): - """Sanity guard: an axis-aligned Affine (b=d=0) must keep working. 
- Without this row the #2301 hook could regress every legitimate - Affine call site by widening the rejection bucket.""" - Affine = pytest.importorskip('affine').Affine - da = _float_da() - da.attrs['transform'] = Affine(1.0, 0.0, 0.0, 0.0, -1.0, 4.0) # b=d=0 - p = tmp_path / 'cog_axis_aligned_affine_2301.tif' - - to_geotiff(da, str(p), cog=True) - assert p.exists() - assert p.stat().st_size > 0 - - -# --------------------------------------------------------------------------- -# Row 4: File-like / BytesIO destination with ``cog=True`` -# --------------------------------------------------------------------------- - -def test_bytesio_destination_with_cog_raises(): - """COG output needs a real filesystem path because the writer runs - a second pass to populate overview offsets. ``to_geotiff`` rejects - file-like destinations with ``cog=True`` up front.""" - da = _float_da() - buf = io.BytesIO() - - with pytest.raises(ValueError) as exc: - to_geotiff(da, buf, cog=True) - - msg = str(exc.value) - assert 'cog' in msg.lower(), msg - assert 'file-like' in msg or 'string path' in msg, msg - - -def test_bytesio_destination_without_cog_still_works(): - """Sanity guard: BytesIO is fine for plain TIFF writes. Pinned so - the COG-only rejection cannot regress into a blanket file-like - refusal.""" - da = _float_da() - buf = io.BytesIO() - - to_geotiff(da, buf, cog=False) - assert buf.tell() > 0 - - -# --------------------------------------------------------------------------- -# Row 5: CuPy / GPU-backed array with ``cog=True`` -# --------------------------------------------------------------------------- - -def test_cupy_input_with_cog_currently_succeeds(tmp_path): - """The GPU writer currently produces a valid COG for CuPy input; - GPU COG is documented as Experimental in the docstring tier map - but is not refused at the entry point. 
This row pins the - currently-succeeds behaviour so a future tier-promotion change - (tracked under #2286) does not silently break callers that - already rely on the path. - - No production-side validation hook is added for #2301 because the - constraint for this issue is 'do not change semantics on paths - that currently succeed'.""" - if importlib.util.find_spec('cupy') is None: - pytest.skip('cupy not installed') - try: - import cupy as cp - if not cp.cuda.is_available(): - pytest.skip('CUDA device not available') - except Exception as exc: - pytest.skip(f'cupy import failed: {exc}') - - da = xr.DataArray(cp.zeros((8, 8), dtype=cp.float32), dims=('y', 'x')) - p = tmp_path / 'cog_cupy_2301.tif' - - # No exception; produces a real file. If a future PR tightens the - # GPU COG tier this assertion will start failing and the next - # reviewer can decide whether to flip this to a ``pytest.raises``. - to_geotiff(da, str(p), cog=True) - assert p.exists() - assert p.stat().st_size > 0 - - -# --------------------------------------------------------------------------- -# Row 6: Object-dtype DataArray -# --------------------------------------------------------------------------- - -def test_object_dtype_with_cog_raises(tmp_path): - """Object dtype is not a TIFF sample format. 
``numpy_to_tiff_dtype`` - raises ``ValueError`` naming the dtype, so the writer surfaces a - typed error rather than a deep struct-pack traceback.""" - da = xr.DataArray( - np.array([[1, 2], [3, 4]], dtype=object), dims=('y', 'x')) - p = tmp_path / 'cog_object_dtype_2301.tif' - - with pytest.raises(ValueError) as exc: - to_geotiff(da, str(p), cog=True) - - msg = str(exc.value) - assert 'object' in msg.lower() or 'dtype' in msg.lower(), msg - - -# --------------------------------------------------------------------------- -# Row 7: Conflicting ``crs=`` kwarg / array CRS -# --------------------------------------------------------------------------- - -def test_conflicting_attrs_crs_and_crs_wkt_raises(tmp_path): - """When ``attrs['crs']`` and ``attrs['crs_wkt']`` resolve to - different CRSes via pyproj, the writer refuses with - ``ConflictingCRSError`` (#1987 PR 6). #2301 only confirms the - message stays actionable; it does not introduce a new check.""" - pytest.importorskip('pyproj') - wkt_3857 = ( - 'PROJCS["WGS 84 / Pseudo-Mercator",' - 'GEOGCS["WGS 84",' - 'DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563]],' - 'PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]],' - 'PROJECTION["Mercator_1SP"],' - 'PARAMETER["central_meridian",0],' - 'PARAMETER["scale_factor",1],' - 'PARAMETER["false_easting",0],' - 'PARAMETER["false_northing",0],' - 'UNIT["metre",1],' - 'AUTHORITY["EPSG","3857"]]' - ) - da = _float_da() - da.attrs['crs'] = 4326 - da.attrs['crs_wkt'] = wkt_3857 - p = tmp_path / 'cog_conflicting_crs_2301.tif' - - with pytest.raises(ConflictingCRSError) as exc: - to_geotiff(da, str(p), cog=True) - - msg = str(exc.value) - # Message names both inputs and the resolution hint. - assert "attrs['crs']" in msg, msg - assert "attrs['crs_wkt']" in msg, msg - # Caller-actionable: tells the user to reconcile the two attrs. 
- assert 'Reconcile' in msg or 'reconcile' in msg, msg - - -def test_crs_kwarg_overrides_attrs_silently(tmp_path): - """``crs=`` kwarg overrides the attrs disagreement. The - ``_check_write_conflicting_crs`` short-circuit at the top of the - check (``if context.get('crs_kwarg') is not None: return``) lets - the write proceed even when the two attrs would otherwise - disagree, so callers can intentionally use the kwarg to clobber - stale attrs. Pinned here so a future 'stricter' rewrite of the - conflict check that drops the short-circuit does not surprise - those callers.""" - pytest.importorskip('pyproj') - da = _float_da() - da.attrs['crs'] = 4326 - # ``crs_wkt`` value is irrelevant: the check short-circuits on the - # kwarg before pyproj parsing ever runs. - da.attrs['crs_wkt'] = 'GEOGCS["foo"]' - p = tmp_path / 'cog_crs_kwarg_override_2301.tif' - - to_geotiff(da, str(p), cog=True, crs=3857) - assert p.exists() - assert p.stat().st_size > 0 diff --git a/xrspatial/geotiff/tests/test_cog_overview_ceil_2105.py b/xrspatial/geotiff/tests/test_cog_overview_ceil_2105.py deleted file mode 100644 index 5ee9100f6..000000000 --- a/xrspatial/geotiff/tests/test_cog_overview_ceil_2105.py +++ /dev/null @@ -1,332 +0,0 @@ -"""COG overviews use ceil semantics for odd-sized rasters (issue #2105). - -Before the fix, ``_block_reduce_2d`` floored both dimensions to an even -multiple and cropped the trailing row/col before reducing. A 5x5 input -became a 4x4 crop and then a 2x2 overview, silently dropping the bottom -row and right column. GDAL's overview generator uses ceil semantics -(5x5 -> 3x3) so the residual edge cells still contribute. - -These tests pin the contract that every base pixel reaches the overview, -across all resampling methods, on both even and odd input shapes, and -with the nodata sentinel honoured along the trailing edge. 
-""" -from __future__ import annotations - -import importlib.util - -import numpy as np -import pytest - -from xrspatial.geotiff._writer import _block_reduce_2d - - -def _gpu_available() -> bool: - if importlib.util.find_spec("cupy") is None: - return False - try: - import cupy - return bool(cupy.cuda.is_available()) - except Exception: - return False - - -_HAS_GPU = _gpu_available() -_gpu_only = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required") - - -# --------------------------------------------------------------------------- -# Output-shape contract: ceil((h+1)/2, (w+1)/2) for every method and dtype. -# --------------------------------------------------------------------------- -@pytest.mark.parametrize( - "shape,expected", - [ - ((5, 5), (3, 3)), - ((5, 4), (3, 2)), - ((4, 5), (2, 3)), - ((7, 3), (4, 2)), - ((1, 1), (1, 1)), - ((1, 5), (1, 3)), - ((5, 1), (3, 1)), - ((4, 4), (2, 2)), - ((6, 6), (3, 3)), - ], -) -@pytest.mark.parametrize( - "method", ["nearest", "mean", "min", "max", "median", "mode"] -) -def test_ceil_output_shape_float(shape, expected, method): - arr = np.arange(shape[0] * shape[1], dtype=np.float32).reshape(shape) - out = _block_reduce_2d(arr, method) - assert out.shape == expected - - -@pytest.mark.parametrize( - "shape,expected", - [ - ((5, 5), (3, 3)), - ((3, 7), (2, 4)), - ((1, 1), (1, 1)), - ], -) -@pytest.mark.parametrize("method", ["nearest", "mean", "min", "max", "median", "mode"]) -def test_ceil_output_shape_int(shape, expected, method): - arr = np.arange(shape[0] * shape[1], dtype=np.int16).reshape(shape) - out = _block_reduce_2d(arr, method) - assert out.shape == expected - assert out.dtype == arr.dtype - - -def test_ceil_output_shape_cubic_float(): - pytest.importorskip("scipy") - arr = np.arange(25, dtype=np.float32).reshape(5, 5) - out = _block_reduce_2d(arr, "cubic") - assert out.shape == (3, 3) - - -def test_ceil_output_shape_cubic_int(): - pytest.importorskip("scipy") - arr = np.arange(25, 
dtype=np.int16).reshape(5, 5) - out = _block_reduce_2d(arr, "cubic") - assert out.shape == (3, 3) - assert out.dtype == arr.dtype - - -# --------------------------------------------------------------------------- -# Trailing-edge pixel values: the last row/col of the source must reach the -# overview rather than being dropped. -# --------------------------------------------------------------------------- -def test_nearest_5x5_preserves_trailing_pixels(): - arr = np.arange(25, dtype=np.float64).reshape(5, 5) - out = _block_reduce_2d(arr, "nearest") - # Nearest = top-left of every 2x2 block. With ceil, that's arr[::2, ::2]. - assert out.shape == (3, 3) - np.testing.assert_array_equal(out, arr[::2, ::2]) - # The trailing row/col of the source IS represented in the overview. - assert out[-1, -1] == arr[4, 4] - - -def test_mean_5x5_trailing_residual_block_uses_valid_cell(): - # Residual at row 4, col 4 is a 1x1 block containing arr[4,4] alone. - arr = np.zeros((5, 5), dtype=np.float32) - arr[4, 4] = 100.0 - out = _block_reduce_2d(arr, "mean") - assert out.shape == (3, 3) - # The corner residual is a 1x1 block, so its mean is the single pixel. - assert out[2, 2] == pytest.approx(100.0) - - -def test_max_5x5_residual_block_uses_valid_cell(): - arr = np.zeros((5, 5), dtype=np.float32) - arr[4, :] = 9.0 # trailing row should reach overview[2, :] - arr[:, 4] = 7.0 - out = _block_reduce_2d(arr, "max") - assert out.shape == (3, 3) - # Bottom overview row picks max of (arr[4, 2*j:2*j+2]) -> 9.0 everywhere. - np.testing.assert_array_equal(out[2, :2], [9.0, 9.0]) - # Right column gets max from (arr[2*i:2*i+2, 4]) -> 7.0 except corner. - assert out[0, 2] == 7.0 - assert out[1, 2] == 7.0 - # arr[4, 4] = 7.0 (set by the trailing-column sweep, after row sweep). 
- assert out[2, 2] == 7.0 - - -def test_min_5x5_residual_block_uses_valid_cell(): - arr = np.full((5, 5), 10.0, dtype=np.float32) - arr[4, 4] = -1.0 - out = _block_reduce_2d(arr, "min") - assert out[2, 2] == -1.0 - - -def test_median_5x5_residual_block_uses_valid_cell(): - arr = np.full((5, 5), 5.0, dtype=np.float32) - arr[4, 4] = 99.0 - out = _block_reduce_2d(arr, "median") - # 1x1 residual: median is the single value. - assert out[2, 2] == pytest.approx(99.0) - - -def test_mode_5x5_residual_block_picks_valid_cell(): - arr = np.array( - [[1, 1, 2, 2, 3], - [1, 1, 2, 2, 3], - [4, 4, 5, 5, 6], - [4, 4, 5, 5, 6], - [7, 7, 8, 8, 9]], - dtype=np.int16, - ) - out = _block_reduce_2d(arr, "mode") - assert out.shape == (3, 3) - # Trailing 1x1 block at (4,4) is just the value 9. - assert out[2, 2] == 9 - # Trailing column (rows 0..1 / 2..3, col 4) is 1x1 blocks containing 3 / 6. - assert out[0, 2] == 3 - assert out[1, 2] == 6 - # Trailing row (col 0..1 / 2..3, row 4) is 1x2 blocks: [7,7] -> 7, [8,8] -> 8. - assert out[2, 0] == 7 - assert out[2, 1] == 8 - - -def test_cubic_5x5_covers_source_extent(): - pytest.importorskip("scipy") - # Smoothly varying ramp so cubic interpolation is well-behaved. - arr = np.arange(25, dtype=np.float32).reshape(5, 5) - out = _block_reduce_2d(arr, "cubic") - assert out.shape == (3, 3) - # Output should not be entirely zero/NaN, and trailing corner should - # roughly reflect the high source values around (4, 4). - assert np.isfinite(out).all() - assert out[2, 2] > out[0, 0] - - -# --------------------------------------------------------------------------- -# Sentinel masking still works on odd-sized inputs. 
-# --------------------------------------------------------------------------- -def test_mean_5x5_with_nodata_excludes_sentinel_in_residual(): - sentinel = -9999.0 - arr = np.full((5, 5), 1.0, dtype=np.float32) - arr[4, 4] = sentinel - out = _block_reduce_2d(arr, "mean", nodata=sentinel) - # The 1x1 trailing residual is all-sentinel -> all-NaN block, which - # the post-overview rewrite (in the caller) handles. Here we just - # confirm the sentinel did not bias the reduction: out[2, 2] is NaN, - # not (1.0 + sentinel)/2 or similar. - assert np.isnan(out[2, 2]) - # Other overview cells with at least one valid neighbour stay valid. - assert out[0, 0] == pytest.approx(1.0) - - -def test_min_int_5x5_with_nodata_does_not_select_sentinel_in_residual(): - sentinel = -9999 - arr = np.full((5, 5), 10, dtype=np.int16) - # Trailing column has a sentinel + valid cell in 2x1 residual blocks. - arr[0, 4] = sentinel - arr[2, 4] = sentinel - arr[4, 4] = sentinel - out = _block_reduce_2d(arr, "min", nodata=sentinel) - # The 2x1 residual at (0..1, 4) is [-9999, 10] -> min ignoring sentinel = 10. - assert out[0, 2] == 10 - assert out[1, 2] == 10 - # The 1x1 residual at (4, 4) is sentinel -> rewritten to sentinel. - assert out[2, 2] == sentinel - - -def test_int64_sentinel_near_max_masks_in_padded_branch(): - # INT64_MAX is not exactly representable in float64: float(INT64_MAX) - # rounds up to 2**63, which would miss the sentinel if the mask were - # computed against the float-padded view. The reader must compute the - # mask at native integer width before padding. - sentinel = np.iinfo(np.int64).max - arr = np.full((5, 5), 10, dtype=np.int64) - arr[0, 0] = sentinel - # Pad branch fires because shape (5, 5) is odd. - out = _block_reduce_2d(arr, "min", nodata=sentinel) - # Top-left 2x2 block has 1 sentinel + 3 valid 10s. nanmin -> 10 - # (sentinel masked out). 
If the mask missed the sentinel, the int64 - # value would be cast to float and the float min would pick up the - # sentinel's value or produce noise; either way out[0,0] would not - # be 10. - assert out[0, 0] == 10 - - -def test_uint64_sentinel_near_max_masks_in_padded_branch(): - # UINT64_MAX = 2**64 - 1 is also not exactly representable in float64 - # (float(UINT64_MAX) rounds up to 2**64). The native-width mask path - # must catch the sentinel for unsigned 64-bit dtypes too. - sentinel = np.iinfo(np.uint64).max - arr = np.full((5, 5), 10, dtype=np.uint64) - arr[0, 0] = sentinel - out = _block_reduce_2d(arr, "min", nodata=sentinel) - assert out[0, 0] == 10 - - -def test_float32_padded_branch_keeps_source_dtype(): - # The padded mean/min/max/median branch used to allocate a float64 - # NaN buffer regardless of the source dtype, doubling intermediate - # memory for an odd-shape float32 read. Verify the helper now keeps - # the source dtype across the pad so a float32 input round-trips as - # float32. The contract is checked end-to-end via the output dtype. - arr = np.arange(25, dtype=np.float32).reshape(5, 5) - out = _block_reduce_2d(arr, "mean") - assert out.dtype == np.float32 - # And the values still match what a manual ceil-mean would produce - # for the top-left 2x2 block. - top_left_mean = float(arr[:2, :2].mean()) - assert out[0, 0] == pytest.approx(top_left_mean) - - -def test_max_int_5x5_with_nodata_does_not_select_sentinel_in_residual(): - sentinel = -9999 - arr = np.full((5, 5), 10, dtype=np.int16) - arr[4, 4] = sentinel - out = _block_reduce_2d(arr, "max", nodata=sentinel) - # The 1x1 corner block is all-sentinel -> sentinel. - assert out[2, 2] == sentinel - # Adjacent 2x1 residual (rows 4, cols 0..1) has valid values only. - assert out[2, 0] == 10 - assert out[2, 1] == 10 - - -# --------------------------------------------------------------------------- -# Even-sized inputs keep the existing fast-path semantics. 
-# --------------------------------------------------------------------------- -@pytest.mark.parametrize( - "method", ["nearest", "mean", "min", "max", "median", "mode"] -) -def test_even_input_matches_legacy_2x2_behaviour(method): - rng = np.random.default_rng(2105) - arr = rng.integers(0, 100, size=(6, 8)).astype(np.int16) - out = _block_reduce_2d(arr, method) - assert out.shape == (3, 4) - # Spot-check a single block matches a direct reduction. - block = arr[0:2, 0:2] - if method == "nearest": - assert out[0, 0] == block[0, 0] - elif method == "mean": - # Integer outputs are rounded after the float reduction. - assert out[0, 0] == int(round(block.astype(np.float64).mean())) - elif method == "min": - assert out[0, 0] == block.min() - elif method == "max": - assert out[0, 0] == block.max() - elif method == "median": - assert out[0, 0] == int(round(float(np.median(block)))) - elif method == "mode": - # Lowest-value tie-break for unique cells. - vals, counts = np.unique(block, return_counts=True) - expected = vals[np.argmax(counts)] - assert out[0, 0] == expected - - -# --------------------------------------------------------------------------- -# GPU mirror: identical shape and identical values on odd-sized inputs. 
-# --------------------------------------------------------------------------- -@_gpu_only -@pytest.mark.parametrize( - "method", ["nearest", "mean", "min", "max", "median"] -) -@pytest.mark.parametrize("shape", [(5, 5), (5, 4), (4, 5), (7, 3)]) -def test_gpu_block_reduce_matches_cpu_on_odd_shapes(method, shape): - import cupy - - from xrspatial.geotiff._gpu_decode import _block_reduce_2d_gpu - - rng = np.random.default_rng(2105) - arr = rng.random(shape, dtype=np.float32) - cpu_out = _block_reduce_2d(arr, method) - gpu_out = _block_reduce_2d_gpu(cupy.asarray(arr), method).get() - assert gpu_out.shape == cpu_out.shape - np.testing.assert_allclose(gpu_out, cpu_out, equal_nan=True, rtol=1e-6) - - -@_gpu_only -def test_gpu_block_reduce_int_5x5_with_nodata(): - import cupy - - from xrspatial.geotiff._gpu_decode import _block_reduce_2d_gpu - - sentinel = -9999 - arr = np.full((5, 5), 10, dtype=np.int16) - arr[4, 4] = sentinel - cpu_out = _block_reduce_2d(arr, "max", nodata=sentinel) - gpu_out = _block_reduce_2d_gpu(cupy.asarray(arr), "max", nodata=sentinel).get() - np.testing.assert_array_equal(gpu_out, cpu_out) diff --git a/xrspatial/geotiff/tests/test_cog_overview_nodata_1613.py b/xrspatial/geotiff/tests/test_cog_overview_nodata_1613.py deleted file mode 100644 index ff38a1288..000000000 --- a/xrspatial/geotiff/tests/test_cog_overview_nodata_1613.py +++ /dev/null @@ -1,296 +0,0 @@ -"""COG overview generation respects the nodata sentinel (issue #1613). - -Before the fix, ``to_geotiff(..., cog=True, nodata=)`` rewrote NaN -to the sentinel *before* the overview-generation loop. ``_make_overview`` -then ran ``np.nanmean`` / ``np.nanmin`` / ``np.nanmax`` / ``np.nanmedian`` -over the rewritten array, treating the sentinel as a real number and -biasing every overview pixel toward the sentinel. 
- -These tests pin the contract that the CPU and GPU writers ignore the -sentinel during overview reduction, so the resulting pyramid matches -``np.nanmean``-style aggregation on the original NaN-keyed data. -""" -from __future__ import annotations - -import importlib.util - -import numpy as np -import pytest -import xarray as xr - - -def _gpu_available() -> bool: - """True if cupy is importable and CUDA is initialised.""" - if importlib.util.find_spec("cupy") is None: - return False - try: - import cupy - return bool(cupy.cuda.is_available()) - except Exception: - return False - - -_HAS_GPU = _gpu_available() -_gpu_only = pytest.mark.skipif( - not _HAS_GPU, - reason="cupy + CUDA required", -) - - -def _arr_with_partial_nan(): - """4x4 float raster: row 1 is all-NaN, rest is finite.""" - return np.array([ - [1.0, 2.0, 3.0, 4.0], - [np.nan, np.nan, np.nan, np.nan], - [10.0, 20.0, 30.0, 40.0], - [10.0, 20.0, 30.0, 40.0], - ], dtype=np.float32) - - -def _arr_with_full_nan_block(): - """4x4 float raster: top-left 2x2 entirely NaN.""" - return np.array([ - [np.nan, np.nan, 3.0, 4.0], - [np.nan, np.nan, 7.0, 8.0], - [10.0, 20.0, 30.0, 40.0], - [10.0, 20.0, 30.0, 40.0], - ], dtype=np.float32) - - -def test_cpu_cog_overview_mean_ignores_sentinel(tmp_path): - """CPU writer: overview 'mean' must skip sentinel pixels (issue #1613).""" - from xrspatial.geotiff import open_geotiff, to_geotiff - - arr = _arr_with_partial_nan() - da = xr.DataArray(arr, dims=['y', 'x']) - p = str(tmp_path / 'cog_mean_nodata.tif') - to_geotiff(da, p, nodata=-9999.0, cog=True, compression='deflate', - tiled=True, tile_size=16, overview_levels=[2], - overview_resampling='mean') - - ov = open_geotiff(p, overview_level=1) - expected = np.array([[1.5, 3.5], [15.0, 35.0]], dtype=np.float32) - np.testing.assert_allclose(np.asarray(ov.data), expected) - - -def test_cpu_cog_overview_mean_partial_block(tmp_path): - """CPU writer: partial-NaN 2x2 block averages over the finite cells only.""" - from 
xrspatial.geotiff import open_geotiff, to_geotiff - - arr = _arr_with_full_nan_block() - da = xr.DataArray(arr, dims=['y', 'x']) - p = str(tmp_path / 'cog_mean_nodata_full_block.tif') - to_geotiff(da, p, nodata=-9999.0, cog=True, compression='deflate', - tiled=True, tile_size=16, overview_levels=[2], - overview_resampling='mean') - - ov = open_geotiff(p, overview_level=1) - # Top-left 2x2 was all-NaN -> reduces to NaN -> rewritten to -9999 - # on disk, then read back as NaN once the overview-nodata - # inheritance fix (#1739) restores attrs['nodata'] and re-masks - # the sentinel. - # Top-right 2x2 [3,4,7,8] -> mean 5.5 - # Bottom-left [10,20,10,20] -> 15 - # Bottom-right [30,40,30,40] -> 35 - data = np.asarray(ov.data) - assert ov.attrs.get('nodata') == -9999.0 - assert np.isnan(data[0, 0]) - np.testing.assert_allclose(data[0, 1], 5.5) - np.testing.assert_allclose(data[1, 0], 15.0) - np.testing.assert_allclose(data[1, 1], 35.0) - - -@pytest.mark.parametrize('method,expected', [ - ('min', np.array([[1.0, 3.0], [10.0, 30.0]], dtype=np.float32)), - ('max', np.array([[2.0, 4.0], [20.0, 40.0]], dtype=np.float32)), - ('median', np.array([[1.5, 3.5], [15.0, 35.0]], dtype=np.float32)), -]) -def test_cpu_cog_overview_aggregations_ignore_sentinel( - tmp_path, method, expected): - """min/max/median overview reductions must also skip the sentinel.""" - from xrspatial.geotiff import open_geotiff, to_geotiff - - arr = _arr_with_partial_nan() - da = xr.DataArray(arr, dims=['y', 'x']) - p = str(tmp_path / f'cog_{method}_nodata.tif') - to_geotiff(da, p, nodata=-9999.0, cog=True, compression='deflate', - tiled=True, tile_size=16, overview_levels=[2], - overview_resampling=method) - - ov = open_geotiff(p, overview_level=1) - np.testing.assert_allclose(np.asarray(ov.data), expected) - - -def test_cpu_cog_overview_mean_no_nodata_passes(tmp_path): - """When nodata is unset the reducer behaves as before.""" - from xrspatial.geotiff import open_geotiff, to_geotiff - - arr = np.arange(16, 
dtype=np.float32).reshape(4, 4) - da = xr.DataArray(arr, dims=['y', 'x']) - p = str(tmp_path / 'cog_mean_no_nodata.tif') - to_geotiff(da, p, cog=True, compression='deflate', - tiled=True, tile_size=16, overview_levels=[2], - overview_resampling='mean') - - ov = open_geotiff(p, overview_level=1) - # mean of 2x2 blocks of arange(16).reshape(4,4) - expected = np.array([ - [(0 + 1 + 4 + 5) / 4, (2 + 3 + 6 + 7) / 4], - [(8 + 9 + 12 + 13) / 4, (10 + 11 + 14 + 15) / 4], - ], dtype=np.float32) - np.testing.assert_allclose(np.asarray(ov.data), expected) - - -def test_block_reduce_2d_nodata_kwarg_directly(): - """Exercise the helper directly so a regression here is caught fast.""" - from xrspatial.geotiff._writer import _block_reduce_2d - - arr = _arr_with_partial_nan() - # Without nodata, the sentinel poisons the reduction. - arr_sentinel = arr.copy() - arr_sentinel[np.isnan(arr_sentinel)] = -9999.0 - poisoned = _block_reduce_2d(arr_sentinel, 'mean') - assert poisoned[0, 0] < -1000.0 # confirms the bug shape - - # With nodata, the sentinel is treated as missing. - fixed = _block_reduce_2d(arr_sentinel, 'mean', nodata=-9999.0) - np.testing.assert_allclose(fixed[0, 0], 1.5) - np.testing.assert_allclose(fixed[0, 1], 3.5) - - -def test_block_reduce_2d_nodata_all_sentinel_block_yields_nan(): - """All-sentinel block reduces to NaN under nan-aware aggregation.""" - from xrspatial.geotiff._writer import _block_reduce_2d - - arr = np.full((2, 2), -9999.0, dtype=np.float32) - out = _block_reduce_2d(arr, 'mean', nodata=-9999.0) - assert out.shape == (1, 1) - assert np.isnan(out[0, 0]) - - -def test_block_reduce_2d_inf_nodata_is_masked(): - """nodata=+/-inf must be masked back to NaN like a finite sentinel. - - The upstream NaN->sentinel rewrite only gates on ``not np.isnan``, - so ``nodata=inf`` is a real (if uncommon) caller choice. The reducer - has to match that gate or it re-poisons the overview with inf. 
- """ - from xrspatial.geotiff._writer import _block_reduce_2d - - arr = np.array([ - [1.0, 2.0, 3.0, 4.0], - [np.inf, np.inf, np.inf, np.inf], - [10.0, 20.0, 30.0, 40.0], - [10.0, 20.0, 30.0, 40.0], - ], dtype=np.float32) - out = _block_reduce_2d(arr, 'mean', nodata=float('inf')) - np.testing.assert_allclose(out[0, 0], 1.5) - np.testing.assert_allclose(out[0, 1], 3.5) - - -def test_block_reduce_2d_all_nan_block_does_not_warn(): - """All-NaN blocks must not surface RuntimeWarning to user logs.""" - import warnings as _warnings - - from xrspatial.geotiff._writer import _block_reduce_2d - - arr = np.array([ - [-9999.0, -9999.0, 3.0, 4.0], - [-9999.0, -9999.0, 7.0, 8.0], - ], dtype=np.float32) - - with _warnings.catch_warnings(record=True) as caught: - _warnings.simplefilter('always') - out = _block_reduce_2d(arr, 'mean', nodata=-9999.0) - - assert not [w for w in caught if issubclass(w.category, RuntimeWarning)] - assert np.isnan(out[0, 0]) - np.testing.assert_allclose(out[0, 1], 5.5) - - -@_gpu_only -def test_gpu_cog_overview_mean_ignores_sentinel(tmp_path): - """GPU writer: overview 'mean' must skip sentinel pixels (issue #1613).""" - import cupy - - from xrspatial.geotiff import open_geotiff, to_geotiff - - arr_cpu = _arr_with_partial_nan() - arr_gpu = cupy.asarray(arr_cpu) - da = xr.DataArray(arr_gpu, dims=['y', 'x']) - - p = str(tmp_path / 'gpu_cog_mean_nodata.tif') - to_geotiff(da, p, nodata=-9999.0, cog=True, compression='deflate', - tiled=True, tile_size=16, overview_levels=[2], - overview_resampling='mean', gpu=True) - - ov = open_geotiff(p, overview_level=1) - expected = np.array([[1.5, 3.5], [15.0, 35.0]], dtype=np.float32) - np.testing.assert_allclose(np.asarray(ov.data), expected) - - -@_gpu_only -def test_gpu_block_reduce_nodata_kwarg_directly(): - """Exercise the GPU helper directly so a regression is caught fast.""" - import cupy - - from xrspatial.geotiff._gpu_decode import _block_reduce_2d_gpu - - arr_cpu = _arr_with_partial_nan() - 
arr_cpu[np.isnan(arr_cpu)] = -9999.0 - arr_gpu = cupy.asarray(arr_cpu) - - poisoned = _block_reduce_2d_gpu(arr_gpu, 'mean') - assert float(poisoned[0, 0].get()) < -1000.0 - - fixed = _block_reduce_2d_gpu(arr_gpu, 'mean', nodata=-9999.0) - np.testing.assert_allclose(float(fixed[0, 0].get()), 1.5) - np.testing.assert_allclose(float(fixed[0, 1].get()), 3.5) - - -@_gpu_only -def test_gpu_block_reduce_inf_nodata_is_masked(): - """GPU helper mirrors the CPU isnan-only gate for nodata=inf.""" - import cupy - - from xrspatial.geotiff._gpu_decode import _block_reduce_2d_gpu - - arr_cpu = np.array([ - [1.0, 2.0, 3.0, 4.0], - [np.inf, np.inf, np.inf, np.inf], - [10.0, 20.0, 30.0, 40.0], - [10.0, 20.0, 30.0, 40.0], - ], dtype=np.float32) - arr_gpu = cupy.asarray(arr_cpu) - - out = _block_reduce_2d_gpu(arr_gpu, 'mean', nodata=float('inf')) - np.testing.assert_allclose(float(out[0, 0].get()), 1.5) - np.testing.assert_allclose(float(out[0, 1].get()), 3.5) - - -@_gpu_only -def test_gpu_cog_overview_matches_cpu(tmp_path): - """CPU and GPU overview pyramids must agree on nodata-masked data.""" - import cupy - - from xrspatial.geotiff import open_geotiff, to_geotiff - - arr = _arr_with_partial_nan() - - # CPU - da_cpu = xr.DataArray(arr, dims=['y', 'x']) - p_cpu = str(tmp_path / 'cpu_pyramid.tif') - to_geotiff(da_cpu, p_cpu, nodata=-9999.0, cog=True, - compression='deflate', tiled=True, tile_size=16, - overview_levels=[2], overview_resampling='mean') - cpu_ov = np.asarray(open_geotiff(p_cpu, overview_level=1).data) - - # GPU - da_gpu = xr.DataArray(cupy.asarray(arr), dims=['y', 'x']) - p_gpu = str(tmp_path / 'gpu_pyramid.tif') - to_geotiff(da_gpu, p_gpu, nodata=-9999.0, cog=True, - compression='deflate', tiled=True, tile_size=16, - overview_levels=[2], overview_resampling='mean', gpu=True) - gpu_ov = np.asarray(open_geotiff(p_gpu, overview_level=1).data) - - np.testing.assert_allclose(cpu_ov, gpu_ov) diff --git a/xrspatial/geotiff/tests/test_cog_parity_2286.py 
b/xrspatial/geotiff/tests/test_cog_parity_2286.py deleted file mode 100644 index a1b90d3c2..000000000 --- a/xrspatial/geotiff/tests/test_cog_parity_2286.py +++ /dev/null @@ -1,609 +0,0 @@ -"""COG parity rows for the release gate (issue #2294 / parent #2286). - -A focused parity layer that locks the COG read/write paths to the -release gate. The six rows below cover the round-trip surface a caller -hits when they treat xrspatial as a COG producer or COG consumer, and -when they round-trip a COG through a third party (rasterio) or fetch -one via the HTTP range-read code path. - -Rows ----- - -1. ``xrspatial write COG -> xrspatial eager read`` -2. ``xrspatial write COG -> xrspatial dask read`` -3. ``xrspatial write COG -> rasterio read`` -4. ``golden/rasterio COG fixture -> xrspatial local read`` -5. ``golden/rasterio COG fixture -> xrspatial HTTP range read`` -6. ``golden/rasterio COG fixture -> xrspatial dask HTTP range read`` - -Each row asserts byte-exact pixels (every fixture used here is -lossless) and a fixed subset of the metadata contract: ``crs`` (or -``crs_wkt``), ``transform``, ``nodata`` (including the no-nodata -case), pixel ``dtype``, band count, and the ``(y, x)`` dim names. -The wider canonical-attrs surface (resolution, georef_status, etc.) -lives in ``test_backend_full_parity_2211.py``; this file is the -narrower COG-only gate. - -Skip policy ------------ - -Skips are always loud. If a dependency is missing (``rasterio``, -``dask``, ``fsspec``) the row calls ``pytest.skip`` with a string that -names the missing dependency. Silent collection of zero rows is itself -a bug under #2286. - -Scope ------ - -* CPU-only. The GPU rows stay out per the parent issue (``reader.gpu`` - is experimental and outside this gate). -* No experimental codecs. Every row uses lossless deflate so a - byte-exact comparison is meaningful. -* This is a tests-only PR: no changes to production code or to - ``test_backend_full_parity_2211.py``. 
-""" -from __future__ import annotations - -import http.server -import importlib.util -import pathlib -import socketserver -import threading -import uuid - -import numpy as np -import pytest -import xarray as xr - -pytest.importorskip("rasterio") - -from xrspatial.geotiff import open_geotiff, to_geotiff # noqa: E402 -from xrspatial.geotiff._writer import write # noqa: E402 - - -# --------------------------------------------------------------------------- -# Environment gating -# --------------------------------------------------------------------------- - -_HAS_DASK = importlib.util.find_spec("dask") is not None - - -def _require_dask() -> None: - if not _HAS_DASK: - pytest.skip( - "dask is not installed; install the dask extra to exercise " - "the COG dask-read row of the #2286 release gate." - ) - - -# Golden corpus COG fixture: tiled, internal overviews, written via -# GDAL's COG driver. Lives under ``golden_corpus/fixtures``. -_GOLDEN_COG_ID = "cog_internal_overview_uint16" - - -def _golden_cog_path() -> pathlib.Path: - from xrspatial.geotiff.tests.golden_corpus import generate - return ( - pathlib.Path(generate.__file__).resolve().parent - / "fixtures" - / f"{_GOLDEN_COG_ID}.tif" - ) - - -# --------------------------------------------------------------------------- -# Range-aware in-process HTTP server (mirrors the pattern used by -# test_cog_http_parallel_decode_2026_05_15.py and test_cog_http_concurrent.py). 
-# --------------------------------------------------------------------------- - -class _RangeHandler(http.server.BaseHTTPRequestHandler): - payload: bytes = b"" - - def do_GET(self): # noqa: N802 - rng = self.headers.get("Range") - if rng and rng.startswith("bytes="): - spec = rng[len("bytes="):] - start_s, _, end_s = spec.partition("-") - start = int(start_s) - end = int(end_s) if end_s else len(self.payload) - 1 - chunk = self.payload[start:end + 1] - self.send_response(206) - self.send_header("Content-Type", "application/octet-stream") - self.send_header( - "Content-Range", - f"bytes {start}-{start + len(chunk) - 1}/{len(self.payload)}", - ) - self.send_header("Content-Length", str(len(chunk))) - self.end_headers() - self.wfile.write(chunk) - return - self.send_response(200) - self.send_header("Content-Length", str(len(self.payload))) - self.end_headers() - self.wfile.write(self.payload) - - def log_message(self, *_args, **_kwargs): # silence test noise - return - - -def _serve_payload(payload: bytes, monkeypatch): - """Spin a range-aware server bound to localhost; return (httpd, port). - - The handler subclass is named with a uuid suffix so that the two - fixtures in this module (and any future ones) don't share a - qualname. Without the suffix, tracebacks reuse the same class - identifier across fixture invocations and become harder to read. - - ``allow_reuse_address = True`` lets the OS reclaim the port - quickly when the test tears down (avoiding TIME_WAIT-related - binding races under parallel pytest runs). ``timeout=5`` on the - server caps how long a stuck request can pin the daemon thread. 
- """ - monkeypatch.setenv("XRSPATIAL_GEOTIFF_ALLOW_PRIVATE_HOSTS", "1") - handler_cls = type( - f"RangeHandler2286_{uuid.uuid4().hex[:8]}", - (_RangeHandler,), - {"payload": payload}, - ) - - class _ReusableTCPServer(socketserver.TCPServer): - allow_reuse_address = True - timeout = 5 - - httpd = _ReusableTCPServer(("127.0.0.1", 0), handler_cls) - port = httpd.server_address[1] - thread = threading.Thread(target=httpd.serve_forever, daemon=True) - thread.start() - return httpd, port - - -# --------------------------------------------------------------------------- -# Fixtures -# --------------------------------------------------------------------------- - -@pytest.fixture -def xrspatial_cog(tmp_path): - """xrspatial writes a small lossless COG; yield (path, source_array, attrs). - - The source is a deterministic uint16 ramp so byte-exact comparison - is meaningful. CRS / transform / nodata are stamped via the public - ``to_geotiff`` API so the round trip exercises the user-visible - surface, not a private writer entry point. - """ - h, w = 64, 64 - # Use a +1 offset so pixel value 0 never appears -- the reader - # masks nodata-valued pixels to NaN under the #2092 contract, - # which upcasts integer rasters to float64. The fixture's payload - # is a deterministic ramp regardless of the offset. - data = (np.arange(h * w, dtype=np.uint16) + 1).reshape(h, w) - # Build a DataArray with a real CRS and a regular grid so the - # transform is non-degenerate. Pixel size 0.01 deg. - y = np.linspace(45.0, 45.0 - 0.01 * (h - 1), h) - x = np.linspace(-120.0, -120.0 + 0.01 * (w - 1), w) - da = xr.DataArray( - data, dims=["y", "x"], - coords={"y": y, "x": x}, - # No ``nodata`` attr: the masked-nodata path upcasts integer - # rasters to float64 and replaces sentinel pixels with NaN, - # which would break the byte-exact uint16 comparison. The - # nodata read contract is exercised separately under - # ``test_nodata_lifecycle_parity_2211.py``. 
- attrs={"crs": 4326}, - name="cog_2286", - ) - path = str(tmp_path / "xrspatial_cog_2286.tif") - to_geotiff( - da, path, - compression="deflate", - tiled=True, - tile_size=16, - cog=True, - overview_levels=[2], - ) - return path, data, {"crs": 4326, "nodata": None} - - -@pytest.fixture -def golden_cog_http(monkeypatch): - """Serve the golden COG fixture over a range-aware in-process HTTP server. - - Yields ``(url, expected_array)`` where ``expected_array`` is the - pixels read via the local xrspatial reader (the ground truth for - HTTP comparison). The fixture lives in the golden corpus and was - written by GDAL's COG driver, so it stresses the third-party - interop side of the COG read path. - """ - path = _golden_cog_path() - if not path.exists(): - pytest.skip( - f"golden COG fixture {_GOLDEN_COG_ID!r} missing on disk; run " - "`python -m xrspatial.geotiff.tests.golden_corpus.generate` " - "to materialise the corpus (issue #1930)." - ) - with open(path, "rb") as f: - payload = f.read() - httpd, port = _serve_payload(payload, monkeypatch) - try: - # Use a stable filename in the URL so the SSRF-hardened reader - # has a sensible-looking path to log. - yield f"http://127.0.0.1:{port}/{_GOLDEN_COG_ID}.tif", path - finally: - httpd.shutdown() - httpd.server_close() - - -# --------------------------------------------------------------------------- -# Comparison helpers -# --------------------------------------------------------------------------- - -def _materialise(da: xr.DataArray) -> np.ndarray: - """Host-side numpy view (dask compute, cupy get) without leaking lazy state.""" - raw = da.data - if hasattr(raw, "compute"): - raw = raw.compute() - if hasattr(raw, "get"): - raw = raw.get() - return np.asarray(raw) - - -def _assert_byte_exact( - expected: np.ndarray, actual: np.ndarray, *, label: str, -) -> None: - """Byte-exact equality on shape, dtype, and bytes. 
Every fixture is lossless.""" - assert expected.shape == actual.shape, ( - f"{label}: shape mismatch expected={expected.shape} actual={actual.shape}" - ) - assert expected.dtype == actual.dtype, ( - f"{label}: dtype mismatch expected={expected.dtype} actual={actual.dtype}" - ) - if not np.array_equal(expected, actual): - diff = np.where(expected != actual) - n = len(diff[0]) - raise AssertionError( - f"{label}: byte-exact comparison failed; {n} pixel(s) differ" - ) - - -# Scope note: every fixture in this file is single-band 2D. The two -# helpers below hard-code that shape on purpose. If a future row adds -# a multi-band fixture, extend the helpers (or replace them with -# parametrised checks) rather than reusing them as-is. - -def _assert_dim_names(da: xr.DataArray, *, label: str) -> None: - """The 2D COG path must come back with ``(y, x)`` dim names.""" - assert da.dims == ("y", "x"), ( - f"{label}: dims must be ('y', 'x'), got {da.dims!r}" - ) - - -def _assert_band_count(arr: np.ndarray, *, label: str) -> None: - """Single-band fixture; the returned array must be 2D.""" - assert arr.ndim == 2, ( - f"{label}: expected single-band 2D pixels, got ndim={arr.ndim} " - f"shape={arr.shape}" - ) - - -def _assert_crs_present(da: xr.DataArray, *, label: str) -> None: - """``crs`` (EPSG int or string) or ``crs_wkt`` must survive the read.""" - has_crs = "crs" in da.attrs and da.attrs["crs"] is not None - has_wkt = "crs_wkt" in da.attrs and da.attrs["crs_wkt"] - assert has_crs or has_wkt, ( - f"{label}: neither 'crs' nor 'crs_wkt' attr survived the read; " - f"attrs={sorted(da.attrs)!r}" - ) - - -def _assert_crs_equals(da: xr.DataArray, expected_epsg: int, *, label: str) -> None: - """Read-side CRS matches the writer's EPSG declaration.""" - crs = da.attrs.get("crs") - assert crs == expected_epsg, ( - f"{label}: crs mismatch expected={expected_epsg!r} got={crs!r}" - ) - - -def _assert_transform(da: xr.DataArray, *, label: str) -> None: - """Transform attr present and a 
finite 6-tuple.""" - t = da.attrs.get("transform") - assert t is not None, f"{label}: transform attr missing" - tup = tuple(float(v) for v in t) - assert len(tup) == 6, f"{label}: transform must be a 6-tuple, got {tup}" - assert all(np.isfinite(v) for v in tup), ( - f"{label}: transform has non-finite component: {tup}" - ) - - -def _assert_transform_equals( - da: xr.DataArray, expected_t: tuple, *, label: str, -) -> None: - """Transform 6-tuple matches an expected reference within a tight ULP.""" - t = da.attrs.get("transform") - assert t is not None, f"{label}: transform attr missing" - tup = tuple(float(v) for v in t) - exp = tuple(float(v) for v in expected_t) - assert len(tup) == 6 and len(exp) == 6 - for i, (a, b) in enumerate(zip(tup, exp)): - assert abs(a - b) <= 1e-9, ( - f"{label}: transform[{i}] differs expected={b!r} got={a!r}" - ) - - -def _assert_nodata_equals( - da: xr.DataArray, expected: float | int | None, *, label: str, -) -> None: - """Assert nodata sentinel matches, including the no-nodata case. - - When ``expected`` is ``None`` we still check the read side: the - reader must not fabricate a sentinel that the writer never stamped. - The reader is allowed to expose the attr as ``None`` or omit it - entirely; both count as "no nodata". 
- """ - nd = da.attrs.get("nodata") - if expected is None: - assert nd is None, ( - f"{label}: writer stamped no nodata, but reader exposed " - f"nodata={nd!r}" - ) - return - assert nd == expected, ( - f"{label}: nodata mismatch expected={expected!r} got={nd!r}" - ) - - -# --------------------------------------------------------------------------- -# Row 1: xrspatial write COG -> xrspatial eager read -# --------------------------------------------------------------------------- - -def test_row1_xrspatial_cog_xrspatial_eager(xrspatial_cog): - """xrspatial-written COG round-trips byte-exact through the eager reader.""" - path, expected, expected_attrs = xrspatial_cog - da = open_geotiff(path) - label = "row1_xrspatial_cog_eager" - - pixels = _materialise(da) - _assert_band_count(pixels, label=label) - _assert_byte_exact(expected, pixels, label=label) - _assert_dim_names(da, label=label) - _assert_crs_equals(da, expected_attrs["crs"], label=label) - _assert_transform(da, label=label) - _assert_nodata_equals(da, expected_attrs["nodata"], label=label) - assert da.dtype == expected.dtype, ( - f"{label}: dtype mismatch expected={expected.dtype} got={da.dtype}" - ) - - -# --------------------------------------------------------------------------- -# Row 2: xrspatial write COG -> xrspatial dask read -# --------------------------------------------------------------------------- - -def test_row2_xrspatial_cog_xrspatial_dask(xrspatial_cog): - """xrspatial-written COG round-trips byte-exact through the dask reader.""" - _require_dask() - path, expected, expected_attrs = xrspatial_cog - da = open_geotiff(path, chunks=16) - label = "row2_xrspatial_cog_dask" - - # Verify we actually went through the dask path; a regression that - # silently drops ``chunks=`` and falls back to eager would pass the - # pixel check but exercise the wrong code path. 
- assert hasattr(da.data, "dask"), ( - f"{label}: chunks=16 did not produce a dask-backed DataArray; " - f"got data type {type(da.data).__name__}" - ) - - pixels = _materialise(da) - _assert_band_count(pixels, label=label) - _assert_byte_exact(expected, pixels, label=label) - _assert_dim_names(da, label=label) - _assert_crs_equals(da, expected_attrs["crs"], label=label) - _assert_transform(da, label=label) - _assert_nodata_equals(da, expected_attrs["nodata"], label=label) - assert da.dtype == expected.dtype, ( - f"{label}: dtype mismatch expected={expected.dtype} got={da.dtype}" - ) - - -# --------------------------------------------------------------------------- -# Row 3: xrspatial write COG -> rasterio read -# --------------------------------------------------------------------------- - -def test_row3_xrspatial_cog_rasterio(xrspatial_cog): - """rasterio reads an xrspatial-written COG and the pixel/metadata contract holds. - - Asserts the third-party reader sees the same pixels, dtype, CRS, - transform, and nodata that xrspatial stamped on write. A regression - that drops or mangles any of these would surface as a Tier-1 - interop break. - """ - rasterio = pytest.importorskip( - "rasterio", - reason="rasterio is required for row 3 (issue #2294)", - ) - path, expected, expected_attrs = xrspatial_cog - label = "row3_xrspatial_cog_rasterio" - - with rasterio.open(path) as src: - # Single-band fixture: read band 1. - pixels = src.read(1) - rio_crs = src.crs - rio_transform = src.transform - rio_nodata = src.nodata - rio_count = src.count - rio_dtype = np.dtype(src.dtypes[0]) - - _assert_band_count(pixels, label=label) - _assert_byte_exact(expected, pixels, label=label) - assert rio_count == 1, f"{label}: rasterio reports band count {rio_count}" - assert rio_dtype == expected.dtype, ( - f"{label}: dtype mismatch expected={expected.dtype} got={rio_dtype}" - ) - # rasterio CRS -> EPSG int when possible. 
- epsg = rio_crs.to_epsg() if rio_crs is not None else None - assert epsg == expected_attrs["crs"], ( - f"{label}: rasterio CRS EPSG mismatch " - f"expected={expected_attrs['crs']!r} got={epsg!r}" - ) - # rasterio Affine is 6-tuple compatible via ``.a, .b, .c, .d, .e, .f``. - assert rio_transform is not None, f"{label}: rasterio transform missing" - assert all(np.isfinite(v) for v in ( - rio_transform.a, rio_transform.b, rio_transform.c, - rio_transform.d, rio_transform.e, rio_transform.f, - )), f"{label}: rasterio transform has non-finite component" - if expected_attrs["nodata"] is None: - # The writer was not asked to stamp a nodata; rasterio should - # report ``None`` too. Anything else means the writer leaked - # a sentinel onto the file. - assert rio_nodata is None, ( - f"{label}: writer stamped an unrequested nodata; " - f"rasterio reports {rio_nodata!r}" - ) - else: - assert rio_nodata == expected_attrs["nodata"], ( - f"{label}: rasterio nodata mismatch " - f"expected={expected_attrs['nodata']!r} got={rio_nodata!r}" - ) - - -# --------------------------------------------------------------------------- -# Row 4: golden/rasterio COG fixture -> xrspatial local read -# --------------------------------------------------------------------------- - -def test_row4_golden_cog_xrspatial_local(): - """Read the GDAL-written golden COG fixture with xrspatial's local reader. - - Compares pixels byte-exact against a rasterio read of the same - bytes -- the GDAL COG driver wrote the file, so rasterio is the - canonical oracle here. Catches regressions that returned the right - shape but mangled values (e.g. wrong endianness, predictor drift, - overview IFD picked instead of full res). 
- """ - rasterio = pytest.importorskip( - "rasterio", - reason="rasterio is required for row 4 oracle (issue #2294)", - ) - path = _golden_cog_path() - if not path.exists(): - pytest.skip( - f"golden COG fixture {_GOLDEN_COG_ID!r} missing on disk; run " - "`python -m xrspatial.geotiff.tests.golden_corpus.generate` " - "(issue #1930)." - ) - da = open_geotiff(str(path)) - label = "row4_golden_cog_xrspatial_local" - - pixels = _materialise(da) - _assert_band_count(pixels, label=label) - _assert_dim_names(da, label=label) - # The golden fixture is uint16 per the manifest entry. - assert da.dtype == np.dtype("uint16"), ( - f"{label}: dtype expected=uint16 got={da.dtype}" - ) - _assert_crs_present(da, label=label) - _assert_transform(da, label=label) - - # Pixel parity against the rasterio oracle. The fixture is lossless - # deflate, so byte-exact is the right bar. - with rasterio.open(str(path)) as src: - expected = src.read(1) - _assert_byte_exact(expected, pixels, label=label) - - -# --------------------------------------------------------------------------- -# Row 5: golden/rasterio COG fixture -> xrspatial HTTP range read -# --------------------------------------------------------------------------- - -def test_row5_golden_cog_xrspatial_http(golden_cog_http): - """xrspatial's HTTP range reader returns the same pixels as the local read. - - Exercises the cloud-source code path against the GDAL-written - fixture. The reference is the local read of the same bytes, so any - drift between the local and HTTP paths surfaces here. 
- """ - url, local_path = golden_cog_http - label = "row5_golden_cog_xrspatial_http" - - local_da = open_geotiff(str(local_path)) - http_da = open_geotiff(url) - - local_px = _materialise(local_da) - http_px = _materialise(http_da) - - _assert_band_count(http_px, label=label) - _assert_byte_exact(local_px, http_px, label=label) - _assert_dim_names(http_da, label=label) - assert http_da.dtype == local_da.dtype, ( - f"{label}: dtype mismatch local={local_da.dtype} http={http_da.dtype}" - ) - # CRS and transform survive the cloud-source path. - local_crs = local_da.attrs.get("crs") - http_crs = http_da.attrs.get("crs") - assert local_crs == http_crs, ( - f"{label}: crs mismatch local={local_crs!r} http={http_crs!r}" - ) - local_t = local_da.attrs.get("transform") - assert local_t is not None, f"{label}: local read missing transform" - _assert_transform_equals(http_da, local_t, label=label) - # nodata presence must agree (the fixture may or may not carry one; - # both sides must agree either way). - assert ("nodata" in local_da.attrs) == ("nodata" in http_da.attrs), ( - f"{label}: nodata presence differs " - f"local={'nodata' in local_da.attrs} http={'nodata' in http_da.attrs}" - ) - if "nodata" in local_da.attrs: - assert local_da.attrs["nodata"] == http_da.attrs["nodata"], ( - f"{label}: nodata value differs " - f"local={local_da.attrs['nodata']!r} " - f"http={http_da.attrs['nodata']!r}" - ) - - -# --------------------------------------------------------------------------- -# Row 6: golden/rasterio COG fixture -> xrspatial dask HTTP range read -# --------------------------------------------------------------------------- - -def test_row6_golden_cog_xrspatial_dask_http(golden_cog_http): - """The dask HTTP path returns the same pixels as the local read. - - Combines the cloud-source and chunked-read code paths. 
A regression - that silently drops ``chunks=`` over HTTP would compute correct - pixels via the eager path; the storage-type assertion below guards - against that. - """ - _require_dask() - url, local_path = golden_cog_http - label = "row6_golden_cog_xrspatial_dask_http" - - local_da = open_geotiff(str(local_path)) - http_da = open_geotiff(url, chunks=16) - - assert hasattr(http_da.data, "dask"), ( - f"{label}: chunks=16 over HTTP did not produce a dask-backed " - f"DataArray; got data type {type(http_da.data).__name__}" - ) - - local_px = _materialise(local_da) - http_px = _materialise(http_da) - - _assert_band_count(http_px, label=label) - _assert_byte_exact(local_px, http_px, label=label) - _assert_dim_names(http_da, label=label) - assert http_da.dtype == local_da.dtype, ( - f"{label}: dtype mismatch local={local_da.dtype} http={http_da.dtype}" - ) - local_crs = local_da.attrs.get("crs") - http_crs = http_da.attrs.get("crs") - assert local_crs == http_crs, ( - f"{label}: crs mismatch local={local_crs!r} http={http_crs!r}" - ) - local_t = local_da.attrs.get("transform") - assert local_t is not None, f"{label}: local read missing transform" - _assert_transform_equals(http_da, local_t, label=label) - assert ("nodata" in local_da.attrs) == ("nodata" in http_da.attrs), ( - f"{label}: nodata presence differs " - f"local={'nodata' in local_da.attrs} http={'nodata' in http_da.attrs}" - ) - if "nodata" in local_da.attrs: - assert local_da.attrs["nodata"] == http_da.attrs["nodata"], ( - f"{label}: nodata value differs " - f"local={local_da.attrs['nodata']!r} " - f"http={http_da.attrs['nodata']!r}" - ) diff --git a/xrspatial/geotiff/tests/test_cog_requires_tiled_2312.py b/xrspatial/geotiff/tests/test_cog_requires_tiled_2312.py deleted file mode 100644 index 2cee2080a..000000000 --- a/xrspatial/geotiff/tests/test_cog_requires_tiled_2312.py +++ /dev/null @@ -1,187 +0,0 @@ -"""``cog=True`` requires ``tiled=True`` (#2312). 
- -The COG specification mandates a tiled internal layout. Before this -issue's fix, ``to_geotiff(..., cog=True, tiled=False)`` returned -successfully and wrote a strip-layout TIFF: ``cog=True`` was silently -ignored for the layout decision while the overview-pyramid and IFD-order -parts of the COG path still ran, producing a malformed hybrid that -violated the stable COG contract promoted in #2300. - -These tests pin three things: - -* The public ``to_geotiff`` wrapper rejects ``cog=True, tiled=False`` - with a typed, actionable error that names both fixes the caller can - apply (``tiled=True`` or ``cog=False``). This is the user-visible - rejection. -* The defense-in-depth gate inside ``_writer._write`` also rejects the - combination. Direct callers of the array-level entry point (the GPU - CPU-fallback path, tests, internal tools) cannot bypass the public - wrapper to produce the malformed file. -* The tiled COG path (``cog=True``, default ``tiled=True``) still works - end-to-end. A regression in the new gate that broke valid COG writes - would be a worse outcome than the original bug. - -Message-substring assertions mirror the style of -``test_cog_invalid_input_errors_2286.py`` (PR #2301): every gate pins -both the exception type and the actionable tokens (``tiled=True``, -``cog=False``, ``COG``) so a future rewrite cannot silently turn the -error into a vague one. 
-""" -from __future__ import annotations - -import warnings - -import numpy as np -import pytest -import xarray as xr - -from xrspatial.geotiff import to_geotiff -from xrspatial.geotiff._writer import write as _array_write - - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - -def _float_da(shape=(64, 64)): - """A small float32 DataArray suitable for COG writes.""" - return xr.DataArray( - np.zeros(shape, dtype=np.float32), dims=('y', 'x') - ) - - -# --------------------------------------------------------------------------- -# Public boundary: ``to_geotiff(cog=True, tiled=False)`` is refused. -# --------------------------------------------------------------------------- - -def test_public_writer_rejects_cog_true_tiled_false(tmp_path): - """The public entry point raises ``ValueError`` with a message that - names the COG-spec constraint and both caller-side fixes.""" - da = _float_da() - p = tmp_path / 'cog_tiled_false_2312.tif' - - with pytest.raises(ValueError) as exc: - to_geotiff(da, str(p), cog=True, tiled=False) - - msg = str(exc.value) - # The message must name the violated constraint. - assert 'COG' in msg, msg - assert 'tiled' in msg.lower(), msg - # Both caller-side fixes must appear so the error is actionable. - assert 'tiled=True' in msg, msg - assert 'cog=False' in msg, msg - - -def test_public_writer_rejects_cog_true_tiled_false_with_tile_size(tmp_path): - """Pinning the rejection survives a ``tile_size`` kwarg too. - - Before #2312, ``to_geotiff(..., cog=True, tiled=False, - tile_size=128)`` emitted the "tile_size is ignored when tiled=False" - warning and then wrote strips. The new gate has to fire before that - warning so the caller never sees the misleading "tile_size is - ignored" message under ``cog=True``. 
- """ - da = _float_da() - p = tmp_path / 'cog_tiled_false_with_tile_size_2312.tif' - - # ``pytest.warns(None)`` was removed; use the stdlib catch_warnings - # recorder to assert the dead "tile_size is ignored" warning never - # fires on the ``cog=True`` arm. - with warnings.catch_warnings(record=True) as record: - warnings.simplefilter('always') - with pytest.raises(ValueError) as exc: - to_geotiff(da, str(p), cog=True, tiled=False, tile_size=128) - - msg = str(exc.value) - assert 'COG' in msg, msg - assert 'tiled=True' in msg, msg - - tile_size_warnings = [ - w for w in record - if 'tile_size' in str(w.message) - and 'is ignored when tiled=False' in str(w.message) - ] - assert not tile_size_warnings, [str(w.message) for w in tile_size_warnings] - - -# --------------------------------------------------------------------------- -# Defense in depth: ``_writer._write(cog=True, tiled=False)`` also raises. -# --------------------------------------------------------------------------- - -def test_lowlevel_write_rejects_cog_true_tiled_false(tmp_path): - """The array-level entry point ``_writer._write`` (re-exported as - ``write``) carries its own gate so a caller that bypasses the public - wrapper still gets the typed rejection. - - Without this, a direct caller could quietly produce the malformed - strip-plus-overviews file the public boundary refuses. - """ - arr = np.zeros((64, 64), dtype=np.float32) - p = tmp_path / 'cog_tiled_false_lowlevel_2312.tif' - - with pytest.raises(ValueError) as exc: - _array_write( - arr, - str(p), - compression='deflate', - tiled=False, - cog=True, - ) - - msg = str(exc.value) - assert 'COG' in msg, msg - assert 'tiled=True' in msg, msg - assert 'cog=False' in msg, msg - - -# --------------------------------------------------------------------------- -# Smoke test: the valid tiled COG path still works. 
-# --------------------------------------------------------------------------- - -def test_tiled_cog_smoke_still_works(tmp_path): - """A regression in the new gate that broke valid COG writes would - be a worse outcome than the original bug. Pin the happy path - end-to-end so the gate has to stay narrowly targeted at the - ``cog=True, tiled=False`` combination it is meant to catch. - """ - da = _float_da(shape=(128, 128)) - p = tmp_path / 'cog_tiled_smoke_2312.tif' - - rv = to_geotiff(da, str(p), cog=True, tiled=True, tile_size=64) - assert rv == str(p) - assert p.exists() - assert p.stat().st_size > 0 - - -def test_tiled_cog_smoke_default_tiled(tmp_path): - """``tiled`` defaults to ``True`` on ``to_geotiff``, so ``cog=True`` - on its own should also produce a valid COG. Pinned so a future - change that flipped the default would not silently start hitting - the new rejection gate. - """ - da = _float_da(shape=(128, 128)) - p = tmp_path / 'cog_tiled_default_smoke_2312.tif' - - rv = to_geotiff(da, str(p), cog=True) - assert rv == str(p) - assert p.exists() - assert p.stat().st_size > 0 - - -# --------------------------------------------------------------------------- -# Negative control: ``cog=False, tiled=False`` is still a valid strip TIFF. -# --------------------------------------------------------------------------- - -def test_strip_layout_without_cog_still_works(tmp_path): - """``tiled=False`` without ``cog=True`` is the supported strip-TIFF - path; the new gate must not regress it. Pinned so a stricter - interpretation of ``cog=True implies tiled=True`` could not creep - into the general ``tiled=False`` path. 
- """ - da = _float_da(shape=(64, 64)) - p = tmp_path / 'strip_no_cog_2312.tif' - - rv = to_geotiff(da, str(p), cog=False, tiled=False) - assert rv == str(p) - assert p.exists() - assert p.stat().st_size > 0 diff --git a/xrspatial/geotiff/tests/test_cog_tile_size_hang_2311.py b/xrspatial/geotiff/tests/test_cog_tile_size_hang_2311.py deleted file mode 100644 index 60056cd2b..000000000 --- a/xrspatial/geotiff/tests/test_cog_tile_size_hang_2311.py +++ /dev/null @@ -1,210 +0,0 @@ -"""COG writer rejects non-positive ``tile_size`` regardless of ``tiled`` (#2311). - -Before this fix, ``to_geotiff(..., cog=True, tiled=False, tile_size=<=0)`` -hung the writer. ``tile_size`` validation only ran when ``tiled=True``, but -the COG path in ``_writer.py`` still used ``tile_size`` to auto-generate -overviews regardless of ``tiled``. With ``tile_size=-1`` the auto-overview -loop in ``_writer.py:490`` had ``oh > tile_size and ow > tile_size`` -permanently true once ``oh, ow`` halved to 0, while the inner -``if oh > 0 and ow > 0`` guard prevented the level list from growing -- -the loop never exited. - -The fix lives in two places: - -1. ``to_geotiff`` in ``_writers/eager.py`` now runs ``_validate_tile_size_arg`` - when ``tiled=True`` OR ``cog=True``. The COG path consumes ``tile_size`` - for overview generation regardless of strip-vs-tiled layout, so the - public boundary must validate it in both cases. -2. The auto-overview loop in ``_writer.py`` has a defensive pre-check that - raises if ``tile_size`` is not a positive int, plus a tightened loop - condition that requires ``oh, ow > 0`` to continue. Together these mean - the loop cannot run forever even if a future internal caller bypasses - the public validator. - -Each row below uses a SIGALRM-based timeout so a regression that brings -the hang back fails the test instead of locking up the run. 
SIGALRM is a -POSIX-only mechanism (CPython on Linux/macOS); the tests fall back to -plain execution on Windows, where the original hang is still a concern -but the watchdog is unavailable. -""" -from __future__ import annotations - -import contextlib -import os -import signal -import warnings - -import numpy as np -import pytest -import xarray as xr - -from xrspatial.geotiff import to_geotiff - - -@contextlib.contextmanager -def _alarm_timeout(seconds: int): - """Raise TimeoutError after ``seconds`` to bound test failure modes. - - No-op on platforms that lack SIGALRM (Windows). The window is large - enough that a healthy raise path finishes well before the alarm - fires; if the fix regresses the writer hangs and the alarm fires. - """ - if not hasattr(signal, 'SIGALRM') or os.name == 'nt': - yield - return - - def _handler(signum, frame): # noqa: ARG001 - raise TimeoutError( - f'test exceeded {seconds}s watchdog; the writer likely ' - f'regressed into the #2311 infinite-loop hang.' - ) - - old = signal.signal(signal.SIGALRM, _handler) - signal.alarm(seconds) - try: - yield - finally: - signal.alarm(0) - signal.signal(signal.SIGALRM, old) - - -def _float_da(shape=(64, 64)): - """A small float32 DataArray large enough to trigger COG overview build.""" - return xr.DataArray( - np.zeros(shape, dtype=np.float32), dims=('y', 'x') - ) - - -# --------------------------------------------------------------------------- -# Public boundary: ``to_geotiff(..., cog=True, tile_size<=0)`` must raise. -# Covers both tiled=True and tiled=False, plus 0 and a negative value, so -# the validator gate stays on regardless of layout flag. 
-# --------------------------------------------------------------------------- - -@pytest.mark.parametrize('tiled', [True, False]) -@pytest.mark.parametrize('tile_size', [-1, 0]) -def test_to_geotiff_cog_non_positive_tile_size_raises(tmp_path, tiled, tile_size): - """``cog=True`` with ``tile_size<=0`` raises ValueError up front, - regardless of ``tiled``. Before #2311 this hung the writer when - ``tiled=False``.""" - da = _float_da() - p = tmp_path / f'cog_tile_size_hang_2311_t{int(tiled)}_ts{tile_size}.tif' - - with _alarm_timeout(5), pytest.raises(ValueError) as exc: - to_geotiff(da, str(p), cog=True, tiled=tiled, tile_size=tile_size) - - msg = str(exc.value) - assert 'tile_size' in msg, msg - # The shared validator says "positive int" -- pin the substring so a - # message rewrite still keeps the actionable wording. - assert 'positive' in msg.lower(), msg - - -# --------------------------------------------------------------------------- -# Sanity: ``cog=False`` with ``tiled=False`` still accepts an unused -# ``tile_size`` (the existing "ignored" warning shape) -- the new gate -# must not fire when neither path will consume the value. -# --------------------------------------------------------------------------- - -def test_to_geotiff_non_cog_strip_does_not_validate_tile_size(tmp_path): - """When neither tiled output nor COG overview generation will use - ``tile_size``, the validator gate stays off. The pre-existing - "tile_size ignored" warning still fires (it carries its own - non-default-value check, not a positivity check), but no error - is raised.""" - da = _float_da() - p = tmp_path / 'cog_tile_size_hang_2311_no_cog_strip.tif' - - # A negative tile_size with cog=False AND tiled=False is accepted - # (with the "ignored" warning) because nothing consumes the value. - # Use ``filterwarnings`` to swallow the warning so the test only - # asserts no raise / no hang. 
- with _alarm_timeout(5), warnings.catch_warnings(): - warnings.simplefilter('ignore') - to_geotiff(da, str(p), cog=False, tiled=False, tile_size=-1) - - assert p.exists(), 'writer should have produced a strip-layout file' - - -# --------------------------------------------------------------------------- -# Defense in depth: drive the inner writer directly with a bad tile_size -# and assert the auto-overview loop raises instead of hanging. Guards -# against future internal callers that bypass ``to_geotiff``'s public -# validator. -# --------------------------------------------------------------------------- - -@pytest.mark.parametrize('tile_size', [-1, 0]) -def test_writer_auto_overview_loop_rejects_non_positive_tile_size( - tmp_path, tile_size): - """``_write(..., cog=True, overview_levels=None)`` raises ValueError - when ``tile_size`` is not a positive int, instead of spinning in the - halving loop. The public ``to_geotiff`` already validates earlier; - this is the inner-writer safety net (#2311).""" - from xrspatial.geotiff._writer import _write - - # Minimal float32 array large enough for the auto-overview branch to - # be entered. The exact pixel values do not matter -- the validator - # check runs before any encoding work. - data = np.zeros((64, 64), dtype=np.float32) - out = tmp_path / f'cog_tile_size_hang_2311_inner_ts{tile_size}.tif' - - with _alarm_timeout(5), pytest.raises(ValueError) as exc: - _write(data, str(out), - compression='none', - tiled=True, - tile_size=tile_size, - cog=True, - overview_levels=None) - - assert 'tile_size' in str(exc.value), str(exc.value) - - -# --------------------------------------------------------------------------- -# Non-int tile_size values reach the same gate. The public -# ``_validate_tile_size`` (called from ``to_geotiff`` when tiled or cog is -# true) rejects None, float, and bool with typed errors; the -# defense-in-depth gate at the top of ``_write`` does the same for direct -# callers. 
Both layers should reject all three types. -# --------------------------------------------------------------------------- - -@pytest.mark.parametrize('bad_tile_size', [None, 128.0, True, False]) -def test_to_geotiff_cog_non_int_tile_size_raises(tmp_path, bad_tile_size): - """Non-int ``tile_size`` (None, float, bool) with ``cog=True`` is - rejected at the public boundary, regardless of ``tiled``. Bool is - explicitly listed because Python treats ``True``/``False`` as int - subclasses (#2311 follow-up).""" - da = _float_da() - p = tmp_path / ( - f'cog_tile_size_hang_2311_nonint_{type(bad_tile_size).__name__}.tif') - - with _alarm_timeout(5), pytest.raises((ValueError, TypeError)) as exc: - to_geotiff(da, str(p), cog=True, tiled=True, tile_size=bad_tile_size) - - assert 'tile_size' in str(exc.value), str(exc.value) - - -# --------------------------------------------------------------------------- -# Inner-loop guard coverage: confirm the auto-overview halving loop's own -# ``tile_size > 0`` pre-check is present in ``_write``'s compiled -# constants. Inspecting the constants pins the literal so a future -# refactor that removes the inner guard fails this test loudly even if -# the top-of-``_write`` gate still catches the bad input at runtime. -# (Reaching the inner guard through ``_write`` directly would require -# patching out the top gate, which is invasive; the constants check is -# the simplest reliable pin without rewriting production code.) 
-# --------------------------------------------------------------------------- - -def test_inner_overview_loop_guard_message_is_pinned(): - """Pin the inner-overview ``tile_size`` guard literal so removing - the loop-side defense fails this test even when the top gate at - line 407 still raises for the same inputs (#2311).""" - from xrspatial.geotiff import _writer as wmod - - guard_msg = ( - 'tile_size must be a positive int for COG overview ' - 'generation, got tile_size=') - consts = wmod._write.__code__.co_consts - found = any(isinstance(c, str) and guard_msg in c for c in consts) - assert found, ( - 'inner-loop guard message not present in _write constants; the ' - 'auto-overview guard introduced in #2311 may have been removed.') diff --git a/xrspatial/geotiff/tests/test_cog_writer_compliance.py b/xrspatial/geotiff/tests/test_cog_writer_compliance.py deleted file mode 100644 index 834466ad1..000000000 --- a/xrspatial/geotiff/tests/test_cog_writer_compliance.py +++ /dev/null @@ -1,669 +0,0 @@ -"""External-interop compliance suite for ``to_geotiff(..., cog=True)``. - -Issue #2292 (part of #2286 -- COG readiness/stability rollout). - -These tests treat ``to_geotiff`` as a black box: every assertion goes through -rasterio (and optionally rio-cogeo / the GDAL ``validate_cloud_optimized_geotiff`` -sample). The goal is to catch interop regressions where xrspatial writes a -file that satisfies the in-process round-trip but trips up external readers. - -Matrix: - -- Stable codecs: ``none``, ``deflate``, ``lzw``, ``zstd``, ``packbits``. -- Dtypes: at least one integer (``uint16``) and one float (``float32``). -- Bands: single-band and 3-band. -- Nodata: sentinel value (integer + float sentinel) and NaN. -- Georef: PixelIsArea and PixelIsPoint. -- Overviews: explicit level list and auto-generated levels. - -Production code is out of scope -- if a row uncovers a real writer bug, -mark it ``xfail`` with a linked follow-up issue rather than fixing it -here. 
-""" -from __future__ import annotations - -import os - -import numpy as np -import pytest -import xarray as xr - -from xrspatial.geotiff import to_geotiff -from xrspatial.geotiff._header import parse_all_ifds, parse_header - -rasterio = pytest.importorskip( - "rasterio", - reason="rasterio is required for the external compliance suite", -) - - -# --------------------------------------------------------------------------- -# Test matrix definitions -# --------------------------------------------------------------------------- - -# Stable, lossless codecs only. Each row should produce a byte-for-byte -# round-trip on the base level. -STABLE_CODECS = ["none", "deflate", "lzw", "zstd", "packbits"] - -DTYPES = [ - pytest.param(np.uint16, id="uint16"), - pytest.param(np.float32, id="float32"), -] - -BAND_COUNTS = [ - pytest.param(1, id="1band"), - pytest.param(3, id="3band"), -] - -# ``raster_type`` attr the writer understands: ``'area'`` (default) or -# ``'point'``. We pass via attrs because that is the public surface. -GEOREF_MODES = [ - pytest.param("area", id="area"), - pytest.param("point", id="point"), -] - - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - - -def _make_data( - dtype: np.dtype, - *, - bands: int = 1, - height: int = 64, - width: int = 64, - rng_seed: int = 17, -) -> np.ndarray: - """Deterministic raster shaped (h, w) or (h, w, bands).""" - dt = np.dtype(dtype) - rng = np.random.RandomState(rng_seed + bands) - if dt.kind == "f": - base = rng.uniform(-100.0, 100.0, size=(height, width)).astype(dt) - else: - info = np.iinfo(dt) - high = min(info.max, 1000) - base = rng.randint(0, high, size=(height, width)).astype(dt) - if bands == 1: - return base - # Stack with small per-band offsets so bands are distinguishable. 
- layers = [base] - for b in range(1, bands): - layers.append((base + b * 7).astype(dt)) - return np.stack(layers, axis=-1) # (h, w, bands) - - -def _build_da( - arr: np.ndarray, - *, - raster_type: str = "area", - crs: int | str | None = 4326, -) -> xr.DataArray: - """Wrap ``arr`` in a DataArray with EPSG:4326 coords and georef attrs.""" - if arr.ndim == 2: - h, w = arr.shape - dims = ("y", "x") - else: - h, w, _b = arr.shape - dims = ("y", "x", "band") - y = np.linspace(45.0, 44.0, h, dtype=np.float64) - x = np.linspace(-120.0, -119.0, w, dtype=np.float64) - coords: dict = {"y": y, "x": x} - attrs: dict = {} - if crs is not None: - attrs["crs"] = crs - if raster_type == "point": - attrs["raster_type"] = "point" - return xr.DataArray(arr, dims=dims, coords=coords, attrs=attrs) - - -def _pick_sentinel(dtype: np.dtype) -> float | int: - """Pick a nodata sentinel that fits the dtype. - - The signed-int branch is unreachable from the current DTYPES list - (only ``uint16`` and ``float32``) but is kept for the eventual case - where the matrix grows. Dead branches in a helper are cheap and the - intent is clearer than special-casing the current matrix here. - """ - dt = np.dtype(dtype) - if dt.kind == "f": - return -9999.0 - if dt.kind == "u": - return int(np.iinfo(dt).max) # e.g. 65535 for uint16 - return int(np.iinfo(dt).min) - - -def _arrange_for_rasterio(arr: np.ndarray) -> np.ndarray: - """Convert (h, w[, bands]) into rasterio's (bands, h, w).""" - if arr.ndim == 2: - return arr[np.newaxis, :, :] - # (h, w, bands) -> (bands, h, w) - return np.transpose(arr, (2, 0, 1)) - - -def _is_tiled(src) -> bool: - """Rasterio's ``is_tiled`` is deprecated; reproduce its check locally. - - A dataset is tiled when block dimensions are square and smaller than - the dataset itself (rasterio's old definition). ``block_shapes`` is - a per-band list of ``(height, width)`` tuples. 
- """ - shapes = src.block_shapes - if not shapes: - return False - bh, bw = shapes[0] - return bh == bw and bh < src.height and bw < src.width - - -def _assert_ifds_before_data(path: str) -> None: - """COG layout contract: every IFD sits before any tile data block.""" - with open(path, "rb") as f: - data = f.read() - header = parse_header(data) - ifds = parse_all_ifds(data, header) - assert len(ifds) >= 2, ( - f"expected at least 2 IFDs (full res + overview), got {len(ifds)}" - ) - tile_offsets: list[int] = [] - for ifd in ifds: - offs = ifd.tile_offsets - if offs: - tile_offsets.extend(offs) - assert tile_offsets, "no tile offsets found; output is not tiled" - first_data = min(tile_offsets) - # All IFD anchors must sit before the first tile blob. - assert header.first_ifd_offset < first_data, ( - f"first IFD offset {header.first_ifd_offset} >= first tile data " - f"offset {first_data}; IFDs must come before image data in a COG" - ) - - -def _require_validator_env() -> bool: - """Return True if ``XRSPATIAL_REQUIRE_COG_VALIDATOR`` is set truthy. - - Truthy values: ``1``, ``true``, ``yes``, ``on`` (case-insensitive). - Anything else, including unset / empty, returns False. - - CI sets this to make a missing validator dependency a hard failure - rather than a silent skip. On a contributor laptop without rio-cogeo - or GDAL it is unset and the validator step skips cleanly. - """ - val = os.environ.get("XRSPATIAL_REQUIRE_COG_VALIDATOR", "") - return val.lower() in {"1", "true", "yes", "on"} - - -def _try_cog_validate(path: str) -> None: - """Call rio-cogeo's validator if present, else GDAL's. - - When ``XRSPATIAL_REQUIRE_COG_VALIDATOR=1`` is set in the environment - and neither validator is importable, fail loudly instead of skipping - so a misconfigured CI job cannot pretend the gate passed. When the - env var is unset, missing dependencies skip cleanly. 
- """ - try: - from rio_cogeo.cogeo import cog_validate - except ImportError: - cog_validate = None # type: ignore[assignment] - - if cog_validate is not None: - valid, errors, _warns = cog_validate(path, strict=False) - assert valid, f"rio_cogeo cog_validate failed: errors={errors}" - return - - try: - from osgeo_utils.samples import validate_cloud_optimized_geotiff - except ImportError: - if _require_validator_env(): - pytest.fail( - "XRSPATIAL_REQUIRE_COG_VALIDATOR=1 but neither rio-cogeo " - "nor GDAL validate_cloud_optimized_geotiff is importable. " - "Install rio-cogeo (and/or GDAL Python bindings) on this " - "job, or unset XRSPATIAL_REQUIRE_COG_VALIDATOR to allow " - "the soft skip." - ) - pytest.skip( - "neither rio-cogeo nor GDAL validate_cloud_optimized_geotiff " - "is installed; skipping external COG validator step" - ) - return - - _warns, errors, _details = validate_cloud_optimized_geotiff.validate( - path, full_check=True, - ) - assert not errors, f"GDAL validator errors: {errors}" - - -# --------------------------------------------------------------------------- -# Codec x dtype x band-count: base pixels + overviews + georef survive -# --------------------------------------------------------------------------- - - -@pytest.mark.parametrize("bands", BAND_COUNTS) -@pytest.mark.parametrize("dtype", DTYPES) -@pytest.mark.parametrize("codec", STABLE_CODECS) -def test_codec_dtype_bands_roundtrip(tmp_path, codec, dtype, bands): - """Stable codec round-trip via rasterio: base pixels byte-exact, georef survives. - - Contracts asserted per row: - - rasterio.open succeeds and reports a tiled COG. - - Band count and dtype survive. - - Base pixels are byte-exact (stable codecs are lossless). - - Overview decimation factors survive. - - CRS and transform survive. - - IFDs sit before any tile data block (COG layout). 
- """ - arr = _make_data(dtype, bands=bands, height=64, width=64) - da = _build_da(arr, raster_type="area", crs=4326) - - path = str(tmp_path / f"2292_codec_{codec}_{np.dtype(dtype).name}_b{bands}.tif") - to_geotiff( - da, path, - compression=codec, cog=True, tile_size=16, - overview_levels=[2], - ) - - expected = _arrange_for_rasterio(arr) - with rasterio.open(path) as src: - assert _is_tiled(src), ( - f"{codec} {dtype} b{bands}: COG output must be tiled" - ) - assert src.count == bands, ( - f"band count mismatch: expected {bands}, got {src.count}" - ) - assert src.dtypes == tuple([np.dtype(dtype).name] * bands), ( - f"dtype tuple mismatch: expected " - f"{tuple([np.dtype(dtype).name] * bands)}, got {src.dtypes}" - ) - # Stable codecs are lossless -> byte-exact at full resolution. - actual = src.read() - assert actual.shape == expected.shape, ( - f"shape mismatch: expected {expected.shape}, got {actual.shape}" - ) - np.testing.assert_array_equal( - actual, expected, - err_msg=f"base pixels diverged for codec={codec} dtype={dtype}", - ) - # Overviews - for b in range(1, bands + 1): - ovs = src.overviews(b) - assert ovs == [2], ( - f"band {b}: expected overview factors [2], got {ovs}" - ) - # CRS / transform - assert src.crs is not None and src.crs.to_epsg() == 4326, ( - f"CRS round-trip failed: got {src.crs}" - ) - assert not src.transform.is_identity, ( - "transform should not be identity for a georeferenced raster" - ) - # COG layout invariant - _assert_ifds_before_data(path) - - -# --------------------------------------------------------------------------- -# Nodata: sentinel and NaN -# --------------------------------------------------------------------------- - - -@pytest.mark.parametrize("dtype", DTYPES) -def test_nodata_sentinel_survives(tmp_path, dtype): - """Integer and float sentinels survive write -> rasterio.open.""" - arr = _make_data(dtype, bands=1, height=64, width=64) - sentinel = _pick_sentinel(dtype) - # Mark a couple of cells as nodata. 
- arr_with_nd = arr.copy() - arr_with_nd[0, 0] = sentinel - arr_with_nd[5, 7] = sentinel - da = _build_da(arr_with_nd, raster_type="area", crs=4326) - - path = str(tmp_path / f"2292_nodata_sentinel_{np.dtype(dtype).name}.tif") - to_geotiff( - da, path, - compression="deflate", cog=True, tile_size=16, - overview_levels=[2], nodata=sentinel, - ) - - with rasterio.open(path) as src: - assert src.nodata is not None, "nodata tag not set on output" - # rasterio normalises to float; compare numerically. - assert float(src.nodata) == float(sentinel), ( - f"nodata mismatch: expected {sentinel}, got {src.nodata}" - ) - actual = src.read(1) - # Byte-exact at base level for deflate. - np.testing.assert_array_equal(actual, arr_with_nd) - - -def test_nodata_nan_survives(tmp_path): - """NaN nodata: NaN positions round-trip as NaN through rasterio.""" - arr = _make_data(np.float32, bands=1, height=64, width=64) - arr[0, 0] = np.nan - arr[3, 9] = np.nan - da = _build_da(arr, raster_type="area", crs=4326) - - path = str(tmp_path / "2292_nodata_nan.tif") - to_geotiff( - da, path, - compression="deflate", cog=True, tile_size=16, - overview_levels=[2], nodata=float("nan"), - ) - - with rasterio.open(path) as src: - assert src.nodata is not None and np.isnan(src.nodata), ( - f"nodata tag should be NaN, got {src.nodata}" - ) - actual = src.read(1) - np.testing.assert_array_equal(np.isnan(actual), np.isnan(arr)) - finite = ~np.isnan(arr) - np.testing.assert_array_equal(actual[finite], arr[finite]) - - -# --------------------------------------------------------------------------- -# Georef: PixelIsArea vs PixelIsPoint -# --------------------------------------------------------------------------- - - -@pytest.mark.parametrize("raster_type", GEOREF_MODES) -def test_raster_type_tag_survives(tmp_path, raster_type): - """AREA_OR_POINT tag survives to rasterio.tags().""" - arr = _make_data(np.float32, bands=1, height=32, width=32) - da = _build_da(arr, raster_type=raster_type, crs=4326) - - path 
= str(tmp_path / f"2292_georef_{raster_type}.tif") - to_geotiff( - da, path, - compression="deflate", cog=True, tile_size=16, - overview_levels=[2], - ) - - with rasterio.open(path) as src: - tag = src.tags().get("AREA_OR_POINT") - expected_tag = "Area" if raster_type == "area" else "Point" - assert tag == expected_tag, ( - f"AREA_OR_POINT tag mismatch: expected {expected_tag!r}, " - f"got {tag!r}" - ) - # Base values still round-trip exactly. - np.testing.assert_array_equal(src.read(1), arr) - - -# --------------------------------------------------------------------------- -# Overviews: explicit list vs auto-generated -# --------------------------------------------------------------------------- - - -def test_overviews_explicit_levels(tmp_path): - """``overview_levels=[2, 4, 8]`` produces exactly those decimations.""" - arr = _make_data(np.float32, bands=1, height=128, width=128) - da = _build_da(arr, raster_type="area", crs=4326) - - path = str(tmp_path / "2292_overviews_explicit.tif") - to_geotiff( - da, path, - compression="deflate", cog=True, tile_size=16, - overview_levels=[2, 4, 8], - ) - - with rasterio.open(path) as src: - assert src.overviews(1) == [2, 4, 8], ( - f"expected overviews [2, 4, 8], got {src.overviews(1)}" - ) - # Each native overview should have the expected shape. - for lvl, factor in enumerate([2, 4, 8]): - with rasterio.open(path, OVERVIEW_LEVEL=lvl) as ov: - exp_h = arr.shape[0] // factor - exp_w = arr.shape[1] // factor - assert ov.shape == (exp_h, exp_w), ( - f"overview {factor}x: expected shape ({exp_h}, {exp_w}), " - f"got {ov.shape}" - ) - _assert_ifds_before_data(path) - - -@pytest.mark.parametrize("resampling", ["mean", "nearest"]) -def test_overview_pixels_match_expected(tmp_path, resampling): - """Overview pixel values agree with a hand-computed 2x decimation. - - Uses a deterministic base array so we can predict the level-1 overview - in pure numpy. 
``mean`` reduces each 2x2 block to its mean; ``nearest`` - keeps the upper-left pixel of each block. The writer should produce - overviews that match within float tolerance (lossless codec on the - base, deterministic block reducer on the overview). - """ - base = _make_data(np.float32, bands=1, height=64, width=64) - da = _build_da(base, raster_type="area", crs=4326) - - path = str(tmp_path / f"2292_ovpix_{resampling}.tif") - to_geotiff( - da, path, - compression="deflate", cog=True, tile_size=16, - overview_levels=[2], overview_resampling=resampling, - ) - - if resampling == "mean": - # Block-mean 2x2 -> (32, 32). Promote to float64 for the reduction - # so the comparison is not biased by float32 round-off in the - # intermediate sum, then cast back to match what the reader - # returns. - b = base.astype(np.float64).reshape(32, 2, 32, 2).mean(axis=(1, 3)) - expected_ov = b.astype(np.float32) - else: # nearest - # Upper-left pixel of each 2x2 block. - expected_ov = base[::2, ::2] - - with rasterio.open(path, OVERVIEW_LEVEL=0) as ov: - actual = ov.read(1) - assert actual.shape == expected_ov.shape, ( - f"{resampling}: expected overview shape {expected_ov.shape}, " - f"got {actual.shape}" - ) - # Tolerance: the writer's mean reducer accumulates in float64 internally - # but the on-disk result is float32; comparing against our hand-computed - # float32 expected leaves <= 1 ULP of slack per cell. 
- np.testing.assert_allclose( - actual, expected_ov, rtol=1e-5, atol=1e-5, - err_msg=f"{resampling} overview pixels diverged from expected", - ) - - -def test_overviews_auto_generated(tmp_path): - """``overview_levels=None`` with cog=True auto-generates a pyramid.""" - arr = _make_data(np.float32, bands=1, height=128, width=128) - da = _build_da(arr, raster_type="area", crs=4326) - - path = str(tmp_path / "2292_overviews_auto.tif") - to_geotiff( - da, path, - compression="deflate", cog=True, tile_size=32, - ) - - with rasterio.open(path) as src: - ovs = src.overviews(1) - assert len(ovs) >= 1, f"expected at least one overview, got {ovs}" - # Auto-generated pyramid: every level is a power of two, strictly - # increasing, and large enough that the next halving would not fall - # below the tile_size of 32. The bitwise test below is the classic - # power-of-two check: ``o & (o - 1) == 0`` is True iff ``o`` has a - # single set bit. The ``o >= 2`` guard rules out the false-positive - # at ``o == 0``. 
- assert all((o & (o - 1)) == 0 and o >= 2 for o in ovs), ( - f"auto overviews should be powers of two >= 2, got {ovs}" - ) - assert all(b > a for a, b in zip(ovs, ovs[1:])), ( - f"auto overviews not strictly increasing: {ovs}" - ) - _assert_ifds_before_data(path) - - -# --------------------------------------------------------------------------- -# TIFF layout sanity: tiled, sane tile offsets, IFDs before data -# --------------------------------------------------------------------------- - - -def test_layout_is_cog_shaped(tmp_path): - """A cog=True file is tiled, has overview IFDs, and IFDs precede data.""" - arr = _make_data(np.uint16, bands=1, height=128, width=128) - da = _build_da(arr, raster_type="area", crs=4326) - - path = str(tmp_path / "2292_layout.tif") - to_geotiff( - da, path, - compression="lzw", cog=True, tile_size=32, - overview_levels=[2, 4], - ) - - with rasterio.open(path) as src: - assert _is_tiled(src), "COG output must be tiled, got stripped layout" - assert src.block_shapes[0] == (32, 32), ( - f"unexpected block shape: {src.block_shapes}" - ) - - # All IFDs come before image data; tile offsets are monotonic-ish - # (not strictly monotonic across IFDs but every offset must point inside - # the file). 
- with open(path, "rb") as f: - data = f.read() - header = parse_header(data) - ifds = parse_all_ifds(data, header) - assert len(ifds) == 3, ( - f"expected 3 IFDs (full + 2 overviews), got {len(ifds)}" - ) - file_len = len(data) - for ifd in ifds: - for off in (ifd.tile_offsets or ()): - assert 0 <= off < file_len, ( - f"tile offset {off} outside file bounds [0, {file_len})" - ) - _assert_ifds_before_data(path) - - -# --------------------------------------------------------------------------- -# Optional external validator -# --------------------------------------------------------------------------- - - -def test_external_cog_validator(tmp_path): - """Run rio-cogeo / GDAL's COG validator if available, else skip cleanly.""" - arr = _make_data(np.float32, bands=1, height=256, width=256) - da = _build_da(arr, raster_type="area", crs=4326) - - path = str(tmp_path / "2292_validator.tif") - to_geotiff( - da, path, - compression="deflate", cog=True, tile_size=64, - overview_levels=[2, 4], - ) - - _try_cog_validate(path) - - -# --------------------------------------------------------------------------- -# Validator-mode env contract (issue #2302) -# --------------------------------------------------------------------------- - - -def test_require_validator_env_strict_fails_when_dep_missing( - tmp_path, monkeypatch, -): - """``XRSPATIAL_REQUIRE_COG_VALIDATOR=1`` must fail (not skip) if both - validators are absent. - - This guards the CI gate: if the install step silently drops rio-cogeo - or GDAL, the compliance suite must fail rather than skip past the - validator step. Stub both imports as ``ImportError`` so the test runs - the same on every job, validator-present or not. 
- """ - import builtins - - real_import = builtins.__import__ - - def _blocked_import(name, globals=None, locals=None, fromlist=(), level=0): - fl = tuple(fromlist) if fromlist else () - rio_match = ( - name == "rio_cogeo.cogeo" and "cog_validate" in fl - ) - gdal_match = ( - name == "osgeo_utils.samples" - and "validate_cloud_optimized_geotiff" in fl - ) - if rio_match or gdal_match: - raise ImportError(f"blocked for test: {name}") - return real_import(name, globals, locals, fromlist, level) - - monkeypatch.setattr(builtins, "__import__", _blocked_import) - monkeypatch.setenv("XRSPATIAL_REQUIRE_COG_VALIDATOR", "1") - - arr = _make_data(np.float32, bands=1, height=64, width=64) - da = _build_da(arr, raster_type="area", crs=4326) - path = str(tmp_path / "2302_require_strict.tif") - to_geotiff( - da, path, - compression="deflate", cog=True, tile_size=16, - overview_levels=[2], - ) - - # ``pytest.fail.Exception`` is a documented alias for - # ``_pytest.outcomes.Failed`` on pytest >= 7 (which this repo pins - # via setup.cfg). Update both spots in this file if that pin moves. - with pytest.raises(pytest.fail.Exception, match="XRSPATIAL_REQUIRE_COG_VALIDATOR"): - _try_cog_validate(path) - - -def test_require_validator_env_unset_skips_when_dep_missing( - tmp_path, monkeypatch, -): - """With the env var unset, missing validators trigger a clean skip. - - This is the contributor-laptop path: no rio-cogeo / GDAL installed, - the compliance suite still passes without the optional validator - step. 
- """ - import builtins - - real_import = builtins.__import__ - - def _blocked_import(name, globals=None, locals=None, fromlist=(), level=0): - fl = tuple(fromlist) if fromlist else () - rio_match = ( - name == "rio_cogeo.cogeo" and "cog_validate" in fl - ) - gdal_match = ( - name == "osgeo_utils.samples" - and "validate_cloud_optimized_geotiff" in fl - ) - if rio_match or gdal_match: - raise ImportError(f"blocked for test: {name}") - return real_import(name, globals, locals, fromlist, level) - - monkeypatch.setattr(builtins, "__import__", _blocked_import) - monkeypatch.delenv("XRSPATIAL_REQUIRE_COG_VALIDATOR", raising=False) - - arr = _make_data(np.float32, bands=1, height=64, width=64) - da = _build_da(arr, raster_type="area", crs=4326) - path = str(tmp_path / "2302_require_unset.tif") - to_geotiff( - da, path, - compression="deflate", cog=True, tile_size=16, - overview_levels=[2], - ) - - with pytest.raises(pytest.skip.Exception): - _try_cog_validate(path) - - -@pytest.mark.parametrize("val", ["1", "true", "TRUE", "yes", "on"]) -def test_require_validator_env_truthy_values(monkeypatch, val): - """All documented truthy spellings activate strict mode.""" - monkeypatch.setenv("XRSPATIAL_REQUIRE_COG_VALIDATOR", val) - assert _require_validator_env() is True - - -@pytest.mark.parametrize("val", ["", "0", "false", "no", "off", "anything"]) -def test_require_validator_env_non_truthy_values(monkeypatch, val): - """Empty or non-truthy spellings leave strict mode off.""" - if val == "": - monkeypatch.delenv("XRSPATIAL_REQUIRE_COG_VALIDATOR", raising=False) - else: - monkeypatch.setenv("XRSPATIAL_REQUIRE_COG_VALIDATOR", val) - assert _require_validator_env() is False diff --git a/xrspatial/geotiff/tests/test_overview_block_order_2308.py b/xrspatial/geotiff/tests/test_overview_block_order_2308.py index 8b3c56b45..15103060b 100644 --- a/xrspatial/geotiff/tests/test_overview_block_order_2308.py +++ b/xrspatial/geotiff/tests/test_overview_block_order_2308.py @@ -110,7 +110,7 
@@ def test_cog_overview_block_order_three_levels_2308(): def _rio_cogeo_or_skip(): """Skip the rio-cogeo gate when the dependency isn't installed. - Mirrors the skip semantics used in ``test_cog_writer_compliance``: + Mirrors the skip semantics used in ``write/test_cog.py``: contributor laptops without rio-cogeo see a skip, CI with rio-cogeo runs the strict check. """ diff --git a/xrspatial/geotiff/tests/test_vrt_write.py b/xrspatial/geotiff/tests/test_vrt_write.py deleted file mode 100644 index 9525336a4..000000000 --- a/xrspatial/geotiff/tests/test_vrt_write.py +++ /dev/null @@ -1,113 +0,0 @@ -"""Tests for VRT tiled output from to_geotiff.""" -import os - -import numpy as np -import pytest -import xarray as xr - -from xrspatial.geotiff import open_geotiff, to_geotiff - - -@pytest.fixture -def sample_raster(): - """200x200 float32 raster with coords and CRS.""" - arr = np.random.default_rng(55).random((200, 200), dtype=np.float32) - y = np.linspace(41.0, 40.0, 200) # north-to-south - x = np.linspace(-106.0, -105.0, 200) - da = xr.DataArray(arr, dims=['y', 'x'], - coords={'y': y, 'x': x}, - attrs={'crs': 4326, 'nodata': -9999.0}) - return da - - -class TestVrtOutputNumpy: - def test_creates_vrt_and_tiles_dir(self, sample_raster, tmp_path): - vrt_path = str(tmp_path / 'out_1083.vrt') - to_geotiff(sample_raster, vrt_path) - assert os.path.exists(vrt_path) - tiles_dir = str(tmp_path / 'out_1083_tiles') - assert os.path.isdir(tiles_dir) - tile_files = os.listdir(tiles_dir) - assert len(tile_files) > 0 - assert all(f.endswith('.tif') for f in tile_files) - - def test_round_trip_numpy(self, sample_raster, tmp_path): - vrt_path = str(tmp_path / 'rt_1083.vrt') - to_geotiff(sample_raster, vrt_path) - result = open_geotiff(vrt_path) - np.testing.assert_array_almost_equal( - result.values, sample_raster.values, decimal=5) - - def test_tile_naming_convention(self, sample_raster, tmp_path): - vrt_path = str(tmp_path / 'named_1083.vrt') - to_geotiff(sample_raster, vrt_path, 
tile_size=128) - tiles_dir = str(tmp_path / 'named_1083_tiles') - files = sorted(os.listdir(tiles_dir)) - # 200x200 with tile_size=128 -> 2x2 grid (TIFF 6 spec requires - # tile_size be a multiple of 16; 100 was rejected post-#1767). - assert files == [ - 'tile_00_00.tif', 'tile_00_01.tif', - 'tile_01_00.tif', 'tile_01_01.tif', - ] - - def test_relative_paths_in_vrt(self, sample_raster, tmp_path): - vrt_path = str(tmp_path / 'rel_1083.vrt') - to_geotiff(sample_raster, vrt_path) - with open(vrt_path) as f: - content = f.read() - # Paths should be relative (no leading /) - assert 'rel_1083_tiles/' in content - assert str(tmp_path) not in content - - def test_compression_level_passed_to_tiles(self, sample_raster, tmp_path): - vrt_path = str(tmp_path / 'cl_1083.vrt') - to_geotiff(sample_raster, vrt_path, compression='zstd', - compression_level=1) - result = open_geotiff(vrt_path) - np.testing.assert_array_almost_equal( - result.values, sample_raster.values, decimal=5) - - -class TestVrtOutputDask: - def test_dask_round_trip(self, sample_raster, tmp_path): - dask_da = sample_raster.chunk({'y': 100, 'x': 100}) - vrt_path = str(tmp_path / 'dask_1083.vrt') - to_geotiff(dask_da, vrt_path) - result = open_geotiff(vrt_path) - np.testing.assert_array_almost_equal( - result.values, sample_raster.values, decimal=5) - - def test_dask_one_tile_per_chunk(self, sample_raster, tmp_path): - dask_da = sample_raster.chunk({'y': 100, 'x': 100}) - vrt_path = str(tmp_path / 'chunks_1083.vrt') - to_geotiff(dask_da, vrt_path) - tiles_dir = str(tmp_path / 'chunks_1083_tiles') - # 200x200 chunked 100x100 -> 2x2 = 4 tiles - assert len(os.listdir(tiles_dir)) == 4 - - -class TestVrtEdgeCases: - def test_cog_with_vrt_raises(self, sample_raster, tmp_path): - vrt_path = str(tmp_path / 'cog_1083.vrt') - with pytest.raises(ValueError, match='cog.*vrt|vrt.*cog|COG.*VRT|VRT.*COG|cog.*VRT|vrt.*COG'): # noqa: E501 - to_geotiff(sample_raster, vrt_path, cog=True) - - def 
test_overview_levels_with_vrt_raises(self, sample_raster, tmp_path): - vrt_path = str(tmp_path / 'ovr_1083.vrt') - with pytest.raises(ValueError, match='overview.*vrt|vrt.*overview|overview.*VRT|VRT.*overview'): # noqa: E501 - to_geotiff(sample_raster, vrt_path, overview_levels=[2, 4]) - - def test_nonempty_tiles_dir_raises(self, sample_raster, tmp_path): - tiles_dir = tmp_path / 'exist_1083_tiles' - tiles_dir.mkdir() - (tiles_dir / 'dummy.tif').write_text('x') - vrt_path = str(tmp_path / 'exist_1083.vrt') - with pytest.raises(FileExistsError): - to_geotiff(sample_raster, vrt_path) - - def test_empty_tiles_dir_ok(self, sample_raster, tmp_path): - tiles_dir = tmp_path / 'empty_1083_tiles' - tiles_dir.mkdir() - vrt_path = str(tmp_path / 'empty_1083.vrt') - to_geotiff(sample_raster, vrt_path) - assert os.path.exists(vrt_path) diff --git a/xrspatial/geotiff/tests/test_vrt_writer_int64_1833.py b/xrspatial/geotiff/tests/test_vrt_writer_int64_1833.py deleted file mode 100644 index 7e4b0751e..000000000 --- a/xrspatial/geotiff/tests/test_vrt_writer_int64_1833.py +++ /dev/null @@ -1,75 +0,0 @@ -"""Regression tests for VRT writer 64-bit integer dtype handling. - -``write_vrt`` (and ``to_geotiff(da, "*.vrt")`` by extension) previously -mapped signed 64-bit source rasters to ``Int32`` and unsigned 64-bit -source rasters to ``Byte`` because the dtype lookup had no entry for -``bps=64`` and fell back to the small-int default. The VRT reader has -explicit ``UInt64`` / ``Int64`` support (see issue #1783), so the loss -happened on write -- silently truncating uint64 values to ``[0, 255]``. - -See issue #1833. 
-""" -from __future__ import annotations - -import re - -import numpy as np -import xarray as xr - -from xrspatial.geotiff import open_geotiff, to_geotiff - - -def _da(arr: np.ndarray) -> xr.DataArray: - h, w = arr.shape - return xr.DataArray( - arr, - dims=('y', 'x'), - coords={'y': np.arange(h, dtype=np.float64), - 'x': np.arange(w, dtype=np.float64)}, - attrs={'res': (1.0, 1.0)}, - ) - - -def _read_vrt_dtype_attr(vrt_path: str) -> str: - """Extract the ``dataType`` attribute from the emitted VRT XML.""" - with open(vrt_path) as f: - xml = f.read() - m = re.search(r'dataType="([^"]+)"', xml) - assert m is not None, f"no dataType attribute in VRT:\n{xml}" - return m.group(1) - - -def test_uint64_vrt_writer_declares_uint64(tmp_path): - big = np.iinfo(np.uint64).max - arr = np.array([[1, 2], [big - 7, big]], dtype=np.uint64) - vrt = tmp_path / 'u64_1833.vrt' - to_geotiff(_da(arr), str(vrt)) - assert _read_vrt_dtype_attr(str(vrt)) == 'UInt64' - - -def test_int64_vrt_writer_declares_int64(tmp_path): - info = np.iinfo(np.int64) - arr = np.array([[info.min, -1], [0, info.max]], dtype=np.int64) - vrt = tmp_path / 'i64_1833.vrt' - to_geotiff(_da(arr), str(vrt)) - assert _read_vrt_dtype_attr(str(vrt)) == 'Int64' - - -def test_uint64_vrt_round_trip(tmp_path): - big = np.iinfo(np.uint64).max - arr = np.array([[1, 2], [big - 7, big]], dtype=np.uint64) - vrt = tmp_path / 'u64_rt_1833.vrt' - to_geotiff(_da(arr), str(vrt)) - r = open_geotiff(str(vrt)) - assert r.dtype == np.uint64 - np.testing.assert_array_equal(np.asarray(r.values), arr) - - -def test_int64_vrt_round_trip(tmp_path): - info = np.iinfo(np.int64) - arr = np.array([[info.min, -1], [0, info.max]], dtype=np.int64) - vrt = tmp_path / 'i64_rt_1833.vrt' - to_geotiff(_da(arr), str(vrt)) - r = open_geotiff(str(vrt)) - assert r.dtype == np.int64 - np.testing.assert_array_equal(np.asarray(r.values), arr) diff --git a/xrspatial/geotiff/tests/test_vrt_writer_photometric_1861.py 
b/xrspatial/geotiff/tests/test_vrt_writer_photometric_1861.py deleted file mode 100644 index 88a5e4c29..000000000 --- a/xrspatial/geotiff/tests/test_vrt_writer_photometric_1861.py +++ /dev/null @@ -1,75 +0,0 @@ -"""Regression test for issue #1861: ``photometric`` dropped by VRT writer. - -``to_geotiff(data, '.vrt', photometric=...)`` accepted the kwarg at the -public boundary but ``_write_vrt_tiled`` did not take ``photometric`` and -``_write_single_tile`` did not forward it to ``write(...)``. Per-tile -TIFFs were always tagged with the default Photometric=MinIsBlack (1) no -matter what the caller requested. - -This test pins the fix: the kwarg now threads through to every per-tile -``write`` call so the on-disk Photometric tag matches the request. -""" -from __future__ import annotations - -import glob -import os - -import numpy as np -import xarray as xr - -from xrspatial.geotiff import to_geotiff -from xrspatial.geotiff._header import TAG_PHOTOMETRIC, parse_header, parse_ifd - - -def _read_primary_ifd(path: str): - with open(path, 'rb') as f: - raw = f.read() - hdr = parse_header(raw[:16]) - return parse_ifd(raw, hdr.first_ifd_offset, hdr) - - -def _tile_paths(vrt_path: str): - stem = os.path.splitext(os.path.basename(vrt_path))[0] - tiles_dir = os.path.join( - os.path.dirname(os.path.abspath(vrt_path)), - stem + '_tiles', - ) - return sorted(glob.glob(os.path.join(tiles_dir, 'tile_*.tif'))) - - -def test_vrt_writer_forwards_photometric_miniswhite_1861(tmp_path): - """photometric='miniswhite' must tag every per-tile TIFF with - PhotometricInterpretation = 0 (MinIsWhite).""" - arr = np.zeros((48, 48), dtype=np.uint8) - da = xr.DataArray(arr, dims=('y', 'x')) - vrt_path = str(tmp_path / 'miniswhite_1861.vrt') - - to_geotiff(da, vrt_path, photometric='miniswhite', tile_size=16) - - tiles = _tile_paths(vrt_path) - assert tiles, 'expected at least one per-tile TIFF under _tiles/' - for tile in tiles: - ifd = _read_primary_ifd(tile) - assert 
ifd.get_value(TAG_PHOTOMETRIC) == 0, ( - f'tile {tile} has Photometric ' - f'{ifd.get_value(TAG_PHOTOMETRIC)}, expected 0 (MinIsWhite)' - ) - - -def test_vrt_writer_default_photometric_minisblack_1861(tmp_path): - """Control: default photometric='auto' keeps per-tile TIFFs at - PhotometricInterpretation = 1 (MinIsBlack).""" - arr = np.zeros((48, 48), dtype=np.uint8) - da = xr.DataArray(arr, dims=('y', 'x')) - vrt_path = str(tmp_path / 'default_auto_1861.vrt') - - to_geotiff(da, vrt_path, tile_size=16) - - tiles = _tile_paths(vrt_path) - assert tiles, 'expected at least one per-tile TIFF under _tiles/' - for tile in tiles: - ifd = _read_primary_ifd(tile) - assert ifd.get_value(TAG_PHOTOMETRIC) == 1, ( - f'tile {tile} has Photometric ' - f'{ifd.get_value(TAG_PHOTOMETRIC)}, expected 1 (MinIsBlack)' - ) diff --git a/xrspatial/geotiff/tests/test_vrt_writer_source_compat_1733.py b/xrspatial/geotiff/tests/test_vrt_writer_source_compat_1733.py deleted file mode 100644 index 380f02fea..000000000 --- a/xrspatial/geotiff/tests/test_vrt_writer_source_compat_1733.py +++ /dev/null @@ -1,188 +0,0 @@ -"""Regression tests for issue #1733. - -``write_vrt`` previously trusted the first source for resolution, -sample format + bps (dtype), band count, and CRS. A mismatched source -would silently produce a VRT that placed pixels incorrectly or -re-interpreted bytes as the wrong dtype downstream. - -These tests assert that ``write_vrt`` now rejects mismatched sources -with a clear ``ValueError`` covering each of those properties, and -still accepts sources that match within a small float tolerance on -pixel size. 
-""" -from __future__ import annotations - -import os -import uuid - -import numpy as np -import pytest -import xarray as xr - -from xrspatial.geotiff import to_geotiff -from xrspatial.geotiff._vrt import write_vrt - - -def _unique_dir(tmp_path, label: str) -> str: - d = tmp_path / f"vrt_1733_{label}_{uuid.uuid4().hex[:8]}" - d.mkdir() - return str(d) - - -def _write_tif(path: str, *, h: int, w: int, dtype, bands: int = 1, - px: float = 1.0, py: float = -1.0, - origin_x: float = 0.0, origin_y: float = 100.0, - crs: int | None = 4326) -> None: - if bands == 1: - arr = np.arange(h * w, dtype=dtype).reshape(h, w) - dims = ['y', 'x'] - else: - arr = np.arange(h * w * bands, dtype=dtype).reshape(h, w, bands) - dims = ['y', 'x', 'band'] - y = origin_y + (np.arange(h) + 0.5) * py - x = origin_x + (np.arange(w) + 0.5) * px - coords = {'y': y, 'x': x} - attrs = {} - if crs is not None: - attrs['crs'] = crs - da = xr.DataArray(arr, dims=dims, coords=coords, attrs=attrs) - to_geotiff(da, path, compression='none') - - -def test_mismatched_pixel_size_raises(tmp_path): - d = _unique_dir(tmp_path, "px") - a = os.path.join(d, "a.tif") - b = os.path.join(d, "b.tif") - _write_tif(a, h=4, w=4, dtype=np.float32, px=1.0, py=-1.0) - # Place b adjacent so the geometry would otherwise work, but the - # pixel size disagrees. 
- _write_tif(b, h=4, w=4, dtype=np.float32, px=2.0, py=-2.0, - origin_x=4.0) - vrt = os.path.join(d, "out.vrt") - with pytest.raises(ValueError, match="pixel size"): - write_vrt(vrt, [a, b]) - - -def test_mismatched_dtype_raises(tmp_path): - d = _unique_dir(tmp_path, "dtype") - a = os.path.join(d, "a.tif") - b = os.path.join(d, "b.tif") - _write_tif(a, h=4, w=4, dtype=np.float32) - _write_tif(b, h=4, w=4, dtype=np.int16, origin_x=4.0) - vrt = os.path.join(d, "out.vrt") - with pytest.raises(ValueError, match="dtype|sample_format|bps"): - write_vrt(vrt, [a, b]) - - -def test_mismatched_band_count_raises(tmp_path): - d = _unique_dir(tmp_path, "bands") - a = os.path.join(d, "a.tif") - b = os.path.join(d, "b.tif") - _write_tif(a, h=4, w=4, dtype=np.float32, bands=1) - _write_tif(b, h=4, w=4, dtype=np.float32, bands=3, origin_x=4.0) - vrt = os.path.join(d, "out.vrt") - with pytest.raises(ValueError, match="band count"): - write_vrt(vrt, [a, b]) - - -def test_compatible_sources_succeed(tmp_path): - d = _unique_dir(tmp_path, "ok") - a = os.path.join(d, "a.tif") - b = os.path.join(d, "b.tif") - _write_tif(a, h=4, w=4, dtype=np.float32) - _write_tif(b, h=4, w=4, dtype=np.float32, origin_x=4.0) - vrt = os.path.join(d, "out.vrt") - write_vrt(vrt, [a, b]) - assert os.path.exists(vrt) - - -def test_pixel_size_within_tolerance_accepted(tmp_path): - d = _unique_dir(tmp_path, "tol") - a = os.path.join(d, "a.tif") - b = os.path.join(d, "b.tif") - _write_tif(a, h=4, w=4, dtype=np.float32, px=1.0, py=-1.0) - # Drift well below the 1e-6 relative tolerance. 
- _write_tif(b, h=4, w=4, dtype=np.float32, - px=1.0 + 1e-10, py=-1.0, origin_x=4.0) - vrt = os.path.join(d, "out.vrt") - write_vrt(vrt, [a, b]) - assert os.path.exists(vrt) - - -def test_single_source_still_works(tmp_path): - d = _unique_dir(tmp_path, "one") - a = os.path.join(d, "a.tif") - _write_tif(a, h=4, w=4, dtype=np.float32) - vrt = os.path.join(d, "out.vrt") - write_vrt(vrt, [a]) - assert os.path.exists(vrt) - - -def test_mismatched_crs_raises(tmp_path): - # Two sources with different non-empty CRS values must be rejected, - # otherwise the VRT would inherit the first source's CRS and silently - # misproject the second. - d = _unique_dir(tmp_path, "crs_diff") - a = os.path.join(d, "a.tif") - b = os.path.join(d, "b.tif") - _write_tif(a, h=4, w=4, dtype=np.float32, crs=4326) - _write_tif(b, h=4, w=4, dtype=np.float32, origin_x=4.0, crs=3857) - vrt = os.path.join(d, "out.vrt") - with pytest.raises(ValueError, match="CRS"): - write_vrt(vrt, [a, b]) - - -def test_asymmetric_crs_raises_first_set_second_missing(tmp_path): - # First source has a CRS, second is written without one. The VRT - # would otherwise be tagged with the first source's CRS, which can - # misplace data when the second source actually came from a - # different (or unknown) projection. - d = _unique_dir(tmp_path, "crs_first") - a = os.path.join(d, "a.tif") - b = os.path.join(d, "b.tif") - _write_tif(a, h=4, w=4, dtype=np.float32, crs=4326) - _write_tif(b, h=4, w=4, dtype=np.float32, origin_x=4.0, crs=None) - vrt = os.path.join(d, "out.vrt") - with pytest.raises(ValueError, match="CRS"): - write_vrt(vrt, [a, b]) - - -def test_asymmetric_crs_raises_first_missing_second_set(tmp_path): - # Symmetric case: first source missing a CRS, second has one. The - # earlier guard only triggered when both sides were set, so this - # would have silently produced an untagged VRT despite one source - # carrying a known projection. 
- d = _unique_dir(tmp_path, "crs_second") - a = os.path.join(d, "a.tif") - b = os.path.join(d, "b.tif") - _write_tif(a, h=4, w=4, dtype=np.float32, crs=None) - _write_tif(b, h=4, w=4, dtype=np.float32, origin_x=4.0, crs=4326) - vrt = os.path.join(d, "out.vrt") - with pytest.raises(ValueError, match="CRS"): - write_vrt(vrt, [a, b]) - - -def test_matching_crs_succeeds(tmp_path): - # Sanity check: two sources with the same CRS should still be - # accepted (defends against an overly aggressive equality check). - d = _unique_dir(tmp_path, "crs_match") - a = os.path.join(d, "a.tif") - b = os.path.join(d, "b.tif") - _write_tif(a, h=4, w=4, dtype=np.float32, crs=4326) - _write_tif(b, h=4, w=4, dtype=np.float32, origin_x=4.0, crs=4326) - vrt = os.path.join(d, "out.vrt") - write_vrt(vrt, [a, b]) - assert os.path.exists(vrt) - - -def test_both_missing_crs_succeeds(tmp_path): - # If neither source has a CRS, the VRT just won't be tagged with one - # and there's nothing to mis-tag. This must not raise. - d = _unique_dir(tmp_path, "crs_both_missing") - a = os.path.join(d, "a.tif") - b = os.path.join(d, "b.tif") - _write_tif(a, h=4, w=4, dtype=np.float32, crs=None) - _write_tif(b, h=4, w=4, dtype=np.float32, origin_x=4.0, crs=None) - vrt = os.path.join(d, "out.vrt") - write_vrt(vrt, [a, b]) - assert os.path.exists(vrt) diff --git a/xrspatial/geotiff/tests/test_write_layout_monkeypatch_contract_2248.py b/xrspatial/geotiff/tests/test_write_layout_monkeypatch_contract_2248.py deleted file mode 100644 index 7f053b730..000000000 --- a/xrspatial/geotiff/tests/test_write_layout_monkeypatch_contract_2248.py +++ /dev/null @@ -1,83 +0,0 @@ -"""Lock down the ``_writer.*`` monkeypatch contract for ``_assemble_tiff``. 
- -When the IFD-assembly and layout helpers were extracted from -``_writer.py`` into ``_write_layout.py`` (issue #2248), the eager -writer's ``_assemble_tiff`` retained the pre-extraction property that -patching ``_writer._compute_classic_ifd_overhead`` (and the other -helpers ``_assemble_tiff`` dispatches to) flows through into -``_assemble_tiff``'s execution. The single existing regression at -``test_eager_bigtiff_overhead_exact_1905`` covers the -``_compute_classic_ifd_overhead`` indirection only. These tests cover -the remaining indirected names so a future refactor that inlines any -of them at the call site is caught immediately. -""" -from __future__ import annotations - -import numpy as np -import pytest -import xarray as xr - -from xrspatial.geotiff import _writer as writer_mod -from xrspatial.geotiff import to_geotiff - - -def _make_float32(h: int = 8, w: int = 8) -> xr.DataArray: - arr = np.arange(h * w, dtype=np.float32).reshape(h, w) - return xr.DataArray( - arr, - dims=["y", "x"], - coords={ - "x": np.arange(w, dtype=np.float64), - "y": np.arange(h, dtype=np.float64), - }, - attrs={"crs": 4326}, - ) - - -@pytest.mark.parametrize( - "helper_name", - [ - "_promote_offsets_to_long8", - "_assemble_standard_layout", - "_assemble_cog_layout", - "_resolve_photometric", - ], -) -def test_assemble_tiff_resolves_helper_through_writer_module( - monkeypatch, tmp_path, helper_name, -): - """``_assemble_tiff`` must look up ``helper_name`` via ``_writer``. - - Replace the helper on the ``_writer`` module with a sentinel that - records the call and delegates to the real implementation. If - ``_assemble_tiff`` were to bind the helper at import time (rather - than resolving it through ``_writer`` on each call), the sentinel - would never fire and the assertion would fail. 
- """ - real = getattr(writer_mod, helper_name) - calls: list[tuple] = [] - - def _wrapped(*args, **kwargs): - calls.append((args, tuple(sorted(kwargs.items())))) - return real(*args, **kwargs) - - monkeypatch.setattr(writer_mod, helper_name, _wrapped) - - da = _make_float32(8, 8) - path = str(tmp_path / f"monkeypatch_{helper_name}_2248.tif") - - # ``_assemble_cog_layout`` only fires when at least one overview - # is written; ``_promote_offsets_to_long8`` only fires when the - # writer chooses BigTIFF. Pass the right kwargs per helper so each - # one is exercised by ``_assemble_tiff`` on this call. - if helper_name == "_assemble_cog_layout": - to_geotiff(da, path, cog=True, overview_levels=[2]) - elif helper_name == "_promote_offsets_to_long8": - to_geotiff(da, path, bigtiff=True) - else: - to_geotiff(da, path) - - assert calls, ( - f"_assemble_tiff did not call _writer.{helper_name}; the " - f"monkeypatch on the _writer namespace was bypassed." - ) diff --git a/xrspatial/geotiff/tests/test_write_vrt_bool_nodata_1921.py b/xrspatial/geotiff/tests/test_write_vrt_bool_nodata_1921.py deleted file mode 100644 index 9f7afd9ce..000000000 --- a/xrspatial/geotiff/tests/test_write_vrt_bool_nodata_1921.py +++ /dev/null @@ -1,147 +0,0 @@ -"""Backend-parity coverage for bool / np.bool_ nodata rejection. - -Issue #1911 added the ``isinstance(nodata, (bool, np.bool_)) -> TypeError`` -guard at the ``to_geotiff`` entry point, with a belt-and-braces copy in -``_geotags.build_geo_tags``. Issue #1921 extended the same parity to the -sibling writers: - -* ``write_vrt`` -- now rejects bool nodata at the public wrapper via - ``_validate_nodata_arg`` and again inside ``_vrt.write_vrt`` as - defense-in-depth. Previously wrote ``True`` - into the VRT XML, which no reader parses as numeric, so the - round-trip silently dropped the sentinel. -* ``write_geotiff_gpu`` (direct call) -- already routes through - ``_validate_nodata_arg`` near the top of the function. 
Pinning the - behaviour here so a refactor that drops that call surfaces the - regression at the parity boundary, not inside ``build_geo_tags``. - -Found by ``/sweep-test-coverage`` (pass 15 / 2026-05-15). -""" -from __future__ import annotations - -import os - -import numpy as np -import pytest -import xarray as xr - -from xrspatial.geotiff import to_geotiff, write_vrt -from xrspatial.geotiff.tests.conftest import requires_gpu - - -@pytest.fixture -def uint8_da(): - """Small uint8 DataArray for nodata round-trip tests.""" - arr = np.zeros((4, 4), dtype=np.uint8) - return xr.DataArray(arr, dims=['y', 'x']) - - -@pytest.fixture -def src_geotiff(uint8_da, tmp_path): - """A real on-disk source GeoTIFF that write_vrt can point at.""" - path = str(tmp_path / "src_1921.tif") - to_geotiff(uint8_da, path) - return path - - -# --------------------------------------------------------------------------- -# write_vrt: the bug from issue #1921 -# --------------------------------------------------------------------------- - - -@pytest.mark.parametrize( - "bad", - [True, False, np.bool_(True), np.bool_(False)], -) -def test_write_vrt_rejects_bool_nodata(src_geotiff, tmp_path, bad): - """``write_vrt`` raises ``TypeError`` for any bool nodata. - - Fixed in issue #1921 by routing the public ``write_vrt`` wrapper - through ``_validate_nodata_arg`` and adding a defense-in-depth check - inside the internal ``_vrt.write_vrt``. - """ - vrt_path = str(tmp_path / "out_1921_bad.vrt") - with pytest.raises(TypeError, match="nodata must be numeric"): - write_vrt(vrt_path, [src_geotiff], nodata=bad) - - -@pytest.mark.parametrize( - "bad", - [True, False, np.bool_(True), np.bool_(False)], -) -def test_write_vrt_internal_rejects_bool_nodata(src_geotiff, tmp_path, bad): - """Direct call to the internal ``_vrt.write_vrt`` also rejects bool. - - Defense-in-depth: the public wrapper's ``_validate_nodata_arg`` is - skipped when callers reach the internal symbol directly (e.g. 
the - multi-tile dask write path in ``_writers/eager.py`` that calls - ``_vrt.write_vrt`` after writing per-tile GeoTIFFs, or a future - split of the wrapper). Parametrize over both ``bool`` and - ``np.bool_`` polarities so a refactor that narrows the internal - guard to just ``bool`` surfaces here, not in user code. See #1921. - """ - from xrspatial.geotiff._vrt import write_vrt as _internal_write_vrt - vrt_path = str(tmp_path / "out_1921_internal.vrt") - with pytest.raises(TypeError, match="nodata must be numeric"): - _internal_write_vrt(vrt_path, [src_geotiff], nodata=bad) - - -@pytest.mark.parametrize( - "good", - [0, 0.0, -9999, 255, np.int16(-1), np.float32(0.5)], -) -def test_write_vrt_accepts_numeric_nodata(src_geotiff, tmp_path, good): - """Numeric sentinels go through unchanged: the fix must not over-reject.""" - vrt_path = str(tmp_path / f"out_1921_numeric_{good!r}.vrt") - write_vrt(vrt_path, [src_geotiff], nodata=good) - with open(vrt_path) as f: - content = f.read() - # The exact format of the emitted nodata string is implementation - # detail; we only assert no "True"/"False" leaked through. - assert "True" not in content - assert "False" not in content - - -def test_write_vrt_accepts_none_nodata(src_geotiff, tmp_path): - """``nodata=None`` is the documented default and must keep working.""" - vrt_path = str(tmp_path / "out_1921_none.vrt") - write_vrt(vrt_path, [src_geotiff], nodata=None) - assert os.path.exists(vrt_path) - - -# --------------------------------------------------------------------------- -# write_geotiff_gpu: defense-in-depth parity -# --------------------------------------------------------------------------- - - -@requires_gpu -@pytest.mark.parametrize( - "bad", - [True, False, np.bool_(True), np.bool_(False)], -) -def test_write_geotiff_gpu_rejects_bool_nodata(uint8_da, tmp_path, bad): - """Direct ``write_geotiff_gpu`` call rejects bool nodata. 
- - The top-of-function ``_validate_nodata_arg`` call (added by #1973) - fires first; the deeper ``build_geo_tags`` guard is a second line - of defense. Pinning the behaviour so a refactor that drops the - top-of-function call surfaces here, not deep inside the geotag - builder. - """ - from xrspatial.geotiff import write_geotiff_gpu - path = str(tmp_path / "gpu_1921_bad.tif") - with pytest.raises(TypeError, match="nodata must be numeric"): - write_geotiff_gpu(uint8_da, path, nodata=bad) - - -@requires_gpu -def test_to_geotiff_gpu_dispatch_rejects_bool_nodata(uint8_da, tmp_path): - """Auto-dispatch path: ``to_geotiff(gpu=True, nodata=True)``. - - The eager-side guard fires before dispatch, so the GPU writer never - runs. Pin that ordering so a future refactor cannot accidentally - skip the eager check on the GPU dispatch path. - """ - path = str(tmp_path / "to_geotiff_gpu_1921.tif") - with pytest.raises(TypeError, match="nodata must be numeric"): - to_geotiff(uint8_da, path, gpu=True, nodata=True) diff --git a/xrspatial/geotiff/tests/test_write_vrt_crs_1715.py b/xrspatial/geotiff/tests/test_write_vrt_crs_1715.py deleted file mode 100644 index 9bff86699..000000000 --- a/xrspatial/geotiff/tests/test_write_vrt_crs_1715.py +++ /dev/null @@ -1,225 +0,0 @@ -"""Regression test for #1715: write_vrt accepts ``crs`` for parity with -``to_geotiff`` / ``write_geotiff_gpu``. - -The api-consistency sweep on 2026-05-12 flagged that ``write_vrt`` was -the only writer in ``xrspatial.geotiff`` using ``crs_wkt`` instead of -``crs``, breaking the "forward the same kwargs to whichever writer -matches the output extension" pattern. The fix adds ``crs`` as the -canonical kwarg and keeps ``crs_wkt`` as a deprecated alias. - -This module pins: - -* ``crs`` accepts ``int`` (EPSG) and ``str`` (WKT) and ``None``, - matching ``to_geotiff``/``write_geotiff_gpu``. -* The ``crs_wkt`` alias still works but emits ``DeprecationWarning``. -* Passing both ``crs`` and ``crs_wkt`` raises ``TypeError``. 
-* The deprecation shim does NOT warn when neither kwarg is supplied - (the no-crs path picks from the first source, unchanged from - pre-#1715 behaviour). -* Read-back round trip: ``read_vrt(written).attrs['crs'] == 4326`` - when the writer was given ``crs=4326``. -""" -from __future__ import annotations - -import os -import warnings - -import numpy as np -import pytest -import xarray as xr - -from xrspatial.geotiff import read_vrt, to_geotiff, write_vrt - - -def _build_source_tif(tmp_path, name='src.tif'): - """Create a small GeoTIFF used as the VRT's source file.""" - arr = np.arange(8 * 8, dtype=np.float32).reshape(8, 8) - da = xr.DataArray( - arr, dims=['y', 'x'], - coords={'y': np.arange(8.0, 0, -1), 'x': np.arange(8.0)}, - attrs={'crs': 4326, 'transform': (1.0, 0, 0.0, 0, -1.0, 8.0)}, - ) - p = str(tmp_path / name) - to_geotiff(da, p) - return p - - -# --- Signature pins --- - - -def test_write_vrt_accepts_crs_kwarg(): - """``crs`` is in the signature and defaults to ``None``.""" - import inspect - - sig = inspect.signature(write_vrt) - assert 'crs' in sig.parameters - assert sig.parameters['crs'].default is None - - -def test_write_vrt_crs_annotation_matches_writer_trio(): - """``crs`` is annotated ``int | str | None``, identical to - ``to_geotiff(..., crs=...)`` and ``write_geotiff_gpu(..., crs=...)``. - """ - import inspect - - sig = inspect.signature(write_vrt) - ann = str(sig.parameters['crs'].annotation) - assert ann == 'int | str | None' - - -# --- Runtime: ``crs=`` writes an EPSG-resolved WKT --- - - -def test_write_vrt_crs_epsg_int_writes_wkt_to_xml(tmp_path): - """``crs=4326`` resolves to a WKT string in the VRT's element. - - The current implementation forwards the WKT to ``_vrt.write_vrt``, - which interpolates it into the XML node. Reading the file - back with ``read_vrt`` must therefore produce - ``attrs['crs'] == 4326`` (because ``_wkt_to_epsg`` round-trips - EPSG:4326's WKT cleanly). 
- """ - src = _build_source_tif(tmp_path, 'epsg_int.tif') - vrt_path = str(tmp_path / 'epsg_int.vrt') - - out = write_vrt(vrt_path, [src], crs=4326) - assert out == vrt_path - assert os.path.exists(vrt_path) - - da = read_vrt(vrt_path) - assert da.attrs.get('crs') == 4326 - - -def test_write_vrt_crs_wkt_string(tmp_path): - """``crs=`` passes the WKT through verbatim.""" - src = _build_source_tif(tmp_path, 'wkt.tif') - vrt_path = str(tmp_path / 'wkt.vrt') - - # Build a WKT for EPSG:4326 directly via pyproj - from pyproj import CRS - - wkt = CRS.from_epsg(4326).to_wkt() - - out = write_vrt(vrt_path, [src], crs=wkt) - assert out == vrt_path - da = read_vrt(vrt_path) - # WKT round-trips back to EPSG:4326 via _wkt_to_epsg - assert da.attrs.get('crs') == 4326 - - -def test_write_vrt_crs_none_falls_through(tmp_path): - """``crs=None`` (the default) picks the CRS from the first source.""" - src = _build_source_tif(tmp_path, 'none.tif') - vrt_path = str(tmp_path / 'none.vrt') - - with warnings.catch_warnings(): - warnings.simplefilter('error', DeprecationWarning) - out = write_vrt(vrt_path, [src], crs=None) - assert out == vrt_path - da = read_vrt(vrt_path) - # The source TIFF was written with EPSG:4326; VRT inherits it. - assert da.attrs.get('crs') == 4326 - - -def test_write_vrt_no_crs_kwarg_no_warning(tmp_path): - """Omitting ``crs`` entirely (the most common call shape) does not - emit any warning. 
The deprecation shim only fires when ``crs_wkt`` - is supplied explicitly.""" - src = _build_source_tif(tmp_path, 'no_kwarg.tif') - vrt_path = str(tmp_path / 'no_kwarg.vrt') - - with warnings.catch_warnings(): - warnings.simplefilter('error', DeprecationWarning) - write_vrt(vrt_path, [src]) # neither kwarg supplied - assert os.path.exists(vrt_path) - - -# --- Deprecation shim: ``crs_wkt=`` still works but warns --- - - -def test_write_vrt_crs_wkt_deprecated_warns(tmp_path): - """Passing ``crs_wkt=`` emits ``DeprecationWarning`` but still - produces a working VRT.""" - src = _build_source_tif(tmp_path, 'depr.tif') - vrt_path = str(tmp_path / 'depr.vrt') - - from pyproj import CRS - - wkt = CRS.from_epsg(4326).to_wkt() - - with pytest.warns(DeprecationWarning, match='crs_wkt'): - out = write_vrt(vrt_path, [src], crs_wkt=wkt) - assert out == vrt_path - da = read_vrt(vrt_path) - assert da.attrs.get('crs') == 4326 - - -def test_write_vrt_crs_wkt_none_still_warns(tmp_path): - """``crs_wkt=None`` (explicit) was a documented shape in the old - signature -- it now warns because the caller is using the - deprecated kwarg name, even if the value is None.""" - src = _build_source_tif(tmp_path, 'depr_none.tif') - vrt_path = str(tmp_path / 'depr_none.vrt') - - with pytest.warns(DeprecationWarning, match='crs_wkt'): - write_vrt(vrt_path, [src], crs_wkt=None) - assert os.path.exists(vrt_path) - - -def test_write_vrt_both_crs_and_crs_wkt_rejected(tmp_path): - """Passing both raises ``TypeError`` rather than silently picking - one. 
The error message names both kwargs so the caller can fix - their call quickly.""" - src = _build_source_tif(tmp_path, 'both.tif') - vrt_path = str(tmp_path / 'both.vrt') - - from pyproj import CRS - - wkt = CRS.from_epsg(4326).to_wkt() - - with pytest.raises(TypeError, match='crs.*crs_wkt'): - write_vrt(vrt_path, [src], crs=4326, crs_wkt=wkt) - - -# --- Cross-writer parity: same kwarg name on all three writers --- - - -def test_writer_trio_all_accept_crs_kwarg(): - """``crs`` is the canonical kwarg on every public writer in the trio. - A caller forwarding ``crs=`` to whichever writer matches the - output extension never has to special-case the kwarg name (issue - #1715).""" - import inspect - - from xrspatial.geotiff import to_geotiff, write_geotiff_gpu, write_vrt - - for fn in (to_geotiff, write_geotiff_gpu, write_vrt): - sig = inspect.signature(fn) - assert 'crs' in sig.parameters, f"{fn.__name__} missing crs kwarg" - assert ( - str(sig.parameters['crs'].annotation) == 'int | str | None' - ), f"{fn.__name__}.crs annotation drift" - - -# --- Negative tests: bad input shapes --- - - -def test_write_vrt_crs_invalid_type_rejected(tmp_path): - """``crs=`` (or any non-int/str/None) raises ``TypeError`` from - the public wrapper rather than from deep inside the writer.""" - src = _build_source_tif(tmp_path, 'bad_type.tif') - vrt_path = str(tmp_path / 'bad_type.vrt') - - with pytest.raises(TypeError, match='crs must be'): - write_vrt(vrt_path, [src], crs=[4326]) - - -def test_write_vrt_crs_unparseable_string_rejected(tmp_path): - """``crs='not a CRS'`` raises ``ValueError`` from the public - wrapper (the WKT keyword heuristic recognises PROJCS/GEOGCS only; - everything else is sent through pyproj which will reject it).""" - src = _build_source_tif(tmp_path, 'bad_str.tif') - vrt_path = str(tmp_path / 'bad_str.vrt') - - with pytest.raises(ValueError, match='Could not parse crs'): - write_vrt(vrt_path, [src], crs='not-a-real-crs-string') diff --git 
a/xrspatial/geotiff/tests/test_write_vrt_int_nodata_1684.py b/xrspatial/geotiff/tests/test_write_vrt_int_nodata_1684.py deleted file mode 100644 index a4e3dd077..000000000 --- a/xrspatial/geotiff/tests/test_write_vrt_int_nodata_1684.py +++ /dev/null @@ -1,88 +0,0 @@ -"""Regression test for #1684: ``write_vrt`` nodata annotation rejected ints. - -The api-consistency sweep on 2026-05-12 flagged that -``xrspatial.geotiff.write_vrt`` annotated ``nodata`` as ``float | None`` -even though the sibling writers ``to_geotiff`` and ``write_geotiff_gpu`` -accept ``float``, ``int``, or ``None``. Integer sentinels (``65535`` for -uint16, ``-9999`` for int32) flow through the rest of the I/O surface -unchanged, so the float-only hint forced callers either to cast (losing -the exact sentinel) or to ignore the static-type complaint. - -This module pins the widened annotation and confirms an integer nodata -round-trips through ``write_vrt`` -> ``read_vrt`` losslessly. -""" -from __future__ import annotations - -import inspect -import typing - -import numpy as np -import xarray as xr - -from xrspatial.geotiff import _vrt as _vrt_module -from xrspatial.geotiff import to_geotiff, write_vrt - - -def _nodata_annotation(fn): - sig = inspect.signature(fn) - return sig.parameters["nodata"].annotation - - -def test_write_vrt_public_nodata_accepts_int_annotation(): - """The public wrapper widens the annotation to include int.""" - ann = _nodata_annotation(write_vrt) - # Allow either typing.Union[float, int, None] or float | int | None. - if isinstance(ann, str): - # Forward-referenced string annotation (rare here; defensive). - assert "int" in ann, ann - return - if hasattr(typing, "get_args"): - args = set(typing.get_args(ann)) - if args: - assert int in args, args - return - # Fallback: stringify the annotation. 
- assert "int" in str(ann), str(ann) - - -def test_write_vrt_internal_nodata_accepts_int_annotation(): - """The internal helper in `_vrt.py` mirrors the public surface.""" - ann = _nodata_annotation(_vrt_module.write_vrt) - if isinstance(ann, str): - assert "int" in ann, ann - return - if hasattr(typing, "get_args"): - args = set(typing.get_args(ann)) - if args: - assert int in args, args - return - assert "int" in str(ann), str(ann) - - -def test_write_vrt_int_nodata_round_trips(tmp_path): - """An int nodata renders to ```` and parses back the same.""" - # Build a tiny uint16 tile so the sentinel makes sense. - arr = np.array([[100, 200, 65535], - [300, 400, 500]], dtype=np.uint16) - da = xr.DataArray( - arr, - dims=["y", "x"], - coords={ - "y": np.array([0.5, 1.5]), - "x": np.array([0.5, 1.5, 2.5]), - }, - attrs={"crs": 4326}, - ) - tif_path = tmp_path / "source.tif" - to_geotiff(da, str(tif_path)) - - vrt_path = tmp_path / "mosaic.vrt" - # Passing an int sentinel must not raise; the surface should match - # to_geotiff's "float, int, or None" contract. - write_vrt(str(vrt_path), [str(tif_path)], nodata=65535) - - # Confirm the int round-trips through the parser back into a VRT band. - parsed = _vrt_module.parse_vrt( - vrt_path.read_text(), vrt_dir=str(tmp_path)) - band_nodata = parsed.bands[0].nodata - assert band_nodata == 65535, band_nodata diff --git a/xrspatial/geotiff/tests/test_write_vrt_path_kwarg_1946.py b/xrspatial/geotiff/tests/test_write_vrt_path_kwarg_1946.py deleted file mode 100644 index 23be09a7f..000000000 --- a/xrspatial/geotiff/tests/test_write_vrt_path_kwarg_1946.py +++ /dev/null @@ -1,216 +0,0 @@ -"""Regression test for #1946: write_vrt accepts ``path`` for parity -with ``to_geotiff`` / ``write_geotiff_gpu``. - -The api-consistency sweep on 2026-05-15 flagged that ``write_vrt`` was -the only writer in ``xrspatial.geotiff`` whose destination kwarg was -named ``vrt_path`` while the sibling writers use ``path``. 
The fix adds -``path`` as the canonical kwarg and keeps ``vrt_path`` as a deprecated -alias. - -This module pins: - -* Positional ``write_vrt(path, sources)`` works (back-compat with the - previous ``write_vrt(vrt_path, sources)`` positional form). -* Keyword ``write_vrt(path=..., source_files=...)`` works (the new - canonical form). -* Keyword ``write_vrt(vrt_path=...)`` still works and emits - ``DeprecationWarning``. -* Passing both ``path`` and ``vrt_path`` raises ``TypeError``. -* The signature exposes ``path`` as the first positional, matching - ``to_geotiff`` / ``write_geotiff_gpu``. -* The deprecation shim does NOT warn when ``path`` is used. -* Omitting both names raises ``TypeError`` (preserves the pre-#1946 - required-argument semantics). -""" -from __future__ import annotations - -import inspect -import os -import warnings - -import numpy as np -import pytest -import xarray as xr - -from xrspatial.geotiff import read_vrt, to_geotiff, write_geotiff_gpu, write_vrt - - -def _build_source_tif(tmp_path, name='src.tif'): - """Create a small GeoTIFF used as the VRT's source file.""" - arr = np.arange(8 * 8, dtype=np.float32).reshape(8, 8) - da = xr.DataArray( - arr, dims=['y', 'x'], - coords={'y': np.arange(8.0, 0, -1), 'x': np.arange(8.0)}, - attrs={'crs': 4326, 'transform': (1.0, 0, 0.0, 0, -1.0, 8.0)}, - ) - p = str(tmp_path / name) - to_geotiff(da, p) - return p - - -def test_write_vrt_signature_first_arg_is_path(): - """Signature parity with to_geotiff / write_geotiff_gpu. - - The api-consistency sweep cares specifically about - ``inspect.signature``: IDE autocomplete, mypy, and Sphinx-rendered - docs all read the same source. Pinning the first param name here - catches any future re-rename that re-introduces the drift. - """ - sig = inspect.signature(write_vrt) - params = list(sig.parameters) - # ``path`` is the new canonical name, ``source_files`` follows. - # ``vrt_path`` is kept as a keyword-only deprecated alias. 
- assert params[0] == 'path' - assert params[1] == 'source_files' - assert 'vrt_path' in params - # ``vrt_path`` is keyword-only (the alias should never be used - # positionally going forward). - assert sig.parameters['vrt_path'].kind == inspect.Parameter.KEYWORD_ONLY - - -def test_write_vrt_positional_path_works(tmp_path): - """Positional ``write_vrt(path, sources)`` is unchanged. - - Existing callers ``write_vrt(some_path, sources)`` keep working - after the rename because the new ``path`` parameter sits where - ``vrt_path`` used to be. No deprecation warning should fire. - """ - src = _build_source_tif(tmp_path) - out = str(tmp_path / 'out.vrt') - with warnings.catch_warnings(): - warnings.simplefilter('error', DeprecationWarning) - result = write_vrt(out, [src]) - assert result == out - assert os.path.exists(out) - - -def test_write_vrt_path_kwarg_works(tmp_path): - """Keyword ``write_vrt(path=..., source_files=...)`` works. - - A caller who passes everything by keyword (no positional args) - cannot reach the function before #1946 because ``path`` did not - exist; this is the path-symmetric counterpart to the existing - ``write_vrt(vrt_path=...)`` test below. - """ - src = _build_source_tif(tmp_path) - out = str(tmp_path / 'out.vrt') - with warnings.catch_warnings(): - warnings.simplefilter('error', DeprecationWarning) - result = write_vrt(path=out, source_files=[src]) - assert result == out - assert os.path.exists(out) - - -def test_write_vrt_vrt_path_kwarg_emits_deprecation_warning(tmp_path): - """``vrt_path=...`` works but emits ``DeprecationWarning``. - - Mirrors the existing ``crs_wkt`` deprecation in the same writer - (#1715): old name still works, but caller sees a clear migration - hint via the warning. 
- """ - src = _build_source_tif(tmp_path) - out = str(tmp_path / 'out.vrt') - with pytest.warns(DeprecationWarning, match='vrt_path'): - result = write_vrt(vrt_path=out, source_files=[src]) - assert result == out - assert os.path.exists(out) - - -def test_write_vrt_path_and_vrt_path_together_raises(tmp_path): - """Both names supplied is ambiguous; refuse to pick one. - - Mirrors the ``crs`` / ``crs_wkt`` rule documented in the existing - write_vrt source: passing both is rejected with TypeError - regardless of whether the two values happen to match. - """ - src = _build_source_tif(tmp_path) - out = str(tmp_path / 'out.vrt') - with pytest.raises(TypeError, match="path.*vrt_path"): - write_vrt(path=out, vrt_path=out, source_files=[src]) - - -def test_write_vrt_no_path_raises(tmp_path): - """Neither ``path`` nor ``vrt_path`` -> TypeError. - - Before the shim, omitting the first positional argument raised - ``TypeError: missing 1 required positional argument`` from CPython. - The shim adds a sentinel default so the kwarg-only positional no - longer triggers that automatic check; the explicit raise inside - the shim preserves the pre-#1946 error semantics. - """ - src = _build_source_tif(tmp_path) - with pytest.raises(TypeError, match='path'): - write_vrt(source_files=[src]) - - -def test_write_vrt_explicit_path_none_raises(tmp_path): - """``write_vrt(path=None, ...)`` is rejected with TypeError. - - The sentinel-default pattern (#1962 review) distinguishes "caller - passed nothing" (sentinel) from "caller passed None explicitly". - Explicit ``None`` is a bug in the caller's code, not a request to - fall through to the deprecated ``vrt_path`` alias, so the shim - raises with a clear message that names the offending kwarg. 
- """ - src = _build_source_tif(tmp_path) - with pytest.raises(TypeError, match="'path'.*None"): - write_vrt(path=None, source_files=[src]) - - -def test_write_vrt_positional_none_raises(tmp_path): - """Positional ``write_vrt(None, sources)`` is rejected with TypeError. - - Same rationale as the keyword case: an explicit positional ``None`` - is rejected up front instead of crashing deep in - ``os.path.dirname(os.path.abspath(None))``. Pinned because the - pre-#1962 code accepted positional ``None`` and raised the wrong - "missing required argument" error. - """ - src = _build_source_tif(tmp_path) - with pytest.raises(TypeError, match="'path'.*None"): - write_vrt(None, [src]) - - -def test_write_vrt_first_arg_name_matches_writer_trio(): - """Cross-sibling consistency: all three writers use the same - destination kwarg name. - - The deep-sweep-api-consistency sweep keeps adding to the writer - trio's parity contract. Pin the rule here so future re-renames - that split the trio again will trip a test. - """ - eager_first = list( - inspect.signature(to_geotiff).parameters - )[1] # data, path -> index 1 - gpu_first = list( - inspect.signature(write_geotiff_gpu).parameters - )[1] - vrt_first = list( - inspect.signature(write_vrt).parameters - )[0] # path, source_files -> index 0 - assert eager_first == 'path' - assert gpu_first == 'path' - assert vrt_first == 'path' - - -def test_write_vrt_path_round_trip_matches_old(tmp_path): - """The written VRT decodes the same regardless of which kwarg name - the caller used. - - Smoke test that the shim does not silently drop or re-route any of - the other kwargs while resolving ``path`` vs ``vrt_path``. - """ - src = _build_source_tif(tmp_path) - out_new = str(tmp_path / 'out_new.vrt') - out_old = str(tmp_path / 'out_old.vrt') - - write_vrt(out_new, [src]) - with warnings.catch_warnings(): - # ignore the deprecation; we still need the legacy path to - # produce a byte-identical mosaic. 
- warnings.simplefilter('ignore', DeprecationWarning) - write_vrt(vrt_path=out_old, source_files=[src]) - - a_new = read_vrt(out_new) - a_old = read_vrt(out_old) - np.testing.assert_array_equal(np.asarray(a_new), np.asarray(a_old)) diff --git a/xrspatial/geotiff/tests/test_writer.py b/xrspatial/geotiff/tests/test_writer.py deleted file mode 100644 index 920c3b7d3..000000000 --- a/xrspatial/geotiff/tests/test_writer.py +++ /dev/null @@ -1,109 +0,0 @@ -"""Tests for the GeoTIFF writer.""" -from __future__ import annotations - -import numpy as np -import pytest - -from xrspatial.geotiff._geotags import GeoTransform -from xrspatial.geotiff._reader import read_to_array -from xrspatial.geotiff._writer import _make_overview, write - - -class TestMakeOverview: - def test_2x_decimation(self): - arr = np.arange(64, dtype=np.float32).reshape(8, 8) - ov = _make_overview(arr) - assert ov.shape == (4, 4) - # Check first value: mean of top-left 2x2 block - expected = np.mean([0, 1, 8, 9]) - assert ov[0, 0] == pytest.approx(expected) - - def test_integer_rounding(self): - arr = np.array([[1, 2, 3, 4], - [5, 6, 7, 8]], dtype=np.uint8) - ov = _make_overview(arr) - assert ov.shape == (1, 2) - assert ov.dtype == np.uint8 - - -class TestWriteRoundTrip: - def test_uncompressed_stripped(self, tmp_path): - expected = np.arange(64, dtype=np.float32).reshape(8, 8) - path = str(tmp_path / 'uncompressed.tif') - write(expected, path, compression='none', tiled=False) - - arr, geo = read_to_array(path) - np.testing.assert_array_equal(arr, expected) - - def test_deflate_stripped(self, tmp_path): - expected = np.arange(64, dtype=np.float32).reshape(8, 8) - path = str(tmp_path / 'deflate.tif') - write(expected, path, compression='deflate', tiled=False) - - arr, geo = read_to_array(path) - np.testing.assert_array_equal(arr, expected) - - def test_uncompressed_tiled(self, tmp_path): - expected = np.arange(64, dtype=np.float32).reshape(8, 8) - path = str(tmp_path / 'tiled.tif') - write(expected, path, 
compression='none', tiled=True, tile_size=4) - - arr, geo = read_to_array(path) - np.testing.assert_array_equal(arr, expected) - - def test_deflate_tiled(self, tmp_path): - expected = np.arange(64, dtype=np.float32).reshape(8, 8) - path = str(tmp_path / 'deflate_tiled.tif') - write(expected, path, compression='deflate', tiled=True, tile_size=4) - - arr, geo = read_to_array(path) - np.testing.assert_array_equal(arr, expected) - - def test_lzw_stripped(self, tmp_path): - expected = np.arange(64, dtype=np.float32).reshape(8, 8) - path = str(tmp_path / 'lzw.tif') - write(expected, path, compression='lzw', tiled=False) - - arr, geo = read_to_array(path) - np.testing.assert_array_equal(arr, expected) - - def test_uint16(self, tmp_path): - expected = np.arange(100, dtype=np.uint16).reshape(10, 10) - path = str(tmp_path / 'uint16.tif') - write(expected, path, compression='none', tiled=False) - - arr, geo = read_to_array(path) - np.testing.assert_array_equal(arr, expected) - - def test_with_geo_info(self, tmp_path): - expected = np.ones((4, 4), dtype=np.float32) - gt = GeoTransform(-120.0, 45.0, 0.001, -0.001) - path = str(tmp_path / 'geo.tif') - write(expected, path, geo_transform=gt, crs_epsg=4326, - nodata=-9999.0, compression='none', tiled=False) - - arr, geo = read_to_array(path) - np.testing.assert_array_equal(arr, expected) - assert geo.crs_epsg == 4326 - assert geo.transform.origin_x == pytest.approx(-120.0) - assert geo.transform.pixel_width == pytest.approx(0.001) - - def test_predictor_deflate(self, tmp_path): - expected = np.arange(64, dtype=np.float32).reshape(8, 8) - path = str(tmp_path / 'predictor.tif') - write(expected, path, compression='deflate', tiled=False, predictor=True) - - arr, geo = read_to_array(path) - np.testing.assert_array_equal(arr, expected) - - -class TestWriteInvalidInput: - def test_unsupported_compression(self, tmp_path): - arr = np.zeros((4, 4), dtype=np.float32) - # Issue #2138 pushed the canonical compression-list check from - # 
``to_geotiff`` down into ``_write`` so direct callers get the - # same actionable error as the public wrapper. The wording - # shifted from ``_compression_tag``'s "Unsupported compression" - # to the wrapper's "Unknown compression" + canonical list. - with pytest.raises(ValueError, match="(Unsupported|Unknown) compression"): - write(arr, str(tmp_path / 'bad.tif'), compression='bzip2') diff --git a/xrspatial/geotiff/tests/test_writer_kwarg_order_1922.py b/xrspatial/geotiff/tests/test_writer_kwarg_order_1922.py deleted file mode 100644 index 1da66d3dd..000000000 --- a/xrspatial/geotiff/tests/test_writer_kwarg_order_1922.py +++ /dev/null @@ -1,76 +0,0 @@ -"""Regression test for #1922: write_geotiff_gpu kwarg order matches -to_geotiff (with the documented exception for ``gpu``). - -The two writers are advertised as parity siblings. The GPU writer's -own docstring says "Accepted at the signature level for API parity with -``to_geotiff``" for ``max_z_error`` and ``streaming_buffer_bytes``, but -the two kwargs were in opposite order across the two signatures: - - to_geotiff: ..., bigtiff, gpu, streaming_buffer_bytes, - max_z_error, photometric, ... - write_geotiff_gpu: ..., bigtiff, max_z_error, - streaming_buffer_bytes, photometric, ... - -Both are keyword-only so calling code did not break, but -``inspect.signature()``, IDE autocomplete, and Sphinx-rendered docs all -exposed the drift. Detected by deep-sweep-api-consistency on 2026-05-15. -""" -from __future__ import annotations - -import inspect - -from xrspatial.geotiff import to_geotiff, write_geotiff_gpu - - -def test_writer_kwarg_order_matches_to_geotiff(): - """``write_geotiff_gpu`` lists its kwargs in the same order as - ``to_geotiff``, modulo the ``gpu`` kwarg the GPU writer omits. - - Both signatures use keyword-only kwargs so positional callers are - unaffected. The order still matters for IDE autocomplete, generated - docs, and any caller that inspects ``inspect.signature``. 
- """ - eager_params = list(inspect.signature(to_geotiff).parameters) - gpu_params = list(inspect.signature(write_geotiff_gpu).parameters) - - # to_geotiff has ``gpu`` (auto-dispatch flag); write_geotiff_gpu does - # not. Drop it from the comparison instead of asserting on the - # missing kwarg directly, so unrelated future additions to either - # signature still surface here. - assert 'gpu' in eager_params - assert 'gpu' not in gpu_params - eager_params_no_gpu = [p for p in eager_params if p != 'gpu'] - - assert gpu_params == eager_params_no_gpu, ( - "write_geotiff_gpu and to_geotiff kwarg order diverged.\n" - f" to_geotiff (with 'gpu' removed): {eager_params_no_gpu}\n" - f" write_geotiff_gpu: {gpu_params}\n" - "Reorder write_geotiff_gpu to match to_geotiff (see #1922)." - ) - - -def test_writer_kwarg_defaults_match_to_geotiff(): - """The kwargs both writers share also have identical defaults. - - A surprise-free dispatch ``to_geotiff(..., gpu=True)`` requires - ``write_geotiff_gpu`` to default the same way for every kwarg the - auto-dispatch entry point forwards (issue #1916 added - ``allow_internal_only_jpeg`` to satisfy that contract; this test - pins the broader parity). - """ - eager_sig = inspect.signature(to_geotiff) - gpu_sig = inspect.signature(write_geotiff_gpu) - - shared = set(eager_sig.parameters) & set(gpu_sig.parameters) - # ``data`` and ``path`` are required positionals with no default; - # comparing inspect.Parameter.empty against itself is fine. 
- mismatches = [] - for name in sorted(shared): - ed = eager_sig.parameters[name].default - gd = gpu_sig.parameters[name].default - if ed != gd: - mismatches.append((name, ed, gd)) - assert not mismatches, ( - "write_geotiff_gpu and to_geotiff disagree on defaults: " - f"{mismatches}" - ) diff --git a/xrspatial/geotiff/tests/test_writer_matrix.py b/xrspatial/geotiff/tests/test_writer_matrix.py deleted file mode 100644 index bdc9f11fc..000000000 --- a/xrspatial/geotiff/tests/test_writer_matrix.py +++ /dev/null @@ -1,253 +0,0 @@ -"""Write-side tests for the GeoTIFF writer covering issue #1483. - -Adds: -- T-5: dtype x compression round-trip matrix. -- T-6: NaN vs sentinel nodata round-trip semantics. -- T-7: COG validity check via rasterio (skipped if rasterio missing). -- T-9: write-to-readonly directory raises a clean OS/PermissionError. - -T-10 (planar config 2 round-trip) is intentionally omitted -- the writer -does not currently emit PlanarConfiguration=2 (read-only support). -""" -from __future__ import annotations - -import os -import platform - -import numpy as np -import pytest -import xarray as xr - -from xrspatial.geotiff import open_geotiff, to_geotiff -from xrspatial.geotiff._reader import read_to_array -from xrspatial.geotiff._writer import write - -# --------------------------------------------------------------------------- -# T-5: dtype x compression matrix -# --------------------------------------------------------------------------- - -DTYPES_T5 = [ - np.uint8, np.uint16, np.uint32, - np.int16, np.int32, np.int64, - np.float32, np.float64, -] -CODECS_T5 = ['none', 'deflate', 'lzw', 'zstd', 'lz4'] - - -def _make_dtype_arr(dtype, h=32, w=32): - """Make a small array with values that fit the dtype's positive range.""" - n = h * w - dt = np.dtype(dtype) - if dt.kind == 'f': - # Non-trivial floats; include a few extreme-ish values. 
- arr = np.linspace(-1e3, 1e3, n).astype(dt).reshape(h, w) - elif dt.kind == 'u': - # Stay below uint16 max so it fits any unsigned dtype here. - arr = (np.arange(n) % 1000).astype(dt).reshape(h, w) - else: # signed int - arr = ((np.arange(n) % 2000) - 1000).astype(dt).reshape(h, w) - return arr - - -def _codec_supports(codec, dtype): - """Return False for combos the writer rejects, True otherwise.""" - # JPEG is not in the parametrized codec list (only uint8/3-band). - # All listed codecs accept any of the listed dtypes. - return True - - -@pytest.mark.parametrize('codec', CODECS_T5) -@pytest.mark.parametrize('dtype', DTYPES_T5) -def test_dtype_codec_roundtrip_stripped(tmp_path, dtype, codec): - """Round-trip every dtype x codec in stripped layout.""" - if not _codec_supports(codec, dtype): - pytest.skip(f"{codec} does not support {np.dtype(dtype).name}") - - expected = _make_dtype_arr(dtype) - path = str(tmp_path / f'1483_t5_strip_{np.dtype(dtype).name}_{codec}.tif') - - try: - write(expected, path, compression=codec, tiled=False) - except (ImportError, ModuleNotFoundError) as e: - pytest.skip(f"codec {codec} not available: {e}") - - # Codecs in the experimental tier (LERC / J2K / LZ4) need the - # read-side opt-in too (PR 4 of epic #2340). Tier 1 codecs ignore - # the kwarg, so passing it unconditionally keeps the loop simple. 
- arr, _geo = read_to_array(path, allow_experimental_codecs=True) - np.testing.assert_array_equal(arr, expected) - assert arr.dtype == expected.dtype - - -@pytest.mark.parametrize('codec', CODECS_T5) -@pytest.mark.parametrize('dtype', DTYPES_T5) -def test_dtype_codec_roundtrip_tiled(tmp_path, dtype, codec): - """Round-trip every dtype x codec in tiled layout.""" - if not _codec_supports(codec, dtype): - pytest.skip(f"{codec} does not support {np.dtype(dtype).name}") - - expected = _make_dtype_arr(dtype) - path = str(tmp_path / f'1483_t5_tile_{np.dtype(dtype).name}_{codec}.tif') - - try: - write(expected, path, compression=codec, tiled=True, tile_size=16) - except (ImportError, ModuleNotFoundError) as e: - pytest.skip(f"codec {codec} not available: {e}") - - arr, _geo = read_to_array(path, allow_experimental_codecs=True) - np.testing.assert_array_equal(arr, expected) - assert arr.dtype == expected.dtype - - -# --------------------------------------------------------------------------- -# T-6: NaN vs sentinel nodata -# --------------------------------------------------------------------------- - -def _float_with_nan(h=8, w=8, dtype=np.float32): - arr = np.linspace(0.0, 100.0, h * w, dtype=dtype).reshape(h, w) - arr[0, 0] = np.nan - arr[3, 5] = np.nan - arr[-1, -1] = np.nan - return arr - - -def test_nodata_nan_float_roundtrip(tmp_path): - """nodata=NaN: NaN positions in the input round-trip as NaN.""" - expected = _float_with_nan(dtype=np.float32) - path = str(tmp_path / '1483_t6_nodata_nan.tif') - - da = xr.DataArray(expected, dims=('y', 'x')) - to_geotiff(da, path, nodata=float('nan'), compression='deflate') - - out = open_geotiff(path) - np.testing.assert_array_equal(np.isnan(out.data), np.isnan(expected)) - finite = ~np.isnan(expected) - np.testing.assert_array_equal(out.data[finite], expected[finite]) - - -def test_nodata_sentinel_float_disk_vs_read(tmp_path): - """nodata=-9999: NaN positions become sentinel on disk, NaN on read-back.""" - expected = 
_float_with_nan(dtype=np.float32) - path = str(tmp_path / '1483_t6_nodata_sentinel.tif') - - da = xr.DataArray(expected, dims=('y', 'x')) - to_geotiff(da, path, nodata=-9999.0, compression='deflate') - - # On-disk values: NaN positions hold the sentinel. - raw, _geo = read_to_array(path) - nan_mask = np.isnan(expected) - assert np.all(raw[nan_mask] == np.float32(-9999.0)) - # Non-NaN positions match. - np.testing.assert_array_equal(raw[~nan_mask], expected[~nan_mask]) - - # Read back through open_geotiff: sentinel becomes NaN again. - out = open_geotiff(path) - np.testing.assert_array_equal(np.isnan(out.data), nan_mask) - np.testing.assert_array_equal(out.data[~nan_mask], expected[~nan_mask]) - assert out.attrs.get('nodata') == -9999.0 - - -def test_nodata_uint8_sentinel(tmp_path): - """nodata=255 for uint8: sentinel on disk, NaN on read (array promoted to float).""" - arr = np.arange(64, dtype=np.uint8).reshape(8, 8).copy() - arr[0, 0] = 255 - arr[4, 4] = 255 - path = str(tmp_path / '1483_t6_nodata_uint8.tif') - - da = xr.DataArray(arr, dims=('y', 'x')) - to_geotiff(da, path, nodata=255, compression='deflate') - - # On-disk: still uint8 with 255 in those slots. - raw, _geo = read_to_array(path) - assert raw.dtype == np.uint8 - assert raw[0, 0] == 255 and raw[4, 4] == 255 - np.testing.assert_array_equal(raw, arr) - - # Read-back: open_geotiff promotes integer with nodata to float + NaN. 
- out = open_geotiff(path) - assert out.dtype.kind == 'f' - assert np.isnan(out.data[0, 0]) - assert np.isnan(out.data[4, 4]) - finite = ~np.isnan(out.data) - np.testing.assert_array_equal(out.data[finite].astype(np.uint8), - arr[finite]) - - -# --------------------------------------------------------------------------- -# T-7: COG validity (rasterio-dependent) -# --------------------------------------------------------------------------- - -rasterio = pytest.importorskip( - 'rasterio', - reason='rasterio is optional; COG validity test skipped when missing', -) - - -def test_cog_layout_and_overviews(tmp_path): - """A cog=True file is tiled, carries overviews, and (when rio-cogeo is - installed) passes the COG validator. - - Note: xrspatial does not currently emit GDAL's IMAGE_STRUCTURE.LAYOUT=COG - tag, so we don't assert that. Structural COG properties (tiled, overviews - present, GDAL-readable) are what the writer actually guarantees. - """ - h = w = 1024 - arr = np.arange(h * w, dtype=np.float32).reshape(h, w) % 1000.0 - path = str(tmp_path / '1483_t7_cog.tif') - - da = xr.DataArray(arr, dims=('y', 'x')) - to_geotiff( - da, path, crs=4326, cog=True, compression='deflate', tile_size=256, - ) - - with rasterio.open(path) as src: - assert src.is_tiled, "COG output must be tiled" - # 1024x1024 with 256 tiles produces at least one halving. - ovs = src.overviews(1) - assert len(ovs) >= 1, f"expected at least one overview, got {ovs}" - assert ovs[0] in (2, 4, 8, 16), f"unexpected first overview: {ovs}" - # Each overview should be strictly larger than the previous (decimation - # factors are monotonically increasing). - assert all(b > a for a, b in zip(ovs, ovs[1:])), \ - f"overview decimations not monotonically increasing: {ovs}" - # Sanity: full-resolution band should round-trip values. - sample = src.read(1, window=((0, 4), (0, 4))) - np.testing.assert_array_equal(sample, arr[:4, :4]) - - # If rio-cogeo is installed, run its validator for the gold-standard check. 
- try: - from rio_cogeo.cogeo import cog_validate - except ImportError: - return - valid, errors, _warnings = cog_validate(path, strict=False) - assert valid, f"rio-cogeo cog_validate failed: errors={errors}" - - -# --------------------------------------------------------------------------- -# T-9: write-to-readonly directory -# --------------------------------------------------------------------------- - -@pytest.mark.skipif( - platform.system() == 'Windows', - reason='POSIX chmod semantics required', -) -@pytest.mark.skipif( - hasattr(os, 'geteuid') and os.geteuid() == 0, - reason='root bypasses directory permissions', -) -def test_write_to_readonly_dir_raises_oserror(tmp_path): - """Writing into a chmod 0o555 directory must raise OSError/PermissionError.""" - ro_dir = tmp_path / '1483_t9_readonly' - ro_dir.mkdir() - target = str(ro_dir / 'out.tif') - - arr = np.arange(64, dtype=np.float32).reshape(8, 8) - da = xr.DataArray(arr, dims=('y', 'x')) - - original_mode = ro_dir.stat().st_mode - try: - os.chmod(ro_dir, 0o555) - with pytest.raises((OSError, PermissionError)): - to_geotiff(da, target, compression='deflate') - finally: - os.chmod(ro_dir, original_mode) diff --git a/xrspatial/geotiff/tests/test_writer_return_path_1938.py b/xrspatial/geotiff/tests/test_writer_return_path_1938.py deleted file mode 100644 index b1f5fe0e2..000000000 --- a/xrspatial/geotiff/tests/test_writer_return_path_1938.py +++ /dev/null @@ -1,186 +0,0 @@ -"""Regression test for #1938: writer entry points return the written path. - -``write_vrt`` returned ``str`` while ``to_geotiff`` and -``write_geotiff_gpu`` returned ``None``. The drift broke ``mypy`` -consumers who handle the three writers uniformly and made the -Sphinx-rendered docs surface inconsistent. - -This module asserts: - -1. ``to_geotiff`` returns the ``path`` argument for filesystem and - file-like destinations. -2. 
``write_geotiff_gpu``'s annotation matches the canonical ``path`` - return (the runtime check is gated on cupy + CUDA availability and - skipped here so the CPU test suite stays green). -3. ``write_vrt`` keeps returning the path (already conformant). -4. The three entry points share the same ``Returns`` annotation in - ``inspect.signature``. -""" -from __future__ import annotations - -import importlib.util -import inspect -import io -import os - -import numpy as np -import pytest -import xarray as xr - -from xrspatial.geotiff import to_geotiff, write_geotiff_gpu, write_vrt - - -def _gpu_available() -> bool: - if importlib.util.find_spec("cupy") is None: - return False - try: - import cupy - - return bool(cupy.cuda.is_available()) - except Exception: - return False - - -_HAS_GPU = _gpu_available() -_gpu_only = pytest.mark.skipif( - not _HAS_GPU, reason="cupy + CUDA required", -) - - -def _small_da() -> xr.DataArray: - arr = np.arange(16, dtype=np.float32).reshape(4, 4) - return xr.DataArray( - arr, - dims=("y", "x"), - coords={"y": np.arange(4)[::-1].astype(np.float64), - "x": np.arange(4).astype(np.float64)}, - attrs={"crs": 4326}, - ) - - -def test_to_geotiff_returns_string_path(tmp_path): - """``to_geotiff`` returns the str path passed in.""" - da = _small_da() - out = tmp_path / "test_1938_str.tif" - rv = to_geotiff(da, str(out)) - assert isinstance(rv, str), ( - f"to_geotiff(str) must return a str, got {type(rv).__name__}" - ) - assert rv == str(out) - assert os.path.exists(rv) - - -def test_to_geotiff_returns_file_like(tmp_path): - """``to_geotiff`` returns the file-like object passed in.""" - da = _small_da() - buf = io.BytesIO() - rv = to_geotiff(da, buf) - assert rv is buf, ( - f"to_geotiff(BytesIO) must return the same file-like, " - f"got {type(rv).__name__}" - ) - # The buffer was actually written to. 
- assert buf.tell() > 0 or len(buf.getvalue()) > 0 - - -def test_to_geotiff_cog_returns_path(tmp_path): - """COG path also returns the str path.""" - da = _small_da() - out = tmp_path / "test_1938_cog.tif" - rv = to_geotiff(da, str(out), cog=True, tile_size=16) - assert isinstance(rv, str) - assert rv == str(out) - assert os.path.exists(rv) - - -def test_to_geotiff_dask_streaming_returns_path(tmp_path): - """Dask-streaming write path also returns the str path.""" - import dask.array as da_arr - - arr = da_arr.arange(256, dtype=np.float32, chunks=64).reshape(16, 16) - da = xr.DataArray( - arr, - dims=("y", "x"), - coords={"y": np.arange(16)[::-1].astype(np.float64), - "x": np.arange(16).astype(np.float64)}, - attrs={"crs": 4326}, - ) - out = tmp_path / "test_1938_dask.tif" - rv = to_geotiff(da, str(out)) - assert isinstance(rv, str) - assert rv == str(out) - assert os.path.exists(rv) - - -def test_write_vrt_returns_string_path(tmp_path): - """``write_vrt`` (already conformant) keeps returning the str path.""" - # Create a source tif first. - src = tmp_path / "src.tif" - to_geotiff(_small_da(), str(src)) - vrt_path = tmp_path / "out.vrt" - rv = write_vrt(str(vrt_path), [str(src)]) - assert isinstance(rv, str) - assert rv == str(vrt_path) - assert os.path.exists(rv) - - -@_gpu_only -def test_write_geotiff_gpu_returns_string_path(tmp_path): - """GPU writer returns the str path (only runs with cupy + CUDA).""" - import cupy - - arr_cpu = np.arange(16, dtype=np.float32).reshape(4, 4) - arr_gpu = cupy.asarray(arr_cpu) - da = xr.DataArray( - arr_gpu, - dims=("y", "x"), - coords={"y": np.arange(4)[::-1].astype(np.float64), - "x": np.arange(4).astype(np.float64)}, - attrs={"crs": 4326}, - ) - out = tmp_path / "test_1938_gpu.tif" - rv = write_geotiff_gpu(da, str(out)) - assert isinstance(rv, str) - assert rv == str(out) - assert os.path.exists(rv) - - -def test_writer_signatures_declare_path_return(): - """All three writers annotate the same return type. 
- - The annotation is a string under ``from __future__ import annotations``; - pin the literal so the three writers cannot drift apart silently. - """ - expected = { - to_geotiff: "str | BinaryIO", - write_geotiff_gpu: "str | BinaryIO", - write_vrt: "str", - } - for fn, expected_ann in expected.items(): - sig = inspect.signature(fn) - assert sig.return_annotation == expected_ann, ( - f"{fn.__name__} return annotation drifted: expected " - f"{expected_ann!r}, got {sig.return_annotation!r}" - ) - - -def test_writer_returns_are_not_none(tmp_path): - """None of the public writers may go back to returning ``None``.""" - # Use the ``tmp_path`` fixture (not ``tempfile.TemporaryDirectory``) - # because ``write_vrt`` reads each source through the module-level - # ``_MmapCache`` in ``_reader.py``, which keeps the file handle and - # mmap of ``src.tif`` open after ``_FileSource.close()`` so repeated - # reads of the same file stay cheap. On Windows that cached handle - # blocks ``os.unlink`` (WinError 32), so a synchronous - # ``TemporaryDirectory`` teardown raises before the test returns. - # ``tmp_path`` defers cleanup to pytest's session-end sweep, which - # tolerates the still-open handle the same way the other tests in - # this file already do. - da = _small_da() - out = str(tmp_path / "out.tif") - rv = to_geotiff(da, out) - assert rv is not None - src = str(tmp_path / "src.tif") - to_geotiff(da, src) - vrt_rv = write_vrt(str(tmp_path / "m.vrt"), [src]) - assert vrt_rv is not None diff --git a/xrspatial/geotiff/tests/test_writer_uncompressed_tiled_no_dead_alloc_1736.py b/xrspatial/geotiff/tests/test_writer_uncompressed_tiled_no_dead_alloc_1736.py deleted file mode 100644 index 0313bfef1..000000000 --- a/xrspatial/geotiff/tests/test_writer_uncompressed_tiled_no_dead_alloc_1736.py +++ /dev/null @@ -1,134 +0,0 @@ -"""Regression test for issue #1736. 
- -The uncompressed tiled branch of ``xrspatial.geotiff._writer._write_tiled`` -previously allocated a contiguous ``bytearray`` plus a memoryview -``(n_tiles * tw * th * bytes_per_sample * samples)`` bytes long at the -top of the loop and never read either back. Tile bytes were still -built via ``tile_arr.tobytes()`` and appended to a list. The dead -buffer roughly doubled peak memory for an uncompressed write. - -The fix is a pure deletion. The tests below cover both behaviours -worth pinning: - -* round-trip fidelity (writing an uncompressed tiled GeoTIFF must - still read back identically with no holes between tiles); and -* peak-memory shape, by snapshotting ``tracemalloc`` peak across a - direct ``_write_tiled`` call. The current implementation lands at - roughly ``1.06x`` the raw raster size; the dead bytearray pushed it - to ``~2.07x``. The threshold below (``1.5x``) catches any - reintroduction of that allocation with comfortable headroom for - unrelated implementation changes. -""" -from __future__ import annotations - -import os -import tracemalloc -import uuid - -import numpy as np -import xarray as xr - -from xrspatial.geotiff import open_geotiff, to_geotiff -from xrspatial.geotiff._compression import COMPRESSION_NONE -from xrspatial.geotiff._writer import _write_tiled - -# Peak ``tracemalloc`` size, in multiples of the input raster size, that -# the uncompressed branch of ``_write_tiled`` must stay under. The dead -# bytearray drove peak to ~2.07x; the current implementation sits at -# ~1.06-1.12x across the cases below. 1.5x leaves room for unrelated -# refactors while still firmly catching the regression. 
-_PEAK_RATIO_LIMIT = 1.5 - - -def test_uncompressed_tiled_round_trip_exact(tmp_path): - rng = np.random.RandomState(20260512) - h, w = 96, 144 - data = rng.randint(0, 200, size=(h, w)).astype(np.uint8) - da = xr.DataArray(data, dims=['y', 'x']) - - p = str(tmp_path / f"tmp_1736_uncomp_{uuid.uuid4().hex[:8]}.tif") - to_geotiff(da, p, tiled=True, tile_size=32, compression='none') - assert os.path.exists(p) - - out = open_geotiff(p) - np.testing.assert_array_equal(out.data, data) - assert out.shape == (h, w) - - -def test_uncompressed_tiled_round_trip_partial_edge_tiles(tmp_path): - """Tile size that does not divide width/height exercises the - zero-padded edge-tile branch inside the loop.""" - rng = np.random.RandomState(20260513) - h, w = 50, 70 # 32 does not divide either; edges pad - data = rng.randint(0, 60000, size=(h, w)).astype(np.uint16) - da = xr.DataArray(data, dims=['y', 'x']) - - p = str(tmp_path / f"tmp_1736_edge_{uuid.uuid4().hex[:8]}.tif") - to_geotiff(da, p, tiled=True, tile_size=32, compression='none') - - out = open_geotiff(p) - np.testing.assert_array_equal(out.data, data) - - -def test_uncompressed_tiled_round_trip_multiband(tmp_path): - rng = np.random.RandomState(20260514) - h, w, b = 48, 80, 3 - data = rng.randint(0, 200, size=(h, w, b)).astype(np.uint8) - da = xr.DataArray(data, dims=['y', 'x', 'band']) - - p = str(tmp_path / f"tmp_1736_multi_{uuid.uuid4().hex[:8]}.tif") - to_geotiff(da, p, tiled=True, tile_size=16, compression='none') - - out = open_geotiff(p) - np.testing.assert_array_equal(out.data, data) - - -def _peak_ratio_for_write_tiled(data: np.ndarray, tile_size: int) -> float: - """Return ``tracemalloc`` peak / ``data.nbytes`` for one - ``_write_tiled`` call against the uncompressed branch. - - Allocations made before this call are excluded from peak by the - ``reset_peak`` step, so the ratio reflects what ``_write_tiled`` - itself adds. 
- """ - tracemalloc.start() - try: - tracemalloc.reset_peak() - _write_tiled(data, COMPRESSION_NONE, 1, tile_size=tile_size) - _current, peak = tracemalloc.get_traced_memory() - finally: - tracemalloc.stop() - return peak / data.nbytes - - -def test_uncompressed_tiled_peak_memory_single_band(): - """Peak memory for the uncompressed branch should stay below - ``_PEAK_RATIO_LIMIT * raster_bytes``. Reintroducing the dead - ``bytearray(n_tiles * tile_bytes)`` would push the ratio to ~2x - and fail this check.""" - h, w = 1024, 1024 # 1 MB raw, exact tile divisor -> no edge padding - data = np.random.RandomState(20260512).randint( - 0, 255, size=(h, w), dtype=np.uint8, - ) - ratio = _peak_ratio_for_write_tiled(data, tile_size=256) - assert ratio < _PEAK_RATIO_LIMIT, ( - f"_write_tiled peak memory {ratio:.2f}x raster exceeds the " - f"{_PEAK_RATIO_LIMIT}x cap; the dead bytearray from #1736 may " - f"have been reintroduced." - ) - - -def test_uncompressed_tiled_peak_memory_multiband(): - """3-band variant of the peak-memory check. ``samples == 3`` - triples the would-be dead buffer, so this case is even more - sensitive to a regression.""" - h, w = 1024, 1024 - data = np.random.RandomState(20260513).randint( - 0, 255, size=(h, w, 3), dtype=np.uint8, - ) - ratio = _peak_ratio_for_write_tiled(data, tile_size=256) - assert ratio < _PEAK_RATIO_LIMIT, ( - f"_write_tiled peak memory {ratio:.2f}x raster exceeds the " - f"{_PEAK_RATIO_LIMIT}x cap; the dead bytearray from #1736 may " - f"have been reintroduced." - ) diff --git a/xrspatial/geotiff/tests/write/__init__.py b/xrspatial/geotiff/tests/write/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/xrspatial/geotiff/tests/write/test_basic.py b/xrspatial/geotiff/tests/write/test_basic.py new file mode 100644 index 000000000..7a7800e01 --- /dev/null +++ b/xrspatial/geotiff/tests/write/test_basic.py @@ -0,0 +1,1727 @@ +"""Generic writer paths. 
+ +Covers the eager ``to_geotiff`` / ``write_geotiff_gpu`` / ``write_vrt`` +surface: round-trip basics, dtype x compression matrix, kwarg order +and return-path contracts, the uncompressed-tiled no-dead-alloc gate, +the writer layout monkeypatch contract, and the VRT writer surface +(path kwarg, CRS, bool / int nodata, int64, photometric, source +compatibility, tiled output). + +Section banners below mark the topical sub-areas. Tests-only restructure +for epic #2390. +""" + +from __future__ import annotations + +import numpy as np +import pytest +import os +import platform +import xarray as xr +import inspect +import importlib.util +import io +import tracemalloc +import uuid +import warnings +import typing +import re +import glob + +from xrspatial.geotiff import ( + _vrt as _vrt_module, + _writer as writer_mod, + open_geotiff, + read_vrt, + to_geotiff, + write_geotiff_gpu, + write_vrt, +) +from xrspatial.geotiff._compression import COMPRESSION_NONE +from xrspatial.geotiff._geotags import GeoTransform +from xrspatial.geotiff._header import TAG_PHOTOMETRIC, parse_header, parse_ifd +from xrspatial.geotiff._reader import read_to_array +from xrspatial.geotiff._writer import _make_overview, _write_tiled, write +# ``write_vrt`` here is the private internal binding, aliased so it does +# not shadow the public re-export above. The only section that needs +# the private form is the writer-source-compat fold (see PR +# description for the why). 
+from xrspatial.geotiff._vrt import write_vrt as _priv_write_vrt +from xrspatial.geotiff.tests.conftest import requires_gpu + + +# ------------------------------------------------------------------------- +# Section: writer round-trip basics +# ------------------------------------------------------------------------- + +class TestMakeOverview: + def test_2x_decimation(self): + arr = np.arange(64, dtype=np.float32).reshape(8, 8) + ov = _make_overview(arr) + assert ov.shape == (4, 4) + # Check first value: mean of top-left 2x2 block + expected = np.mean([0, 1, 8, 9]) + assert ov[0, 0] == pytest.approx(expected) + + def test_integer_rounding(self): + arr = np.array([[1, 2, 3, 4], + [5, 6, 7, 8]], dtype=np.uint8) + ov = _make_overview(arr) + assert ov.shape == (1, 2) + assert ov.dtype == np.uint8 + + +class TestWriteRoundTrip: + def test_uncompressed_stripped(self, tmp_path): + expected = np.arange(64, dtype=np.float32).reshape(8, 8) + path = str(tmp_path / 'uncompressed.tif') + write(expected, path, compression='none', tiled=False) + + arr, geo = read_to_array(path) + np.testing.assert_array_equal(arr, expected) + + def test_deflate_stripped(self, tmp_path): + expected = np.arange(64, dtype=np.float32).reshape(8, 8) + path = str(tmp_path / 'deflate.tif') + write(expected, path, compression='deflate', tiled=False) + + arr, geo = read_to_array(path) + np.testing.assert_array_equal(arr, expected) + + def test_uncompressed_tiled(self, tmp_path): + expected = np.arange(64, dtype=np.float32).reshape(8, 8) + path = str(tmp_path / 'tiled.tif') + write(expected, path, compression='none', tiled=True, tile_size=4) + + arr, geo = read_to_array(path) + np.testing.assert_array_equal(arr, expected) + + def test_deflate_tiled(self, tmp_path): + expected = np.arange(64, dtype=np.float32).reshape(8, 8) + path = str(tmp_path / 'deflate_tiled.tif') + write(expected, path, compression='deflate', tiled=True, tile_size=4) + + arr, geo = read_to_array(path) + 
np.testing.assert_array_equal(arr, expected) + + def test_lzw_stripped(self, tmp_path): + expected = np.arange(64, dtype=np.float32).reshape(8, 8) + path = str(tmp_path / 'lzw.tif') + write(expected, path, compression='lzw', tiled=False) + + arr, geo = read_to_array(path) + np.testing.assert_array_equal(arr, expected) + + def test_uint16(self, tmp_path): + expected = np.arange(100, dtype=np.uint16).reshape(10, 10) + path = str(tmp_path / 'uint16.tif') + write(expected, path, compression='none', tiled=False) + + arr, geo = read_to_array(path) + np.testing.assert_array_equal(arr, expected) + + def test_with_geo_info(self, tmp_path): + expected = np.ones((4, 4), dtype=np.float32) + gt = GeoTransform(-120.0, 45.0, 0.001, -0.001) + path = str(tmp_path / 'geo.tif') + write(expected, path, geo_transform=gt, crs_epsg=4326, + nodata=-9999.0, compression='none', tiled=False) + + arr, geo = read_to_array(path) + np.testing.assert_array_equal(arr, expected) + assert geo.crs_epsg == 4326 + assert geo.transform.origin_x == pytest.approx(-120.0) + assert geo.transform.pixel_width == pytest.approx(0.001) + + def test_predictor_deflate(self, tmp_path): + expected = np.arange(64, dtype=np.float32).reshape(8, 8) + path = str(tmp_path / 'predictor.tif') + write(expected, path, compression='deflate', tiled=False, predictor=True) + + arr, geo = read_to_array(path) + np.testing.assert_array_equal(arr, expected) + + +class TestWriteInvalidInput: + def test_unsupported_compression(self, tmp_path): + arr = np.zeros((4, 4), dtype=np.float32) + # Issue #2138 pushed the canonical compression-list check from + # ``to_geotiff`` down into ``_write`` so direct callers get the + # same actionable error as the public wrapper. The wording + # shifted from ``_compression_tag``'s "Unsupported compression" + # to the wrapper's "Unknown compression" + canonical list. 
+ with pytest.raises(ValueError, match="(Unsupported|Unknown) compression"): + write(arr, str(tmp_path / 'bad.tif'), compression='bzip2') + + +# ------------------------------------------------------------------------- +# Section: writer dtype x compression matrix +# ------------------------------------------------------------------------- + +# --------------------------------------------------------------------------- +# T-5: dtype x compression matrix +# --------------------------------------------------------------------------- + +DTYPES_T5 = [ + np.uint8, np.uint16, np.uint32, + np.int16, np.int32, np.int64, + np.float32, np.float64, +] +CODECS_T5 = ['none', 'deflate', 'lzw', 'zstd', 'lz4'] + + +def _make_dtype_arr(dtype, h=32, w=32): + """Make a small array with values that fit the dtype's positive range.""" + n = h * w + dt = np.dtype(dtype) + if dt.kind == 'f': + # Non-trivial floats; include a few extreme-ish values. + arr = np.linspace(-1e3, 1e3, n).astype(dt).reshape(h, w) + elif dt.kind == 'u': + # Stay below uint16 max so it fits any unsigned dtype here. + arr = (np.arange(n) % 1000).astype(dt).reshape(h, w) + else: # signed int + arr = ((np.arange(n) % 2000) - 1000).astype(dt).reshape(h, w) + return arr + + +def _codec_supports(codec, dtype): + """Return False for combos the writer rejects, True otherwise.""" + # JPEG is not in the parametrized codec list (only uint8/3-band). + # All listed codecs accept any of the listed dtypes. 
@pytest.mark.parametrize('codec', CODECS_T5)
@pytest.mark.parametrize('dtype', DTYPES_T5)
def test_dtype_codec_roundtrip_stripped(tmp_path, dtype, codec):
    """Write then re-read each dtype x codec pair using the stripped layout.

    The decoded array must equal the input element-for-element and keep
    the input dtype. A codec whose backing library cannot be imported is
    skipped rather than failed.
    """
    if not _codec_supports(codec, dtype):
        pytest.skip(f"{codec} does not support {np.dtype(dtype).name}")

    source = _make_dtype_arr(dtype)
    path = str(tmp_path / f'1483_t5_strip_{np.dtype(dtype).name}_{codec}.tif')

    try:
        write(source, path, compression=codec, tiled=False)
    except ImportError as e:
        # ModuleNotFoundError is a subclass of ImportError, so one clause
        # covers both.
        pytest.skip(f"codec {codec} not available: {e}")

    # The read-side opt-in is passed for every codec: experimental-tier
    # codecs need it and the other codecs ignore it, which keeps the
    # parametrized body free of per-codec branching.
    decoded, _geo = read_to_array(path, allow_experimental_codecs=True)
    np.testing.assert_array_equal(decoded, source)
    assert decoded.dtype == source.dtype
def test_nodata_nan_float_roundtrip(tmp_path):
    """With ``nodata=NaN``, NaN cells in the input come back as NaN.

    The NaN mask of the decoded raster must match the input mask, and
    every finite cell must round-trip exactly.
    """
    source = _float_with_nan(dtype=np.float32)
    path = str(tmp_path / '1483_t6_nodata_nan.tif')

    to_geotiff(
        xr.DataArray(source, dims=('y', 'x')),
        path,
        nodata=float('nan'),
        compression='deflate',
    )

    result = open_geotiff(path)
    np.testing.assert_array_equal(np.isnan(result.data), np.isnan(source))
    valid = ~np.isnan(source)
    np.testing.assert_array_equal(result.data[valid], source[valid])
+ raw, _geo = read_to_array(path) + assert raw.dtype == np.uint8 + assert raw[0, 0] == 255 and raw[4, 4] == 255 + np.testing.assert_array_equal(raw, arr) + + # Read-back: open_geotiff promotes integer with nodata to float + NaN. + out = open_geotiff(path) + assert out.dtype.kind == 'f' + assert np.isnan(out.data[0, 0]) + assert np.isnan(out.data[4, 4]) + finite = ~np.isnan(out.data) + np.testing.assert_array_equal(out.data[finite].astype(np.uint8), + arr[finite]) + + +# --------------------------------------------------------------------------- +# T-7: COG validity (rasterio-dependent) +# --------------------------------------------------------------------------- + + +def test_cog_layout_and_overviews(tmp_path): + """A cog=True file is tiled, carries overviews, and (when rio-cogeo is + installed) passes the COG validator. + + Note: xrspatial does not currently emit GDAL's IMAGE_STRUCTURE.LAYOUT=COG + tag, so we don't assert that. Structural COG properties (tiled, overviews + present, GDAL-readable) are what the writer actually guarantees. + """ + rasterio = pytest.importorskip( + 'rasterio', + reason='rasterio is optional; COG validity test skipped when missing', + ) + h = w = 1024 + arr = np.arange(h * w, dtype=np.float32).reshape(h, w) % 1000.0 + path = str(tmp_path / '1483_t7_cog.tif') + + da = xr.DataArray(arr, dims=('y', 'x')) + to_geotiff( + da, path, crs=4326, cog=True, compression='deflate', tile_size=256, + ) + + with rasterio.open(path) as src: + assert src.is_tiled, "COG output must be tiled" + # 1024x1024 with 256 tiles produces at least one halving. + ovs = src.overviews(1) + assert len(ovs) >= 1, f"expected at least one overview, got {ovs}" + assert ovs[0] in (2, 4, 8, 16), f"unexpected first overview: {ovs}" + # Each overview should be strictly larger than the previous (decimation + # factors are monotonically increasing). 
@pytest.mark.skipif(
    platform.system() == 'Windows',
    reason='POSIX chmod semantics required',
)
@pytest.mark.skipif(
    hasattr(os, 'geteuid') and os.geteuid() == 0,
    reason='root bypasses directory permissions',
)
def test_write_to_readonly_dir_raises_oserror(tmp_path):
    """``to_geotiff`` into a chmod 0o555 directory must raise ``OSError``.

    The directory's original mode is restored in ``finally`` so pytest
    can still clean up ``tmp_path`` afterwards.
    """
    locked_dir = tmp_path / '1483_t9_readonly'
    locked_dir.mkdir()
    target = str(locked_dir / 'out.tif')

    raster = xr.DataArray(
        np.arange(64, dtype=np.float32).reshape(8, 8),
        dims=('y', 'x'),
    )

    saved_mode = locked_dir.stat().st_mode
    try:
        os.chmod(locked_dir, 0o555)
        # PermissionError is a subclass of OSError, so OSError alone
        # accepts both.
        with pytest.raises(OSError):
            to_geotiff(raster, target, compression='deflate')
    finally:
        os.chmod(locked_dir, saved_mode)
+ + Both signatures use keyword-only kwargs so positional callers are + unaffected. The order still matters for IDE autocomplete, generated + docs, and any caller that inspects ``inspect.signature``. + """ + eager_params = list(inspect.signature(to_geotiff).parameters) + gpu_params = list(inspect.signature(write_geotiff_gpu).parameters) + + # to_geotiff has ``gpu`` (auto-dispatch flag); write_geotiff_gpu does + # not. Drop it from the comparison instead of asserting on the + # missing kwarg directly, so unrelated future additions to either + # signature still surface here. + assert 'gpu' in eager_params + assert 'gpu' not in gpu_params + eager_params_no_gpu = [p for p in eager_params if p != 'gpu'] + + assert gpu_params == eager_params_no_gpu, ( + "write_geotiff_gpu and to_geotiff kwarg order diverged.\n" + f" to_geotiff (with 'gpu' removed): {eager_params_no_gpu}\n" + f" write_geotiff_gpu: {gpu_params}\n" + "Reorder write_geotiff_gpu to match to_geotiff (see #1922)." + ) + + +def test_writer_kwarg_defaults_match_to_geotiff(): + """The kwargs both writers share also have identical defaults. + + A surprise-free dispatch ``to_geotiff(..., gpu=True)`` requires + ``write_geotiff_gpu`` to default the same way for every kwarg the + auto-dispatch entry point forwards (issue #1916 added + ``allow_internal_only_jpeg`` to satisfy that contract; this test + pins the broader parity). + """ + eager_sig = inspect.signature(to_geotiff) + gpu_sig = inspect.signature(write_geotiff_gpu) + + shared = set(eager_sig.parameters) & set(gpu_sig.parameters) + # ``data`` and ``path`` are required positionals with no default; + # comparing inspect.Parameter.empty against itself is fine. 
def _small_da() -> xr.DataArray:
    """Build the 4x4 float32 raster used by the return-path tests.

    Values are the ramp 0..15, ``y`` coordinates descend (3..0), ``x``
    coordinates ascend (0..3), and ``attrs['crs']`` is 4326.
    """
    values = np.arange(16, dtype=np.float32).reshape(4, 4)
    y_coords = np.arange(4)[::-1].astype(np.float64)
    x_coords = np.arange(4).astype(np.float64)
    return xr.DataArray(
        values,
        dims=("y", "x"),
        coords={"y": y_coords, "x": x_coords},
        attrs={"crs": 4326},
    )
def test_to_geotiff_dask_streaming_returns_path(tmp_path):
    """A dask-backed input also gets the ``str`` destination path back."""
    import dask.array as da_arr

    lazy_values = da_arr.arange(256, dtype=np.float32, chunks=64).reshape(16, 16)
    y_coords = np.arange(16)[::-1].astype(np.float64)
    x_coords = np.arange(16).astype(np.float64)
    da = xr.DataArray(
        lazy_values,
        dims=("y", "x"),
        coords={"y": y_coords, "x": x_coords},
        attrs={"crs": 4326},
    )

    destination = str(tmp_path / "test_1938_dask.tif")
    rv = to_geotiff(da, destination)
    assert isinstance(rv, str)
    assert rv == destination
    assert os.path.exists(rv)
def test_writer_returns_are_not_none(tmp_path):
    """Neither ``to_geotiff`` nor ``write_vrt`` may return ``None``."""
    # ``tmp_path`` is used on purpose instead of
    # ``tempfile.TemporaryDirectory``. According to the note left by the
    # original author, ``write_vrt`` reads its sources through the
    # module-level ``_MmapCache`` in ``_reader.py``, which keeps the
    # handle and mmap of ``src.tif`` open; on Windows that blocks
    # ``os.unlink`` (WinError 32), so a synchronous directory teardown
    # would raise before the test returns. ``tmp_path`` cleanup is
    # deferred to pytest and tolerates the still-open handle.
    da = _small_da()

    tif_rv = to_geotiff(da, str(tmp_path / "out.tif"))
    assert tif_rv is not None

    src = str(tmp_path / "src.tif")
    to_geotiff(da, src)
    vrt_rv = write_vrt(str(tmp_path / "m.vrt"), [src])
    assert vrt_rv is not None
def _peak_ratio_for_write_tiled(data: np.ndarray, tile_size: int) -> float:
    """Return the extra ``tracemalloc`` peak of one uncompressed
    ``_write_tiled`` call, as a multiple of ``data.nbytes``.

    Parameters
    ----------
    data : np.ndarray
        Raster handed to ``_write_tiled``.
    tile_size : int
        Tile edge length forwarded to ``_write_tiled``.

    Returns
    -------
    float
        ``(peak - baseline) / data.nbytes``, where ``baseline`` is the
        traced size immediately before the call.

    Tracing is only started (and later stopped) here when it is not
    already active, so a session launched with ``-X tracemalloc`` or
    ``PYTHONTRACEMALLOC`` keeps tracing after this helper returns.
    """
    owns_tracing = not tracemalloc.is_tracing()
    if owns_tracing:
        tracemalloc.start()
    try:
        # ``reset_peak`` sets the peak to the current traced size; record
        # that size so memory traced before this call is not charged to
        # ``_write_tiled``.
        tracemalloc.reset_peak()
        baseline, _ = tracemalloc.get_traced_memory()
        _write_tiled(data, COMPRESSION_NONE, 1, tile_size=tile_size)
        _current, peak = tracemalloc.get_traced_memory()
    finally:
        if owns_tracing:
            tracemalloc.stop()
    return (peak - baseline) / data.nbytes
def _make_float32(h: int = 8, w: int = 8) -> xr.DataArray:
    """Build an ``h`` x ``w`` float32 ramp raster for the layout tests.

    Both coordinate axes ascend from 0 as float64 and ``attrs['crs']``
    is 4326.
    """
    values = np.arange(h * w, dtype=np.float32).reshape(h, w)
    axis_coords = {
        "x": np.arange(w, dtype=np.float64),
        "y": np.arange(h, dtype=np.float64),
    }
    return xr.DataArray(
        values,
        dims=["y", "x"],
        coords=axis_coords,
        attrs={"crs": 4326},
    )
def _build_source_tif(tmp_path, name='src.tif'):
    """Write an 8x8 float32 GeoTIFF under ``tmp_path`` and return its path.

    The raster carries ``crs`` 4326 and an explicit ``transform``
    attribute; the returned value is the ``str`` path of the file, which
    the VRT tests use as their source.
    """
    values = np.arange(8 * 8, dtype=np.float32).reshape(8, 8)
    raster = xr.DataArray(
        values,
        dims=['y', 'x'],
        coords={'y': np.arange(8.0, 0, -1), 'x': np.arange(8.0)},
        attrs={'crs': 4326, 'transform': (1.0, 0, 0.0, 0, -1.0, 8.0)},
    )
    destination = str(tmp_path / name)
    to_geotiff(raster, destination)
    return destination
def test_write_vrt_vrt_path_kwarg_emits_deprecation_warning(tmp_path):
    """The legacy ``vrt_path=`` keyword still writes the VRT but warns.

    The call must emit a ``DeprecationWarning`` whose message mentions
    ``vrt_path``, return the destination path, and leave the file on
    disk.
    """
    source_tif = _build_source_tif(tmp_path)
    destination = str(tmp_path / 'out.vrt')

    with pytest.warns(DeprecationWarning, match='vrt_path'):
        returned = write_vrt(vrt_path=destination, source_files=[source_tif])

    assert returned == destination
    assert os.path.exists(destination)
def test_write_vrt_no_path_raises(tmp_path):
    """Omitting both ``path`` and ``vrt_path`` raises ``TypeError``.

    Only ``source_files`` is supplied; the error message must mention
    ``path`` so the caller can tell which argument is missing.
    """
    source_tif = _build_source_tif(tmp_path)
    with pytest.raises(TypeError, match='path'):
        write_vrt(source_files=[source_tif])
def test_write_vrt_first_arg_name_matches_writer_trio():
    """All three writers name their destination parameter ``path``.

    ``to_geotiff`` and ``write_geotiff_gpu`` take the data first, so the
    destination is parameter index 1; ``write_vrt`` takes the
    destination first, so it is index 0.
    """
    destination_slots = (
        (to_geotiff, 1),         # data, path -> index 1
        (write_geotiff_gpu, 1),  # data, path -> index 1
        (write_vrt, 0),          # path, source_files -> index 0
    )
    eager_first, gpu_first, vrt_first = [
        list(inspect.signature(writer).parameters)[slot]
        for writer, slot in destination_slots
    ]
    assert eager_first == 'path'
    assert gpu_first == 'path'
    assert vrt_first == 'path'
def test_write_vrt_accepts_crs_kwarg():
    """``write_vrt`` exposes a ``crs`` parameter whose default is ``None``."""
    # ``inspect`` is already imported at module level.
    params = inspect.signature(write_vrt).parameters
    assert 'crs' in params
    assert params['crs'].default is None
+ """ + src = _build_source_tif(tmp_path, 'epsg_int.tif') + vrt_path = str(tmp_path / 'epsg_int.vrt') + + out = write_vrt(vrt_path, [src], crs=4326) + assert out == vrt_path + assert os.path.exists(vrt_path) + + da = read_vrt(vrt_path) + assert da.attrs.get('crs') == 4326 + + +def test_write_vrt_crs_wkt_string(tmp_path): + """``crs=`` passes the WKT through verbatim.""" + src = _build_source_tif(tmp_path, 'wkt.tif') + vrt_path = str(tmp_path / 'wkt.vrt') + + # Build a WKT for EPSG:4326 directly via pyproj + from pyproj import CRS + + wkt = CRS.from_epsg(4326).to_wkt() + + out = write_vrt(vrt_path, [src], crs=wkt) + assert out == vrt_path + da = read_vrt(vrt_path) + # WKT round-trips back to EPSG:4326 via _wkt_to_epsg + assert da.attrs.get('crs') == 4326 + + +def test_write_vrt_crs_none_falls_through(tmp_path): + """``crs=None`` (the default) picks the CRS from the first source.""" + src = _build_source_tif(tmp_path, 'none.tif') + vrt_path = str(tmp_path / 'none.vrt') + + with warnings.catch_warnings(): + warnings.simplefilter('error', DeprecationWarning) + out = write_vrt(vrt_path, [src], crs=None) + assert out == vrt_path + da = read_vrt(vrt_path) + # The source TIFF was written with EPSG:4326; VRT inherits it. + assert da.attrs.get('crs') == 4326 + + +def test_write_vrt_no_crs_kwarg_no_warning(tmp_path): + """Omitting ``crs`` entirely (the most common call shape) does not + emit any warning. 
The deprecation shim only fires when ``crs_wkt`` + is supplied explicitly.""" + src = _build_source_tif(tmp_path, 'no_kwarg.tif') + vrt_path = str(tmp_path / 'no_kwarg.vrt') + + with warnings.catch_warnings(): + warnings.simplefilter('error', DeprecationWarning) + write_vrt(vrt_path, [src]) # neither kwarg supplied + assert os.path.exists(vrt_path) + + +# --- Deprecation shim: ``crs_wkt=`` still works but warns --- + + +def test_write_vrt_crs_wkt_deprecated_warns(tmp_path): + """Passing ``crs_wkt=`` emits ``DeprecationWarning`` but still + produces a working VRT.""" + src = _build_source_tif(tmp_path, 'depr.tif') + vrt_path = str(tmp_path / 'depr.vrt') + + from pyproj import CRS + + wkt = CRS.from_epsg(4326).to_wkt() + + with pytest.warns(DeprecationWarning, match='crs_wkt'): + out = write_vrt(vrt_path, [src], crs_wkt=wkt) + assert out == vrt_path + da = read_vrt(vrt_path) + assert da.attrs.get('crs') == 4326 + + +def test_write_vrt_crs_wkt_none_still_warns(tmp_path): + """``crs_wkt=None`` (explicit) was a documented shape in the old + signature -- it now warns because the caller is using the + deprecated kwarg name, even if the value is None.""" + src = _build_source_tif(tmp_path, 'depr_none.tif') + vrt_path = str(tmp_path / 'depr_none.vrt') + + with pytest.warns(DeprecationWarning, match='crs_wkt'): + write_vrt(vrt_path, [src], crs_wkt=None) + assert os.path.exists(vrt_path) + + +def test_write_vrt_both_crs_and_crs_wkt_rejected(tmp_path): + """Passing both raises ``TypeError`` rather than silently picking + one. 
The error message names both kwargs so the caller can fix + their call quickly.""" + src = _build_source_tif(tmp_path, 'both.tif') + vrt_path = str(tmp_path / 'both.vrt') + + from pyproj import CRS + + wkt = CRS.from_epsg(4326).to_wkt() + + with pytest.raises(TypeError, match='crs.*crs_wkt'): + write_vrt(vrt_path, [src], crs=4326, crs_wkt=wkt) + + +# --- Cross-writer parity: same kwarg name on all three writers --- + + +def test_writer_trio_all_accept_crs_kwarg(): + """``crs`` is the canonical kwarg on every public writer in the trio. + A caller forwarding ``crs=`` to whichever writer matches the + output extension never has to special-case the kwarg name (issue + #1715).""" + import inspect + + from xrspatial.geotiff import to_geotiff, write_geotiff_gpu, write_vrt + + for fn in (to_geotiff, write_geotiff_gpu, write_vrt): + sig = inspect.signature(fn) + assert 'crs' in sig.parameters, f"{fn.__name__} missing crs kwarg" + assert ( + str(sig.parameters['crs'].annotation) == 'int | str | None' + ), f"{fn.__name__}.crs annotation drift" + + +# --- Negative tests: bad input shapes --- + + +def test_write_vrt_crs_invalid_type_rejected(tmp_path): + """``crs=`` (or any non-int/str/None) raises ``TypeError`` from + the public wrapper rather than from deep inside the writer.""" + src = _build_source_tif(tmp_path, 'bad_type.tif') + vrt_path = str(tmp_path / 'bad_type.vrt') + + with pytest.raises(TypeError, match='crs must be'): + write_vrt(vrt_path, [src], crs=[4326]) + + +def test_write_vrt_crs_unparseable_string_rejected(tmp_path): + """``crs='not a CRS'`` raises ``ValueError`` from the public + wrapper (the WKT keyword heuristic recognises PROJCS/GEOGCS only; + everything else is sent through pyproj which will reject it).""" + src = _build_source_tif(tmp_path, 'bad_str.tif') + vrt_path = str(tmp_path / 'bad_str.vrt') + + with pytest.raises(ValueError, match='Could not parse crs'): + write_vrt(vrt_path, [src], crs='not-a-real-crs-string') + + +# 
------------------------------------------------------------------------- +# Section: write_vrt bool nodata +# ------------------------------------------------------------------------- + +@pytest.fixture +def uint8_da(): + """Small uint8 DataArray for nodata round-trip tests.""" + arr = np.zeros((4, 4), dtype=np.uint8) + return xr.DataArray(arr, dims=['y', 'x']) + + +@pytest.fixture +def src_geotiff(uint8_da, tmp_path): + """A real on-disk source GeoTIFF that write_vrt can point at.""" + path = str(tmp_path / "src_1921.tif") + to_geotiff(uint8_da, path) + return path + + +# --------------------------------------------------------------------------- +# write_vrt: the bug from issue #1921 +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "bad", + [True, False, np.bool_(True), np.bool_(False)], +) +def test_write_vrt_rejects_bool_nodata(src_geotiff, tmp_path, bad): + """``write_vrt`` raises ``TypeError`` for any bool nodata. + + Fixed in issue #1921 by routing the public ``write_vrt`` wrapper + through ``_validate_nodata_arg`` and adding a defense-in-depth check + inside the internal ``_vrt.write_vrt``. + """ + vrt_path = str(tmp_path / "out_1921_bad.vrt") + with pytest.raises(TypeError, match="nodata must be numeric"): + write_vrt(vrt_path, [src_geotiff], nodata=bad) + + +@pytest.mark.parametrize( + "bad", + [True, False, np.bool_(True), np.bool_(False)], +) +def test_write_vrt_internal_rejects_bool_nodata(src_geotiff, tmp_path, bad): + """Direct call to the internal ``_vrt.write_vrt`` also rejects bool. + + Defense-in-depth: the public wrapper's ``_validate_nodata_arg`` is + skipped when callers reach the internal symbol directly (e.g. the + multi-tile dask write path in ``_writers/eager.py`` that calls + ``_vrt.write_vrt`` after writing per-tile GeoTIFFs, or a future + split of the wrapper). 
Parametrize over both ``bool`` and + ``np.bool_`` polarities so a refactor that narrows the internal + guard to just ``bool`` surfaces here, not in user code. See #1921. + """ + from xrspatial.geotiff._vrt import write_vrt as _internal_write_vrt + vrt_path = str(tmp_path / "out_1921_internal.vrt") + with pytest.raises(TypeError, match="nodata must be numeric"): + _internal_write_vrt(vrt_path, [src_geotiff], nodata=bad) + + +@pytest.mark.parametrize( + "good", + [0, 0.0, -9999, 255, np.int16(-1), np.float32(0.5)], +) +def test_write_vrt_accepts_numeric_nodata(src_geotiff, tmp_path, good): + """Numeric sentinels go through unchanged: the fix must not over-reject.""" + vrt_path = str(tmp_path / f"out_1921_numeric_{good!r}.vrt") + write_vrt(vrt_path, [src_geotiff], nodata=good) + with open(vrt_path) as f: + content = f.read() + # The exact format of the emitted nodata string is implementation + # detail; we only assert no "True"/"False" leaked through. + assert "True" not in content + assert "False" not in content + + +def test_write_vrt_accepts_none_nodata(src_geotiff, tmp_path): + """``nodata=None`` is the documented default and must keep working.""" + vrt_path = str(tmp_path / "out_1921_none.vrt") + write_vrt(vrt_path, [src_geotiff], nodata=None) + assert os.path.exists(vrt_path) + + +# --------------------------------------------------------------------------- +# write_geotiff_gpu: defense-in-depth parity +# --------------------------------------------------------------------------- + + +@requires_gpu +@pytest.mark.parametrize( + "bad", + [True, False, np.bool_(True), np.bool_(False)], +) +def test_write_geotiff_gpu_rejects_bool_nodata(uint8_da, tmp_path, bad): + """Direct ``write_geotiff_gpu`` call rejects bool nodata. + + The top-of-function ``_validate_nodata_arg`` call (added by #1973) + fires first; the deeper ``build_geo_tags`` guard is a second line + of defense. 
Pinning the behaviour so a refactor that drops the + top-of-function call surfaces here, not deep inside the geotag + builder. + """ + from xrspatial.geotiff import write_geotiff_gpu + path = str(tmp_path / "gpu_1921_bad.tif") + with pytest.raises(TypeError, match="nodata must be numeric"): + write_geotiff_gpu(uint8_da, path, nodata=bad) + + +@requires_gpu +def test_to_geotiff_gpu_dispatch_rejects_bool_nodata(uint8_da, tmp_path): + """Auto-dispatch path: ``to_geotiff(gpu=True, nodata=True)``. + + The eager-side guard fires before dispatch, so the GPU writer never + runs. Pin that ordering so a future refactor cannot accidentally + skip the eager check on the GPU dispatch path. + """ + path = str(tmp_path / "to_geotiff_gpu_1921.tif") + with pytest.raises(TypeError, match="nodata must be numeric"): + to_geotiff(uint8_da, path, gpu=True, nodata=True) + + +# ------------------------------------------------------------------------- +# Section: write_vrt int nodata +# ------------------------------------------------------------------------- + +def _nodata_annotation(fn): + sig = inspect.signature(fn) + return sig.parameters["nodata"].annotation + + +def test_write_vrt_public_nodata_accepts_int_annotation(): + """The public wrapper widens the annotation to include int.""" + ann = _nodata_annotation(write_vrt) + # Allow either typing.Union[float, int, None] or float | int | None. + if isinstance(ann, str): + # Forward-referenced string annotation (rare here; defensive). + assert "int" in ann, ann + return + if hasattr(typing, "get_args"): + args = set(typing.get_args(ann)) + if args: + assert int in args, args + return + # Fallback: stringify the annotation. 
+ assert "int" in str(ann), str(ann) + + +def test_write_vrt_internal_nodata_accepts_int_annotation(): + """The internal helper in `_vrt.py` mirrors the public surface.""" + ann = _nodata_annotation(_vrt_module.write_vrt) + if isinstance(ann, str): + assert "int" in ann, ann + return + if hasattr(typing, "get_args"): + args = set(typing.get_args(ann)) + if args: + assert int in args, args + return + assert "int" in str(ann), str(ann) + + +def test_write_vrt_int_nodata_round_trips(tmp_path): + """An int nodata renders to ```` and parses back the same.""" + # Build a tiny uint16 tile so the sentinel makes sense. + arr = np.array([[100, 200, 65535], + [300, 400, 500]], dtype=np.uint16) + da = xr.DataArray( + arr, + dims=["y", "x"], + coords={ + "y": np.array([0.5, 1.5]), + "x": np.array([0.5, 1.5, 2.5]), + }, + attrs={"crs": 4326}, + ) + tif_path = tmp_path / "source.tif" + to_geotiff(da, str(tif_path)) + + vrt_path = tmp_path / "mosaic.vrt" + # Passing an int sentinel must not raise; the surface should match + # to_geotiff's "float, int, or None" contract. + write_vrt(str(vrt_path), [str(tif_path)], nodata=65535) + + # Confirm the int round-trips through the parser back into a VRT band. 
+ parsed = _vrt_module.parse_vrt( + vrt_path.read_text(), vrt_dir=str(tmp_path)) + band_nodata = parsed.bands[0].nodata + assert band_nodata == 65535, band_nodata + + +# ------------------------------------------------------------------------- +# Section: VRT writer: int64 source +# ------------------------------------------------------------------------- + +def _da(arr: np.ndarray) -> xr.DataArray: + h, w = arr.shape + return xr.DataArray( + arr, + dims=('y', 'x'), + coords={'y': np.arange(h, dtype=np.float64), + 'x': np.arange(w, dtype=np.float64)}, + attrs={'res': (1.0, 1.0)}, + ) + + +def _read_vrt_dtype_attr(vrt_path: str) -> str: + """Extract the ``dataType`` attribute from the emitted VRT XML.""" + with open(vrt_path) as f: + xml = f.read() + m = re.search(r'dataType="([^"]+)"', xml) + assert m is not None, f"no dataType attribute in VRT:\n{xml}" + return m.group(1) + + +def test_uint64_vrt_writer_declares_uint64(tmp_path): + big = np.iinfo(np.uint64).max + arr = np.array([[1, 2], [big - 7, big]], dtype=np.uint64) + vrt = tmp_path / 'u64_1833.vrt' + to_geotiff(_da(arr), str(vrt)) + assert _read_vrt_dtype_attr(str(vrt)) == 'UInt64' + + +def test_int64_vrt_writer_declares_int64(tmp_path): + info = np.iinfo(np.int64) + arr = np.array([[info.min, -1], [0, info.max]], dtype=np.int64) + vrt = tmp_path / 'i64_1833.vrt' + to_geotiff(_da(arr), str(vrt)) + assert _read_vrt_dtype_attr(str(vrt)) == 'Int64' + + +def test_uint64_vrt_round_trip(tmp_path): + big = np.iinfo(np.uint64).max + arr = np.array([[1, 2], [big - 7, big]], dtype=np.uint64) + vrt = tmp_path / 'u64_rt_1833.vrt' + to_geotiff(_da(arr), str(vrt)) + r = open_geotiff(str(vrt)) + assert r.dtype == np.uint64 + np.testing.assert_array_equal(np.asarray(r.values), arr) + + +def test_int64_vrt_round_trip(tmp_path): + info = np.iinfo(np.int64) + arr = np.array([[info.min, -1], [0, info.max]], dtype=np.int64) + vrt = tmp_path / 'i64_rt_1833.vrt' + to_geotiff(_da(arr), str(vrt)) + r = open_geotiff(str(vrt)) + assert 
r.dtype == np.int64 + np.testing.assert_array_equal(np.asarray(r.values), arr) + + +# ------------------------------------------------------------------------- +# Section: VRT writer: photometric tag +# ------------------------------------------------------------------------- + +def _read_primary_ifd(path: str): + with open(path, 'rb') as f: + raw = f.read() + hdr = parse_header(raw[:16]) + return parse_ifd(raw, hdr.first_ifd_offset, hdr) + + +def _tile_paths(vrt_path: str): + stem = os.path.splitext(os.path.basename(vrt_path))[0] + tiles_dir = os.path.join( + os.path.dirname(os.path.abspath(vrt_path)), + stem + '_tiles', + ) + return sorted(glob.glob(os.path.join(tiles_dir, 'tile_*.tif'))) + + +def test_vrt_writer_forwards_photometric_miniswhite_1861(tmp_path): + """photometric='miniswhite' must tag every per-tile TIFF with + PhotometricInterpretation = 0 (MinIsWhite).""" + arr = np.zeros((48, 48), dtype=np.uint8) + da = xr.DataArray(arr, dims=('y', 'x')) + vrt_path = str(tmp_path / 'miniswhite_1861.vrt') + + to_geotiff(da, vrt_path, photometric='miniswhite', tile_size=16) + + tiles = _tile_paths(vrt_path) + assert tiles, 'expected at least one per-tile TIFF under _tiles/' + for tile in tiles: + ifd = _read_primary_ifd(tile) + assert ifd.get_value(TAG_PHOTOMETRIC) == 0, ( + f'tile {tile} has Photometric ' + f'{ifd.get_value(TAG_PHOTOMETRIC)}, expected 0 (MinIsWhite)' + ) + + +def test_vrt_writer_default_photometric_minisblack_1861(tmp_path): + """Control: default photometric='auto' keeps per-tile TIFFs at + PhotometricInterpretation = 1 (MinIsBlack).""" + arr = np.zeros((48, 48), dtype=np.uint8) + da = xr.DataArray(arr, dims=('y', 'x')) + vrt_path = str(tmp_path / 'default_auto_1861.vrt') + + to_geotiff(da, vrt_path, tile_size=16) + + tiles = _tile_paths(vrt_path) + assert tiles, 'expected at least one per-tile TIFF under _tiles/' + for tile in tiles: + ifd = _read_primary_ifd(tile) + assert ifd.get_value(TAG_PHOTOMETRIC) == 1, ( + f'tile {tile} has Photometric ' 
+ f'{ifd.get_value(TAG_PHOTOMETRIC)}, expected 1 (MinIsBlack)' + ) + + +# ------------------------------------------------------------------------- +# Section: VRT writer: source compatibility +# ------------------------------------------------------------------------- + +def _unique_dir(tmp_path, label: str) -> str: + d = tmp_path / f"vrt_1733_{label}_{uuid.uuid4().hex[:8]}" + d.mkdir() + return str(d) + + +def _write_tif(path: str, *, h: int, w: int, dtype, bands: int = 1, + px: float = 1.0, py: float = -1.0, + origin_x: float = 0.0, origin_y: float = 100.0, + crs: int | None = 4326) -> None: + if bands == 1: + arr = np.arange(h * w, dtype=dtype).reshape(h, w) + dims = ['y', 'x'] + else: + arr = np.arange(h * w * bands, dtype=dtype).reshape(h, w, bands) + dims = ['y', 'x', 'band'] + y = origin_y + (np.arange(h) + 0.5) * py + x = origin_x + (np.arange(w) + 0.5) * px + coords = {'y': y, 'x': x} + attrs = {} + if crs is not None: + attrs['crs'] = crs + da = xr.DataArray(arr, dims=dims, coords=coords, attrs=attrs) + to_geotiff(da, path, compression='none') + + +def test_mismatched_pixel_size_raises(tmp_path): + d = _unique_dir(tmp_path, "px") + a = os.path.join(d, "a.tif") + b = os.path.join(d, "b.tif") + _write_tif(a, h=4, w=4, dtype=np.float32, px=1.0, py=-1.0) + # Place b adjacent so the geometry would otherwise work, but the + # pixel size disagrees. 
+ _write_tif(b, h=4, w=4, dtype=np.float32, px=2.0, py=-2.0, + origin_x=4.0) + vrt = os.path.join(d, "out.vrt") + with pytest.raises(ValueError, match="pixel size"): + _priv_write_vrt(vrt, [a, b]) + + +def test_mismatched_dtype_raises(tmp_path): + d = _unique_dir(tmp_path, "dtype") + a = os.path.join(d, "a.tif") + b = os.path.join(d, "b.tif") + _write_tif(a, h=4, w=4, dtype=np.float32) + _write_tif(b, h=4, w=4, dtype=np.int16, origin_x=4.0) + vrt = os.path.join(d, "out.vrt") + with pytest.raises(ValueError, match="dtype|sample_format|bps"): + _priv_write_vrt(vrt, [a, b]) + + +def test_mismatched_band_count_raises(tmp_path): + d = _unique_dir(tmp_path, "bands") + a = os.path.join(d, "a.tif") + b = os.path.join(d, "b.tif") + _write_tif(a, h=4, w=4, dtype=np.float32, bands=1) + _write_tif(b, h=4, w=4, dtype=np.float32, bands=3, origin_x=4.0) + vrt = os.path.join(d, "out.vrt") + with pytest.raises(ValueError, match="band count"): + _priv_write_vrt(vrt, [a, b]) + + +def test_compatible_sources_succeed(tmp_path): + d = _unique_dir(tmp_path, "ok") + a = os.path.join(d, "a.tif") + b = os.path.join(d, "b.tif") + _write_tif(a, h=4, w=4, dtype=np.float32) + _write_tif(b, h=4, w=4, dtype=np.float32, origin_x=4.0) + vrt = os.path.join(d, "out.vrt") + _priv_write_vrt(vrt, [a, b]) + assert os.path.exists(vrt) + + +def test_pixel_size_within_tolerance_accepted(tmp_path): + d = _unique_dir(tmp_path, "tol") + a = os.path.join(d, "a.tif") + b = os.path.join(d, "b.tif") + _write_tif(a, h=4, w=4, dtype=np.float32, px=1.0, py=-1.0) + # Drift well below the 1e-6 relative tolerance. 
+ _write_tif(b, h=4, w=4, dtype=np.float32, + px=1.0 + 1e-10, py=-1.0, origin_x=4.0) + vrt = os.path.join(d, "out.vrt") + _priv_write_vrt(vrt, [a, b]) + assert os.path.exists(vrt) + + +def test_single_source_still_works(tmp_path): + d = _unique_dir(tmp_path, "one") + a = os.path.join(d, "a.tif") + _write_tif(a, h=4, w=4, dtype=np.float32) + vrt = os.path.join(d, "out.vrt") + _priv_write_vrt(vrt, [a]) + assert os.path.exists(vrt) + + +def test_mismatched_crs_raises(tmp_path): + # Two sources with different non-empty CRS values must be rejected, + # otherwise the VRT would inherit the first source's CRS and silently + # misproject the second. + d = _unique_dir(tmp_path, "crs_diff") + a = os.path.join(d, "a.tif") + b = os.path.join(d, "b.tif") + _write_tif(a, h=4, w=4, dtype=np.float32, crs=4326) + _write_tif(b, h=4, w=4, dtype=np.float32, origin_x=4.0, crs=3857) + vrt = os.path.join(d, "out.vrt") + with pytest.raises(ValueError, match="CRS"): + _priv_write_vrt(vrt, [a, b]) + + +def test_asymmetric_crs_raises_first_set_second_missing(tmp_path): + # First source has a CRS, second is written without one. The VRT + # would otherwise be tagged with the first source's CRS, which can + # misplace data when the second source actually came from a + # different (or unknown) projection. + d = _unique_dir(tmp_path, "crs_first") + a = os.path.join(d, "a.tif") + b = os.path.join(d, "b.tif") + _write_tif(a, h=4, w=4, dtype=np.float32, crs=4326) + _write_tif(b, h=4, w=4, dtype=np.float32, origin_x=4.0, crs=None) + vrt = os.path.join(d, "out.vrt") + with pytest.raises(ValueError, match="CRS"): + _priv_write_vrt(vrt, [a, b]) + + +def test_asymmetric_crs_raises_first_missing_second_set(tmp_path): + # Symmetric case: first source missing a CRS, second has one. The + # earlier guard only triggered when both sides were set, so this + # would have silently produced an untagged VRT despite one source + # carrying a known projection. 
+ d = _unique_dir(tmp_path, "crs_second") + a = os.path.join(d, "a.tif") + b = os.path.join(d, "b.tif") + _write_tif(a, h=4, w=4, dtype=np.float32, crs=None) + _write_tif(b, h=4, w=4, dtype=np.float32, origin_x=4.0, crs=4326) + vrt = os.path.join(d, "out.vrt") + with pytest.raises(ValueError, match="CRS"): + _priv_write_vrt(vrt, [a, b]) + + +def test_matching_crs_succeeds(tmp_path): + # Sanity check: two sources with the same CRS should still be + # accepted (defends against an overly aggressive equality check). + d = _unique_dir(tmp_path, "crs_match") + a = os.path.join(d, "a.tif") + b = os.path.join(d, "b.tif") + _write_tif(a, h=4, w=4, dtype=np.float32, crs=4326) + _write_tif(b, h=4, w=4, dtype=np.float32, origin_x=4.0, crs=4326) + vrt = os.path.join(d, "out.vrt") + _priv_write_vrt(vrt, [a, b]) + assert os.path.exists(vrt) + + +def test_both_missing_crs_succeeds(tmp_path): + # If neither source has a CRS, the VRT just won't be tagged with one + # and there's nothing to mis-tag. This must not raise. 
+ d = _unique_dir(tmp_path, "crs_both_missing") + a = os.path.join(d, "a.tif") + b = os.path.join(d, "b.tif") + _write_tif(a, h=4, w=4, dtype=np.float32, crs=None) + _write_tif(b, h=4, w=4, dtype=np.float32, origin_x=4.0, crs=None) + vrt = os.path.join(d, "out.vrt") + _priv_write_vrt(vrt, [a, b]) + assert os.path.exists(vrt) + + +# ------------------------------------------------------------------------- +# Section: VRT writer: tiled output +# ------------------------------------------------------------------------- + +@pytest.fixture +def sample_raster(): + """200x200 float32 raster with coords and CRS.""" + arr = np.random.default_rng(55).random((200, 200), dtype=np.float32) + y = np.linspace(41.0, 40.0, 200) # north-to-south + x = np.linspace(-106.0, -105.0, 200) + da = xr.DataArray(arr, dims=['y', 'x'], + coords={'y': y, 'x': x}, + attrs={'crs': 4326, 'nodata': -9999.0}) + return da + + +class TestVrtOutputNumpy: + def test_creates_vrt_and_tiles_dir(self, sample_raster, tmp_path): + vrt_path = str(tmp_path / 'out_1083.vrt') + to_geotiff(sample_raster, vrt_path) + assert os.path.exists(vrt_path) + tiles_dir = str(tmp_path / 'out_1083_tiles') + assert os.path.isdir(tiles_dir) + tile_files = os.listdir(tiles_dir) + assert len(tile_files) > 0 + assert all(f.endswith('.tif') for f in tile_files) + + def test_round_trip_numpy(self, sample_raster, tmp_path): + vrt_path = str(tmp_path / 'rt_1083.vrt') + to_geotiff(sample_raster, vrt_path) + result = open_geotiff(vrt_path) + np.testing.assert_array_almost_equal( + result.values, sample_raster.values, decimal=5) + + def test_tile_naming_convention(self, sample_raster, tmp_path): + vrt_path = str(tmp_path / 'named_1083.vrt') + to_geotiff(sample_raster, vrt_path, tile_size=128) + tiles_dir = str(tmp_path / 'named_1083_tiles') + files = sorted(os.listdir(tiles_dir)) + # 200x200 with tile_size=128 -> 2x2 grid (TIFF 6 spec requires + # tile_size be a multiple of 16; 100 was rejected post-#1767). 
+ assert files == [ + 'tile_00_00.tif', 'tile_00_01.tif', + 'tile_01_00.tif', 'tile_01_01.tif', + ] + + def test_relative_paths_in_vrt(self, sample_raster, tmp_path): + vrt_path = str(tmp_path / 'rel_1083.vrt') + to_geotiff(sample_raster, vrt_path) + with open(vrt_path) as f: + content = f.read() + # Paths should be relative (no leading /) + assert 'rel_1083_tiles/' in content + assert str(tmp_path) not in content + + def test_compression_level_passed_to_tiles(self, sample_raster, tmp_path): + vrt_path = str(tmp_path / 'cl_1083.vrt') + to_geotiff(sample_raster, vrt_path, compression='zstd', + compression_level=1) + result = open_geotiff(vrt_path) + np.testing.assert_array_almost_equal( + result.values, sample_raster.values, decimal=5) + + +class TestVrtOutputDask: + def test_dask_round_trip(self, sample_raster, tmp_path): + dask_da = sample_raster.chunk({'y': 100, 'x': 100}) + vrt_path = str(tmp_path / 'dask_1083.vrt') + to_geotiff(dask_da, vrt_path) + result = open_geotiff(vrt_path) + np.testing.assert_array_almost_equal( + result.values, sample_raster.values, decimal=5) + + def test_dask_one_tile_per_chunk(self, sample_raster, tmp_path): + dask_da = sample_raster.chunk({'y': 100, 'x': 100}) + vrt_path = str(tmp_path / 'chunks_1083.vrt') + to_geotiff(dask_da, vrt_path) + tiles_dir = str(tmp_path / 'chunks_1083_tiles') + # 200x200 chunked 100x100 -> 2x2 = 4 tiles + assert len(os.listdir(tiles_dir)) == 4 + + +class TestVrtEdgeCases: + def test_cog_with_vrt_raises(self, sample_raster, tmp_path): + vrt_path = str(tmp_path / 'cog_1083.vrt') + with pytest.raises(ValueError, match='cog.*vrt|vrt.*cog|COG.*VRT|VRT.*COG|cog.*VRT|vrt.*COG'): # noqa: E501 + to_geotiff(sample_raster, vrt_path, cog=True) + + def test_overview_levels_with_vrt_raises(self, sample_raster, tmp_path): + vrt_path = str(tmp_path / 'ovr_1083.vrt') + with pytest.raises(ValueError, match='overview.*vrt|vrt.*overview|overview.*VRT|VRT.*overview'): # noqa: E501 + to_geotiff(sample_raster, vrt_path, 
overview_levels=[2, 4]) + + def test_nonempty_tiles_dir_raises(self, sample_raster, tmp_path): + tiles_dir = tmp_path / 'exist_1083_tiles' + tiles_dir.mkdir() + (tiles_dir / 'dummy.tif').write_text('x') + vrt_path = str(tmp_path / 'exist_1083.vrt') + with pytest.raises(FileExistsError): + to_geotiff(sample_raster, vrt_path) + + def test_empty_tiles_dir_ok(self, sample_raster, tmp_path): + tiles_dir = tmp_path / 'empty_1083_tiles' + tiles_dir.mkdir() + vrt_path = str(tmp_path / 'empty_1083.vrt') + to_geotiff(sample_raster, vrt_path) + assert os.path.exists(vrt_path) diff --git a/xrspatial/geotiff/tests/test_bigtiff_cog_compliance_2286.py b/xrspatial/geotiff/tests/write/test_bigtiff.py similarity index 89% rename from xrspatial/geotiff/tests/test_bigtiff_cog_compliance_2286.py rename to xrspatial/geotiff/tests/write/test_bigtiff.py index 2a5068046..f677d08ce 100644 --- a/xrspatial/geotiff/tests/test_bigtiff_cog_compliance_2286.py +++ b/xrspatial/geotiff/tests/write/test_bigtiff.py @@ -1,42 +1,16 @@ -"""External-interop compliance suite for BigTIFF COG output. - -Issue #2303 (part of #2286 -- production-readiness wave D). Sibling of -``test_cog_writer_compliance.py`` (issue #2292): same shape, same -assertions, but the matrix is scoped to the BigTIFF-specific layout -rather than re-validating every codec. - -Scope ------ - -These tests force BigTIFF via ``bigtiff=True`` on a small raster so the -output exercises: - -- The BigTIFF header (magic ``43`` + 8-byte offsets). -- BigTIFF IFD entry layout (8-byte counts, 20-byte entry stride, - 8-byte next-IFD pointer). -- Tile / overview offset and byte-count arrays in LONG8 form. - -A small raster is enough because the on-disk layout machinery flips -the same switches whether the file is 50 KB or 5 GB. Allocating a true -multi-gigabyte buffer is out of scope here -- the auto-BigTIFF threshold -is exercised separately by monkeypatching the IFD-overhead helper, so -the decision logic gets coverage without the allocation. 
- -Tier decision rationale ------------------------ - -Promotion of ``writer.bigtiff_cog`` to ``stable`` is deliberately out -of scope for this PR. ``SUPPORTED_FEATURES['writer.bigtiff_cog']`` is -set to ``advanced`` and stays there even when every row of this suite -passes -- promotion happens after the gate has lived in CI for a -release cycle (same rule the rest of the #2286 wave follows). If a row -uncovers a real BigTIFF COG bug, mark that row ``xfail`` with a linked -follow-up issue rather than fixing the writer here. +"""BigTIFF threshold and COG compliance for big files. + +Covers the BigTIFF-specific layout (header magic, 8-byte offsets, +20-byte IFD entries, tile and overview offset tables) for the +codec / dtype / band-count matrix, plus the auto-promotion row that +drives the threshold via the IFD-overhead helper. + +Tests-only restructure for epic #2390. """ + from __future__ import annotations import struct - import numpy as np import pytest import xarray as xr @@ -44,6 +18,11 @@ from xrspatial.geotiff import to_geotiff from xrspatial.geotiff._header import parse_all_ifds, parse_header + +# ------------------------------------------------------------------------- +# Section: BigTIFF + COG compliance matrix +# ------------------------------------------------------------------------- + rasterio = pytest.importorskip( "rasterio", reason="rasterio is required for the BigTIFF COG compliance suite", diff --git a/xrspatial/geotiff/tests/write/test_cog.py b/xrspatial/geotiff/tests/write/test_cog.py new file mode 100644 index 000000000..3d290beae --- /dev/null +++ b/xrspatial/geotiff/tests/write/test_cog.py @@ -0,0 +1,2268 @@ +"""COG writer compliance and overview/nodata combinations. + +Covers the COG public API, the external-interop compliance suite +(rasterio / rio-cogeo / GDAL validator), invalid-input errors, the +parity rows that exercise xrspatial-write -> external-read and the +mirror direction, and the tile-layout / tile-size pre-flight gates. 
+ +HTTP-side COG tests stay separate (integration cluster, PR 9). +Tests-only restructure for epic #2390. +""" + +from __future__ import annotations + +import numpy as np +import pytest +import xarray as xr +from .._helpers.markers import gpu_available +import os +import importlib.util +import io +import http.server +import pathlib +import socketserver +import threading +import uuid +import warnings +import contextlib +import signal + +from xrspatial.geotiff import open_geotiff, to_geotiff +from xrspatial.geotiff._errors import ConflictingCRSError +from xrspatial.geotiff._geotags import GeoTransform +from xrspatial.geotiff._header import parse_all_ifds, parse_header +from xrspatial.geotiff._writer import write, write as _array_write + + +# ------------------------------------------------------------------------- +# Section: COG writer (public API) +# ------------------------------------------------------------------------- + +class TestCOGWriter: + def test_cog_layout_ifds_before_data(self, tmp_path): + """COG spec: all IFDs should come before pixel data.""" + arr = np.arange(256, dtype=np.float32).reshape(16, 16) + path = str(tmp_path / 'cog.tif') + write(arr, path, compression='deflate', tiled=True, tile_size=8, + cog=True, overview_levels=[2]) + + with open(path, 'rb') as f: + data = f.read() + + header = parse_header(data) + ifds = parse_all_ifds(data, header) + + assert len(ifds) >= 2 # full res + at least 1 overview + + # All IFD offsets should be < the first tile data offset + all_tile_offsets = [] + for ifd in ifds: + tile_off = ifd.tile_offsets + if tile_off: + all_tile_offsets.extend(tile_off) + + if all_tile_offsets: + first_data_offset = min(all_tile_offsets) + # The last IFD byte should be before the first tile data + # (This is the COG layout requirement) + assert header.first_ifd_offset < first_data_offset + + def test_cog_round_trip(self, tmp_path): + arr = np.arange(256, dtype=np.float32).reshape(16, 16) + gt = GeoTransform(-120.0, 45.0, 0.001, 
-0.001) + path = str(tmp_path / 'cog_rt.tif') + write(arr, path, geo_transform=gt, crs_epsg=4326, + compression='deflate', tiled=True, tile_size=8, + cog=True, overview_levels=[2]) + + result, geo = read_to_array_local(path) + np.testing.assert_array_equal(result, arr) + assert geo.crs_epsg == 4326 + + def test_cog_auto_overviews(self, tmp_path): + """Auto-generate overviews when none specified.""" + arr = np.arange(1024, dtype=np.float32).reshape(32, 32) + path = str(tmp_path / 'cog_auto.tif') + write(arr, path, compression='deflate', tiled=True, tile_size=8, + cog=True) + + with open(path, 'rb') as f: + data = f.read() + + header = parse_header(data) + ifds = parse_all_ifds(data, header) + # Should have at least 2 IFDs (full res + overviews) + assert len(ifds) >= 2 + + +class TestPublicAPI: + def test_read_write_round_trip(self, tmp_path): + """Write a DataArray, read it back, verify values and coords.""" + y = np.linspace(45.0, 44.0, 10) + x = np.linspace(-120.0, -119.0, 12) + data = np.random.RandomState(42).rand(10, 12).astype(np.float32) + + da = xr.DataArray( + data, dims=['y', 'x'], + coords={'y': y, 'x': x}, + attrs={'crs': 4326}, + name='test', + ) + + path = str(tmp_path / 'round_trip.tif') + to_geotiff(da, path, compression='deflate', tiled=False) + + result = open_geotiff(path) + np.testing.assert_array_almost_equal(result.values, data, decimal=5) + assert result.attrs.get('crs') == 4326 + + def test_open_geotiff_name(self, tmp_path): + """DataArray name defaults to filename stem.""" + arr = np.zeros((4, 4), dtype=np.float32) + path = str(tmp_path / 'myfile.tif') + write(arr, path, compression='none', tiled=False) + + da = open_geotiff(path) + assert da.name == 'myfile' + + def test_open_geotiff_custom_name(self, tmp_path): + arr = np.zeros((4, 4), dtype=np.float32) + path = str(tmp_path / 'test.tif') + write(arr, path, compression='none', tiled=False) + + da = open_geotiff(path, name='custom') + assert da.name == 'custom' + + def 
class TestCOGOverviewResampling:
    """Each overview resampling method writes a parseable COG."""

    @staticmethod
    def _load_ifds(tif_path):
        """Read the file at ``tif_path`` and return its parsed IFD list."""
        with open(tif_path, 'rb') as fh:
            raw = fh.read()
        return parse_all_ifds(raw, parse_header(raw))

    def test_overview_mean(self, tmp_path):
        """``mean`` on a 4x4 raster yields two IFDs, the second one 2x2."""
        pixels = np.array([[1, 3, 5, 7],
                           [2, 4, 6, 8],
                           [9, 11, 13, 15],
                           [10, 12, 14, 16]], dtype=np.float32)
        out = str(tmp_path / 'cog_1150_mean.tif')
        write(pixels, out, compression='deflate', tiled=True, tile_size=4,
              cog=True, overview_levels=[2], overview_resampling='mean')

        parsed = self._load_ifds(out)
        assert len(parsed) == 2
        # The second IFD is the overview level.
        overview = parsed[1]
        assert overview.width == 2
        assert overview.height == 2

    def test_overview_nearest(self, tmp_path):
        """``nearest`` overviews leave the full-resolution pixels intact."""
        pixels = np.arange(64, dtype=np.float32).reshape(8, 8)
        out = str(tmp_path / 'cog_1150_nearest.tif')
        write(pixels, out, compression='deflate', tiled=True, tile_size=4,
              cog=True, overview_levels=[2], overview_resampling='nearest')

        decoded, _geo = read_to_array_local(out)
        np.testing.assert_array_equal(decoded, pixels)

    def test_overview_mode(self, tmp_path):
        """``mode`` on categorical int32 data writes exactly two IFDs."""
        # Four uniform 2x2 quadrants, one category per quadrant.
        pixels = np.array([[1, 1, 2, 2],
                           [1, 1, 2, 2],
                           [3, 3, 4, 4],
                           [3, 3, 4, 4]], dtype=np.int32)
        out = str(tmp_path / 'cog_1150_mode.tif')
        write(pixels, out, compression='deflate', tiled=True, tile_size=4,
              cog=True, overview_levels=[2], overview_resampling='mode')

        assert len(self._load_ifds(out)) == 2

    @pytest.mark.parametrize('method', ['min', 'max', 'median'])
    def test_overview_other_methods(self, tmp_path, method):
        """``min`` / ``max`` / ``median`` each write at least one overview."""
        pixels = np.arange(256, dtype=np.float32).reshape(16, 16)
        out = str(tmp_path / f'cog_1150_{method}.tif')
        write(pixels, out, compression='deflate', tiled=True, tile_size=8,
              cog=True, overview_levels=[2], overview_resampling=method)

        assert len(self._load_ifds(out)) >= 2
class TestCOGPublicAPIOverviews:
    """``to_geotiff(cog=True)`` writes overview IFDs via the public API."""

    @staticmethod
    def _ifd_count(tif_path):
        """Return how many IFDs the file at ``tif_path`` contains."""
        with open(tif_path, 'rb') as fh:
            blob = fh.read()
        return len(parse_all_ifds(blob, parse_header(blob)))

    def test_to_geotiff_cog_with_overviews(self, tmp_path):
        """Public to_geotiff() with cog=True writes overviews."""
        values = np.random.RandomState(42).rand(32, 32).astype(np.float32)
        raster = xr.DataArray(
            values,
            dims=['y', 'x'],
            coords={
                'y': np.linspace(45.0, 44.0, 32),
                'x': np.linspace(-120.0, -119.0, 32),
            },
            attrs={'crs': 4326},
        )

        out = str(tmp_path / 'cog_1150_api.tif')
        to_geotiff(raster, out, compression='deflate', cog=True,
                   tile_size=16, overview_levels=[2])

        # Pixel round-trip through the public reader.
        reopened = open_geotiff(out)
        np.testing.assert_array_almost_equal(reopened.values, values,
                                             decimal=5)

        # COG structure: full resolution plus at least one overview.
        assert self._ifd_count(out) >= 2

    def test_to_geotiff_cog_auto_overviews(self, tmp_path):
        """Public API auto-generates overviews when only cog=True."""
        values = np.random.RandomState(7).rand(64, 64).astype(np.float32)
        raster = xr.DataArray(values, dims=['y', 'x'])

        out = str(tmp_path / 'cog_1150_api_auto.tif')
        to_geotiff(raster, out, compression='deflate', cog=True, tile_size=16)

        assert self._ifd_count(out) >= 2
"""GPU-specific COG overview tests (require CuPy + CUDA).""" + + def test_gpu_cog_round_trip(self, tmp_path): + import cupy + arr = np.random.RandomState(42).rand(32, 32).astype(np.float32) + gpu_arr = cupy.asarray(arr) + + path = str(tmp_path / 'cog_1150_gpu_rt.tif') + from xrspatial.geotiff import write_geotiff_gpu + write_geotiff_gpu(gpu_arr, path, crs=4326, compression='deflate', + cog=True, overview_levels=[2]) + + result = open_geotiff(path) + np.testing.assert_array_almost_equal(result.values, arr, decimal=5) + + with open(path, 'rb') as f: + raw = f.read() + header = parse_header(raw) + ifds = parse_all_ifds(raw, header) + assert len(ifds) >= 2 + + def test_gpu_cog_auto_overviews(self, tmp_path): + import cupy + arr = np.random.RandomState(7).rand(64, 64).astype(np.float32) + gpu_arr = cupy.asarray(arr) + + path = str(tmp_path / 'cog_1150_gpu_auto.tif') + from xrspatial.geotiff import write_geotiff_gpu + write_geotiff_gpu(gpu_arr, path, compression='deflate', + cog=True, tile_size=16) + + with open(path, 'rb') as f: + raw = f.read() + header = parse_header(raw) + ifds = parse_all_ifds(raw, header) + assert len(ifds) >= 2 + + def test_gpu_overview_resampling_nearest(self, tmp_path): + import cupy + arr = np.arange(64, dtype=np.float32).reshape(8, 8) + gpu_arr = cupy.asarray(arr) + + path = str(tmp_path / 'cog_1150_gpu_nearest.tif') + from xrspatial.geotiff import write_geotiff_gpu + write_geotiff_gpu(gpu_arr, path, compression='deflate', + cog=True, overview_levels=[2], + overview_resampling='nearest') + + result = open_geotiff(path) + np.testing.assert_array_equal(result.values, arr) + + def test_gpu_make_overview_values(self): + """GPU overview block-reduce matches CPU for simple case.""" + import cupy + + from xrspatial.geotiff._gpu_decode import make_overview_gpu + from xrspatial.geotiff._writer import _make_overview + + arr = np.random.RandomState(42).rand(16, 16).astype(np.float32) + gpu_arr = cupy.asarray(arr) + + for method in ('mean', 'nearest', 
def read_to_array_local(path):
    """Decode the local file at ``path`` with the reader's ``read_to_array``.

    The reader module is imported inside the function rather than at module
    import time. The return value is passed through unchanged; callers in
    this file unpack it as ``(array, geo)``.
    """
    from xrspatial.geotiff import _reader

    return _reader.read_to_array(path)
+STABLE_CODECS = ["none", "deflate", "lzw", "zstd", "packbits"] + +DTYPES = [ + pytest.param(np.uint16, id="uint16"), + pytest.param(np.float32, id="float32"), +] + +BAND_COUNTS = [ + pytest.param(1, id="1band"), + pytest.param(3, id="3band"), +] + +# ``raster_type`` attr the writer understands: ``'area'`` (default) or +# ``'point'``. We pass via attrs because that is the public surface. +GEOREF_MODES = [ + pytest.param("area", id="area"), + pytest.param("point", id="point"), +] + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_data( + dtype: np.dtype, + *, + bands: int = 1, + height: int = 64, + width: int = 64, + rng_seed: int = 17, +) -> np.ndarray: + """Deterministic raster shaped (h, w) or (h, w, bands).""" + dt = np.dtype(dtype) + rng = np.random.RandomState(rng_seed + bands) + if dt.kind == "f": + base = rng.uniform(-100.0, 100.0, size=(height, width)).astype(dt) + else: + info = np.iinfo(dt) + high = min(info.max, 1000) + base = rng.randint(0, high, size=(height, width)).astype(dt) + if bands == 1: + return base + # Stack with small per-band offsets so bands are distinguishable. 
+ layers = [base] + for b in range(1, bands): + layers.append((base + b * 7).astype(dt)) + return np.stack(layers, axis=-1) # (h, w, bands) + + +def _build_da( + arr: np.ndarray, + *, + raster_type: str = "area", + crs: int | str | None = 4326, +) -> xr.DataArray: + """Wrap ``arr`` in a DataArray with EPSG:4326 coords and georef attrs.""" + if arr.ndim == 2: + h, w = arr.shape + dims = ("y", "x") + else: + h, w, _b = arr.shape + dims = ("y", "x", "band") + y = np.linspace(45.0, 44.0, h, dtype=np.float64) + x = np.linspace(-120.0, -119.0, w, dtype=np.float64) + coords: dict = {"y": y, "x": x} + attrs: dict = {} + if crs is not None: + attrs["crs"] = crs + if raster_type == "point": + attrs["raster_type"] = "point" + return xr.DataArray(arr, dims=dims, coords=coords, attrs=attrs) + + +def _pick_sentinel(dtype: np.dtype) -> float | int: + """Pick a nodata sentinel that fits the dtype. + + The signed-int branch is unreachable from the current DTYPES list + (only ``uint16`` and ``float32``) but is kept for the eventual case + where the matrix grows. Dead branches in a helper are cheap and the + intent is clearer than special-casing the current matrix here. + """ + dt = np.dtype(dtype) + if dt.kind == "f": + return -9999.0 + if dt.kind == "u": + return int(np.iinfo(dt).max) # e.g. 65535 for uint16 + return int(np.iinfo(dt).min) + + +def _arrange_for_rasterio(arr: np.ndarray) -> np.ndarray: + """Convert (h, w[, bands]) into rasterio's (bands, h, w).""" + if arr.ndim == 2: + return arr[np.newaxis, :, :] + # (h, w, bands) -> (bands, h, w) + return np.transpose(arr, (2, 0, 1)) + + +def _is_tiled(src) -> bool: + """Rasterio's ``is_tiled`` is deprecated; reproduce its check locally. + + A dataset is tiled when block dimensions are square and smaller than + the dataset itself (rasterio's old definition). ``block_shapes`` is + a per-band list of ``(height, width)`` tuples. 
+ """ + shapes = src.block_shapes + if not shapes: + return False + bh, bw = shapes[0] + return bh == bw and bh < src.height and bw < src.width + + +def _assert_ifds_before_data(path: str) -> None: + """COG layout contract: every IFD sits before any tile data block.""" + with open(path, "rb") as f: + data = f.read() + header = parse_header(data) + ifds = parse_all_ifds(data, header) + assert len(ifds) >= 2, ( + f"expected at least 2 IFDs (full res + overview), got {len(ifds)}" + ) + tile_offsets: list[int] = [] + for ifd in ifds: + offs = ifd.tile_offsets + if offs: + tile_offsets.extend(offs) + assert tile_offsets, "no tile offsets found; output is not tiled" + first_data = min(tile_offsets) + # All IFD anchors must sit before the first tile blob. + assert header.first_ifd_offset < first_data, ( + f"first IFD offset {header.first_ifd_offset} >= first tile data " + f"offset {first_data}; IFDs must come before image data in a COG" + ) + + +def _require_validator_env() -> bool: + """Return True if ``XRSPATIAL_REQUIRE_COG_VALIDATOR`` is set truthy. + + Truthy values: ``1``, ``true``, ``yes``, ``on`` (case-insensitive). + Anything else, including unset / empty, returns False. + + CI sets this to make a missing validator dependency a hard failure + rather than a silent skip. On a contributor laptop without rio-cogeo + or GDAL it is unset and the validator step skips cleanly. + """ + val = os.environ.get("XRSPATIAL_REQUIRE_COG_VALIDATOR", "") + return val.lower() in {"1", "true", "yes", "on"} + + +def _try_cog_validate(path: str) -> None: + """Call rio-cogeo's validator if present, else GDAL's. + + When ``XRSPATIAL_REQUIRE_COG_VALIDATOR=1`` is set in the environment + and neither validator is importable, fail loudly instead of skipping + so a misconfigured CI job cannot pretend the gate passed. When the + env var is unset, missing dependencies skip cleanly. 
@pytest.mark.parametrize("bands", BAND_COUNTS)
@pytest.mark.parametrize("dtype", DTYPES)
@pytest.mark.parametrize("codec", STABLE_CODECS)
def test_codec_dtype_bands_roundtrip(tmp_path, codec, dtype, bands):
    """Every stable codec round-trips through rasterio without loss.

    For each (codec, dtype, band count) row the file is written with
    ``cog=True`` and one 2x overview, then reopened with rasterio to check:

    - the dataset is tiled;
    - band count and per-band dtype match the input;
    - full-resolution pixels are equal to the input;
    - every band reports overview factors ``[2]``;
    - the CRS resolves to EPSG:4326 and the transform is not identity;
    - the IFDs precede the tile data (COG layout).
    """
    rasterio = pytest.importorskip("rasterio")
    dtype_name = np.dtype(dtype).name
    expected_dtypes = (dtype_name,) * bands

    arr = _make_data(dtype, bands=bands, height=64, width=64)
    expected = _arrange_for_rasterio(arr)
    da = _build_da(arr, raster_type="area", crs=4326)

    path = str(tmp_path / f"2292_codec_{codec}_{dtype_name}_b{bands}.tif")
    to_geotiff(
        da,
        path,
        compression=codec,
        cog=True,
        tile_size=16,
        overview_levels=[2],
    )

    with rasterio.open(path) as src:
        assert _is_tiled(src), (
            f"{codec} {dtype} b{bands}: COG output must be tiled"
        )
        assert src.count == bands, (
            f"band count mismatch: expected {bands}, got {src.count}"
        )
        assert src.dtypes == expected_dtypes, (
            f"dtype tuple mismatch: expected "
            f"{expected_dtypes}, got {src.dtypes}"
        )

        # Lossless codecs: the base level must equal the input exactly.
        actual = src.read()
        assert actual.shape == expected.shape, (
            f"shape mismatch: expected {expected.shape}, got {actual.shape}"
        )
        np.testing.assert_array_equal(
            actual, expected,
            err_msg=f"base pixels diverged for codec={codec} dtype={dtype}",
        )

        # rasterio band indexes are 1-based.
        for b in range(1, bands + 1):
            ovs = src.overviews(b)
            assert ovs == [2], (
                f"band {b}: expected overview factors [2], got {ovs}"
            )

        # Georeferencing survived the write.
        assert src.crs is not None and src.crs.to_epsg() == 4326, (
            f"CRS round-trip failed: got {src.crs}"
        )
        assert not src.transform.is_identity, (
            "transform should not be identity for a georeferenced raster"
        )

    # COG layout invariant
    _assert_ifds_before_data(path)
def test_nodata_nan_survives(tmp_path):
    """NaN nodata: the tag reads back as NaN and NaN cells stay NaN.

    Two cells are set to NaN before writing with ``nodata=nan``. After
    reopening with rasterio the NaN mask must match the input and every
    finite cell must be equal to the input.
    """
    rasterio = pytest.importorskip("rasterio")
    arr = _make_data(np.float32, bands=1, height=64, width=64)
    for row, col in ((0, 0), (3, 9)):
        arr[row, col] = np.nan
    da = _build_da(arr, raster_type="area", crs=4326)

    path = str(tmp_path / "2292_nodata_nan.tif")
    to_geotiff(
        da,
        path,
        compression="deflate",
        cog=True,
        tile_size=16,
        overview_levels=[2],
        nodata=float("nan"),
    )

    with rasterio.open(path) as src:
        assert src.nodata is not None and np.isnan(src.nodata), (
            f"nodata tag should be NaN, got {src.nodata}"
        )
        actual = src.read(1)

    # NaN != NaN, so compare the masks first and the finite cells second.
    nan_mask = np.isnan(arr)
    np.testing.assert_array_equal(np.isnan(actual), nan_mask)
    finite = ~nan_mask
    np.testing.assert_array_equal(actual[finite], arr[finite])
def test_overviews_explicit_levels(tmp_path):
    """``overview_levels=[2, 4, 8]`` produces exactly those decimations.

    Checks the factor list rasterio reports for band 1, then opens each
    overview level on its own and compares its shape with the base shape
    floor-divided by the factor.
    """
    rasterio = pytest.importorskip("rasterio")
    factors = [2, 4, 8]
    arr = _make_data(np.float32, bands=1, height=128, width=128)
    base_h, base_w = arr.shape
    da = _build_da(arr, raster_type="area", crs=4326)

    path = str(tmp_path / "2292_overviews_explicit.tif")
    to_geotiff(
        da,
        path,
        compression="deflate",
        cog=True,
        tile_size=16,
        overview_levels=list(factors),
    )

    with rasterio.open(path) as src:
        assert src.overviews(1) == factors, (
            f"expected overviews [2, 4, 8], got {src.overviews(1)}"
        )

    # ``OVERVIEW_LEVEL`` is zero-based: level 0 is the first (2x) overview.
    for lvl, factor in enumerate(factors):
        want = (base_h // factor, base_w // factor)
        with rasterio.open(path, OVERVIEW_LEVEL=lvl) as ov:
            exp_h, exp_w = want
            assert ov.shape == want, (
                f"overview {factor}x: expected shape ({exp_h}, {exp_w}), "
                f"got {ov.shape}"
            )

    _assert_ifds_before_data(path)
def test_overviews_auto_generated(tmp_path):
    """``cog=True`` without ``overview_levels`` still builds a pyramid.

    The exact level count is not pinned. The test only requires at least
    one overview, every factor to be a power of two >= 2, and the factors
    to be strictly increasing.
    """
    rasterio = pytest.importorskip("rasterio")
    arr = _make_data(np.float32, bands=1, height=128, width=128)
    da = _build_da(arr, raster_type="area", crs=4326)

    path = str(tmp_path / "2292_overviews_auto.tif")
    to_geotiff(
        da,
        path,
        compression="deflate",
        cog=True,
        tile_size=32,
    )

    with rasterio.open(path) as src:
        ovs = src.overviews(1)

    assert len(ovs) >= 1, f"expected at least one overview, got {ovs}"

    # ``o & (o - 1) == 0`` holds when ``o`` has at most one set bit, which
    # also admits 0 and 1; the ``o >= 2`` bound excludes both, leaving
    # only real decimation factors.
    assert all(o >= 2 and (o & (o - 1)) == 0 for o in ovs), (
        f"auto overviews should be powers of two >= 2, got {ovs}"
    )

    # Compare each factor with its successor.
    assert all(nxt > cur for cur, nxt in zip(ovs[:-1], ovs[1:])), (
        f"auto overviews not strictly increasing: {ovs}"
    )

    _assert_ifds_before_data(path)
def test_external_cog_validator(tmp_path):
    """Validate a freshly written COG with an external validator.

    Writes a 256x256 float32 raster with two overview levels and hands the
    file to ``_try_cog_validate``, which decides whether to validate, skip,
    or fail depending on which validators are importable.
    """
    raster = _build_da(
        _make_data(np.float32, bands=1, height=256, width=256),
        raster_type="area",
        crs=4326,
    )
    out = str(tmp_path / "2292_validator.tif")

    to_geotiff(
        raster,
        out,
        compression="deflate",
        cog=True,
        tile_size=64,
        overview_levels=[2, 4],
    )

    _try_cog_validate(out)
+ """ + import builtins + + real_import = builtins.__import__ + + def _blocked_import(name, globals=None, locals=None, fromlist=(), level=0): + fl = tuple(fromlist) if fromlist else () + rio_match = ( + name == "rio_cogeo.cogeo" and "cog_validate" in fl + ) + gdal_match = ( + name == "osgeo_utils.samples" + and "validate_cloud_optimized_geotiff" in fl + ) + if rio_match or gdal_match: + raise ImportError(f"blocked for test: {name}") + return real_import(name, globals, locals, fromlist, level) + + monkeypatch.setattr(builtins, "__import__", _blocked_import) + monkeypatch.setenv("XRSPATIAL_REQUIRE_COG_VALIDATOR", "1") + + arr = _make_data(np.float32, bands=1, height=64, width=64) + da = _build_da(arr, raster_type="area", crs=4326) + path = str(tmp_path / "2302_require_strict.tif") + to_geotiff( + da, path, + compression="deflate", cog=True, tile_size=16, + overview_levels=[2], + ) + + # ``pytest.fail.Exception`` is a documented alias for + # ``_pytest.outcomes.Failed`` on pytest >= 7 (which this repo pins + # via setup.cfg). Update both spots in this file if that pin moves. + with pytest.raises(pytest.fail.Exception, match="XRSPATIAL_REQUIRE_COG_VALIDATOR"): + _try_cog_validate(path) + + +def test_require_validator_env_unset_skips_when_dep_missing( + tmp_path, monkeypatch, +): + """With the env var unset, missing validators trigger a clean skip. + + This is the contributor-laptop path: no rio-cogeo / GDAL installed, + the compliance suite still passes without the optional validator + step. 
@pytest.mark.parametrize("val", ["", "0", "false", "no", "off", "anything"])
def test_require_validator_env_non_truthy_values(monkeypatch, val):
    """Empty or non-truthy spellings leave strict mode off.

    The empty-string row is exercised by removing the variable altogether
    rather than setting it to ``""``.
    """
    name = "XRSPATIAL_REQUIRE_COG_VALIDATOR"
    if val:
        monkeypatch.setenv(name, val)
    else:
        monkeypatch.delenv(name, raising=False)

    assert _require_validator_env() is False
@pytest.mark.parametrize('codec', ['lerc', 'lz4', 'jpeg2000', 'j2k'])
def test_experimental_codec_without_opt_in_raises(tmp_path, codec):
    """Experimental codecs are refused unless the caller opts in.

    The ``ValueError`` text must name the requested codec and the
    ``allow_experimental_codecs`` flag, and must mention the word
    "experimental" (any case).
    """
    target = tmp_path / f'cog_exp_codec_{codec}_2301.tif'

    with pytest.raises(ValueError) as exc:
        to_geotiff(_float_da_small(), str(target), cog=True,
                   compression=codec)

    msg = str(exc.value)
    for needle in (codec, 'allow_experimental_codecs'):
        assert needle in msg, msg
    assert 'experimental' in msg.lower(), msg
+ + +def test_internal_only_jpeg_not_covered_by_experimental_flag(tmp_path): + """``allow_experimental_codecs=True`` does not cover JPEG. The two + flags are deliberately separate (internal-only is stricter than + experimental) so a caller cannot reach the JFIF path by toggling + only the experimental switch.""" + da = _uint8_da() + p = tmp_path / 'cog_jpeg_exp_flag_only_2301.tif' + + with pytest.raises(ValueError) as exc: + to_geotiff(da, str(p), cog=True, + compression='jpeg', + allow_experimental_codecs=True) + + msg = str(exc.value) + assert 'jpeg' in msg.lower(), msg + assert 'allow_internal_only_jpeg' in msg, msg + + +# --------------------------------------------------------------------------- +# Row 3: Rotated transform on input DataArray +# --------------------------------------------------------------------------- + +def test_rotated_affine_attr_without_drop_rotation_raises(tmp_path): + """The reader stamps ``attrs['rotated_affine']`` when called with + ``allow_rotated=True``. Writing such a DataArray without + ``drop_rotation=True`` would silently produce an identity-affine + output (#2216), so the entry point refuses up front.""" + da = _float_da_small() + da.attrs['rotated_affine'] = (1.0, 0.5, 0.0, 0.0, 0.5, 1.0) + p = tmp_path / 'cog_rotated_affine_2301.tif' + + with pytest.raises(ValueError) as exc: + to_geotiff(da, str(p), cog=True) + + msg = str(exc.value) + assert 'rotated_affine' in msg, msg + assert 'drop_rotation' in msg, msg + + +def test_rotated_affine_attr_drop_rotation_opt_in_succeeds(tmp_path): + """The opt-in path lets the write proceed (lossy but explicit). 
+ Pinned here so the rejection-message test cannot be 'fixed' by + removing the opt-in entirely.""" + da = _float_da_small() + da.attrs['rotated_affine'] = (1.0, 0.5, 0.0, 0.0, 0.5, 1.0) + p = tmp_path / 'cog_rotated_affine_optin_2301.tif' + + to_geotiff(da, str(p), cog=True, drop_rotation=True) + assert p.exists() + assert p.stat().st_size > 0 + + +def test_rotated_transform_tuple_attr_raises(tmp_path): + """``attrs['transform']`` as a 6-tuple ``(a, b, c, d, e, f)`` with + non-zero rotation/shear (``b`` or ``d``) is refused by + ``transform_from_attr``. The message names the rotation/shear + constraint and the axis-aligned requirement.""" + da = _float_da_small() + da.attrs['transform'] = (1.0, 0.5, 0.0, 0.0, -1.0, 4.0) # b = 0.5 + p = tmp_path / 'cog_rotated_tuple_2301.tif' + + with pytest.raises(ValueError) as exc: + to_geotiff(da, str(p), cog=True) + + msg = str(exc.value) + assert 'rotation/shear' in msg, msg + assert 'axis-aligned' in msg, msg + + +def test_rotated_transform_affine_attr_raises(tmp_path): + """``attrs['transform']`` as a rasterio ``Affine`` with non-zero + rotation/shear used to slip past the 6-tuple gate because + ``Affine`` iterates as a 9-element augmented matrix. The #2301 + validation hook detects the Affine duck-type and raises the same + diagnostic the 6-tuple branch already produced.""" + Affine = pytest.importorskip('affine').Affine + da = _float_da_small() + da.attrs['transform'] = Affine(1.0, 0.5, 0.0, 0.0, -1.0, 4.0) # b = 0.5 + p = tmp_path / 'cog_rotated_affine_obj_2301.tif' + + with pytest.raises(ValueError) as exc: + to_geotiff(da, str(p), cog=True) + + msg = str(exc.value) + assert 'rotation/shear' in msg, msg + assert 'axis-aligned' in msg, msg + + +def test_skewed_transform_affine_attr_raises(tmp_path): + """The ``d`` shear term (Affine's second row, first column) is also + rejected. 
Same validator path as ``b != 0``; pinned separately so a + refactor that only covers ``b`` is caught.""" + Affine = pytest.importorskip('affine').Affine + da = _float_da_small() + da.attrs['transform'] = Affine(1.0, 0.0, 0.0, 0.3, -1.0, 4.0) # d = 0.3 + p = tmp_path / 'cog_skewed_affine_obj_2301.tif' + + with pytest.raises(ValueError) as exc: + to_geotiff(da, str(p), cog=True) + + msg = str(exc.value) + assert 'rotation/shear' in msg, msg + + +def test_affine_attr_with_unconvertable_b_d_raises(tmp_path): + """An attrs['transform'] object that quacks like an Affine (has + ``.b`` and ``.d``) but carries non-numeric values for them is + refused with a clear ``ValueError``. The fail-closed branch + prevents a malformed input from bypassing the rotation/shear gate + and falling through to the no-georef path.""" + class _BogusAffine: + b = "not a number" + d = 0.0 + da = _float_da_small() + da.attrs['transform'] = _BogusAffine() + p = tmp_path / 'cog_bogus_affine_2301.tif' + + with pytest.raises(ValueError) as exc: + to_geotiff(da, str(p), cog=True) + + msg = str(exc.value) + assert 'unconvertable' in msg or 'rotation/shear' in msg, msg + + +def test_axis_aligned_affine_attr_still_writes(tmp_path): + """Sanity guard: an axis-aligned Affine (b=d=0) must keep working. 
+ Without this row the #2301 hook could regress every legitimate + Affine call site by widening the rejection bucket.""" + Affine = pytest.importorskip('affine').Affine + da = _float_da_small() + da.attrs['transform'] = Affine(1.0, 0.0, 0.0, 0.0, -1.0, 4.0) # b=d=0 + p = tmp_path / 'cog_axis_aligned_affine_2301.tif' + + to_geotiff(da, str(p), cog=True) + assert p.exists() + assert p.stat().st_size > 0 + + +# --------------------------------------------------------------------------- +# Row 4: File-like / BytesIO destination with ``cog=True`` +# --------------------------------------------------------------------------- + +def test_bytesio_destination_with_cog_raises(): + """COG output needs a real filesystem path because the writer runs + a second pass to populate overview offsets. ``to_geotiff`` rejects + file-like destinations with ``cog=True`` up front.""" + da = _float_da_small() + buf = io.BytesIO() + + with pytest.raises(ValueError) as exc: + to_geotiff(da, buf, cog=True) + + msg = str(exc.value) + assert 'cog' in msg.lower(), msg + assert 'file-like' in msg or 'string path' in msg, msg + + +def test_bytesio_destination_without_cog_still_works(): + """Sanity guard: BytesIO is fine for plain TIFF writes. Pinned so + the COG-only rejection cannot regress into a blanket file-like + refusal.""" + da = _float_da_small() + buf = io.BytesIO() + + to_geotiff(da, buf, cog=False) + assert buf.tell() > 0 + + +# --------------------------------------------------------------------------- +# Row 5: CuPy / GPU-backed array with ``cog=True`` +# --------------------------------------------------------------------------- + +def test_cupy_input_with_cog_currently_succeeds(tmp_path): + """The GPU writer currently produces a valid COG for CuPy input; + GPU COG is documented as Experimental in the docstring tier map + but is not refused at the entry point. 
This row pins the + currently-succeeds behaviour so a future tier-promotion change + (tracked under #2286) does not silently break callers that + already rely on the path. + + No production-side validation hook is added for #2301 because the + constraint for this issue is 'do not change semantics on paths + that currently succeed'.""" + if importlib.util.find_spec('cupy') is None: + pytest.skip('cupy not installed') + try: + import cupy as cp + if not cp.cuda.is_available(): + pytest.skip('CUDA device not available') + except Exception as exc: + pytest.skip(f'cupy import failed: {exc}') + + da = xr.DataArray(cp.zeros((8, 8), dtype=cp.float32), dims=('y', 'x')) + p = tmp_path / 'cog_cupy_2301.tif' + + # No exception; produces a real file. If a future PR tightens the + # GPU COG tier this assertion will start failing and the next + # reviewer can decide whether to flip this to a ``pytest.raises``. + to_geotiff(da, str(p), cog=True) + assert p.exists() + assert p.stat().st_size > 0 + + +# --------------------------------------------------------------------------- +# Row 6: Object-dtype DataArray +# --------------------------------------------------------------------------- + +def test_object_dtype_with_cog_raises(tmp_path): + """Object dtype is not a TIFF sample format. 
``numpy_to_tiff_dtype`` + raises ``ValueError`` naming the dtype, so the writer surfaces a + typed error rather than a deep struct-pack traceback.""" + da = xr.DataArray( + np.array([[1, 2], [3, 4]], dtype=object), dims=('y', 'x')) + p = tmp_path / 'cog_object_dtype_2301.tif' + + with pytest.raises(ValueError) as exc: + to_geotiff(da, str(p), cog=True) + + msg = str(exc.value) + assert 'object' in msg.lower() or 'dtype' in msg.lower(), msg + + +# --------------------------------------------------------------------------- +# Row 7: Conflicting ``crs=`` kwarg / array CRS +# --------------------------------------------------------------------------- + +def test_conflicting_attrs_crs_and_crs_wkt_raises(tmp_path): + """When ``attrs['crs']`` and ``attrs['crs_wkt']`` resolve to + different CRSes via pyproj, the writer refuses with + ``ConflictingCRSError`` (#1987 PR 6). #2301 only confirms the + message stays actionable; it does not introduce a new check.""" + pytest.importorskip('pyproj') + wkt_3857 = ( + 'PROJCS["WGS 84 / Pseudo-Mercator",' + 'GEOGCS["WGS 84",' + 'DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563]],' + 'PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]],' + 'PROJECTION["Mercator_1SP"],' + 'PARAMETER["central_meridian",0],' + 'PARAMETER["scale_factor",1],' + 'PARAMETER["false_easting",0],' + 'PARAMETER["false_northing",0],' + 'UNIT["metre",1],' + 'AUTHORITY["EPSG","3857"]]' + ) + da = _float_da_small() + da.attrs['crs'] = 4326 + da.attrs['crs_wkt'] = wkt_3857 + p = tmp_path / 'cog_conflicting_crs_2301.tif' + + with pytest.raises(ConflictingCRSError) as exc: + to_geotiff(da, str(p), cog=True) + + msg = str(exc.value) + # Message names both inputs and the resolution hint. + assert "attrs['crs']" in msg, msg + assert "attrs['crs_wkt']" in msg, msg + # Caller-actionable: tells the user to reconcile the two attrs. 
+ assert 'Reconcile' in msg or 'reconcile' in msg, msg + + +def test_crs_kwarg_overrides_attrs_silently(tmp_path): + """``crs=`` kwarg overrides the attrs disagreement. The + ``_check_write_conflicting_crs`` short-circuit at the top of the + check (``if context.get('crs_kwarg') is not None: return``) lets + the write proceed even when the two attrs would otherwise + disagree, so callers can intentionally use the kwarg to clobber + stale attrs. Pinned here so a future 'stricter' rewrite of the + conflict check that drops the short-circuit does not surprise + those callers.""" + pytest.importorskip('pyproj') + da = _float_da_small() + da.attrs['crs'] = 4326 + # ``crs_wkt`` value is irrelevant: the check short-circuits on the + # kwarg before pyproj parsing ever runs. + da.attrs['crs_wkt'] = 'GEOGCS["foo"]' + p = tmp_path / 'cog_crs_kwarg_override_2301.tif' + + to_geotiff(da, str(p), cog=True, crs=3857) + assert p.exists() + assert p.stat().st_size > 0 + + +# ------------------------------------------------------------------------- +# Section: COG parity rows +# ------------------------------------------------------------------------- + + + +# --------------------------------------------------------------------------- +# Environment gating +# --------------------------------------------------------------------------- + +_HAS_DASK = importlib.util.find_spec("dask") is not None + + +def _require_dask() -> None: + if not _HAS_DASK: + pytest.skip( + "dask is not installed; install the dask extra to exercise " + "the COG dask-read row of the #2286 release gate." + ) + + +# Golden corpus COG fixture: tiled, internal overviews, written via +# GDAL's COG driver. Lives under ``golden_corpus/fixtures``. 
+_GOLDEN_COG_ID = "cog_internal_overview_uint16" + + +def _golden_cog_path() -> pathlib.Path: + from xrspatial.geotiff.tests.golden_corpus import generate + return ( + pathlib.Path(generate.__file__).resolve().parent + / "fixtures" + / f"{_GOLDEN_COG_ID}.tif" + ) + + +# --------------------------------------------------------------------------- +# Range-aware in-process HTTP server (mirrors the pattern used by +# test_cog_http_parallel_decode_2026_05_15.py and test_cog_http_concurrent.py). +# --------------------------------------------------------------------------- + +class _RangeHandler(http.server.BaseHTTPRequestHandler): + payload: bytes = b"" + + def do_GET(self): # noqa: N802 + rng = self.headers.get("Range") + if rng and rng.startswith("bytes="): + spec = rng[len("bytes="):] + start_s, _, end_s = spec.partition("-") + start = int(start_s) + end = int(end_s) if end_s else len(self.payload) - 1 + chunk = self.payload[start:end + 1] + self.send_response(206) + self.send_header("Content-Type", "application/octet-stream") + self.send_header( + "Content-Range", + f"bytes {start}-{start + len(chunk) - 1}/{len(self.payload)}", + ) + self.send_header("Content-Length", str(len(chunk))) + self.end_headers() + self.wfile.write(chunk) + return + self.send_response(200) + self.send_header("Content-Length", str(len(self.payload))) + self.end_headers() + self.wfile.write(self.payload) + + def log_message(self, *_args, **_kwargs): # silence test noise + return + + +def _serve_payload(payload: bytes, monkeypatch): + """Spin a range-aware server bound to localhost; return (httpd, port). + + The handler subclass is named with a uuid suffix so that the two + fixtures in this module (and any future ones) don't share a + qualname. Without the suffix, tracebacks reuse the same class + identifier across fixture invocations and become harder to read. 
+ + ``allow_reuse_address = True`` lets the OS reclaim the port + quickly when the test tears down (avoiding TIME_WAIT-related + binding races under parallel pytest runs). ``timeout = 5`` is set on + the server class as well, but ``serve_forever()`` ignores that + attribute (only ``handle_request()`` honours it), so it does not + cap how long a stuck request can pin the daemon thread. + """ + monkeypatch.setenv("XRSPATIAL_GEOTIFF_ALLOW_PRIVATE_HOSTS", "1") + handler_cls = type( + f"RangeHandler2286_{uuid.uuid4().hex[:8]}", + (_RangeHandler,), + {"payload": payload}, + ) + + class _ReusableTCPServer(socketserver.TCPServer): + allow_reuse_address = True + timeout = 5 + + httpd = _ReusableTCPServer(("127.0.0.1", 0), handler_cls) + port = httpd.server_address[1] + thread = threading.Thread(target=httpd.serve_forever, daemon=True) + thread.start() + return httpd, port + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture +def xrspatial_cog(tmp_path): + """xrspatial writes a small lossless COG; returns (path, source_array, attrs). + + The source is a deterministic uint16 ramp so byte-exact comparison + is meaningful. CRS / transform / nodata are stamped via the public + ``to_geotiff`` API so the round trip exercises the user-visible + surface, not a private writer entry point. + """ + h, w = 64, 64 + # Use a +1 offset so pixel value 0 never appears -- the reader + # masks nodata-valued pixels to NaN under the #2092 contract, + # which upcasts integer rasters to float64. The fixture's payload + # is a deterministic ramp regardless of the offset. + data = (np.arange(h * w, dtype=np.uint16) + 1).reshape(h, w) + # Build a DataArray with a real CRS and a regular grid so the + # transform is non-degenerate. Pixel size 0.01 deg. 
+ y = np.linspace(45.0, 45.0 - 0.01 * (h - 1), h) + x = np.linspace(-120.0, -120.0 + 0.01 * (w - 1), w) + da = xr.DataArray( + data, dims=["y", "x"], + coords={"y": y, "x": x}, + # No ``nodata`` attr: the masked-nodata path upcasts integer + # rasters to float64 and replaces sentinel pixels with NaN, + # which would break the byte-exact uint16 comparison. The + # nodata read contract is exercised separately under + # ``test_nodata_lifecycle_parity_2211.py``. + attrs={"crs": 4326}, + name="cog_2286", + ) + path = str(tmp_path / "xrspatial_cog_2286.tif") + to_geotiff( + da, path, + compression="deflate", + tiled=True, + tile_size=16, + cog=True, + overview_levels=[2], + ) + return path, data, {"crs": 4326, "nodata": None} + + +@pytest.fixture +def golden_cog_http(monkeypatch): + """Serve the golden COG fixture over a range-aware in-process HTTP server. + + Yields ``(url, local_path)`` where ``local_path`` is the on-disk + fixture path; tests read it with the local xrspatial reader to get + the ground truth for the HTTP comparison. The fixture lives in the + golden corpus and was written by GDAL's COG driver, so it stresses + the third-party interop side of the COG read path. + """ + path = _golden_cog_path() + if not path.exists(): + pytest.skip( + f"golden COG fixture {_GOLDEN_COG_ID!r} missing on disk; run " + "`python -m xrspatial.geotiff.tests.golden_corpus.generate` " + "to materialise the corpus (issue #1930)." + ) + with open(path, "rb") as f: + payload = f.read() + httpd, port = _serve_payload(payload, monkeypatch) + try: + # Use a stable filename in the URL so the SSRF-hardened reader + # has a sensible-looking path to log. 
+ yield f"http://127.0.0.1:{port}/{_GOLDEN_COG_ID}.tif", path + finally: + httpd.shutdown() + httpd.server_close() + + +# --------------------------------------------------------------------------- +# Comparison helpers +# --------------------------------------------------------------------------- + +def _materialise(da: xr.DataArray) -> np.ndarray: + """Host-side numpy view (dask compute, cupy get) without leaking lazy state.""" + raw = da.data + if hasattr(raw, "compute"): + raw = raw.compute() + if hasattr(raw, "get"): + raw = raw.get() + return np.asarray(raw) + + +def _assert_byte_exact( + expected: np.ndarray, actual: np.ndarray, *, label: str, +) -> None: + """Byte-exact equality on shape, dtype, and bytes. Every fixture is lossless.""" + assert expected.shape == actual.shape, ( + f"{label}: shape mismatch expected={expected.shape} actual={actual.shape}" + ) + assert expected.dtype == actual.dtype, ( + f"{label}: dtype mismatch expected={expected.dtype} actual={actual.dtype}" + ) + if not np.array_equal(expected, actual): + diff = np.where(expected != actual) + n = len(diff[0]) + raise AssertionError( + f"{label}: byte-exact comparison failed; {n} pixel(s) differ" + ) + + +# Scope note: every fixture in this file is single-band 2D. The two +# helpers below hard-code that shape on purpose. If a future row adds +# a multi-band fixture, extend the helpers (or replace them with +# parametrised checks) rather than reusing them as-is. 
+ +def _assert_dim_names(da: xr.DataArray, *, label: str) -> None: + """The 2D COG path must come back with ``(y, x)`` dim names.""" + assert da.dims == ("y", "x"), ( + f"{label}: dims must be ('y', 'x'), got {da.dims!r}" + ) + + +def _assert_band_count(arr: np.ndarray, *, label: str) -> None: + """Single-band fixture; the returned array must be 2D.""" + assert arr.ndim == 2, ( + f"{label}: expected single-band 2D pixels, got ndim={arr.ndim} " + f"shape={arr.shape}" + ) + + +def _assert_crs_present(da: xr.DataArray, *, label: str) -> None: + """``crs`` (EPSG int or string) or ``crs_wkt`` must survive the read.""" + has_crs = "crs" in da.attrs and da.attrs["crs"] is not None + has_wkt = "crs_wkt" in da.attrs and da.attrs["crs_wkt"] + assert has_crs or has_wkt, ( + f"{label}: neither 'crs' nor 'crs_wkt' attr survived the read; " + f"attrs={sorted(da.attrs)!r}" + ) + + +def _assert_crs_equals(da: xr.DataArray, expected_epsg: int, *, label: str) -> None: + """Read-side CRS matches the writer's EPSG declaration.""" + crs = da.attrs.get("crs") + assert crs == expected_epsg, ( + f"{label}: crs mismatch expected={expected_epsg!r} got={crs!r}" + ) + + +def _assert_transform(da: xr.DataArray, *, label: str) -> None: + """Transform attr present and a finite 6-tuple.""" + t = da.attrs.get("transform") + assert t is not None, f"{label}: transform attr missing" + tup = tuple(float(v) for v in t) + assert len(tup) == 6, f"{label}: transform must be a 6-tuple, got {tup}" + assert all(np.isfinite(v) for v in tup), ( + f"{label}: transform has non-finite component: {tup}" + ) + + +def _assert_transform_equals( + da: xr.DataArray, expected_t: tuple, *, label: str, +) -> None: + """Transform 6-tuple matches an expected reference within a 1e-9 absolute tolerance.""" + t = da.attrs.get("transform") + assert t is not None, f"{label}: transform attr missing" + tup = tuple(float(v) for v in t) + exp = tuple(float(v) for v in expected_t) + assert len(tup) == 6 and len(exp) == 6 + for i, (a, b) in 
enumerate(zip(tup, exp)): + assert abs(a - b) <= 1e-9, ( + f"{label}: transform[{i}] differs expected={b!r} got={a!r}" + ) + + +def _assert_nodata_equals( + da: xr.DataArray, expected: float | int | None, *, label: str, +) -> None: + """Assert nodata sentinel matches, including the no-nodata case. + + When ``expected`` is ``None`` we still check the read side: the + reader must not fabricate a sentinel that the writer never stamped. + The reader is allowed to expose the attr as ``None`` or omit it + entirely; both count as "no nodata". + """ + nd = da.attrs.get("nodata") + if expected is None: + assert nd is None, ( + f"{label}: writer stamped no nodata, but reader exposed " + f"nodata={nd!r}" + ) + return + assert nd == expected, ( + f"{label}: nodata mismatch expected={expected!r} got={nd!r}" + ) + + +# --------------------------------------------------------------------------- +# Row 1: xrspatial write COG -> xrspatial eager read +# --------------------------------------------------------------------------- + +def test_row1_xrspatial_cog_xrspatial_eager(xrspatial_cog): + """xrspatial-written COG round-trips byte-exact through the eager reader.""" + path, expected, expected_attrs = xrspatial_cog + da = open_geotiff(path) + label = "row1_xrspatial_cog_eager" + + pixels = _materialise(da) + _assert_band_count(pixels, label=label) + _assert_byte_exact(expected, pixels, label=label) + _assert_dim_names(da, label=label) + _assert_crs_equals(da, expected_attrs["crs"], label=label) + _assert_transform(da, label=label) + _assert_nodata_equals(da, expected_attrs["nodata"], label=label) + assert da.dtype == expected.dtype, ( + f"{label}: dtype mismatch expected={expected.dtype} got={da.dtype}" + ) + + +# --------------------------------------------------------------------------- +# Row 2: xrspatial write COG -> xrspatial dask read +# --------------------------------------------------------------------------- + +def test_row2_xrspatial_cog_xrspatial_dask(xrspatial_cog): + 
"""xrspatial-written COG round-trips byte-exact through the dask reader.""" + _require_dask() + path, expected, expected_attrs = xrspatial_cog + da = open_geotiff(path, chunks=16) + label = "row2_xrspatial_cog_dask" + + # Verify we actually went through the dask path; a regression that + # silently drops ``chunks=`` and falls back to eager would pass the + # pixel check but exercise the wrong code path. + assert hasattr(da.data, "dask"), ( + f"{label}: chunks=16 did not produce a dask-backed DataArray; " + f"got data type {type(da.data).__name__}" + ) + + pixels = _materialise(da) + _assert_band_count(pixels, label=label) + _assert_byte_exact(expected, pixels, label=label) + _assert_dim_names(da, label=label) + _assert_crs_equals(da, expected_attrs["crs"], label=label) + _assert_transform(da, label=label) + _assert_nodata_equals(da, expected_attrs["nodata"], label=label) + assert da.dtype == expected.dtype, ( + f"{label}: dtype mismatch expected={expected.dtype} got={da.dtype}" + ) + + +# --------------------------------------------------------------------------- +# Row 3: xrspatial write COG -> rasterio read +# --------------------------------------------------------------------------- + +def test_row3_xrspatial_cog_rasterio(xrspatial_cog): + """rasterio reads an xrspatial-written COG and the pixel/metadata contract holds. + + Asserts the third-party reader sees the same pixels, dtype, CRS, + transform, and nodata that xrspatial stamped on write. A regression + that drops or mangles any of these would surface as a Tier-1 + interop break. + """ + rasterio = pytest.importorskip( + "rasterio", + reason="rasterio is required for row 3 (issue #2294)", + ) + path, expected, expected_attrs = xrspatial_cog + label = "row3_xrspatial_cog_rasterio" + + with rasterio.open(path) as src: + # Single-band fixture: read band 1. 
+ pixels = src.read(1) + rio_crs = src.crs + rio_transform = src.transform + rio_nodata = src.nodata + rio_count = src.count + rio_dtype = np.dtype(src.dtypes[0]) + + _assert_band_count(pixels, label=label) + _assert_byte_exact(expected, pixels, label=label) + assert rio_count == 1, f"{label}: rasterio reports band count {rio_count}" + assert rio_dtype == expected.dtype, ( + f"{label}: dtype mismatch expected={expected.dtype} got={rio_dtype}" + ) + # rasterio CRS -> EPSG int when possible. + epsg = rio_crs.to_epsg() if rio_crs is not None else None + assert epsg == expected_attrs["crs"], ( + f"{label}: rasterio CRS EPSG mismatch " + f"expected={expected_attrs['crs']!r} got={epsg!r}" + ) + # rasterio Affine is 6-tuple compatible via ``.a, .b, .c, .d, .e, .f``. + assert rio_transform is not None, f"{label}: rasterio transform missing" + assert all(np.isfinite(v) for v in ( + rio_transform.a, rio_transform.b, rio_transform.c, + rio_transform.d, rio_transform.e, rio_transform.f, + )), f"{label}: rasterio transform has non-finite component" + if expected_attrs["nodata"] is None: + # The writer was not asked to stamp a nodata; rasterio should + # report ``None`` too. Anything else means the writer leaked + # a sentinel onto the file. + assert rio_nodata is None, ( + f"{label}: writer stamped an unrequested nodata; " + f"rasterio reports {rio_nodata!r}" + ) + else: + assert rio_nodata == expected_attrs["nodata"], ( + f"{label}: rasterio nodata mismatch " + f"expected={expected_attrs['nodata']!r} got={rio_nodata!r}" + ) + + +# --------------------------------------------------------------------------- +# Row 4: golden/rasterio COG fixture -> xrspatial local read +# --------------------------------------------------------------------------- + +def test_row4_golden_cog_xrspatial_local(): + """Read the GDAL-written golden COG fixture with xrspatial's local reader. 
+ + Compares pixels byte-exact against a rasterio read of the same + bytes -- the GDAL COG driver wrote the file, so rasterio is the + canonical oracle here. Catches regressions that returned the right + shape but mangled values (e.g. wrong endianness, predictor drift, + overview IFD picked instead of full res). + """ + rasterio = pytest.importorskip( + "rasterio", + reason="rasterio is required for row 4 oracle (issue #2294)", + ) + path = _golden_cog_path() + if not path.exists(): + pytest.skip( + f"golden COG fixture {_GOLDEN_COG_ID!r} missing on disk; run " + "`python -m xrspatial.geotiff.tests.golden_corpus.generate` " + "(issue #1930)." + ) + da = open_geotiff(str(path)) + label = "row4_golden_cog_xrspatial_local" + + pixels = _materialise(da) + _assert_band_count(pixels, label=label) + _assert_dim_names(da, label=label) + # The golden fixture is uint16 per the manifest entry. + assert da.dtype == np.dtype("uint16"), ( + f"{label}: dtype expected=uint16 got={da.dtype}" + ) + _assert_crs_present(da, label=label) + _assert_transform(da, label=label) + + # Pixel parity against the rasterio oracle. The fixture is lossless + # deflate, so byte-exact is the right bar. + with rasterio.open(str(path)) as src: + expected = src.read(1) + _assert_byte_exact(expected, pixels, label=label) + + +# --------------------------------------------------------------------------- +# Row 5: golden/rasterio COG fixture -> xrspatial HTTP range read +# --------------------------------------------------------------------------- + +def test_row5_golden_cog_xrspatial_http(golden_cog_http): + """xrspatial's HTTP range reader returns the same pixels as the local read. + + Exercises the cloud-source code path against the GDAL-written + fixture. The reference is the local read of the same bytes, so any + drift between the local and HTTP paths surfaces here. 
+ """ + url, local_path = golden_cog_http + label = "row5_golden_cog_xrspatial_http" + + local_da = open_geotiff(str(local_path)) + http_da = open_geotiff(url) + + local_px = _materialise(local_da) + http_px = _materialise(http_da) + + _assert_band_count(http_px, label=label) + _assert_byte_exact(local_px, http_px, label=label) + _assert_dim_names(http_da, label=label) + assert http_da.dtype == local_da.dtype, ( + f"{label}: dtype mismatch local={local_da.dtype} http={http_da.dtype}" + ) + # CRS and transform survive the cloud-source path. + local_crs = local_da.attrs.get("crs") + http_crs = http_da.attrs.get("crs") + assert local_crs == http_crs, ( + f"{label}: crs mismatch local={local_crs!r} http={http_crs!r}" + ) + local_t = local_da.attrs.get("transform") + assert local_t is not None, f"{label}: local read missing transform" + _assert_transform_equals(http_da, local_t, label=label) + # nodata presence must agree (the fixture may or may not carry one; + # both sides must agree either way). + assert ("nodata" in local_da.attrs) == ("nodata" in http_da.attrs), ( + f"{label}: nodata presence differs " + f"local={'nodata' in local_da.attrs} http={'nodata' in http_da.attrs}" + ) + if "nodata" in local_da.attrs: + assert local_da.attrs["nodata"] == http_da.attrs["nodata"], ( + f"{label}: nodata value differs " + f"local={local_da.attrs['nodata']!r} " + f"http={http_da.attrs['nodata']!r}" + ) + + +# --------------------------------------------------------------------------- +# Row 6: golden/rasterio COG fixture -> xrspatial dask HTTP range read +# --------------------------------------------------------------------------- + +def test_row6_golden_cog_xrspatial_dask_http(golden_cog_http): + """The dask HTTP path returns the same pixels as the local read. + + Combines the cloud-source and chunked-read code paths. 
A regression + that silently drops ``chunks=`` over HTTP would compute correct + pixels via the eager path; the storage-type assertion below guards + against that. + """ + _require_dask() + url, local_path = golden_cog_http + label = "row6_golden_cog_xrspatial_dask_http" + + local_da = open_geotiff(str(local_path)) + http_da = open_geotiff(url, chunks=16) + + assert hasattr(http_da.data, "dask"), ( + f"{label}: chunks=16 over HTTP did not produce a dask-backed " + f"DataArray; got data type {type(http_da.data).__name__}" + ) + + local_px = _materialise(local_da) + http_px = _materialise(http_da) + + _assert_band_count(http_px, label=label) + _assert_byte_exact(local_px, http_px, label=label) + _assert_dim_names(http_da, label=label) + assert http_da.dtype == local_da.dtype, ( + f"{label}: dtype mismatch local={local_da.dtype} http={http_da.dtype}" + ) + local_crs = local_da.attrs.get("crs") + http_crs = http_da.attrs.get("crs") + assert local_crs == http_crs, ( + f"{label}: crs mismatch local={local_crs!r} http={http_crs!r}" + ) + local_t = local_da.attrs.get("transform") + assert local_t is not None, f"{label}: local read missing transform" + _assert_transform_equals(http_da, local_t, label=label) + assert ("nodata" in local_da.attrs) == ("nodata" in http_da.attrs), ( + f"{label}: nodata presence differs " + f"local={'nodata' in local_da.attrs} http={'nodata' in http_da.attrs}" + ) + if "nodata" in local_da.attrs: + assert local_da.attrs["nodata"] == http_da.attrs["nodata"], ( + f"{label}: nodata value differs " + f"local={local_da.attrs['nodata']!r} " + f"http={http_da.attrs['nodata']!r}" + ) + + +# ------------------------------------------------------------------------- +# Section: COG: tile-layout pre-flight +# ------------------------------------------------------------------------- + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def 
_float_da(shape=(64, 64)): + """A small float32 DataArray suitable for COG writes.""" + return xr.DataArray( + np.zeros(shape, dtype=np.float32), dims=('y', 'x') + ) + + +# --------------------------------------------------------------------------- +# Public boundary: ``to_geotiff(cog=True, tiled=False)`` is refused. +# --------------------------------------------------------------------------- + +def test_public_writer_rejects_cog_true_tiled_false(tmp_path): + """The public entry point raises ``ValueError`` with a message that + names the COG-spec constraint and both caller-side fixes.""" + da = _float_da() + p = tmp_path / 'cog_tiled_false_2312.tif' + + with pytest.raises(ValueError) as exc: + to_geotiff(da, str(p), cog=True, tiled=False) + + msg = str(exc.value) + # The message must name the violated constraint. + assert 'COG' in msg, msg + assert 'tiled' in msg.lower(), msg + # Both caller-side fixes must appear so the error is actionable. + assert 'tiled=True' in msg, msg + assert 'cog=False' in msg, msg + + +def test_public_writer_rejects_cog_true_tiled_false_with_tile_size(tmp_path): + """Pinning the rejection survives a ``tile_size`` kwarg too. + + Before #2312, ``to_geotiff(..., cog=True, tiled=False, + tile_size=128)`` emitted the "tile_size is ignored when tiled=False" + warning and then wrote strips. The new gate has to fire before that + warning so the caller never sees the misleading "tile_size is + ignored" message under ``cog=True``. + """ + da = _float_da() + p = tmp_path / 'cog_tiled_false_with_tile_size_2312.tif' + + # ``pytest.warns(None)`` was removed; use the stdlib catch_warnings + # recorder to assert the dead "tile_size is ignored" warning never + # fires on the ``cog=True`` arm. 
+ with warnings.catch_warnings(record=True) as record: + warnings.simplefilter('always') + with pytest.raises(ValueError) as exc: + to_geotiff(da, str(p), cog=True, tiled=False, tile_size=128) + + msg = str(exc.value) + assert 'COG' in msg, msg + assert 'tiled=True' in msg, msg + + tile_size_warnings = [ + w for w in record + if 'tile_size' in str(w.message) + and 'is ignored when tiled=False' in str(w.message) + ] + assert not tile_size_warnings, [str(w.message) for w in tile_size_warnings] + + +# --------------------------------------------------------------------------- +# Defense in depth: ``_writer._write(cog=True, tiled=False)`` also raises. +# --------------------------------------------------------------------------- + +def test_lowlevel_write_rejects_cog_true_tiled_false(tmp_path): + """The array-level entry point ``_writer._write`` (re-exported as + ``write``) carries its own gate so a caller that bypasses the public + wrapper still gets the typed rejection. + + Without this, a direct caller could quietly produce the malformed + strip-plus-overviews file the public boundary refuses. + """ + arr = np.zeros((64, 64), dtype=np.float32) + p = tmp_path / 'cog_tiled_false_lowlevel_2312.tif' + + with pytest.raises(ValueError) as exc: + _array_write( + arr, + str(p), + compression='deflate', + tiled=False, + cog=True, + ) + + msg = str(exc.value) + assert 'COG' in msg, msg + assert 'tiled=True' in msg, msg + assert 'cog=False' in msg, msg + + +# --------------------------------------------------------------------------- +# Smoke test: the valid tiled COG path still works. +# --------------------------------------------------------------------------- + +def test_tiled_cog_smoke_still_works(tmp_path): + """A regression in the new gate that broke valid COG writes would + be a worse outcome than the original bug. Pin the happy path + end-to-end so the gate has to stay narrowly targeted at the + ``cog=True, tiled=False`` combination it is meant to catch. 
+ """ + da = _float_da(shape=(128, 128)) + p = tmp_path / 'cog_tiled_smoke_2312.tif' + + rv = to_geotiff(da, str(p), cog=True, tiled=True, tile_size=64) + assert rv == str(p) + assert p.exists() + assert p.stat().st_size > 0 + + +def test_tiled_cog_smoke_default_tiled(tmp_path): + """``tiled`` defaults to ``True`` on ``to_geotiff``, so ``cog=True`` + on its own should also produce a valid COG. Pinned so a future + change that flipped the default would not silently start hitting + the new rejection gate. + """ + da = _float_da(shape=(128, 128)) + p = tmp_path / 'cog_tiled_default_smoke_2312.tif' + + rv = to_geotiff(da, str(p), cog=True) + assert rv == str(p) + assert p.exists() + assert p.stat().st_size > 0 + + +# --------------------------------------------------------------------------- +# Negative control: ``cog=False, tiled=False`` is still a valid strip TIFF. +# --------------------------------------------------------------------------- + +def test_strip_layout_without_cog_still_works(tmp_path): + """``tiled=False`` without ``cog=True`` is the supported strip-TIFF + path; the new gate must not regress it. Pinned so a stricter + interpretation of ``cog=True implies tiled=True`` could not creep + into the general ``tiled=False`` path. + """ + da = _float_da(shape=(64, 64)) + p = tmp_path / 'strip_no_cog_2312.tif' + + rv = to_geotiff(da, str(p), cog=False, tiled=False) + assert rv == str(p) + assert p.exists() + assert p.stat().st_size > 0 + + +# ------------------------------------------------------------------------- +# Section: COG: tile-size pre-flight +# ------------------------------------------------------------------------- + +@contextlib.contextmanager +def _alarm_timeout(seconds: int): + """Raise TimeoutError after ``seconds`` to bound test failure modes. + + No-op on platforms that lack SIGALRM (Windows). 
The window is large + enough that a healthy raise path finishes well before the alarm + fires; if the fix regresses the writer hangs and the alarm fires. + """ + if not hasattr(signal, 'SIGALRM') or os.name == 'nt': + yield + return + + def _handler(signum, frame): # noqa: ARG001 + raise TimeoutError( + f'test exceeded {seconds}s watchdog; the writer likely ' + f'regressed into the #2311 infinite-loop hang.' + ) + + old = signal.signal(signal.SIGALRM, _handler) + signal.alarm(seconds) + try: + yield + finally: + signal.alarm(0) + signal.signal(signal.SIGALRM, old) + + + +# --------------------------------------------------------------------------- +# Public boundary: ``to_geotiff(..., cog=True, tile_size<=0)`` must raise. +# Covers both tiled=True and tiled=False, plus 0 and a negative value, so +# the validator gate stays on regardless of layout flag. +# --------------------------------------------------------------------------- + +@pytest.mark.parametrize('tiled', [True, False]) +@pytest.mark.parametrize('tile_size', [-1, 0]) +def test_to_geotiff_cog_non_positive_tile_size_raises(tmp_path, tiled, tile_size): + """``cog=True`` with ``tile_size<=0`` raises ValueError up front, + regardless of ``tiled``. Before #2311 this hung the writer when + ``tiled=False``.""" + da = _float_da() + p = tmp_path / f'cog_tile_size_hang_2311_t{int(tiled)}_ts{tile_size}.tif' + + with _alarm_timeout(5), pytest.raises(ValueError) as exc: + to_geotiff(da, str(p), cog=True, tiled=tiled, tile_size=tile_size) + + msg = str(exc.value) + assert 'tile_size' in msg, msg + # The shared validator says "positive int" -- pin the substring so a + # message rewrite still keeps the actionable wording. 
+ assert 'positive' in msg.lower(), msg + + + # --------------------------------------------------------------------------- + # Sanity: ``cog=False`` with ``tiled=False`` still accepts an unused + # ``tile_size`` (the existing "ignored" warning shape) -- the new gate + # must not fire when neither path will consume the value. + # --------------------------------------------------------------------------- + + def test_to_geotiff_non_cog_strip_does_not_validate_tile_size(tmp_path): + """When neither tiled output nor COG overview generation will use + ``tile_size``, the validator gate stays off. The pre-existing + "tile_size ignored" warning still fires (it carries its own + non-default-value check, not a positivity check), but no error + is raised.""" + da = _float_da() + p = tmp_path / 'cog_tile_size_hang_2311_no_cog_strip.tif' + + # A negative tile_size with cog=False AND tiled=False is accepted + # (with the "ignored" warning) because nothing consumes the value. + # Use ``simplefilter('ignore')`` to swallow the warning so the test only + # asserts no raise / no hang. + with _alarm_timeout(5), warnings.catch_warnings(): + warnings.simplefilter('ignore') + to_geotiff(da, str(p), cog=False, tiled=False, tile_size=-1) + + assert p.exists(), 'writer should have produced a strip-layout file' + + + # --------------------------------------------------------------------------- + # Defense in depth: drive the inner writer directly with a bad tile_size + # and assert the auto-overview loop raises instead of hanging. Guards + # against future internal callers that bypass ``to_geotiff``'s public + # validator. + # --------------------------------------------------------------------------- + + @pytest.mark.parametrize('tile_size', [-1, 0]) + def test_writer_auto_overview_loop_rejects_non_positive_tile_size( + tmp_path, tile_size): + """``_write(..., cog=True, overview_levels=None)`` raises ValueError + when ``tile_size`` is not a positive int, instead of spinning in the + halving loop.
The public ``to_geotiff`` already validates earlier; + this is the inner-writer safety net (#2311).""" + from xrspatial.geotiff._writer import _write + + # Minimal float32 array large enough for the auto-overview branch to + # be entered. The exact pixel values do not matter -- the validator + # check runs before any encoding work. + data = np.zeros((64, 64), dtype=np.float32) + out = tmp_path / f'cog_tile_size_hang_2311_inner_ts{tile_size}.tif' + + with _alarm_timeout(5), pytest.raises(ValueError) as exc: + _write(data, str(out), + compression='none', + tiled=True, + tile_size=tile_size, + cog=True, + overview_levels=None) + + assert 'tile_size' in str(exc.value), str(exc.value) + + +# --------------------------------------------------------------------------- +# Non-int tile_size values reach the same gate. The public +# ``_validate_tile_size`` (called from ``to_geotiff`` when tiled or cog is +# true) rejects None, float, and bool with typed errors; the +# defense-in-depth gate at the top of ``_write`` does the same for direct +# callers. Both layers should reject all three types. +# --------------------------------------------------------------------------- + +@pytest.mark.parametrize('bad_tile_size', [None, 128.0, True, False]) +def test_to_geotiff_cog_non_int_tile_size_raises(tmp_path, bad_tile_size): + """Non-int ``tile_size`` (None, float, bool) with ``cog=True`` is + rejected at the public boundary, regardless of ``tiled``. 
Bool is + explicitly listed because Python treats ``True``/``False`` as int + subclasses (#2311 follow-up).""" + da = _float_da() + p = tmp_path / ( + f'cog_tile_size_hang_2311_nonint_{type(bad_tile_size).__name__}.tif') + + with _alarm_timeout(5), pytest.raises((ValueError, TypeError)) as exc: + to_geotiff(da, str(p), cog=True, tiled=True, tile_size=bad_tile_size) + + assert 'tile_size' in str(exc.value), str(exc.value) + + +# --------------------------------------------------------------------------- +# Inner-loop guard coverage: confirm the auto-overview halving loop's own +# ``tile_size > 0`` pre-check is present in ``_write``'s compiled +# constants. Inspecting the constants pins the literal so a future +# refactor that removes the inner guard fails this test loudly even if +# the top-of-``_write`` gate still catches the bad input at runtime. +# (Reaching the inner guard through ``_write`` directly would require +# patching out the top gate, which is invasive; the constants check is +# the simplest reliable pin without rewriting production code.) 
+# --------------------------------------------------------------------------- + +def test_inner_overview_loop_guard_message_is_pinned(): + """Pin the inner-overview ``tile_size`` guard literal so removing + the loop-side defense fails this test even when the top gate at + line 407 still raises for the same inputs (#2311).""" + from xrspatial.geotiff import _writer as wmod + + guard_msg = ( + 'tile_size must be a positive int for COG overview ' + 'generation, got tile_size=') + consts = wmod._write.__code__.co_consts + found = any(isinstance(c, str) and guard_msg in c for c in consts) + assert found, ( + 'inner-loop guard message not present in _write constants; the ' + 'auto-overview guard introduced in #2311 may have been removed.') diff --git a/xrspatial/geotiff/tests/write/test_overview.py b/xrspatial/geotiff/tests/write/test_overview.py new file mode 100644 index 000000000..3c48264d2 --- /dev/null +++ b/xrspatial/geotiff/tests/write/test_overview.py @@ -0,0 +1,1345 @@ +"""Overview-level and nodata-aware overview tests. + +Covers the overview shape-ceiling contract, the mean / min / max / +median / mode resampling matrix with int and float nodata, the cubic +resampling cases for both float and integer dtypes, and the +block-reduce sentinel-masking gate for int sentinels. + +Tests-only restructure for epic #2390. 
+""" + + from __future__ import annotations + + import importlib.util + import numpy as np + import pytest + import xarray as xr + + from xrspatial.geotiff._writer import _block_reduce_2d + from xrspatial.geotiff import open_geotiff, to_geotiff + + from .._helpers.markers import gpu_available as _gpu_available + + +# ------------------------------------------------------------------------- +# Section: ceil-shape overview tests +# ------------------------------------------------------------------------- + + _HAS_GPU = _gpu_available() + _gpu_only = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required") + + +# --------------------------------------------------------------------------- +# Output-shape contract: (ceil(h/2), ceil(w/2)) for every method and dtype. +# --------------------------------------------------------------------------- +@pytest.mark.parametrize( + "shape,expected", + [ + ((5, 5), (3, 3)), + ((5, 4), (3, 2)), + ((4, 5), (2, 3)), + ((7, 3), (4, 2)), + ((1, 1), (1, 1)), + ((1, 5), (1, 3)), + ((5, 1), (3, 1)), + ((4, 4), (2, 2)), + ((6, 6), (3, 3)), + ], +) +@pytest.mark.parametrize( + "method", ["nearest", "mean", "min", "max", "median", "mode"] +) +def test_ceil_output_shape_float(shape, expected, method): + arr = np.arange(shape[0] * shape[1], dtype=np.float32).reshape(shape) + out = _block_reduce_2d(arr, method) + assert out.shape == expected + + +@pytest.mark.parametrize( + "shape,expected", + [ + ((5, 5), (3, 3)), + ((3, 7), (2, 4)), + ((1, 1), (1, 1)), + ], +) +@pytest.mark.parametrize("method", ["nearest", "mean", "min", "max", "median", "mode"]) +def test_ceil_output_shape_int(shape, expected, method): + arr = np.arange(shape[0] * shape[1], dtype=np.int16).reshape(shape) + out = _block_reduce_2d(arr, method) + assert out.shape == expected + assert out.dtype == arr.dtype + + +def test_ceil_output_shape_cubic_float(): + pytest.importorskip("scipy") + arr = np.arange(25, dtype=np.float32).reshape(5, 5) + out = _block_reduce_2d(arr, "cubic") +
assert out.shape == (3, 3) + + +def test_ceil_output_shape_cubic_int(): + pytest.importorskip("scipy") + arr = np.arange(25, dtype=np.int16).reshape(5, 5) + out = _block_reduce_2d(arr, "cubic") + assert out.shape == (3, 3) + assert out.dtype == arr.dtype + + +# --------------------------------------------------------------------------- +# Trailing-edge pixel values: the last row/col of the source must reach the +# overview rather than being dropped. +# --------------------------------------------------------------------------- +def test_nearest_5x5_preserves_trailing_pixels(): + arr = np.arange(25, dtype=np.float64).reshape(5, 5) + out = _block_reduce_2d(arr, "nearest") + # Nearest = top-left of every 2x2 block. With ceil, that's arr[::2, ::2]. + assert out.shape == (3, 3) + np.testing.assert_array_equal(out, arr[::2, ::2]) + # The trailing row/col of the source IS represented in the overview. + assert out[-1, -1] == arr[4, 4] + + +def test_mean_5x5_trailing_residual_block_uses_valid_cell(): + # Residual at row 4, col 4 is a 1x1 block containing arr[4,4] alone. + arr = np.zeros((5, 5), dtype=np.float32) + arr[4, 4] = 100.0 + out = _block_reduce_2d(arr, "mean") + assert out.shape == (3, 3) + # The corner residual is a 1x1 block, so its mean is the single pixel. + assert out[2, 2] == pytest.approx(100.0) + + +def test_max_5x5_residual_block_uses_valid_cell(): + arr = np.zeros((5, 5), dtype=np.float32) + arr[4, :] = 9.0 # trailing row should reach overview[2, :] + arr[:, 4] = 7.0 + out = _block_reduce_2d(arr, "max") + assert out.shape == (3, 3) + # Bottom overview row picks max of (arr[4, 2*j:2*j+2]) -> 9.0 everywhere. + np.testing.assert_array_equal(out[2, :2], [9.0, 9.0]) + # Right column gets max from (arr[2*i:2*i+2, 4]) -> 7.0 except corner. + assert out[0, 2] == 7.0 + assert out[1, 2] == 7.0 + # arr[4, 4] = 7.0 (set by the trailing-column sweep, after row sweep). 
+ assert out[2, 2] == 7.0 + + +def test_min_5x5_residual_block_uses_valid_cell(): + arr = np.full((5, 5), 10.0, dtype=np.float32) + arr[4, 4] = -1.0 + out = _block_reduce_2d(arr, "min") + assert out[2, 2] == -1.0 + + +def test_median_5x5_residual_block_uses_valid_cell(): + arr = np.full((5, 5), 5.0, dtype=np.float32) + arr[4, 4] = 99.0 + out = _block_reduce_2d(arr, "median") + # 1x1 residual: median is the single value. + assert out[2, 2] == pytest.approx(99.0) + + +def test_mode_5x5_residual_block_picks_valid_cell(): + arr = np.array( + [[1, 1, 2, 2, 3], + [1, 1, 2, 2, 3], + [4, 4, 5, 5, 6], + [4, 4, 5, 5, 6], + [7, 7, 8, 8, 9]], + dtype=np.int16, + ) + out = _block_reduce_2d(arr, "mode") + assert out.shape == (3, 3) + # Trailing 1x1 block at (4,4) is just the value 9. + assert out[2, 2] == 9 + # Trailing column (rows 0..1 / 2..3, col 4) is 2x1 blocks containing 3 / 6. + assert out[0, 2] == 3 + assert out[1, 2] == 6 + # Trailing row (col 0..1 / 2..3, row 4) is 1x2 blocks: [7,7] -> 7, [8,8] -> 8. + assert out[2, 0] == 7 + assert out[2, 1] == 8 + + +def test_cubic_5x5_covers_source_extent(): + pytest.importorskip("scipy") + # Smoothly varying ramp so cubic interpolation is well-behaved. + arr = np.arange(25, dtype=np.float32).reshape(5, 5) + out = _block_reduce_2d(arr, "cubic") + assert out.shape == (3, 3) + # Output should not be entirely zero/NaN, and trailing corner should + # roughly reflect the high source values around (4, 4). + assert np.isfinite(out).all() + assert out[2, 2] > out[0, 0] + + +# --------------------------------------------------------------------------- +# Sentinel masking still works on odd-sized inputs.
+# --------------------------------------------------------------------------- +def test_mean_5x5_with_nodata_excludes_sentinel_in_residual(): + sentinel = -9999.0 + arr = np.full((5, 5), 1.0, dtype=np.float32) + arr[4, 4] = sentinel + out = _block_reduce_2d(arr, "mean", nodata=sentinel) + # The 1x1 trailing residual is all-sentinel -> all-NaN block, which + # the post-overview rewrite (in the caller) handles. Here we just + # confirm the sentinel did not bias the reduction: out[2, 2] is NaN, + # not (1.0 + sentinel)/2 or similar. + assert np.isnan(out[2, 2]) + # Other overview cells with at least one valid neighbour stay valid. + assert out[0, 0] == pytest.approx(1.0) + + +def test_min_int_5x5_with_nodata_does_not_select_sentinel_in_residual(): + sentinel = -9999 + arr = np.full((5, 5), 10, dtype=np.int16) + # Trailing column has a sentinel + valid cell in 2x1 residual blocks. + arr[0, 4] = sentinel + arr[2, 4] = sentinel + arr[4, 4] = sentinel + out = _block_reduce_2d(arr, "min", nodata=sentinel) + # The 2x1 residual at (0..1, 4) is [-9999, 10] -> min ignoring sentinel = 10. + assert out[0, 2] == 10 + assert out[1, 2] == 10 + # The 1x1 residual at (4, 4) is sentinel -> rewritten to sentinel. + assert out[2, 2] == sentinel + + +def test_int64_sentinel_near_max_masks_in_padded_branch(): + # INT64_MAX is not exactly representable in float64: float(INT64_MAX) + # rounds up to 2**63, which would miss the sentinel if the mask were + # computed against the float-padded view. The reader must compute the + # mask at native integer width before padding. + sentinel = np.iinfo(np.int64).max + arr = np.full((5, 5), 10, dtype=np.int64) + arr[0, 0] = sentinel + # Pad branch fires because shape (5, 5) is odd. + out = _block_reduce_2d(arr, "min", nodata=sentinel) + # Top-left 2x2 block has 1 sentinel + 3 valid 10s. nanmin -> 10 + # (sentinel masked out). 
If the mask missed the sentinel, the int64 + # value would be cast to float and the float min would pick up the + # sentinel's value or produce noise; either way out[0,0] would not + # be 10. + assert out[0, 0] == 10 + + +def test_uint64_sentinel_near_max_masks_in_padded_branch(): + # UINT64_MAX = 2**64 - 1 is also not exactly representable in float64 + # (float(UINT64_MAX) rounds up to 2**64). The native-width mask path + # must catch the sentinel for unsigned 64-bit dtypes too. + sentinel = np.iinfo(np.uint64).max + arr = np.full((5, 5), 10, dtype=np.uint64) + arr[0, 0] = sentinel + out = _block_reduce_2d(arr, "min", nodata=sentinel) + assert out[0, 0] == 10 + + +def test_float32_padded_branch_keeps_source_dtype(): + # The padded mean/min/max/median branch used to allocate a float64 + # NaN buffer regardless of the source dtype, doubling intermediate + # memory for an odd-shape float32 read. Verify the helper now keeps + # the source dtype across the pad so a float32 input round-trips as + # float32. The contract is checked end-to-end via the output dtype. + arr = np.arange(25, dtype=np.float32).reshape(5, 5) + out = _block_reduce_2d(arr, "mean") + assert out.dtype == np.float32 + # And the values still match what a manual ceil-mean would produce + # for the top-left 2x2 block. + top_left_mean = float(arr[:2, :2].mean()) + assert out[0, 0] == pytest.approx(top_left_mean) + + +def test_max_int_5x5_with_nodata_does_not_select_sentinel_in_residual(): + sentinel = -9999 + arr = np.full((5, 5), 10, dtype=np.int16) + arr[4, 4] = sentinel + out = _block_reduce_2d(arr, "max", nodata=sentinel) + # The 1x1 corner block is all-sentinel -> sentinel. + assert out[2, 2] == sentinel + # Adjacent 2x1 residual (rows 4, cols 0..1) has valid values only. + assert out[2, 0] == 10 + assert out[2, 1] == 10 + + +# --------------------------------------------------------------------------- +# Even-sized inputs keep the existing fast-path semantics. 
+# --------------------------------------------------------------------------- +@pytest.mark.parametrize( + "method", ["nearest", "mean", "min", "max", "median", "mode"] +) +def test_even_input_matches_legacy_2x2_behaviour(method): + rng = np.random.default_rng(2105) + arr = rng.integers(0, 100, size=(6, 8)).astype(np.int16) + out = _block_reduce_2d(arr, method) + assert out.shape == (3, 4) + # Spot-check a single block matches a direct reduction. + block = arr[0:2, 0:2] + if method == "nearest": + assert out[0, 0] == block[0, 0] + elif method == "mean": + # Integer outputs are rounded after the float reduction. + assert out[0, 0] == int(round(block.astype(np.float64).mean())) + elif method == "min": + assert out[0, 0] == block.min() + elif method == "max": + assert out[0, 0] == block.max() + elif method == "median": + assert out[0, 0] == int(round(float(np.median(block)))) + elif method == "mode": + # Lowest-value tie-break for unique cells. + vals, counts = np.unique(block, return_counts=True) + expected = vals[np.argmax(counts)] + assert out[0, 0] == expected + + +# --------------------------------------------------------------------------- +# GPU mirror: identical shape and identical values on odd-sized inputs. 
+# --------------------------------------------------------------------------- +@_gpu_only +@pytest.mark.parametrize( + "method", ["nearest", "mean", "min", "max", "median"] +) +@pytest.mark.parametrize("shape", [(5, 5), (5, 4), (4, 5), (7, 3)]) +def test_gpu_block_reduce_matches_cpu_on_odd_shapes(method, shape): + import cupy + + from xrspatial.geotiff._gpu_decode import _block_reduce_2d_gpu + + rng = np.random.default_rng(2105) + arr = rng.random(shape, dtype=np.float32) + cpu_out = _block_reduce_2d(arr, method) + gpu_out = _block_reduce_2d_gpu(cupy.asarray(arr), method).get() + assert gpu_out.shape == cpu_out.shape + np.testing.assert_allclose(gpu_out, cpu_out, equal_nan=True, rtol=1e-6) + + +@_gpu_only +def test_gpu_block_reduce_int_5x5_with_nodata(): + import cupy + + from xrspatial.geotiff._gpu_decode import _block_reduce_2d_gpu + + sentinel = -9999 + arr = np.full((5, 5), 10, dtype=np.int16) + arr[4, 4] = sentinel + cpu_out = _block_reduce_2d(arr, "max", nodata=sentinel) + gpu_out = _block_reduce_2d_gpu(cupy.asarray(arr), "max", nodata=sentinel).get() + np.testing.assert_array_equal(gpu_out, cpu_out) + + +# ------------------------------------------------------------------------- +# Section: nodata-aware overview tests +# ------------------------------------------------------------------------- + + +_gpu_only = pytest.mark.skipif( + not _HAS_GPU, + reason="cupy + CUDA required", +) + + +def _arr_with_partial_nan(): + """4x4 float raster: row 1 is all-NaN, rest is finite.""" + return np.array([ + [1.0, 2.0, 3.0, 4.0], + [np.nan, np.nan, np.nan, np.nan], + [10.0, 20.0, 30.0, 40.0], + [10.0, 20.0, 30.0, 40.0], + ], dtype=np.float32) + + +def _arr_with_full_nan_block(): + """4x4 float raster: top-left 2x2 entirely NaN.""" + return np.array([ + [np.nan, np.nan, 3.0, 4.0], + [np.nan, np.nan, 7.0, 8.0], + [10.0, 20.0, 30.0, 40.0], + [10.0, 20.0, 30.0, 40.0], + ], dtype=np.float32) + + +def test_cpu_cog_overview_mean_ignores_sentinel(tmp_path): + """CPU writer: 
overview 'mean' must skip sentinel pixels (issue #1613).""" + from xrspatial.geotiff import open_geotiff, to_geotiff + + arr = _arr_with_partial_nan() + da = xr.DataArray(arr, dims=['y', 'x']) + p = str(tmp_path / 'cog_mean_nodata.tif') + to_geotiff(da, p, nodata=-9999.0, cog=True, compression='deflate', + tiled=True, tile_size=16, overview_levels=[2], + overview_resampling='mean') + + ov = open_geotiff(p, overview_level=1) + expected = np.array([[1.5, 3.5], [15.0, 35.0]], dtype=np.float32) + np.testing.assert_allclose(np.asarray(ov.data), expected) + + +def test_cpu_cog_overview_mean_partial_block(tmp_path): + """CPU writer: partial-NaN 2x2 block averages over the finite cells only.""" + from xrspatial.geotiff import open_geotiff, to_geotiff + + arr = _arr_with_full_nan_block() + da = xr.DataArray(arr, dims=['y', 'x']) + p = str(tmp_path / 'cog_mean_nodata_full_block.tif') + to_geotiff(da, p, nodata=-9999.0, cog=True, compression='deflate', + tiled=True, tile_size=16, overview_levels=[2], + overview_resampling='mean') + + ov = open_geotiff(p, overview_level=1) + # Top-left 2x2 was all-NaN -> reduces to NaN -> rewritten to -9999 + # on disk, then read back as NaN once the overview-nodata + # inheritance fix (#1739) restores attrs['nodata'] and re-masks + # the sentinel. 
+ # Top-right 2x2 [3,4,7,8] -> mean 5.5 + # Bottom-left [10,20,10,20] -> 15 + # Bottom-right [30,40,30,40] -> 35 + data = np.asarray(ov.data) + assert ov.attrs.get('nodata') == -9999.0 + assert np.isnan(data[0, 0]) + np.testing.assert_allclose(data[0, 1], 5.5) + np.testing.assert_allclose(data[1, 0], 15.0) + np.testing.assert_allclose(data[1, 1], 35.0) + + +@pytest.mark.parametrize('method,expected', [ + ('min', np.array([[1.0, 3.0], [10.0, 30.0]], dtype=np.float32)), + ('max', np.array([[2.0, 4.0], [20.0, 40.0]], dtype=np.float32)), + ('median', np.array([[1.5, 3.5], [15.0, 35.0]], dtype=np.float32)), +]) +def test_cpu_cog_overview_aggregations_ignore_sentinel( + tmp_path, method, expected): + """min/max/median overview reductions must also skip the sentinel.""" + from xrspatial.geotiff import open_geotiff, to_geotiff + + arr = _arr_with_partial_nan() + da = xr.DataArray(arr, dims=['y', 'x']) + p = str(tmp_path / f'cog_{method}_nodata.tif') + to_geotiff(da, p, nodata=-9999.0, cog=True, compression='deflate', + tiled=True, tile_size=16, overview_levels=[2], + overview_resampling=method) + + ov = open_geotiff(p, overview_level=1) + np.testing.assert_allclose(np.asarray(ov.data), expected) + + +def test_cpu_cog_overview_mean_no_nodata_passes(tmp_path): + """When nodata is unset the reducer behaves as before.""" + from xrspatial.geotiff import open_geotiff, to_geotiff + + arr = np.arange(16, dtype=np.float32).reshape(4, 4) + da = xr.DataArray(arr, dims=['y', 'x']) + p = str(tmp_path / 'cog_mean_no_nodata.tif') + to_geotiff(da, p, cog=True, compression='deflate', + tiled=True, tile_size=16, overview_levels=[2], + overview_resampling='mean') + + ov = open_geotiff(p, overview_level=1) + # mean of 2x2 blocks of arange(16).reshape(4,4) + expected = np.array([ + [(0 + 1 + 4 + 5) / 4, (2 + 3 + 6 + 7) / 4], + [(8 + 9 + 12 + 13) / 4, (10 + 11 + 14 + 15) / 4], + ], dtype=np.float32) + np.testing.assert_allclose(np.asarray(ov.data), expected) + + +def 
test_block_reduce_2d_nodata_kwarg_directly(): + """Exercise the helper directly so a regression here is caught fast.""" + from xrspatial.geotiff._writer import _block_reduce_2d + + arr = _arr_with_partial_nan() + # Without nodata, the sentinel poisons the reduction. + arr_sentinel = arr.copy() + arr_sentinel[np.isnan(arr_sentinel)] = -9999.0 + poisoned = _block_reduce_2d(arr_sentinel, 'mean') + assert poisoned[0, 0] < -1000.0 # confirms the bug shape + + # With nodata, the sentinel is treated as missing. + fixed = _block_reduce_2d(arr_sentinel, 'mean', nodata=-9999.0) + np.testing.assert_allclose(fixed[0, 0], 1.5) + np.testing.assert_allclose(fixed[0, 1], 3.5) + + +def test_block_reduce_2d_nodata_all_sentinel_block_yields_nan(): + """All-sentinel block reduces to NaN under nan-aware aggregation.""" + from xrspatial.geotiff._writer import _block_reduce_2d + + arr = np.full((2, 2), -9999.0, dtype=np.float32) + out = _block_reduce_2d(arr, 'mean', nodata=-9999.0) + assert out.shape == (1, 1) + assert np.isnan(out[0, 0]) + + +def test_block_reduce_2d_inf_nodata_is_masked(): + """nodata=+/-inf must be masked back to NaN like a finite sentinel. + + The upstream NaN->sentinel rewrite only gates on ``not np.isnan``, + so ``nodata=inf`` is a real (if uncommon) caller choice. The reducer + has to match that gate or it re-poisons the overview with inf. 
+ """ + from xrspatial.geotiff._writer import _block_reduce_2d + + arr = np.array([ + [1.0, 2.0, 3.0, 4.0], + [np.inf, np.inf, np.inf, np.inf], + [10.0, 20.0, 30.0, 40.0], + [10.0, 20.0, 30.0, 40.0], + ], dtype=np.float32) + out = _block_reduce_2d(arr, 'mean', nodata=float('inf')) + np.testing.assert_allclose(out[0, 0], 1.5) + np.testing.assert_allclose(out[0, 1], 3.5) + + +def test_block_reduce_2d_all_nan_block_does_not_warn(): + """All-NaN blocks must not surface RuntimeWarning to user logs.""" + import warnings as _warnings + + from xrspatial.geotiff._writer import _block_reduce_2d + + arr = np.array([ + [-9999.0, -9999.0, 3.0, 4.0], + [-9999.0, -9999.0, 7.0, 8.0], + ], dtype=np.float32) + + with _warnings.catch_warnings(record=True) as caught: + _warnings.simplefilter('always') + out = _block_reduce_2d(arr, 'mean', nodata=-9999.0) + + assert not [w for w in caught if issubclass(w.category, RuntimeWarning)] + assert np.isnan(out[0, 0]) + np.testing.assert_allclose(out[0, 1], 5.5) + + +@_gpu_only +def test_gpu_cog_overview_mean_ignores_sentinel(tmp_path): + """GPU writer: overview 'mean' must skip sentinel pixels (issue #1613).""" + import cupy + + from xrspatial.geotiff import open_geotiff, to_geotiff + + arr_cpu = _arr_with_partial_nan() + arr_gpu = cupy.asarray(arr_cpu) + da = xr.DataArray(arr_gpu, dims=['y', 'x']) + + p = str(tmp_path / 'gpu_cog_mean_nodata.tif') + to_geotiff(da, p, nodata=-9999.0, cog=True, compression='deflate', + tiled=True, tile_size=16, overview_levels=[2], + overview_resampling='mean', gpu=True) + + ov = open_geotiff(p, overview_level=1) + expected = np.array([[1.5, 3.5], [15.0, 35.0]], dtype=np.float32) + np.testing.assert_allclose(np.asarray(ov.data), expected) + + +@_gpu_only +def test_gpu_block_reduce_nodata_kwarg_directly(): + """Exercise the GPU helper directly so a regression is caught fast.""" + import cupy + + from xrspatial.geotiff._gpu_decode import _block_reduce_2d_gpu + + arr_cpu = _arr_with_partial_nan() + 
arr_cpu[np.isnan(arr_cpu)] = -9999.0 + arr_gpu = cupy.asarray(arr_cpu) + + poisoned = _block_reduce_2d_gpu(arr_gpu, 'mean') + assert float(poisoned[0, 0].get()) < -1000.0 + + fixed = _block_reduce_2d_gpu(arr_gpu, 'mean', nodata=-9999.0) + np.testing.assert_allclose(float(fixed[0, 0].get()), 1.5) + np.testing.assert_allclose(float(fixed[0, 1].get()), 3.5) + + +@_gpu_only +def test_gpu_block_reduce_inf_nodata_is_masked(): + """GPU helper mirrors the CPU isnan-only gate for nodata=inf.""" + import cupy + + from xrspatial.geotiff._gpu_decode import _block_reduce_2d_gpu + + arr_cpu = np.array([ + [1.0, 2.0, 3.0, 4.0], + [np.inf, np.inf, np.inf, np.inf], + [10.0, 20.0, 30.0, 40.0], + [10.0, 20.0, 30.0, 40.0], + ], dtype=np.float32) + arr_gpu = cupy.asarray(arr_cpu) + + out = _block_reduce_2d_gpu(arr_gpu, 'mean', nodata=float('inf')) + np.testing.assert_allclose(float(out[0, 0].get()), 1.5) + np.testing.assert_allclose(float(out[0, 1].get()), 3.5) + + +@_gpu_only +def test_gpu_cog_overview_matches_cpu(tmp_path): + """CPU and GPU overview pyramids must agree on nodata-masked data.""" + import cupy + + from xrspatial.geotiff import open_geotiff, to_geotiff + + arr = _arr_with_partial_nan() + + # CPU + da_cpu = xr.DataArray(arr, dims=['y', 'x']) + p_cpu = str(tmp_path / 'cpu_pyramid.tif') + to_geotiff(da_cpu, p_cpu, nodata=-9999.0, cog=True, + compression='deflate', tiled=True, tile_size=16, + overview_levels=[2], overview_resampling='mean') + cpu_ov = np.asarray(open_geotiff(p_cpu, overview_level=1).data) + + # GPU + da_gpu = xr.DataArray(cupy.asarray(arr), dims=['y', 'x']) + p_gpu = str(tmp_path / 'gpu_pyramid.tif') + to_geotiff(da_gpu, p_gpu, nodata=-9999.0, cog=True, + compression='deflate', tiled=True, tile_size=16, + overview_levels=[2], overview_resampling='mean', gpu=True) + gpu_ov = np.asarray(open_geotiff(p_gpu, overview_level=1).data) + + np.testing.assert_allclose(cpu_ov, gpu_ov) + + +# ------------------------------------------------------------------------- +# 
Section: cubic resampling, float nodata +# ------------------------------------------------------------------------- + + +_gpu_only = pytest.mark.skipif( + not _HAS_GPU, + reason="cupy + CUDA required", +) + + +def _flat_with_corner_nan(side: int = 16, nan_side: int = 4): + """``side x side`` float32 array of 100.0 with a ``nan_side x nan_side`` NaN corner.""" + arr = np.ones((side, side), dtype=np.float32) * 100.0 + arr[:nan_side, :nan_side] = np.nan + return arr + + +def test_block_reduce_cubic_nodata_helper_no_ringing(): + """Helper: cubic with nodata must not leak the sentinel into neighbours.""" + pytest.importorskip("scipy") + from xrspatial.geotiff._writer import _block_reduce_2d + + # Mimic what to_geotiff does: rewrite NaN to the sentinel before + # handing the array to the reducer. + arr = _flat_with_corner_nan() + arr[np.isnan(arr)] = -9999.0 + + out = _block_reduce_2d(arr, 'cubic', nodata=-9999.0) + + # The valid region must still read ~100. Without the fix the cells + # adjacent to the sentinel corner returned values like 1196.28 and + # -19.00 from the cubic blend. + valid = out != -9999.0 + assert np.all(np.abs(out[valid] - 100.0) < 1e-3), ( + f"ringing leaked into cubic output: {out[valid]}") + + # Sentinel cells still mark the nodata region. + assert (out == -9999.0).any() + + +def test_block_reduce_cubic_nodata_poisoning_repro(): + """Without the fix the sentinel poisoned the cubic output. + + Pin the failure mode by running cubic on the same array *without* + a nodata argument and confirming the documented buggy values + appear. This guards against a regression where ``nodata`` silently + stops being honoured. + """ + pytest.importorskip("scipy") + from xrspatial.geotiff._writer import _block_reduce_2d + + arr = _flat_with_corner_nan() + arr[np.isnan(arr)] = -9999.0 + + # nodata=None reproduces the pre-fix behaviour. + poisoned = _block_reduce_2d(arr, 'cubic') + # At least one cell outside the corner has a wildly wrong value.
+ valid_no_sentinel = (poisoned != -9999.0) + drift = np.abs(poisoned[valid_no_sentinel] - 100.0) + assert drift.max() > 50.0, ( + "expected the no-nodata cubic path to ring; got a clean output " + f"with max drift {drift.max()}") + + +def test_block_reduce_cubic_no_nodata_unchanged(): + """Cubic on data without nodata stays at order=3 with prefilter.""" + pytest.importorskip("scipy") + from xrspatial.geotiff._writer import _block_reduce_2d + + arr = np.arange(256, dtype=np.float32).reshape(16, 16) + out_default = _block_reduce_2d(arr, 'cubic') + # The same array round-tripped through scipy zoom directly should + # match (since no sentinel is present the fix path is not taken). + from scipy.ndimage import zoom + expected = zoom(arr, 0.5, order=3).astype(arr.dtype) + np.testing.assert_array_equal(out_default, expected) + + +def test_block_reduce_cubic_nodata_unset_is_zoom(): + """nodata=None goes through the original zoom path, no prefilter change.""" + pytest.importorskip("scipy") + from scipy.ndimage import zoom + + from xrspatial.geotiff._writer import _block_reduce_2d + + arr = np.linspace(0.0, 1.0, 64, dtype=np.float32).reshape(8, 8) + out = _block_reduce_2d(arr, 'cubic', nodata=None) + expected = zoom(arr, 0.5, order=3).astype(arr.dtype) + np.testing.assert_array_equal(out, expected) + + +def test_to_geotiff_cog_cubic_nodata_round_trip(tmp_path): + """End-to-end: writing a COG with cubic + nodata produces a clean overview.""" + pytest.importorskip("scipy") + from xrspatial.geotiff import open_geotiff, to_geotiff + + arr = _flat_with_corner_nan() + da = xr.DataArray(arr, dims=['y', 'x']) + p = str(tmp_path / 'cog_cubic_nodata.tif') + to_geotiff(da, p, nodata=-9999.0, cog=True, compression='deflate', + tiled=True, tile_size=16, overview_levels=[2], + overview_resampling='cubic') + + ov = open_geotiff(p, overview_level=1) + data = np.asarray(ov.data) + + # No polluted pixels: every cell is either NaN (reader unmasked the + # sentinel back to NaN), the literal 
sentinel value (reader kept it), + # or ~100 (the source value). + polluted = ( + (~np.isnan(data)) + & (data != -9999.0) + & (np.abs(data - 100.0) > 1e-3) + ) + assert not polluted.any(), ( + f"polluted overview cells: {data[polluted]}") + + +def test_to_geotiff_cog_cubic_no_nodata_round_trip(tmp_path): + """Regression guard: cubic without nodata still produces the same overview.""" + pytest.importorskip("scipy") + from xrspatial.geotiff import open_geotiff, to_geotiff + + arr = np.arange(256, dtype=np.float32).reshape(16, 16) + da = xr.DataArray(arr, dims=['y', 'x']) + p = str(tmp_path / 'cog_cubic_no_nodata.tif') + to_geotiff(da, p, cog=True, compression='deflate', + tiled=True, tile_size=16, overview_levels=[2], + overview_resampling='cubic') + + ov = open_geotiff(p, overview_level=1) + assert ov.shape == (8, 8) + assert ov.dtype == np.float32 + # Cubic on a monotonic ramp stays bounded by source range. + assert float(np.asarray(ov.data).min()) >= float(arr.min()) - 1.0 + assert float(np.asarray(ov.data).max()) <= float(arr.max()) + 1.0 + + +def test_block_reduce_cubic_inf_nodata_is_masked(): + """nodata=+/-inf must be masked just like a finite sentinel.""" + pytest.importorskip("scipy") + from xrspatial.geotiff._writer import _block_reduce_2d + + arr = np.ones((16, 16), dtype=np.float32) * 5.0 + arr[:4, :4] = np.inf # treat +inf as sentinel + out = _block_reduce_2d(arr, 'cubic', nodata=np.inf) + valid = ~np.isinf(out) + # Outside the masked region we still read ~5.0. 
+ np.testing.assert_allclose(out[valid], 5.0, atol=1e-4) + + +def test_block_reduce_cubic_nan_sentinel_skips_mask(): + """nodata=NaN is a no-op (matches the existing nan-pass-through gate).""" + pytest.importorskip("scipy") + from scipy.ndimage import zoom + + from xrspatial.geotiff._writer import _block_reduce_2d + + arr = np.linspace(0.0, 1.0, 64, dtype=np.float32).reshape(8, 8) + out = _block_reduce_2d(arr, 'cubic', nodata=np.nan) + expected = zoom(arr, 0.5, order=3).astype(arr.dtype) + np.testing.assert_array_equal(out, expected) + + +def test_gpu_overview_methods_includes_cubic(): + """The GPU constant must list ``cubic`` so callers do not pre-validate + against the smaller pre-#1623 set.""" + from xrspatial.geotiff._gpu_decode import GPU_OVERVIEW_METHODS + assert 'cubic' in GPU_OVERVIEW_METHODS + + +@_gpu_only +def test_gpu_block_reduce_cubic_falls_back_to_cpu(): + """GPU cubic must route through the CPU helper and return cupy data.""" + pytest.importorskip("scipy") + import cupy + + from xrspatial.geotiff._gpu_decode import _block_reduce_2d_gpu + from xrspatial.geotiff._writer import _block_reduce_2d + + arr = _flat_with_corner_nan() + arr[np.isnan(arr)] = -9999.0 + + gpu_arr = cupy.asarray(arr) + gpu_out = _block_reduce_2d_gpu(gpu_arr, 'cubic', nodata=-9999.0) + assert isinstance(gpu_out, cupy.ndarray) + + cpu_out = _block_reduce_2d(arr, 'cubic', nodata=-9999.0) + np.testing.assert_array_equal(cupy.asnumpy(gpu_out), cpu_out) + + +@_gpu_only +def test_to_geotiff_cog_cubic_nodata_gpu_round_trip(tmp_path): + """End-to-end GPU writer: cubic + nodata produces a clean overview.""" + pytest.importorskip("scipy") + import cupy + + from xrspatial.geotiff import open_geotiff, to_geotiff + + arr = _flat_with_corner_nan() + da = xr.DataArray(cupy.asarray(arr), dims=['y', 'x']) + p = str(tmp_path / 'cog_cubic_nodata_gpu.tif') + to_geotiff(da, p, nodata=-9999.0, cog=True, compression='deflate', + tiled=True, tile_size=16, overview_levels=[2], + 
overview_resampling='cubic') + + ov = open_geotiff(p, overview_level=1) + data = np.asarray(ov.data) + polluted = ( + (~np.isnan(data)) + & (data != -9999.0) + & (np.abs(data - 100.0) > 1e-3) + ) + assert not polluted.any(), ( + f"GPU cubic overview leaked sentinel into neighbours: " + f"{data[polluted]}") + + +@_gpu_only +def test_gpu_cpu_cubic_overview_bytes_match(tmp_path): + """CPU and GPU writers produce the same cubic overview pixels.""" + pytest.importorskip("scipy") + import cupy + + from xrspatial.geotiff import open_geotiff, to_geotiff + + arr = _flat_with_corner_nan() + cpu_da = xr.DataArray(arr, dims=['y', 'x']) + gpu_da = xr.DataArray(cupy.asarray(arr), dims=['y', 'x']) + + cpu_path = str(tmp_path / 'cpu_cubic.tif') + gpu_path = str(tmp_path / 'gpu_cubic.tif') + to_geotiff(cpu_da, cpu_path, nodata=-9999.0, cog=True, + compression='deflate', tiled=True, tile_size=16, + overview_levels=[2], overview_resampling='cubic') + to_geotiff(gpu_da, gpu_path, nodata=-9999.0, cog=True, + compression='deflate', tiled=True, tile_size=16, + overview_levels=[2], overview_resampling='cubic') + + cpu_ov = np.asarray(open_geotiff(cpu_path, overview_level=1).data) + gpu_ov = np.asarray(open_geotiff(gpu_path, overview_level=1).data) + # NaN-aware compare since the reader unmasks the sentinel. 
+ np.testing.assert_array_equal(np.isnan(cpu_ov), np.isnan(gpu_ov)) + finite = ~np.isnan(cpu_ov) + np.testing.assert_allclose(cpu_ov[finite], gpu_ov[finite], atol=1e-3) + + +# ------------------------------------------------------------------------- +# Section: cubic resampling, int nodata +# ------------------------------------------------------------------------- + +# --------------------------------------------------------------------------- +# Helper-level: _block_reduce_2d cubic + integer + sentinel +# --------------------------------------------------------------------------- + + +def _make_block_with_nodata_corner(dtype, nodata_value, size=64, + corner=16, fill=100): + """Return a (size, size) ``dtype`` array with a corner of nodata.""" + arr = np.full((size, size), fill, dtype=dtype) + arr[:corner, :corner] = nodata_value + return arr + + +def test_cubic_int16_with_nodata_does_not_poison_overview(): + """int16 + finite sentinel: cubic overview must not blend sentinel.""" + arr = _make_block_with_nodata_corner(np.int16, -9999) + result = _block_reduce_2d(arr, method='cubic', nodata=-9999) + # Finite (non-sentinel) values must lie within the source data range. + # Pre-fix the boundary surfaced values like 1082 / 1134 / -11104. + finite_non_sentinel = result[result != -9999] + assert finite_non_sentinel.size > 0 + assert finite_non_sentinel.max() <= 100 + assert finite_non_sentinel.min() >= 100 # only valid data value is 100 + # The output dtype is the input dtype. + assert result.dtype == np.int16 + # Result shape is half (size/2, size/2). 
+ assert result.shape == (32, 32) + + +def test_cubic_uint16_with_nodata_does_not_poison_overview(): + """uint16 + finite sentinel: same guarantee as int16.""" + arr = _make_block_with_nodata_corner(np.uint16, 65535, fill=200) + result = _block_reduce_2d(arr, method='cubic', nodata=65535) + finite = result[result != 65535] + assert finite.size > 0 + assert finite.min() >= 200 + assert finite.max() <= 200 + assert result.dtype == np.uint16 + + +def test_cubic_int32_with_nodata_does_not_poison_overview(): + """int32 + negative sentinel: same guarantee.""" + arr = _make_block_with_nodata_corner(np.int32, -2147483648, fill=42) + result = _block_reduce_2d(arr, method='cubic', nodata=-2147483648) + finite = result[result != -2147483648] + assert finite.size > 0 + assert finite.min() >= 42 + assert finite.max() <= 42 + assert result.dtype == np.int32 + + +def test_cubic_int_no_nodata_unchanged(): + """Cubic on integer without nodata still runs the plain zoom path.""" + arr = np.arange(64 * 64, dtype=np.int16).reshape(64, 64) + result_no_nd = _block_reduce_2d(arr, method='cubic', nodata=None) + # Plain zoom path: dtype preserved, shape halved. + assert result_no_nd.dtype == np.int16 + assert result_no_nd.shape == (32, 32) + + +def test_cubic_int_nodata_out_of_range_noop(): + """Sentinel outside the dtype range cannot equal any pixel — no-op.""" + arr = np.full((64, 64), 100, dtype=np.uint16) + # -1 cannot exist in uint16; the guard skips the masking branch. + result = _block_reduce_2d(arr, method='cubic', nodata=-1) + # Falls through to plain zoom path; values stay 100 (cubic on constant). + assert result.dtype == np.uint16 + # Cubic of a constant grid is the same constant. 
+ assert np.all(result == 100) + + +def test_cubic_int_nodata_fractional_noop(): + """Fractional sentinel on integer dtype: no-op (cannot match any pixel).""" + arr = np.full((64, 64), 100, dtype=np.int16) + result = _block_reduce_2d(arr, method='cubic', nodata=1.5) + assert result.dtype == np.int16 + assert np.all(result == 100) + + +def test_cubic_int_all_sentinel_block_becomes_sentinel(): + """A 2x2 block that is entirely the sentinel rounds back to the sentinel.""" + arr = np.full((4, 4), -9999, dtype=np.int16) + result = _block_reduce_2d(arr, method='cubic', nodata=-9999) + assert result.dtype == np.int16 + assert np.all(result == -9999) + + +def test_cubic_float_branch_still_works(): + """Float regression guard: the existing #1623 path must still work.""" + arr = np.full((64, 64), 100.0, dtype=np.float32) + arr[:16, :16] = -9999.0 + result = _block_reduce_2d(arr, method='cubic', nodata=-9999.0) + assert result.dtype == np.float32 + finite = result[result != -9999.0] + assert finite.size > 0 + # No ringing: all valid output pixels are 100 (constant input region). + np.testing.assert_allclose(finite, 100.0, atol=1e-3) + + +# --------------------------------------------------------------------------- +# End-to-end: to_geotiff cubic + integer + nodata round-trip +# --------------------------------------------------------------------------- + +def test_to_geotiff_int_cubic_overview_round_trip(tmp_path): + """1024x1024 int16 + cog + cubic + nodata round-trips without poisoning.""" + data = np.full((1024, 1024), 100, dtype=np.int16) + data[:256, :256] = -9999 + da = xr.DataArray( + data, dims=('y', 'x'), + coords={'y': np.arange(1024.0), 'x': np.arange(1024.0)}, + ) + path = tmp_path / "cubic_int_1975.tif" + to_geotiff(da, str(path), cog=True, overview_resampling='cubic', + nodata=-9999, crs=4326) + # Level 0: full resolution. 
+ r0 = open_geotiff(str(path), overview_level=0) + uniq_0 = set(np.unique(r0.values[~np.isnan(r0.values)])) + assert uniq_0 == {100.0} + # Level 1: the historically poisoned level. + r1 = open_geotiff(str(path), overview_level=1) + finite_1 = r1.values[~np.isnan(r1.values)] + # All finite values must be 100 (the only valid data value); no ringing. + np.testing.assert_array_equal(finite_1, 100.0) + + +def test_to_geotiff_int_cubic_no_nodata_regression(tmp_path): + """int16 + cog + cubic without nodata: cubic still runs (regression).""" + rng = np.random.default_rng(0) + data = rng.integers(0, 1000, size=(1024, 1024), dtype=np.int16) + da = xr.DataArray( + data, dims=('y', 'x'), + coords={'y': np.arange(1024.0), 'x': np.arange(1024.0)}, + ) + path = tmp_path / "cubic_int_no_nd_1975.tif" + to_geotiff(da, str(path), cog=True, overview_resampling='cubic', + crs=4326) + r1 = open_geotiff(str(path), overview_level=1) + # Output dtype is the source integer dtype. + assert r1.values.dtype == np.int16 + assert r1.shape == (512, 512) + + +def test_to_geotiff_int_cubic_overview_matches_mean_finite_range(tmp_path): + """Cubic must agree with mean on which pixels are finite vs nodata.""" + data = np.full((512, 512), 50, dtype=np.uint16) + data[:128, :128] = 65535 + da = xr.DataArray( + data, dims=('y', 'x'), + coords={'y': np.arange(512.0), 'x': np.arange(512.0)}, + ) + cubic_path = tmp_path / "cubic.tif" + mean_path = tmp_path / "mean.tif" + to_geotiff(da, str(cubic_path), cog=True, overview_resampling='cubic', + nodata=65535, crs=4326) + to_geotiff(da, str(mean_path), cog=True, overview_resampling='mean', + nodata=65535, crs=4326) + r_cubic = open_geotiff(str(cubic_path), overview_level=0) + r_mean = open_geotiff(str(mean_path), overview_level=0) + # Sentinel masks should land on the same pixels for both methods on a + # constant valid region with a constant nodata corner. 
+ np.testing.assert_array_equal( + np.isnan(r_cubic.values), np.isnan(r_mean.values), + ) + finite_cubic = r_cubic.values[~np.isnan(r_cubic.values)] + finite_mean = r_mean.values[~np.isnan(r_mean.values)] + # All valid pixels are 50 in both. + np.testing.assert_array_equal(finite_cubic, 50.0) + np.testing.assert_array_equal(finite_mean, 50.0) + + +def test_gpu_int_cubic_overview_matches_cpu(tmp_path): + """GPU writer cubic falls back to CPU; bytes must match CPU writer.""" + cupy = pytest.importorskip("cupy") + if not cupy.cuda.is_available(): + pytest.skip("CUDA not available") + + data = np.full((1024, 1024), 100, dtype=np.int16) + data[:256, :256] = -9999 + cpu_da = xr.DataArray( + data, dims=('y', 'x'), + coords={'y': np.arange(1024.0), 'x': np.arange(1024.0)}, + ) + gpu_da = xr.DataArray( + cupy.asarray(data), dims=('y', 'x'), + coords={'y': np.arange(1024.0), 'x': np.arange(1024.0)}, + ) + cpu_path = tmp_path / "cpu.tif" + gpu_path = tmp_path / "gpu.tif" + to_geotiff(cpu_da, str(cpu_path), cog=True, overview_resampling='cubic', + nodata=-9999, crs=4326) + to_geotiff(gpu_da, str(gpu_path), cog=True, overview_resampling='cubic', + nodata=-9999, crs=4326) + cpu_r1 = open_geotiff(str(cpu_path), overview_level=1) + gpu_r1 = open_geotiff(str(gpu_path), overview_level=1) + # Both paths route cubic through the same CPU helper; results must agree + # bit-for-bit on this constant input. 
+ cpu_arr = cpu_r1.values + gpu_arr = gpu_r1.values + assert cpu_arr.shape == gpu_arr.shape + np.testing.assert_array_equal( + np.isnan(cpu_arr), np.isnan(gpu_arr), + ) + np.testing.assert_array_equal( + cpu_arr[~np.isnan(cpu_arr)], gpu_arr[~np.isnan(gpu_arr)], + ) + + +# ------------------------------------------------------------------------- +# Section: block-reduce int sentinel masking +# ------------------------------------------------------------------------- + + +_gpu_only = pytest.mark.skipif( + not _HAS_GPU, + reason="cupy + CUDA required", +) + + +# --------------------------------------------------------------------------- +# Unit-level: _block_reduce_2d on integer dtypes +# --------------------------------------------------------------------------- + +def _int_block_partial_sentinel(sentinel, dtype): + """4x4 integer raster where the right two columns of each row pair + mix valid and sentinel cells. Block (0, 1) has (100, 100, sentinel, + sentinel); block (1, 1) has (200, 200, sentinel, sentinel).""" + arr = np.array([ + [100, 100, 100, 100], + [100, 100, sentinel, sentinel], + [200, 200, 200, 200], + [200, 200, sentinel, sentinel], + ], dtype=dtype) + return arr + + +@pytest.mark.parametrize('method', ['mean', 'min', 'max', 'median']) +@pytest.mark.parametrize('dtype,sentinel', [ + (np.uint8, 255), + (np.uint16, 65535), + (np.int16, -9999), + (np.int32, -2_000_000_000), +]) +def test_block_reduce_int_sentinel_masked(method, dtype, sentinel): + """Integer overview reductions must skip sentinel cells. + + Before the fix, mean produced averages like ``(100+sentinel)/2`` cast + back to the integer dtype -- a non-sentinel value that the reader + leaves untouched. The fix masks the sentinel to NaN before the + reduction so nan-aware aggregation skips it. 
+ """ + from xrspatial.geotiff._writer import _block_reduce_2d + + arr = _int_block_partial_sentinel(sentinel, dtype) + out = _block_reduce_2d(arr, method, nodata=sentinel) + + # Every block now has at least one valid 100/200; result should equal + # the valid value (since for mean/min/max/median over {100, 100} is + # 100, and over {200, 200} is 200). Neither block has any cell that + # isn't 100, 200, or sentinel, so the output must be a subset of + # {100, 200}. + assert out.dtype == arr.dtype + out_vals = set(out.flatten().tolist()) + assert out_vals.issubset({100, 200}), ( + f"method={method} dtype={dtype} sentinel={sentinel} " + f"produced poisoned values: {out_vals - {100, 200}}" + ) + + +@pytest.mark.parametrize('dtype,sentinel', [ + (np.uint16, 65535), + (np.int16, -9999), +]) +def test_block_reduce_int_all_sentinel_block(dtype, sentinel): + """A 2x2 block that's entirely sentinel reduces to the sentinel. + + Without the post-reduction NaN-to-sentinel rewrite in the integer + branch, the all-NaN block from nanmean would cast to undefined + integer behaviour (zero or INT_MIN depending on platform). + """ + from xrspatial.geotiff._writer import _block_reduce_2d + + arr = np.array([ + [100, 100, sentinel, sentinel], + [100, 100, sentinel, sentinel], + [200, 200, 200, 200], + [200, 200, 200, 200], + ], dtype=dtype) + + out = _block_reduce_2d(arr, 'mean', nodata=sentinel) + assert out.dtype == arr.dtype + # Top-right block is all-sentinel; output must be the sentinel + assert out[0, 1] == sentinel + # Other blocks contain only valid values + assert out[0, 0] == 100 + assert out[1, 0] == 200 + assert out[1, 1] == 200 + + +def test_block_reduce_int_no_nodata_unchanged(): + """Without ``nodata``, the integer reduction code path stays unchanged. + + Regression check: the fix must not alter the no-sentinel case. 
+ """ + from xrspatial.geotiff._writer import _block_reduce_2d + + arr = np.array([ + [1, 2, 3, 4], + [5, 6, 7, 8], + [9, 10, 11, 12], + [13, 14, 15, 16], + ], dtype=np.int16) + + out = _block_reduce_2d(arr, 'mean') + # Block (0,0) = mean(1,2,5,6) = 3.5 -> round -> 4 + # Block (0,1) = mean(3,4,7,8) = 5.5 -> round -> 6 + # Block (1,0) = mean(9,10,13,14) = 11.5 -> round -> 12 + # Block (1,1) = mean(11,12,15,16) = 13.5 -> round -> 14 + expected = np.array([[4, 6], [12, 14]], dtype=np.int16) + np.testing.assert_array_equal(out, expected) + + +def test_block_reduce_int_out_of_range_sentinel_noop(): + """A sentinel outside the dtype's range is a no-op (no mask applied). + + Mirrors the ``_int_nodata_in_range`` gating in ``_reader.py``: a + uint16 file with GDAL_NODATA="-9999" cannot match any decoded pixel, + so the reduction proceeds without the mask. This keeps the fix from + raising OverflowError on the dtype cast. + """ + from xrspatial.geotiff._writer import _block_reduce_2d + + # uint16 with nodata=-9999: out of range, no-op + arr = np.array([ + [1, 2, 3, 4], + [5, 6, 7, 8], + ], dtype=np.uint16) + out = _block_reduce_2d(arr, 'mean', nodata=-9999) + # Should produce the same result as without the kwarg + expected = _block_reduce_2d(arr, 'mean') + np.testing.assert_array_equal(out, expected) + + +# --------------------------------------------------------------------------- +# End-to-end: to_geotiff + open_geotiff round trip +# --------------------------------------------------------------------------- + +@pytest.fixture +def _int_cog_inputs(tmp_path): + """uint16 raster, full of 100 with a 65x65 sentinel patch.""" + H, W = 256, 256 + data = np.full((H, W), 100, dtype=np.uint16) + data[64:129, 64:129] = 65535 + da = xr.DataArray( + data, + dims=('y', 'x'), + coords={'y': np.arange(H, dtype=np.float64), + 'x': np.arange(W, dtype=np.float64)}, + attrs={'crs': 4326}, + ) + return da, tmp_path + + +@pytest.mark.parametrize('method', ['mean', 'min', 'max', 'median']) +def 
test_cpu_int_cog_overview_not_poisoned(_int_cog_inputs, method): + """End-to-end: integer COG overview pyramid contains only valid values. + + Before the fix, the level-1 read contained values like 16459 and + 32818 -- nan-aware-mean of (sentinel, 100, 100, 100) and (sentinel, + sentinel, 100, 100) cast back to uint16. The reader can't mask them + because they don't equal 65535. + """ + from xrspatial.geotiff import open_geotiff, to_geotiff + + da, tmp_path = _int_cog_inputs + path = str(tmp_path / f'int_overview_{method}_2026_05_12.tif') + to_geotiff(da, path, nodata=65535, cog=True, + overview_levels=[2], overview_resampling=method) + + ov = open_geotiff(path, overview_level=1) + arr = np.asarray(ov.data) + unique = set(int(v) for v in np.unique(arr) if not np.isnan(v)) + poisoned = unique - {100, 65535} + assert not poisoned, ( + f"method={method} produced poisoned overview values: {poisoned}" + ) + + +def test_cpu_int_cog_overview_3band_not_poisoned(tmp_path): + """3-band integer COG: same fix applies via the 3D _make_overview branch.""" + from xrspatial.geotiff import open_geotiff, to_geotiff + + H, W = 256, 256 + data = np.full((H, W, 3), 100, dtype=np.uint16) + data[64:129, 64:129, :] = 65535 + da = xr.DataArray( + data, + dims=('y', 'x', 'band'), + coords={'y': np.arange(H, dtype=np.float64), + 'x': np.arange(W, dtype=np.float64), + 'band': [0, 1, 2]}, + attrs={'crs': 4326}, + ) + + path = str(tmp_path / 'int_overview_3band_2026_05_12.tif') + to_geotiff(da, path, nodata=65535, cog=True, + overview_levels=[2], overview_resampling='mean') + + ov = open_geotiff(path, overview_level=1) + arr = np.asarray(ov.data) + unique = set(int(v) for v in np.unique(arr) if not np.isnan(v)) + poisoned = unique - {100, 65535} + assert not poisoned, ( + f"3-band integer overview produced poisoned values: {poisoned}" + ) + + +def test_cpu_int_cog_no_nodata_unchanged(tmp_path): + """No nodata kwarg: integer overview path stays as it was.""" + from xrspatial.geotiff import 
open_geotiff, to_geotiff + + H, W = 256, 256 + data = np.full((H, W), 100, dtype=np.uint16) + data[100:200, 100:200] = 50 + da = xr.DataArray( + data, + dims=('y', 'x'), + coords={'y': np.arange(H, dtype=np.float64), + 'x': np.arange(W, dtype=np.float64)}, + attrs={'crs': 4326}, + ) + + path = str(tmp_path / 'int_overview_no_nodata_2026_05_12.tif') + to_geotiff(da, path, cog=True, + overview_levels=[2], overview_resampling='mean') + + ov = open_geotiff(path, overview_level=1) + arr = np.asarray(ov.data) + # No sentinel, so every overview pixel is a real average of 50 / 100. + # Block-boundary pixels are weighted means: (50,50,50,100)/4 = 62.5 -> 63 + unique = set(int(v) for v in np.unique(arr)) + # Must contain at least 50 and 100; boundary-mixing averages allowed. + assert 50 in unique + assert 100 in unique + + +# --------------------------------------------------------------------------- +# GPU mirror +# --------------------------------------------------------------------------- + +@_gpu_only +@pytest.mark.parametrize('method', ['mean', 'min', 'max', 'median']) +@pytest.mark.parametrize('dtype,sentinel', [ + (np.uint16, 65535), + (np.int16, -9999), +]) +def test_gpu_block_reduce_int_sentinel_masked(method, dtype, sentinel): + """GPU mirror of the CPU integer sentinel-mask fix.""" + import cupy + + from xrspatial.geotiff._gpu_decode import _block_reduce_2d_gpu + + arr = _int_block_partial_sentinel(sentinel, dtype) + cpu_arr = cupy.asarray(arr) + out_gpu = _block_reduce_2d_gpu(cpu_arr, method, nodata=sentinel) + out = out_gpu.get() + + assert out.dtype == arr.dtype + out_vals = set(out.flatten().tolist()) + assert out_vals.issubset({100, 200}), ( + f"GPU method={method} dtype={dtype} produced poisoned values: " + f"{out_vals - {100, 200}}" + ) + + +@_gpu_only +@pytest.mark.parametrize('method', ['mean', 'min', 'max', 'median']) +def test_gpu_cpu_int_overview_byte_match(method): + """CPU and GPU integer overview reductions agree byte-for-byte. 
+ + Same parity contract as #1623 (cubic). Without the GPU fix, the GPU + pyramid would carry poisoned values while the CPU pyramid carried + sentinels -- two backends disagreeing on identical input. + """ + import cupy + + from xrspatial.geotiff._gpu_decode import _block_reduce_2d_gpu + from xrspatial.geotiff._writer import _block_reduce_2d + + arr = _int_block_partial_sentinel(-9999, np.int16) + cpu_out = _block_reduce_2d(arr, method, nodata=-9999) + gpu_out = _block_reduce_2d_gpu( + cupy.asarray(arr), method, nodata=-9999).get() + + np.testing.assert_array_equal(cpu_out, gpu_out)