diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index 4f8e52c7..0af9307d 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -51,6 +51,7 @@ transform_tuple as _transform_tuple, transform_from_attr as _transform_from_attr, coords_to_transform as _coords_to_transform, + require_transform_for_georeferenced as _require_transform_for_georeferenced, ) from ._geotags import GeoTransform, RASTER_PIXEL_IS_AREA, RASTER_PIXEL_IS_POINT from ._reader import UnsafeURLError diff --git a/xrspatial/geotiff/_coords.py b/xrspatial/geotiff/_coords.py index b1ee185e..b0fe4f61 100644 --- a/xrspatial/geotiff/_coords.py +++ b/xrspatial/geotiff/_coords.py @@ -232,6 +232,49 @@ def transform_from_attr(attr_val) -> 'GeoTransform | None': ) +def require_transform_for_georeferenced( + da: xr.DataArray, geo_transform +) -> None: + """Raise if ``da`` carries spatial coords but no transform was derived. + + Used by the writer entry points (#1945). A DataArray whose spatial + dim names appear in ``da.coords`` is an explicit caller request for a + georeferenced output. Silently falling through to a non-georeferenced + TIFF -- which is what the old code did for 1x1 inputs and inputs with + a degenerate axis -- corrupted round-trips. If the writer cannot + recover a transform from coords *and* the caller did not supply + ``attrs['transform']``, fail closed instead. + + ``geo_transform`` is the value the writer has already resolved (from + ``attrs['transform']`` first, then from coord arrays). If it's not + ``None`` we have a transform and there's nothing to check. + """ + if geo_transform is not None: + return + if da.ndim == 3: + spatial = tuple(d for d in da.dims if d not in _BAND_DIM_NAMES) + if len(spatial) == 2: + ydim, xdim = spatial[0], spatial[1] + else: + ydim = da.dims[-2] + xdim = da.dims[-1] + else: + ydim = da.dims[-2] + xdim = da.dims[-1] + if xdim in da.coords and ydim in da.coords: + raise ValueError( + f"Cannot infer GeoTIFF transform from a " + f"{tuple(da.sizes.values())} array with spatial coords on " + f"both axes: both axes are degenerate (1x1), so neither " + f"coord array carries a pixel size step. 1xN and Nx1 inputs " + f"recover the pixel size from the non-degenerate axis, but " + f"a 1x1 cannot. Supply the affine transform explicitly via " + f"``attrs['transform']`` (rasterio 6-tuple " + f"``(px, 0, ox, 0, py, oy)``) or drop the coords if a " + f"non-georeferenced TIFF is desired." + ) + + def coords_to_transform(da: xr.DataArray) -> 'GeoTransform | None': """Infer GeoTransform from DataArray coordinates. @@ -274,7 +317,11 @@ def coords_to_transform(da: xr.DataArray) -> 'GeoTransform | None': x = da.coords[xdim].values y = da.coords[ydim].values - if len(x) < 2 or len(y) < 2: + # 1x1 has no pixel-size signal on either axis. The caller must supply + # ``attrs['transform']`` (handled by the writer before calling us). + # Returning ``None`` lets the writer detect this and raise rather than + # silently writing a non-georeferenced TIFF (#1945). + if len(x) < 2 and len(y) < 2: return None # GeoTIFF only supports an affine transform; non-uniform spacing @@ -298,11 +345,32 @@ def _is_regular(coord, name): f"GeoTIFF requires an affine transform." ) - _is_regular(x, "x") - _is_regular(y, "y") - - pixel_width = float(x[1] - x[0]) - pixel_height = float(y[1] - y[0]) + # Degenerate-axis fallback (#1945). When one axis has length 1, we + # can't read a step off it (``coord[1] - coord[0]`` is undefined), + # so we recover the per-axis pixel size from the non-degenerate + # axis and assume square pixels for the degenerate one. That matches + # how every other geospatial reader handles 1xN / Nx1 strips. The + # earlier behaviour — bailing out and silently writing a + # non-georeferenced TIFF — broke round-trips for legitimate + # single-scanline / single-profile rasters. + if len(x) >= 2: + _is_regular(x, "x") + pixel_width = float(x[1] - x[0]) + else: + pixel_width = None + if len(y) >= 2: + _is_regular(y, "y") + pixel_height = float(y[1] - y[0]) + else: + pixel_height = None + + if pixel_width is None: + # Borrow magnitude from y; x increases left-to-right by convention. + pixel_width = abs(pixel_height) + if pixel_height is None: + # Borrow magnitude from x; y decreases top-to-bottom by convention, + # so flip sign. + pixel_height = -abs(pixel_width) is_point = da.attrs.get('raster_type') == 'point' if is_point: diff --git a/xrspatial/geotiff/_writers/eager.py b/xrspatial/geotiff/_writers/eager.py index 90bc8f0c..1915fb84 100644 --- a/xrspatial/geotiff/_writers/eager.py +++ b/xrspatial/geotiff/_writers/eager.py @@ -30,6 +30,7 @@ from .._coords import ( _BAND_DIM_NAMES, coords_to_transform as _coords_to_transform, + require_transform_for_georeferenced as _require_transform_for_georeferenced, transform_from_attr as _transform_from_attr, ) from .._crs import _validate_crs_fallback, _wkt_to_epsg @@ -517,6 +518,11 @@ def to_geotiff(data: xr.DataArray | np.ndarray, geo_transform = _transform_from_attr(data.attrs.get('transform')) if geo_transform is None: geo_transform = _coords_to_transform(data) + # Fail closed when coords are present but no transform could be + # derived (e.g. 1x1 without ``attrs['transform']``) instead of + # silently writing a non-georeferenced TIFF that round-trips back + # with integer pixel coords (#1945). + _require_transform_for_georeferenced(data, geo_transform) if epsg is None and crs is None: crs_attr = data.attrs.get('crs') if isinstance(crs_attr, str): @@ -837,6 +843,9 @@ def _write_vrt_tiled(data, vrt_path, *, crs=None, nodata=None, geo_transform = _transform_from_attr(data.attrs.get('transform')) if geo_transform is None: geo_transform = _coords_to_transform(data) + # Match the to_geotiff fail-closed guard so VRT writes don't + # silently produce non-georeferenced tiles either (#1945). + _require_transform_for_georeferenced(data, geo_transform) # Pull the same rich-tag set that to_geotiff forwards to # ``write`` so per-tile files under the VRT carry it too. _rich = _extract_rich_tags(data.attrs) diff --git a/xrspatial/geotiff/_writers/gpu.py b/xrspatial/geotiff/_writers/gpu.py index 9a356838..ff30ff60 100644 --- a/xrspatial/geotiff/_writers/gpu.py +++ b/xrspatial/geotiff/_writers/gpu.py @@ -20,6 +20,7 @@ from .._coords import ( _BAND_DIM_NAMES, coords_to_transform as _coords_to_transform, + require_transform_for_georeferenced as _require_transform_for_georeferenced, transform_from_attr as _transform_from_attr, ) from .._crs import _validate_crs_fallback, _wkt_to_epsg @@ -348,6 +349,11 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, geo_transform = _transform_from_attr(data.attrs.get('transform')) if geo_transform is None: geo_transform = _coords_to_transform(data) + # Match the CPU writer's fail-closed guard: an array with spatial + # coords but no derivable transform (e.g. 1x1 without + # ``attrs['transform']``) must not silently round-trip as a + # non-georeferenced TIFF (#1945). + _require_transform_for_georeferenced(data, geo_transform) # Resolve CRS the same way the CPU writer does. attrs['crs'] may # be an int EPSG or a WKT string; attrs['crs_wkt'] only carries # WKT. Without the WKT branch the GPU writer silently drops CRS diff --git a/xrspatial/geotiff/tests/test_degenerate_georef_1945.py b/xrspatial/geotiff/tests/test_degenerate_georef_1945.py new file mode 100644 index 00000000..7ef66c36 --- /dev/null +++ b/xrspatial/geotiff/tests/test_degenerate_georef_1945.py @@ -0,0 +1,356 @@ +"""Georeferenced 1xN / Nx1 / 1x1 writes must preserve their transform. + +Issue #1945: ``coords_to_transform`` returned ``None`` whenever either +spatial coord array had length < 2, and ``to_geotiff`` / +``write_geotiff_gpu`` then fell through and produced a non-georeferenced +TIFF. A DataArray with real x/y coords round-tripped back with integer +pixel coords and no ``transform`` attribute, so the spatial metadata +silently disappeared. + +These tests pin the round-trip invariant for the three degenerate +georeferenced shapes (1xN, Nx1, 1x1) across the eager numpy, dask+numpy, +GPU (cupy), and dask+cupy writers, plus the VRT writer. +""" +from __future__ import annotations + +import importlib.util + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import ( + open_geotiff, + to_geotiff, + write_geotiff_gpu, +) + + +def _gpu_available() -> bool: + if importlib.util.find_spec("cupy") is None: + return False + try: + import cupy + return bool(cupy.cuda.is_available()) + except Exception: + return False + + +_HAS_GPU = _gpu_available() +_gpu_only = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required") + + +# --------------------------------------------------------------------------- +# Reference georeferenced DataArrays. +# +# A 1xN strip: one row at y=45.0, 8 columns spanning -120.0..-113.0 (1° step). +# An Nx1 profile: one column at x=-120.0, 8 rows spanning 45.0..38.0 (-1° step). +# A 1x1 pixel: must carry its transform on ``attrs['transform']`` because the +# coord arrays alone cannot pin a pixel size in this case. +# --------------------------------------------------------------------------- + +PIXEL = 1.0 +X0 = -120.0 +Y0 = 45.0 + + +def _strip_1xN(raster_type: str = "area") -> xr.DataArray: + attrs = {"crs": 4326} + if raster_type == "point": + attrs["raster_type"] = "point" + return xr.DataArray( + np.arange(8, dtype="float32").reshape(1, 8), + dims=("y", "x"), + coords={ + "x": X0 + np.arange(8, dtype="float64") * PIXEL, + "y": np.array([Y0], dtype="float64"), + }, + attrs=attrs, + ) + + +def _strip_Nx1(raster_type: str = "area") -> xr.DataArray: + attrs = {"crs": 4326} + if raster_type == "point": + attrs["raster_type"] = "point" + return xr.DataArray( + np.arange(8, dtype="float32").reshape(8, 1), + dims=("y", "x"), + coords={ + "x": np.array([X0], dtype="float64"), + "y": Y0 - np.arange(8, dtype="float64") * PIXEL, + }, + attrs=attrs, + ) + + +def _pixel_1x1_with_transform() -> xr.DataArray: + # rasterio-style 6-tuple: (a, b, c, d, e, f) = (px, 0, ox, 0, py, oy) + transform = (PIXEL, 0.0, X0 - PIXEL * 0.5, 0.0, -PIXEL, Y0 + PIXEL * 0.5) + return xr.DataArray( + np.array([[42.0]], dtype="float32"), + dims=("y", "x"), + coords={"x": np.array([X0]), "y": np.array([Y0])}, + attrs={"crs": 4326, "transform": transform}, + ) + + +def _pixel_1x1_no_transform() -> xr.DataArray: + return xr.DataArray( + np.array([[42.0]], dtype="float32"), + dims=("y", "x"), + coords={"x": np.array([X0]), "y": np.array([Y0])}, + attrs={"crs": 4326}, + ) + + +def _assert_round_trip(path: str, expected: xr.DataArray) -> None: + """Read back ``path`` and check coords/transform survived the write.""" + r = open_geotiff(path) + + # Coordinates must be float (georeferenced), not integer pixel indices. + assert np.issubdtype(r.coords["x"].dtype, np.floating), ( + f"x coord came back as {r.coords['x'].dtype}; " + "writer stripped georeferencing" + ) + assert np.issubdtype(r.coords["y"].dtype, np.floating), ( + f"y coord came back as {r.coords['y'].dtype}; " + "writer stripped georeferencing" + ) + + np.testing.assert_allclose( + r.coords["x"].values, expected.coords["x"].values, rtol=0, atol=1e-9 + ) + np.testing.assert_allclose( + r.coords["y"].values, expected.coords["y"].values, rtol=0, atol=1e-9 + ) + + assert r.attrs.get("transform") is not None, "transform missing from attrs" + + np.testing.assert_array_equal(np.asarray(r.values), np.asarray(expected.values)) + + +# =========================================================================== +# Eager numpy writer (to_geotiff) +# =========================================================================== + +class TestEagerWriterDegenerateGeoref: + + def test_1xN_round_trip_preserves_transform(self, tmp_path): + da = _strip_1xN() + p = str(tmp_path / "georef_1xN_eager_1945.tif") + to_geotiff(da, p) + _assert_round_trip(p, da) + + def test_Nx1_round_trip_preserves_transform(self, tmp_path): + da = _strip_Nx1() + p = str(tmp_path / "georef_Nx1_eager_1945.tif") + to_geotiff(da, p) + _assert_round_trip(p, da) + + def test_1x1_with_transform_attr_round_trips(self, tmp_path): + da = _pixel_1x1_with_transform() + p = str(tmp_path / "georef_1x1_eager_1945.tif") + to_geotiff(da, p) + _assert_round_trip(p, da) + + def test_1x1_without_transform_raises(self, tmp_path): + """1x1 cannot recover pixel size from coords alone; raise rather + than silently strip georeferencing.""" + da = _pixel_1x1_no_transform() + p = str(tmp_path / "georef_1x1_no_tx_eager_1945.tif") + with pytest.raises(ValueError, match="(?i)pixel size|transform"): + to_geotiff(da, p) + + +# =========================================================================== +# Dask + numpy writer +# =========================================================================== + +class TestDaskNumpyWriterDegenerateGeoref: + + def test_1xN_dask_round_trip(self, tmp_path): + da = _strip_1xN().chunk({"x": 4, "y": 1}) + p = str(tmp_path / "georef_1xN_dask_1945.tif") + to_geotiff(da, p) + _assert_round_trip(p, _strip_1xN()) + + def test_Nx1_dask_round_trip(self, tmp_path): + da = _strip_Nx1().chunk({"x": 1, "y": 4}) + p = str(tmp_path / "georef_Nx1_dask_1945.tif") + to_geotiff(da, p) + _assert_round_trip(p, _strip_Nx1()) + + +# =========================================================================== +# GPU writer (write_geotiff_gpu) +# =========================================================================== + +@_gpu_only +class TestGpuWriterDegenerateGeoref: + + def test_1xN_round_trip_preserves_transform(self, tmp_path): + import cupy + da_cpu = _strip_1xN() + da_gpu = da_cpu.copy(data=cupy.asarray(da_cpu.values)) + da_gpu.attrs = dict(da_cpu.attrs) + p = str(tmp_path / "georef_1xN_gpu_1945.tif") + write_geotiff_gpu(da_gpu, p) + _assert_round_trip(p, da_cpu) + + def test_Nx1_round_trip_preserves_transform(self, tmp_path): + import cupy + da_cpu = _strip_Nx1() + da_gpu = da_cpu.copy(data=cupy.asarray(da_cpu.values)) + da_gpu.attrs = dict(da_cpu.attrs) + p = str(tmp_path / "georef_Nx1_gpu_1945.tif") + write_geotiff_gpu(da_gpu, p) + _assert_round_trip(p, da_cpu) + + def test_1x1_with_transform_attr_round_trips(self, tmp_path): + import cupy + da_cpu = _pixel_1x1_with_transform() + da_gpu = da_cpu.copy(data=cupy.asarray(da_cpu.values)) + da_gpu.attrs = dict(da_cpu.attrs) + p = str(tmp_path / "georef_1x1_gpu_1945.tif") + write_geotiff_gpu(da_gpu, p) + _assert_round_trip(p, da_cpu) + + def test_1x1_without_transform_raises(self, tmp_path): + import cupy + da_cpu = _pixel_1x1_no_transform() + da_gpu = da_cpu.copy(data=cupy.asarray(da_cpu.values)) + da_gpu.attrs = dict(da_cpu.attrs) + p = str(tmp_path / "georef_1x1_no_tx_gpu_1945.tif") + with pytest.raises(ValueError, match="(?i)pixel size|transform"): + write_geotiff_gpu(da_gpu, p) + + +# =========================================================================== +# Dask + cupy writer (to_geotiff with chunked cupy array) +# =========================================================================== + +@_gpu_only +class TestDaskCupyWriterDegenerateGeoref: + + def test_1xN_dask_cupy_round_trip(self, tmp_path): + import cupy + da_cpu = _strip_1xN() + da_gpu = da_cpu.copy(data=cupy.asarray(da_cpu.values)) + da_gpu = da_gpu.chunk({"x": 4, "y": 1}) + da_gpu.attrs = dict(da_cpu.attrs) + p = str(tmp_path / "georef_1xN_dask_cupy_1945.tif") + to_geotiff(da_gpu, p) + _assert_round_trip(p, da_cpu) + + def test_Nx1_dask_cupy_round_trip(self, tmp_path): + import cupy + da_cpu = _strip_Nx1() + da_gpu = da_cpu.copy(data=cupy.asarray(da_cpu.values)) + da_gpu = da_gpu.chunk({"x": 1, "y": 4}) + da_gpu.attrs = dict(da_cpu.attrs) + p = str(tmp_path / "georef_Nx1_dask_cupy_1945.tif") + to_geotiff(da_gpu, p) + _assert_round_trip(p, da_cpu) + + +# =========================================================================== +# VRT writer +# =========================================================================== + +class TestVrtTiledWriterDegenerateGeoref: + """``to_geotiff(da, '*.vrt')`` dispatches to ``_write_vrt_tiled``.""" + + def test_1xN_vrt_round_trip(self, tmp_path): + da = _strip_1xN() + p = str(tmp_path / "georef_1xN_1945.vrt") + to_geotiff(da, p) + _assert_round_trip(p, da) + + def test_Nx1_vrt_round_trip(self, tmp_path): + da = _strip_Nx1() + p = str(tmp_path / "georef_Nx1_1945.vrt") + to_geotiff(da, p) + _assert_round_trip(p, da) + + def test_1x1_vrt_without_transform_raises(self, tmp_path): + da = _pixel_1x1_no_transform() + p = str(tmp_path / "georef_1x1_no_tx_1945.vrt") + with pytest.raises(ValueError, match="(?i)pixel size|transform"): + to_geotiff(da, p) + + +# =========================================================================== +# raster_type='point' round trip +# +# Pins the no-half-pixel-shift path on 1xN / Nx1 degenerate writes. With +# PixelIsPoint the tiepoint already sits at the pixel center, so the +# writer must not add a 0.5*pixel offset when synthesising the transform +# from coords. +# =========================================================================== + +class TestEagerWriterPointRasterDegenerate: + + @pytest.mark.parametrize( + "factory, name", + [(_strip_1xN, "1xN"), (_strip_Nx1, "Nx1")], + ) + def test_point_raster_round_trip(self, tmp_path, factory, name): + da = factory(raster_type="point") + p = str(tmp_path / f"georef_{name}_point_eager_1945.tif") + to_geotiff(da, p) + _assert_round_trip(p, da) + + +# =========================================================================== +# Sign pinning on borrowed axis (#1953 review) +# +# When one axis is degenerate, ``coords_to_transform`` borrows the pixel +# size magnitude from the non-degenerate axis and applies a convention +# for the borrowed axis: pixel_width defaults positive (x increases +# left-to-right), pixel_height defaults negative (y decreases top-to- +# bottom). This strips the sign of the *source* axis if it doesn't match +# convention. These tests pin that behaviour so a future refactor does +# not silently change it. +# =========================================================================== + +class TestCoordsToTransformBorrowSignPinning: + """Pin the sign convention of the borrowed (degenerate) axis.""" + + def test_y_ascending_Nx1_borrows_positive_pixel_width(self): + from xrspatial.geotiff._coords import coords_to_transform + + da = xr.DataArray( + np.arange(8, dtype="float32").reshape(8, 1), + dims=("y", "x"), + coords={ + "x": np.array([X0], dtype="float64"), + # y ascending: 38, 39, 40, ... so pixel_height = +1.0 + "y": 38.0 + np.arange(8, dtype="float64") * PIXEL, + }, + ) + t = coords_to_transform(da) + # Non-degenerate axis (y) keeps its sign. + assert t.pixel_height == pytest.approx(1.0) + # Borrowed pixel_width takes ``abs(pixel_height)``: always positive, + # regardless of y's sign. This pins the current convention. + assert t.pixel_width == pytest.approx(1.0) + + def test_x_descending_1xN_borrows_negative_pixel_height(self): + from xrspatial.geotiff._coords import coords_to_transform + + da = xr.DataArray( + np.arange(8, dtype="float32").reshape(1, 8), + dims=("y", "x"), + coords={ + # x descending: pixel_width = -1.0 + "x": -113.0 - np.arange(8, dtype="float64") * PIXEL, + "y": np.array([Y0], dtype="float64"), + }, + ) + t = coords_to_transform(da) + # Non-degenerate axis (x) keeps its sign. + assert t.pixel_width == pytest.approx(-1.0) + # Borrowed pixel_height takes ``-abs(pixel_width)``: always negative, + # regardless of x's sign. This pins the current convention. + assert t.pixel_height == pytest.approx(-1.0)