diff --git a/.claude/sweep-metadata-state.csv b/.claude/sweep-metadata-state.csv index 677a4ad0..56c31f7a 100644 --- a/.claude/sweep-metadata-state.csv +++ b/.claude/sweep-metadata-state.csv @@ -1,3 +1,3 @@ module,last_inspected,issue,severity_max,categories_found,notes -geotiff,2026-05-11,1632,HIGH,1;5,extract_geo_info dropped user-defined CRS WKT: GeoKey *CSTypeGeoKey=32767 files emitted attrs[crs_name] only (no crs/crs_wkt) so to_geotiff silently lost the projection on read->write. Fixed by promoting WKT-looking citation to crs_wkt across all four read backends (#1632). +geotiff,2026-05-11,1640,HIGH,1;2;4,"open_geotiff(overview_level>=1) silently dropped CRS, transform attr, and georeferenced y/x coords because GDAL-style COGs (including to_geotiff output) write GeoKeys only on the level-0 IFD. Fixed by inheriting and rescaling georef from the level-0 IFD across all four read backends (#1640)." reproject,2026-05-10,1572;1573,HIGH,1;3;4,geoid_height_raster dropped input attrs and used dims[-2:] for 3D inputs (#1572). reproject/merge ignored nodatavals (rasterio convention) when rioxarray absent (#1573). Fixed in same branch. diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index 479ba58e..f0d0c056 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -241,7 +241,7 @@ def _read_geo_info(source, *, overview_level: int | None = None): Overview IFD index (0 = full resolution). """ from ._dtypes import resolve_bits_per_sample, tiff_dtype_to_numpy - from ._geotags import extract_geo_info + from ._geotags import extract_geo_info_with_overview_inheritance from ._header import parse_all_ifds, parse_header, select_overview_ifd from ._reader import _coerce_path, _is_file_like @@ -275,7 +275,10 @@ def _read_geo_info(source, *, overview_level: int | None = None): if not ifds: raise ValueError("No IFDs found in TIFF file") ifd = select_overview_ifd(ifds, overview_level) - geo_info = extract_geo_info(ifd, data, header.byte_order) + # Inherit georef from the level-0 IFD when the overview itself + # has no geokeys (issue #1640). Pass-through for level 0. + geo_info = extract_geo_info_with_overview_inheritance( + ifd, ifds, data, header.byte_order) bps = resolve_bits_per_sample(ifd.bits_per_sample) file_dtype = tiff_dtype_to_numpy(bps, ifd.sample_format) n_bands = ifd.samples_per_pixel if ifd.samples_per_pixel > 1 else 0 @@ -2260,7 +2263,7 @@ def read_geotiff_gpu(source: str, *, parse_header, parse_all_ifds, select_overview_ifd, validate_tile_layout, ) from ._dtypes import resolve_bits_per_sample, tiff_dtype_to_numpy - from ._geotags import extract_geo_info + from ._geotags import extract_geo_info_with_overview_inheritance from ._gpu_decode import gpu_decode_tiles source = _coerce_path(source) @@ -2297,7 +2300,10 @@ def read_geotiff_gpu(source: str, *, bps = resolve_bits_per_sample(ifd.bits_per_sample) file_dtype = tiff_dtype_to_numpy(bps, ifd.sample_format) - geo_info = extract_geo_info(ifd, data, header.byte_order) + # Inherit georef from the level-0 IFD when the overview itself + # has no geokeys (issue #1640); pass-through for level 0. + geo_info = extract_geo_info_with_overview_inheritance( + ifd, ifds, data, header.byte_order) # Capture the Orientation tag (274) once so the post-decode flip # below picks it up for both the stripped fallback and the tiled # GPU pipelines. CPU read_to_array applies the array remap + diff --git a/xrspatial/geotiff/_geotags.py b/xrspatial/geotiff/_geotags.py index 46189cd7..34e2405a 100644 --- a/xrspatial/geotiff/_geotags.py +++ b/xrspatial/geotiff/_geotags.py @@ -672,6 +672,130 @@ def extract_geo_info(ifd: IFD, data: bytes | memoryview, ) +def extract_geo_info_with_overview_inheritance( + ifd: IFD, + ifds: list, + data: bytes | memoryview, + byte_order: str, +) -> GeoInfo: + """Extract geo metadata, inheriting from level 0 when the IFD lacks it. + + Wraps :func:`extract_geo_info` for overview reads. GDAL-style COG + writers (including this package's :func:`to_geotiff`) put the + GeoKeyDirectory, ModelPixelScale and ModelTiepoint only on the + level-0 IFD. Calling ``extract_geo_info`` directly on an overview + IFD therefore returns a default :class:`GeoTransform` with + ``has_georef=False`` and no CRS, so overview reads silently lose + their georeferencing. + + When ``ifd`` is a reduced-resolution overview (NewSubfileType bit 0 + set) that lacks its own georef, we re-run ``extract_geo_info`` on + the first full-resolution IFD (NewSubfileType bit 0 clear, bit 2 + clear) and rescale the pixel size by ``width_full / width_overview`` + so coords cover the same extent as level 0. + + If the overview IFD already carries its own geokeys (some writers do + replicate them), this returns its own ``extract_geo_info`` output + unchanged. If no full-resolution sibling exists or the parent's geo + info is also missing, the overview's own (possibly empty) info is + returned -- callers get the same fallback behaviour they used to. + + Parameters + ---------- + ifd : IFD + The IFD selected by ``select_overview_ifd``. + ifds : list of IFD + All IFDs parsed from the file. Used to locate the level-0 + sibling for georef inheritance. + data : bytes or memoryview + Full file bytes (forwarded to ``extract_geo_info``). + byte_order : str + ``'<'`` or ``'>'`` (forwarded to ``extract_geo_info``). + + Returns + ------- + GeoInfo + """ + info = extract_geo_info(ifd, data, byte_order) + + # Overview IFDs have NewSubfileType bit 0 set; mask IFDs (bit 2) and + # page IFDs (bit 1) are filtered out by ``select_overview_ifd`` + # before reaching here, so we never inherit a mask's geo info. + is_overview = bool(ifd.subfile_type & 1) + if not is_overview or info.has_georef: + return info + + # Find the level-0 IFD: NewSubfileType has bit 0 clear (not an + # overview) and bit 2 clear (not a transparency mask). This is the + # same criterion ``select_overview_ifd``'s filter uses to identify + # the full-resolution pyramid root. + base_ifd = None + for cand in ifds: + if cand is ifd: + continue + st = cand.subfile_type + if (st & 1) == 0 and (st & 4) == 0: + base_ifd = cand + break + if base_ifd is None: + return info + + base_info = extract_geo_info(base_ifd, data, byte_order) + if not base_info.has_georef: + return info + + # Rescale the pixel size by the integer reduction factor. Width and + # height ratios should match for power-of-two overview pyramids; + # average them so a slightly off-by-one edge size still produces a + # sensible transform. + base_w = base_ifd.width + base_h = base_ifd.height + ov_w = ifd.width + ov_h = ifd.height + if base_w <= 0 or base_h <= 0 or ov_w <= 0 or ov_h <= 0: + return info + scale_x = base_w / ov_w + scale_y = base_h / ov_h + + base_t = base_info.transform + info.transform = GeoTransform( + origin_x=base_t.origin_x, + origin_y=base_t.origin_y, + pixel_width=base_t.pixel_width * scale_x, + pixel_height=base_t.pixel_height * scale_y, + ) + info.has_georef = True + + # Inherit CRS-side metadata from the parent. The overview IFD may + # carry its own raster_type (rare) -- prefer the overview's value + # when it differs from the default, otherwise inherit. + info.crs_epsg = base_info.crs_epsg + info.crs_wkt = base_info.crs_wkt + info.crs_name = base_info.crs_name + info.geog_citation = base_info.geog_citation + info.datum_code = base_info.datum_code + info.angular_units = base_info.angular_units + info.angular_units_code = base_info.angular_units_code + info.linear_units = base_info.linear_units + info.linear_units_code = base_info.linear_units_code + info.semi_major_axis = base_info.semi_major_axis + info.inv_flattening = base_info.inv_flattening + info.projection_code = base_info.projection_code + info.vertical_epsg = base_info.vertical_epsg + info.vertical_citation = base_info.vertical_citation + info.vertical_datum = base_info.vertical_datum + info.vertical_units = base_info.vertical_units + info.vertical_units_code = base_info.vertical_units_code + info.model_type = base_info.model_type + # Keep ``raster_type`` from the overview unless it was the default + # AREA, in which case prefer the parent's setting (PixelIsPoint is + # almost never re-declared on overview IFDs). + if info.raster_type == RASTER_PIXEL_IS_AREA: + info.raster_type = base_info.raster_type + + return info + + def _model_type_from_wkt(wkt: str) -> int: """Guess ModelType from a WKT string prefix.""" upper = wkt.strip().upper() diff --git a/xrspatial/geotiff/_reader.py b/xrspatial/geotiff/_reader.py index 81489db9..49235e95 100644 --- a/xrspatial/geotiff/_reader.py +++ b/xrspatial/geotiff/_reader.py @@ -26,6 +26,7 @@ GeoTransform, RASTER_PIXEL_IS_POINT, extract_geo_info, + extract_geo_info_with_overview_inheritance, ) from ._header import ( IFD, @@ -1236,7 +1237,13 @@ def _parse_cog_http_meta( raise ValueError("No IFDs found in COG") ifd = select_overview_ifd(ifds, overview_level) - geo_info = extract_geo_info(ifd, header_bytes, header.byte_order) + # When the requested IFD is an overview that lacks its own geokeys + # (the common case for COG writers, including this package's + # ``to_geotiff``), inherit and rescale the georef from the level-0 + # IFD so overview reads do not silently lose CRS / transform. + # See issue #1640. + geo_info = extract_geo_info_with_overview_inheritance( + ifd, ifds, header_bytes, header.byte_order) return header, ifd, geo_info, header_bytes @@ -1586,7 +1593,12 @@ def read_to_array(source, *, window=None, overview_level: int | None = None, bps = resolve_bits_per_sample(ifd.bits_per_sample) dtype = tiff_dtype_to_numpy(bps, ifd.sample_format) - geo_info = extract_geo_info(ifd, data, header.byte_order) + # Inherit georef from level 0 when an overview IFD lacks its own + # geokeys (issue #1640). For overview_level=0 (or None) this is a + # no-op: the helper short-circuits when the IFD is not a + # NewSubfileType=overview entry. + geo_info = extract_geo_info_with_overview_inheritance( + ifd, ifds, data, header.byte_order) # Orientation tag (274): values 2-8 mean the stored pixel order # differs from display order. We need to remap the array post diff --git a/xrspatial/geotiff/tests/test_overview_geo_inheritance_1640.py b/xrspatial/geotiff/tests/test_overview_geo_inheritance_1640.py new file mode 100644 index 00000000..cb1b3720 --- /dev/null +++ b/xrspatial/geotiff/tests/test_overview_geo_inheritance_1640.py @@ -0,0 +1,300 @@ +"""Regression tests for issue #1640. + +Overview IFDs in COGs typically carry no GeoKeys, ModelPixelScale, or +ModelTiepoint -- the writer puts those tags only on the level-0 IFD. +Before the fix, ``open_geotiff(path, overview_level=N)`` for ``N >= 1`` +returned a DataArray whose ``transform`` attr was the default unit +transform and whose ``crs`` attr was absent. Downstream slope/aspect +ops then used pixel sizes from the (integer) coord arrays and produced +silently-wrong answers. + +The fix inherits the georef from the level-0 IFD and rescales the pixel +size by the overview's reduction factor. + +These tests cover all four backends (numpy, dask+numpy, cupy, +dask+cupy) and assert that the transform / crs / coords match the +level-0 read up to the expected scale factor. +""" +from __future__ import annotations + +import importlib.util + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import open_geotiff, to_geotiff + + +def _gpu_available() -> bool: + if importlib.util.find_spec("cupy") is None: + return False + try: + import cupy + return bool(cupy.cuda.is_available()) + except Exception: + return False + + +_HAS_GPU = _gpu_available() +_gpu_only = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required") + + +def _make_cog_with_overviews(path: str) -> xr.DataArray: + """Write a 1024x1024 COG with three overview levels and known geo. + + Origin (100, 200), 0.5 unit/pixel, EPSG:4326. Returns the source + DataArray so the caller can compare extents. + """ + arr = np.arange(1024 * 1024, dtype=np.float32).reshape(1024, 1024) + y = np.arange(1024, dtype=np.float64) * (-0.5) + 200.0 + x = np.arange(1024, dtype=np.float64) * 0.5 + 100.0 + da = xr.DataArray(arr, dims=['y', 'x'], + coords={'y': y, 'x': x}, + attrs={'crs': 4326}) + to_geotiff(da, path, cog=True, overview_levels=[1, 2, 3]) + return da + + +def _materialise(da: xr.DataArray) -> np.ndarray: + """Return a numpy view of the data regardless of backend.""" + raw = da.data + if hasattr(raw, 'compute'): + raw = raw.compute() + if hasattr(raw, 'get'): + raw = raw.get() + return np.asarray(raw) + + +@pytest.mark.parametrize("backend_kwargs", [ + pytest.param({}, id="numpy"), + pytest.param({"chunks": 128}, id="dask+numpy"), + pytest.param({"gpu": True}, id="cupy", + marks=pytest.mark.skipif( + not _HAS_GPU, reason="cupy + CUDA required")), + pytest.param({"gpu": True, "chunks": 128}, id="dask+cupy", + marks=pytest.mark.skipif( + not _HAS_GPU, reason="cupy + CUDA required")), +]) +def test_overview_inherits_crs_across_backends(tmp_path, backend_kwargs): + """Every backend keeps ``crs`` on overview reads.""" + path = str(tmp_path / "overview_inherit_1640_crs.tif") + _make_cog_with_overviews(path) + + for lvl in (0, 1, 2, 3): + da = open_geotiff(path, overview_level=lvl, **backend_kwargs) + assert da.attrs.get('crs') == 4326, ( + f"backend={backend_kwargs}, overview_level={lvl}: expected " + f"crs=4326, got {da.attrs.get('crs')!r}; full attrs=" + f"{sorted(da.attrs.keys())}" + ) + + +@pytest.mark.parametrize("backend_kwargs", [ + pytest.param({}, id="numpy"), + pytest.param({"chunks": 128}, id="dask+numpy"), + pytest.param({"gpu": True}, id="cupy", + marks=pytest.mark.skipif( + not _HAS_GPU, reason="cupy + CUDA required")), + pytest.param({"gpu": True, "chunks": 128}, id="dask+cupy", + marks=pytest.mark.skipif( + not _HAS_GPU, reason="cupy + CUDA required")), +]) +def test_overview_transform_scales_by_reduction_factor(tmp_path, + backend_kwargs): + """Pixel size doubles per overview level; origin is preserved.""" + path = str(tmp_path / "overview_inherit_1640_transform.tif") + _make_cog_with_overviews(path) + + base = open_geotiff(path, overview_level=0, **backend_kwargs) + base_t = base.attrs['transform'] + base_w = base.shape[-1] + + for lvl, expected_scale in ((1, 2.0), (2, 4.0), (3, 8.0)): + da = open_geotiff(path, overview_level=lvl, **backend_kwargs) + t = da.attrs.get('transform') + assert t is not None, ( + f"backend={backend_kwargs}, overview_level={lvl}: " + f"transform attr is missing") + + # The pixel-size scale is base_w / overview_w; for a power-of-two + # COG that lands exactly on the integer reduction factor. Allow a + # tiny tolerance for floating-point round-off only. + ov_w = da.shape[-1] + observed_scale = base_w / ov_w + assert abs(observed_scale - expected_scale) < 1e-9 + + # Pixel width / height should scale by the same factor. + assert abs(t[0] - base_t[0] * expected_scale) < 1e-9, ( + f"backend={backend_kwargs}, overview_level={lvl}: " + f"pixel width {t[0]} != base_pw*{expected_scale} " + f"({base_t[0] * expected_scale})" + ) + assert abs(t[4] - base_t[4] * expected_scale) < 1e-9, ( + f"backend={backend_kwargs}, overview_level={lvl}: " + f"pixel height {t[4]} != base_ph*{expected_scale} " + f"({base_t[4] * expected_scale})" + ) + # Origin should not move between levels. + assert abs(t[2] - base_t[2]) < 1e-9, ( + f"backend={backend_kwargs}, overview_level={lvl}: " + f"origin_x drifted: base={base_t[2]}, ov={t[2]}") + assert abs(t[5] - base_t[5]) < 1e-9, ( + f"backend={backend_kwargs}, overview_level={lvl}: " + f"origin_y drifted: base={base_t[5]}, ov={t[5]}") + + +@pytest.mark.parametrize("backend_kwargs", [ + pytest.param({}, id="numpy"), + pytest.param({"chunks": 128}, id="dask+numpy"), + pytest.param({"gpu": True}, id="cupy", + marks=pytest.mark.skipif( + not _HAS_GPU, reason="cupy + CUDA required")), + pytest.param({"gpu": True, "chunks": 128}, id="dask+cupy", + marks=pytest.mark.skipif( + not _HAS_GPU, reason="cupy + CUDA required")), +]) +def test_overview_coords_cover_same_extent(tmp_path, backend_kwargs): + """Pixel-center coords on an overview span the same extent as level 0.""" + path = str(tmp_path / "overview_inherit_1640_coords.tif") + _make_cog_with_overviews(path) + + base = open_geotiff(path, overview_level=0, **backend_kwargs) + by = np.asarray(base.coords['y']) + bx = np.asarray(base.coords['x']) + + # Total geographic extent on level 0 (PixelIsArea: edge = center +/- + # half_pixel). Recover from coord arrays + pixel size. + base_pw = base.attrs['transform'][0] + base_ph = base.attrs['transform'][4] + base_x_min = float(bx[0]) - 0.5 * base_pw + base_x_max = float(bx[-1]) + 0.5 * base_pw + base_y_top = float(by[0]) - 0.5 * base_ph # base_ph is negative + base_y_bot = float(by[-1]) + 0.5 * base_ph + + for lvl in (1, 2, 3): + da = open_geotiff(path, overview_level=lvl, **backend_kwargs) + y = np.asarray(da.coords['y']) + x = np.asarray(da.coords['x']) + pw = da.attrs['transform'][0] + ph = da.attrs['transform'][4] + x_min = float(x[0]) - 0.5 * pw + x_max = float(x[-1]) + 0.5 * pw + y_top = float(y[0]) - 0.5 * ph + y_bot = float(y[-1]) + 0.5 * ph + + # Extents should agree to within one full-resolution pixel + # (rounding of width_full / width_overview can leave a half-pixel + # of slack at the edges for non-aligned dimensions; we use a + # power-of-two case here so it's tighter than that). + assert abs(x_min - base_x_min) < 1e-9, ( + f"backend={backend_kwargs}, overview_level={lvl}: " + f"x_min drifted: base={base_x_min}, ov={x_min}") + assert abs(x_max - base_x_max) < 1e-9, ( + f"backend={backend_kwargs}, overview_level={lvl}: " + f"x_max drifted: base={base_x_max}, ov={x_max}") + assert abs(y_top - base_y_top) < 1e-9, ( + f"backend={backend_kwargs}, overview_level={lvl}: " + f"y_top drifted: base={base_y_top}, ov={y_top}") + assert abs(y_bot - base_y_bot) < 1e-9, ( + f"backend={backend_kwargs}, overview_level={lvl}: " + f"y_bot drifted: base={base_y_bot}, ov={y_bot}") + + +def test_overview_with_own_geokeys_is_not_overwritten(tmp_path): + """If an overview IFD carries its own valid georef, keep it. + + Some writers (rasterio with ``COPY_SRC_OVERVIEWS=YES``) replicate + geokeys on every overview. The inheritance helper must not stomp + those; it should fall back to the parent only when the overview + itself has no georef. + + We build a synthetic two-IFD file where the overview IFD carries its + own ModelPixelScale + ModelTiepoint that intentionally differ from + a naive parent-rescale. The reader must return the overview's own + values, not the inherited ones. + """ + tifffile = pytest.importorskip("tifffile") + + path = str(tmp_path / "overview_own_geokeys_1640.tif") + + base = np.arange(64 * 64, dtype=np.float32).reshape(64, 64) + ov = base[::2, ::2] + # GeoKeyDirectory: header (4) + GTModelType=2 (geographic) + + # GeographicType=4326. Same on both IFDs. + geokeys = (1, 1, 0, 2, 1024, 0, 1, 2, 2048, 0, 1, 4326) + + with tifffile.TiffWriter(path) as tw: + tw.write(base, tile=(32, 32), + extratags=[ + (33550, 12, 3, (0.5, 0.5, 0.0)), # ModelPixelScale + (33922, 12, 6, + (0.0, 0.0, 0.0, 100.0, 200.0, 0.0)), # Tiepoint + (34735, 3, 12, geokeys), + ]) + tw.write(ov, tile=(16, 16), subfiletype=1, + extratags=[ + # Overview carries its OWN scale (not just 2*0.5) and + # tiepoint deliberately shifted by 10 units to make + # the inheritance vs own-geo distinction observable. + (33550, 12, 3, (1.25, 1.25, 0.0)), + (33922, 12, 6, + (0.0, 0.0, 0.0, 110.0, 210.0, 0.0)), + (34735, 3, 12, geokeys), + ]) + + da_ov = open_geotiff(path, overview_level=1) + t = da_ov.attrs['transform'] + # The overview's own values must survive: pixel_width=1.25 (not + # 2*0.5=1.0 from rescaling), origin_x=110 (not 100 from the parent). + assert abs(t[0] - 1.25) < 1e-9, ( + f"overview's own pixel_width clobbered: transform={t}") + assert abs(t[2] - 110.0) < 1e-9, ( + f"overview's own origin_x clobbered: transform={t}") + + +def test_overview_without_full_res_sibling_falls_back_gracefully(tmp_path): + """No full-res IFD => return the overview's own (empty) geo info. + + Pathological but well-formed: a TIFF whose only IFD is marked + reduced-resolution. The helper should not raise; it returns the + overview's own ``GeoInfo`` (with ``has_georef=False``) so callers + fall back to integer pixel coords, matching the pre-fix behaviour + for files that genuinely have no georef anywhere. + """ + tifffile = pytest.importorskip("tifffile") + + path = str(tmp_path / "overview_no_parent_1640.tif") + arr = np.zeros((16, 16), dtype=np.float32) + tifffile.imwrite(path, arr, tile=(16, 16), subfiletype=1) + + da = open_geotiff(path, overview_level=0) + # No georef anywhere -> default coords, no crs/transform/etc. + assert da.attrs.get('crs') is None + # ``transform`` may or may not be emitted (the default unit tuple is + # still considered "no georef" by the helper); the key contract is + # that the read succeeds without raising and produces a 2-D array. + assert da.shape == (16, 16) + + +def test_overview_level_0_path_unchanged(tmp_path): + """For overview_level=0, the helper must be a no-op. + + Pin the contract that level-0 reads still get exactly the geo info + they did before #1640. + """ + path = str(tmp_path / "overview_lvl0_passthrough_1640.tif") + src = _make_cog_with_overviews(path) + + da = open_geotiff(path, overview_level=0) + t = da.attrs['transform'] + + # Origin matches the source DataArray's first pixel edge. + src_y = np.asarray(src.coords['y']) + src_x = np.asarray(src.coords['x']) + expected_origin_x = float(src_x[0]) - 0.5 * 0.5 # edge = center - half + expected_origin_y = float(src_y[0]) - 0.5 * (-0.5) + assert abs(t[2] - expected_origin_x) < 1e-9 + assert abs(t[5] - expected_origin_y) < 1e-9 + assert abs(t[0] - 0.5) < 1e-9 + assert abs(t[4] - (-0.5)) < 1e-9