diff --git a/xrspatial/polygonize.py b/xrspatial/polygonize.py index c554525d6..2c9176c3c 100644 --- a/xrspatial/polygonize.py +++ b/xrspatial/polygonize.py @@ -915,14 +915,31 @@ def _rings_to_unit_edges(polys_list, edge_set): x += step -def _pick_next_edge(adj, prev_vertex, current_vertex): - """Pick the next outgoing edge using the rightmost-turn rule. - - At a vertex with multiple outgoing edges, picks the first edge clockwise - from the incoming direction (= smallest right turn). This correctly - traces individual polygon rings even when separate same-value polygons - share a vertex, because it follows the ring that keeps the polygon - interior to the left. +def _pick_next_edge(adj, prev_vertex, current_vertex, + connectivity_8=False): + """Pick the next outgoing edge at a (possibly degree>2) vertex. + + The pixel grid means edges only ever leave a vertex in one of four + axis-aligned directions. At a degree-4 vertex two same-value + polygons meet diagonally; the choice of pairing determines whether + they trace as two separate squares or as a single figure-8 ring. + + For ``connectivity_8=False`` (4-connectivity), pick the smallest + CCW turn from the incoming direction. This keeps two same-value + polygons that touch only at a corner as separate rings, matching + NumPy 4-connectivity semantics. + + For ``connectivity_8=True``, prefer "straight through" (rel == 0) + when available, otherwise pick the largest CCW turn (smallest CW + turn) at degree-4 vertices. Within a single ring, ``rel == 0`` + cannot occur on a grid because the simplify pass folds consecutive + same-direction edges; but ``_merge_polygon_rings`` operates on + multiple rings sharing a vertex, where one ring can pass straight + through V while another corners at V. Preferring "straight + through" keeps such rings continuous; falling back to the largest + turn pairs up the diagonal crossings into a single figure-8 ring, + matching the NumPy 8-connectivity output for two diagonally + adjacent same-value cells. """ targets = adj[current_vertex] if len(targets) == 1: @@ -933,17 +950,38 @@ def _pick_next_edge(adj, prev_vertex, current_vertex): incoming_angle = _DIR_ANGLE[(dx_in, dy_in)] best = None - best_rel = 5 - for target in targets: - dx = target[0] - current_vertex[0] - dy = target[1] - current_vertex[1] - out_angle = _DIR_ANGLE[(dx, dy)] - rel = (out_angle - incoming_angle) % 4 - if rel == 0: - rel = 4 # straight ahead → last priority (u-turn equivalent) - if rel < best_rel: - best_rel = rel - best = target + if connectivity_8: + # Priority order at a degree-4 vertex: + # 1. ``rel == 0`` (continue straight) -- keeps a ring that + # passes through V on the same trajectory. + # 2. ``rel == 3`` (90 deg CW) -- pairs the diagonal-only + # crossing into a figure-8 ring. + # 3. ``rel == 1`` (90 deg CCW) -- the 4-connectivity choice, + # used only if 0 and 3 are unavailable. + # 4. ``rel == 2`` (180 deg u-turn) -- last resort. + priority = {0: 4, 3: 3, 1: 2, 2: 1} + best_rel = -1 + for target in targets: + dx = target[0] - current_vertex[0] + dy = target[1] - current_vertex[1] + out_angle = _DIR_ANGLE[(dx, dy)] + rel = (out_angle - incoming_angle) % 4 + score = priority[rel] + if score > best_rel: + best_rel = score + best = target + else: + best_rel = 5 + for target in targets: + dx = target[0] - current_vertex[0] + dy = target[1] - current_vertex[1] + out_angle = _DIR_ANGLE[(dx, dy)] + rel = (out_angle - incoming_angle) % 4 + if rel == 0: + rel = 4 # straight ahead → last priority (u-turn equivalent) + if rel < best_rel: + best_rel = rel + best = target return best @@ -955,11 +993,12 @@ def _remove_directed_edge(adj, from_v, to_v): del targets[to_v] -def _trace_rings(edge_set): +def _trace_rings(edge_set, connectivity_8=False): """Trace directed unit edges into closed rings. Uses CW planar face ordering to correctly handle vertices with degree > 2 - (e.g. where two same-value regions share a corner vertex). + (e.g. where two same-value regions share a corner vertex). See + :func:`_pick_next_edge` for the role of ``connectivity_8``. """ # Build adjacency: vertex -> {successor_vertex: count}. adj = {} @@ -981,7 +1020,8 @@ def _trace_rings(edge_set): while current != start: ring.append(current) - next_v = _pick_next_edge(adj, prev, current) + next_v = _pick_next_edge(adj, prev, current, + connectivity_8=connectivity_8) _remove_directed_edge(adj, current, next_v) prev = current current = next_v @@ -1533,13 +1573,18 @@ def _simplify_polygons(column, polygon_points, tolerance, return result_column, result -def _merge_polygon_rings(polys_list): +def _merge_polygon_rings(polys_list, connectivity_8=False): """Merge polygon ring sets that share chunk-boundary edges. Uses edge cancellation: splits all rings into unit-length directed edges, cancels opposing edges (which occur at chunk boundaries where the same value continues across), and traces the remaining edges into closed rings. + ``connectivity_8`` selects the degree-4 vertex pairing rule used during + ring tracing (see :func:`_pick_next_edge`). Set to ``True`` for + 8-connectivity to preserve figure-8 rings produced by diagonal-only + adjacency at chunk corners. + polys_list: list of [exterior_ring, *hole_rings] lists (same pixel value) Returns: list of [exterior_ring, *hole_rings] lists (merged) """ @@ -1549,13 +1594,17 @@ def _merge_polygon_rings(polys_list): if not edge_set: return [] - raw_rings = _trace_rings(edge_set) + raw_rings = _trace_rings(edge_set, connectivity_8=connectivity_8) simplified = [_simplify_ring(r) for r in raw_rings] return _group_rings_into_polygons(simplified) -def _merge_chunk_polygons(chunk_results, transform): - """Merge polygons from all chunks and return final output.""" +def _merge_chunk_polygons(chunk_results, transform, connectivity_8=False): + """Merge polygons from all chunks and return final output. + + ``connectivity_8`` is forwarded to :func:`_merge_polygon_rings` to + select the degree-4-vertex pairing rule used by the trace step. + """ all_interior = [] boundary_by_value = {} @@ -1569,10 +1618,11 @@ def _merge_chunk_polygons(chunk_results, transform): merged = [] for val, polys_list in boundary_by_value.items(): if len(polys_list) == 1: - # Single polygon set for this value — nothing to merge. + # Single polygon set for this value -- nothing to merge. merged.append((val, polys_list[0])) else: - merged_polys = _merge_polygon_rings(polys_list) + merged_polys = _merge_polygon_rings( + polys_list, connectivity_8=connectivity_8) for rings in merged_polys: merged.append((val, rings)) @@ -1601,12 +1651,17 @@ def sort_key(item): return column, polygon_points -def _merge_from_separated(all_interior, boundary_by_value, transform): +def _merge_from_separated(all_interior, boundary_by_value, transform, + connectivity_8=False): """Merge pre-separated interior/boundary polygons into final output. Like _merge_chunk_polygons but takes already-separated data so the caller can accumulate incrementally (one chunk at a time) instead of holding all chunk_results in memory simultaneously. + + ``connectivity_8`` is forwarded to :func:`_merge_polygon_rings` so + the trace step uses the right degree-4-vertex pairing rule when + stitching boundary polygons across chunks. """ # Merge boundary polygons per value using edge cancellation. merged = [] @@ -1614,7 +1669,8 @@ def _merge_from_separated(all_interior, boundary_by_value, transform): if len(polys_list) == 1: merged.append((val, polys_list[0])) else: - merged_polys = _merge_polygon_rings(polys_list) + merged_polys = _merge_polygon_rings( + polys_list, connectivity_8=connectivity_8) for rings in merged_polys: merged.append((val, rings)) @@ -1682,7 +1738,9 @@ def _polygonize_dask(dask_data, mask_data, connectivity_8, transform, boundary_by_value, val, atol, rtol) boundary_by_value.setdefault(key, []).append(rings) - return _merge_from_separated(all_interior, boundary_by_value, transform) + return _merge_from_separated( + all_interior, boundary_by_value, transform, + connectivity_8=connectivity_8) def polygonize( @@ -1733,10 +1791,6 @@ def polygonize( increasing or decreasing. Connectivity of 8 does not necessarily return valid polygons. - Note: when using Dask arrays, 8-connectivity may produce extra polygon - splits at chunk corners where diagonal-only adjacency crosses a chunk - boundary. 4-connectivity works perfectly with Dask chunking. - transform: ndarray, optional Optional affine transform to apply to return polygon coordinates. diff --git a/xrspatial/tests/test_polygonize_issue_2172.py b/xrspatial/tests/test_polygonize_issue_2172.py new file mode 100644 index 000000000..5885ea2c1 --- /dev/null +++ b/xrspatial/tests/test_polygonize_issue_2172.py @@ -0,0 +1,254 @@ +"""Cross-backend parity tests for issue #2172. + +Dask 8-connectivity used to split polygons at chunk corners where the only +adjacency between two cells of the same value was diagonal. These tests +pin the fixed behaviour by comparing polygon count and per-region area +between the NumPy and Dask backends on inputs where diagonal-only +adjacency crosses chunk boundaries. +""" + +from collections import Counter + +import numpy as np +import pytest +import xarray as xr + +try: + import dask.array as da +except ImportError: + da = None + +from ..polygonize import ( + _merge_polygon_rings, + _signed_ring_area, + polygonize, +) + +dask_required = pytest.mark.skipif(da is None, reason="dask not installed") + + +def _per_value_area(values, polygons): + """Sum absolute exterior-ring area for each pixel value.""" + by_val = {} + for val, rings in zip(values, polygons): + area = abs(_signed_ring_area(rings[0])) + by_val[val] = by_val.get(val, 0.0) + area + return by_val + + +def _assert_parity(arr, chunks, connectivity, name=""): + """Polygonize via NumPy and via Dask and assert they agree. + + Comparison covers polygon count, per-value polygon count, and + per-value total area. The Dask backend is allowed to return rings + that revisit a vertex (figure-8 shape) because the NumPy backend + does too — only counts and areas need to match. + """ + raster_np = xr.DataArray(arr) + raster_dk = xr.DataArray(da.from_array(arr, chunks=chunks)) + + vals_np, polys_np = polygonize(raster_np, connectivity=connectivity) + vals_dk, polys_dk = polygonize(raster_dk, connectivity=connectivity) + + assert len(vals_np) == len(vals_dk), ( + f"{name}: polygon count mismatch " + f"(numpy={len(vals_np)}, dask={len(vals_dk)})" + ) + assert Counter(vals_np) == Counter(vals_dk), ( + f"{name}: per-value polygon count mismatch " + f"(numpy={Counter(vals_np)}, dask={Counter(vals_dk)})" + ) + + area_np = _per_value_area(vals_np, polys_np) + area_dk = _per_value_area(vals_dk, polys_dk) + assert set(area_np) == set(area_dk), ( + f"{name}: value-set mismatch" + ) + for val in area_np: + # Polygonize areas come back exact for integer inputs and exact + # within float ULP for float inputs. Use a very small tolerance + # to avoid being thrown off by add-order drift in float sums. + assert area_np[val] == pytest.approx(area_dk[val], abs=1e-12), ( + f"{name}: per-value area mismatch for {val}: " + f"numpy={area_np[val]}, dask={area_dk[val]}" + ) + + +@dask_required +class TestDask8ConnDiagonalRepro: + """Direct reproduction from the bug report.""" + + def test_2x2_diagonal_1x1_chunks(self): + # Original repro: two value=1 cells diagonal, two value=0 cells + # diagonal. NumPy yields 2 polygons; Dask (1, 1) used to yield 4. + arr = np.array([[1, 0], [0, 1]], dtype=np.int32) + _assert_parity(arr, (1, 1), connectivity=8, + name="2x2 diagonal 1x1 chunks") + + def test_2x2_diagonal_exact_count(self): + arr = np.array([[1, 0], [0, 1]], dtype=np.int32) + vals_dk, _ = polygonize( + xr.DataArray(da.from_array(arr, chunks=(1, 1))), + connectivity=8, + ) + assert len(vals_dk) == 2 + assert Counter(vals_dk) == Counter([0, 1]) + + +@dask_required +class TestDask8ConnChunkCornerParity: + """Inputs designed to put diagonal-only adjacency on a chunk corner.""" + + @pytest.mark.parametrize("chunks", [(1, 1), (2, 2), (1, 2), (2, 1)]) + def test_4x4_checkerboard(self, chunks): + arr = (np.indices((4, 4)).sum(axis=0) % 2).astype(np.int32) + # NumPy 8-conn merges everything diagonally — 2 polygons total. + _assert_parity(arr, chunks, connectivity=8, + name=f"4x4 checker chunks={chunks}") + + @pytest.mark.parametrize("chunks", [(2, 2), (3, 3), (2, 3)]) + def test_6x6_checkerboard(self, chunks): + arr = (np.indices((6, 6)).sum(axis=0) % 2).astype(np.int32) + _assert_parity(arr, chunks, connectivity=8, + name=f"6x6 checker chunks={chunks}") + + @pytest.mark.parametrize("chunks", [(2, 2), (3, 3)]) + def test_diagonal_stripe(self, chunks): + # A diagonal stripe that crosses several chunk corners. + arr = np.eye(6, dtype=np.int32) + _assert_parity(arr, chunks, connectivity=8, + name=f"diagonal stripe chunks={chunks}") + + def test_diagonal_stripe_with_offset(self): + # Wider diagonal band of 1s on a 0 background, crossing both + # types of chunk corners (interior 4-chunk corner and chunk-edge + # midpoint). + arr = (np.eye(8, dtype=np.int32) + + np.eye(8, k=1, dtype=np.int32)) + _assert_parity(arr, (3, 3), connectivity=8, + name="thick diagonal chunks=(3,3)") + + def test_x_shape(self): + # Two crossing diagonals: forces several diagonal-merge corners. + arr = (np.eye(7, dtype=np.int32) + + np.eye(7, dtype=np.int32)[::-1]) + # Don't normalise — the polygonize cares about value identity, + # not magnitude. + _assert_parity(arr, (2, 2), connectivity=8, + name="X shape chunks=(2,2)") + + +@dask_required +class TestDask8ConnDoesNotRegress4Conn: + """4-connectivity behaviour must not change.""" + + def test_checker_4conn_keeps_every_cell_separate(self): + arr = (np.indices((4, 4)).sum(axis=0) % 2).astype(np.int32) + vals_np, _ = polygonize(xr.DataArray(arr), connectivity=4) + vals_dk, _ = polygonize( + xr.DataArray(da.from_array(arr, chunks=(2, 2))), + connectivity=4, + ) + # 4-conn never merges diagonals; every cell is its own polygon. + assert len(vals_np) == 16 + assert len(vals_dk) == 16 + + def test_diagonal_4conn_random_counts(self): + # Polygon counts must still match under 4-connectivity after the + # 8-connectivity fix. (Per-value areas have a separate + # chunk-size-dependent quirk on 4-connectivity that is tracked + # by a different issue; counts are unaffected.) + rng = np.random.default_rng(0) + arr = rng.integers(0, 3, (20, 20), dtype=np.int32) + vals_np, _ = polygonize(xr.DataArray(arr), connectivity=4) + vals_dk, _ = polygonize( + xr.DataArray(da.from_array(arr, chunks=(5, 5))), + connectivity=4, + ) + assert Counter(vals_np) == Counter(vals_dk) + + +@dask_required +class TestDask8ConnFloatAndMaskedInputs: + """Cover the float / NaN tolerance paths and large random rasters.""" + + def test_float_checkerboard(self): + arr = ((np.indices((4, 4)).sum(axis=0) % 2) + .astype(np.float64)) + _assert_parity(arr, (2, 2), connectivity=8, + name="float checker (2,2)") + + def test_float_with_nan(self): + arr = ((np.indices((4, 4)).sum(axis=0) % 2) + .astype(np.float64)) + arr[1, 1] = np.nan + _assert_parity(arr, (2, 2), connectivity=8, + name="float NaN (2,2)") + + @pytest.mark.parametrize("chunks", [(5, 5), (3, 7), (1, 1), (4, 4)]) + def test_random_int_20x20(self, chunks): + rng = np.random.default_rng(42) + arr = rng.integers(0, 3, (20, 20), dtype=np.int32) + _assert_parity(arr, chunks, connectivity=8, + name=f"random 20x20 chunks={chunks}") + + +class TestMergePolygonRingsDirect: + """Direct unit tests on ``_merge_polygon_rings`` covering the + 8-conn vertex-pairing rule. These pin the trace priority order + (straight-through > right-turn > left-turn > u-turn) so a future + refactor cannot reintroduce the chunk-corner bug by accident. + """ + + def test_two_diagonally_adjacent_squares_merge_into_figure_8(self): + # Two 1x1 squares sharing only a corner vertex. Under + # 8-conn the merge must produce a single self-touching ring + # (a figure-8), not two separate squares. + sq_a = [np.array([ + [0.0, 0.0], [1.0, 0.0], + [1.0, 1.0], [0.0, 1.0], [0.0, 0.0], + ])] + sq_b = [np.array([ + [1.0, 1.0], [2.0, 1.0], + [2.0, 2.0], [1.0, 2.0], [1.0, 1.0], + ])] + merged = _merge_polygon_rings([sq_a, sq_b], connectivity_8=True) + assert len(merged) == 1 + # Combined area = 1 + 1 = 2. + assert abs(_signed_ring_area(merged[0][0])) == pytest.approx(2.0) + # The ring revisits the shared corner (1, 1). Count occurrences. + ext = merged[0][0] + revisits = sum( + 1 for k in range(len(ext) - 1) + if ext[k, 0] == 1.0 and ext[k, 1] == 1.0 + ) + assert revisits == 2, f"expected figure-8 ring through (1,1), got {revisits} visits" + + def test_4conn_keeps_diagonal_squares_separate(self): + # Same input, but 4-conn must keep them as two rings. + sq_a = [np.array([ + [0.0, 0.0], [1.0, 0.0], + [1.0, 1.0], [0.0, 1.0], [0.0, 0.0], + ])] + sq_b = [np.array([ + [1.0, 1.0], [2.0, 1.0], + [2.0, 2.0], [1.0, 2.0], [1.0, 1.0], + ])] + merged = _merge_polygon_rings([sq_a, sq_b], connectivity_8=False) + assert len(merged) == 2 + + def test_edge_cancellation_still_merges_under_8conn(self): + # Two 1x1 squares sharing an EDGE. Edge cancellation must + # produce one 1x2 rectangle regardless of the rel==0 priority + # rule. + sq_a = [np.array([ + [0.0, 0.0], [1.0, 0.0], + [1.0, 1.0], [0.0, 1.0], [0.0, 0.0], + ])] + sq_b = [np.array([ + [1.0, 0.0], [2.0, 0.0], + [2.0, 1.0], [1.0, 1.0], [1.0, 0.0], + ])] + merged = _merge_polygon_rings([sq_a, sq_b], connectivity_8=True) + assert len(merged) == 1 + assert abs(_signed_ring_area(merged[0][0])) == pytest.approx(2.0)