diff --git a/xrspatial/tests/test_hypsometric_integral.py b/xrspatial/tests/test_hypsometric_integral.py index 0debcc33..abe1267a 100644 --- a/xrspatial/tests/test_hypsometric_integral.py +++ b/xrspatial/tests/test_hypsometric_integral.py @@ -254,3 +254,59 @@ def test_hypsometric_integral_list_of_pairs_zones(): result = hypsometric_integral(zones_pairs, values, nodata=0) assert isinstance(result, xr.DataArray) assert result.shape == values.shape + + +# --- backend-consistency regression (#2525) --------------------------------- + +@pytest.mark.parametrize("backend", ['numpy', 'cupy', 'dask+numpy', 'dask+cupy']) +def test_hi_preserves_backend_2525(backend): + """Regression: output backend must match input backend. + + Before the fix, `_hi_dask_cupy` converted chunks to numpy and called + `_hi_dask_numpy`, returning a dask+numpy array even when the input was + dask+cupy. Downstream dispatch via ArrayTypeFunctionMapping then routed + through the numpy path silently. See issue #2525. + """ + from xrspatial.utils import ( + has_cuda_and_cupy, has_dask_array, + is_cupy_array, is_dask_cupy, + ) + + if 'cupy' in backend and not has_cuda_and_cupy(): + pytest.skip("Requires CUDA and CuPy") + if 'dask' in backend and not has_dask_array(): + pytest.skip("Requires Dask") + + from xrspatial.zonal import hypsometric_integral + + zones_np = np.array([[1, 1, 2, 2], + [1, 1, 2, 2]], dtype=np.int32) + values_np = np.array([[10.0, 20.0, 30.0, 40.0], + [15.0, 25.0, 35.0, 45.0]]) + + zones = create_test_raster(zones_np, backend, dims=['y', 'x'], + chunks=(2, 2)) + values = create_test_raster(values_np, backend, dims=['y', 'x'], + chunks=(2, 2)) + + result = hypsometric_integral(zones, values) + assert isinstance(result, xr.DataArray) + assert result.shape == values.shape + + if backend == 'numpy': + assert isinstance(result.data, np.ndarray) + elif backend == 'cupy': + assert is_cupy_array(result.data) + elif backend == 'dask+numpy': + assert has_dask_array() and isinstance(result.data, da.Array) + assert not is_dask_cupy(result) + # confirm chunks are numpy + sample = result.data.blocks[0, 0].compute() + assert isinstance(sample, np.ndarray) + elif backend == 'dask+cupy': + assert is_dask_cupy(result), ( + "dask+cupy input must produce dask+cupy output (#2525)" + ) + # confirm chunks are cupy + sample = result.data.blocks[0, 0].compute() + assert is_cupy_array(sample) diff --git a/xrspatial/zonal.py b/xrspatial/zonal.py index 9db5e3d6..b24c6cf6 100644 --- a/xrspatial/zonal.py +++ b/xrspatial/zonal.py @@ -1538,14 +1538,24 @@ def _paint(zones_chunk, values_chunk, hi_map): def _hi_dask_cupy(zones_data, values_data, nodata): - """Dask+cupy backend: convert chunks to numpy, delegate.""" + """Dask+cupy backend: convert chunks to numpy, delegate, then re-wrap as cupy. + + The per-zone HI lookup table is a Python dict so the reduce step has to + pass through host memory. The painted output is wrapped back as cupy + chunks so the returned dask graph yields cupy arrays, keeping the + backend consistent with the dask+cupy input (issue #2525). + """ zones_cpu = zones_data.map_blocks( lambda x: x.get(), dtype=zones_data.dtype, meta=np.array(()), ) values_cpu = values_data.map_blocks( lambda x: x.get(), dtype=values_data.dtype, meta=np.array(()), ) - return _hi_dask_numpy(zones_cpu, values_cpu, nodata) + result_cpu = _hi_dask_numpy(zones_cpu, values_cpu, nodata) + # Re-wrap each numpy chunk as cupy so dispatch downstream stays on GPU. + return result_cpu.map_blocks( + cupy.asarray, dtype=result_cpu.dtype, meta=cupy.array(()), + ) def hypsometric_integral(