From f516277dc91efea75bddaff105a85db7f26f6f48 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Mon, 1 Jun 2026 10:32:37 -0700 Subject: [PATCH 1/2] Keep dask+cupy geodesic slope lat/lon lazy to avoid GPU OOM (#2762) Port the lazy-block pattern from aspect.py into slope's dask+cupy geodesic path. Building the graph no longer densifies the full (H, W) lat/lon grids onto a single GPU via cupy.asarray; each block is converted lazily with map_blocks(_to_cupy_f64) instead. Add a regression test that asserts graph construction stays under one full lat/lon grid of GPU memory. --- xrspatial/slope.py | 18 ++++++++++--- xrspatial/tests/test_geodesic_slope.py | 35 ++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 4 deletions(-) diff --git a/xrspatial/slope.py b/xrspatial/slope.py index 56b41df3f..17cd94af9 100644 --- a/xrspatial/slope.py +++ b/xrspatial/slope.py @@ -271,11 +271,21 @@ def _run_dask_numpy_geodesic(data, lat_2d, lon_2d, a2, b2, z_factor, boundary='n return out[0] +def _to_cupy_f64(block): + # Only reached from the dask+cupy path, so `cupy` is the real module here, + # never the import-time fallback class. + return cupy.asarray(block, dtype=cupy.float64) + + def _run_dask_cupy_geodesic(data, lat_2d, lon_2d, a2, b2, z_factor, boundary='nan'): - lat_dask = da.from_array(cupy.asarray(lat_2d, dtype=cupy.float64), - chunks=data.chunksize) - lon_dask = da.from_array(cupy.asarray(lon_2d, dtype=cupy.float64), - chunks=data.chunksize) + # Keep lat/lon as dask-of-numpy on the (zero-stride) broadcast views, then + # convert each block to cupy lazily. Converting up front with + # cupy.asarray(lat_2d) would densify the full (H, W) grid onto a single GPU + # at graph-construction time and OOM on large rasters. + lat_dask = da.from_array(lat_2d, chunks=data.chunksize).map_blocks( + _to_cupy_f64, dtype=np.float64) + lon_dask = da.from_array(lon_2d, chunks=data.chunksize).map_blocks( + _to_cupy_f64, dtype=np.float64) stacked = da.stack([ data.astype(cupy.float64), lat_dask, diff --git a/xrspatial/tests/test_geodesic_slope.py b/xrspatial/tests/test_geodesic_slope.py index d22a379dd..9e6255c92 100644 --- a/xrspatial/tests/test_geodesic_slope.py +++ b/xrspatial/tests/test_geodesic_slope.py @@ -321,3 +321,38 @@ def test_numpy_equals_dask_cupy(self): np.testing.assert_allclose( s_np.values, s_dc.data.compute().get(), rtol=1e-5, equal_nan=True ) + + def test_latlon_not_materialized_on_gpu_at_graph_build(self): + """The dask+cupy geodesic path must keep lat/lon chunked. + + Building the graph (no compute) for a large raster must not densify + the full (H, W) lat/lon grids onto the GPU. Converting the broadcast + views with ``cupy.asarray`` up front would allocate ~2*H*W*8 bytes of + GPU memory at graph-construction time and OOM on large rasters. + """ + import cupy + + H = W = 2048 + elev = cupy.zeros((H, W), dtype=cupy.float64) + lat = np.linspace(40.0, 41.0, H) + lon = np.linspace(10.0, 11.0, W) + raster = xr.DataArray( + da.from_array(elev, chunks=(256, 256)), + dims=['lat', 'lon'], + coords={'lat': lat, 'lon': lon}, + ) + + pool = cupy.get_default_memory_pool() + pool.free_all_blocks() + before = pool.used_bytes() + out = slope(raster, method='geodesic') # graph construction only + out.data.__dask_graph__() + delta = pool.used_bytes() - before + + # A single full lat or lon grid is H*W*8 bytes. If either were + # densified eagerly the delta would be at least that large. + one_full_grid = H * W * 8 + assert delta < one_full_grid, ( + f"graph construction allocated {delta} GPU bytes; expected well " + f"under one full lat/lon grid ({one_full_grid} bytes)" + ) From 5a16517a44a059db3704e2a5929a96caecc8fd02 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Mon, 1 Jun 2026 10:34:31 -0700 Subject: [PATCH 2/2] Address review nit: drop unused cupy import in slope geodesic test (#2762) --- xrspatial/tests/test_geodesic_slope.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xrspatial/tests/test_geodesic_slope.py b/xrspatial/tests/test_geodesic_slope.py index 9e6255c92..9f7ce422c 100644 --- a/xrspatial/tests/test_geodesic_slope.py +++ b/xrspatial/tests/test_geodesic_slope.py @@ -296,7 +296,6 @@ def test_numpy_equals_dask(self): class TestGeodesicSlopeCupy: def test_numpy_equals_cupy(self): - import cupy elev = _east_tilted_surface(H=8, W=10, grade=100.0) r_np = _make_geo_raster(elev, 40.0, 41.0, 10.0, 11.0, backend='numpy') r_cu = _make_geo_raster(elev, 40.0, 41.0, 10.0, 11.0, backend='cupy')