Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 14 additions & 4 deletions xrspatial/slope.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,11 +271,21 @@ def _run_dask_numpy_geodesic(data, lat_2d, lon_2d, a2, b2, z_factor, boundary='n
return out[0]


def _to_cupy_f64(block):
# Only reached from the dask+cupy path, so `cupy` is the real module here,
# never the import-time fallback class.
return cupy.asarray(block, dtype=cupy.float64)


def _run_dask_cupy_geodesic(data, lat_2d, lon_2d, a2, b2, z_factor, boundary='nan'):
lat_dask = da.from_array(cupy.asarray(lat_2d, dtype=cupy.float64),
chunks=data.chunksize)
lon_dask = da.from_array(cupy.asarray(lon_2d, dtype=cupy.float64),
chunks=data.chunksize)
# Keep lat/lon as dask-of-numpy on the (zero-stride) broadcast views, then
# convert each block to cupy lazily. Converting up front with
# cupy.asarray(lat_2d) would densify the full (H, W) grid onto a single GPU
# at graph-construction time and OOM on large rasters.
lat_dask = da.from_array(lat_2d, chunks=data.chunksize).map_blocks(
_to_cupy_f64, dtype=np.float64)
lon_dask = da.from_array(lon_2d, chunks=data.chunksize).map_blocks(
_to_cupy_f64, dtype=np.float64)
stacked = da.stack([
data.astype(cupy.float64),
lat_dask,
Expand Down
36 changes: 35 additions & 1 deletion xrspatial/tests/test_geodesic_slope.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,6 @@ def test_numpy_equals_dask(self):
class TestGeodesicSlopeCupy:

def test_numpy_equals_cupy(self):
import cupy
elev = _east_tilted_surface(H=8, W=10, grade=100.0)
r_np = _make_geo_raster(elev, 40.0, 41.0, 10.0, 11.0, backend='numpy')
r_cu = _make_geo_raster(elev, 40.0, 41.0, 10.0, 11.0, backend='cupy')
Expand All @@ -321,3 +320,38 @@ def test_numpy_equals_dask_cupy(self):
np.testing.assert_allclose(
s_np.values, s_dc.data.compute().get(), rtol=1e-5, equal_nan=True
)

def test_latlon_not_materialized_on_gpu_at_graph_build(self):
"""The dask+cupy geodesic path must keep lat/lon chunked.

Building the graph (no compute) for a large raster must not densify
the full (H, W) lat/lon grids onto the GPU. Converting the broadcast
views with ``cupy.asarray`` up front would allocate ~2*H*W*8 bytes of
GPU memory at graph-construction time and OOM on large rasters.
"""
import cupy

H = W = 2048
elev = cupy.zeros((H, W), dtype=cupy.float64)
lat = np.linspace(40.0, 41.0, H)
lon = np.linspace(10.0, 11.0, W)
raster = xr.DataArray(
da.from_array(elev, chunks=(256, 256)),
dims=['lat', 'lon'],
coords={'lat': lat, 'lon': lon},
)

pool = cupy.get_default_memory_pool()
pool.free_all_blocks()
before = pool.used_bytes()
out = slope(raster, method='geodesic') # graph construction only
out.data.__dask_graph__()
delta = pool.used_bytes() - before

# A single full lat or lon grid is H*W*8 bytes. If either were
# densified eagerly the delta would be at least that large.
one_full_grid = H * W * 8
assert delta < one_full_grid, (
f"graph construction allocated {delta} GPU bytes; expected well "
f"under one full lat/lon grid ({one_full_grid} bytes)"
)
Loading