Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .claude/sweep-security-state.json
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,13 @@
"severity_max": null,
"categories_found": [],
"notes": "Clean. Small (271 LOC) module computing 3x3 second-derivative stencil. Cat 1: only single output buffer matching input shape (np.empty at line 37, cupy.empty at line 101) -- bounded by caller, per audit guidance not a finding. Cat 2: _cpu numba kernel uses range(1, rows-1)/range(1, cols-1) with simple (y, x) indices; no flat indexing or queue arrays; numba range loops produce int64. Cat 3: division by cellsize*cellsize on line 44 -- cellsize comes from get_dataarray_resolution() (raster property, not user-direct); cellsize=0 is unrealistic and would produce inf consistently across backends. NaN inputs propagate correctly through float arithmetic. Cat 4: _run_gpu (line 79-86) has full bounds guard via 'i + di <= out.shape[0] - 1 and j + dj <= out.shape[1] - 1' which guarantees i < shape[0] and j < shape[1] before the out[i, j] write; no shared memory; out is pre-filled with NaN at line 102 so threads outside the guard correctly leave NaN. Cat 5: no file I/O. Cat 6: curvature() calls _validate_raster at line 253; all four backend paths explicitly cast to float32 (lines 51, 62, 97, 112) so dtype is normalized before any computation; tests cover int32/int64/uint32/uint64/float32/float64 across numpy/cupy/dask+numpy/dask+cupy."
},
"erosion": {
"last_inspected": "2026-04-25",
"issue": 1275,
"severity_max": "HIGH",
"categories_found": [1, 3, 6],
"notes": "HIGH (fixed #1275): erode() accepted three user-controlled parameters with no upper bound. (1) iterations sized rng.random((iterations, 2)) on the host (16 B/particle) and was copied to the GPU via cupy.asarray, so iterations=10**12 attempted ~16 TB on each side. (2) params['radius'] drove _build_brush which iterates (2r+1)**2 cells and stores three arrays of the same length, so radius=10**6 allocated ~12 TB of brush data. (3) params['max_lifetime'] is the inner per-particle JIT loop in both _erode_cpu and _erode_gpu_kernel, so max_lifetime=10**12 with the default iterations=50000 ran 5e16 step iterations. The existing _check_erosion_memory helper only fired on dask paths and ignored the random_pos and brush working sets. Fixed by capping all three parameters at the public erode() entry via _validate_scalar(max_val=...) (_MAX_ITERATIONS=1e8, _MAX_RADIUS=1024, _MAX_LIFETIME=1e5), rewriting _check_erosion_memory to include the random_pos buffer and brush bytes in its budget, and wiring the guard into _erode_numpy and _erode_cupy so every backend benefits (the dask paths inherit it via their _erode_numpy/_erode_cupy calls). Mirrors diffuse #1268 pattern. Deferred follow-ups (separate PRs): Cat 3 HIGH NaN input is not guarded in _erode_cpu / _erode_gpu_kernel -- a NaN cell propagates through bilinear interpolation into dir_x/dir_y, NaN bounds checks fall through, and particles can deposit NaN into arbitrary cells via cuda.atomic.add. Cat 6 MEDIUM erode() does not call _validate_raster() on agg -- non-numeric or wrong-ndim input fails inside numba/cupy with a confusing error. No Cat 2 (no int32 flat-index math), no Cat 4 (GPU kernel has bounds guard at line 184 plus per-step bounds checks before every read/write, brush writes are explicitly bounds-checked, no shared memory), no Cat 5 (no file I/O)."
}
}
}
120 changes: 97 additions & 23 deletions xrspatial/erosion.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class cupy(object):

from xrspatial.utils import (
ArrayTypeFunctionMapping,
_validate_scalar,
has_cuda_and_cupy,
is_cupy_array,
is_dask_cupy,
Expand All @@ -37,6 +38,22 @@ class cupy(object):
max_lifetime=30,
)

# Upper bounds on user-controlled iteration / kernel-size parameters.
# These cap the worst-case CPU work and the worst-case host/device
# allocation regardless of raster shape.
#
# - _MAX_ITERATIONS sizes the (iterations, 2) random_pos buffer
#   (16 bytes/particle on the host, copied to GPU). 10**8 is ~1.6 GB,
#   well above any realistic erosion run (default is 50_000).
# - _MAX_RADIUS bounds the brush size: (2r+1)**2 cells, three arrays
#   (two int32 + one float64, i.e. 16 bytes/cell). At r=1024 that is
#   2049**2 ~ 4.2e6 cells, so the brush arrays total ~67 MB.
# - _MAX_LIFETIME bounds the inner per-particle loop in the JIT kernel.
#   At 100_000 with iterations=10**8 the total JIT step count is 10**13,
#   which is still excessive; the iteration cap is the binding limit.
_MAX_ITERATIONS = 10**8
_MAX_RADIUS = 1024
_MAX_LIFETIME = 100_000


def _build_brush(radius):
"""Precompute brush offsets and weights for the erosion kernel."""
Expand Down Expand Up @@ -293,8 +310,59 @@ def _erode_gpu_kernel(
pos_y = new_y


def _available_memory_bytes():
"""Best-effort estimate of available host memory in bytes."""
try:
with open('/proc/meminfo', 'r') as f:
for line in f:
if line.startswith('MemAvailable:'):
return int(line.split()[1]) * 1024 # kB -> bytes
except (OSError, ValueError, IndexError):
pass
try:
import psutil
return psutil.virtual_memory().available
except (ImportError, AttributeError):
pass
return 2 * 1024 ** 3


def _check_erosion_memory(shape, dtype, iterations, n_brush):
    """Raise MemoryError if the projected working set would exceed RAM.

    The budget covers every allocation the erosion backends make:
    the float64 working copy of the raster plus its float32 output
    (~3x the input bytes for a float32 raster), the (iterations, 2)
    float64 random_pos buffer, and the three brush arrays (two int32,
    one float64) of length n_brush.

    Raising here -- before anything is allocated -- gives callers a
    clean MemoryError instead of an opaque OOM from numpy/cupy.
    """
    raster_bytes = np.dtype(dtype).itemsize * int(np.prod(shape))
    # The float64 copy of a float32 input is 2x the input bytes and the
    # float32 output cast adds another 1x, hence the factor of 3.
    raster_working = 3 * raster_bytes
    rng_bytes = 16 * int(iterations)  # (iterations, 2) float64: 2 * 8 B
    brush_bytes = 16 * int(n_brush)  # per cell: int32 + int32 + float64
    required = raster_working + rng_bytes + brush_bytes
    avail = _available_memory_bytes()
    # Leave ~20% headroom for the interpreter and other processes.
    if required <= 0.8 * avail:
        return
    raise MemoryError(
        f"erode() needs ~{required / 1e9:.1f} GB of working memory "
        f"(raster ~{raster_working / 1e9:.2f} GB, "
        f"random_pos ~{rng_bytes / 1e9:.2f} GB, "
        f"brush ~{brush_bytes / 1e9:.2f} GB) but only "
        f"~{avail / 1e9:.1f} GB is available. "
        f"Reduce `iterations`, `params['radius']`, or downsample "
        f"the input."
    )


def _erode_cupy(data, random_pos_np, boy, box, bw, params):
"""Run erosion on a CuPy array using the CUDA kernel."""
_check_erosion_memory(data.shape, data.dtype,
random_pos_np.shape[0], bw.shape[0])
hm = data.astype(cupy.float64)

# Transfer brush and random positions to device
Expand All @@ -320,6 +388,8 @@ def _erode_cupy(data, random_pos_np, boy, box, bw, params):

def _erode_numpy(data, random_pos, boy, box, bw, params):
"""Run erosion on a NumPy array using the CPU kernel."""
_check_erosion_memory(data.shape, data.dtype,
random_pos.shape[0], bw.shape[0])
hm = data.astype(np.float64).copy()

hm = _erode_cpu(
Expand All @@ -332,33 +402,12 @@ def _erode_numpy(data, random_pos, boy, box, bw, params):
return hm.astype(np.float32)


def _check_erosion_memory(data):
    """Raise MemoryError if the array is too large to materialize.

    Parameters
    ----------
    data : array-like
        Object exposing ``.shape`` and ``.dtype`` (numpy/cupy/dask array).

    Raises
    ------
    MemoryError
        If ~3x the array's byte size exceeds 80% of available memory.
    """
    estimated = np.prod(data.shape) * data.dtype.itemsize
    # The erosion kernel needs ~3x: input copy + brush scratch + output
    working = estimated * 3
    try:
        # Reuse zonal's helper when importable; otherwise fall back to
        # a conservative 2 GiB assumption.
        from xrspatial.zonal import _available_memory_bytes
        avail = _available_memory_bytes()
    except ImportError:
        avail = 2 * 1024**3
    # The 0.8 factor leaves headroom for the interpreter and other
    # processes sharing the host.
    if working > 0.8 * avail:
        raise MemoryError(
            f"erode() needs ~{working / 1e9:.1f} GB to materialize and "
            f"process the full grid but only ~{avail / 1e9:.1f} GB "
            f"available. Particle erosion is a global operation that "
            f"cannot be chunked. Downsample the input or use a machine "
            f"with more RAM."
        )


def _erode_dask_numpy(data, random_pos, boy, box, bw, params):
"""Run erosion on a dask+numpy array.

Erosion is a global operation (particles traverse the full grid),
so we materialize to numpy, run the CPU kernel, then re-wrap.
"""
_check_erosion_memory(data)
np_data = data.compute()
result = _erode_numpy(np_data, random_pos, boy, box, bw, params)
return da.from_array(result, chunks=data.chunksize)
Expand All @@ -370,7 +419,6 @@ def _erode_dask_cupy(data, random_pos, boy, box, bw, params):
Materializes to a single CuPy array, runs the GPU kernel, then
re-wraps as dask.
"""
_check_erosion_memory(data)
cp_data = data.compute() # CuPy ndarray
result = _erode_cupy(cp_data, random_pos, boy, box, bw, params)
return da.from_array(result, chunks=data.chunksize,
Expand All @@ -388,23 +436,49 @@ def erode(agg, iterations=50000, seed=42, params=None):
agg : xr.DataArray
2D terrain heightmap.
iterations : int
Number of water droplets to simulate.
Number of water droplets to simulate. Capped at
``_MAX_ITERATIONS`` (1e8) to keep host and device allocations
bounded.
seed : int
Random seed for droplet placement.
params : dict, optional
Override default erosion constants. Keys:
inertia, capacity, deposition, erosion, evaporation,
gravity, min_slope, radius, max_lifetime.
``radius`` is capped at ``_MAX_RADIUS`` (1024) and
``max_lifetime`` at ``_MAX_LIFETIME`` (1e5).

Returns
-------
xr.DataArray
Eroded terrain.

Raises
------
ValueError
If `iterations`, `params['radius']`, or `params['max_lifetime']`
is outside the allowed range.
MemoryError
If the projected working set exceeds available memory.
"""
_validate_scalar(
iterations, func_name='erode', name='iterations',
dtype=int, min_val=1, max_val=_MAX_ITERATIONS,
)

p = dict(_DEFAULT_PARAMS)
if params is not None:
p.update(params)

_validate_scalar(
p['radius'], func_name='erode', name="params['radius']",
dtype=int, min_val=1, max_val=_MAX_RADIUS,
)
_validate_scalar(
p['max_lifetime'], func_name='erode', name="params['max_lifetime']",
dtype=int, min_val=1, max_val=_MAX_LIFETIME,
)

# Precompute brush and random positions on the host
boy, box, bw = _build_brush(int(p['radius']))
rng = np.random.RandomState(seed)
Expand Down
136 changes: 135 additions & 1 deletion xrspatial/tests/test_erosion.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,14 @@
import pytest
import xarray as xr

from xrspatial.erosion import erode, _build_brush
from xrspatial import erosion as erosion_mod
from xrspatial.erosion import (
erode,
_build_brush,
_MAX_ITERATIONS,
_MAX_LIFETIME,
_MAX_RADIUS,
)
from xrspatial.tests.general_checks import (
create_test_raster,
general_output_checks,
Expand Down Expand Up @@ -205,3 +212,130 @@ def test_erode_dask_cupy_runs():
result_np = result.data.compute().get()
assert not np.array_equal(result_np, data)
assert np.isfinite(result_np).all()


# ---- parameter validation (issue #1275) ----

def test_erode_iterations_zero_rejected():
    """erode() must reject a non-positive iterations count."""
    with pytest.raises(ValueError, match="iterations"):
        erode(_input(_make_terrain(size=16), 'numpy'), iterations=0)


def test_erode_iterations_too_large_rejected():
    """erode() must reject an iterations count above _MAX_ITERATIONS."""
    terrain = _input(_make_terrain(size=16), 'numpy')
    with pytest.raises(ValueError, match="iterations"):
        erode(terrain, iterations=_MAX_ITERATIONS + 1)


def test_erode_iterations_huge_rejected_before_alloc():
    """iterations=10**12 must fail validation, not allocate ~16 TB.

    A (10**12, 2) float64 random_pos buffer would be ~16 TB; the
    validator has to fire before any np.random call, so this test
    finishes in milliseconds.
    """
    terrain = _input(_make_terrain(size=16), 'numpy')
    with pytest.raises(ValueError, match="iterations"):
        erode(terrain, iterations=10**12)


def test_erode_iterations_non_int_rejected():
    """Passing iterations as a float must raise TypeError."""
    with pytest.raises(TypeError, match="iterations"):
        erode(_input(_make_terrain(size=16), 'numpy'), iterations=1.5)


def test_erode_radius_zero_rejected():
    """A non-positive brush radius must raise ValueError."""
    terrain = _input(_make_terrain(size=16), 'numpy')
    with pytest.raises(ValueError, match="radius"):
        erode(terrain, iterations=10, params={'radius': 0})


def test_erode_radius_too_large_rejected():
    """radius > _MAX_RADIUS must raise before _build_brush runs.

    An unchecked radius=100_000 would make _build_brush walk
    (200_001)**2 cells (~40 billion iterations) before the brush is
    even allocated, so the validator has to fire first.
    """
    terrain = _input(_make_terrain(size=16), 'numpy')
    with pytest.raises(ValueError, match="radius"):
        erode(terrain, iterations=10, params={'radius': _MAX_RADIUS + 1})


def test_erode_max_lifetime_zero_rejected():
    """A non-positive max_lifetime must raise ValueError."""
    terrain = _input(_make_terrain(size=16), 'numpy')
    with pytest.raises(ValueError, match="max_lifetime"):
        erode(terrain, iterations=10, params={'max_lifetime': 0})


def test_erode_max_lifetime_too_large_rejected():
    """max_lifetime above _MAX_LIFETIME must raise ValueError."""
    terrain = _input(_make_terrain(size=16), 'numpy')
    with pytest.raises(ValueError, match="max_lifetime"):
        erode(terrain, iterations=10,
              params={'max_lifetime': _MAX_LIFETIME + 1})


def test_erode_at_iteration_cap_runs():
    """The validator must accept iterations exactly at the cap.

    Actually running erode() with _MAX_ITERATIONS particles is far too
    slow for a unit test, so exercise the boundary through the same
    _validate_scalar call that erode() makes. Infeasible raster/
    iteration combinations are handled separately by the memory guard.
    """
    from xrspatial.utils import _validate_scalar
    # Must not raise: max_val is inclusive.
    _validate_scalar(
        _MAX_ITERATIONS, func_name='erode', name='iterations',
        dtype=int, min_val=1, max_val=_MAX_ITERATIONS,
    )


def test_erode_memory_guard_fires_numpy(monkeypatch):
    """The memory guard must fire on the plain numpy path, not just dask."""
    # Cap the reported available memory at 1 MB. A 512x512 float32
    # raster alone needs ~3 MB of working set (~1 MB input x3), so the
    # guard must raise before anything is allocated.
    monkeypatch.setattr(erosion_mod, '_available_memory_bytes',
                        lambda: 1024 * 1024)
    terrain = _input(_make_terrain(size=512), 'numpy')
    with pytest.raises(MemoryError, match="erode"):
        erode(terrain, iterations=1000)


@cuda_and_cupy_available
def test_erode_memory_guard_fires_cupy(monkeypatch):
    """The memory guard must fire on the cupy path, not just dask."""
    # Fake a 1 MB budget; a 512x512 raster cannot possibly fit.
    monkeypatch.setattr(erosion_mod, '_available_memory_bytes',
                        lambda: 1024 * 1024)
    terrain = _input(_make_terrain(size=512), 'cupy')
    with pytest.raises(MemoryError, match="erode"):
        erode(terrain, iterations=1000)


def test_erode_memory_guard_includes_random_pos(monkeypatch):
    """A huge iterations count on a tiny raster must trip the guard.

    10**7 particles is ~160 MB of float64 random_pos data -- legal per
    _MAX_ITERATIONS but far beyond the 10 MB budget faked here, and the
    error message should call out the random_pos buffer specifically.
    """
    monkeypatch.setattr(erosion_mod, '_available_memory_bytes',
                        lambda: 10 * 1024 * 1024)
    terrain = _input(_make_terrain(size=4), 'numpy')
    with pytest.raises(MemoryError, match="random_pos"):
        erode(terrain, iterations=10_000_000)


def test_erode_default_params_still_work():
    """Sanity check: default seed/params stay well below every cap."""
    heightmap = _make_terrain(size=32)
    result = erode(_input(heightmap, 'numpy'), iterations=500)
    assert result.shape == (32, 32)
    assert np.isfinite(result.data).all()
Loading