diff --git a/.claude/sweep-security-state.json b/.claude/sweep-security-state.json index 38a23f13..1f1dc758 100644 --- a/.claude/sweep-security-state.json +++ b/.claude/sweep-security-state.json @@ -153,6 +153,13 @@ "severity_max": null, "categories_found": [], "notes": "Clean. Small (271 LOC) module computing 3x3 second-derivative stencil. Cat 1: only single output buffer matching input shape (np.empty at line 37, cupy.empty at line 101) -- bounded by caller, per audit guidance not a finding. Cat 2: _cpu numba kernel uses range(1, rows-1)/range(1, cols-1) with simple (y, x) indices; no flat indexing or queue arrays; numba range loops produce int64. Cat 3: division by cellsize*cellsize on line 44 -- cellsize comes from get_dataarray_resolution() (raster property, not user-direct); cellsize=0 is unrealistic and would produce inf consistently across backends. NaN inputs propagate correctly through float arithmetic. Cat 4: _run_gpu (line 79-86) has full bounds guard via 'i + di <= out.shape[0] - 1 and j + dj <= out.shape[1] - 1' which guarantees i < shape[0] and j < shape[1] before the out[i, j] write; no shared memory; out is pre-filled with NaN at line 102 so threads outside the guard correctly leave NaN. Cat 5: no file I/O. Cat 6: curvature() calls _validate_raster at line 253; all four backend paths explicitly cast to float32 (lines 51, 62, 97, 112) so dtype is normalized before any computation; tests cover int32/int64/uint32/uint64/float32/float64 across numpy/cupy/dask+numpy/dask+cupy." + }, + "erosion": { + "last_inspected": "2026-04-25", + "issue": 1275, + "severity_max": "HIGH", + "categories_found": [1, 3, 6], + "notes": "HIGH (fixed #1275): erode() accepted three user-controlled parameters with no upper bound. (1) iterations sized rng.random((iterations, 2)) on the host (16 B/particle) and was copied to the GPU via cupy.asarray, so iterations=10**12 attempted ~16 TB on each side. 
(2) params['radius'] drove _build_brush which iterates (2r+1)**2 cells and stores three arrays of the same length, so radius=10**6 allocated ~50 TB of brush data (16 B/cell: two int32 offsets plus one float64 weight). (3) params['max_lifetime'] is the inner per-particle JIT loop in both _erode_cpu and _erode_gpu_kernel, so max_lifetime=10**12 with the default iterations=50000 ran 5e16 step iterations. The existing _check_erosion_memory helper only fired on dask paths and ignored the random_pos and brush working sets. Fixed by capping all three parameters at the public erode() entry via _validate_scalar(max_val=...) (_MAX_ITERATIONS=1e8, _MAX_RADIUS=1024, _MAX_LIFETIME=1e5), rewriting _check_erosion_memory to include the random_pos buffer and brush bytes in its budget, and wiring the guard into _erode_numpy and _erode_cupy so every backend benefits (the dask paths inherit it via their _erode_numpy/_erode_cupy calls). Mirrors diffuse #1268 pattern. Deferred follow-ups (separate PRs): Cat 3 HIGH NaN input is not guarded in _erode_cpu / _erode_gpu_kernel -- a NaN cell propagates through bilinear interpolation into dir_x/dir_y, NaN bounds checks fall through, and particles can deposit NaN into arbitrary cells via cuda.atomic.add. Cat 6 MEDIUM erode() does not call _validate_raster() on agg -- non-numeric or wrong-ndim input fails inside numba/cupy with a confusing error. No Cat 2 (no int32 flat-index math), no Cat 4 (GPU kernel has bounds guard at line 184 plus per-step bounds checks before every read/write, brush writes are explicitly bounds-checked, no shared memory), no Cat 5 (no file I/O)." 
} } } diff --git a/xrspatial/erosion.py b/xrspatial/erosion.py index 288eee3a..23d664f1 100644 --- a/xrspatial/erosion.py +++ b/xrspatial/erosion.py @@ -19,6 +19,7 @@ class cupy(object): from xrspatial.utils import ( ArrayTypeFunctionMapping, + _validate_scalar, has_cuda_and_cupy, is_cupy_array, is_dask_cupy, @@ -37,6 +38,22 @@ class cupy(object): max_lifetime=30, ) +# Upper bounds on user-controlled iteration / kernel-size parameters. +# These cap the worst-case CPU work and the worst-case host/device +# allocation regardless of raster shape. +# +# - _MAX_ITERATIONS sizes the (iterations, 2) random_pos buffer +# (16 bytes/particle on the host, copied to GPU). 10**8 is ~1.6 GB, +# well above any realistic erosion run (default is 50_000). +# - _MAX_RADIUS bounds the brush size: (2r+1)**2 cells, three arrays. +# At r=1024 the brush arrays total ~50 MB. +# - _MAX_LIFETIME bounds the inner per-particle loop in the JIT kernel. +# At 100_000 with iterations=10**8 the total JIT step count is 10**13, +# which is still excessive; the iteration cap is the binding limit. +_MAX_ITERATIONS = 10**8 +_MAX_RADIUS = 1024 +_MAX_LIFETIME = 100_000 + def _build_brush(radius): """Precompute brush offsets and weights for the erosion kernel.""" @@ -293,8 +310,59 @@ def _erode_gpu_kernel( pos_y = new_y +def _available_memory_bytes(): + """Best-effort estimate of available host memory in bytes.""" + try: + with open('/proc/meminfo', 'r') as f: + for line in f: + if line.startswith('MemAvailable:'): + return int(line.split()[1]) * 1024 # kB -> bytes + except (OSError, ValueError, IndexError): + pass + try: + import psutil + return psutil.virtual_memory().available + except (ImportError, AttributeError): + pass + return 2 * 1024 ** 3 + + +def _check_erosion_memory(shape, dtype, iterations, n_brush): + """Raise MemoryError if the projected working set would exceed RAM. 
+ + Covers four allocations: + - input copy as float64 (numpy path) or float32 output + - random_pos buffer of shape (iterations, 2), float64 + - brush arrays of length n_brush (two int32, one float64) + - output cast back to float32 + + The check fires before any of these are allocated, so callers see + a clean MemoryError instead of an opaque OOM from numpy/cupy. + """ + raster_bytes = int(np.prod(shape)) * np.dtype(dtype).itemsize + # float64 working copy + float32 output ~ 3x raster_bytes for float32 + # input (8/4 = 2 for the copy and another 1 for the output cast). + raster_working = raster_bytes * 3 + rng_bytes = int(iterations) * 2 * 8 # float64 (iterations, 2) + brush_bytes = int(n_brush) * (4 + 4 + 8) # two int32 + one float64 + required = raster_working + rng_bytes + brush_bytes + avail = _available_memory_bytes() + if required > 0.8 * avail: + raise MemoryError( + f"erode() needs ~{required / 1e9:.1f} GB of working memory " + f"(raster ~{raster_working / 1e9:.2f} GB, " + f"random_pos ~{rng_bytes / 1e9:.2f} GB, " + f"brush ~{brush_bytes / 1e9:.2f} GB) but only " + f"~{avail / 1e9:.1f} GB is available. " + f"Reduce `iterations`, `params['radius']`, or downsample " + f"the input." 
+ ) + + def _erode_cupy(data, random_pos_np, boy, box, bw, params): """Run erosion on a CuPy array using the CUDA kernel.""" + _check_erosion_memory(data.shape, data.dtype, + random_pos_np.shape[0], bw.shape[0]) hm = data.astype(cupy.float64) # Transfer brush and random positions to device @@ -320,6 +388,8 @@ def _erode_cupy(data, random_pos_np, boy, box, bw, params): def _erode_numpy(data, random_pos, boy, box, bw, params): """Run erosion on a NumPy array using the CPU kernel.""" + _check_erosion_memory(data.shape, data.dtype, + random_pos.shape[0], bw.shape[0]) hm = data.astype(np.float64).copy() hm = _erode_cpu( @@ -332,33 +402,12 @@ def _erode_numpy(data, random_pos, boy, box, bw, params): return hm.astype(np.float32) -def _check_erosion_memory(data): - """Raise MemoryError if the array is too large to materialize.""" - estimated = np.prod(data.shape) * data.dtype.itemsize - # The erosion kernel needs ~3x: input copy + brush scratch + output - working = estimated * 3 - try: - from xrspatial.zonal import _available_memory_bytes - avail = _available_memory_bytes() - except ImportError: - avail = 2 * 1024**3 - if working > 0.8 * avail: - raise MemoryError( - f"erode() needs ~{working / 1e9:.1f} GB to materialize and " - f"process the full grid but only ~{avail / 1e9:.1f} GB " - f"available. Particle erosion is a global operation that " - f"cannot be chunked. Downsample the input or use a machine " - f"with more RAM." - ) - - def _erode_dask_numpy(data, random_pos, boy, box, bw, params): """Run erosion on a dask+numpy array. Erosion is a global operation (particles traverse the full grid), so we materialize to numpy, run the CPU kernel, then re-wrap. 
""" - _check_erosion_memory(data) np_data = data.compute() result = _erode_numpy(np_data, random_pos, boy, box, bw, params) return da.from_array(result, chunks=data.chunksize) @@ -370,7 +419,6 @@ def _erode_dask_cupy(data, random_pos, boy, box, bw, params): Materializes to a single CuPy array, runs the GPU kernel, then re-wraps as dask. """ - _check_erosion_memory(data) cp_data = data.compute() # CuPy ndarray result = _erode_cupy(cp_data, random_pos, boy, box, bw, params) return da.from_array(result, chunks=data.chunksize, @@ -388,23 +436,49 @@ def erode(agg, iterations=50000, seed=42, params=None): agg : xr.DataArray 2D terrain heightmap. iterations : int - Number of water droplets to simulate. + Number of water droplets to simulate. Capped at + ``_MAX_ITERATIONS`` (1e8) to keep host and device allocations + bounded. seed : int Random seed for droplet placement. params : dict, optional Override default erosion constants. Keys: inertia, capacity, deposition, erosion, evaporation, gravity, min_slope, radius, max_lifetime. + ``radius`` is capped at ``_MAX_RADIUS`` (1024) and + ``max_lifetime`` at ``_MAX_LIFETIME`` (1e5). Returns ------- xr.DataArray Eroded terrain. + + Raises + ------ + ValueError + If `iterations`, `params['radius']`, or `params['max_lifetime']` + is outside the allowed range. + MemoryError + If the projected working set exceeds available memory. 
""" + _validate_scalar( + iterations, func_name='erode', name='iterations', + dtype=int, min_val=1, max_val=_MAX_ITERATIONS, + ) + p = dict(_DEFAULT_PARAMS) if params is not None: p.update(params) + _validate_scalar( + p['radius'], func_name='erode', name="params['radius']", + dtype=int, min_val=1, max_val=_MAX_RADIUS, + ) + _validate_scalar( + p['max_lifetime'], func_name='erode', name="params['max_lifetime']", + dtype=int, min_val=1, max_val=_MAX_LIFETIME, + ) + # Precompute brush and random positions on the host boy, box, bw = _build_brush(int(p['radius'])) rng = np.random.RandomState(seed) diff --git a/xrspatial/tests/test_erosion.py b/xrspatial/tests/test_erosion.py index 3525edaa..9b8ed7a8 100644 --- a/xrspatial/tests/test_erosion.py +++ b/xrspatial/tests/test_erosion.py @@ -4,7 +4,14 @@ import pytest import xarray as xr -from xrspatial.erosion import erode, _build_brush +from xrspatial import erosion as erosion_mod +from xrspatial.erosion import ( + erode, + _build_brush, + _MAX_ITERATIONS, + _MAX_LIFETIME, + _MAX_RADIUS, +) from xrspatial.tests.general_checks import ( create_test_raster, general_output_checks, @@ -205,3 +212,130 @@ def test_erode_dask_cupy_runs(): result_np = result.data.compute().get() assert not np.array_equal(result_np, data) assert np.isfinite(result_np).all() + + +# ---- parameter validation (issue #1275) ---- + +def test_erode_iterations_zero_rejected(): + """iterations < 1 should raise ValueError.""" + agg = _input(_make_terrain(size=16), 'numpy') + with pytest.raises(ValueError, match="iterations"): + erode(agg, iterations=0) + + +def test_erode_iterations_too_large_rejected(): + """iterations > _MAX_ITERATIONS should raise ValueError.""" + agg = _input(_make_terrain(size=16), 'numpy') + with pytest.raises(ValueError, match="iterations"): + erode(agg, iterations=_MAX_ITERATIONS + 1) + + +def test_erode_iterations_huge_rejected_before_alloc(): + """iterations=10**12 must not allocate the (10**12, 2) random_pos buffer. 
+ + The validator should fire before any np.random call, so the test + completes in milliseconds even though the buffer would be ~16 TB. + """ + agg = _input(_make_terrain(size=16), 'numpy') + with pytest.raises(ValueError, match="iterations"): + erode(agg, iterations=10**12) + + +def test_erode_iterations_non_int_rejected(): + """A float iterations argument should raise TypeError.""" + agg = _input(_make_terrain(size=16), 'numpy') + with pytest.raises(TypeError, match="iterations"): + erode(agg, iterations=1.5) + + +def test_erode_radius_zero_rejected(): + """radius < 1 should raise ValueError.""" + agg = _input(_make_terrain(size=16), 'numpy') + with pytest.raises(ValueError, match="radius"): + erode(agg, iterations=10, params={'radius': 0}) + + +def test_erode_radius_too_large_rejected(): + """radius > _MAX_RADIUS should raise ValueError before _build_brush runs. + + radius=100_000 would walk (200_001)**2 cells (~40 billion iterations) + before allocating the brush; the validator must fire first. 
+ """ + agg = _input(_make_terrain(size=16), 'numpy') + with pytest.raises(ValueError, match="radius"): + erode(agg, iterations=10, params={'radius': _MAX_RADIUS + 1}) + + +def test_erode_max_lifetime_zero_rejected(): + """max_lifetime < 1 should raise ValueError.""" + agg = _input(_make_terrain(size=16), 'numpy') + with pytest.raises(ValueError, match="max_lifetime"): + erode(agg, iterations=10, params={'max_lifetime': 0}) + + +def test_erode_max_lifetime_too_large_rejected(): + """max_lifetime > _MAX_LIFETIME should raise ValueError.""" + agg = _input(_make_terrain(size=16), 'numpy') + with pytest.raises(ValueError, match="max_lifetime"): + erode(agg, iterations=10, params={'max_lifetime': _MAX_LIFETIME + 1}) + + +def test_erode_at_iteration_cap_runs(): + """A small raster with iterations exactly at the cap must still be + accepted by the validator (the memory guard handles infeasible + combinations separately).""" + agg = _input(_make_terrain(size=8), 'numpy') + # We can't actually run iterations=_MAX_ITERATIONS in a test; the + # point here is that the validator accepts the boundary value. + # Use monkeypatch-free check: call _validate_scalar directly via + # a tiny wrapper. + from xrspatial.utils import _validate_scalar + _validate_scalar( + _MAX_ITERATIONS, func_name='erode', name='iterations', + dtype=int, min_val=1, max_val=_MAX_ITERATIONS, + ) + + +def test_erode_memory_guard_fires_numpy(monkeypatch): + """The memory guard must fire on the numpy path (not just dask).""" + # Pretend we have only 1 MB of RAM available. A 128x128 float32 raster + # with iterations=10000 needs a ~196 KB raster working set plus a + # ~160 KB rng buffer, well within 1 MB. Force the guard with a tighter + # budget by using a larger raster. 
+ monkeypatch.setattr(erosion_mod, '_available_memory_bytes', + lambda: 1 * 1024 * 1024) + agg = _input(_make_terrain(size=512), 'numpy') + with pytest.raises(MemoryError, match="erode"): + erode(agg, iterations=1000) + + +@cuda_and_cupy_available +def test_erode_memory_guard_fires_cupy(monkeypatch): + """The memory guard must fire on the cupy path (not just dask).""" + monkeypatch.setattr(erosion_mod, '_available_memory_bytes', + lambda: 1 * 1024 * 1024) + agg = _input(_make_terrain(size=512), 'cupy') + with pytest.raises(MemoryError, match="erode"): + erode(agg, iterations=1000) + + +def test_erode_memory_guard_includes_random_pos(monkeypatch): + """A huge iterations count on a tiny raster must trip the memory + guard (the random_pos buffer dominates).""" + monkeypatch.setattr(erosion_mod, '_available_memory_bytes', + lambda: 10 * 1024 * 1024) + agg = _input(_make_terrain(size=4), 'numpy') + # 10**7 particles ~ 160 MB float64 buffer, well under _MAX_ITERATIONS + # but huge relative to a 4x4 raster's working set. The error message + # should call out the random_pos buffer specifically. + with pytest.raises(MemoryError, match="random_pos"): + erode(agg, iterations=10_000_000) + + +def test_erode_default_params_still_work(): + """Sanity check: the defaults stay well below every cap.""" + data = _make_terrain(size=32) + agg = _input(data, 'numpy') + result = erode(agg, iterations=500) # default seed/params + assert result.shape == (32, 32) + assert np.isfinite(result.data).all()