From 06ec26b98c19f906dca42a8b15c641c6fc0fec05 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 28 Apr 2026 06:39:15 -0700 Subject: [PATCH] Guard resample() against unbounded output allocations (#1295) resample() did not bound the output dimensions derived from user-supplied scale_factor or target_resolution. _output_shape returned (round(in_h * scale_y), round(in_w * scale_x)) and that was handed to the eager numpy and cupy backends, which allocated np.empty / cupy.empty / map_coordinates buffers of that size with no memory check. scale_factor=1e9 on a 4x4 raster requested ~190 EB before this commit. Add _available_memory_bytes() / _available_gpu_memory_bytes() and _check_resample_memory / _check_resample_gpu_memory helpers (12 B/cell budget: 8 B for the float64 working buffer plus 4 B for the float32 output; the 50%-of-available headroom absorbs the scipy/cupyx map_coordinates temporary), wired into resample() before backend dispatch. The eager numpy and cupy paths run the guard; dask paths skip it because per-chunk allocations are already bounded by chunk size. Same memory-guard pattern as kde / line_density (#1287), focal (#1284), geodesic (#1283), cost_distance (#1262), and diffuse (#1267). Tests in TestMemoryGuard cover huge scale_factor, huge inverse target_resolution, the aggregate-method ValueError still winning over MemoryError, normal inputs still working, error message contents, and the dask path bypassing the guard. 
--- .claude/sweep-security-state.csv | 3 +- xrspatial/resample.py | 102 ++++++++++++++++++++++++++++++- xrspatial/tests/test_resample.py | 52 ++++++++++++++++ 3 files changed, 153 insertions(+), 4 deletions(-) diff --git a/.claude/sweep-security-state.csv b/.claude/sweep-security-state.csv index 88aee018..d4f1ea94 100644 --- a/.claude/sweep-security-state.csv +++ b/.claude/sweep-security-state.csv @@ -22,6 +22,7 @@ geotiff,2026-04-17,1215,HIGH,1;4,1219;1220,Follow-up #1220: GPU predictor=2 kern glcm,2026-04-24,1257,HIGH,1,,"HIGH (fixed #1257): glcm_texture() validated window_size only as >= 3 and distance only as >= 1, with no upper bound on either. _glcm_numba_kernel iterates range(r-half, r+half+1) for every pixel, so window_size=1_000_001 on a 10x10 raster ran ~10^14 loop iterations with all neighbors failing the interior bounds check (CPU DoS). On the dask backends depth = window_size // 2 + distance drove map_overlap padding, so a huge window also caused oversize per-chunk allocations (memory DoS). Fixed by adding max_val caps in the public entrypoint: window_size <= max(3, min(rows, cols)) and distance <= max(1, window_size // 2). One cap covers every backend because cupy and dask+cupy call through to the CPU kernel after cupy.asnumpy. No other HIGH findings: levels is already capped at 256 so the per-pixel np.zeros((levels, levels)) matrix in the kernel is bounded to 512 KB. No CUDA kernels. No file I/O. Quantization clips to [0, levels-1] before the kernel and NaN maps to -1 which the kernel filters with i_val >= 0. Entropy log(p) and correlation p / (std_i * std_j) are both guarded. All four backends use _validate_raster and cast to float64 before quantizing. MEDIUM (unfixed, Cat 1): the per-pixel np.zeros((levels, levels)) allocation inside the hot loop is a perf issue (levels=256 -> 512 KB alloc+free per pixel) but not a security issue because levels is bounded. 
Could be hoisted out of the loop or replaced with an in-place clear, but that is an efficiency concern, not security." hillshade,2026-04-27,,,,,"Clean. Cat 1: only allocation is the output np.empty(data.shape) at line 32 (cupy at line 165) and a _pad_array with hardcoded depth=1 (line 62) -- bounded by caller, no user-controlled amplifier. Azimuth/altitude are scalars and don't drive size. Cat 2: numba kernel uses range(1, rows-1) with simple (y, x) indexing; numba range loops promote to int64. Cat 3: math.sqrt(1.0 + xx_plus_yy) is always >= 1.0 (no neg sqrt, no div-by-zero); NaN elevation propagates correctly through dz_dx/dz_dy -> shaded -> output (the shaded < 0.0 / shaded > 1.0 clamps don't fire on NaN). Azimuth validated to [0, 360], altitude to [0, 90]. Cat 4: _gpu_calc_numba (line 107) guards both grid bounds and 3x3 stencil reads via i > 0 and i < shape[0]-1 and j > 0 and j < shape[1]-1; no shared memory. Cat 5: no file I/O. Cat 6: hillshade() calls _validate_raster (line 252) and _validate_scalar for both azimuth (253) and angle_altitude (254); all four backend paths cast to float32; tests parametrize int32/int64/float32/float64." hydro,2026-04-17,,MEDIUM,1;3;6,, +kde,2026-04-27,1287,HIGH,1,,"HIGH (fixed #1287): kde() and line_density() accepted user-controlled width/height with no upper bound. The eager numpy and cupy backends allocated np.zeros((height, width), dtype=float64) (or cupy.zeros) up front (kde.py: _run_kde_numpy line 308, _run_kde_cupy line 314, line_density inline at line 706). width=1_000_000, height=1_000_000 requested ~8 TB of float64 (or VRAM on the GPU path) before any check ran. Fixed by adding local _available_memory_bytes() helper (mirrors convolution/morphology/bump pattern) and _check_grid_memory(rows, cols) that raises MemoryError when rows*cols*8 exceeds 50% of available RAM. 
Wired into kde() (skipped for dask paths since _run_kde_dask_numpy/_run_kde_dask_cupy build per-tile via da.from_delayed and are bounded by chunk size) and line_density() (single numpy backend, always guarded). Error message names width/height so the caller knows which knob to turn. No other HIGH findings: Cat 2 (no int32 flat-index math, numba range loops are int64), Cat 3 (bandwidth <= 0 rejected, Silverman fallback returns 1.0 when sigma==0, NaN coords clamp to empty range via min/max), Cat 4 (_kde_cuda has 'if r >= rows or c >= cols: return' bounds guard at line 254, no shared memory, each thread writes own pixel), Cat 5 (no file I/O), Cat 6 (template only used for shape/coords, output dtype forced to float64). MEDIUM (unfixed, Cat 6): _validate_template only checks DataArray + ndim; does not call _validate_raster, but template dtype does not affect compute correctness here." mahalanobis,2026-04-27,1288,HIGH,1,,"HIGH (fixed #1288): mahalanobis() had no memory guard. Both _compute_stats_numpy/_compute_stats_cupy and _mahalanobis_pixel_numpy/_mahalanobis_pixel_cupy materialise float64 buffers of shape (n_bands, H*W) -- the np.stack at line 45/80, the reshape+transpose at line 184 (which forces a contiguous BLAS copy), the centered diff, and the diff @ inv_cov result are all live at peak. A 100kx100k 5-band raster projected to ~400 GB of host memory just for the stack. Fixed by adding _available_memory_bytes()/_available_gpu_memory_bytes() (mirroring cost_distance.py:261-292) plus _check_memory/_check_gpu_memory at 32 bytes/cell/band budget, and wiring them into the public mahalanobis() entry point before any np.stack runs. Eager paths (numpy, cupy) are guarded; dask paths skip the check because chunks are bounded by user-supplied chunksize. MEDIUM (unfixed, Cat 6): mahalanobis() does not call _validate_raster on each band -- validate_arrays only enforces matching shape and array-type, so boolean / non-numeric DataArrays silently coerce. 
Deferred to a separate PR per the security-sweep one-fix-per-PR policy. No other HIGH findings: Cat 2 (no int32 indexing, numpy default int64), Cat 3 (singular covariance raises a clean ValueError, dist_sq is clamped to 0 before sqrt to absorb numerical noise, NaN mask propagates correctly), Cat 4 (no CUDA kernels), Cat 5 (no file I/O beyond /proc/meminfo)." morphology,2026-04-24,1256,HIGH,1,,"HIGH (fixed #1256): morph_erode/morph_dilate/morph_opening/morph_closing/morph_gradient/morph_white_tophat/morph_black_tophat accepted a user-supplied kernel with only shape/dtype/odd-size validation. Kernel dimensions drove np.pad/cp.pad on every backend and map_overlap depth on dask paths; a 99999x99999 kernel on a 1000x1000 raster would try to allocate ~80 GB of padded float64 memory with no warning. Fixed by adding local _available_memory_bytes() helper and _check_kernel_memory(rows, cols, ky, kx) that raises MemoryError before allocation when padded size exceeds 50% of available RAM; wired into _dispatch() so every public API entry point is guarded across all four backends. Mirrors bilateral #1236, convolution #1241, bump #1231. No other HIGH findings: Cat 2 (loop indices are Python ints, numba promotes to int64), Cat 3 (NaN propagation explicit via v!=v in both numpy and CUDA paths, tests verify), Cat 4 (GPU kernels _erode_gpu/_dilate_gpu have if i 0 / scale_x > 0 enforced, AGGREGATE_METHODS rejects scale > 1.0, identity fast path bypasses dispatch entirely, all numba kernels guard count > 0 before division, no CUDA kernels (cupy paths use cupy ufuncs + cupyx.scipy.ndimage), no file I/O, all backends cast to float64 before computation and float32 on output." viewshed,2026-04-22,1229,HIGH,1,,"HIGH (fixed #1229): _viewshed_cpu allocated ~500 bytes/pixel of working memory (event_list 3*H*W*7*8 bytes + status_values/status_struct/idle + visibility_grid + lexsort temporary) with no guard. A 20000x20000 raster tried to allocate ~200 GB. 
Fixed by adding peak-memory guard mirroring the _viewshed_dask pattern (_available_memory_bytes() check, raises MemoryError with max_distance= hint). No other HIGH findings: dask path already guarded, _validate_raster is called, distance-sweep uses dtype=float64, _calc_dist_n_grad guards zero distance." zonal,2026-04-22,1227,HIGH,1;2;6,,"HIGH (fixed #1227): _stats_cupy used `if nodata_values:` (truthy) so nodata_values=0 silently skipped the filter on the cupy backend, producing wrong stats vs every other backend. MEDIUM (unfixed): _strides uses np.int32 for stride indices — can wrap for arrays > ~2B elements in the numpy path. MEDIUM (unfixed): hypsometric_integral() skips _validate_raster on zones/values; _regions_numpy has no memory guard (numpy-only path, bounded by caller-allocated input). MEDIUM (unfixed): _stats_numpy return_type='xarray.DataArray' allocates np.full((n_stats, values.size)) with no guard." -kde,2026-04-27,1287,HIGH,1,,"HIGH (fixed #1287): kde() and line_density() accepted user-controlled width/height with no upper bound. The eager numpy and cupy backends allocated np.zeros((height, width), dtype=float64) (or cupy.zeros) up front (kde.py: _run_kde_numpy line 308, _run_kde_cupy line 314, line_density inline at line 706). width=1_000_000, height=1_000_000 requested ~8 TB of float64 (or VRAM on the GPU path) before any check ran. Fixed by adding local _available_memory_bytes() helper (mirrors convolution/morphology/bump pattern) and _check_grid_memory(rows, cols) that raises MemoryError when rows*cols*8 exceeds 50% of available RAM. Wired into kde() (skipped for dask paths since _run_kde_dask_numpy/_run_kde_dask_cupy build per-tile via da.from_delayed and are bounded by chunk size) and line_density() (single numpy backend, always guarded). Error message names width/height so the caller knows which knob to turn. 
No other HIGH findings: Cat 2 (no int32 flat-index math, numba range loops are int64), Cat 3 (bandwidth <= 0 rejected, Silverman fallback returns 1.0 when sigma==0, NaN coords clamp to empty range via min/max), Cat 4 (_kde_cuda has 'if r >= rows or c >= cols: return' bounds guard at line 254, no shared memory, each thread writes own pixel), Cat 5 (no file I/O), Cat 6 (template only used for shape/coords, output dtype forced to float64). MEDIUM (unfixed, Cat 6): _validate_template only checks DataArray + ndim; does not call _validate_raster, but template dtype does not affect compute correctness here." diff --git a/xrspatial/resample.py b/xrspatial/resample.py index 21b69ba6..5eb93978 100644 --- a/xrspatial/resample.py +++ b/xrspatial/resample.py @@ -43,6 +43,90 @@ # epsilon. _INTERP_DEPTH = {'nearest': 1, 'bilinear': 1, 'cubic': 10} +# Approximate working-set size per output cell for the eager backends: +# one float64 working buffer (8 B) plus a float32 output cell (4 B). +# scipy.ndimage.map_coordinates also allocates a temporary of the same +# size during higher-order spline evaluation; the 0.5 * available bound +# below leaves room for that. +_BYTES_PER_OUTPUT_CELL = 12 + + +# -- Memory guard ------------------------------------------------------------ + +def _available_memory_bytes(): + """Best-effort estimate of available host memory in bytes.""" + # Try /proc/meminfo (Linux) + try: + with open('/proc/meminfo', 'r') as f: + for line in f: + if line.startswith('MemAvailable:'): + return int(line.split()[1]) * 1024 + except (OSError, ValueError, IndexError): + pass + # Try psutil + try: + import psutil + return psutil.virtual_memory().available + except (ImportError, AttributeError): + pass + # Fallback: 2 GB + return 2 * 1024 ** 3 + + +def _available_gpu_memory_bytes(): + """Best-effort estimate of free GPU memory in bytes. 
+ + Returns 0 when CuPy / CUDA is unavailable or the query fails -- callers + treat that as a sentinel meaning "no GPU info, skip the guard". + """ + try: + import cupy as _cp + free, _total = _cp.cuda.runtime.memGetInfo() + return int(free) + except Exception: + return 0 + + +def _check_resample_memory(out_h, out_w): + """Raise MemoryError if the eager output buffer would exceed RAM. + + The numpy and cupy-eager backends allocate a single (out_h, out_w) + float64 working buffer plus a float32 output before any actual work. + A user passing a huge ``scale_factor`` (or a tiny ``target_resolution``) + would otherwise OOM the process before this function returns. + """ + required = int(out_h) * int(out_w) * _BYTES_PER_OUTPUT_CELL + available = _available_memory_bytes() + if required > 0.5 * available: + raise MemoryError( + f"resample output of {out_h}x{out_w} would need " + f"~{required / 1e9:.1f} GB of working memory but only " + f"~{available / 1e9:.1f} GB is available. " + f"Use a smaller scale_factor / larger target_resolution, " + f"or pass a dask-backed DataArray for out-of-core processing." + ) + + +def _check_resample_gpu_memory(out_h, out_w): + """Raise MemoryError if the cupy-eager output buffer would exceed VRAM. + + Skips the check (returns silently) when free GPU memory cannot be + queried -- the kernel will fail later at the cupy.empty boundary + anyway. + """ + available = _available_gpu_memory_bytes() + if available <= 0: + return + required = int(out_h) * int(out_w) * _BYTES_PER_OUTPUT_CELL + if required > 0.5 * available: + raise MemoryError( + f"resample output of {out_h}x{out_w} would need " + f"~{required / 1e9:.1f} GB of GPU working memory but only " + f"~{available / 1e9:.1f} GB is free on the active device. " + f"Use a smaller scale_factor / larger target_resolution, " + f"or pass a dask+cupy DataArray for out-of-core processing." 
+ ) + # -- Output-geometry helpers ------------------------------------------------- @@ -742,6 +826,21 @@ def resample( out.name = name return out + # -- memory guard for eager backends ------------------------------------ + # Dask paths build per-chunk allocations lazily (chunk size already + # bounds peak memory). The eager numpy and cupy paths allocate the + # full (out_h, out_w) buffer up front and need an explicit guard. + in_h, in_w = agg.shape[-2:] + out_h, out_w = _output_shape(in_h, in_w, scale_y, scale_x) + + is_dask = da is not None and isinstance(agg.data, da.Array) + is_cupy = cupy is not None and isinstance(agg.data, cupy.ndarray) + if not is_dask: + if is_cupy: + _check_resample_gpu_memory(out_h, out_w) + else: + _check_resample_memory(out_h, out_w) + # -- dispatch to backend ------------------------------------------------- mapper = ArrayTypeFunctionMapping( numpy_func=_run_numpy, @@ -752,9 +851,6 @@ def resample( result_data = mapper(agg)(agg.data, scale_y, scale_x, method) # -- build output coordinates ------------------------------------------- - in_h, in_w = agg.shape[-2:] - out_h, out_w = _output_shape(in_h, in_w, scale_y, scale_x) - ydim, xdim = agg.dims[-2], agg.dims[-1] y_vals = np.asarray(agg[ydim].values, dtype=np.float64) x_vals = np.asarray(agg[xdim].values, dtype=np.float64) diff --git a/xrspatial/tests/test_resample.py b/xrspatial/tests/test_resample.py index a02600a7..d77e3061 100644 --- a/xrspatial/tests/test_resample.py +++ b/xrspatial/tests/test_resample.py @@ -378,3 +378,55 @@ def test_aggregate_parity(self, numpy_and_cupy_rasters, method): cp_out = resample(cp_agg, scale_factor=0.5, method=method) np.testing.assert_allclose(cp_out.data.get(), np_out.values, atol=1e-5, equal_nan=True) + + +# --------------------------------------------------------------------------- +# Memory guard (#1295) +# --------------------------------------------------------------------------- + +class TestMemoryGuard: + """Reject scale factors that would 
OOM the eager backends.""" + + def test_huge_scale_factor_raises(self, grid_4x4): + # 4 * 1e9 ~= 4e9 cells per axis -> 1.6e19 cells -> ~190 EB + with pytest.raises(MemoryError, match="resample output of"): + resample(grid_4x4, scale_factor=1e9, method='nearest') + + def test_huge_target_resolution_inverse_raises(self, grid_4x4): + # cellsize=1.0, target_resolution=1e-9 -> ~4e9 cells per axis + with pytest.raises(MemoryError, match="resample output of"): + resample(grid_4x4, target_resolution=1e-9, method='nearest') + + def test_huge_scale_factor_aggregate_path_unaffected(self, grid_4x4): + # Aggregate methods reject scale > 1.0 with ValueError before + # the memory guard runs; confirm that error path still wins. + with pytest.raises(ValueError, match="only supports downsampling"): + resample(grid_4x4, scale_factor=1e9, method='average') + + def test_normal_inputs_unaffected(self, grid_4x4): + # Sanity: a normal upsample call still works. + out = resample(grid_4x4, scale_factor=2.0, method='nearest') + assert out.shape == (8, 8) + + def test_error_message_names_parameters(self, grid_4x4): + # The hint should point the user at the parameters they control. + with pytest.raises(MemoryError) as excinfo: + resample(grid_4x4, scale_factor=1e9, method='bilinear') + msg = str(excinfo.value) + assert "scale_factor" in msg + assert "target_resolution" in msg + + def test_dask_path_skips_guard(self, grid_4x4): + # Dask backends build per-chunk allocations lazily -- the guard + # should not fire even for shapes that would OOM the eager path. + # We only check that the output graph builds; we never compute it. + if not dask_array_available(): + pytest.skip("dask not installed") + import dask.array as da + dask_agg = grid_4x4.copy() + dask_agg.data = da.from_array(grid_4x4.data, chunks=2) + # scale_factor=100 -> 400x400 output, well within RAM budget + # but still exercises the dask dispatch. We just want the + # guard not to short-circuit a reasonable dask call. 
+ out = resample(dask_agg, scale_factor=100.0, method='nearest') + assert out.shape == (400, 400)