From ed91d8fefbf56c8c7cb2bdf79dafeb6b20ff6cc5 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Sun, 10 May 2026 06:48:45 -0700 Subject: [PATCH 1/2] Run float-predictor encode through an ngjit row loop (#1554) --- xrspatial/geotiff/_compression.py | 14 +++-- .../tests/test_predictor_fp_write_1313.py | 52 +++++++++++++++++++ 2 files changed, 62 insertions(+), 4 deletions(-) diff --git a/xrspatial/geotiff/_compression.py b/xrspatial/geotiff/_compression.py index 007c78bd..40a39e7e 100644 --- a/xrspatial/geotiff/_compression.py +++ b/xrspatial/geotiff/_compression.py @@ -696,6 +696,15 @@ def _fp_predictor_encode_row(row_data, width, bps): row_data[i] = np.uint8((np.int32(row_data[i]) - np.int32(row_data[i - 1])) & 0xFF) +@ngjit +def _fp_predictor_encode_rows(data, width, height, bps): + """Dispatch per-row encode from Numba, avoiding Python loop overhead.""" + row_len = width * bps + for row in range(height): + start = row * row_len + _fp_predictor_encode_row(data[start:start + row_len], width, bps) + + def fp_predictor_encode(data: np.ndarray, width: int, height: int, bytes_per_sample: int) -> np.ndarray: """Apply floating-point predictor (predictor=3). @@ -715,10 +724,7 @@ def fp_predictor_encode(data: np.ndarray, width: int, height: int, Encoded array. 
""" buf = np.ascontiguousarray(data) - row_len = width * bytes_per_sample - for row in range(height): - start = row * row_len - _fp_predictor_encode_row(buf[start:start + row_len], width, bytes_per_sample) + _fp_predictor_encode_rows(buf, width, height, bytes_per_sample) return buf diff --git a/xrspatial/geotiff/tests/test_predictor_fp_write_1313.py b/xrspatial/geotiff/tests/test_predictor_fp_write_1313.py index d168a209..10e6dabb 100644 --- a/xrspatial/geotiff/tests/test_predictor_fp_write_1313.py +++ b/xrspatial/geotiff/tests/test_predictor_fp_write_1313.py @@ -201,3 +201,55 @@ def test_predictor3_multiband_round_trip(tmp_path): else: out_arr = out.values np.testing.assert_array_equal(out_arr, arr) + + +def test_predictor3_large_round_trip_value_exact(tmp_path): + """1024x1024 float32 deflate+predictor=3 round-trips with no value drift. + + The encode path was refactored to dispatch the per-row kernel from + inside an ``@ngjit`` wrapper instead of from a Python ``for`` loop. + This test guards against any silent corruption from the refactor by + asserting the output array is bit-for-bit equal to the input. + """ + h, w = 1024, 1024 + arr = _smooth_float((h, w), np.float32) + da = _da(arr) + path = tmp_path / 'pred3_large_round_trip_1313.tif' + to_geotiff(da, str(path), compression='deflate', predictor=3) + + assert _read_predictor_tag(str(path)) == 3 + out = open_geotiff(str(path)) + np.testing.assert_array_equal(out.values, arr) + + +@pytest.mark.skip(reason='timing test; flaky in CI, run manually for regression checks') +def test_predictor3_encode_within_2x_of_predictor2(tmp_path): + """Loose regression check: predictor=3 encode is within 2x of predictor=2. + + Before the ngjit row-loop refactor, predictor=3 was ~2.5x slower than + predictor=2 because the row loop was in Python. Skipped by default + so CI does not flake on shared-runner timing variance. 
+ """ + import time + + arr = _smooth_float((1024, 1024), np.float32) + da = _da(arr) + p2 = tmp_path / 'pred2_timing.tif' + p3 = tmp_path / 'pred3_timing.tif' + + # Warm up numba + to_geotiff(da, str(p2), compression='deflate', predictor=2) + to_geotiff(da, str(p3), compression='deflate', predictor=3) + + t0 = time.perf_counter() + to_geotiff(da, str(p2), compression='deflate', predictor=2) + t_p2 = time.perf_counter() - t0 + + t0 = time.perf_counter() + to_geotiff(da, str(p3), compression='deflate', predictor=3) + t_p3 = time.perf_counter() - t0 + + assert t_p3 < 2.0 * t_p2, ( + f'predictor=3 ({t_p3*1000:.1f} ms) is more than 2x slower than ' + f'predictor=2 ({t_p2*1000:.1f} ms); ngjit row loop may have regressed' + ) From e9dd897e3bb903d8a92902da432508512e09624d Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Sun, 10 May 2026 21:05:58 -0700 Subject: [PATCH 2/2] Address PR #1556 review - The "bit-for-bit equal" claim now actually checks bits: assert dtype matches and compare ``arr.tobytes() == out_arr.tobytes()`` so the test catches signed-zero drift, NaN payload changes, and any other bit-level divergence that ``np.testing.assert_array_equal`` would silently mask. - Convert the timing test from an unconditional ``@pytest.mark.skip`` to the existing ``XRSPATIAL_RUN_PERF_TESTS=1`` env-var gate used by ``test_streaming_write_parallel.test_streaming_write_perf_sanity``. Developers can now run it with one env var instead of editing the decorator. 
--- .../tests/test_predictor_fp_write_1313.py | 30 +++++++++++++++---- 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/xrspatial/geotiff/tests/test_predictor_fp_write_1313.py b/xrspatial/geotiff/tests/test_predictor_fp_write_1313.py index 10e6dabb..bcdf2872 100644 --- a/xrspatial/geotiff/tests/test_predictor_fp_write_1313.py +++ b/xrspatial/geotiff/tests/test_predictor_fp_write_1313.py @@ -9,6 +9,7 @@ """ from __future__ import annotations +import os import struct import numpy as np @@ -208,8 +209,11 @@ def test_predictor3_large_round_trip_value_exact(tmp_path): The encode path was refactored to dispatch the per-row kernel from inside an ``@ngjit`` wrapper instead of from a Python ``for`` loop. - This test guards against any silent corruption from the refactor by - asserting the output array is bit-for-bit equal to the input. + Guards against any silent corruption from the refactor by asserting + the output array is byte-for-byte identical to the input: dtype must + match, and the raw bytes from ``tobytes()`` must compare equal so the + check catches signed-zero drift, NaN payload changes, and any other + bit-level divergence that ``assert_array_equal`` would mask. 
""" h, w = 1024, 1024 arr = _smooth_float((h, w), np.float32) @@ -219,17 +223,31 @@ def test_predictor3_large_round_trip_value_exact(tmp_path): assert _read_predictor_tag(str(path)) == 3 out = open_geotiff(str(path)) - np.testing.assert_array_equal(out.values, arr) + out_arr = np.ascontiguousarray(out.values) + assert out_arr.dtype == arr.dtype, ( + f"dtype drift: in={arr.dtype}, out={out_arr.dtype}" + ) + assert out_arr.shape == arr.shape + assert out_arr.tobytes() == arr.tobytes(), ( + "predictor=3 round-trip diverged at the bit level " + "(signed zero, NaN payload, or actual corruption)" + ) -@pytest.mark.skip(reason='timing test; flaky in CI, run manually for regression checks') def test_predictor3_encode_within_2x_of_predictor2(tmp_path): """Loose regression check: predictor=3 encode is within 2x of predictor=2. Before the ngjit row-loop refactor, predictor=3 was ~2.5x slower than - predictor=2 because the row loop was in Python. Skipped by default - so CI does not flake on shared-runner timing variance. + predictor=2 because the row loop was in Python. Opt-in via + ``XRSPATIAL_RUN_PERF_TESTS=1`` -- shared CI runners, CPU throttling, + debug builds, and noisy filesystems all make absolute wall-clock + timings flaky, so the test stays off by default. Matches the + convention from ``test_streaming_write_parallel.py``. """ + if os.environ.get('XRSPATIAL_RUN_PERF_TESTS') != '1': + pytest.skip( + "set XRSPATIAL_RUN_PERF_TESTS=1 to run wall-clock perf tests") + import time arr = _smooth_float((1024, 1024), np.float32)