From fed6bf009eae74f7d7eee4eb3d8552ae17847f00 Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Thu, 14 May 2026 05:21:07 +0200 Subject: [PATCH] feat(features): lower ReplenishmentConfig count_window_days floor to 3 (#113) PRP-3.1C's pseudocode originally referenced W=3 as the smallest meaningful trailing window for replenishment-count features, but PRP-3.1A's schema set `ge=7` with no recorded rationale. The mismatch forced `test_count_window_uses_shift_then_rolling` to use W=7 instead of the original W=3 probe. This change aligns the schema with PRP-3.1C's intent: - `count_window_days = Field(default=14, ge=3, le=60)` (was `ge=7`) - Update boundary test to probe the new floor (count_window_days=2) - Refresh leakage-test docstring and docs/PHASE/3 range comment All existing call sites already use W=7 or W=14, so no behavioral regression. `shift(1).rolling(W, min_periods=1).sum()` works for any W>=1; W=3 is the smallest window that can detect a cadence trend. Closes #113. --- app/features/featuresets/schemas.py | 4 ++-- app/features/featuresets/tests/test_leakage.py | 8 ++++---- app/features/featuresets/tests/test_schemas.py | 4 ++-- docs/PHASE/3-FEATURE_ENGINEERING.md | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/app/features/featuresets/schemas.py b/app/features/featuresets/schemas.py index fa0c08f4..d490749b 100644 --- a/app/features/featuresets/schemas.py +++ b/app/features/featuresets/schemas.py @@ -224,7 +224,7 @@ class ReplenishmentConfig(FeatureConfigBase): include_days_since_last: Emit days_since_last_replenishment_lag{N}. include_count_window: Emit replenishment_count_w{W}_lag{N}. lag_days: Lag offset (>= 1). - count_window_days: Rolling-window size for count features (7-60). + count_window_days: Rolling-window size for count features (3-60). """ include_days_since_last: bool = True @@ -232,7 +232,7 @@ class ReplenishmentConfig(FeatureConfigBase): lag_days: int = Field(default=1, ge=1, le=30, description="Lag offset in days") count_window_days: int = Field( default=14, - ge=7, + ge=3, le=60, description="Rolling-window size for replenishment count features", ) diff --git a/app/features/featuresets/tests/test_leakage.py b/app/features/featuresets/tests/test_leakage.py index a677dc22..2b8f8629 100644 --- a/app/features/featuresets/tests/test_leakage.py +++ b/app/features/featuresets/tests/test_leakage.py @@ -659,10 +659,10 @@ def test_days_since_uses_only_past_events(self) -> None: def test_count_window_uses_shift_then_rolling(self) -> None: """CRITICAL: ``shift(1).rolling(W).sum()`` MUST be the order. - Events on dates 2024-01-01, 2024-01-03, 2024-01-05 with W=7 (the - smallest window the config schema allows; ``ReplenishmentConfig`` - bounds ``count_window_days`` at ``ge=7``). Sales rows on every - date 2024-01-01..2024-01-07. + Events on dates 2024-01-01, 2024-01-03, 2024-01-05 with W=7 + (comfortably above the ``ReplenishmentConfig`` floor of + ``count_window_days >= 3``). Sales rows on every date + 2024-01-01..2024-01-07. Per-row event_count = [1, 0, 1, 0, 1, 0, 0]. Correct ``shift(1).rolling(7, min_periods=1).sum()`` -> NaN at diff --git a/app/features/featuresets/tests/test_schemas.py b/app/features/featuresets/tests/test_schemas.py index db01ccbf..0f0d258c 100644 --- a/app/features/featuresets/tests/test_schemas.py +++ b/app/features/featuresets/tests/test_schemas.py @@ -195,9 +195,9 @@ def test_rejects_lag_days_out_of_bounds(self): ReplenishmentConfig(lag_days=31) def test_rejects_count_window_below_min(self): - """count_window_days below 7 should be rejected.""" + """count_window_days below 3 should be rejected.""" with pytest.raises(ValidationError): - ReplenishmentConfig(count_window_days=6) + ReplenishmentConfig(count_window_days=2) def test_rejects_count_window_above_max(self): """count_window_days above 60 should be rejected.""" diff --git a/docs/PHASE/3-FEATURE_ENGINEERING.md b/docs/PHASE/3-FEATURE_ENGINEERING.md index 8238ae7f..7e34bdf0 100644 --- a/docs/PHASE/3-FEATURE_ENGINEERING.md +++ b/docs/PHASE/3-FEATURE_ENGINEERING.md @@ -267,7 +267,7 @@ ReplenishmentConfig( include_days_since_last=True, # default True include_count_window=True, # default True lag_days=1, # 1..30 - count_window_days=14, # 7..60 + count_window_days=14, # 3..60 ) ```