|
| 1 | +""" |
| 2 | +Unit tests for fact extraction retry logic. |
| 3 | +
|
| 4 | +Tests the fix for the TypeError when LLM returns invalid JSON across all retries. |
| 5 | +Previously, `raise last_error` would raise None (TypeError) because last_error was |
| 6 | +only set in the BadRequestError handler, not when the LLM returned non-dict JSON. |
| 7 | +""" |
| 8 | + |
| 9 | +from datetime import datetime, timezone |
| 10 | +from unittest.mock import AsyncMock, MagicMock, patch |
| 11 | + |
| 12 | +import pytest |
| 13 | + |
| 14 | + |
def _make_config(llm_max_retries: int = 3, retain_llm_max_retries: int | None = None):
    """Create a minimal mock HindsightConfig for fact extraction tests.

    Only the attributes read by the extraction retry path are populated;
    retain_* values of None fall back to the corresponding global setting.
    """
    from hindsight_api.config import HindsightConfig

    settings = {
        "retain_llm_max_retries": retain_llm_max_retries,
        "llm_max_retries": llm_max_retries,
        "retain_llm_initial_backoff": None,
        "llm_initial_backoff": 0.0,
        "retain_llm_max_backoff": None,
        "llm_max_backoff": 0.0,
        "retain_max_completion_tokens": 8192,
        "retain_extraction_mode": "concise",
        "retain_extract_causal_links": False,
        "retain_mission": None,
    }
    config = MagicMock(spec=HindsightConfig)
    for attr, value in settings.items():
        setattr(config, attr, value)
    return config
| 31 | + |
| 32 | + |
def _make_llm_config(mock_response):
    """Create a mock LLMProvider whose async call() always yields *mock_response*."""
    from hindsight_api.engine.llm_wrapper import LLMProvider

    usage = MagicMock()
    # Make token usage absorb `+` so accumulation code (total += usage) works.
    usage.__add__ = lambda self, other: self

    provider = MagicMock(spec=LLMProvider)
    provider.provider = "mock"
    provider.call = AsyncMock(return_value=(mock_response, usage))
    return provider
| 43 | + |
| 44 | + |
@pytest.mark.asyncio
async def test_non_dict_json_all_retries_returns_empty():
    """
    An LLM returning non-dict JSON on every attempt must yield [] rather than
    raising TypeError ('exceptions must derive from BaseException').

    Regression detail: the retry loop was hardcoded to range(2) while its
    comparisons used config.llm_max_retries (default 10). On the final
    iteration (attempt=1) `attempt < 10 - 1` held, so the code hit `continue`,
    the loop exhausted, and `raise last_error` raised None → TypeError.
    """
    from hindsight_api.engine.retain.fact_extraction import _extract_facts_from_chunk

    # llm_max_retries=3 triggers the old bug (3 differs from the hardcoded 2).
    cfg = _make_config(llm_max_retries=3, retain_llm_max_retries=None)

    # The provider always returns a list, which is invalid (a dict is expected).
    provider = _make_llm_config(mock_response=[{"invalid": "response"}])

    prompt_target = (
        "hindsight_api.engine.retain.fact_extraction._build_extraction_prompt_and_schema"
    )
    with patch(prompt_target, return_value=("system prompt", MagicMock())):
        facts, _usage = await _extract_facts_from_chunk(
            chunk="Alice visited Paris in 2023.",
            chunk_index=0,
            total_chunks=1,
            event_date=datetime(2023, 1, 1, tzinfo=timezone.utc),
            context="travel notes",
            llm_config=provider,
            config=cfg,
            agent_name="test-agent",
        )

    assert facts == []
| 80 | + |
| 81 | + |
@pytest.mark.asyncio
async def test_non_dict_json_with_default_max_retries_returns_empty():
    """
    Same failure mode with llm_max_retries=10 (the real default config value).

    Under the old code the loop ran range(2) but compared against 10, so it
    always continued until exhaustion and then raised None → TypeError.
    """
    from hindsight_api.engine.retain.fact_extraction import _extract_facts_from_chunk

    cfg = _make_config(llm_max_retries=10, retain_llm_max_retries=None)
    provider = _make_llm_config(mock_response="not a dict at all")

    prompt_target = (
        "hindsight_api.engine.retain.fact_extraction._build_extraction_prompt_and_schema"
    )
    with patch(prompt_target, return_value=("system prompt", MagicMock())):
        facts, _usage = await _extract_facts_from_chunk(
            chunk="Some text.",
            chunk_index=0,
            total_chunks=1,
            event_date=datetime(2023, 6, 1, tzinfo=timezone.utc),
            context="",
            llm_config=provider,
            config=cfg,
            agent_name="agent",
        )

    assert facts == []
| 110 | + |
| 111 | + |
@pytest.mark.asyncio
async def test_retain_llm_max_retries_overrides_global():
    """
    A set retain_llm_max_retries must drive both the retry-loop range and
    every retry comparison (guarding against the old shadowing bug).
    """
    from hindsight_api.engine.retain.fact_extraction import _extract_facts_from_chunk

    # retain_llm_max_retries=5 takes precedence over llm_max_retries=10.
    cfg = _make_config(llm_max_retries=10, retain_llm_max_retries=5)

    # An integer is non-dict JSON, so every attempt is rejected.
    provider = _make_llm_config(mock_response=42)

    prompt_target = (
        "hindsight_api.engine.retain.fact_extraction._build_extraction_prompt_and_schema"
    )
    with patch(prompt_target, return_value=("system prompt", MagicMock())):
        facts, _usage = await _extract_facts_from_chunk(
            chunk="Bob likes Python.",
            chunk_index=0,
            total_chunks=1,
            event_date=datetime(2024, 1, 1, tzinfo=timezone.utc),
            context="",
            llm_config=provider,
            config=cfg,
            agent_name="agent",
        )

    assert facts == []
    # The LLM should have been invoked exactly retain_llm_max_retries times.
    assert provider.call.call_count == 5
0 commit comments