Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 47 additions & 2 deletions hindsight-api-slim/hindsight_api/engine/reflect/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,24 @@ def _is_context_overflow_error(exc: Exception) -> bool:
)


def _all_mental_models_are_usable_and_fresh(tool_output: dict[str, Any]) -> bool:
"""Return whether every retrieved mental model is explicitly fresh and has answerable content.

Used to decide — without an extra LLM call — whether a forced
``search_mental_models`` result is trustworthy enough to hand control back
to the agent. A model is usable only when it is explicitly ``is_stale ==
False`` (an unknown/missing staleness flag is treated as unsafe) and has
non-empty content.
"""
models = tool_output.get("mental_models") or []
for model in models:
if model.get("is_stale") is not False:
return False
if not str(model.get("content") or "").strip():
return False
return True


async def run_reflect_agent(
llm_config: "LLMProvider",
bank_id: str,
Expand Down Expand Up @@ -464,6 +482,11 @@ def _log_completion(answer: str, iterations: int, forced: bool = False):
)

consecutive_errors = 0
# When a forced ``search_mental_models`` returns fresh, usable models on a
# low/mid-budget call, we stop forcing the lower retrieval layers from this
# iteration onward and let the agent answer (or retrieve deeper itself)
# under ``auto`` tool choice. None means the full forced path still applies.
stop_forcing_from_iteration: int | None = None
for iteration in range(max_iterations):
is_last = iteration == max_iterations - 1

Expand Down Expand Up @@ -592,8 +615,11 @@ def _log_completion(answer: str, iterations: int, forced: bool = False):
if include_recall:
forced_sequence.append("recall")

if iteration < len(forced_sequence):
iter_tool_choice: str | dict = {"type": "function", "function": {"name": forced_sequence[iteration]}}
if stop_forcing_from_iteration is not None and iteration >= stop_forcing_from_iteration:
# A fresh mental model already short-circuited the forced path.
iter_tool_choice: str | dict = "auto"
elif iteration < len(forced_sequence):
iter_tool_choice = {"type": "function", "function": {"name": forced_sequence[iteration]}}
else:
iter_tool_choice = "auto"

Expand Down Expand Up @@ -956,6 +982,25 @@ def _log_completion(answer: str, iterations: int, forced: bool = False):
for mm in output["mental_models"]:
if "id" in mm:
available_mental_model_ids.add(mm["id"])
# Deterministic short-circuit (no extra LLM call): on a
# low/mid-budget call, if every retrieved mental model is
# fresh and has usable content, stop forcing the lower
# retrieval layers. The next iteration runs under ``auto``
# tool choice, so the agent can answer directly when the
# mental model suffices, or — having just read it — issue a
# targeted ``search_observations``/``recall`` itself. Stale,
# empty, or missing mental models keep the full forced path.
if (
stop_forcing_from_iteration is None
and (budget or "low").lower() != "high"
and output.get("mental_models")
and _all_mental_models_are_usable_and_fresh(output)
):
stop_forcing_from_iteration = iteration + 1
logger.info(
f"[REFLECT {reflect_id}] Fresh mental models sufficient on iteration {iteration + 1}; "
"releasing forced lower-level retrieval to auto."
)

if (
normalized_tool_name == "search_observations"
Expand Down
Loading
Loading