diff --git a/README.md b/README.md
index ad6de21..c8391d1 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
Reasoning-based Document Engine
-Reason, don't vector · Structure, not chunks · Agents, not embeddings · Exact, not synthesized
+Reason, don't vector · Structure, not chunks · Agents, not embeddings
[](https://pypi.org/project/vectorless/)
[](https://pepy.tech/projects/vectorless)
diff --git a/examples/session_walkthrough/README.md b/examples/session_walkthrough/README.md
new file mode 100644
index 0000000..17174a0
--- /dev/null
+++ b/examples/session_walkthrough/README.md
@@ -0,0 +1,32 @@
+# Session API Walkthrough
+
+Demonstrates the full high-level Vectorless Python API using the `Session` and `SyncSession` classes.
+
+## What it covers
+
+| # | Topic | API |
+|---|-------|-----|
+| 1 | Session creation | `Session()`, `from_env()`, `from_config_file()` |
+| 2 | Indexing sources | `index(content=)`, `index(path=)`, `index(bytes_data=)`, `index(directory=)` |
+| 3 | Batch indexing | `index_batch(paths, jobs=N)` |
+| 4 | Querying | `ask(question, doc_ids=)`, `ask(question, workspace_scope=True)` |
+| 5 | Streaming query | `query_stream()` async iterator |
+| 6 | Document management | `list_documents()`, `document_exists()`, `remove_document()`, `clear_all()` |
+| 7 | Document graph | `get_graph()` nodes, edges, keywords |
+| 8 | Event callbacks | `EventEmitter` with `@on_index` / `@on_query` decorators |
+| 9 | Metrics | `metrics_report()` |
+| 10 | Sync API | `SyncSession` (no async/await) |
+
+## Setup
+
+```bash
+pip install vectorless
+export VECTORLESS_API_KEY="sk-..."
+export VECTORLESS_MODEL="gpt-4o"
+```
+
+## Run
+
+```bash
+python main.py
+```
diff --git a/examples/session_walkthrough/main.py b/examples/session_walkthrough/main.py
new file mode 100644
index 0000000..f94e5bb
--- /dev/null
+++ b/examples/session_walkthrough/main.py
@@ -0,0 +1,589 @@
+"""
+Session API walkthrough -- demonstrates the full high-level Vectorless API.
+
+This example uses the Session class (recommended entry point) to cover:
+ 1. Session creation (constructor / from_env / from_config_file)
+ 2. Indexing from various sources (content, path, directory, bytes)
+ 3. Batch indexing with concurrency control
+ 4. Querying with doc_ids and workspace scope
+ 5. Streaming query with real-time events
+ 6. Document management (list, exists, remove, clear)
+ 7. Cross-document relationship graph
+ 8. Event callbacks for progress monitoring
+ 9. Metrics reporting
+ 10. SyncSession (synchronous API, no async/await)
+
+Usage:
+ export VECTORLESS_API_KEY="sk-..."
+ export VECTORLESS_MODEL="gpt-4o"
+ pip install vectorless
+ python main.py
+"""
+
+import asyncio
+import os
+import tempfile
+
+from vectorless import (
+ Session,
+ SyncSession,
+ EventEmitter,
+ VectorlessError,
+)
+from vectorless.events import IndexEventType, QueryEventType
+
+
+# ──────────────────────────────────────────────────────────────────
+# Sample documents used throughout the example
+# ──────────────────────────────────────────────────────────────────
+
+ARCHITECTURE_DOC = """\
+# Vectorless Architecture
+
+## Overview
+
+Vectorless is a reasoning-native document intelligence engine.
+It uses hierarchical semantic trees instead of vector embeddings.
+
+## Key Concepts
+
+- **Semantic Tree**: Documents are parsed into a tree of sections.
+- **LLM Navigation**: Queries are resolved by traversing the tree.
+- **No Vectors**: No embeddings, no similarity search, no vector DB.
+
+## Retrieval Flow
+
+Engine.query()
+ -> query/understand() -> QueryPlan
+ -> Orchestrator dispatches Workers
+ -> Workers navigate document trees
+ -> rerank -> synthesis -> answer
+"""
+
+FINANCE_DOC = """\
+# Q4 Financial Report
+
+## Revenue
+
+Total revenue for Q4 was $12.3M, up 15% from Q3.
+SaaS subscriptions accounted for $8.1M, consulting for $4.2M.
+
+## Costs
+
+Operating costs were $9.8M, including $3.2M in engineering salaries.
+Marketing spend was reduced by 8% to $1.5M.
+
+## Outlook
+
+Projected Q1 revenue is $13.5M based on current pipeline.
+"""
+
+SECURITY_DOC = """\
+# Security Policy
+
+## Authentication
+
+All API requests require a Bearer token in the Authorization header.
+Tokens expire after 24 hours and must be refreshed.
+
+## Data Encryption
+
+Data at rest is encrypted using AES-256. Data in transit uses TLS 1.3.
+
+## Audit Logging
+
+All access to sensitive data is logged and retained for 90 days.
+"""
+
+
+# ──────────────────────────────────────────────────────────────────
+# Helper: set up a temp directory with sample files
+# ──────────────────────────────────────────────────────────────────
+
+def create_sample_directory() -> tuple[str, list[str]]:
+ """Create a temp directory with sample documents. Returns (dir, paths)."""
+ tmpdir = tempfile.mkdtemp(prefix="vectorless_walkthrough_")
+ docs = {
+ "architecture.md": ARCHITECTURE_DOC,
+ "finance.md": FINANCE_DOC,
+ "security.md": SECURITY_DOC,
+ }
+ paths = []
+ for name, content in docs.items():
+ path = os.path.join(tmpdir, name)
+ with open(path, "w") as f:
+ f.write(content)
+ paths.append(path)
+ return tmpdir, paths
+
+
+def cleanup_directory(tmpdir: str) -> None:
+ """Remove all files in the temp directory."""
+ for fname in os.listdir(tmpdir):
+ os.remove(os.path.join(tmpdir, fname))
+ os.rmdir(tmpdir)
+
+
+# ──────────────────────────────────────────────────────────────────
+# Section 1: Session Creation
+# ──────────────────────────────────────────────────────────────────
+
+async def demo_session_creation() -> Session:
+ """Demonstrate different ways to create a Session."""
+ print("=" * 60)
+ print(" 1. Session Creation")
+ print("=" * 60)
+
+ # Option A: Constructor with explicit credentials
+ api_key = os.environ.get("VECTORLESS_API_KEY", "sk-...")
+ model = os.environ.get("VECTORLESS_MODEL", "gpt-4o")
+ endpoint = os.environ.get("VECTORLESS_ENDPOINT")
+
+ session = Session(api_key=api_key, model=model, endpoint=endpoint)
+ print(f" Created: {session}")
+
+ # Option B: from environment variables
+ # session = Session.from_env()
+
+ # Option C: from a config file
+ # session = Session.from_config_file("~/.vectorless/config.toml")
+
+ # Option D: with an EventEmitter for progress callbacks
+ # events = EventEmitter()
+ # session = Session(api_key=api_key, model=model, events=events)
+
+ print()
+ return session
+
+
+# ──────────────────────────────────────────────────────────────────
+# Section 2: Indexing from Various Sources
+# ──────────────────────────────────────────────────────────────────
+
+async def demo_indexing(session: Session, tmpdir: str, paths: list[str]) -> dict[str, str]:
+ """Demonstrate indexing from content, path, directory, and bytes."""
+ print("=" * 60)
+ print(" 2. Indexing")
+ print("=" * 60)
+
+ doc_ids: dict[str, str] = {}
+
+ # --- 2a. Index from in-memory content ---
+ print(" [content] Indexing from string...")
+ result = await session.index(
+ content=ARCHITECTURE_DOC,
+ format="markdown",
+ name="architecture",
+ )
+ doc_ids["architecture"] = result.doc_id # type: ignore[assignment]
+ print(f" doc_id: {result.doc_id}")
+ print(f" items: {result.total()}")
+
+ # --- 2b. Index from a file path ---
+ print(" [path] Indexing from file path...")
+ result = await session.index(path=paths[1], name="finance")
+ doc_ids["finance"] = result.doc_id # type: ignore[assignment]
+ print(f" doc_id: {result.doc_id}")
+
+ # --- 2c. Index from raw bytes ---
+ print(" [bytes] Indexing from raw bytes...")
+ result = await session.index(
+ bytes_data=SECURITY_DOC.encode("utf-8"),
+ format="markdown",
+ name="security",
+ )
+ doc_ids["security"] = result.doc_id # type: ignore[assignment]
+ print(f" doc_id: {result.doc_id}")
+
+ # --- 2d. Index a directory ---
+ print(" [dir] Indexing a directory...")
+ # Clear first to see fresh results
+ await session.clear_all()
+
+ result = await session.index(directory=tmpdir, name="all_docs")
+ print(f" doc_id: {result.doc_id}")
+ print(f" items: {len(result.items)}")
+ for item in result.items:
+ print(f" - {item.name} ({item.doc_id[:8]}...)")
+ doc_ids[item.name] = item.doc_id
+
+ print()
+ return doc_ids
+
+
+# ──────────────────────────────────────────────────────────────────
+# Section 3: Batch Indexing with Concurrency
+# ──────────────────────────────────────────────────────────────────
+
+async def demo_batch_indexing(session: Session, paths: list[str]) -> list[str]:
+ """Demonstrate batch indexing with concurrent jobs."""
+ print("=" * 60)
+ print(" 3. Batch Indexing (concurrency=2)")
+ print("=" * 60)
+
+ # Clear to start fresh
+ await session.clear_all()
+
+ results = await session.index_batch(
+ paths,
+ mode="default",
+ jobs=2, # max 2 concurrent indexing operations
+ force=False,
+ )
+
+ doc_ids = []
+ for r in results:
+ print(f" {r.doc_id[:8]}... ({len(r.items)} items)")
+ for item in r.items:
+ doc_ids.append(item.doc_id)
+
+ print(f" Batch indexed {len(results)} file(s), {len(doc_ids)} document(s) total")
+ print()
+ return doc_ids
+
+
+# ──────────────────────────────────────────────────────────────────
+# Section 4: Querying
+# ──────────────────────────────────────────────────────────────────
+
+async def demo_querying(session: Session, doc_ids: list[str]) -> None:
+ """Demonstrate querying with doc_ids and workspace scope."""
+ print("=" * 60)
+ print(" 4. Querying")
+ print("=" * 60)
+
+ # --- Query specific documents ---
+ print(" [ask] Query specific documents...")
+ response = await session.ask(
+ "What was the total revenue for Q4?",
+ doc_ids=doc_ids[:2], # limit to first two docs
+ )
+
+ result = response.single()
+ if result:
+ print(f" Score: {result.score:.2f}")
+ print(f" Confidence: {result.confidence:.2f}")
+ print(f" Answer: {result.content[:150]}...")
+ if result.evidence:
+ print(f" Evidence: {len(result.evidence)} item(s)")
+ for ev in result.evidence[:2]:
+ print(f" - {ev.title}: {ev.content[:80]}...")
+ if result.metrics:
+ print(f" LLM calls: {result.metrics.llm_calls}")
+ print(f" Nodes: {result.metrics.nodes_visited}")
+
+ # --- Query across all documents ---
+ print()
+ print(" [workspace_scope] Query across entire workspace...")
+ response = await session.ask(
+ "How is data encrypted?",
+ workspace_scope=True,
+ )
+ for item in response.items:
+ print(f" [{item.doc_id[:8]}...] score={item.score:.2f}")
+ print(f" {item.content[:120]}...")
+
+ # --- Query with timeout ---
+ print()
+ print(" [timeout] Query with 30s timeout...")
+ try:
+ response = await session.ask(
+ "What is the retrieval flow?",
+ doc_ids=doc_ids,
+ timeout_secs=30,
+ )
+ if response.single():
+ print(f" Answer: {response.single().content[:150]}...")
+ except VectorlessError as e:
+ print(f" Error: {e}")
+
+ print()
+
+
+# ──────────────────────────────────────────────────────────────────
+# Section 5: Streaming Query
+# ──────────────────────────────────────────────────────────────────
+
+async def demo_streaming(session: Session, doc_ids: list[str]) -> None:
+ """Demonstrate streaming query with real-time events."""
+ print("=" * 60)
+ print(" 5. Streaming Query")
+ print("=" * 60)
+
+ stream = await session.query_stream(
+ "What are the key concepts?",
+ doc_ids=doc_ids[:1],
+ )
+
+ event_count = 0
+ async for event in stream:
+ event_count += 1
+ event_type = event.get("type", "unknown")
+ # Print a compact summary of each event
+ if event_type == "completed":
+ results = event.get("results", [])
+ print(f" [{event_count}] completed — {len(results)} result(s)")
+ elif event_type == "error":
+ print(f" [{event_count}] error — {event.get('message', '')}")
+ else:
+ print(f" [{event_count}] {event_type}")
+
+ # The final result is available after iteration completes
+ if stream.result:
+ final = stream.result
+ item = final.single()
+ if item:
+ print(f" Final answer: {item.content[:150]}...")
+
+ print()
+
+
+# ──────────────────────────────────────────────────────────────────
+# Section 6: Document Management
+# ──────────────────────────────────────────────────────────────────
+
+async def demo_document_management(session: Session, doc_ids: list[str]) -> None:
+ """Demonstrate list, exists, remove, and clear."""
+ print("=" * 60)
+ print(" 6. Document Management")
+ print("=" * 60)
+
+ # --- List all documents ---
+ docs = await session.list_documents()
+ print(f" Listed {len(docs)} document(s):")
+ for doc in docs:
+ pages = f", pages={doc.page_count}" if doc.page_count else ""
+ print(f" {doc.name} id={doc.id[:8]}... format={doc.format}{pages}")
+
+ # --- Check existence ---
+ if doc_ids:
+ exists = await session.document_exists(doc_ids[0])
+ print(f"\n exists({doc_ids[0][:8]}...): {exists}")
+
+ # --- Remove a document ---
+ if len(doc_ids) > 1:
+ removed = await session.remove_document(doc_ids[1])
+ print(f" remove({doc_ids[1][:8]}...): {removed}")
+
+ # Verify removal
+ exists_after = await session.document_exists(doc_ids[1])
+ print(f" exists after removal: {exists_after}")
+
+ # --- List again ---
+ docs = await session.list_documents()
+ print(f"\n After removal: {len(docs)} document(s)")
+
+ print()
+
+
+# ──────────────────────────────────────────────────────────────────
+# Section 7: Cross-Document Relationship Graph
+# ──────────────────────────────────────────────────────────────────
+
+async def demo_graph(session: Session) -> None:
+ """Demonstrate the cross-document relationship graph."""
+ print("=" * 60)
+ print(" 7. Document Graph")
+ print("=" * 60)
+
+ graph = await session.get_graph()
+
+ if graph is None or graph.is_empty():
+ print(" Graph is empty (no documents or no relationships found)")
+ print()
+ return
+
+ print(f" Nodes: {graph.node_count()}, Edges: {graph.edge_count()}")
+
+ for did in graph.doc_ids():
+ node = graph.get_node(did)
+ if node:
+ keywords = ", ".join(k.keyword for k in node.top_keywords[:5])
+ neighbors = graph.get_neighbors(did)
+ print(f" {node.title}")
+ print(f" format: {node.format}, nodes: {node.node_count}")
+ print(f" keywords: [{keywords}]")
+ print(f" neighbors: {len(neighbors)}")
+ for edge in neighbors[:3]:
+ target = graph.get_node(edge.target_doc_id)
+ target_name = target.title if target else edge.target_doc_id[:8]
+ weight_str = f"weight={edge.weight:.2f}"
+ evidence_str = ""
+ if edge.evidence:
+ evidence_str = f", shared_keywords={edge.evidence.shared_keyword_count}"
+ print(f" -> {target_name} ({weight_str}{evidence_str})")
+
+ print()
+
+
+# ──────────────────────────────────────────────────────────────────
+# Section 8: Event Callbacks
+# ──────────────────────────────────────────────────────────────────
+
+async def demo_events() -> None:
+ """Demonstrate event callbacks with EventEmitter."""
+ print("=" * 60)
+ print(" 8. Event Callbacks")
+ print("=" * 60)
+
+ events = EventEmitter()
+
+ @events.on_index
+ def on_index_event(event):
+ if event.event_type == IndexEventType.STARTED:
+ print(f" [INDEX] Started: {event.path or event.message}")
+ elif event.event_type == IndexEventType.COMPLETE:
+ print(f" [INDEX] Complete: {event.doc_id or event.message}")
+ elif event.event_type == IndexEventType.ERROR:
+ print(f" [INDEX] Error: {event.message}")
+
+ @events.on_query
+ def on_query_event(event):
+ if event.event_type == QueryEventType.STARTED:
+ print(f" [QUERY] Started: {event.query}")
+ elif event.event_type == QueryEventType.COMPLETE:
+ print(f" [QUERY] Complete: {event.total_results} result(s)")
+
+ # Create a session with the event emitter
+ api_key = os.environ.get("VECTORLESS_API_KEY", "sk-...")
+ model = os.environ.get("VECTORLESS_MODEL", "gpt-4o")
+ session = Session(api_key=api_key, model=model, events=events)
+
+ # Index and query — events fire automatically
+ await session.index(content=ARCHITECTURE_DOC, format="markdown", name="demo_events")
+ await session.ask("What are the key concepts?", workspace_scope=True)
+
+ await session.clear_all()
+ print()
+
+
+# ──────────────────────────────────────────────────────────────────
+# Section 9: Metrics
+# ──────────────────────────────────────────────────────────────────
+
+async def demo_metrics(session: Session) -> None:
+ """Demonstrate metrics reporting."""
+ print("=" * 60)
+ print(" 9. Metrics Report")
+ print("=" * 60)
+
+ report = session.metrics_report()
+ if report:
+ # The report contains llm and retrieval subsections
+ if hasattr(report, "llm"):
+ llm = report.llm
+ print(f" LLM Metrics:")
+ print(f" Total calls: {getattr(llm, 'total_calls', 'N/A')}")
+ print(f" Total tokens: {getattr(llm, 'total_tokens', 'N/A')}")
+ print(f" Cache hit rate: {getattr(llm, 'cache_hit_rate', 'N/A')}")
+ if hasattr(report, "retrieval"):
+ ret = report.retrieval
+ print(f" Retrieval Metrics:")
+ print(f" Total queries: {getattr(ret, 'total_queries', 'N/A')}")
+ print(f" Avg latency: {getattr(ret, 'avg_latency_ms', 'N/A')} ms")
+ else:
+ print(" No metrics available")
+
+ print()
+
+
+# ──────────────────────────────────────────────────────────────────
+# Section 10: SyncSession (Synchronous API)
+# ──────────────────────────────────────────────────────────────────
+
+def demo_sync_session() -> None:
+ """Demonstrate the synchronous Session (no async/await needed)."""
+ print("=" * 60)
+ print(" 10. SyncSession (no async/await)")
+ print("=" * 60)
+
+ api_key = os.environ.get("VECTORLESS_API_KEY", "sk-...")
+ model = os.environ.get("VECTORLESS_MODEL", "gpt-4o")
+
+ # Can also use: SyncSession.from_env()
+ with SyncSession(api_key=api_key, model=model) as session:
+ # Index from content
+ result = session.index(
+ content=FINANCE_DOC,
+ format="markdown",
+ name="sync_demo",
+ )
+ print(f" Indexed: {result.doc_id}")
+
+ # Query
+ response = session.ask(
+ "What was the total revenue?",
+ doc_ids=[result.doc_id], # type: ignore[list-item]
+ )
+ item = response.single()
+ if item:
+ print(f" Answer: {item.content[:150]}...")
+
+ # Cleanup
+ session.clear_all()
+ print(" Cleaned up")
+
+ print()
+
+
+# ──────────────────────────────────────────────────────────────────
+# Main
+# ──────────────────────────────────────────────────────────────────
+
+async def main() -> None:
+ print()
+ print(" Vectorless — Session API Walkthrough")
+ print(" " + "-" * 38)
+ print()
+
+ # 1. Create session
+ session = await demo_session_creation()
+
+ # Set up sample directory
+ tmpdir, paths = create_sample_directory()
+
+ try:
+ # 2. Indexing
+ doc_id_map = await demo_indexing(session, tmpdir, paths)
+ all_doc_ids = list(doc_id_map.values())
+
+ # 3. Batch indexing (clears and re-indexes)
+ batch_doc_ids = await demo_batch_indexing(session, paths)
+ all_doc_ids = batch_doc_ids if batch_doc_ids else all_doc_ids
+
+ # 4. Querying
+ if all_doc_ids:
+ await demo_querying(session, all_doc_ids)
+
+ # 5. Streaming query
+ if all_doc_ids:
+ await demo_streaming(session, all_doc_ids)
+
+ # 6. Document management
+ await demo_document_management(session, all_doc_ids)
+
+ # 7. Graph
+ await demo_graph(session)
+
+ # 8. Events (creates its own session)
+ await demo_events()
+
+ # 9. Metrics
+ await demo_metrics(session)
+
+ finally:
+ # Cleanup
+ await session.clear_all()
+ cleanup_directory(tmpdir)
+ print("=" * 60)
+ print(" Cleanup complete.")
+ print("=" * 60)
+
+ # 10. SyncSession (separate, runs synchronously)
+ demo_sync_session()
+
+ print(" Done.")
+
+
+if __name__ == "__main__":
+ asyncio.run(main())
diff --git a/rust/examples/single_doc_challenge.rs b/rust/examples/single_doc_challenge.rs
new file mode 100644
index 0000000..332c241
--- /dev/null
+++ b/rust/examples/single_doc_challenge.rs
@@ -0,0 +1,254 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Single-document reasoning challenge.
+//!
+//! Indexes a realistic technical document and asks questions that require
+//! the engine to navigate deep into the tree, cross-reference details
+//! across distant sections, and extract information buried in nested
+//! structures — not surface-level keyword matches.
+//!
+//! ```bash
+//! LLM_API_KEY=sk-xxx LLM_MODEL=gpt-4o \
+//! LLM_ENDPOINT=https://api.openai.com/v1 \
+//! cargo run --example single_doc_challenge
+//! ```
+
+use vectorless::{DocumentFormat, EngineBuilder, IndexContext, QueryContext};
+
+/// A research report with information scattered across sections.
+/// The answers to the challenge questions require connecting dots
+/// from different parts of the document, not simple keyword lookup.
+const REPORT: &str = r#"
+# Quantum Computing Division — Annual Research Report 2025
+
+## Executive Summary
+
+The Quantum Computing Division achieved several milestones in fiscal year 2025.
+Total division revenue reached $47.2M, representing 23% year-over-year growth.
+The division employed 312 staff across four research labs as of December 2025.
+Headcount grew by 18% during the year, with the majority of new hires in the
+error correction and cryogenics teams.
+
+The board approved a $200M capital investment program spanning 2025-2028.
+Phase 1 ($52M) was fully deployed in 2025, primarily in dilution refrigerator
+procurement and cleanroom expansion at the Zurich facility.
+
+## Research Labs
+
+### Lab A — Superconducting Qubits (Zurich)
+
+Lab A focuses on transmon qubit design and fabrication. The lab operates
+two dilution refrigerators: FR-01 (purchased 2023, 20mK base temperature)
+and FR-02 (commissioned Q3 2025, 15mK base temperature). FR-02 was the
+single largest capital expenditure in 2025 at $8.7M.
+
+Current qubit specifications:
+- Qubit count: 127 (FR-01: 64, FR-02: 63)
+- Average T1 coherence time: 142 microseconds (up from 98μs in 2024)
+- Average T2 coherence time: 89 microseconds
+- Single-qubit gate fidelity: 99.92%
+- Two-qubit gate fidelity: 99.67%
+- Readout fidelity: 99.81%
+
+The 2025 coherence improvement was primarily driven by the transition from
+aluminum to tantalum transmon junctions, which reduced two-level system (TLS)
+defect density by 40%.
+
+### Lab B — Topological Qubits (Tokyo)
+
+Lab B pursues Majorana-based topological qubits using semiconductor-superconductor
+nanowires. The team fabricated 12 nanowire devices during 2025, of which 3
+demonstrated measurable topological gap. This is a significant improvement
+over 2024 when only 1 device out of 8 showed the gap.
+
+The topological gap measurement protocol requires the device temperature to
+remain below 20mK throughout the 48-hour characterization cycle. Only FR-02
+in Zurich meets this requirement, so Lab B ships devices to Zurich for final
+characterization — creating a logistical dependency between the two labs.
+
+Key metric: topological gap size averaged 0.35meV across successful devices,
+compared to the theoretical target of 0.5meV. The gap-to-target ratio improved
+from 48% in 2024 to 70% in 2025.
+
+### Lab C — Quantum Error Correction (Cambridge)
+
+Lab C develops surface code error correction protocols. In 2025, the team
+achieved a critical milestone: below-threshold error correction on a 17-qubit
+surface code patch, reducing logical error rate from 2.1×10⁻² to 3.4×10⁻³
+per correction cycle.
+
+The threshold simulations used Lab A's measured gate fidelities as input
+parameters. The below-threshold result was only possible after Lab A's T1
+coherence improvement from 98μs to 142μs — the simulation models showed
+that the 98μs regime was above the error correction threshold for the 17-qubit
+code, making the Lab A / Lab C dependency critical.
+
+Lab C also developed a new decoder algorithm called "Cascade" that reduces
+classical processing latency from 1.2μs to 0.4μs per syndrome extraction cycle.
+This decoder runs on an FPGA co-processor board that was custom-designed by
+Lab D.
+
+### Lab D — Control Systems (Boston)
+
+Lab D designs and manufactures the classical control electronics for all qubit
+types. The flagship product is the QCS-4 control system, capable of driving
+up to 256 qubit channels with 14-bit DAC resolution and sub-nanosecond timing
+precision.
+
+In 2025, Lab D delivered 4 QCS-4 units to Lab A and 2 units to Lab B.
+Lab C received a modified QCS-4 variant with the integrated FPGA decoder
+co-processor. The FPGA decoder board is a custom design: Xilinx Ultrascale+
+XCU26 FPGA, 400k logic cells, running at 350MHz. Lab D is the sole source
+for this board — there is no commercial equivalent.
+
+A notable incident occurred in August 2025 when a firmware bug in the QCS-4
+DAC calibration routine caused systematic phase errors in two-qubit gate
+operations. The bug was traced to an integer overflow in the calibration LUT
+when operating above 4.2 GHz. The issue affected Lab A's FR-01 for 11 days
+before a patched firmware was deployed. During this period, Lab A's measured
+two-qubit gate fidelity temporarily dropped to 97.31%.
+
+## Financial Summary
+
+| Category | 2024 | 2025 | Change |
+|----------|------|------|--------|
+| Revenue | $38.4M | $47.2M | +23% |
+| R&D Expense | $31.6M | $38.9M | +23% |
+| Capital Expenditure | $18.2M | $52.0M | +186% |
+| Staff Count (Dec) | 264 | 312 | +18% |
+| Patents Filed | 14 | 19 | +36% |
+
+Revenue breakdown by source:
+- Government contracts: $19.8M (42%)
+- Enterprise partnerships: $15.3M (32%)
+- IP licensing: $8.6M (18%)
+- Consulting services: $3.5M (8%)
+
+The $52M capital expenditure in 2025 included:
+- FR-02 dilution refrigerator (Zurich): $8.7M
+- Cleanroom expansion (Zurich): $14.2M
+- Nanowire fabrication equipment (Tokyo): $6.1M
+- FPGA development and QCS-4 production (Boston): $9.4M
+- General infrastructure and IT: $13.6M
+
+## Outlook for 2026
+
+Priority goals for 2026:
+1. Scale to 256 superconducting qubits by Q3 (requires a third dilution
+ refrigerator, procurement estimated at $9-11M)
+2. Achieve topological gap above 0.45meV (requires device process improvement)
+3. Demonstrate below-threshold error correction on a 49-qubit surface code
+ (requires both 256-qubit hardware AND the Cascade decoder scaling to
+ larger code distances)
+4. File 25+ patents
+5. Grow revenue to $60M
+"#;
+
+/// Challenge questions designed to test deep reasoning.
+/// None of these can be answered by simple keyword search —
+/// each requires connecting information from multiple sections.
+const CHALLENGE_QUESTIONS: &[&str] = &[
+ // Requires: cross-reference Lab B's device characterization needs with
+ // Lab A's FR-02 specs, then connect to the CapEx table for FR-02 cost
+ "How much did the only refrigerator capable of characterizing Lab B's devices cost, and where is it located?",
+
+ // Requires: trace Lab C's below-threshold result → depends on Lab A's T1
+ // improvement → depends on tantalum junction transition
+ "What specific materials change in another lab made Lab C's error correction milestone possible?",
+
+ // Requires: find the firmware bug in Lab D section, then look at the
+ // Lab A FR-01 qubit count, then compute the impact window
+ "How many qubits were affected by the firmware bug, and for how many days?",
+
+ // Requires: Lab B gap/target ratio (70%) × theoretical target (0.5meV)
+ // → actual gap = 0.35meV, compare with 2026 goal of 0.45meV
+ "What is the gap between Lab B's current topological gap achievement and the 2026 target, in meV?",
+
+ // Requires: trace the dependency chain: 256-qubit goal → need FR-03 →
+ // cost $9-11M → government contracts are largest revenue source at $19.8M
+ "If the 2026 qubit scaling goal requires a new refrigerator, can the largest revenue source category alone cover its estimated cost?",
+];
+
+#[tokio::main]
+async fn main() -> vectorless::Result<()> {
+ tracing_subscriber::fmt()
+ .compact()
+ .with_env_filter(
+ tracing_subscriber::EnvFilter::try_from_default_env()
+ .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")),
+ )
+ .init();
+
+ println!("=== Single-Document Reasoning Challenge ===\n");
+
+ let api_key = std::env::var("LLM_API_KEY").unwrap_or_else(|_| "sk-...".to_string());
+ let model = std::env::var("LLM_MODEL").unwrap_or_else(|_| "gpt-4o".to_string());
+ let endpoint =
+ std::env::var("LLM_ENDPOINT").unwrap_or_else(|_| "https://api.openai.com/v1".to_string());
+
+ let engine = EngineBuilder::new()
+ .with_key(&api_key)
+ .with_model(&model)
+ .with_endpoint(&endpoint)
+ .build()
+ .await
+ .map_err(|e| vectorless::Error::Config(e.to_string()))?;
+
+ // Index (skip if already indexed — we're testing retrieval, not indexing)
+ let doc_name = "qc_report_2025";
+ let doc_id = {
+ let existing = engine.list().await?;
+ if let Some(doc) = existing.iter().find(|d| d.name == doc_name) {
+ println!("Document already indexed, reusing: {}\n", doc.id);
+ doc.id.clone()
+ } else {
+ println!("Indexing research report...");
+ let result = engine
+ .index(IndexContext::from_content(REPORT, DocumentFormat::Markdown).with_name(doc_name))
+ .await?;
+ let id = result.doc_id().unwrap().to_string();
+ println!(" doc_id: {}\n", id);
+ id
+ }
+ };
+
+ // Challenge queries
+ for (i, question) in CHALLENGE_QUESTIONS.iter().enumerate() {
+ println!("Q{}: {}", i + 1, question);
+
+ match engine
+ .query(QueryContext::new(*question).with_doc_ids(vec![doc_id.clone()]))
+ .await
+ {
+ Ok(response) => {
+ if let Some(item) = response.single() {
+ if item.content.is_empty() {
+ println!(" (no answer found)\n");
+ } else {
+ // Print first 3 lines as preview
+ for line in item.content.lines().take(3) {
+ println!(" {}", line);
+ }
+ let remaining = item.content.lines().count().saturating_sub(3);
+ if remaining > 0 {
+ println!(" ... ({} more lines)", remaining);
+ }
+ println!(" confidence: {:.2}\n", item.confidence);
+ }
+ } else {
+ println!(" (no results)\n");
+ }
+ }
+ Err(e) => {
+ println!(" error: {}\n", e);
+ }
+ }
+ }
+
+ // Uncomment to remove the document after testing:
+ // engine.remove(&doc_id).await?;
+ // println!("Cleaned up.");
+
+ Ok(())
+}
diff --git a/rust/src/agent/command.rs b/rust/src/agent/command.rs
index 6d983dc..5507a1d 100644
--- a/rust/src/agent/command.rs
+++ b/rust/src/agent/command.rs
@@ -183,8 +183,11 @@ pub fn resolve_target(
/// Resolve a cd/cat target with additional context from the tree node titles.
///
-/// This extended resolver also checks against the actual tree node titles
-/// (in case NavEntry titles differ from TreeNode titles).
+/// Matching priority:
+/// 1. Direct children via NavigationIndex (exact, case-insensitive, substring, numeric)
+/// 2. Direct children via TreeNode titles (case-insensitive contains)
+/// 3. Deep descendant search (BFS, up to depth 4) — enables `cd "Research Labs"` from
+/// root when "Research Labs" is a grandchild behind an intermediate wrapper node.
pub fn resolve_target_extended(
target: &str,
nav_index: &NavigationIndex,
@@ -197,10 +200,10 @@ pub fn resolve_target_extended(
return Some(id);
}
- // Extended: check all children by their TreeNode titles
- let children: Vec<NodeId> = tree.children_iter(current_node).collect();
let target_lower = target.to_lowercase();
+ // Extended: check all direct children by their TreeNode titles
+ let children: Vec<NodeId> = tree.children_iter(current_node).collect();
for child_id in &children {
if let Some(node) = tree.get(*child_id) {
if node.title.to_lowercase().contains(&target_lower) {
@@ -209,6 +212,35 @@ pub fn resolve_target_extended(
}
}
+ // Deep search: BFS through descendants up to depth 4.
+ // Returns the shallowest match so `cd "Research Labs"` from root finds it
+ // at depth 1 even if another "Research Labs" exists deeper.
+ search_descendants(&target_lower, current_node, tree, 4)
+}
+
+/// BFS through descendants, returning the shallowest matching NodeId.
+fn search_descendants(
+    target_lower: &str,
+    start: NodeId,
+    tree: &crate::document::DocumentTree,
+    max_depth: usize,
+) -> Option<NodeId> {
+    // FIFO frontier walked by a cursor. Vec::pop() would take from the
+    // BACK (LIFO/DFS) and could return a deeper match before a shallower one.
+    let mut queue: Vec<(NodeId, usize)> = vec![(start, 0)];
+    let mut head = 0;
+    while head < queue.len() {
+        let (node_id, depth) = queue[head]; head += 1;
+        if depth >= max_depth { continue; }
+        for child_id in tree.children_iter(node_id) {
+            if let Some(node) = tree.get(child_id) {
+                if node.title.to_lowercase().contains(target_lower) {
+                    return Some(child_id);
+                }
+            }
+            queue.push((child_id, depth + 1));
+        }
+    }
+
None
}
@@ -470,6 +502,65 @@ mod tests {
assert!(resolve_target("anything", &nav_index, tree.root()).is_none());
}
+ #[test]
+ fn test_resolve_target_extended_deep_search() {
+ use crate::document::{ChildRoute, DocumentTree};
+
+ // root → "Quantum Computing Division" → "Research Labs" → "Lab B"
+ let mut tree = DocumentTree::new("Root", "root content");
+ let root = tree.root();
+ let wrapper = tree.add_child(root, "Quantum Computing Division", "wrapper");
+ let labs = tree.add_child(wrapper, "Research Labs", "labs content");
+ let lab_b = tree.add_child(labs, "Lab B", "lab b content");
+
+ let mut nav = NavigationIndex::new();
+ nav.add_child_routes(
+ root,
+ vec![ChildRoute {
+ node_id: wrapper,
+ title: "Quantum Computing Division".to_string(),
+ description: "Division".to_string(),
+ leaf_count: 7,
+ }],
+ );
+ nav.add_child_routes(
+ wrapper,
+ vec![ChildRoute {
+ node_id: labs,
+ title: "Research Labs".to_string(),
+ description: "Labs".to_string(),
+ leaf_count: 4,
+ }],
+ );
+ nav.add_child_routes(
+ labs,
+ vec![ChildRoute {
+ node_id: lab_b,
+ title: "Lab B".to_string(),
+ description: "Topological".to_string(),
+ leaf_count: 1,
+ }],
+ );
+
+ // "Research Labs" is a grandchild of root — deep search should find it
+ assert_eq!(
+ resolve_target_extended("Research Labs", &nav, root, &tree),
+ Some(labs)
+ );
+
+ // "Lab B" is a great-grandchild — deep search should find it
+ assert_eq!(
+ resolve_target_extended("Lab B", &nav, root, &tree),
+ Some(lab_b)
+ );
+
+ // Direct children should still work via primary resolver
+ assert_eq!(
+ resolve_target_extended("Quantum Computing Division", &nav, root, &tree),
+ Some(wrapper)
+ );
+ }
+
#[test]
fn test_parse_grep() {
assert_eq!(
diff --git a/rust/src/agent/config.rs b/rust/src/agent/config.rs
index b276d94..0873c8c 100644
--- a/rust/src/agent/config.rs
+++ b/rust/src/agent/config.rs
@@ -23,8 +23,8 @@ pub struct WorkerConfig {
impl Default for WorkerConfig {
fn default() -> Self {
Self {
- max_rounds: 8,
- max_llm_calls: 15,
+ max_rounds: 100,
+ max_llm_calls: 200,
}
}
}
diff --git a/rust/src/agent/orchestrator/evaluate.rs b/rust/src/agent/orchestrator/evaluate.rs
index 27c8aab..88e7e07 100644
--- a/rust/src/agent/orchestrator/evaluate.rs
+++ b/rust/src/agent/orchestrator/evaluate.rs
@@ -34,6 +34,10 @@ pub async fn evaluate(
let evidence_summary = format_evidence_summary(evidence);
let (system, user) = check_sufficiency(query, &evidence_summary);
+ info!(
+ evidence = evidence.len(),
+ "Evaluating evidence sufficiency..."
+ );
let response = llm
.complete(&system, &user)
.await
@@ -74,6 +78,7 @@ pub async fn evaluate(
}
/// Format evidence summary for sufficiency check.
+/// Includes actual content so the check LLM can evaluate relevance.
pub fn format_evidence_summary(evidence: &[Evidence]) -> String {
if evidence.is_empty() {
return "(no evidence)".to_string();
@@ -82,15 +87,10 @@ pub fn format_evidence_summary(evidence: &[Evidence]) -> String {
.iter()
.map(|e| {
let doc = e.doc_name.as_deref().unwrap_or("unknown");
- format!(
- "- [{}] (from {}) {} chars",
- e.node_title,
- doc,
- e.content.len()
- )
+ format!("[{}] (from {})\n{}", e.node_title, doc, e.content)
})
.collect::<Vec<_>>()
- .join("\n")
+ .join("\n\n")
}
#[cfg(test)]
diff --git a/rust/src/agent/orchestrator/mod.rs b/rust/src/agent/orchestrator/mod.rs
index 9cf9615..6bb6bc2 100644
--- a/rust/src/agent/orchestrator/mod.rs
+++ b/rust/src/agent/orchestrator/mod.rs
@@ -12,6 +12,7 @@ mod analyze;
mod dispatch;
mod evaluate;
mod replan;
+mod supervisor;
use tracing::info;
@@ -22,11 +23,9 @@ use super::Agent;
use super::config::{AgentConfig, Output, WorkspaceContext};
use super::events::EventEmitter;
use super::state::OrchestratorState;
-use super::tools::orchestrator as orch_tools;
use analyze::{AnalyzeOutcome, analyze};
-use evaluate::evaluate;
-use replan::replan;
+use supervisor::run_supervisor_loop;
/// Maximum supervisor loop iterations to prevent infinite loops.
const MAX_SUPERVISOR_ITERATIONS: u32 = 3;
@@ -131,98 +130,22 @@ impl<'a> Agent for Orchestrator<'a> {
};
// --- Phase 2: Supervisor loop ---
- let mut current_dispatches = initial_dispatches;
- let mut iteration: u32 = 0;
- let mut eval_sufficient = false;
-
- loop {
- if iteration >= MAX_SUPERVISOR_ITERATIONS {
- info!(iteration, "Supervisor loop budget exhausted");
- break;
- }
-
- // Dispatch current plan
- if !current_dispatches.is_empty() {
- info!(
- docs = current_dispatches.len(),
- docs_list = ?current_dispatches.iter().map(|d| d.doc_idx).collect::<Vec<_>>(),
- iteration,
- "Dispatching Workers"
- );
- dispatch::dispatch_and_collect(
- &query,
- ¤t_dispatches,
- ws,
- &config,
- &llm,
- &mut state,
- &emitter,
- &query_plan,
- )
- .await;
- }
-
- // No evidence at all — nothing to evaluate
- if state.all_evidence.is_empty() {
- info!("No evidence collected from any Worker");
- break;
- }
-
- // Skip evaluation for user-specified documents (no replan needed)
- if skip_analysis {
- eval_sufficient = !state.all_evidence.is_empty();
- break;
- }
-
- // Evaluate sufficiency
- let eval_result = evaluate(&query, &state.all_evidence, &llm).await?;
- orch_llm_calls += 1;
-
- if eval_result.sufficient {
- eval_sufficient = true;
- info!(
- evidence = state.all_evidence.len(),
- iteration, "Evidence sufficient — exiting supervisor loop"
- );
- break;
- }
-
- // Insufficient — replan
- info!(
- evidence = state.all_evidence.len(),
- missing = eval_result.missing_info.len(),
- iteration,
- "Evidence insufficient — replanning"
- );
-
- let doc_cards_text = orch_tools::ls_docs(ws).feedback;
- let replan_result = replan(
- &query,
- &eval_result.missing_info,
- &state.all_evidence,
- &state.dispatched,
- ws.doc_count(),
- &doc_cards_text,
- &llm,
- )
- .await?;
- orch_llm_calls += 1;
-
- if replan_result.dispatches.is_empty() {
- info!("Replan produced no new dispatches — exiting supervisor loop");
- break;
- }
-
- current_dispatches = replan_result.dispatches;
- iteration += 1;
- }
+ let outcome = run_supervisor_loop(
+ &query,
+ initial_dispatches,
+ ws,
+ &config,
+ &llm,
+ &mut state,
+ &emitter,
+ &query_plan,
+ skip_analysis,
+ )
+ .await?;
+ orch_llm_calls += outcome.llm_calls;
- // Derive confidence from supervisor loop outcome:
- // - LLM evaluated sufficient on first try → high confidence
- // - Needed replan rounds → lower confidence
- // - No evaluation ran (skip_analysis / no evidence) → moderate
let confidence =
- compute_confidence(eval_sufficient, iteration, state.all_evidence.is_empty());
+ compute_confidence(outcome.eval_sufficient, outcome.iteration, state.all_evidence.is_empty());
// --- Phase 3: Finalize — rerank + synthesize ---
if state.all_evidence.is_empty() {
diff --git a/rust/src/agent/orchestrator/replan.rs b/rust/src/agent/orchestrator/replan.rs
index 57d5e24..507c171 100644
--- a/rust/src/agent/orchestrator/replan.rs
+++ b/rust/src/agent/orchestrator/replan.rs
@@ -59,6 +59,7 @@ pub async fn replan(
&find_text,
);
+ info!(evidence = collected_evidence.len(), "Replanning dispatch targets...");
let response = llm
.complete(&system, &user)
.await
@@ -87,6 +88,7 @@ pub async fn replan(
}
/// Format collected evidence for the replan prompt.
+/// Includes content so the LLM can reason about what's actually been found.
fn format_evidence_context(evidence: &[Evidence]) -> String {
if evidence.is_empty() {
return "(no evidence collected)".to_string();
@@ -95,15 +97,10 @@ fn format_evidence_context(evidence: &[Evidence]) -> String {
.iter()
.map(|e| {
let doc = e.doc_name.as_deref().unwrap_or("unknown");
- format!(
- "- [{}] (from {}) {} chars",
- e.node_title,
- doc,
- e.content.len()
- )
+ format!("[{}] (from {})\n{}", e.node_title, doc, e.content)
})
.collect::<Vec<_>>()
- .join("\n")
+ .join("\n\n")
}
/// Build the replan prompt.
diff --git a/rust/src/agent/orchestrator/supervisor.rs b/rust/src/agent/orchestrator/supervisor.rs
new file mode 100644
index 0000000..9dd4a37
--- /dev/null
+++ b/rust/src/agent/orchestrator/supervisor.rs
@@ -0,0 +1,159 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Phase 2: Supervisor loop — dispatch → evaluate → replan.
+
+use tracing::info;
+
+use crate::llm::LlmClient;
+use crate::query::QueryPlan;
+
+use super::super::config::{AgentConfig, WorkspaceContext};
+use super::super::events::EventEmitter;
+use super::super::prompts::DispatchEntry;
+use super::super::state::OrchestratorState;
+use super::super::tools::orchestrator as orch_tools;
+use super::dispatch;
+use super::evaluate::evaluate;
+use super::replan::replan;
+use super::MAX_SUPERVISOR_ITERATIONS;
+
+/// Outcome of the supervisor loop.
+pub struct SupervisorOutcome {
+ /// Number of replan iterations performed.
+ pub iteration: u32,
+ /// Whether the LLM evaluator judged evidence sufficient.
+ pub eval_sufficient: bool,
+ /// LLM calls consumed within the supervisor loop itself.
+ pub llm_calls: u32,
+}
+
+/// Run the supervisor loop: dispatch → evaluate → replan.
+///
+/// Returns a [`SupervisorOutcome`] summarizing what happened.
+pub async fn run_supervisor_loop(
+ query: &str,
+ initial_dispatches: Vec<DispatchEntry>,
+ ws: &WorkspaceContext<'_>,
+ config: &AgentConfig,
+ llm: &LlmClient,
+ state: &mut OrchestratorState,
+ emitter: &EventEmitter,
+ query_plan: &QueryPlan,
+ skip_analysis: bool,
+) -> crate::error::Result<SupervisorOutcome> {
+ let mut current_dispatches = initial_dispatches;
+ let mut iteration: u32 = 0;
+ let mut eval_sufficient = false;
+ let mut llm_calls: u32 = 0;
+
+ loop {
+ if iteration >= MAX_SUPERVISOR_ITERATIONS {
+ info!(iteration, "Supervisor loop budget exhausted");
+ break;
+ }
+
+ // Dispatch current plan
+ if !current_dispatches.is_empty() {
+ info!(
+ docs = current_dispatches.len(),
+ docs_list = ?current_dispatches.iter().map(|d| d.doc_idx).collect::<Vec<_>>(),
+ iteration,
+ "Dispatching Workers"
+ );
+ dispatch::dispatch_and_collect(
+ query,
+ ¤t_dispatches,
+ ws,
+ config,
+ llm,
+ state,
+ emitter,
+ query_plan,
+ )
+ .await;
+ }
+
+ // No evidence at all — nothing to evaluate
+ if state.all_evidence.is_empty() {
+ info!("No evidence collected from any Worker");
+ break;
+ }
+
+ // Skip evaluation for user-specified documents (no replan needed)
+ if skip_analysis {
+ eval_sufficient = !state.all_evidence.is_empty();
+ break;
+ }
+
+ // Evaluate sufficiency
+ let eval_result = evaluate(query, &state.all_evidence, llm).await?;
+ llm_calls += 1;
+
+ if eval_result.sufficient {
+ eval_sufficient = true;
+ info!(
+ evidence = state.all_evidence.len(),
+ iteration, "Evidence sufficient — exiting supervisor loop"
+ );
+ break;
+ }
+
+ // Insufficient — replan
+ info!(
+ evidence = state.all_evidence.len(),
+ missing = eval_result.missing_info.len(),
+ iteration,
+ "Evidence insufficient — replanning"
+ );
+
+ let doc_cards_text = orch_tools::ls_docs(ws).feedback;
+ let replan_result = replan(
+ query,
+ &eval_result.missing_info,
+ &state.all_evidence,
+ &state.dispatched,
+ ws.doc_count(),
+ &doc_cards_text,
+ llm,
+ )
+ .await?;
+ llm_calls += 1;
+
+ if replan_result.dispatches.is_empty() {
+ info!("Replan produced no new dispatches — exiting supervisor loop");
+ break;
+ }
+
+ current_dispatches = replan_result.dispatches;
+ iteration += 1;
+ }
+
+ Ok(SupervisorOutcome {
+ iteration,
+ eval_sufficient,
+ llm_calls,
+ })
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_supervisor_outcome_fields() {
+ let outcome = SupervisorOutcome {
+ iteration: 2,
+ eval_sufficient: true,
+ llm_calls: 5,
+ };
+ assert_eq!(outcome.iteration, 2);
+ assert!(outcome.eval_sufficient);
+ assert_eq!(outcome.llm_calls, 5);
+ }
+
+ #[test]
+ fn test_max_iterations_constant() {
+ assert_eq!(MAX_SUPERVISOR_ITERATIONS, 3);
+ }
+}
diff --git a/rust/src/agent/prompts.rs b/rust/src/agent/prompts.rs
index 42699cc..11d26fb 100644
--- a/rust/src/agent/prompts.rs
+++ b/rust/src/agent/prompts.rs
@@ -115,12 +115,12 @@ pub fn worker_navigation(params: &NavigationParams) -> (String, String) {
Available commands:
- ls List children at current position (with summaries and leaf counts)
-- cd Enter a child node (supports absolute paths like /root/Section)
+- cd Enter a child node (supports relative paths like Section/Sub and absolute paths like /root/Section)
- cd .. Go back to parent node
- cat Read a child node's content (automatically collected as evidence)
- cat Read the current node's content (useful at leaf nodes)
- head Preview first 20 lines of a node (does NOT collect evidence)
-- find Search for a keyword in the document index
+- find Search for a keyword in the document index (also supports multi-word like 'Lab C')
- findtree Search for nodes by title pattern (case-insensitive)
- grep Regex search across all content in current subtree
- wc Show content size (lines, words, chars)
@@ -129,12 +129,21 @@ Available commands:
- done End navigation
SEARCH STRATEGY (important — follow this priority order):
-- When keyword matches are shown below, use find with the EXACT keyword from the list (single word, \
+- When keyword matches are shown below, navigate directly to the highest-weight matched node. \
+Do NOT explore other branches first — the keyword index has already identified the most relevant location.
+- When find results include content snippets that answer the question, cd to that node and cat it immediately.
+- Use find with the EXACT keyword from the list (single word, \
not multi-word phrases). Example: if hint shows keyword 'performance' pointing to Performance section, \
use find performance, NOT find \"performance guide\".
-- Use ls when you have no keyword hints or need to discover the structure of an unknown section.
+- Use ls only when you have no keyword hints or need to discover the structure of an unknown section.
- Use findtree when you know a section title pattern but not the exact name.
+NAVIGATION EFFICIENCY (critical — every round counts):
+- Prefer cd with absolute paths (/root/Section/Subsection) or relative paths (Section/Sub) \
+to reach target nodes in ONE command instead of multiple cd steps.
+- Do NOT ls before cd if keyword hints or find results already tell you which node to enter.
+- Do NOT cd into nodes one level at a time when you can use a multi-segment path.
+
Rules:
- Output exactly ONE command per response, nothing else.
- Content from cat is automatically saved as evidence — don't re-cat the same node.
@@ -239,13 +248,15 @@ pub fn worker_dispatch(params: &WorkerDispatchParams) -> (String, String) {
"You are a document navigation assistant. You are searching inside the document \
\"{doc_name}\" for specific information.
-Available commands: ls, cd , cd .., cat, cat , head , find , \
-findtree , grep , wc , pwd, check, done
+Available commands: ls, cd <target> (supports Section/Sub paths and /root/Section absolute paths), \
+cd .., cat, cat <target>, head <target>, find <keyword>, findtree <pattern>, grep <pattern>, wc <target>, \
+pwd, check, done
SEARCH STRATEGY:
-- Prefer find to jump directly to relevant sections over manual ls→cd exploration. \
-Use single-word keywords, not multi-word phrases.
-- Use ls when you need to discover the structure of an unknown section.
+- Prefer find to jump directly to relevant sections over manual ls→cd exploration.
+- When find results include content snippets that answer your task, cd to that node and cat it immediately.
+- Use multi-segment paths (e.g. cd Research Labs/Lab A) to reach targets in ONE command.
+- Do NOT ls before cd if find results already tell you which node to enter.
- Use findtree when you know a section title pattern but not the exact name.
Rules:
@@ -396,7 +407,7 @@ mod tests {
missing_info: "2024 comparison",
last_feedback: "[1] Q1 Report — Q1 data (5 leaves)\n[2] Q2 Report — Q2 data (5 leaves)",
remaining: 5,
- max_rounds: 8,
+ max_rounds: 15,
history: "(no history yet)",
visited_titles: "(none)",
plan: "",
@@ -411,7 +422,7 @@ mod tests {
assert!(user.contains("root/Financial Statements"));
assert!(user.contains("200 chars"));
assert!(user.contains("2024 comparison"));
- assert!(user.contains("5/8"));
+ assert!(user.contains("5/15"));
assert!(!user.contains("sub-task"));
}
@@ -425,7 +436,7 @@ mod tests {
missing_info: "",
last_feedback: "",
remaining: 8,
- max_rounds: 8,
+ max_rounds: 15,
history: "(no history yet)",
visited_titles: "(none)",
plan: "",
@@ -448,7 +459,7 @@ mod tests {
missing_info: "",
last_feedback: "",
remaining: 8,
- max_rounds: 8,
+ max_rounds: 15,
history: "(no history yet)",
visited_titles: "(none)",
plan: "",
diff --git a/rust/src/agent/state.rs b/rust/src/agent/state.rs
index a4c1e67..e0deb3b 100644
--- a/rust/src/agent/state.rs
+++ b/rust/src/agent/state.rs
@@ -25,6 +25,9 @@ pub struct WorkerState {
pub evidence: Vec<Evidence>,
/// Nodes already visited (prevents redundant reads).
pub visited: HashSet<NodeId>,
+ /// Nodes whose content has been collected via cat. Separate from visited
+ /// because cd-ing through a node ≠ reading its content.
+ pub collected_nodes: HashSet<NodeId>,
/// Remaining navigation rounds.
pub remaining: u32,
/// Maximum rounds (for display in prompts).
@@ -49,10 +52,6 @@ pub struct WorkerState {
/// Maximum number of history entries to keep for prompt injection.
const MAX_HISTORY_ENTRIES: usize = 6;
-/// Maximum characters for `last_feedback` before truncation.
-/// Prevents large cat/grep outputs from bloating subsequent prompts.
-const MAX_FEEDBACK_CHARS: usize = 500;
-
impl WorkerState {
/// Create a new state starting at the given root node.
pub fn new(root: NodeId, max_rounds: u32) -> Self {
@@ -61,6 +60,7 @@ impl WorkerState {
current_node: root,
evidence: Vec::new(),
visited: HashSet::new(),
+ collected_nodes: HashSet::new(),
remaining: max_rounds,
max_rounds,
last_feedback: String::new(),
@@ -79,20 +79,9 @@ impl WorkerState {
}
}
- /// Set feedback with automatic truncation to prevent prompt bloat.
+ /// Set feedback from tool execution.
pub fn set_feedback(&mut self, feedback: String) {
- if feedback.len() <= MAX_FEEDBACK_CHARS {
- self.last_feedback = feedback;
- } else {
- // Find a clean truncation point (line boundary if possible)
- let truncated = &feedback[..MAX_FEEDBACK_CHARS];
- let end = truncated.rfind('\n').unwrap_or(MAX_FEEDBACK_CHARS);
- self.last_feedback = format!(
- "{}...\n(truncated, {} chars total)",
- &feedback[..end.min(MAX_FEEDBACK_CHARS)],
- feedback.len()
- );
- }
+ self.last_feedback = feedback;
}
/// Navigate into a child node.
@@ -118,6 +107,11 @@ impl WorkerState {
self.evidence.push(evidence);
}
+ /// Check if evidence has already been collected for a specific node.
+ pub fn has_evidence_for(&self, node_id: crate::document::NodeId) -> bool {
+ self.collected_nodes.contains(&node_id)
+ }
+
/// Push a history entry (command + result summary).
/// Keeps only the last `MAX_HISTORY_ENTRIES` entries.
pub fn push_history(&mut self, entry: String) {
@@ -157,6 +151,20 @@ impl WorkerState {
.join("\n")
}
+ /// Evidence with actual content for sufficiency evaluation.
+ pub fn evidence_for_check(&self) -> String {
+ if self.evidence.is_empty() {
+ return "(no evidence collected yet)".to_string();
+ }
+ self.evidence
+ .iter()
+ .map(|e| {
+ format!("[{}]\n{}", e.node_title, e.content)
+ })
+ .collect::<Vec<_>>()
+ .join("\n\n")
+ }
+
/// Convert this state into a WorkerOutput (consuming the state), with budget flag.
/// Worker returns evidence only — no answer synthesis.
pub fn into_worker_output(
diff --git a/rust/src/agent/tools/mod.rs b/rust/src/agent/tools/mod.rs
index af90013..c44c002 100644
--- a/rust/src/agent/tools/mod.rs
+++ b/rust/src/agent/tools/mod.rs
@@ -51,3 +51,51 @@ impl ToolResult {
}
}
}
+
+/// Extract a content snippet around the first occurrence of `keyword`.
+///
+/// Returns `None` if the content is empty. If the keyword is not found,
+/// returns the beginning of the content instead.
+pub fn content_snippet(content: &str, keyword: &str, max_len: usize) -> Option<String> {
+ if content.trim().is_empty() {
+ return None;
+ }
+
+ let keyword_lower = keyword.to_lowercase();
+ let content_lower = content.to_lowercase();
+
+ let start = match content_lower.find(&keyword_lower) {
+ Some(pos) => {
+ let back = (max_len / 4).min(pos);
+ pos - back
+ }
+ None => 0,
+ };
+
+ let start = content
+ .char_indices()
+ .find(|(i, _)| *i >= start)
+ .map(|(i, _)| i)
+ .unwrap_or(0);
+
+ let end = content
+ .char_indices()
+ .take_while(|(i, _)| *i <= start + max_len)
+ .last()
+ .map(|(i, c)| i + c.len_utf8())
+ .unwrap_or(content.len());
+
+ let snippet = content[start..end].trim();
+ if snippet.is_empty() {
+ return None;
+ }
+
+ let mut result = snippet.to_string();
+ if end < content.len() {
+ result.push_str("...");
+ }
+ if start > 0 {
+ result = format!("...{}", result);
+ }
+ Some(result)
+}
diff --git a/rust/src/agent/tools/orchestrator.rs b/rust/src/agent/tools/orchestrator.rs
index 73c78ac..96dcb11 100644
--- a/rust/src/agent/tools/orchestrator.rs
+++ b/rust/src/agent/tools/orchestrator.rs
@@ -68,7 +68,7 @@ pub fn ls_docs(ctx: &WorkspaceContext) -> ToolResult {
/// Execute `find_cross` — search keywords across all documents.
///
-/// Returns formatted results showing which documents matched.
+/// Returns formatted results showing which documents matched, with content snippets.
pub fn find_cross(keywords: &[String], ctx: &WorkspaceContext) -> ToolResult {
let results = ctx.find_cross_all(keywords);
@@ -90,16 +90,15 @@ pub fn find_cross(keywords: &[String], ctx: &WorkspaceContext) -> ToolResult {
let title = doc
.and_then(|d| d.node_title(entry.node_id))
.unwrap_or("unknown");
- let summary = doc
- .and_then(|d| d.nav_entry(entry.node_id))
- .map(|e| e.overview.as_str())
- .unwrap_or("");
output.push_str(&format!(
" keyword '{}' → {} (depth {}, weight {:.2})",
hit.keyword, title, entry.depth, entry.weight
));
- if !summary.is_empty() {
- output.push_str(&format!(" — {}", summary));
+ // Include content snippet for cross-doc relevance judgment
+ if let Some(content) = doc.and_then(|d| d.cat(entry.node_id)) {
+ if let Some(snippet) = super::content_snippet(content, &hit.keyword, 300) {
+ output.push_str(&format!("\n \"{}\"", snippet));
+ }
}
output.push('\n');
}
diff --git a/rust/src/agent/tools/worker/cat.rs b/rust/src/agent/tools/worker/cat.rs
index 0e13257..3792290 100644
--- a/rust/src/agent/tools/worker/cat.rs
+++ b/rust/src/agent/tools/worker/cat.rs
@@ -30,7 +30,7 @@ pub fn cat(target: &str, ctx: &DocContext, state: &mut WorkerState) -> ToolResul
}
};
- if state.visited.contains(&node_id) {
+ if state.has_evidence_for(node_id) {
let title = ctx.node_title(node_id).unwrap_or("unknown");
return ToolResult::ok(format!(
"[Already collected: {}]. Use a different target or cd to another branch.",
@@ -50,19 +50,10 @@ pub fn cat(target: &str, ctx: &DocContext, state: &mut WorkerState) -> ToolResul
doc_name: Some(ctx.doc_name.to_string()),
});
+ state.collected_nodes.insert(node_id);
state.visited.insert(node_id);
- let preview = if content_string.len() > 500 {
- format!(
- "{}...(truncated, {} chars total)",
- &content_string[..500],
- content_string.len()
- )
- } else {
- content_string
- };
-
- ToolResult::ok(format!("[Evidence collected: {}]\n{}", title, preview))
+ ToolResult::ok(format!("[Evidence collected: {}]\n{}", title, content_string))
}
None => ToolResult::fail(format!("No content available for '{}'.", target)),
}
@@ -110,7 +101,7 @@ mod tests {
reasoning_index: &crate::document::ReasoningIndex::default(),
doc_name: "test",
};
- let mut state = WorkerState::new(root, 8);
+ let mut state = WorkerState::new(root, 15);
let result = cat("Getting Started", &ctx, &mut state);
assert!(result.success);
diff --git a/rust/src/agent/tools/worker/cd.rs b/rust/src/agent/tools/worker/cd.rs
index 765d6e2..8d87483 100644
--- a/rust/src/agent/tools/worker/cd.rs
+++ b/rust/src/agent/tools/worker/cd.rs
@@ -13,12 +13,18 @@ use super::super::ToolResult;
///
/// Supports:
/// - Relative names (child of current node): `cd "Getting Started"`
+/// - Relative paths with `/`: `cd "Research Labs/Lab B"`
/// - Absolute paths starting with `/`: `cd /root/Chapter 1/Section 1.2`
pub fn cd(target: &str, ctx: &DocContext, state: &mut WorkerState) -> ToolResult {
if target.starts_with('/') {
return cd_absolute(target, ctx, state);
}
+ // Relative path with segments: "Research Labs/Lab B"
+ if target.contains('/') {
+ return cd_relative_path(target, ctx, state);
+ }
+
match command::resolve_target_extended(target, ctx.nav_index, state.current_node, ctx.tree) {
Some(node_id) => {
let title = ctx.node_title(node_id).unwrap_or(target).to_string();
@@ -32,6 +38,40 @@ pub fn cd(target: &str, ctx: &DocContext, state: &mut WorkerState) -> ToolResult
}
}
+/// Navigate using a relative multi-segment path (e.g., `"Research Labs/Lab B"`).
+fn cd_relative_path(path: &str, ctx: &DocContext, state: &mut WorkerState) -> ToolResult {
+ let segments: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
+ if segments.is_empty() {
+ return ToolResult::fail("Empty path.".to_string());
+ }
+
+ let mut current = state.current_node;
+ let mut breadcrumb = state.breadcrumb.clone();
+
+ for segment in &segments {
+ match command::resolve_target_extended(segment, ctx.nav_index, current, ctx.tree) {
+ Some(node_id) => {
+ let title = ctx.node_title(node_id).unwrap_or(*segment).to_string();
+ breadcrumb.push(title);
+ current = node_id;
+ }
+ None => {
+ return ToolResult::fail(format!(
+ "Path segment '{}' not found at '/{}'. Use ls to see available children.",
+ segment,
+ breadcrumb.join("/")
+ ));
+ }
+ }
+ }
+
+ state.breadcrumb = breadcrumb;
+ state.current_node = current;
+ state.visited.insert(current);
+
+ ToolResult::ok(format!("Entered: {}", state.path_str()))
+}
+
/// Navigate using an absolute path (e.g., `/root/Chapter 1/Section 1.2`).
fn cd_absolute(path: &str, ctx: &DocContext, state: &mut WorkerState) -> ToolResult {
let segments: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
@@ -131,7 +171,7 @@ mod tests {
reasoning_index: &crate::document::ReasoningIndex::default(),
doc_name: "test",
};
- let mut state = WorkerState::new(root, 8);
+ let mut state = WorkerState::new(root, 15);
let result = cd("Getting Started", &ctx, &mut state);
assert!(result.success);
@@ -148,11 +188,75 @@ mod tests {
reasoning_index: &crate::document::ReasoningIndex::default(),
doc_name: "test",
};
- let mut state = WorkerState::new(root, 8);
+ let mut state = WorkerState::new(root, 15);
cd("Getting Started", &ctx, &mut state);
let result = cd_up(&ctx, &mut state);
assert!(result.success);
assert_eq!(state.current_node, root);
}
+
+ fn build_deep_tree() -> (DocumentTree, NavigationIndex, NodeId, NodeId, NodeId) {
+ // Root → "Research Labs" → "Lab B"
+ let mut tree = DocumentTree::new("Root", "root content");
+ let root = tree.root();
+ let section = tree.add_child(root, "Research Labs", "section content");
+ let lab_b = tree.add_child(section, "Lab B", "lab b content");
+
+ let mut nav = NavigationIndex::new();
+ nav.add_child_routes(
+ root,
+ vec![ChildRoute {
+ node_id: section,
+ title: "Research Labs".to_string(),
+ description: "Lab sections".to_string(),
+ leaf_count: 4,
+ }],
+ );
+ nav.add_child_routes(
+ section,
+ vec![ChildRoute {
+ node_id: lab_b,
+ title: "Lab B".to_string(),
+ description: "Topological qubits".to_string(),
+ leaf_count: 1,
+ }],
+ );
+
+ (tree, nav, root, section, lab_b)
+ }
+
+ #[test]
+ fn test_cd_relative_path() {
+ let (tree, nav, root, _, lab_b) = build_deep_tree();
+ let ctx = DocContext {
+ tree: &tree,
+ nav_index: &nav,
+ reasoning_index: &crate::document::ReasoningIndex::default(),
+ doc_name: "test",
+ };
+ let mut state = WorkerState::new(root, 15);
+
+ let result = cd("Research Labs/Lab B", &ctx, &mut state);
+ assert!(result.success);
+ assert_eq!(state.current_node, lab_b);
+ assert!(state.path_str().contains("Research Labs"));
+ assert!(state.path_str().contains("Lab B"));
+ }
+
+ #[test]
+ fn test_cd_relative_path_partial_fail() {
+ let (tree, nav, root, _, _) = build_deep_tree();
+ let ctx = DocContext {
+ tree: &tree,
+ nav_index: &nav,
+ reasoning_index: &crate::document::ReasoningIndex::default(),
+ doc_name: "test",
+ };
+ let mut state = WorkerState::new(root, 15);
+
+ let result = cd("Research Labs/Nonexistent", &ctx, &mut state);
+ assert!(!result.success);
+ assert!(result.feedback.contains("Nonexistent"));
+ }
}
diff --git a/rust/src/agent/tools/worker/grep.rs b/rust/src/agent/tools/worker/grep.rs
index 6dc5c2c..077b077 100644
--- a/rust/src/agent/tools/worker/grep.rs
+++ b/rust/src/agent/tools/worker/grep.rs
@@ -42,12 +42,7 @@ pub fn grep(pattern: &str, ctx: &DocContext, state: &WorkerState) -> ToolResult
break;
}
if re.is_match(line) {
- let preview = if line.len() > 120 {
- format!("{}...", &line[..120])
- } else {
- line.to_string()
- };
- output.push_str(&format!("[{}] {}\n", title, preview));
+ output.push_str(&format!("[{}] {}\n", title, line));
matches_found += 1;
}
}
@@ -124,7 +119,7 @@ mod tests {
fn test_grep_finds_matches() {
let (tree, nav, root) = build_rich_tree();
let ctx = rich_ctx!(tree, nav);
- let state = WorkerState::new(root, 8);
+ let state = WorkerState::new(root, 15);
let result = grep("revenue", &ctx, &state);
assert!(result.success);
@@ -136,7 +131,7 @@ mod tests {
fn test_grep_regex() {
let (tree, nav, root) = build_rich_tree();
let ctx = rich_ctx!(tree, nav);
- let state = WorkerState::new(root, 8);
+ let state = WorkerState::new(root, 15);
let result = grep("EBITDA|\\$\\d+", &ctx, &state);
assert!(result.success);
@@ -148,7 +143,7 @@ mod tests {
fn test_grep_no_matches() {
let (tree, nav, root) = build_rich_tree();
let ctx = rich_ctx!(tree, nav);
- let state = WorkerState::new(root, 8);
+ let state = WorkerState::new(root, 15);
let result = grep("nonexistent_term_xyz", &ctx, &state);
assert!(result.success);
@@ -159,7 +154,7 @@ mod tests {
fn test_grep_invalid_regex() {
let (tree, nav, root) = build_rich_tree();
let ctx = rich_ctx!(tree, nav);
- let state = WorkerState::new(root, 8);
+ let state = WorkerState::new(root, 15);
let result = grep("[invalid", &ctx, &state);
assert!(!result.success);
@@ -170,7 +165,7 @@ mod tests {
fn test_grep_subtree_only() {
let (tree, nav, root) = build_rich_tree();
let ctx = rich_ctx!(tree, nav);
- let mut state = WorkerState::new(root, 8);
+ let mut state = WorkerState::new(root, 15);
crate::agent::tools::worker::cd::cd("Expenses", &ctx, &mut state);
let result = grep("revenue", &ctx, &state);
diff --git a/rust/src/agent/tools/worker/head.rs b/rust/src/agent/tools/worker/head.rs
index 06dd443..5fefa23 100644
--- a/rust/src/agent/tools/worker/head.rs
+++ b/rust/src/agent/tools/worker/head.rs
@@ -98,7 +98,7 @@ mod tests {
fn test_head_preview() {
let (tree, nav, root) = build_rich_tree();
let ctx = rich_ctx!(tree, nav);
- let state = WorkerState::new(root, 8);
+ let state = WorkerState::new(root, 15);
let result = head("Revenue", 2, &ctx, &state);
assert!(result.success);
@@ -111,7 +111,7 @@ mod tests {
fn test_head_not_found() {
let (tree, nav, root) = build_rich_tree();
let ctx = rich_ctx!(tree, nav);
- let state = WorkerState::new(root, 8);
+ let state = WorkerState::new(root, 15);
let result = head("NonExistent", 10, &ctx, &state);
assert!(!result.success);
diff --git a/rust/src/agent/tools/worker/ls.rs b/rust/src/agent/tools/worker/ls.rs
index 351ab89..a00d688 100644
--- a/rust/src/agent/tools/worker/ls.rs
+++ b/rust/src/agent/tools/worker/ls.rs
@@ -32,13 +32,22 @@ pub fn ls(ctx: &DocContext, state: &WorkerState) -> ToolResult {
}
for (i, route) in routes.iter().enumerate() {
- output.push_str(&format!(
- "[{}] {} — {} ({} leaves)",
- i + 1,
- route.title,
- route.description,
- route.leaf_count
- ));
+ if route.title == route.description {
+ output.push_str(&format!(
+ "[{}] {} ({} leaves)",
+ i + 1,
+ route.title,
+ route.leaf_count
+ ));
+ } else {
+ output.push_str(&format!(
+ "[{}] {} — {} ({} leaves)",
+ i + 1,
+ route.title,
+ route.description,
+ route.leaf_count
+ ));
+ }
if let Some(nav) = ctx.nav_entry(route.node_id) {
if !nav.question_hints.is_empty() {
output.push_str(&format!(
@@ -105,7 +114,7 @@ mod tests {
reasoning_index: &crate::document::ReasoningIndex::default(),
doc_name: "test",
};
- let state = WorkerState::new(root, 8);
+ let state = WorkerState::new(root, 15);
let result = ls(&ctx, &state);
assert!(result.success);
diff --git a/rust/src/agent/tools/worker/pwd.rs b/rust/src/agent/tools/worker/pwd.rs
index 4f71a7e..7adcf08 100644
--- a/rust/src/agent/tools/worker/pwd.rs
+++ b/rust/src/agent/tools/worker/pwd.rs
@@ -48,7 +48,7 @@ mod tests {
reasoning_index: &crate::document::ReasoningIndex::default(),
doc_name: "test",
};
- let mut state = WorkerState::new(root, 8);
+ let mut state = WorkerState::new(root, 15);
cd("API Reference", &ctx, &mut state);
let result = pwd(&state);
diff --git a/rust/src/agent/tools/worker/wc.rs b/rust/src/agent/tools/worker/wc.rs
index ac37f29..4ea7ec0 100644
--- a/rust/src/agent/tools/worker/wc.rs
+++ b/rust/src/agent/tools/worker/wc.rs
@@ -87,7 +87,7 @@ mod tests {
fn test_wc_stats() {
let (tree, nav, root) = build_rich_tree();
let ctx = rich_ctx!(tree, nav);
- let state = WorkerState::new(root, 8);
+ let state = WorkerState::new(root, 15);
let result = wc("Revenue", &ctx, &state);
assert!(result.success);
@@ -101,7 +101,7 @@ mod tests {
fn test_wc_not_found() {
let (tree, nav, root) = build_rich_tree();
let ctx = rich_ctx!(tree, nav);
- let state = WorkerState::new(root, 8);
+ let state = WorkerState::new(root, 15);
let result = wc("NonExistent", &ctx, &state);
assert!(!result.success);
diff --git a/rust/src/agent/worker/execute.rs b/rust/src/agent/worker/execute.rs
index c8e1b9a..c4fa8c1 100644
--- a/rust/src/agent/worker/execute.rs
+++ b/rust/src/agent/worker/execute.rs
@@ -96,22 +96,57 @@ pub async fn execute_command(
continue;
}
let title = ctx.node_title(entry.node_id).unwrap_or("unknown");
- let summary = ctx
- .nav_entry(entry.node_id)
- .map(|e| e.overview.as_str())
- .unwrap_or("");
output.push_str(&format!(
" - {} (depth {}, weight {:.2})",
title, entry.depth, entry.weight
));
- if !summary.is_empty() {
- output.push_str(&format!(" — {}", summary));
+ if let Some(content) = ctx.cat(entry.node_id) {
+ if let Some(snippet) =
+ super::super::tools::content_snippet(content, keyword, 300)
+ {
+ output.push_str(&format!("\n \"{}\"", snippet));
+ }
}
output.push('\n');
}
output
}
- None => format!("No results for '{}'", keyword),
+ None => {
+ // Fallback: search node titles (like findtree) with content snippets
+ let pattern_lower = keyword.to_lowercase();
+ let all_nodes = ctx.tree.traverse();
+ let mut results = Vec::new();
+ for node_id in &all_nodes {
+ if let Some(node) = ctx.tree.get(*node_id) {
+ if node.title.to_lowercase().contains(&pattern_lower) {
+ let depth = ctx.tree.depth(*node_id);
+ results.push((node.title.clone(), *node_id, depth));
+ }
+ }
+ }
+ if results.is_empty() {
+ format!("No results for '{}' in index or titles.", keyword)
+ } else {
+ let mut output = format!(
+ "Results for '{}' (title match, {} found):\n",
+ keyword,
+ results.len()
+ );
+ for (title, node_id, depth) in &results {
+ output.push_str(&format!(
+ " - {} (depth {})",
+ title, depth
+ ));
+ if let Some(content) = ctx.cat(*node_id) {
+ if let Some(snippet) = super::super::tools::content_snippet(content, keyword, 300) {
+ output.push_str(&format!("\n \"{}\"", snippet));
+ }
+ }
+ output.push('\n');
+ }
+ output
+ }
+ }
};
info!(doc = ctx.doc_name, keyword, feedback = %truncate_log(&feedback), "find result");
state.set_feedback(feedback);
@@ -125,9 +160,9 @@ pub async fn execute_command(
}
Command::Check => {
- let evidence_summary = state.evidence_summary();
+ let evidence_text = state.evidence_for_check();
- let (system, user) = check_sufficiency(query, &evidence_summary);
+ let (system, user) = check_sufficiency(query, &evidence_text);
info!(
doc = ctx.doc_name,
diff --git a/rust/src/agent/worker/mod.rs b/rust/src/agent/worker/mod.rs
index 803ced7..f0577c8 100644
--- a/rust/src/agent/worker/mod.rs
+++ b/rust/src/agent/worker/mod.rs
@@ -13,16 +13,15 @@
mod execute;
mod format;
+mod navigation;
mod planning;
-use tracing::{debug, info};
+use tracing::info;
use super::Agent;
-use super::command::Command;
-use super::config::{DocContext, Step, WorkerConfig, WorkerOutput};
+use super::config::{DocContext, WorkerConfig, WorkerOutput};
use super::context::FindHit;
use super::events::EventEmitter;
-use super::prompts::{NavigationParams, worker_dispatch, worker_navigation};
use super::state::WorkerState;
use super::tools::worker as tools;
use crate::error::Error;
@@ -30,9 +29,8 @@ use crate::llm::LlmClient;
use crate::query::QueryPlan;
use crate::scoring::bm25::extract_keywords;
-use execute::{execute_command, parse_and_detect_failure};
-use format::format_visited_titles;
-use planning::{build_plan_prompt, build_replan_prompt, format_keyword_hints};
+use navigation::run_navigation_loop;
+use planning::build_plan_prompt;
/// Worker agent — navigates a single document to collect evidence.
///
@@ -102,19 +100,12 @@ impl<'a> Agent for Worker<'a> {
);
let mut llm_calls: u32 = 0;
- let max_llm = config.max_llm_calls;
-
- macro_rules! llm_budget_exhausted {
- () => {
- max_llm > 0 && llm_calls >= max_llm
- };
- }
// Gather keyword hits as context for LLM planning (not routing rules)
let keywords = extract_keywords(&query);
let index_hits: Vec = ctx.find_all(&keywords);
if !index_hits.is_empty() {
- debug!(
+ tracing::debug!(
doc = ctx.doc_name,
hit_count = index_hits.len(),
"ReasoningIndex keyword hits available for planning"
@@ -127,7 +118,8 @@ impl<'a> Agent for Worker<'a> {
state.set_feedback(ls_result.feedback);
// --- Phase 1.5: Navigation planning ---
- if state.remaining > 0 && !llm_budget_exhausted!() {
+ if state.remaining > 0 && (config.max_llm_calls == 0 || llm_calls < config.max_llm_calls) {
+ info!(doc = ctx.doc_name, "Generating navigation plan...");
let plan_prompt = build_plan_prompt(
&query,
task_ref,
@@ -159,173 +151,23 @@ impl<'a> Agent for Worker<'a> {
}
// --- Phase 2: Navigation loop ---
- let use_dispatch_prompt = task_ref.is_some();
- let keyword_hints = format_keyword_hints(&index_hits, ctx);
-
- loop {
- if state.remaining == 0 {
- info!(doc = ctx.doc_name, "Navigation budget exhausted");
- break;
- }
- if llm_budget_exhausted!() {
- info!(
- doc = ctx.doc_name,
- llm_calls, max_llm, "LLM call budget exhausted"
- );
- break;
- }
-
- // Build prompt
- let (system, user) = if use_dispatch_prompt && state.remaining == config.max_rounds {
- worker_dispatch(&super::prompts::WorkerDispatchParams {
- original_query: &query,
- task: task_ref.unwrap_or(&query),
- doc_name: ctx.doc_name,
- breadcrumb: &state.path_str(),
- })
- } else {
- let visited_titles = format_visited_titles(&state, ctx);
- worker_navigation(&NavigationParams {
- query: &query,
- task: task_ref,
- breadcrumb: &state.path_str(),
- evidence_summary: &state.evidence_summary(),
- missing_info: &state.missing_info,
- last_feedback: &state.last_feedback,
- remaining: state.remaining,
- max_rounds: state.max_rounds,
- history: &state.history_text(),
- visited_titles: &visited_titles,
- plan: &state.plan,
- intent_context: &intent_context,
- keyword_hints: &keyword_hints,
- })
- };
-
- // LLM decision
- let round_num = config.max_rounds - state.remaining + 1;
- let round_start = std::time::Instant::now();
- let llm_output =
- llm.complete(&system, &user)
- .await
- .map_err(|e| Error::LlmReasoning {
- stage: "worker/navigation".to_string(),
- detail: format!("Nav loop LLM call failed (round {round_num}): {e}"),
- })?;
- llm_calls += 1;
-
- // Parse command
- if llm_output.trim().len() < 2 {
- tracing::warn!(
- doc = ctx.doc_name,
- round = config.max_rounds - state.remaining + 1,
- response = llm_output.trim(),
- "LLM response unusually short"
- );
- }
- let (command, is_parse_failure) = parse_and_detect_failure(&llm_output);
- if is_parse_failure {
- let raw_preview = if llm_output.trim().len() > 200 {
- format!("{}...", &llm_output.trim()[..200])
- } else {
- llm_output.trim().to_string()
- };
- state.last_feedback = format!(
- "Your output was not recognized as a valid command:\n\"{}\"\n\n\
- Please output exactly one command (ls, cd, cat, head, find, findtree, grep, wc, pwd, check, or done).",
- raw_preview
- );
- state.push_history("(unrecognized) → parse failure".to_string());
- continue;
- }
-
- debug!(doc = ctx.doc_name, ?command, "Parsed command");
-
- let is_check = matches!(command, Command::Check);
-
- // Execute
- let step = execute_command(
- &command,
- ctx,
- &mut state,
- &query,
- &llm,
- &mut llm_calls,
- &emitter,
- )
- .await;
-
- // Dynamic re-planning after insufficient check
- if is_check
- && !state.missing_info.is_empty()
- && state.remaining >= 3
- && !llm_budget_exhausted!()
- {
- let missing = state.missing_info.clone();
- let replan = build_replan_prompt(&query, task_ref, &state, ctx);
- let new_plan =
- llm.complete(&replan.0, &replan.1)
- .await
- .map_err(|e| Error::LlmReasoning {
- stage: "worker/replan".to_string(),
- detail: format!("Re-plan LLM call failed: {e}"),
- })?;
- llm_calls += 1;
- let plan_text = new_plan.trim().to_string();
- if !plan_text.is_empty() {
- info!(
- doc = ctx.doc_name,
- plan = %plan_text,
- "Re-plan generated"
- );
- emitter.emit_worker_replan(ctx.doc_name, &missing, plan_text.len());
- state.plan = plan_text;
- }
- state.missing_info.clear();
- } else if is_check && !state.missing_info.is_empty() {
- state.plan.clear();
- state.missing_info.clear();
- }
-
- // Emit round event
- let cmd_str = format!("{:?}", command);
- let success = !matches!(step, Step::ForceDone(_));
- let round_elapsed = round_start.elapsed().as_millis() as u64;
- emitter.emit_worker_round(ctx.doc_name, round_num, &cmd_str, success, round_elapsed);
-
- let feedback_preview = if state.last_feedback.len() > 120 {
- format!("{}...", &state.last_feedback[..120])
- } else {
- state.last_feedback.clone()
- };
- state.push_history(format!("{} → {}", cmd_str, feedback_preview));
-
- // Check termination
- match step {
- Step::Done => {
- info!(
- doc = ctx.doc_name,
- evidence = state.evidence.len(),
- "Navigation done"
- );
- break;
- }
- Step::ForceDone(reason) => {
- info!(doc = ctx.doc_name, reason = %reason, "Forced done");
- break;
- }
- Step::Continue => {
- if !is_check {
- state.dec_round();
- }
- }
- }
- }
-
- let budget_exhausted = state.remaining == 0 || llm_budget_exhausted!();
+ run_navigation_loop(
+ &query,
+ task_ref,
+ ctx,
+ &config,
+ &llm,
+ &mut state,
+ &emitter,
+ &index_hits,
+ &intent_context,
+ &mut llm_calls,
+ )
+ .await?;
+
+ let budget_exhausted = state.remaining == 0
+ || (config.max_llm_calls > 0 && llm_calls >= config.max_llm_calls);
- // Worker returns raw evidence — no synthesis.
- // The Orchestrator owns the single synthesis/fusion point via rerank::process.
let output = state.into_worker_output(llm_calls, budget_exhausted, ctx.doc_name);
emitter.emit_worker_done(
@@ -348,3 +190,47 @@ impl<'a> Agent for Worker<'a> {
Ok(output)
}
}
+
+#[cfg(test)]
+mod truncation_tests {
+ /// Verify that truncating feedback with multi-byte UTF-8 characters
+ /// never panics. This mirrors the truncation logic in the navigation loop.
+ #[test]
+ fn test_utf8_safe_truncation_ascii() {
+ let feedback = "a".repeat(200);
+ let boundary = feedback.ceil_char_boundary(120);
+ let truncated = &feedback[..boundary];
+        assert!(truncated.len() <= 123); // ceil_char_boundary may round up by at most 3 bytes
+ assert!(truncated.is_char_boundary(truncated.len()));
+ }
+
+ #[test]
+ fn test_utf8_safe_truncation_multibyte() {
+ // Each '中' is 3 bytes in UTF-8
+ let feedback = "中文反馈内容测试截断安全".repeat(20);
+ assert!(feedback.len() > 120);
+ let boundary = feedback.ceil_char_boundary(120);
+ let truncated = &feedback[..boundary];
+ assert!(truncated.len() <= 120);
+ assert!(truncated.is_char_boundary(truncated.len()));
+ }
+
+ #[test]
+ fn test_utf8_safe_truncation_emoji() {
+ // Emojis are 4 bytes each
+ let feedback = "🦀🎉🚀".repeat(50);
+ assert!(feedback.len() > 120);
+ let boundary = feedback.ceil_char_boundary(120);
+ let truncated = &feedback[..boundary];
+ assert!(truncated.len() <= 120);
+ assert!(truncated.is_char_boundary(truncated.len()));
+ }
+
+ #[test]
+ fn test_utf8_safe_truncation_short_string() {
+ // String shorter than limit — no truncation needed
+ let feedback = "short feedback".to_string();
+ let boundary = feedback.ceil_char_boundary(120);
+ assert_eq!(boundary, feedback.len());
+ }
+}
diff --git a/rust/src/agent/worker/navigation.rs b/rust/src/agent/worker/navigation.rs
new file mode 100644
index 0000000..c41f0bf
--- /dev/null
+++ b/rust/src/agent/worker/navigation.rs
@@ -0,0 +1,454 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Phase 2: Navigation loop — LLM-driven command loop until done or budget exhausted.
+
+use tracing::{debug, info};
+
+use super::super::command::Command;
+use super::super::config::{DocContext, Step, WorkerConfig};
+use super::super::context::FindHit;
+use super::super::events::EventEmitter;
+use super::super::prompts::{NavigationParams, worker_dispatch, worker_navigation};
+use super::super::state::WorkerState;
+use super::execute::{execute_command, parse_and_detect_failure};
+use super::format::format_visited_titles;
+use super::planning::{build_replan_prompt, format_keyword_hints};
+use crate::error::Error;
+use crate::llm::LlmClient;
+
+/// Run the Phase 2 navigation loop.
+///
+/// Loops until budget exhausted, `done`/`force_done`, or error.
+/// Mutates `state` and `llm_calls` in place.
+pub async fn run_navigation_loop(
+ query: &str,
+ task: Option<&str>,
+ ctx: &DocContext<'_>,
+ config: &WorkerConfig,
+ llm: &LlmClient,
+ state: &mut WorkerState,
+ emitter: &EventEmitter,
+ index_hits: &[FindHit],
+ intent_context: &str,
+ llm_calls: &mut u32,
+) -> crate::error::Result<()> {
+ let use_dispatch_prompt = task.is_some();
+ let keyword_hints = format_keyword_hints(index_hits, ctx);
+ let max_llm = config.max_llm_calls;
+
+ loop {
+ if state.remaining == 0 {
+ info!(doc = ctx.doc_name, "Navigation budget exhausted");
+ break;
+ }
+ if max_llm > 0 && *llm_calls >= max_llm {
+ info!(
+ doc = ctx.doc_name,
+ llm_calls, max_llm, "LLM call budget exhausted"
+ );
+ break;
+ }
+
+ // Build prompt
+ let (system, user) = build_round_prompt(
+ query,
+ task,
+ ctx,
+ state,
+ intent_context,
+ &keyword_hints,
+ use_dispatch_prompt,
+ config.max_rounds,
+ );
+
+ // LLM decision
+ let round_num = config.max_rounds - state.remaining + 1;
+ let round_start = std::time::Instant::now();
+ info!(
+ doc = ctx.doc_name,
+ round = round_num,
+ max_rounds = config.max_rounds,
+ "Navigation round: calling LLM..."
+ );
+ let llm_output =
+ llm.complete(&system, &user)
+ .await
+ .map_err(|e| Error::LlmReasoning {
+ stage: "worker/navigation".to_string(),
+ detail: format!("Nav loop LLM call failed (round {round_num}): {e}"),
+ })?;
+ *llm_calls += 1;
+
+ // Parse command
+ let (command, is_parse_failure) = handle_parse_failure(&llm_output, ctx.doc_name, state);
+ if is_parse_failure {
+ continue;
+ }
+
+ debug!(doc = ctx.doc_name, ?command, "Parsed command");
+
+ let is_check = matches!(command, Command::Check);
+
+ // Execute
+ let step = execute_command(
+ &command,
+ ctx,
+ state,
+ query,
+ llm,
+ llm_calls,
+ emitter,
+ )
+ .await;
+
+ // Dynamic re-planning after insufficient check
+ handle_replan(
+ is_check,
+ query,
+ task,
+ ctx,
+ llm,
+ state,
+ emitter,
+ llm_calls,
+ max_llm,
+ )
+ .await?;
+
+ // Emit round event
+ let cmd_str = format!("{:?}", command);
+ let success = !matches!(step, Step::ForceDone(_));
+ let round_elapsed = round_start.elapsed().as_millis() as u64;
+ emitter.emit_worker_round(ctx.doc_name, round_num, &cmd_str, success, round_elapsed);
+
+ push_round_history(state, &cmd_str);
+
+ // Check termination
+ match step {
+ Step::Done => {
+ info!(
+ doc = ctx.doc_name,
+ evidence = state.evidence.len(),
+ "Navigation done"
+ );
+ break;
+ }
+ Step::ForceDone(reason) => {
+ info!(doc = ctx.doc_name, reason = %reason, "Forced done");
+ break;
+ }
+ Step::Continue => {
+ if !is_check {
+ state.dec_round();
+ }
+ }
+ }
+ }
+
+ Ok(())
+}
+
+/// Build the (system, user) prompt pair for a single navigation round.
+fn build_round_prompt(
+ query: &str,
+ task: Option<&str>,
+ ctx: &DocContext<'_>,
+ state: &WorkerState,
+ intent_context: &str,
+ keyword_hints: &str,
+ use_dispatch_prompt: bool,
+ max_rounds: u32,
+) -> (String, String) {
+ if use_dispatch_prompt && state.remaining == max_rounds {
+ worker_dispatch(&super::super::prompts::WorkerDispatchParams {
+ original_query: query,
+ task: task.unwrap_or(query),
+ doc_name: ctx.doc_name,
+ breadcrumb: &state.path_str(),
+ })
+ } else {
+ let visited_titles = format_visited_titles(state, ctx);
+ worker_navigation(&NavigationParams {
+ query,
+ task,
+ breadcrumb: &state.path_str(),
+ evidence_summary: &state.evidence_summary(),
+ missing_info: &state.missing_info,
+ last_feedback: &state.last_feedback,
+ remaining: state.remaining,
+ max_rounds: state.max_rounds,
+ history: &state.history_text(),
+ visited_titles: &visited_titles,
+ plan: &state.plan,
+ intent_context,
+ keyword_hints,
+ })
+ }
+}
+
+/// Parse LLM output and handle parse failures.
+///
+/// Returns `(command, is_parse_failure)`. On parse failure, updates state
+/// with feedback and pushes a history entry.
+fn handle_parse_failure(
+ llm_output: &str,
+ doc_name: &str,
+ state: &mut WorkerState,
+) -> (Command, bool) {
+ if llm_output.trim().len() < 2 {
+ tracing::warn!(
+ doc = doc_name,
+ response = llm_output.trim(),
+ "LLM response unusually short"
+ );
+ }
+ let (command, is_parse_failure) = parse_and_detect_failure(llm_output);
+ if is_parse_failure {
+ let raw_preview = if llm_output.trim().len() > 200 {
+ format!("{}...", &llm_output.trim()[..200])
+ } else {
+ llm_output.trim().to_string()
+ };
+ state.last_feedback = format!(
+ "Your output was not recognized as a valid command:\n\"{}\"\n\n\
+ Please output exactly one command (ls, cd, cat, head, find, findtree, grep, wc, pwd, check, or done).",
+ raw_preview
+ );
+ state.push_history("(unrecognized) → parse failure".to_string());
+ }
+ (command, is_parse_failure)
+}
+
+/// Push a round's command + feedback preview into history.
+fn push_round_history(state: &mut WorkerState, cmd_str: &str) {
+ let feedback_preview = if state.last_feedback.len() > 120 {
+ let boundary = state.last_feedback.ceil_char_boundary(120);
+ format!("{}...", &state.last_feedback[..boundary])
+ } else {
+ state.last_feedback.clone()
+ };
+ state.push_history(format!("{} → {}", cmd_str, feedback_preview));
+}
+
+/// Dynamic re-planning after an insufficient check.
+///
+/// If check returned INSUFFICIENT with enough remaining rounds and LLM budget,
+/// generates a new navigation plan. Otherwise clears stale replan state.
+async fn handle_replan(
+ is_check: bool,
+ query: &str,
+ task: Option<&str>,
+ ctx: &DocContext<'_>,
+ llm: &LlmClient,
+ state: &mut WorkerState,
+ emitter: &EventEmitter,
+ llm_calls: &mut u32,
+ max_llm: u32,
+) -> crate::error::Result<()> {
+ if !is_check {
+ return Ok(());
+ }
+
+ if !state.missing_info.is_empty() && state.remaining >= 3 && (max_llm == 0 || *llm_calls < max_llm) {
+ let missing = state.missing_info.clone();
+ info!(doc = ctx.doc_name, missing = %missing, "Re-planning navigation...");
+ let replan = build_replan_prompt(query, task, state, ctx);
+ let new_plan =
+ llm.complete(&replan.0, &replan.1)
+ .await
+ .map_err(|e| Error::LlmReasoning {
+ stage: "worker/replan".to_string(),
+ detail: format!("Re-plan LLM call failed: {e}"),
+ })?;
+ *llm_calls += 1;
+ let plan_text = new_plan.trim().to_string();
+ if !plan_text.is_empty() {
+ info!(
+ doc = ctx.doc_name,
+ plan = %plan_text,
+ "Re-plan generated"
+ );
+ emitter.emit_worker_replan(ctx.doc_name, &missing, plan_text.len());
+ state.plan = plan_text;
+ }
+ state.missing_info.clear();
+ } else if !state.missing_info.is_empty() {
+ state.plan.clear();
+ state.missing_info.clear();
+ }
+
+ Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::agent::config::DocContext;
+ use crate::agent::state::WorkerState;
+ use crate::document::{DocumentTree, NodeId};
+
+ fn test_ctx() -> (DocumentTree, NodeId) {
+ let tree = DocumentTree::new("Root", "root content");
+ let root = tree.root();
+ (tree, root)
+ }
+
+ #[test]
+ fn test_handle_parse_failure_valid_command() {
+ let (tree, root) = test_ctx();
+ let nav = crate::document::NavigationIndex::new();
+ let ctx = DocContext {
+ tree: &tree,
+ nav_index: &nav,
+ reasoning_index: &crate::document::ReasoningIndex::default(),
+ doc_name: "test",
+ };
+ let mut state = WorkerState::new(root, 10);
+
+ let (cmd, is_failure) = handle_parse_failure("ls", ctx.doc_name, &mut state);
+ assert!(!is_failure);
+ assert!(matches!(cmd, Command::Ls));
+ }
+
+ #[test]
+ fn test_handle_parse_failure_unrecognized() {
+ let (tree, root) = test_ctx();
+ let nav = crate::document::NavigationIndex::new();
+ let ctx = DocContext {
+ tree: &tree,
+ nav_index: &nav,
+ reasoning_index: &crate::document::ReasoningIndex::default(),
+ doc_name: "test",
+ };
+ let mut state = WorkerState::new(root, 10);
+
+ let (_cmd, is_failure) = handle_parse_failure("random garbage text", ctx.doc_name, &mut state);
+ assert!(is_failure);
+ assert!(state.last_feedback.contains("not recognized"));
+ assert!(state.history.last().unwrap().contains("unrecognized"));
+ }
+
+ #[test]
+ fn test_handle_parse_failure_short_response() {
+ let (tree, root) = test_ctx();
+ let nav = crate::document::NavigationIndex::new();
+ let ctx = DocContext {
+ tree: &tree,
+ nav_index: &nav,
+ reasoning_index: &crate::document::ReasoningIndex::default(),
+ doc_name: "test",
+ };
+ let mut state = WorkerState::new(root, 10);
+
+        // "ls" is only two characters — below nothing trips the short-response warning, and it is a valid command
+ let (cmd, is_failure) = handle_parse_failure("ls", ctx.doc_name, &mut state);
+ assert!(!is_failure);
+ assert!(matches!(cmd, Command::Ls));
+ }
+
+ #[test]
+ fn test_push_round_history_short_feedback() {
+ let (_, root) = test_ctx();
+ let mut state = WorkerState::new(root, 10);
+ state.last_feedback = "short feedback".to_string();
+
+ push_round_history(&mut state, "ls");
+ assert_eq!(state.history.len(), 1);
+ assert!(state.history[0].contains("ls → short feedback"));
+ }
+
+ #[test]
+ fn test_push_round_history_long_feedback() {
+ let (_, root) = test_ctx();
+ let mut state = WorkerState::new(root, 10);
+ state.last_feedback = "a".repeat(200);
+
+ push_round_history(&mut state, "cat");
+ assert_eq!(state.history.len(), 1);
+ assert!(state.history[0].contains("cat → "));
+ // Should be truncated with ...
+ assert!(state.history[0].contains("..."));
+ }
+
+ #[test]
+ fn test_push_round_history_respects_max_entries() {
+ let (_, root) = test_ctx();
+ let mut state = WorkerState::new(root, 10);
+ state.last_feedback = "ok".to_string();
+
+ for i in 0..8 {
+ push_round_history(&mut state, &format!("cmd_{i}"));
+ }
+ // MAX_HISTORY_ENTRIES is 6, so only last 6 should remain
+ assert_eq!(state.history.len(), 6);
+ }
+
+ #[test]
+ fn test_build_round_prompt_dispatch_first_round() {
+ let (tree, root) = test_ctx();
+ let nav = crate::document::NavigationIndex::new();
+ let ctx = DocContext {
+ tree: &tree,
+ nav_index: &nav,
+ reasoning_index: &crate::document::ReasoningIndex::default(),
+ doc_name: "test_doc",
+ };
+ let mut state = WorkerState::new(root, 10);
+ // remaining == max_rounds means first round
+ assert_eq!(state.remaining, 10);
+
+ let (system, user) = build_round_prompt(
+ "test query",
+ Some("sub-task"),
+ &ctx,
+ &state,
+ "factual — find answer",
+ "",
+ true, // use_dispatch_prompt
+ 10,
+ );
+ assert!(system.contains("dispatch") || !system.is_empty());
+ assert!(user.contains("test query") || user.contains("sub-task"));
+ }
+
+ #[test]
+ fn test_build_round_prompt_navigation_subsequent_round() {
+ let (tree, root) = test_ctx();
+ let nav = crate::document::NavigationIndex::new();
+ let ctx = DocContext {
+ tree: &tree,
+ nav_index: &nav,
+ reasoning_index: &crate::document::ReasoningIndex::default(),
+ doc_name: "test_doc",
+ };
+ let mut state = WorkerState::new(root, 10);
+ state.remaining = 8; // not first round
+
+ let (system, _user) = build_round_prompt(
+ "test query",
+ None,
+ &ctx,
+ &state,
+ "factual",
+ "keyword hints here",
+ false, // use_dispatch_prompt
+ 10,
+ );
+ assert!(!system.is_empty());
+ }
+
+ #[test]
+ fn test_utf8_safe_truncation_in_history() {
+ let (_, root) = test_ctx();
+ let mut state = WorkerState::new(root, 10);
+ // Each '中' is 3 bytes in UTF-8
+ state.last_feedback = "中文反馈内容测试截断安全".repeat(20);
+
+ push_round_history(&mut state, "cat");
+ let entry = &state.history[0];
+ // Should be truncated without panicking
+ assert!(entry.contains("cat → "));
+ assert!(entry.len() < state.last_feedback.len() + 20);
+ }
+}
diff --git a/rust/src/agent/worker/planning.rs b/rust/src/agent/worker/planning.rs
index eb98999..a42bf52 100644
--- a/rust/src/agent/worker/planning.rs
+++ b/rust/src/agent/worker/planning.rs
@@ -13,8 +13,12 @@ use super::super::context::FindHit;
use super::super::state::WorkerState;
use super::format::format_visited_titles;
-/// Maximum total chars for keyword + semantic sections in planning prompt.
-const PLAN_CONTEXT_BUDGET: usize = 1500;
+/// Maximum keyword/semantic hit entries in plan prompt.
+const MAX_PLAN_ENTRIES: usize = 15;
+/// Maximum section summaries in plan prompt.
+const MAX_SECTION_SUMMARIES: usize = 10;
+/// Maximum deep expansion entries.
+const MAX_EXPANSION_ENTRIES: usize = 8;
/// Build the navigation planning prompt (Phase 1.5).
pub fn build_plan_prompt(
@@ -39,6 +43,7 @@ pub fn build_plan_prompt(
} else {
let mut section =
String::from("\nKeyword index matches (use these to prioritize navigation):\n");
+ let mut entry_count = 0;
for hit in keyword_hits {
let mut entries = hit.entries.clone();
entries.sort_by(|a, b| {
@@ -56,12 +61,20 @@ pub fn build_plan_prompt(
" - keyword '{}' → {} (depth {}, weight {:.2})\n",
hit.keyword, ancestor_path, entry.depth, entry.weight
));
- if section.len() > PLAN_CONTEXT_BUDGET {
- section.push_str(" ... (more hits truncated)\n");
+ if let Some(content) = ctx.cat(entry.node_id) {
+ if let Some(snippet) =
+ super::super::tools::content_snippet(content, &hit.keyword, 300)
+ {
+ section.push_str(&format!(" \"{}\"\n", snippet));
+ }
+ }
+ entry_count += 1;
+ if entry_count >= MAX_PLAN_ENTRIES {
+ section.push_str(" ... (more hits omitted)\n");
break;
}
}
- if section.len() > PLAN_CONTEXT_BUDGET {
+ if entry_count >= MAX_PLAN_ENTRIES {
break;
}
}
@@ -70,9 +83,7 @@ pub fn build_plan_prompt(
let deep_expansion = build_deep_expansion(keyword_hits, ctx);
if !deep_expansion.is_empty() {
- if keyword_section.len() + deep_expansion.len() <= PLAN_CONTEXT_BUDGET {
- keyword_section.push_str(&deep_expansion);
- }
+ keyword_section.push_str(&deep_expansion);
}
let semantic_section = build_semantic_hints(&query_keywords, &query_lower, ctx);
@@ -165,7 +176,9 @@ pub fn build_replan_prompt(
/// ```text
/// Keyword matches (use find to jump directly):
/// - 'complex' → Performance (weight 0.85)
+/// "...complexity analysis shows..."
/// - 'latency' → Performance (weight 0.72)
+/// "...latency benchmarks indicate..."
/// ```
pub fn format_keyword_hints(keyword_hits: &[FindHit], ctx: &DocContext<'_>) -> String {
if keyword_hits.is_empty() {
@@ -173,6 +186,7 @@ pub fn format_keyword_hints(keyword_hits: &[FindHit], ctx: &DocContext<'_>) -> S
}
let mut section = String::from("Keyword matches (use find to jump directly):\n");
+ let mut entry_count = 0;
for hit in keyword_hits {
let mut entries = hit.entries.clone();
entries.sort_by(|a, b| {
@@ -190,8 +204,16 @@ pub fn format_keyword_hints(keyword_hits: &[FindHit], ctx: &DocContext<'_>) -> S
" - '{}' → {} (weight {:.2})\n",
hit.keyword, title, entry.weight
));
- if section.len() > 800 {
- section.push_str(" ... (more)\n");
+ if let Some(content) = ctx.cat(entry.node_id) {
+ if let Some(snippet) =
+ super::super::tools::content_snippet(content, &hit.keyword, 300)
+ {
+ section.push_str(&format!(" \"{}\"\n", snippet));
+ }
+ }
+ entry_count += 1;
+ if entry_count >= MAX_PLAN_ENTRIES {
+ section.push_str(" ... (more omitted)\n");
return section;
}
}
@@ -228,21 +250,18 @@ fn build_intent_signals(intent: QueryIntent, ctx: &DocContext<'_>) -> String {
if !shortcut.document_summary.is_empty() {
section.push_str(&format!(
"Document summary: {}\n",
- &shortcut.document_summary[..shortcut.document_summary.len().min(500)]
+ shortcut.document_summary
));
}
+ let mut summary_count = 0;
for ss in &shortcut.section_summaries {
- let summary_preview = if ss.summary.len() > 200 {
- format!("{}...", &ss.summary[..200])
- } else {
- ss.summary.clone()
- };
section.push_str(&format!(
" - Section '{}' (depth {}): {}\n",
- ss.title, ss.depth, summary_preview
+ ss.title, ss.depth, ss.summary
));
- if section.len() > PLAN_CONTEXT_BUDGET {
- section.push_str(" ... (more sections truncated)\n");
+ summary_count += 1;
+ if summary_count >= MAX_SECTION_SUMMARIES {
+ section.push_str(" ... (more sections omitted)\n");
break;
}
}
@@ -312,7 +331,7 @@ fn build_semantic_hints(
.collect();
let mut section = String::new();
- let budget_remaining = PLAN_CONTEXT_BUDGET.saturating_sub(section.len());
+ let mut entry_count = 0;
for route in routes {
let nav = match ctx.nav_entry(route.node_id) {
@@ -374,10 +393,11 @@ fn build_semantic_hints(
" - Section '{}' — BM25: {:.2}{}\n",
route.title, bm25_score, annotation_str
);
- if section.len() + line.len() > budget_remaining {
+ section.push_str(&line);
+ entry_count += 1;
+ if entry_count >= MAX_PLAN_ENTRIES {
break;
}
- section.push_str(&line);
}
if section.is_empty() {
@@ -398,6 +418,7 @@ fn build_deep_expansion(keyword_hits: &[FindHit], ctx: &DocContext<'_>) -> Strin
let mut seen_parents = HashSet::new();
let mut expansion = String::new();
+ let mut expansion_count = 0;
for hit in keyword_hits {
for entry in &hit.entries {
@@ -432,12 +453,13 @@ fn build_deep_expansion(keyword_hits: &[FindHit], ctx: &DocContext<'_>) -> Strin
));
}
expansion.push('\n');
- if expansion.len() > 500 {
- expansion.push_str(" ... (more expansions truncated)\n");
+ expansion_count += 1;
+ if expansion_count >= MAX_EXPANSION_ENTRIES {
+ expansion.push_str(" ... (more expansions omitted)\n");
break;
}
}
- if expansion.len() > 500 {
+ if expansion_count >= MAX_EXPANSION_ENTRIES {
break;
}
}
@@ -640,7 +662,7 @@ mod tests {
#[test]
fn test_build_replan_prompt() {
let (tree, nav, root, _, _) = build_semantic_test_tree();
- let mut state = WorkerState::new(root, 8);
+ let mut state = WorkerState::new(root, 15);
state.missing_info = "Need Q2 revenue figures".to_string();
state.add_evidence(Evidence {
source_path: "root/Revenue".to_string(),
diff --git a/rust/src/client/engine.rs b/rust/src/client/engine.rs
index d32b4e6..cc95a5f 100644
--- a/rust/src/client/engine.rs
+++ b/rust/src/client/engine.rs
@@ -40,7 +40,7 @@
use std::{collections::HashMap, sync::Arc};
use futures::StreamExt;
-use tracing::info;
+use tracing::{info, warn};
use crate::{
DocumentTree, Error,
@@ -366,9 +366,15 @@ impl Engine {
old_id: Option<&str>,
) -> (Vec, Vec) {
let item = Self::build_index_item(&doc);
+
+ info!(
+ "[index] Persisting document '{}'...",
+ doc.name,
+ );
let persisted = IndexerClient::to_persisted(doc, pipeline_options).await;
if let Err(e) = self.workspace.save(&persisted).await {
+ warn!("[index] Failed to save document: {}", e);
return (
Vec::new(),
vec![FailedItem::new(source_label, e.to_string())],
@@ -377,11 +383,11 @@ impl Engine {
// Clean up old document after successful save
if let Some(old_id) = old_id {
if let Err(e) = self.workspace.remove(old_id).await {
- tracing::warn!("Failed to remove old document {}: {}", old_id, e);
+ warn!("Failed to remove old document {}: {}", old_id, e);
}
}
- info!("Indexed document: {}", item.doc_id);
+ info!("[index] Document persisted: {}", item.doc_id);
(vec![item], Vec::new())
}
@@ -417,8 +423,14 @@ impl Engine {
self.with_timeout(timeout_secs, async move {
let doc_ids = self.resolve_scope(&ctx.scope).await?;
+ info!(doc_count = doc_ids.len(), "Resolving documents for query");
let (documents, failed) = self.load_documents(&doc_ids).await?;
+ info!(
+ loaded = documents.len(),
+ failed = failed.len(),
+ "Documents loaded"
+ );
if documents.is_empty() {
return Err(Error::Config(format!(
"No documents available for query: {} failures",
@@ -1004,20 +1016,44 @@ impl Engine {
);
let concurrency = self.config.llm.throttle.max_concurrent_requests;
- let loaded: Vec