diff --git a/README.md b/README.md index ad6de21..c8391d1 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ Vectorless

Reasoning-based Document Engine

-
Reason, don't vector · Structure, not chunks · Agents, not embeddings · Exact, not synthesized
+
Reason, don't vector · Structure, not chunks · Agents, not embeddings
[![PyPI](https://img.shields.io/pypi/v/vectorless.svg)](https://pypi.org/project/vectorless/) [![PyPI Downloads](https://static.pepy.tech/badge/vectorless/month)](https://pepy.tech/projects/vectorless) diff --git a/examples/session_walkthrough/README.md b/examples/session_walkthrough/README.md new file mode 100644 index 0000000..17174a0 --- /dev/null +++ b/examples/session_walkthrough/README.md @@ -0,0 +1,32 @@ +# Session API Walkthrough + +Demonstrates the full high-level Vectorless Python API using the `Session` and `SyncSession` classes. + +## What it covers + +| # | Topic | API | +|---|-------|-----| +| 1 | Session creation | `Session()`, `from_env()`, `from_config_file()` | +| 2 | Indexing sources | `index(content=)`, `index(path=)`, `index(bytes_data=)`, `index(directory=)` | +| 3 | Batch indexing | `index_batch(paths, jobs=N)` | +| 4 | Querying | `ask(question, doc_ids=)`, `ask(question, workspace_scope=True)` | +| 5 | Streaming query | `query_stream()` async iterator | +| 6 | Document management | `list_documents()`, `document_exists()`, `remove_document()`, `clear_all()` | +| 7 | Document graph | `get_graph()` nodes, edges, keywords | +| 8 | Event callbacks | `EventEmitter` with `@on_index` / `@on_query` decorators | +| 9 | Metrics | `metrics_report()` | +| 10 | Sync API | `SyncSession` (no async/await) | + +## Setup + +```bash +pip install vectorless +export VECTORLESS_API_KEY="sk-..." +export VECTORLESS_MODEL="gpt-4o" +``` + +## Run + +```bash +python main.py +``` diff --git a/examples/session_walkthrough/main.py b/examples/session_walkthrough/main.py new file mode 100644 index 0000000..f94e5bb --- /dev/null +++ b/examples/session_walkthrough/main.py @@ -0,0 +1,589 @@ +""" +Session API walkthrough -- demonstrates the full high-level Vectorless API. + +This example uses the Session class (recommended entry point) to cover: + 1. Session creation (constructor / from_env / from_config_file) + 2. 
Indexing from various sources (content, path, directory, bytes) + 3. Batch indexing with concurrency control + 4. Querying with doc_ids and workspace scope + 5. Streaming query with real-time events + 6. Document management (list, exists, remove, clear) + 7. Cross-document relationship graph + 8. Event callbacks for progress monitoring + 9. Metrics reporting + 10. SyncSession (synchronous API, no async/await) + +Usage: + export VECTORLESS_API_KEY="sk-..." + export VECTORLESS_MODEL="gpt-4o" + pip install vectorless + python main.py +""" + +import asyncio +import os +import tempfile + +from vectorless import ( + Session, + SyncSession, + EventEmitter, + VectorlessError, +) +from vectorless.events import IndexEventType, QueryEventType + + +# ────────────────────────────────────────────────────────────────── +# Sample documents used throughout the example +# ────────────────────────────────────────────────────────────────── + +ARCHITECTURE_DOC = """\ +# Vectorless Architecture + +## Overview + +Vectorless is a reasoning-native document intelligence engine. +It uses hierarchical semantic trees instead of vector embeddings. + +## Key Concepts + +- **Semantic Tree**: Documents are parsed into a tree of sections. +- **LLM Navigation**: Queries are resolved by traversing the tree. +- **No Vectors**: No embeddings, no similarity search, no vector DB. + +## Retrieval Flow + +Engine.query() + -> query/understand() -> QueryPlan + -> Orchestrator dispatches Workers + -> Workers navigate document trees + -> rerank -> synthesis -> answer +""" + +FINANCE_DOC = """\ +# Q4 Financial Report + +## Revenue + +Total revenue for Q4 was $12.3M, up 15% from Q3. +SaaS subscriptions accounted for $8.1M, consulting for $4.2M. + +## Costs + +Operating costs were $9.8M, including $3.2M in engineering salaries. +Marketing spend was reduced by 8% to $1.5M. + +## Outlook + +Projected Q1 revenue is $13.5M based on current pipeline. 
+""" + +SECURITY_DOC = """\ +# Security Policy + +## Authentication + +All API requests require a Bearer token in the Authorization header. +Tokens expire after 24 hours and must be refreshed. + +## Data Encryption + +Data at rest is encrypted using AES-256. Data in transit uses TLS 1.3. + +## Audit Logging + +All access to sensitive data is logged and retained for 90 days. +""" + + +# ────────────────────────────────────────────────────────────────── +# Helper: set up a temp directory with sample files +# ────────────────────────────────────────────────────────────────── + +def create_sample_directory() -> tuple[str, list[str]]: + """Create a temp directory with sample documents. Returns (dir, paths).""" + tmpdir = tempfile.mkdtemp(prefix="vectorless_walkthrough_") + docs = { + "architecture.md": ARCHITECTURE_DOC, + "finance.md": FINANCE_DOC, + "security.md": SECURITY_DOC, + } + paths = [] + for name, content in docs.items(): + path = os.path.join(tmpdir, name) + with open(path, "w") as f: + f.write(content) + paths.append(path) + return tmpdir, paths + + +def cleanup_directory(tmpdir: str) -> None: + """Remove all files in the temp directory.""" + for fname in os.listdir(tmpdir): + os.remove(os.path.join(tmpdir, fname)) + os.rmdir(tmpdir) + + +# ────────────────────────────────────────────────────────────────── +# Section 1: Session Creation +# ────────────────────────────────────────────────────────────────── + +async def demo_session_creation() -> Session: + """Demonstrate different ways to create a Session.""" + print("=" * 60) + print(" 1. 
Session Creation") + print("=" * 60) + + # Option A: Constructor with explicit credentials + api_key = os.environ.get("VECTORLESS_API_KEY", "sk-...") + model = os.environ.get("VECTORLESS_MODEL", "gpt-4o") + endpoint = os.environ.get("VECTORLESS_ENDPOINT") + + session = Session(api_key=api_key, model=model, endpoint=endpoint) + print(f" Created: {session}") + + # Option B: from environment variables + # session = Session.from_env() + + # Option C: from a config file + # session = Session.from_config_file("~/.vectorless/config.toml") + + # Option D: with an EventEmitter for progress callbacks + # events = EventEmitter() + # session = Session(api_key=api_key, model=model, events=events) + + print() + return session + + +# ────────────────────────────────────────────────────────────────── +# Section 2: Indexing from Various Sources +# ────────────────────────────────────────────────────────────────── + +async def demo_indexing(session: Session, tmpdir: str, paths: list[str]) -> dict[str, str]: + """Demonstrate indexing from content, path, directory, and bytes.""" + print("=" * 60) + print(" 2. Indexing") + print("=" * 60) + + doc_ids: dict[str, str] = {} + + # --- 2a. Index from in-memory content --- + print(" [content] Indexing from string...") + result = await session.index( + content=ARCHITECTURE_DOC, + format="markdown", + name="architecture", + ) + doc_ids["architecture"] = result.doc_id # type: ignore[assignment] + print(f" doc_id: {result.doc_id}") + print(f" items: {result.total()}") + + # --- 2b. Index from a file path --- + print(" [path] Indexing from file path...") + result = await session.index(path=paths[1], name="finance") + doc_ids["finance"] = result.doc_id # type: ignore[assignment] + print(f" doc_id: {result.doc_id}") + + # --- 2c. 
Index from raw bytes --- + print(" [bytes] Indexing from raw bytes...") + result = await session.index( + bytes_data=SECURITY_DOC.encode("utf-8"), + format="markdown", + name="security", + ) + doc_ids["security"] = result.doc_id # type: ignore[assignment] + print(f" doc_id: {result.doc_id}") + + # --- 2d. Index a directory --- + print(" [dir] Indexing a directory...") + # Clear first to see fresh results + await session.clear_all() + + result = await session.index(directory=tmpdir, name="all_docs") + print(f" doc_id: {result.doc_id}") + print(f" items: {len(result.items)}") + for item in result.items: + print(f" - {item.name} ({item.doc_id[:8]}...)") + doc_ids[item.name] = item.doc_id + + print() + return doc_ids + + +# ────────────────────────────────────────────────────────────────── +# Section 3: Batch Indexing with Concurrency +# ────────────────────────────────────────────────────────────────── + +async def demo_batch_indexing(session: Session, paths: list[str]) -> list[str]: + """Demonstrate batch indexing with concurrent jobs.""" + print("=" * 60) + print(" 3. Batch Indexing (concurrency=2)") + print("=" * 60) + + # Clear to start fresh + await session.clear_all() + + results = await session.index_batch( + paths, + mode="default", + jobs=2, # max 2 concurrent indexing operations + force=False, + ) + + doc_ids = [] + for r in results: + print(f" {r.doc_id[:8]}... ({len(r.items)} items)") + for item in r.items: + doc_ids.append(item.doc_id) + + print(f" Batch indexed {len(results)} file(s), {len(doc_ids)} document(s) total") + print() + return doc_ids + + +# ────────────────────────────────────────────────────────────────── +# Section 4: Querying +# ────────────────────────────────────────────────────────────────── + +async def demo_querying(session: Session, doc_ids: list[str]) -> None: + """Demonstrate querying with doc_ids and workspace scope.""" + print("=" * 60) + print(" 4. 
Querying") + print("=" * 60) + + # --- Query specific documents --- + print(" [ask] Query specific documents...") + response = await session.ask( + "What was the total revenue for Q4?", + doc_ids=doc_ids[:2], # limit to first two docs + ) + + result = response.single() + if result: + print(f" Score: {result.score:.2f}") + print(f" Confidence: {result.confidence:.2f}") + print(f" Answer: {result.content[:150]}...") + if result.evidence: + print(f" Evidence: {len(result.evidence)} item(s)") + for ev in result.evidence[:2]: + print(f" - {ev.title}: {ev.content[:80]}...") + if result.metrics: + print(f" LLM calls: {result.metrics.llm_calls}") + print(f" Nodes: {result.metrics.nodes_visited}") + + # --- Query across all documents --- + print() + print(" [workspace_scope] Query across entire workspace...") + response = await session.ask( + "How is data encrypted?", + workspace_scope=True, + ) + for item in response.items: + print(f" [{item.doc_id[:8]}...] score={item.score:.2f}") + print(f" {item.content[:120]}...") + + # --- Query with timeout --- + print() + print(" [timeout] Query with 30s timeout...") + try: + response = await session.ask( + "What is the retrieval flow?", + doc_ids=doc_ids, + timeout_secs=30, + ) + if response.single(): + print(f" Answer: {response.single().content[:150]}...") + except VectorlessError as e: + print(f" Error: {e}") + + print() + + +# ────────────────────────────────────────────────────────────────── +# Section 5: Streaming Query +# ────────────────────────────────────────────────────────────────── + +async def demo_streaming(session: Session, doc_ids: list[str]) -> None: + """Demonstrate streaming query with real-time events.""" + print("=" * 60) + print(" 5. 
Streaming Query") + print("=" * 60) + + stream = await session.query_stream( + "What are the key concepts?", + doc_ids=doc_ids[:1], + ) + + event_count = 0 + async for event in stream: + event_count += 1 + event_type = event.get("type", "unknown") + # Print a compact summary of each event + if event_type == "completed": + results = event.get("results", []) + print(f" [{event_count}] completed — {len(results)} result(s)") + elif event_type == "error": + print(f" [{event_count}] error — {event.get('message', '')}") + else: + print(f" [{event_count}] {event_type}") + + # The final result is available after iteration completes + if stream.result: + final = stream.result + item = final.single() + if item: + print(f" Final answer: {item.content[:150]}...") + + print() + + +# ────────────────────────────────────────────────────────────────── +# Section 6: Document Management +# ────────────────────────────────────────────────────────────────── + +async def demo_document_management(session: Session, doc_ids: list[str]) -> None: + """Demonstrate list, exists, remove, and clear.""" + print("=" * 60) + print(" 6. Document Management") + print("=" * 60) + + # --- List all documents --- + docs = await session.list_documents() + print(f" Listed {len(docs)} document(s):") + for doc in docs: + pages = f", pages={doc.page_count}" if doc.page_count else "" + print(f" {doc.name} id={doc.id[:8]}... 
format={doc.format}{pages}") + + # --- Check existence --- + if doc_ids: + exists = await session.document_exists(doc_ids[0]) + print(f"\n exists({doc_ids[0][:8]}...): {exists}") + + # --- Remove a document --- + if len(doc_ids) > 1: + removed = await session.remove_document(doc_ids[1]) + print(f" remove({doc_ids[1][:8]}...): {removed}") + + # Verify removal + exists_after = await session.document_exists(doc_ids[1]) + print(f" exists after removal: {exists_after}") + + # --- List again --- + docs = await session.list_documents() + print(f"\n After removal: {len(docs)} document(s)") + + print() + + +# ────────────────────────────────────────────────────────────────── +# Section 7: Cross-Document Relationship Graph +# ────────────────────────────────────────────────────────────────── + +async def demo_graph(session: Session) -> None: + """Demonstrate the cross-document relationship graph.""" + print("=" * 60) + print(" 7. Document Graph") + print("=" * 60) + + graph = await session.get_graph() + + if graph is None or graph.is_empty(): + print(" Graph is empty (no documents or no relationships found)") + print() + return + + print(f" Nodes: {graph.node_count()}, Edges: {graph.edge_count()}") + + for did in graph.doc_ids(): + node = graph.get_node(did) + if node: + keywords = ", ".join(k.keyword for k in node.top_keywords[:5]) + neighbors = graph.get_neighbors(did) + print(f" {node.title}") + print(f" format: {node.format}, nodes: {node.node_count}") + print(f" keywords: [{keywords}]") + print(f" neighbors: {len(neighbors)}") + for edge in neighbors[:3]: + target = graph.get_node(edge.target_doc_id) + target_name = target.title if target else edge.target_doc_id[:8] + weight_str = f"weight={edge.weight:.2f}" + evidence_str = "" + if edge.evidence: + evidence_str = f", shared_keywords={edge.evidence.shared_keyword_count}" + print(f" -> {target_name} ({weight_str}{evidence_str})") + + print() + + +# ────────────────────────────────────────────────────────────────── +# 
Section 8: Event Callbacks +# ────────────────────────────────────────────────────────────────── + +async def demo_events() -> None: + """Demonstrate event callbacks with EventEmitter.""" + print("=" * 60) + print(" 8. Event Callbacks") + print("=" * 60) + + events = EventEmitter() + + @events.on_index + def on_index_event(event): + if event.event_type == IndexEventType.STARTED: + print(f" [INDEX] Started: {event.path or event.message}") + elif event.event_type == IndexEventType.COMPLETE: + print(f" [INDEX] Complete: {event.doc_id or event.message}") + elif event.event_type == IndexEventType.ERROR: + print(f" [INDEX] Error: {event.message}") + + @events.on_query + def on_query_event(event): + if event.event_type == QueryEventType.STARTED: + print(f" [QUERY] Started: {event.query}") + elif event.event_type == QueryEventType.COMPLETE: + print(f" [QUERY] Complete: {event.total_results} result(s)") + + # Create a session with the event emitter + api_key = os.environ.get("VECTORLESS_API_KEY", "sk-...") + model = os.environ.get("VECTORLESS_MODEL", "gpt-4o") + session = Session(api_key=api_key, model=model, events=events) + + # Index and query — events fire automatically + await session.index(content=ARCHITECTURE_DOC, format="markdown", name="demo_events") + await session.ask("What are the key concepts?", workspace_scope=True) + + await session.clear_all() + print() + + +# ────────────────────────────────────────────────────────────────── +# Section 9: Metrics +# ────────────────────────────────────────────────────────────────── + +async def demo_metrics(session: Session) -> None: + """Demonstrate metrics reporting.""" + print("=" * 60) + print(" 9. 
Metrics Report") + print("=" * 60) + + report = session.metrics_report() + if report: + # The report contains llm and retrieval subsections + if hasattr(report, "llm"): + llm = report.llm + print(f" LLM Metrics:") + print(f" Total calls: {getattr(llm, 'total_calls', 'N/A')}") + print(f" Total tokens: {getattr(llm, 'total_tokens', 'N/A')}") + print(f" Cache hit rate: {getattr(llm, 'cache_hit_rate', 'N/A')}") + if hasattr(report, "retrieval"): + ret = report.retrieval + print(f" Retrieval Metrics:") + print(f" Total queries: {getattr(ret, 'total_queries', 'N/A')}") + print(f" Avg latency: {getattr(ret, 'avg_latency_ms', 'N/A')} ms") + else: + print(" No metrics available") + + print() + + +# ────────────────────────────────────────────────────────────────── +# Section 10: SyncSession (Synchronous API) +# ────────────────────────────────────────────────────────────────── + +def demo_sync_session() -> None: + """Demonstrate the synchronous Session (no async/await needed).""" + print("=" * 60) + print(" 10. 
SyncSession (no async/await)") + print("=" * 60) + + api_key = os.environ.get("VECTORLESS_API_KEY", "sk-...") + model = os.environ.get("VECTORLESS_MODEL", "gpt-4o") + + # Can also use: SyncSession.from_env() + with SyncSession(api_key=api_key, model=model) as session: + # Index from content + result = session.index( + content=FINANCE_DOC, + format="markdown", + name="sync_demo", + ) + print(f" Indexed: {result.doc_id}") + + # Query + response = session.ask( + "What was the total revenue?", + doc_ids=[result.doc_id], # type: ignore[list-item] + ) + item = response.single() + if item: + print(f" Answer: {item.content[:150]}...") + + # Cleanup + session.clear_all() + print(" Cleaned up") + + print() + + +# ────────────────────────────────────────────────────────────────── +# Main +# ────────────────────────────────────────────────────────────────── + +async def main() -> None: + print() + print(" Vectorless — Session API Walkthrough") + print(" " + "-" * 38) + print() + + # 1. Create session + session = await demo_session_creation() + + # Set up sample directory + tmpdir, paths = create_sample_directory() + + try: + # 2. Indexing + doc_id_map = await demo_indexing(session, tmpdir, paths) + all_doc_ids = list(doc_id_map.values()) + + # 3. Batch indexing (clears and re-indexes) + batch_doc_ids = await demo_batch_indexing(session, paths) + all_doc_ids = batch_doc_ids if batch_doc_ids else all_doc_ids + + # 4. Querying + if all_doc_ids: + await demo_querying(session, all_doc_ids) + + # 5. Streaming query + if all_doc_ids: + await demo_streaming(session, all_doc_ids) + + # 6. Document management + await demo_document_management(session, all_doc_ids) + + # 7. Graph + await demo_graph(session) + + # 8. Events (creates its own session) + await demo_events() + + # 9. Metrics + await demo_metrics(session) + + finally: + # Cleanup + await session.clear_all() + cleanup_directory(tmpdir) + print("=" * 60) + print(" Cleanup complete.") + print("=" * 60) + + # 10. 
SyncSession (separate, runs synchronously) + demo_sync_session() + + print(" Done.") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/rust/examples/single_doc_challenge.rs b/rust/examples/single_doc_challenge.rs new file mode 100644 index 0000000..332c241 --- /dev/null +++ b/rust/examples/single_doc_challenge.rs @@ -0,0 +1,254 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Single-document reasoning challenge. +//! +//! Indexes a realistic technical document and asks questions that require +//! the engine to navigate deep into the tree, cross-reference details +//! across distant sections, and extract information buried in nested +//! structures — not surface-level keyword matches. +//! +//! ```bash +//! LLM_API_KEY=sk-xxx LLM_MODEL=gpt-4o \ +//! LLM_ENDPOINT=https://api.openai.com/v1 \ +//! cargo run --example single_doc_challenge +//! ``` + +use vectorless::{DocumentFormat, EngineBuilder, IndexContext, QueryContext}; + +/// A research report with information scattered across sections. +/// The answers to the challenge questions require connecting dots +/// from different parts of the document, not simple keyword lookup. +const REPORT: &str = r#" +# Quantum Computing Division — Annual Research Report 2025 + +## Executive Summary + +The Quantum Computing Division achieved several milestones in fiscal year 2025. +Total division revenue reached $47.2M, representing 23% year-over-year growth. +The division employed 312 staff across four research labs as of December 2025. +Headcount grew by 18% during the year, with the majority of new hires in the +error correction and cryogenics teams. + +The board approved a $200M capital investment program spanning 2025-2028. +Phase 1 ($52M) was fully deployed in 2025, primarily in dilution refrigerator +procurement and cleanroom expansion at the Zurich facility. 
+ +## Research Labs + +### Lab A — Superconducting Qubits (Zurich) + +Lab A focuses on transmon qubit design and fabrication. The lab operates +two dilution refrigerators: FR-01 (purchased 2023, 20mK base temperature) +and FR-02 (commissioned Q3 2025, 15mK base temperature). FR-02 was the +single largest capital expenditure in 2025 at $8.7M. + +Current qubit specifications: +- Qubit count: 127 (FR-01: 64, FR-02: 63) +- Average T1 coherence time: 142 microseconds (up from 98μs in 2024) +- Average T2 coherence time: 89 microseconds +- Single-qubit gate fidelity: 99.92% +- Two-qubit gate fidelity: 99.67% +- Readout fidelity: 99.81% + +The 2025 coherence improvement was primarily driven by the transition from +aluminum to tantalum transmon junctions, which reduced two-level system (TLS) +defect density by 40%. + +### Lab B — Topological Qubits (Tokyo) + +Lab B pursues Majorana-based topological qubits using semiconductor-superconductor +nanowires. The team fabricated 12 nanowire devices during 2025, of which 3 +demonstrated measurable topological gap. This is a significant improvement +over 2024 when only 1 device out of 8 showed the gap. + +The topological gap measurement protocol requires the device temperature to +remain below 20mK throughout the 48-hour characterization cycle. Only FR-02 +in Zurich meets this requirement, so Lab B ships devices to Zurich for final +characterization — creating a logistical dependency between the two labs. + +Key metric: topological gap size averaged 0.35meV across successful devices, +compared to the theoretical target of 0.5meV. The gap-to-target ratio improved +from 48% in 2024 to 70% in 2025. + +### Lab C — Quantum Error Correction (Cambridge) + +Lab C develops surface code error correction protocols. In 2025, the team +achieved a critical milestone: below-threshold error correction on a 17-qubit +surface code patch, reducing logical error rate from 2.1×10⁻² to 3.4×10⁻³ +per correction cycle. 
+ +The threshold simulations used Lab A's measured gate fidelities as input +parameters. The below-threshold result was only possible after Lab A's T1 +coherence improvement from 98μs to 142μs — the simulation models showed +that the 98μs regime was above the error correction threshold for the 17-qubit +code, making the Lab A / Lab C dependency critical. + +Lab C also developed a new decoder algorithm called "Cascade" that reduces +classical processing latency from 1.2μs to 0.4μs per syndrome extraction cycle. +This decoder runs on an FPGA co-processor board that was custom-designed by +Lab D. + +### Lab D — Control Systems (Boston) + +Lab D designs and manufactures the classical control electronics for all qubit +types. The flagship product is the QCS-4 control system, capable of driving +up to 256 qubit channels with 14-bit DAC resolution and sub-nanosecond timing +precision. + +In 2025, Lab D delivered 4 QCS-4 units to Lab A and 2 units to Lab B. +Lab C received a modified QCS-4 variant with the integrated FPGA decoder +co-processor. The FPGA decoder board is a custom design: Xilinx Ultrascale+ +XCU26 FPGA, 400k logic cells, running at 350MHz. Lab D is the sole source +for this board — there is no commercial equivalent. + +A notable incident occurred in August 2025 when a firmware bug in the QCS-4 +DAC calibration routine caused systematic phase errors in two-qubit gate +operations. The bug was traced to an integer overflow in the calibration LUT +when operating above 4.2 GHz. The issue affected Lab A's FR-01 for 11 days +before a patched firmware was deployed. During this period, Lab A's measured +two-qubit gate fidelity temporarily dropped to 97.31%. 
+ +## Financial Summary + +| Category | 2024 | 2025 | Change | +|----------|------|------|--------| +| Revenue | $38.4M | $47.2M | +23% | +| R&D Expense | $31.6M | $38.9M | +23% | +| Capital Expenditure | $18.2M | $52.0M | +186% | +| Staff Count (Dec) | 264 | 312 | +18% | +| Patents Filed | 14 | 19 | +36% | + +Revenue breakdown by source: +- Government contracts: $19.8M (42%) +- Enterprise partnerships: $15.3M (32%) +- IP licensing: $8.6M (18%) +- Consulting services: $3.5M (8%) + +The $52M capital expenditure in 2025 included: +- FR-02 dilution refrigerator (Zurich): $8.7M +- Cleanroom expansion (Zurich): $14.2M +- Nanowire fabrication equipment (Tokyo): $6.1M +- FPGA development and QCS-4 production (Boston): $9.4M +- General infrastructure and IT: $13.6M + +## Outlook for 2026 + +Priority goals for 2026: +1. Scale to 256 superconducting qubits by Q3 (requires a third dilution + refrigerator, procurement estimated at $9-11M) +2. Achieve topological gap above 0.45meV (requires device process improvement) +3. Demonstrate below-threshold error correction on a 49-qubit surface code + (requires both 256-qubit hardware AND the Cascade decoder scaling to + larger code distances) +4. File 25+ patents +5. Grow revenue to $60M +"#; + +/// Challenge questions designed to test deep reasoning. +/// None of these can be answered by simple keyword search — +/// each requires connecting information from multiple sections. 
+const CHALLENGE_QUESTIONS: &[&str] = &[ + // Requires: cross-reference Lab B's device characterization needs with + // Lab A's FR-02 specs, then connect to the CapEx table for FR-02 cost + "How much did the only refrigerator capable of characterizing Lab B's devices cost, and where is it located?", + + // Requires: trace Lab C's below-threshold result → depends on Lab A's T1 + // improvement → depends on tantalum junction transition + "What specific materials change in another lab made Lab C's error correction milestone possible?", + + // Requires: find the firmware bug in Lab D section, then look at the + // Lab A FR-01 qubit count, then compute the impact window + "How many qubits were affected by the firmware bug, and for how many days?", + + // Requires: Lab B gap/target ratio (70%) × theoretical target (0.5meV) + // → actual gap = 0.35meV, compare with 2026 goal of 0.45meV + "What is the gap between Lab B's current topological gap achievement and the 2026 target, in meV?", + + // Requires: trace the dependency chain: 256-qubit goal → need FR-03 → + // cost $9-11M → government contracts are largest revenue source at $19.8M + "If the 2026 qubit scaling goal requires a new refrigerator, can the largest revenue source category alone cover its estimated cost?", +]; + +#[tokio::main] +async fn main() -> vectorless::Result<()> { + tracing_subscriber::fmt() + .compact() + .with_env_filter( + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")), + ) + .init(); + + println!("=== Single-Document Reasoning Challenge ===\n"); + + let api_key = std::env::var("LLM_API_KEY").unwrap_or_else(|_| "sk-...".to_string()); + let model = std::env::var("LLM_MODEL").unwrap_or_else(|_| "gpt-4o".to_string()); + let endpoint = + std::env::var("LLM_ENDPOINT").unwrap_or_else(|_| "https://api.openai.com/v1".to_string()); + + let engine = EngineBuilder::new() + .with_key(&api_key) + .with_model(&model) + 
.with_endpoint(&endpoint) + .build() + .await + .map_err(|e| vectorless::Error::Config(e.to_string()))?; + + // Index (skip if already indexed — we're testing retrieval, not indexing) + let doc_name = "qc_report_2025"; + let doc_id = { + let existing = engine.list().await?; + if let Some(doc) = existing.iter().find(|d| d.name == doc_name) { + println!("Document already indexed, reusing: {}\n", doc.id); + doc.id.clone() + } else { + println!("Indexing research report..."); + let result = engine + .index(IndexContext::from_content(REPORT, DocumentFormat::Markdown).with_name(doc_name)) + .await?; + let id = result.doc_id().unwrap().to_string(); + println!(" doc_id: {}\n", id); + id + } + }; + + // Challenge queries + for (i, question) in CHALLENGE_QUESTIONS.iter().enumerate() { + println!("Q{}: {}", i + 1, question); + + match engine + .query(QueryContext::new(*question).with_doc_ids(vec![doc_id.clone()])) + .await + { + Ok(response) => { + if let Some(item) = response.single() { + if item.content.is_empty() { + println!(" (no answer found)\n"); + } else { + // Print first 3 lines as preview + for line in item.content.lines().take(3) { + println!(" {}", line); + } + let remaining = item.content.lines().count().saturating_sub(3); + if remaining > 0 { + println!(" ... ({} more lines)", remaining); + } + println!(" confidence: {:.2}\n", item.confidence); + } + } else { + println!(" (no results)\n"); + } + } + Err(e) => { + println!(" error: {}\n", e); + } + } + } + + // Uncomment to remove the document after testing: + // engine.remove(&doc_id).await?; + // println!("Cleaned up."); + + Ok(()) +} diff --git a/rust/src/agent/command.rs b/rust/src/agent/command.rs index 6d983dc..5507a1d 100644 --- a/rust/src/agent/command.rs +++ b/rust/src/agent/command.rs @@ -183,8 +183,11 @@ pub fn resolve_target( /// Resolve a cd/cat target with additional context from the tree node titles. 
/// -/// This extended resolver also checks against the actual tree node titles -/// (in case NavEntry titles differ from TreeNode titles). +/// Matching priority: +/// 1. Direct children via NavigationIndex (exact, case-insensitive, substring, numeric) +/// 2. Direct children via TreeNode titles (case-insensitive contains) +/// 3. Deep descendant search (BFS, up to depth 4) — enables `cd "Research Labs"` from +/// root when "Research Labs" is a grandchild behind an intermediate wrapper node. pub fn resolve_target_extended( target: &str, nav_index: &NavigationIndex, @@ -197,10 +200,10 @@ pub fn resolve_target_extended( return Some(id); } - // Extended: check all children by their TreeNode titles - let children: Vec = tree.children_iter(current_node).collect(); let target_lower = target.to_lowercase(); + // Extended: check all direct children by their TreeNode titles + let children: Vec = tree.children_iter(current_node).collect(); for child_id in &children { if let Some(node) = tree.get(*child_id) { if node.title.to_lowercase().contains(&target_lower) { @@ -209,6 +212,35 @@ pub fn resolve_target_extended( } } + // Deep search: BFS through descendants up to depth 4. + // Returns the shallowest match so `cd "Research Labs"` from root finds it + // at depth 1 even if another "Research Labs" exists deeper. + search_descendants(&target_lower, current_node, tree, 4) +} + +/// BFS search through descendants, returning the shallowest matching NodeId. 
+fn search_descendants(
+    target_lower: &str,
+    start: NodeId,
+    tree: &crate::document::DocumentTree,
+    max_depth: usize,
+) -> Option<NodeId> {
+    let mut queue: std::collections::VecDeque<(NodeId, usize)> = vec![(start, 0)].into();
+
+    while let Some((node_id, depth)) = queue.pop_front() {
+        if depth >= max_depth {
+            continue;
+        }
+        for child_id in tree.children_iter(node_id) {
+            if let Some(node) = tree.get(child_id) {
+                if node.title.to_lowercase().contains(target_lower) {
+                    return Some(child_id);
+                }
+            }
+            queue.push_back((child_id, depth + 1));
+        }
+    }
+    None
 }
@@ -470,6 +502,65 @@ mod tests {
         assert!(resolve_target("anything", &nav_index, tree.root()).is_none());
     }
 
+    #[test]
+    fn test_resolve_target_extended_deep_search() {
+        use crate::document::{ChildRoute, DocumentTree};
+
+        // root → "Quantum Computing Division" → "Research Labs" → "Lab B"
+        let mut tree = DocumentTree::new("Root", "root content");
+        let root = tree.root();
+        let wrapper = tree.add_child(root, "Quantum Computing Division", "wrapper");
+        let labs = tree.add_child(wrapper, "Research Labs", "labs content");
+        let lab_b = tree.add_child(labs, "Lab B", "lab b content");
+
+        let mut nav = NavigationIndex::new();
+        nav.add_child_routes(
+            root,
+            vec![ChildRoute {
+                node_id: wrapper,
+                title: "Quantum Computing Division".to_string(),
+                description: "Division".to_string(),
+                leaf_count: 7,
+            }],
+        );
+        nav.add_child_routes(
+            wrapper,
+            vec![ChildRoute {
+                node_id: labs,
+                title: "Research Labs".to_string(),
+                description: "Labs".to_string(),
+                leaf_count: 4,
+            }],
+        );
+        nav.add_child_routes(
+            labs,
+            vec![ChildRoute {
+                node_id: lab_b,
+                title: "Lab B".to_string(),
+                description: "Topological".to_string(),
+                leaf_count: 1,
+            }],
+        );
+
+        // "Research Labs" is a grandchild of root — deep search should find it
+        assert_eq!(
+            resolve_target_extended("Research Labs", &nav, root, &tree),
+            Some(labs)
+        );
+
+        // "Lab B" is a great-grandchild — deep search should find it
+        assert_eq!(
+            resolve_target_extended("Lab B", &nav, root, &tree),
+            Some(lab_b)
+        );
+
+        // 
Direct children should still work via primary resolver + assert_eq!( + resolve_target_extended("Quantum Computing Division", &nav, root, &tree), + Some(wrapper) + ); + } + #[test] fn test_parse_grep() { assert_eq!( diff --git a/rust/src/agent/config.rs b/rust/src/agent/config.rs index b276d94..0873c8c 100644 --- a/rust/src/agent/config.rs +++ b/rust/src/agent/config.rs @@ -23,8 +23,8 @@ pub struct WorkerConfig { impl Default for WorkerConfig { fn default() -> Self { Self { - max_rounds: 8, - max_llm_calls: 15, + max_rounds: 100, + max_llm_calls: 200, } } } diff --git a/rust/src/agent/orchestrator/evaluate.rs b/rust/src/agent/orchestrator/evaluate.rs index 27c8aab..88e7e07 100644 --- a/rust/src/agent/orchestrator/evaluate.rs +++ b/rust/src/agent/orchestrator/evaluate.rs @@ -34,6 +34,10 @@ pub async fn evaluate( let evidence_summary = format_evidence_summary(evidence); let (system, user) = check_sufficiency(query, &evidence_summary); + info!( + evidence = evidence.len(), + "Evaluating evidence sufficiency..." + ); let response = llm .complete(&system, &user) .await @@ -74,6 +78,7 @@ pub async fn evaluate( } /// Format evidence summary for sufficiency check. +/// Includes actual content so the check LLM can evaluate relevance. 
pub fn format_evidence_summary(evidence: &[Evidence]) -> String { if evidence.is_empty() { return "(no evidence)".to_string(); @@ -82,15 +87,10 @@ pub fn format_evidence_summary(evidence: &[Evidence]) -> String { .iter() .map(|e| { let doc = e.doc_name.as_deref().unwrap_or("unknown"); - format!( - "- [{}] (from {}) {} chars", - e.node_title, - doc, - e.content.len() - ) + format!("[{}] (from {})\n{}", e.node_title, doc, e.content) }) .collect::>() - .join("\n") + .join("\n\n") } #[cfg(test)] diff --git a/rust/src/agent/orchestrator/mod.rs b/rust/src/agent/orchestrator/mod.rs index 9cf9615..6bb6bc2 100644 --- a/rust/src/agent/orchestrator/mod.rs +++ b/rust/src/agent/orchestrator/mod.rs @@ -12,6 +12,7 @@ mod analyze; mod dispatch; mod evaluate; mod replan; +mod supervisor; use tracing::info; @@ -22,11 +23,9 @@ use super::Agent; use super::config::{AgentConfig, Output, WorkspaceContext}; use super::events::EventEmitter; use super::state::OrchestratorState; -use super::tools::orchestrator as orch_tools; use analyze::{AnalyzeOutcome, analyze}; -use evaluate::evaluate; -use replan::replan; +use supervisor::run_supervisor_loop; /// Maximum supervisor loop iterations to prevent infinite loops. 
const MAX_SUPERVISOR_ITERATIONS: u32 = 3; @@ -131,98 +130,22 @@ impl<'a> Agent for Orchestrator<'a> { }; // --- Phase 2: Supervisor loop --- - let mut current_dispatches = initial_dispatches; - let mut iteration: u32 = 0; - let mut eval_sufficient = false; - - loop { - if iteration >= MAX_SUPERVISOR_ITERATIONS { - info!(iteration, "Supervisor loop budget exhausted"); - break; - } - - // Dispatch current plan - if !current_dispatches.is_empty() { - info!( - docs = current_dispatches.len(), - docs_list = ?current_dispatches.iter().map(|d| d.doc_idx).collect::>(), - iteration, - "Dispatching Workers" - ); - dispatch::dispatch_and_collect( - &query, - ¤t_dispatches, - ws, - &config, - &llm, - &mut state, - &emitter, - &query_plan, - ) - .await; - } - - // No evidence at all — nothing to evaluate - if state.all_evidence.is_empty() { - info!("No evidence collected from any Worker"); - break; - } - - // Skip evaluation for user-specified documents (no replan needed) - if skip_analysis { - eval_sufficient = !state.all_evidence.is_empty(); - break; - } - - // Evaluate sufficiency - let eval_result = evaluate(&query, &state.all_evidence, &llm).await?; - orch_llm_calls += 1; - - if eval_result.sufficient { - eval_sufficient = true; - info!( - evidence = state.all_evidence.len(), - iteration, "Evidence sufficient — exiting supervisor loop" - ); - break; - } - - // Insufficient — replan - info!( - evidence = state.all_evidence.len(), - missing = eval_result.missing_info.len(), - iteration, - "Evidence insufficient — replanning" - ); - - let doc_cards_text = orch_tools::ls_docs(ws).feedback; - let replan_result = replan( - &query, - &eval_result.missing_info, - &state.all_evidence, - &state.dispatched, - ws.doc_count(), - &doc_cards_text, - &llm, - ) - .await?; - orch_llm_calls += 1; - - if replan_result.dispatches.is_empty() { - info!("Replan produced no new dispatches — exiting supervisor loop"); - break; - } - - current_dispatches = replan_result.dispatches; - iteration += 1; 
- } + let outcome = run_supervisor_loop( + &query, + initial_dispatches, + ws, + &config, + &llm, + &mut state, + &emitter, + &query_plan, + skip_analysis, + ) + .await?; + orch_llm_calls += outcome.llm_calls; - // Derive confidence from supervisor loop outcome: - // - LLM evaluated sufficient on first try → high confidence - // - Needed replan rounds → lower confidence - // - No evaluation ran (skip_analysis / no evidence) → moderate let confidence = - compute_confidence(eval_sufficient, iteration, state.all_evidence.is_empty()); + compute_confidence(outcome.eval_sufficient, outcome.iteration, state.all_evidence.is_empty()); // --- Phase 3: Finalize — rerank + synthesize --- if state.all_evidence.is_empty() { diff --git a/rust/src/agent/orchestrator/replan.rs b/rust/src/agent/orchestrator/replan.rs index 57d5e24..507c171 100644 --- a/rust/src/agent/orchestrator/replan.rs +++ b/rust/src/agent/orchestrator/replan.rs @@ -59,6 +59,7 @@ pub async fn replan( &find_text, ); + info!(evidence = collected_evidence.len(), "Replanning dispatch targets..."); let response = llm .complete(&system, &user) .await @@ -87,6 +88,7 @@ pub async fn replan( } /// Format collected evidence for the replan prompt. +/// Includes content so the LLM can reason about what's actually been found. fn format_evidence_context(evidence: &[Evidence]) -> String { if evidence.is_empty() { return "(no evidence collected)".to_string(); @@ -95,15 +97,10 @@ fn format_evidence_context(evidence: &[Evidence]) -> String { .iter() .map(|e| { let doc = e.doc_name.as_deref().unwrap_or("unknown"); - format!( - "- [{}] (from {}) {} chars", - e.node_title, - doc, - e.content.len() - ) + format!("[{}] (from {})\n{}", e.node_title, doc, e.content) }) .collect::>() - .join("\n") + .join("\n\n") } /// Build the replan prompt. 
diff --git a/rust/src/agent/orchestrator/supervisor.rs b/rust/src/agent/orchestrator/supervisor.rs new file mode 100644 index 0000000..9dd4a37 --- /dev/null +++ b/rust/src/agent/orchestrator/supervisor.rs @@ -0,0 +1,159 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Phase 2: Supervisor loop — dispatch → evaluate → replan. + +use tracing::info; + +use crate::llm::LlmClient; +use crate::query::QueryPlan; + +use super::super::config::{AgentConfig, WorkspaceContext}; +use super::super::events::EventEmitter; +use super::super::prompts::DispatchEntry; +use super::super::state::OrchestratorState; +use super::super::tools::orchestrator as orch_tools; +use super::dispatch; +use super::evaluate::evaluate; +use super::replan::replan; +use super::MAX_SUPERVISOR_ITERATIONS; + +/// Outcome of the supervisor loop. +pub struct SupervisorOutcome { + /// Number of replan iterations performed. + pub iteration: u32, + /// Whether the LLM evaluator judged evidence sufficient. + pub eval_sufficient: bool, + /// LLM calls consumed within the supervisor loop itself. + pub llm_calls: u32, +} + +/// Run the supervisor loop: dispatch → evaluate → replan. +/// +/// Returns a [`SupervisorOutcome`] summarizing what happened. 
+pub async fn run_supervisor_loop( + query: &str, + initial_dispatches: Vec, + ws: &WorkspaceContext<'_>, + config: &AgentConfig, + llm: &LlmClient, + state: &mut OrchestratorState, + emitter: &EventEmitter, + query_plan: &QueryPlan, + skip_analysis: bool, +) -> crate::error::Result { + let mut current_dispatches = initial_dispatches; + let mut iteration: u32 = 0; + let mut eval_sufficient = false; + let mut llm_calls: u32 = 0; + + loop { + if iteration >= MAX_SUPERVISOR_ITERATIONS { + info!(iteration, "Supervisor loop budget exhausted"); + break; + } + + // Dispatch current plan + if !current_dispatches.is_empty() { + info!( + docs = current_dispatches.len(), + docs_list = ?current_dispatches.iter().map(|d| d.doc_idx).collect::>(), + iteration, + "Dispatching Workers" + ); + dispatch::dispatch_and_collect( + query, + ¤t_dispatches, + ws, + config, + llm, + state, + emitter, + query_plan, + ) + .await; + } + + // No evidence at all — nothing to evaluate + if state.all_evidence.is_empty() { + info!("No evidence collected from any Worker"); + break; + } + + // Skip evaluation for user-specified documents (no replan needed) + if skip_analysis { + eval_sufficient = !state.all_evidence.is_empty(); + break; + } + + // Evaluate sufficiency + let eval_result = evaluate(query, &state.all_evidence, llm).await?; + llm_calls += 1; + + if eval_result.sufficient { + eval_sufficient = true; + info!( + evidence = state.all_evidence.len(), + iteration, "Evidence sufficient — exiting supervisor loop" + ); + break; + } + + // Insufficient — replan + info!( + evidence = state.all_evidence.len(), + missing = eval_result.missing_info.len(), + iteration, + "Evidence insufficient — replanning" + ); + + let doc_cards_text = orch_tools::ls_docs(ws).feedback; + let replan_result = replan( + query, + &eval_result.missing_info, + &state.all_evidence, + &state.dispatched, + ws.doc_count(), + &doc_cards_text, + llm, + ) + .await?; + llm_calls += 1; + + if replan_result.dispatches.is_empty() { + 
info!("Replan produced no new dispatches — exiting supervisor loop"); + break; + } + + current_dispatches = replan_result.dispatches; + iteration += 1; + } + + Ok(SupervisorOutcome { + iteration, + eval_sufficient, + llm_calls, + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_supervisor_outcome_fields() { + let outcome = SupervisorOutcome { + iteration: 2, + eval_sufficient: true, + llm_calls: 5, + }; + assert_eq!(outcome.iteration, 2); + assert!(outcome.eval_sufficient); + assert_eq!(outcome.llm_calls, 5); + } + + #[test] + fn test_max_iterations_constant() { + assert_eq!(MAX_SUPERVISOR_ITERATIONS, 3); + } +} diff --git a/rust/src/agent/prompts.rs b/rust/src/agent/prompts.rs index 42699cc..11d26fb 100644 --- a/rust/src/agent/prompts.rs +++ b/rust/src/agent/prompts.rs @@ -115,12 +115,12 @@ pub fn worker_navigation(params: &NavigationParams) -> (String, String) { Available commands: - ls List children at current position (with summaries and leaf counts) -- cd Enter a child node (supports absolute paths like /root/Section) +- cd Enter a child node (supports relative paths like Section/Sub and absolute paths like /root/Section) - cd .. 
Go back to parent node - cat Read a child node's content (automatically collected as evidence) - cat Read the current node's content (useful at leaf nodes) - head Preview first 20 lines of a node (does NOT collect evidence) -- find Search for a keyword in the document index +- find Search for a keyword in the document index (also supports multi-word like 'Lab C') - findtree Search for nodes by title pattern (case-insensitive) - grep Regex search across all content in current subtree - wc Show content size (lines, words, chars) @@ -129,12 +129,21 @@ Available commands: - done End navigation SEARCH STRATEGY (important — follow this priority order): -- When keyword matches are shown below, use find with the EXACT keyword from the list (single word, \ +- When keyword matches are shown below, navigate directly to the highest-weight matched node. \ +Do NOT explore other branches first — the keyword index has already identified the most relevant location. +- When find results include content snippets that answer the question, cd to that node and cat it immediately. +- Use find with the EXACT keyword from the list (single word, \ not multi-word phrases). Example: if hint shows keyword 'performance' pointing to Performance section, \ use find performance, NOT find \"performance guide\". -- Use ls when you have no keyword hints or need to discover the structure of an unknown section. +- Use ls only when you have no keyword hints or need to discover the structure of an unknown section. - Use findtree when you know a section title pattern but not the exact name. +NAVIGATION EFFICIENCY (critical — every round counts): +- Prefer cd with absolute paths (/root/Section/Subsection) or relative paths (Section/Sub) \ +to reach target nodes in ONE command instead of multiple cd steps. +- Do NOT ls before cd if keyword hints or find results already tell you which node to enter. +- Do NOT cd into nodes one level at a time when you can use a multi-segment path. 
+ Rules: - Output exactly ONE command per response, nothing else. - Content from cat is automatically saved as evidence — don't re-cat the same node. @@ -239,13 +248,15 @@ pub fn worker_dispatch(params: &WorkerDispatchParams) -> (String, String) { "You are a document navigation assistant. You are searching inside the document \ \"{doc_name}\" for specific information. -Available commands: ls, cd , cd .., cat, cat , head , find , \ -findtree , grep , wc , pwd, check, done +Available commands: ls, cd (supports Section/Sub paths and /root/Section absolute paths), \ +cd .., cat, cat , head , find , findtree , grep , wc , \ +pwd, check, done SEARCH STRATEGY: -- Prefer find to jump directly to relevant sections over manual ls→cd exploration. \ -Use single-word keywords, not multi-word phrases. -- Use ls when you need to discover the structure of an unknown section. +- Prefer find to jump directly to relevant sections over manual ls→cd exploration. +- When find results include content snippets that answer your task, cd to that node and cat it immediately. +- Use multi-segment paths (e.g. cd Research Labs/Lab A) to reach targets in ONE command. +- Do NOT ls before cd if find results already tell you which node to enter. - Use findtree when you know a section title pattern but not the exact name. 
Rules: @@ -396,7 +407,7 @@ mod tests { missing_info: "2024 comparison", last_feedback: "[1] Q1 Report — Q1 data (5 leaves)\n[2] Q2 Report — Q2 data (5 leaves)", remaining: 5, - max_rounds: 8, + max_rounds: 15, history: "(no history yet)", visited_titles: "(none)", plan: "", @@ -411,7 +422,7 @@ mod tests { assert!(user.contains("root/Financial Statements")); assert!(user.contains("200 chars")); assert!(user.contains("2024 comparison")); - assert!(user.contains("5/8")); + assert!(user.contains("5/15")); assert!(!user.contains("sub-task")); } @@ -425,7 +436,7 @@ mod tests { missing_info: "", last_feedback: "", remaining: 8, - max_rounds: 8, + max_rounds: 15, history: "(no history yet)", visited_titles: "(none)", plan: "", @@ -448,7 +459,7 @@ mod tests { missing_info: "", last_feedback: "", remaining: 8, - max_rounds: 8, + max_rounds: 15, history: "(no history yet)", visited_titles: "(none)", plan: "", diff --git a/rust/src/agent/state.rs b/rust/src/agent/state.rs index a4c1e67..e0deb3b 100644 --- a/rust/src/agent/state.rs +++ b/rust/src/agent/state.rs @@ -25,6 +25,9 @@ pub struct WorkerState { pub evidence: Vec, /// Nodes already visited (prevents redundant reads). pub visited: HashSet, + /// Nodes whose content has been collected via cat. Separate from visited + /// because cd-ing through a node ≠ reading its content. + pub collected_nodes: HashSet, /// Remaining navigation rounds. pub remaining: u32, /// Maximum rounds (for display in prompts). @@ -49,10 +52,6 @@ pub struct WorkerState { /// Maximum number of history entries to keep for prompt injection. const MAX_HISTORY_ENTRIES: usize = 6; -/// Maximum characters for `last_feedback` before truncation. -/// Prevents large cat/grep outputs from bloating subsequent prompts. -const MAX_FEEDBACK_CHARS: usize = 500; - impl WorkerState { /// Create a new state starting at the given root node. 
pub fn new(root: NodeId, max_rounds: u32) -> Self { @@ -61,6 +60,7 @@ impl WorkerState { current_node: root, evidence: Vec::new(), visited: HashSet::new(), + collected_nodes: HashSet::new(), remaining: max_rounds, max_rounds, last_feedback: String::new(), @@ -79,20 +79,9 @@ impl WorkerState { } } - /// Set feedback with automatic truncation to prevent prompt bloat. + /// Set feedback from tool execution. pub fn set_feedback(&mut self, feedback: String) { - if feedback.len() <= MAX_FEEDBACK_CHARS { - self.last_feedback = feedback; - } else { - // Find a clean truncation point (line boundary if possible) - let truncated = &feedback[..MAX_FEEDBACK_CHARS]; - let end = truncated.rfind('\n').unwrap_or(MAX_FEEDBACK_CHARS); - self.last_feedback = format!( - "{}...\n(truncated, {} chars total)", - &feedback[..end.min(MAX_FEEDBACK_CHARS)], - feedback.len() - ); - } + self.last_feedback = feedback; } /// Navigate into a child node. @@ -118,6 +107,11 @@ impl WorkerState { self.evidence.push(evidence); } + /// Check if evidence has already been collected for a specific node. + pub fn has_evidence_for(&self, node_id: crate::document::NodeId) -> bool { + self.collected_nodes.contains(&node_id) + } + /// Push a history entry (command + result summary). /// Keeps only the last `MAX_HISTORY_ENTRIES` entries. pub fn push_history(&mut self, entry: String) { @@ -157,6 +151,20 @@ impl WorkerState { .join("\n") } + /// Evidence with actual content for sufficiency evaluation. + pub fn evidence_for_check(&self) -> String { + if self.evidence.is_empty() { + return "(no evidence collected yet)".to_string(); + } + self.evidence + .iter() + .map(|e| { + format!("[{}]\n{}", e.node_title, e.content) + }) + .collect::>() + .join("\n\n") + } + /// Convert this state into a WorkerOutput (consuming the state), with budget flag. /// Worker returns evidence only — no answer synthesis. 
pub fn into_worker_output( diff --git a/rust/src/agent/tools/mod.rs b/rust/src/agent/tools/mod.rs index af90013..c44c002 100644 --- a/rust/src/agent/tools/mod.rs +++ b/rust/src/agent/tools/mod.rs @@ -51,3 +51,51 @@ impl ToolResult { } } } + +/// Extract a content snippet around the first occurrence of `keyword`. +/// +/// Returns `None` if the content is empty. If the keyword is not found, +/// returns the beginning of the content instead. +pub fn content_snippet(content: &str, keyword: &str, max_len: usize) -> Option { + if content.trim().is_empty() { + return None; + } + + let keyword_lower = keyword.to_lowercase(); + let content_lower = content.to_lowercase(); + + let start = match content_lower.find(&keyword_lower) { + Some(pos) => { + let back = (max_len / 4).min(pos); + pos - back + } + None => 0, + }; + + let start = content + .char_indices() + .find(|(i, _)| *i >= start) + .map(|(i, _)| i) + .unwrap_or(0); + + let end = content + .char_indices() + .take_while(|(i, _)| *i <= start + max_len) + .last() + .map(|(i, c)| i + c.len_utf8()) + .unwrap_or(content.len()); + + let snippet = content[start..end].trim(); + if snippet.is_empty() { + return None; + } + + let mut result = snippet.to_string(); + if end < content.len() { + result.push_str("..."); + } + if start > 0 { + result = format!("...{}", result); + } + Some(result) +} diff --git a/rust/src/agent/tools/orchestrator.rs b/rust/src/agent/tools/orchestrator.rs index 73c78ac..96dcb11 100644 --- a/rust/src/agent/tools/orchestrator.rs +++ b/rust/src/agent/tools/orchestrator.rs @@ -68,7 +68,7 @@ pub fn ls_docs(ctx: &WorkspaceContext) -> ToolResult { /// Execute `find_cross` — search keywords across all documents. /// -/// Returns formatted results showing which documents matched. +/// Returns formatted results showing which documents matched, with content snippets. 
pub fn find_cross(keywords: &[String], ctx: &WorkspaceContext) -> ToolResult { let results = ctx.find_cross_all(keywords); @@ -90,16 +90,15 @@ pub fn find_cross(keywords: &[String], ctx: &WorkspaceContext) -> ToolResult { let title = doc .and_then(|d| d.node_title(entry.node_id)) .unwrap_or("unknown"); - let summary = doc - .and_then(|d| d.nav_entry(entry.node_id)) - .map(|e| e.overview.as_str()) - .unwrap_or(""); output.push_str(&format!( " keyword '{}' → {} (depth {}, weight {:.2})", hit.keyword, title, entry.depth, entry.weight )); - if !summary.is_empty() { - output.push_str(&format!(" — {}", summary)); + // Include content snippet for cross-doc relevance judgment + if let Some(content) = doc.and_then(|d| d.cat(entry.node_id)) { + if let Some(snippet) = super::content_snippet(content, &hit.keyword, 300) { + output.push_str(&format!("\n \"{}\"", snippet)); + } } output.push('\n'); } diff --git a/rust/src/agent/tools/worker/cat.rs b/rust/src/agent/tools/worker/cat.rs index 0e13257..3792290 100644 --- a/rust/src/agent/tools/worker/cat.rs +++ b/rust/src/agent/tools/worker/cat.rs @@ -30,7 +30,7 @@ pub fn cat(target: &str, ctx: &DocContext, state: &mut WorkerState) -> ToolResul } }; - if state.visited.contains(&node_id) { + if state.has_evidence_for(node_id) { let title = ctx.node_title(node_id).unwrap_or("unknown"); return ToolResult::ok(format!( "[Already collected: {}]. 
Use a different target or cd to another branch.", @@ -50,19 +50,10 @@ pub fn cat(target: &str, ctx: &DocContext, state: &mut WorkerState) -> ToolResul doc_name: Some(ctx.doc_name.to_string()), }); + state.collected_nodes.insert(node_id); state.visited.insert(node_id); - let preview = if content_string.len() > 500 { - format!( - "{}...(truncated, {} chars total)", - &content_string[..500], - content_string.len() - ) - } else { - content_string - }; - - ToolResult::ok(format!("[Evidence collected: {}]\n{}", title, preview)) + ToolResult::ok(format!("[Evidence collected: {}]\n{}", title, content_string)) } None => ToolResult::fail(format!("No content available for '{}'.", target)), } @@ -110,7 +101,7 @@ mod tests { reasoning_index: &crate::document::ReasoningIndex::default(), doc_name: "test", }; - let mut state = WorkerState::new(root, 8); + let mut state = WorkerState::new(root, 15); let result = cat("Getting Started", &ctx, &mut state); assert!(result.success); diff --git a/rust/src/agent/tools/worker/cd.rs b/rust/src/agent/tools/worker/cd.rs index 765d6e2..8d87483 100644 --- a/rust/src/agent/tools/worker/cd.rs +++ b/rust/src/agent/tools/worker/cd.rs @@ -13,12 +13,18 @@ use super::super::ToolResult; /// /// Supports: /// - Relative names (child of current node): `cd "Getting Started"` +/// - Relative paths with `/`: `cd "Research Labs/Lab B"` /// - Absolute paths starting with `/`: `cd /root/Chapter 1/Section 1.2` pub fn cd(target: &str, ctx: &DocContext, state: &mut WorkerState) -> ToolResult { if target.starts_with('/') { return cd_absolute(target, ctx, state); } + // Relative path with segments: "Research Labs/Lab B" + if target.contains('/') { + return cd_relative_path(target, ctx, state); + } + match command::resolve_target_extended(target, ctx.nav_index, state.current_node, ctx.tree) { Some(node_id) => { let title = ctx.node_title(node_id).unwrap_or(target).to_string(); @@ -32,6 +38,40 @@ pub fn cd(target: &str, ctx: &DocContext, state: &mut WorkerState) -> 
ToolResult } } +/// Navigate using a relative multi-segment path (e.g., `"Research Labs/Lab B"`). +fn cd_relative_path(path: &str, ctx: &DocContext, state: &mut WorkerState) -> ToolResult { + let segments: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect(); + if segments.is_empty() { + return ToolResult::fail("Empty path.".to_string()); + } + + let mut current = state.current_node; + let mut breadcrumb = state.breadcrumb.clone(); + + for segment in &segments { + match command::resolve_target_extended(segment, ctx.nav_index, current, ctx.tree) { + Some(node_id) => { + let title = ctx.node_title(node_id).unwrap_or(*segment).to_string(); + breadcrumb.push(title); + current = node_id; + } + None => { + return ToolResult::fail(format!( + "Path segment '{}' not found at '/{}'. Use ls to see available children.", + segment, + breadcrumb.join("/") + )); + } + } + } + + state.breadcrumb = breadcrumb; + state.current_node = current; + state.visited.insert(current); + + ToolResult::ok(format!("Entered: {}", state.path_str())) +} + /// Navigate using an absolute path (e.g., `/root/Chapter 1/Section 1.2`). 
fn cd_absolute(path: &str, ctx: &DocContext, state: &mut WorkerState) -> ToolResult { let segments: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect(); @@ -131,7 +171,7 @@ mod tests { reasoning_index: &crate::document::ReasoningIndex::default(), doc_name: "test", }; - let mut state = WorkerState::new(root, 8); + let mut state = WorkerState::new(root, 15); let result = cd("Getting Started", &ctx, &mut state); assert!(result.success); @@ -148,11 +188,75 @@ mod tests { reasoning_index: &crate::document::ReasoningIndex::default(), doc_name: "test", }; - let mut state = WorkerState::new(root, 8); + let mut state = WorkerState::new(root, 15); cd("Getting Started", &ctx, &mut state); let result = cd_up(&ctx, &mut state); assert!(result.success); assert_eq!(state.current_node, root); } + + fn build_deep_tree() -> (DocumentTree, NavigationIndex, NodeId, NodeId, NodeId) { + // Root → "Research Labs" → "Lab B" + let mut tree = DocumentTree::new("Root", "root content"); + let root = tree.root(); + let section = tree.add_child(root, "Research Labs", "section content"); + let lab_b = tree.add_child(section, "Lab B", "lab b content"); + + let mut nav = NavigationIndex::new(); + nav.add_child_routes( + root, + vec![ChildRoute { + node_id: section, + title: "Research Labs".to_string(), + description: "Lab sections".to_string(), + leaf_count: 4, + }], + ); + nav.add_child_routes( + section, + vec![ChildRoute { + node_id: lab_b, + title: "Lab B".to_string(), + description: "Topological qubits".to_string(), + leaf_count: 1, + }], + ); + + (tree, nav, root, section, lab_b) + } + + #[test] + fn test_cd_relative_path() { + let (tree, nav, root, _, lab_b) = build_deep_tree(); + let ctx = DocContext { + tree: &tree, + nav_index: &nav, + reasoning_index: &crate::document::ReasoningIndex::default(), + doc_name: "test", + }; + let mut state = WorkerState::new(root, 15); + + let result = cd("Research Labs/Lab B", &ctx, &mut state); + assert!(result.success); + 
assert_eq!(state.current_node, lab_b); + assert!(state.path_str().contains("Research Labs")); + assert!(state.path_str().contains("Lab B")); + } + + #[test] + fn test_cd_relative_path_partial_fail() { + let (tree, nav, root, _, _) = build_deep_tree(); + let ctx = DocContext { + tree: &tree, + nav_index: &nav, + reasoning_index: &crate::document::ReasoningIndex::default(), + doc_name: "test", + }; + let mut state = WorkerState::new(root, 15); + + let result = cd("Research Labs/Nonexistent", &ctx, &mut state); + assert!(!result.success); + assert!(result.feedback.contains("Nonexistent")); + } } diff --git a/rust/src/agent/tools/worker/grep.rs b/rust/src/agent/tools/worker/grep.rs index 6dc5c2c..077b077 100644 --- a/rust/src/agent/tools/worker/grep.rs +++ b/rust/src/agent/tools/worker/grep.rs @@ -42,12 +42,7 @@ pub fn grep(pattern: &str, ctx: &DocContext, state: &WorkerState) -> ToolResult break; } if re.is_match(line) { - let preview = if line.len() > 120 { - format!("{}...", &line[..120]) - } else { - line.to_string() - }; - output.push_str(&format!("[{}] {}\n", title, preview)); + output.push_str(&format!("[{}] {}\n", title, line)); matches_found += 1; } } @@ -124,7 +119,7 @@ mod tests { fn test_grep_finds_matches() { let (tree, nav, root) = build_rich_tree(); let ctx = rich_ctx!(tree, nav); - let state = WorkerState::new(root, 8); + let state = WorkerState::new(root, 15); let result = grep("revenue", &ctx, &state); assert!(result.success); @@ -136,7 +131,7 @@ mod tests { fn test_grep_regex() { let (tree, nav, root) = build_rich_tree(); let ctx = rich_ctx!(tree, nav); - let state = WorkerState::new(root, 8); + let state = WorkerState::new(root, 15); let result = grep("EBITDA|\\$\\d+", &ctx, &state); assert!(result.success); @@ -148,7 +143,7 @@ mod tests { fn test_grep_no_matches() { let (tree, nav, root) = build_rich_tree(); let ctx = rich_ctx!(tree, nav); - let state = WorkerState::new(root, 8); + let state = WorkerState::new(root, 15); let result = 
grep("nonexistent_term_xyz", &ctx, &state); assert!(result.success); @@ -159,7 +154,7 @@ mod tests { fn test_grep_invalid_regex() { let (tree, nav, root) = build_rich_tree(); let ctx = rich_ctx!(tree, nav); - let state = WorkerState::new(root, 8); + let state = WorkerState::new(root, 15); let result = grep("[invalid", &ctx, &state); assert!(!result.success); @@ -170,7 +165,7 @@ mod tests { fn test_grep_subtree_only() { let (tree, nav, root) = build_rich_tree(); let ctx = rich_ctx!(tree, nav); - let mut state = WorkerState::new(root, 8); + let mut state = WorkerState::new(root, 15); crate::agent::tools::worker::cd::cd("Expenses", &ctx, &mut state); let result = grep("revenue", &ctx, &state); diff --git a/rust/src/agent/tools/worker/head.rs b/rust/src/agent/tools/worker/head.rs index 06dd443..5fefa23 100644 --- a/rust/src/agent/tools/worker/head.rs +++ b/rust/src/agent/tools/worker/head.rs @@ -98,7 +98,7 @@ mod tests { fn test_head_preview() { let (tree, nav, root) = build_rich_tree(); let ctx = rich_ctx!(tree, nav); - let state = WorkerState::new(root, 8); + let state = WorkerState::new(root, 15); let result = head("Revenue", 2, &ctx, &state); assert!(result.success); @@ -111,7 +111,7 @@ mod tests { fn test_head_not_found() { let (tree, nav, root) = build_rich_tree(); let ctx = rich_ctx!(tree, nav); - let state = WorkerState::new(root, 8); + let state = WorkerState::new(root, 15); let result = head("NonExistent", 10, &ctx, &state); assert!(!result.success); diff --git a/rust/src/agent/tools/worker/ls.rs b/rust/src/agent/tools/worker/ls.rs index 351ab89..a00d688 100644 --- a/rust/src/agent/tools/worker/ls.rs +++ b/rust/src/agent/tools/worker/ls.rs @@ -32,13 +32,22 @@ pub fn ls(ctx: &DocContext, state: &WorkerState) -> ToolResult { } for (i, route) in routes.iter().enumerate() { - output.push_str(&format!( - "[{}] {} — {} ({} leaves)", - i + 1, - route.title, - route.description, - route.leaf_count - )); + if route.title == route.description { + 
output.push_str(&format!( + "[{}] {} ({} leaves)", + i + 1, + route.title, + route.leaf_count + )); + } else { + output.push_str(&format!( + "[{}] {} — {} ({} leaves)", + i + 1, + route.title, + route.description, + route.leaf_count + )); + } if let Some(nav) = ctx.nav_entry(route.node_id) { if !nav.question_hints.is_empty() { output.push_str(&format!( @@ -105,7 +114,7 @@ mod tests { reasoning_index: &crate::document::ReasoningIndex::default(), doc_name: "test", }; - let state = WorkerState::new(root, 8); + let state = WorkerState::new(root, 15); let result = ls(&ctx, &state); assert!(result.success); diff --git a/rust/src/agent/tools/worker/pwd.rs b/rust/src/agent/tools/worker/pwd.rs index 4f71a7e..7adcf08 100644 --- a/rust/src/agent/tools/worker/pwd.rs +++ b/rust/src/agent/tools/worker/pwd.rs @@ -48,7 +48,7 @@ mod tests { reasoning_index: &crate::document::ReasoningIndex::default(), doc_name: "test", }; - let mut state = WorkerState::new(root, 8); + let mut state = WorkerState::new(root, 15); cd("API Reference", &ctx, &mut state); let result = pwd(&state); diff --git a/rust/src/agent/tools/worker/wc.rs b/rust/src/agent/tools/worker/wc.rs index ac37f29..4ea7ec0 100644 --- a/rust/src/agent/tools/worker/wc.rs +++ b/rust/src/agent/tools/worker/wc.rs @@ -87,7 +87,7 @@ mod tests { fn test_wc_stats() { let (tree, nav, root) = build_rich_tree(); let ctx = rich_ctx!(tree, nav); - let state = WorkerState::new(root, 8); + let state = WorkerState::new(root, 15); let result = wc("Revenue", &ctx, &state); assert!(result.success); @@ -101,7 +101,7 @@ mod tests { fn test_wc_not_found() { let (tree, nav, root) = build_rich_tree(); let ctx = rich_ctx!(tree, nav); - let state = WorkerState::new(root, 8); + let state = WorkerState::new(root, 15); let result = wc("NonExistent", &ctx, &state); assert!(!result.success); diff --git a/rust/src/agent/worker/execute.rs b/rust/src/agent/worker/execute.rs index c8e1b9a..c4fa8c1 100644 --- a/rust/src/agent/worker/execute.rs +++ 
b/rust/src/agent/worker/execute.rs @@ -96,22 +96,57 @@ pub async fn execute_command( continue; } let title = ctx.node_title(entry.node_id).unwrap_or("unknown"); - let summary = ctx - .nav_entry(entry.node_id) - .map(|e| e.overview.as_str()) - .unwrap_or(""); output.push_str(&format!( " - {} (depth {}, weight {:.2})", title, entry.depth, entry.weight )); - if !summary.is_empty() { - output.push_str(&format!(" — {}", summary)); + if let Some(content) = ctx.cat(entry.node_id) { + if let Some(snippet) = + super::super::tools::content_snippet(content, keyword, 300) + { + output.push_str(&format!("\n \"{}\"", snippet)); + } } output.push('\n'); } output } - None => format!("No results for '{}'", keyword), + None => { + // Fallback: search node titles (like findtree) with content snippets + let pattern_lower = keyword.to_lowercase(); + let all_nodes = ctx.tree.traverse(); + let mut results = Vec::new(); + for node_id in &all_nodes { + if let Some(node) = ctx.tree.get(*node_id) { + if node.title.to_lowercase().contains(&pattern_lower) { + let depth = ctx.tree.depth(*node_id); + results.push((node.title.clone(), *node_id, depth)); + } + } + } + if results.is_empty() { + format!("No results for '{}' in index or titles.", keyword) + } else { + let mut output = format!( + "Results for '{}' (title match, {} found):\n", + keyword, + results.len() + ); + for (title, node_id, depth) in &results { + output.push_str(&format!( + " - {} (depth {})", + title, depth + )); + if let Some(content) = ctx.cat(*node_id) { + if let Some(snippet) = super::super::tools::content_snippet(content, keyword, 300) { + output.push_str(&format!("\n \"{}\"", snippet)); + } + } + output.push('\n'); + } + output + } + } }; info!(doc = ctx.doc_name, keyword, feedback = %truncate_log(&feedback), "find result"); state.set_feedback(feedback); @@ -125,9 +160,9 @@ pub async fn execute_command( } Command::Check => { - let evidence_summary = state.evidence_summary(); + let evidence_text = 
state.evidence_for_check(); - let (system, user) = check_sufficiency(query, &evidence_summary); + let (system, user) = check_sufficiency(query, &evidence_text); info!( doc = ctx.doc_name, diff --git a/rust/src/agent/worker/mod.rs b/rust/src/agent/worker/mod.rs index 803ced7..f0577c8 100644 --- a/rust/src/agent/worker/mod.rs +++ b/rust/src/agent/worker/mod.rs @@ -13,16 +13,15 @@ mod execute; mod format; +mod navigation; mod planning; -use tracing::{debug, info}; +use tracing::info; use super::Agent; -use super::command::Command; -use super::config::{DocContext, Step, WorkerConfig, WorkerOutput}; +use super::config::{DocContext, WorkerConfig, WorkerOutput}; use super::context::FindHit; use super::events::EventEmitter; -use super::prompts::{NavigationParams, worker_dispatch, worker_navigation}; use super::state::WorkerState; use super::tools::worker as tools; use crate::error::Error; @@ -30,9 +29,8 @@ use crate::llm::LlmClient; use crate::query::QueryPlan; use crate::scoring::bm25::extract_keywords; -use execute::{execute_command, parse_and_detect_failure}; -use format::format_visited_titles; -use planning::{build_plan_prompt, build_replan_prompt, format_keyword_hints}; +use navigation::run_navigation_loop; +use planning::build_plan_prompt; /// Worker agent — navigates a single document to collect evidence. /// @@ -102,19 +100,12 @@ impl<'a> Agent for Worker<'a> { ); let mut llm_calls: u32 = 0; - let max_llm = config.max_llm_calls; - - macro_rules! 
llm_budget_exhausted { - () => { - max_llm > 0 && llm_calls >= max_llm - }; - } // Gather keyword hits as context for LLM planning (not routing rules) let keywords = extract_keywords(&query); let index_hits: Vec = ctx.find_all(&keywords); if !index_hits.is_empty() { - debug!( + tracing::debug!( doc = ctx.doc_name, hit_count = index_hits.len(), "ReasoningIndex keyword hits available for planning" @@ -127,7 +118,8 @@ impl<'a> Agent for Worker<'a> { state.set_feedback(ls_result.feedback); // --- Phase 1.5: Navigation planning --- - if state.remaining > 0 && !llm_budget_exhausted!() { + if state.remaining > 0 && (config.max_llm_calls == 0 || llm_calls < config.max_llm_calls) { + info!(doc = ctx.doc_name, "Generating navigation plan..."); let plan_prompt = build_plan_prompt( &query, task_ref, @@ -159,173 +151,23 @@ impl<'a> Agent for Worker<'a> { } // --- Phase 2: Navigation loop --- - let use_dispatch_prompt = task_ref.is_some(); - let keyword_hints = format_keyword_hints(&index_hits, ctx); - - loop { - if state.remaining == 0 { - info!(doc = ctx.doc_name, "Navigation budget exhausted"); - break; - } - if llm_budget_exhausted!() { - info!( - doc = ctx.doc_name, - llm_calls, max_llm, "LLM call budget exhausted" - ); - break; - } - - // Build prompt - let (system, user) = if use_dispatch_prompt && state.remaining == config.max_rounds { - worker_dispatch(&super::prompts::WorkerDispatchParams { - original_query: &query, - task: task_ref.unwrap_or(&query), - doc_name: ctx.doc_name, - breadcrumb: &state.path_str(), - }) - } else { - let visited_titles = format_visited_titles(&state, ctx); - worker_navigation(&NavigationParams { - query: &query, - task: task_ref, - breadcrumb: &state.path_str(), - evidence_summary: &state.evidence_summary(), - missing_info: &state.missing_info, - last_feedback: &state.last_feedback, - remaining: state.remaining, - max_rounds: state.max_rounds, - history: &state.history_text(), - visited_titles: &visited_titles, - plan: &state.plan, - 
intent_context: &intent_context, - keyword_hints: &keyword_hints, - }) - }; - - // LLM decision - let round_num = config.max_rounds - state.remaining + 1; - let round_start = std::time::Instant::now(); - let llm_output = - llm.complete(&system, &user) - .await - .map_err(|e| Error::LlmReasoning { - stage: "worker/navigation".to_string(), - detail: format!("Nav loop LLM call failed (round {round_num}): {e}"), - })?; - llm_calls += 1; - - // Parse command - if llm_output.trim().len() < 2 { - tracing::warn!( - doc = ctx.doc_name, - round = config.max_rounds - state.remaining + 1, - response = llm_output.trim(), - "LLM response unusually short" - ); - } - let (command, is_parse_failure) = parse_and_detect_failure(&llm_output); - if is_parse_failure { - let raw_preview = if llm_output.trim().len() > 200 { - format!("{}...", &llm_output.trim()[..200]) - } else { - llm_output.trim().to_string() - }; - state.last_feedback = format!( - "Your output was not recognized as a valid command:\n\"{}\"\n\n\ - Please output exactly one command (ls, cd, cat, head, find, findtree, grep, wc, pwd, check, or done).", - raw_preview - ); - state.push_history("(unrecognized) → parse failure".to_string()); - continue; - } - - debug!(doc = ctx.doc_name, ?command, "Parsed command"); - - let is_check = matches!(command, Command::Check); - - // Execute - let step = execute_command( - &command, - ctx, - &mut state, - &query, - &llm, - &mut llm_calls, - &emitter, - ) - .await; - - // Dynamic re-planning after insufficient check - if is_check - && !state.missing_info.is_empty() - && state.remaining >= 3 - && !llm_budget_exhausted!() - { - let missing = state.missing_info.clone(); - let replan = build_replan_prompt(&query, task_ref, &state, ctx); - let new_plan = - llm.complete(&replan.0, &replan.1) - .await - .map_err(|e| Error::LlmReasoning { - stage: "worker/replan".to_string(), - detail: format!("Re-plan LLM call failed: {e}"), - })?; - llm_calls += 1; - let plan_text = 
new_plan.trim().to_string(); - if !plan_text.is_empty() { - info!( - doc = ctx.doc_name, - plan = %plan_text, - "Re-plan generated" - ); - emitter.emit_worker_replan(ctx.doc_name, &missing, plan_text.len()); - state.plan = plan_text; - } - state.missing_info.clear(); - } else if is_check && !state.missing_info.is_empty() { - state.plan.clear(); - state.missing_info.clear(); - } - - // Emit round event - let cmd_str = format!("{:?}", command); - let success = !matches!(step, Step::ForceDone(_)); - let round_elapsed = round_start.elapsed().as_millis() as u64; - emitter.emit_worker_round(ctx.doc_name, round_num, &cmd_str, success, round_elapsed); - - let feedback_preview = if state.last_feedback.len() > 120 { - format!("{}...", &state.last_feedback[..120]) - } else { - state.last_feedback.clone() - }; - state.push_history(format!("{} → {}", cmd_str, feedback_preview)); - - // Check termination - match step { - Step::Done => { - info!( - doc = ctx.doc_name, - evidence = state.evidence.len(), - "Navigation done" - ); - break; - } - Step::ForceDone(reason) => { - info!(doc = ctx.doc_name, reason = %reason, "Forced done"); - break; - } - Step::Continue => { - if !is_check { - state.dec_round(); - } - } - } - } - - let budget_exhausted = state.remaining == 0 || llm_budget_exhausted!(); + run_navigation_loop( + &query, + task_ref, + ctx, + &config, + &llm, + &mut state, + &emitter, + &index_hits, + &intent_context, + &mut llm_calls, + ) + .await?; + + let budget_exhausted = state.remaining == 0 + || (config.max_llm_calls > 0 && llm_calls >= config.max_llm_calls); - // Worker returns raw evidence — no synthesis. - // The Orchestrator owns the single synthesis/fusion point via rerank::process. 
let output = state.into_worker_output(llm_calls, budget_exhausted, ctx.doc_name); emitter.emit_worker_done( @@ -348,3 +190,47 @@ impl<'a> Agent for Worker<'a> { Ok(output) } } + +#[cfg(test)] +mod truncation_tests { + /// Verify that truncating feedback with multi-byte UTF-8 characters + /// never panics. This mirrors the truncation logic in the navigation loop. + #[test] + fn test_utf8_safe_truncation_ascii() { + let feedback = "a".repeat(200); + let boundary = feedback.ceil_char_boundary(120); + let truncated = &feedback[..boundary]; + assert!(truncated.len() <= 123); // 120 + "..." fits + assert!(truncated.is_char_boundary(truncated.len())); + } + + #[test] + fn test_utf8_safe_truncation_multibyte() { + // Each '中' is 3 bytes in UTF-8 + let feedback = "中文反馈内容测试截断安全".repeat(20); + assert!(feedback.len() > 120); + let boundary = feedback.ceil_char_boundary(120); + let truncated = &feedback[..boundary]; + assert!(truncated.len() <= 120); + assert!(truncated.is_char_boundary(truncated.len())); + } + + #[test] + fn test_utf8_safe_truncation_emoji() { + // Emojis are 4 bytes each + let feedback = "🦀🎉🚀".repeat(50); + assert!(feedback.len() > 120); + let boundary = feedback.ceil_char_boundary(120); + let truncated = &feedback[..boundary]; + assert!(truncated.len() <= 120); + assert!(truncated.is_char_boundary(truncated.len())); + } + + #[test] + fn test_utf8_safe_truncation_short_string() { + // String shorter than limit — no truncation needed + let feedback = "short feedback".to_string(); + let boundary = feedback.ceil_char_boundary(120); + assert_eq!(boundary, feedback.len()); + } +} diff --git a/rust/src/agent/worker/navigation.rs b/rust/src/agent/worker/navigation.rs new file mode 100644 index 0000000..c41f0bf --- /dev/null +++ b/rust/src/agent/worker/navigation.rs @@ -0,0 +1,454 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Phase 2: Navigation loop — LLM-driven command loop until done or budget exhausted. 
+ +use tracing::{debug, info}; + +use super::super::command::Command; +use super::super::config::{DocContext, Step, WorkerConfig}; +use super::super::context::FindHit; +use super::super::events::EventEmitter; +use super::super::prompts::{NavigationParams, worker_dispatch, worker_navigation}; +use super::super::state::WorkerState; +use super::execute::{execute_command, parse_and_detect_failure}; +use super::format::format_visited_titles; +use super::planning::{build_replan_prompt, format_keyword_hints}; +use crate::error::Error; +use crate::llm::LlmClient; + +/// Run the Phase 2 navigation loop. +/// +/// Loops until budget exhausted, `done`/`force_done`, or error. +/// Mutates `state` and `llm_calls` in place. +pub async fn run_navigation_loop( + query: &str, + task: Option<&str>, + ctx: &DocContext<'_>, + config: &WorkerConfig, + llm: &LlmClient, + state: &mut WorkerState, + emitter: &EventEmitter, + index_hits: &[FindHit], + intent_context: &str, + llm_calls: &mut u32, +) -> crate::error::Result<()> { + let use_dispatch_prompt = task.is_some(); + let keyword_hints = format_keyword_hints(index_hits, ctx); + let max_llm = config.max_llm_calls; + + loop { + if state.remaining == 0 { + info!(doc = ctx.doc_name, "Navigation budget exhausted"); + break; + } + if max_llm > 0 && *llm_calls >= max_llm { + info!( + doc = ctx.doc_name, + llm_calls, max_llm, "LLM call budget exhausted" + ); + break; + } + + // Build prompt + let (system, user) = build_round_prompt( + query, + task, + ctx, + state, + intent_context, + &keyword_hints, + use_dispatch_prompt, + config.max_rounds, + ); + + // LLM decision + let round_num = config.max_rounds - state.remaining + 1; + let round_start = std::time::Instant::now(); + info!( + doc = ctx.doc_name, + round = round_num, + max_rounds = config.max_rounds, + "Navigation round: calling LLM..." 
+ ); + let llm_output = + llm.complete(&system, &user) + .await + .map_err(|e| Error::LlmReasoning { + stage: "worker/navigation".to_string(), + detail: format!("Nav loop LLM call failed (round {round_num}): {e}"), + })?; + *llm_calls += 1; + + // Parse command + let (command, is_parse_failure) = handle_parse_failure(&llm_output, ctx.doc_name, state); + if is_parse_failure { + continue; + } + + debug!(doc = ctx.doc_name, ?command, "Parsed command"); + + let is_check = matches!(command, Command::Check); + + // Execute + let step = execute_command( + &command, + ctx, + state, + query, + llm, + llm_calls, + emitter, + ) + .await; + + // Dynamic re-planning after insufficient check + handle_replan( + is_check, + query, + task, + ctx, + llm, + state, + emitter, + llm_calls, + max_llm, + ) + .await?; + + // Emit round event + let cmd_str = format!("{:?}", command); + let success = !matches!(step, Step::ForceDone(_)); + let round_elapsed = round_start.elapsed().as_millis() as u64; + emitter.emit_worker_round(ctx.doc_name, round_num, &cmd_str, success, round_elapsed); + + push_round_history(state, &cmd_str); + + // Check termination + match step { + Step::Done => { + info!( + doc = ctx.doc_name, + evidence = state.evidence.len(), + "Navigation done" + ); + break; + } + Step::ForceDone(reason) => { + info!(doc = ctx.doc_name, reason = %reason, "Forced done"); + break; + } + Step::Continue => { + if !is_check { + state.dec_round(); + } + } + } + } + + Ok(()) +} + +/// Build the (system, user) prompt pair for a single navigation round. 
+fn build_round_prompt( + query: &str, + task: Option<&str>, + ctx: &DocContext<'_>, + state: &WorkerState, + intent_context: &str, + keyword_hints: &str, + use_dispatch_prompt: bool, + max_rounds: u32, +) -> (String, String) { + if use_dispatch_prompt && state.remaining == max_rounds { + worker_dispatch(&super::super::prompts::WorkerDispatchParams { + original_query: query, + task: task.unwrap_or(query), + doc_name: ctx.doc_name, + breadcrumb: &state.path_str(), + }) + } else { + let visited_titles = format_visited_titles(state, ctx); + worker_navigation(&NavigationParams { + query, + task, + breadcrumb: &state.path_str(), + evidence_summary: &state.evidence_summary(), + missing_info: &state.missing_info, + last_feedback: &state.last_feedback, + remaining: state.remaining, + max_rounds: state.max_rounds, + history: &state.history_text(), + visited_titles: &visited_titles, + plan: &state.plan, + intent_context, + keyword_hints, + }) + } +} + +/// Parse LLM output and handle parse failures. +/// +/// Returns `(command, is_parse_failure)`. On parse failure, updates state +/// with feedback and pushes a history entry. 
+fn handle_parse_failure( + llm_output: &str, + doc_name: &str, + state: &mut WorkerState, +) -> (Command, bool) { + if llm_output.trim().len() < 2 { + tracing::warn!( + doc = doc_name, + response = llm_output.trim(), + "LLM response unusually short" + ); + } + let (command, is_parse_failure) = parse_and_detect_failure(llm_output); + if is_parse_failure { + let raw_preview = if llm_output.trim().len() > 200 { + format!("{}...", &llm_output.trim()[..200]) + } else { + llm_output.trim().to_string() + }; + state.last_feedback = format!( + "Your output was not recognized as a valid command:\n\"{}\"\n\n\ + Please output exactly one command (ls, cd, cat, head, find, findtree, grep, wc, pwd, check, or done).", + raw_preview + ); + state.push_history("(unrecognized) → parse failure".to_string()); + } + (command, is_parse_failure) +} + +/// Push a round's command + feedback preview into history. +fn push_round_history(state: &mut WorkerState, cmd_str: &str) { + let feedback_preview = if state.last_feedback.len() > 120 { + let boundary = state.last_feedback.ceil_char_boundary(120); + format!("{}...", &state.last_feedback[..boundary]) + } else { + state.last_feedback.clone() + }; + state.push_history(format!("{} → {}", cmd_str, feedback_preview)); +} + +/// Dynamic re-planning after an insufficient check. +/// +/// If check returned INSUFFICIENT with enough remaining rounds and LLM budget, +/// generates a new navigation plan. Otherwise clears stale replan state. 
+async fn handle_replan( + is_check: bool, + query: &str, + task: Option<&str>, + ctx: &DocContext<'_>, + llm: &LlmClient, + state: &mut WorkerState, + emitter: &EventEmitter, + llm_calls: &mut u32, + max_llm: u32, +) -> crate::error::Result<()> { + if !is_check { + return Ok(()); + } + + if !state.missing_info.is_empty() && state.remaining >= 3 && (max_llm == 0 || *llm_calls < max_llm) { + let missing = state.missing_info.clone(); + info!(doc = ctx.doc_name, missing = %missing, "Re-planning navigation..."); + let replan = build_replan_prompt(query, task, state, ctx); + let new_plan = + llm.complete(&replan.0, &replan.1) + .await + .map_err(|e| Error::LlmReasoning { + stage: "worker/replan".to_string(), + detail: format!("Re-plan LLM call failed: {e}"), + })?; + *llm_calls += 1; + let plan_text = new_plan.trim().to_string(); + if !plan_text.is_empty() { + info!( + doc = ctx.doc_name, + plan = %plan_text, + "Re-plan generated" + ); + emitter.emit_worker_replan(ctx.doc_name, &missing, plan_text.len()); + state.plan = plan_text; + } + state.missing_info.clear(); + } else if !state.missing_info.is_empty() { + state.plan.clear(); + state.missing_info.clear(); + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::agent::config::DocContext; + use crate::agent::state::WorkerState; + use crate::document::{DocumentTree, NodeId}; + + fn test_ctx() -> (DocumentTree, NodeId) { + let tree = DocumentTree::new("Root", "root content"); + let root = tree.root(); + (tree, root) + } + + #[test] + fn test_handle_parse_failure_valid_command() { + let (tree, root) = test_ctx(); + let nav = crate::document::NavigationIndex::new(); + let ctx = DocContext { + tree: &tree, + nav_index: &nav, + reasoning_index: &crate::document::ReasoningIndex::default(), + doc_name: "test", + }; + let mut state = WorkerState::new(root, 10); + + let (cmd, is_failure) = handle_parse_failure("ls", ctx.doc_name, &mut state); + assert!(!is_failure); + assert!(matches!(cmd, Command::Ls)); + } 
+ + #[test] + fn test_handle_parse_failure_unrecognized() { + let (tree, root) = test_ctx(); + let nav = crate::document::NavigationIndex::new(); + let ctx = DocContext { + tree: &tree, + nav_index: &nav, + reasoning_index: &crate::document::ReasoningIndex::default(), + doc_name: "test", + }; + let mut state = WorkerState::new(root, 10); + + let (_cmd, is_failure) = handle_parse_failure("random garbage text", ctx.doc_name, &mut state); + assert!(is_failure); + assert!(state.last_feedback.contains("not recognized")); + assert!(state.history.last().unwrap().contains("unrecognized")); + } + + #[test] + fn test_handle_parse_failure_short_response() { + let (tree, root) = test_ctx(); + let nav = crate::document::NavigationIndex::new(); + let ctx = DocContext { + tree: &tree, + nav_index: &nav, + reasoning_index: &crate::document::ReasoningIndex::default(), + doc_name: "test", + }; + let mut state = WorkerState::new(root, 10); + + // Single character response — short but not a parse failure if it's "ls" + let (cmd, is_failure) = handle_parse_failure("ls", ctx.doc_name, &mut state); + assert!(!is_failure); + assert!(matches!(cmd, Command::Ls)); + } + + #[test] + fn test_push_round_history_short_feedback() { + let (_, root) = test_ctx(); + let mut state = WorkerState::new(root, 10); + state.last_feedback = "short feedback".to_string(); + + push_round_history(&mut state, "ls"); + assert_eq!(state.history.len(), 1); + assert!(state.history[0].contains("ls → short feedback")); + } + + #[test] + fn test_push_round_history_long_feedback() { + let (_, root) = test_ctx(); + let mut state = WorkerState::new(root, 10); + state.last_feedback = "a".repeat(200); + + push_round_history(&mut state, "cat"); + assert_eq!(state.history.len(), 1); + assert!(state.history[0].contains("cat → ")); + // Should be truncated with ... 
+ assert!(state.history[0].contains("...")); + } + + #[test] + fn test_push_round_history_respects_max_entries() { + let (_, root) = test_ctx(); + let mut state = WorkerState::new(root, 10); + state.last_feedback = "ok".to_string(); + + for i in 0..8 { + push_round_history(&mut state, &format!("cmd_{i}")); + } + // MAX_HISTORY_ENTRIES is 6, so only last 6 should remain + assert_eq!(state.history.len(), 6); + } + + #[test] + fn test_build_round_prompt_dispatch_first_round() { + let (tree, root) = test_ctx(); + let nav = crate::document::NavigationIndex::new(); + let ctx = DocContext { + tree: &tree, + nav_index: &nav, + reasoning_index: &crate::document::ReasoningIndex::default(), + doc_name: "test_doc", + }; + let mut state = WorkerState::new(root, 10); + // remaining == max_rounds means first round + assert_eq!(state.remaining, 10); + + let (system, user) = build_round_prompt( + "test query", + Some("sub-task"), + &ctx, + &state, + "factual — find answer", + "", + true, // use_dispatch_prompt + 10, + ); + assert!(system.contains("dispatch") || !system.is_empty()); + assert!(user.contains("test query") || user.contains("sub-task")); + } + + #[test] + fn test_build_round_prompt_navigation_subsequent_round() { + let (tree, root) = test_ctx(); + let nav = crate::document::NavigationIndex::new(); + let ctx = DocContext { + tree: &tree, + nav_index: &nav, + reasoning_index: &crate::document::ReasoningIndex::default(), + doc_name: "test_doc", + }; + let mut state = WorkerState::new(root, 10); + state.remaining = 8; // not first round + + let (system, _user) = build_round_prompt( + "test query", + None, + &ctx, + &state, + "factual", + "keyword hints here", + false, // use_dispatch_prompt + 10, + ); + assert!(!system.is_empty()); + } + + #[test] + fn test_utf8_safe_truncation_in_history() { + let (_, root) = test_ctx(); + let mut state = WorkerState::new(root, 10); + // Each '中' is 3 bytes in UTF-8 + state.last_feedback = "中文反馈内容测试截断安全".repeat(20); + + 
push_round_history(&mut state, "cat"); + let entry = &state.history[0]; + // Should be truncated without panicking + assert!(entry.contains("cat → ")); + assert!(entry.len() < state.last_feedback.len() + 20); + } +} diff --git a/rust/src/agent/worker/planning.rs b/rust/src/agent/worker/planning.rs index eb98999..a42bf52 100644 --- a/rust/src/agent/worker/planning.rs +++ b/rust/src/agent/worker/planning.rs @@ -13,8 +13,12 @@ use super::super::context::FindHit; use super::super::state::WorkerState; use super::format::format_visited_titles; -/// Maximum total chars for keyword + semantic sections in planning prompt. -const PLAN_CONTEXT_BUDGET: usize = 1500; +/// Maximum keyword/semantic hit entries in plan prompt. +const MAX_PLAN_ENTRIES: usize = 15; +/// Maximum section summaries in plan prompt. +const MAX_SECTION_SUMMARIES: usize = 10; +/// Maximum deep expansion entries. +const MAX_EXPANSION_ENTRIES: usize = 8; /// Build the navigation planning prompt (Phase 1.5). pub fn build_plan_prompt( @@ -39,6 +43,7 @@ pub fn build_plan_prompt( } else { let mut section = String::from("\nKeyword index matches (use these to prioritize navigation):\n"); + let mut entry_count = 0; for hit in keyword_hits { let mut entries = hit.entries.clone(); entries.sort_by(|a, b| { @@ -56,12 +61,20 @@ pub fn build_plan_prompt( " - keyword '{}' → {} (depth {}, weight {:.2})\n", hit.keyword, ancestor_path, entry.depth, entry.weight )); - if section.len() > PLAN_CONTEXT_BUDGET { - section.push_str(" ... (more hits truncated)\n"); + if let Some(content) = ctx.cat(entry.node_id) { + if let Some(snippet) = + super::super::tools::content_snippet(content, &hit.keyword, 300) + { + section.push_str(&format!(" \"{}\"\n", snippet)); + } + } + entry_count += 1; + if entry_count >= MAX_PLAN_ENTRIES { + section.push_str(" ... 
(more hits omitted)\n"); break; } } - if section.len() > PLAN_CONTEXT_BUDGET { + if entry_count >= MAX_PLAN_ENTRIES { break; } } @@ -70,9 +83,7 @@ pub fn build_plan_prompt( let deep_expansion = build_deep_expansion(keyword_hits, ctx); if !deep_expansion.is_empty() { - if keyword_section.len() + deep_expansion.len() <= PLAN_CONTEXT_BUDGET { - keyword_section.push_str(&deep_expansion); - } + keyword_section.push_str(&deep_expansion); } let semantic_section = build_semantic_hints(&query_keywords, &query_lower, ctx); @@ -165,7 +176,9 @@ pub fn build_replan_prompt( /// ```text /// Keyword matches (use find to jump directly): /// - 'complex' → Performance (weight 0.85) +/// "...complexity analysis shows..." /// - 'latency' → Performance (weight 0.72) +/// "...latency benchmarks indicate..." /// ``` pub fn format_keyword_hints(keyword_hits: &[FindHit], ctx: &DocContext<'_>) -> String { if keyword_hits.is_empty() { @@ -173,6 +186,7 @@ pub fn format_keyword_hints(keyword_hits: &[FindHit], ctx: &DocContext<'_>) -> S } let mut section = String::from("Keyword matches (use find to jump directly):\n"); + let mut entry_count = 0; for hit in keyword_hits { let mut entries = hit.entries.clone(); entries.sort_by(|a, b| { @@ -190,8 +204,16 @@ pub fn format_keyword_hints(keyword_hits: &[FindHit], ctx: &DocContext<'_>) -> S " - '{}' → {} (weight {:.2})\n", hit.keyword, title, entry.weight )); - if section.len() > 800 { - section.push_str(" ... (more)\n"); + if let Some(content) = ctx.cat(entry.node_id) { + if let Some(snippet) = + super::super::tools::content_snippet(content, &hit.keyword, 300) + { + section.push_str(&format!(" \"{}\"\n", snippet)); + } + } + entry_count += 1; + if entry_count >= MAX_PLAN_ENTRIES { + section.push_str(" ... 
(more omitted)\n"); return section; } } @@ -228,21 +250,18 @@ fn build_intent_signals(intent: QueryIntent, ctx: &DocContext<'_>) -> String { if !shortcut.document_summary.is_empty() { section.push_str(&format!( "Document summary: {}\n", - &shortcut.document_summary[..shortcut.document_summary.len().min(500)] + shortcut.document_summary )); } + let mut summary_count = 0; for ss in &shortcut.section_summaries { - let summary_preview = if ss.summary.len() > 200 { - format!("{}...", &ss.summary[..200]) - } else { - ss.summary.clone() - }; section.push_str(&format!( " - Section '{}' (depth {}): {}\n", - ss.title, ss.depth, summary_preview + ss.title, ss.depth, ss.summary )); - if section.len() > PLAN_CONTEXT_BUDGET { - section.push_str(" ... (more sections truncated)\n"); + summary_count += 1; + if summary_count >= MAX_SECTION_SUMMARIES { + section.push_str(" ... (more sections omitted)\n"); break; } } @@ -312,7 +331,7 @@ fn build_semantic_hints( .collect(); let mut section = String::new(); - let budget_remaining = PLAN_CONTEXT_BUDGET.saturating_sub(section.len()); + let mut entry_count = 0; for route in routes { let nav = match ctx.nav_entry(route.node_id) { @@ -374,10 +393,11 @@ fn build_semantic_hints( " - Section '{}' — BM25: {:.2}{}\n", route.title, bm25_score, annotation_str ); - if section.len() + line.len() > budget_remaining { + section.push_str(&line); + entry_count += 1; + if entry_count >= MAX_PLAN_ENTRIES { break; } - section.push_str(&line); } if section.is_empty() { @@ -398,6 +418,7 @@ fn build_deep_expansion(keyword_hits: &[FindHit], ctx: &DocContext<'_>) -> Strin let mut seen_parents = HashSet::new(); let mut expansion = String::new(); + let mut expansion_count = 0; for hit in keyword_hits { for entry in &hit.entries { @@ -432,12 +453,13 @@ fn build_deep_expansion(keyword_hits: &[FindHit], ctx: &DocContext<'_>) -> Strin )); } expansion.push('\n'); - if expansion.len() > 500 { - expansion.push_str(" ... 
(more expansions truncated)\n"); + expansion_count += 1; + if expansion_count >= MAX_EXPANSION_ENTRIES { + expansion.push_str(" ... (more expansions omitted)\n"); break; } } - if expansion.len() > 500 { + if expansion_count >= MAX_EXPANSION_ENTRIES { break; } } @@ -640,7 +662,7 @@ mod tests { #[test] fn test_build_replan_prompt() { let (tree, nav, root, _, _) = build_semantic_test_tree(); - let mut state = WorkerState::new(root, 8); + let mut state = WorkerState::new(root, 15); state.missing_info = "Need Q2 revenue figures".to_string(); state.add_evidence(Evidence { source_path: "root/Revenue".to_string(), diff --git a/rust/src/client/engine.rs b/rust/src/client/engine.rs index d32b4e6..cc95a5f 100644 --- a/rust/src/client/engine.rs +++ b/rust/src/client/engine.rs @@ -40,7 +40,7 @@ use std::{collections::HashMap, sync::Arc}; use futures::StreamExt; -use tracing::info; +use tracing::{info, warn}; use crate::{ DocumentTree, Error, @@ -366,9 +366,15 @@ impl Engine { old_id: Option<&str>, ) -> (Vec, Vec) { let item = Self::build_index_item(&doc); + + info!( + "[index] Persisting document '{}'...", + doc.name, + ); let persisted = IndexerClient::to_persisted(doc, pipeline_options).await; if let Err(e) = self.workspace.save(&persisted).await { + warn!("[index] Failed to save document: {}", e); return ( Vec::new(), vec![FailedItem::new(source_label, e.to_string())], @@ -377,11 +383,11 @@ impl Engine { // Clean up old document after successful save if let Some(old_id) = old_id { if let Err(e) = self.workspace.remove(old_id).await { - tracing::warn!("Failed to remove old document {}: {}", old_id, e); + warn!("Failed to remove old document {}: {}", old_id, e); } } - info!("Indexed document: {}", item.doc_id); + info!("[index] Document persisted: {}", item.doc_id); (vec![item], Vec::new()) } @@ -417,8 +423,14 @@ impl Engine { self.with_timeout(timeout_secs, async move { let doc_ids = self.resolve_scope(&ctx.scope).await?; + info!(doc_count = doc_ids.len(), "Resolving 
documents for query"); let (documents, failed) = self.load_documents(&doc_ids).await?; + info!( + loaded = documents.len(), + failed = failed.len(), + "Documents loaded" + ); if documents.is_empty() { return Err(Error::Config(format!( "No documents available for query: {} failures", @@ -1004,20 +1016,44 @@ impl Engine { ); let concurrency = self.config.llm.throttle.max_concurrent_requests; - let loaded: Vec> = futures::stream::iter(doc_ids.iter().cloned()) - .map(|doc_id| { - let ws = self.workspace.clone(); - async move { ws.load(&doc_id).await.ok().flatten() } - }) - .buffer_unordered(concurrency) - .collect() - .await; + let doc_ids_clone: Vec = doc_ids.iter().cloned().collect(); + let loaded: Vec<(String, Result>)> = + futures::stream::iter(doc_ids_clone.into_iter()) + .map(|doc_id| { + let ws = self.workspace.clone(); + async move { + let result = ws.load(&doc_id).await; + (doc_id, result) + } + }) + .buffer_unordered(concurrency) + .collect() + .await; + + let mut failed_count = 0usize; + let mut loaded_docs: Vec = Vec::new(); + for (doc_id, result) in loaded { + match result { + Ok(Some(doc)) => loaded_docs.push(doc), + Ok(None) => { + warn!(doc_id, "Document in meta index but not in backend during graph rebuild"); + failed_count += 1; + } + Err(e) => { + warn!(doc_id, error = %e, "Failed to load document for graph rebuild"); + failed_count += 1; + } + } + } - let loaded_count = loaded.iter().filter(|d| d.is_some()).count(); - info!(loaded_count, "Documents loaded, building graph"); + info!( + loaded = loaded_docs.len(), + failed = failed_count, + "Documents loaded for graph rebuild" + ); let mut builder = crate::graph::DocumentGraphBuilder::new(self.config.graph.clone()); - for doc in loaded.into_iter().flatten() { + for doc in &loaded_docs { let keywords = Self::extract_keywords_from_doc(&doc); builder.add_document( &doc.meta.id, diff --git a/rust/src/index/stages/reasoning.rs b/rust/src/index/stages/reasoning.rs index 612c0b3..d30e130 100644 --- 
a/rust/src/index/stages/reasoning.rs +++ b/rust/src/index/stages/reasoning.rs @@ -68,13 +68,9 @@ impl ReasoningIndexStage { Self::extract_node_keywords(&node.title, config.min_keyword_length); let summary_keywords = Self::extract_node_keywords(&node.summary, config.min_keyword_length); - let content_keywords = if node.summary.is_empty() { - // Fallback: extract from content if no summary - let content_sample: String = node.content.chars().take(500).collect(); - Self::extract_node_keywords(&content_sample, config.min_keyword_length) - } else { - Vec::new() - }; + // Always extract from content — keywords can appear anywhere + let content_keywords = + Self::extract_node_keywords(&node.content, config.min_keyword_length); // Title keywords get higher weight (2.0), summary (1.5), content (1.0) for kw in &title_keywords { diff --git a/rust/src/query/understand.rs b/rust/src/query/understand.rs index 66a1857..9790e55 100644 --- a/rust/src/query/understand.rs +++ b/rust/src/query/understand.rs @@ -34,6 +34,7 @@ pub async fn understand( llm: &LlmClient, ) -> crate::error::Result { let (system, user) = understand_prompt(query, keywords); + info!("Query understanding: calling LLM..."); let response = llm.complete(&system, &user).await?; if response.trim().is_empty() { @@ -46,21 +47,20 @@ pub async fn understand( } let analysis = parse_analysis(&response).ok_or_else(|| { - warn!( - response = &response[..response.len().min(500)], - "Query understanding: failed to parse LLM response as JSON" - ); + let preview = &response[..response.len().min(300)]; crate::error::Error::Config(format!( "Query understanding returned unparseable response ({} chars): {}", response.len(), - &response[..response.len().min(300)] + preview )) })?; info!( intent = %analysis.intent, complexity = %analysis.complexity, - concepts = analysis.key_concepts.len(), + concepts = ?analysis.key_concepts, + strategy = %analysis.strategy_hint, + rewritten = ?analysis.rewritten, "Query understanding complete" ); 
Ok(analysis.into_plan(query, keywords)) @@ -69,18 +69,37 @@ pub async fn understand( /// Parse the LLM's JSON response into a QueryAnalysis. fn parse_analysis(response: &str) -> Option { let trimmed = response.trim(); + // Try to extract JSON from the response (LLM may wrap it in markdown) let json_str = if trimmed.starts_with("```") { - // Strip markdown code fences - let without_start = trimmed - .trim_start_matches(|c| c == '`' || c == 'j' || c == 's' || c == 'o' || c == 'n'); - let without_end = without_start.trim_end_matches(|c| c == '`'); + // Find the first newline after the opening fence (skips language tag) + let after_fence = if let Some(nl) = trimmed.find('\n') { + &trimmed[nl + 1..] + } else { + trimmed + }; + // Strip the closing fence + let without_end = if let Some(end) = after_fence.rfind("```") { + &after_fence[..end] + } else { + after_fence + }; without_end.trim() } else { trimmed }; - serde_json::from_str(json_str).ok() + match serde_json::from_str(json_str) { + Ok(analysis) => Some(analysis), + Err(e) => { + warn!( + error = %e, + json_len = json_str.len(), + "Query understanding: JSON parse failed" + ); + None + } + } } impl QueryAnalysis { @@ -189,6 +208,32 @@ mod tests { assert!(parse_analysis("not json").is_none()); } + #[test] + fn test_parse_analysis_code_fence_no_newline() { + // Edge case: ```json{"intent":...}``` with no newline after language tag + let response = "```json\n{\"intent\":\"factual\",\"key_concepts\":[\"test\"],\"strategy_hint\":\"focused\",\"complexity\":\"simple\",\"rewritten\":null,\"sub_queries\":[]}\n```"; + let analysis = parse_analysis(response).unwrap(); + assert_eq!(analysis.intent, "factual"); + } + + #[test] + fn test_parse_analysis_code_fence_no_closing() { + // LLM sometimes omits the closing fence + let response = "```json\n{\"intent\":\"summary\",\"key_concepts\":[\"overview\"],\"strategy_hint\":\"summary\",\"complexity\":\"simple\",\"rewritten\":null,\"sub_queries\":[]}"; + let analysis = 
parse_analysis(response).unwrap(); + assert_eq!(analysis.intent, "summary"); + } + + #[test] + fn test_parse_analysis_keys_starting_with_fence_letters() { + // The old trim_start_matches(|c| 'j' | 's' | 'o' | 'n') would eat + // JSON keys starting with those letters. Verify this works correctly. + let response = r#"{"intent":"navigational","key_concepts":["journal","offset","node"],"strategy_hint":"focused","complexity":"moderate","rewritten":null,"sub_queries":[]}"#; + let analysis = parse_analysis(response).unwrap(); + assert_eq!(analysis.intent, "navigational"); + assert_eq!(analysis.key_concepts, vec!["journal", "offset", "node"]); + } + #[test] fn test_default_plan() { let plan = QueryPlan::default_for("test query", vec!["test".to_string()]); diff --git a/rust/src/rerank/dedup.rs b/rust/src/rerank/dedup.rs index 9b30f75..c54fc0a 100644 --- a/rust/src/rerank/dedup.rs +++ b/rust/src/rerank/dedup.rs @@ -31,7 +31,8 @@ pub fn dedup(evidence: &[Evidence]) -> Vec { let source_deduped: Vec<&Evidence> = quality .into_iter() .filter(|e| { - let key = format!("{}:{}", e.doc_name.as_deref().unwrap_or(""), e.source_path); + let doc_key = e.doc_name.as_deref().unwrap_or("_unknown"); + let key = format!("{}:{}", doc_key, e.source_path); seen_sources.insert(key) }) .collect(); @@ -144,4 +145,71 @@ mod tests { let b = tokenize("ccc ddd"); assert!((jaccard(&a, &b)).abs() < 0.001); } + + #[test] + fn test_source_dedup_none_doc_name() { + // Evidence with doc_name: None should use "_unknown" as doc key, + // so same source_path with None doc_name still deduplicates correctly. 
+ let evidence = vec![ + Evidence { + source_path: "root/section_a".to_string(), + node_title: "A".to_string(), + content: "content A with enough text to pass the quality filter threshold" + .to_string(), + doc_name: None, + }, + Evidence { + source_path: "root/section_a".to_string(), + node_title: "A2".to_string(), + content: "different content but same source path that should be deduped".to_string(), + doc_name: None, + }, + ]; + let result = dedup(&evidence); + assert_eq!(result.len(), 1); + } + + #[test] + fn test_source_dedup_mixed_doc_name() { + // Same source_path but different doc_name should produce different dedup keys, + // so both survive source dedup. Content must be sufficiently different too. + let evidence = vec![ + Evidence { + source_path: "root/section".to_string(), + node_title: "A".to_string(), + content: "Revenue for Q4 was twelve million dollars driven by SaaS growth in the enterprise segment".to_string(), + doc_name: Some("doc_a".to_string()), + }, + Evidence { + source_path: "root/section".to_string(), + node_title: "B".to_string(), + content: "The encryption module uses AES-256 for data at rest and TLS 1.3 for all network communication".to_string(), + doc_name: Some("doc_b".to_string()), + }, + ]; + let result = dedup(&evidence); + assert_eq!(result.len(), 2); + } + + #[test] + fn test_source_dedup_none_vs_some_doc_name() { + // None doc_name ("_unknown") and Some doc_name produce different keys, + // so both survive source dedup. Content must be sufficiently different too. 
+ let evidence = vec![ + Evidence { + source_path: "root/section".to_string(), + node_title: "A".to_string(), + content: "The database uses a log-structured merge tree with write-ahead logging for durability".to_string(), + doc_name: None, + }, + Evidence { + source_path: "root/section".to_string(), + node_title: "B".to_string(), + content: "Authentication requires Bearer tokens with automatic refresh after twenty-four hours".to_string(), + doc_name: Some("doc_x".to_string()), + }, + ]; + let result = dedup(&evidence); + assert_eq!(result.len(), 2); + } } diff --git a/rust/src/retrieval/cache.rs b/rust/src/retrieval/cache.rs index f4d9839..ecfce79 100644 --- a/rust/src/retrieval/cache.rs +++ b/rust/src/retrieval/cache.rs @@ -16,10 +16,12 @@ //! Node scores are independent of the query, so they can be shared across //! different queries on the same document. -use std::collections::HashMap; +use std::collections::{HashMap, VecDeque}; use std::sync::RwLock; use std::time::Instant; +use tracing::warn; + use crate::document::NodeId; use crate::utils::fingerprint::Fingerprint; @@ -86,7 +88,7 @@ pub struct CachedCandidate { struct L1Store { entries: HashMap, - order: Vec, // For LRU eviction + order: VecDeque, // For LRU eviction — O(1) pop_front } // ---- L2: Path Pattern Cache ---- @@ -102,7 +104,7 @@ struct L2Entry { struct L2Store { entries: HashMap, // Key: "doc_fp:node_path" - order: Vec, + order: VecDeque, } // ---- L3: Strategy Score Cache ---- @@ -118,7 +120,7 @@ struct L3Entry { struct L3Store { entries: HashMap, // Key: node content fingerprint - order: Vec, + order: VecDeque, } // ---- Public API ---- @@ -134,15 +136,15 @@ impl ReasoningCache { Self { l1: RwLock::new(L1Store { entries: HashMap::new(), - order: Vec::new(), + order: VecDeque::new(), }), l2: RwLock::new(L2Store { entries: HashMap::new(), - order: Vec::new(), + order: VecDeque::new(), }), l3: RwLock::new(L3Store { entries: HashMap::new(), - order: Vec::new(), + order: VecDeque::new(), }), config, 
} @@ -156,7 +158,7 @@ impl ReasoningCache { /// on the same document scope. pub fn l1_get(&self, query: &str, scope_fp: &Fingerprint) -> Option> { let query_fp = Fingerprint::from_str(query); - let l1 = self.l1.read().ok()?; + let l1 = read_lock(&self.l1)?; let entry = l1.entries.get(&query_fp)?; // Scope must match (same document set) if &entry.scope_fp != scope_fp { @@ -187,7 +189,7 @@ impl ReasoningCache { created_at: Instant::now(), }, ); - l1.order.push(query_fp); + l1.order.push_back(query_fp); } } @@ -199,7 +201,7 @@ impl ReasoningCache { /// return the confidence score. pub fn l2_get(&self, doc_key: &str, node_path: &str) -> Option { let key = format!("{}:{}", doc_key, node_path); - let l2 = self.l2.read().ok()?; + let l2 = read_lock(&self.l2)?; let entry = l2.entries.get(&key)?; Some(entry.confidence) } @@ -227,7 +229,7 @@ impl ReasoningCache { created_at: Instant::now(), }, ); - l2.order.push(key); + l2.order.push_back(key); } } } @@ -237,9 +239,9 @@ impl ReasoningCache { /// Useful for bootstrapping new queries on a known document. pub fn l2_top_paths(&self, doc_key: &str, n: usize) -> Vec<(String, f32)> { let prefix = format!("{}:", doc_key); - let l2 = match self.l2.read() { - Ok(guard) => guard, - Err(_) => return Vec::new(), + let l2 = match read_lock(&self.l2) { + Some(guard) => guard, + None => return Vec::new(), }; let mut paths: Vec<(String, f32)> = l2 @@ -260,7 +262,7 @@ impl ReasoningCache { /// Node scores from keyword/BM25 are content-dependent but /// query-independent, so they can be shared across queries. 
pub fn l3_get(&self, node_content_fp: &Fingerprint) -> Option<(f32, String)> { - let l3 = self.l3.read().ok()?; + let l3 = read_lock(&self.l3)?; let entry = l3.entries.get(node_content_fp)?; Some((entry.score, entry.strategy.clone())) } @@ -279,7 +281,7 @@ impl ReasoningCache { created_at: Instant::now(), }, ); - l3.order.push(node_content_fp); + l3.order.push_back(node_content_fp); } } @@ -288,9 +290,9 @@ impl ReasoningCache { /// Get cache statistics. pub fn stats(&self) -> ReasoningCacheStats { let (l1_count, l2_count, l3_count) = ( - self.l1.read().map(|g| g.entries.len()).unwrap_or(0), - self.l2.read().map(|g| g.entries.len()).unwrap_or(0), - self.l3.read().map(|g| g.entries.len()).unwrap_or(0), + read_lock(&self.l1).map(|g| g.entries.len()).unwrap_or(0), + read_lock(&self.l2).map(|g| g.entries.len()).unwrap_or(0), + read_lock(&self.l3).map(|g| g.entries.len()).unwrap_or(0), ); ReasoningCacheStats { l1_entries: l1_count, @@ -318,23 +320,20 @@ impl ReasoningCache { // ============ Eviction helpers ============ fn evict_lru_fingerprint(l1: &mut L1Store) { - if let Some(old) = l1.order.first().copied() { + if let Some(old) = l1.order.pop_front() { l1.entries.remove(&old); - l1.order.remove(0); } } fn evict_lru_string(l2: &mut L2Store) { - if let Some(old) = l2.order.first().cloned() { + if let Some(old) = l2.order.pop_front() { l2.entries.remove(&old); - l2.order.remove(0); } } fn evict_lru_fingerprint_l3(l3: &mut L3Store) { - if let Some(old) = l3.order.first().copied() { + if let Some(old) = l3.order.pop_front() { l3.entries.remove(&old); - l3.order.remove(0); } } } @@ -345,6 +344,21 @@ impl Default for ReasoningCache { } } +/// Read from a RwLock, recovering from poison by taking the guard anyway. +/// +/// A poisoned lock means another thread panicked while holding it — the data +/// is still valid, just potentially in an inconsistent state. For a cache, +/// returning stale/empty data is always preferable to failing silently. 
+fn read_lock(lock: &RwLock) -> Option> { + match lock.read() { + Ok(guard) => Some(guard), + Err(poisoned) => { + warn!("ReasoningCache: recovering from poisoned lock"); + Some(poisoned.into_inner()) + } + } +} + /// Cache statistics. #[derive(Debug, Clone)] pub struct ReasoningCacheStats { @@ -477,4 +491,87 @@ mod tests { assert!(cache.l1_get("q2", &scope).is_some()); assert!(cache.l1_get("q3", &scope).is_some()); } + + #[test] + fn test_l2_lru_eviction() { + let config = ReasoningCacheConfig { + l2_max: 2, + ..Default::default() + }; + let cache = ReasoningCache::with_config(config); + + cache.l2_record("doc", "1", 0.5); + cache.l2_record("doc", "2", 0.6); + cache.l2_record("doc", "3", 0.7); // evicts "doc:1" + + assert!(cache.l2_get("doc", "1").is_none()); + assert!(cache.l2_get("doc", "2").is_some()); + assert!(cache.l2_get("doc", "3").is_some()); + } + + #[test] + fn test_l3_lru_eviction() { + let config = ReasoningCacheConfig { + l3_max: 2, + ..Default::default() + }; + let cache = ReasoningCache::with_config(config); + + let fp1 = Fingerprint::from_str("content_a"); + let fp2 = Fingerprint::from_str("content_b"); + let fp3 = Fingerprint::from_str("content_c"); + + cache.l3_store(fp1, 0.5, "kw".into()); + cache.l3_store(fp2, 0.6, "kw".into()); + cache.l3_store(fp3, 0.7, "kw".into()); // evicts fp1 + + assert!(cache.l3_get(&fp1).is_none()); + assert!(cache.l3_get(&fp2).is_some()); + assert!(cache.l3_get(&fp3).is_some()); + } + + #[test] + fn test_poisoned_lock_recovery() { + let cache = ReasoningCache::new(); + + // Verify normal operation: store and retrieve still works + let scope = Fingerprint::from_str("doc"); + cache.l1_store("query", scope, vec![], "kw".into()); + + let scope2 = Fingerprint::from_str("doc2"); + cache.l1_store("q2", scope2, vec![], "kw".into()); + assert!(cache.l1_get("q2", &scope2).is_some()); + + // Verify stats still works (internally uses read_lock) + let stats = cache.stats(); + assert!(stats.l1_entries >= 1); + } + + #[test] + fn 
test_poisoned_lock_read_recovery() { + use std::sync::Arc; + use std::thread; + + // Create a cache and populate it + let cache = Arc::new(ReasoningCache::new()); + let scope = Fingerprint::from_str("doc"); + cache.l1_store("query", scope, vec![], "kw".into()); + + // Poison the lock from another thread + let cache_clone = Arc::clone(&cache); + let handle = thread::spawn(move || { + // This will poison the L1 lock + let _guard = cache_clone.l1.write().unwrap(); + panic!("intentional panic to poison lock"); + }); + + // Wait for the panicking thread to finish + let _ = handle.join(); + + // The lock is now poisoned. Our read_lock() should recover from it. + // l1_get uses read_lock internally + let result = cache.l1_get("query", &scope); + // Should still return data (recovered from poison) + assert!(result.is_some()); + } } diff --git a/rust/src/retrieval/dispatcher.rs b/rust/src/retrieval/dispatcher.rs index 0ea3a2e..e92766f 100644 --- a/rust/src/retrieval/dispatcher.rs +++ b/rust/src/retrieval/dispatcher.rs @@ -58,13 +58,8 @@ pub async fn dispatch( // Step 1: Query understanding — LLM analyzes intent, concepts, complexity. // This is required. "Model fails, we fail." — errors propagate. + info!("Starting query understanding..."); let query_plan = QueryPipeline::understand(query, llm).await?; - info!( - intent = %query_plan.intent, - complexity = %query_plan.complexity, - concepts = query_plan.key_concepts.len(), - "Query understanding complete" - ); // Step 2: Dispatch to Orchestrator with the query plan. let orchestrator = Orchestrator::new(