From 0f120ead1e9cde2921ecd2275a5a504f36b560fd Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Wed, 22 Apr 2026 08:33:56 +0800 Subject: [PATCH 01/10] feat: add comprehensive Session API walkthrough example - Add detailed README.md documenting the full high-level Vectorless Python API - Implement main.py demonstrating Session and SyncSession classes usage - Cover 10 key topics including session creation, indexing, querying, and metrics - Include sample documents for architecture, finance, and security domains - Provide examples for both async and sync APIs with proper error handling - Demonstrate event callbacks, document management, and streaming queries --- examples/session_walkthrough/README.md | 32 ++ examples/session_walkthrough/main.py | 589 +++++++++++++++++++++++++ 2 files changed, 621 insertions(+) create mode 100644 examples/session_walkthrough/README.md create mode 100644 examples/session_walkthrough/main.py diff --git a/examples/session_walkthrough/README.md b/examples/session_walkthrough/README.md new file mode 100644 index 0000000..17174a0 --- /dev/null +++ b/examples/session_walkthrough/README.md @@ -0,0 +1,32 @@ +# Session API Walkthrough + +Demonstrates the full high-level Vectorless Python API using the `Session` and `SyncSession` classes. 
+ +## What it covers + +| # | Topic | API | +|---|-------|-----| +| 1 | Session creation | `Session()`, `from_env()`, `from_config_file()` | +| 2 | Indexing sources | `index(content=)`, `index(path=)`, `index(bytes_data=)`, `index(directory=)` | +| 3 | Batch indexing | `index_batch(paths, jobs=N)` | +| 4 | Querying | `ask(question, doc_ids=)`, `ask(question, workspace_scope=True)` | +| 5 | Streaming query | `query_stream()` async iterator | +| 6 | Document management | `list_documents()`, `document_exists()`, `remove_document()`, `clear_all()` | +| 7 | Document graph | `get_graph()` nodes, edges, keywords | +| 8 | Event callbacks | `EventEmitter` with `@on_index` / `@on_query` decorators | +| 9 | Metrics | `metrics_report()` | +| 10 | Sync API | `SyncSession` (no async/await) | + +## Setup + +```bash +pip install vectorless +export VECTORLESS_API_KEY="sk-..." +export VECTORLESS_MODEL="gpt-4o" +``` + +## Run + +```bash +python main.py +``` diff --git a/examples/session_walkthrough/main.py b/examples/session_walkthrough/main.py new file mode 100644 index 0000000..f94e5bb --- /dev/null +++ b/examples/session_walkthrough/main.py @@ -0,0 +1,589 @@ +""" +Session API walkthrough -- demonstrates the full high-level Vectorless API. + +This example uses the Session class (recommended entry point) to cover: + 1. Session creation (constructor / from_env / from_config_file) + 2. Indexing from various sources (content, path, directory, bytes) + 3. Batch indexing with concurrency control + 4. Querying with doc_ids and workspace scope + 5. Streaming query with real-time events + 6. Document management (list, exists, remove, clear) + 7. Cross-document relationship graph + 8. Event callbacks for progress monitoring + 9. Metrics reporting + 10. SyncSession (synchronous API, no async/await) + +Usage: + export VECTORLESS_API_KEY="sk-..." 
+ export VECTORLESS_MODEL="gpt-4o" + pip install vectorless + python main.py +""" + +import asyncio +import os +import tempfile + +from vectorless import ( + Session, + SyncSession, + EventEmitter, + VectorlessError, +) +from vectorless.events import IndexEventType, QueryEventType + + +# ────────────────────────────────────────────────────────────────── +# Sample documents used throughout the example +# ────────────────────────────────────────────────────────────────── + +ARCHITECTURE_DOC = """\ +# Vectorless Architecture + +## Overview + +Vectorless is a reasoning-native document intelligence engine. +It uses hierarchical semantic trees instead of vector embeddings. + +## Key Concepts + +- **Semantic Tree**: Documents are parsed into a tree of sections. +- **LLM Navigation**: Queries are resolved by traversing the tree. +- **No Vectors**: No embeddings, no similarity search, no vector DB. + +## Retrieval Flow + +Engine.query() + -> query/understand() -> QueryPlan + -> Orchestrator dispatches Workers + -> Workers navigate document trees + -> rerank -> synthesis -> answer +""" + +FINANCE_DOC = """\ +# Q4 Financial Report + +## Revenue + +Total revenue for Q4 was $12.3M, up 15% from Q3. +SaaS subscriptions accounted for $8.1M, consulting for $4.2M. + +## Costs + +Operating costs were $9.8M, including $3.2M in engineering salaries. +Marketing spend was reduced by 8% to $1.5M. + +## Outlook + +Projected Q1 revenue is $13.5M based on current pipeline. +""" + +SECURITY_DOC = """\ +# Security Policy + +## Authentication + +All API requests require a Bearer token in the Authorization header. +Tokens expire after 24 hours and must be refreshed. + +## Data Encryption + +Data at rest is encrypted using AES-256. Data in transit uses TLS 1.3. + +## Audit Logging + +All access to sensitive data is logged and retained for 90 days. 
+""" + + +# ────────────────────────────────────────────────────────────────── +# Helper: set up a temp directory with sample files +# ────────────────────────────────────────────────────────────────── + +def create_sample_directory() -> tuple[str, list[str]]: + """Create a temp directory with sample documents. Returns (dir, paths).""" + tmpdir = tempfile.mkdtemp(prefix="vectorless_walkthrough_") + docs = { + "architecture.md": ARCHITECTURE_DOC, + "finance.md": FINANCE_DOC, + "security.md": SECURITY_DOC, + } + paths = [] + for name, content in docs.items(): + path = os.path.join(tmpdir, name) + with open(path, "w") as f: + f.write(content) + paths.append(path) + return tmpdir, paths + + +def cleanup_directory(tmpdir: str) -> None: + """Remove all files in the temp directory.""" + for fname in os.listdir(tmpdir): + os.remove(os.path.join(tmpdir, fname)) + os.rmdir(tmpdir) + + +# ────────────────────────────────────────────────────────────────── +# Section 1: Session Creation +# ────────────────────────────────────────────────────────────────── + +async def demo_session_creation() -> Session: + """Demonstrate different ways to create a Session.""" + print("=" * 60) + print(" 1. 
Session Creation") + print("=" * 60) + + # Option A: Constructor with explicit credentials + api_key = os.environ.get("VECTORLESS_API_KEY", "sk-...") + model = os.environ.get("VECTORLESS_MODEL", "gpt-4o") + endpoint = os.environ.get("VECTORLESS_ENDPOINT") + + session = Session(api_key=api_key, model=model, endpoint=endpoint) + print(f" Created: {session}") + + # Option B: from environment variables + # session = Session.from_env() + + # Option C: from a config file + # session = Session.from_config_file("~/.vectorless/config.toml") + + # Option D: with an EventEmitter for progress callbacks + # events = EventEmitter() + # session = Session(api_key=api_key, model=model, events=events) + + print() + return session + + +# ────────────────────────────────────────────────────────────────── +# Section 2: Indexing from Various Sources +# ────────────────────────────────────────────────────────────────── + +async def demo_indexing(session: Session, tmpdir: str, paths: list[str]) -> dict[str, str]: + """Demonstrate indexing from content, path, directory, and bytes.""" + print("=" * 60) + print(" 2. Indexing") + print("=" * 60) + + doc_ids: dict[str, str] = {} + + # --- 2a. Index from in-memory content --- + print(" [content] Indexing from string...") + result = await session.index( + content=ARCHITECTURE_DOC, + format="markdown", + name="architecture", + ) + doc_ids["architecture"] = result.doc_id # type: ignore[assignment] + print(f" doc_id: {result.doc_id}") + print(f" items: {result.total()}") + + # --- 2b. Index from a file path --- + print(" [path] Indexing from file path...") + result = await session.index(path=paths[1], name="finance") + doc_ids["finance"] = result.doc_id # type: ignore[assignment] + print(f" doc_id: {result.doc_id}") + + # --- 2c. 
Index from raw bytes --- + print(" [bytes] Indexing from raw bytes...") + result = await session.index( + bytes_data=SECURITY_DOC.encode("utf-8"), + format="markdown", + name="security", + ) + doc_ids["security"] = result.doc_id # type: ignore[assignment] + print(f" doc_id: {result.doc_id}") + + # --- 2d. Index a directory --- + print(" [dir] Indexing a directory...") + # Clear first to see fresh results + await session.clear_all() + + result = await session.index(directory=tmpdir, name="all_docs") + print(f" doc_id: {result.doc_id}") + print(f" items: {len(result.items)}") + for item in result.items: + print(f" - {item.name} ({item.doc_id[:8]}...)") + doc_ids[item.name] = item.doc_id + + print() + return doc_ids + + +# ────────────────────────────────────────────────────────────────── +# Section 3: Batch Indexing with Concurrency +# ────────────────────────────────────────────────────────────────── + +async def demo_batch_indexing(session: Session, paths: list[str]) -> list[str]: + """Demonstrate batch indexing with concurrent jobs.""" + print("=" * 60) + print(" 3. Batch Indexing (concurrency=2)") + print("=" * 60) + + # Clear to start fresh + await session.clear_all() + + results = await session.index_batch( + paths, + mode="default", + jobs=2, # max 2 concurrent indexing operations + force=False, + ) + + doc_ids = [] + for r in results: + print(f" {r.doc_id[:8]}... ({len(r.items)} items)") + for item in r.items: + doc_ids.append(item.doc_id) + + print(f" Batch indexed {len(results)} file(s), {len(doc_ids)} document(s) total") + print() + return doc_ids + + +# ────────────────────────────────────────────────────────────────── +# Section 4: Querying +# ────────────────────────────────────────────────────────────────── + +async def demo_querying(session: Session, doc_ids: list[str]) -> None: + """Demonstrate querying with doc_ids and workspace scope.""" + print("=" * 60) + print(" 4. 
Querying") + print("=" * 60) + + # --- Query specific documents --- + print(" [ask] Query specific documents...") + response = await session.ask( + "What was the total revenue for Q4?", + doc_ids=doc_ids[:2], # limit to first two docs + ) + + result = response.single() + if result: + print(f" Score: {result.score:.2f}") + print(f" Confidence: {result.confidence:.2f}") + print(f" Answer: {result.content[:150]}...") + if result.evidence: + print(f" Evidence: {len(result.evidence)} item(s)") + for ev in result.evidence[:2]: + print(f" - {ev.title}: {ev.content[:80]}...") + if result.metrics: + print(f" LLM calls: {result.metrics.llm_calls}") + print(f" Nodes: {result.metrics.nodes_visited}") + + # --- Query across all documents --- + print() + print(" [workspace_scope] Query across entire workspace...") + response = await session.ask( + "How is data encrypted?", + workspace_scope=True, + ) + for item in response.items: + print(f" [{item.doc_id[:8]}...] score={item.score:.2f}") + print(f" {item.content[:120]}...") + + # --- Query with timeout --- + print() + print(" [timeout] Query with 30s timeout...") + try: + response = await session.ask( + "What is the retrieval flow?", + doc_ids=doc_ids, + timeout_secs=30, + ) + if response.single(): + print(f" Answer: {response.single().content[:150]}...") + except VectorlessError as e: + print(f" Error: {e}") + + print() + + +# ────────────────────────────────────────────────────────────────── +# Section 5: Streaming Query +# ────────────────────────────────────────────────────────────────── + +async def demo_streaming(session: Session, doc_ids: list[str]) -> None: + """Demonstrate streaming query with real-time events.""" + print("=" * 60) + print(" 5. 
Streaming Query") + print("=" * 60) + + stream = await session.query_stream( + "What are the key concepts?", + doc_ids=doc_ids[:1], + ) + + event_count = 0 + async for event in stream: + event_count += 1 + event_type = event.get("type", "unknown") + # Print a compact summary of each event + if event_type == "completed": + results = event.get("results", []) + print(f" [{event_count}] completed — {len(results)} result(s)") + elif event_type == "error": + print(f" [{event_count}] error — {event.get('message', '')}") + else: + print(f" [{event_count}] {event_type}") + + # The final result is available after iteration completes + if stream.result: + final = stream.result + item = final.single() + if item: + print(f" Final answer: {item.content[:150]}...") + + print() + + +# ────────────────────────────────────────────────────────────────── +# Section 6: Document Management +# ────────────────────────────────────────────────────────────────── + +async def demo_document_management(session: Session, doc_ids: list[str]) -> None: + """Demonstrate list, exists, remove, and clear.""" + print("=" * 60) + print(" 6. Document Management") + print("=" * 60) + + # --- List all documents --- + docs = await session.list_documents() + print(f" Listed {len(docs)} document(s):") + for doc in docs: + pages = f", pages={doc.page_count}" if doc.page_count else "" + print(f" {doc.name} id={doc.id[:8]}... 
format={doc.format}{pages}") + + # --- Check existence --- + if doc_ids: + exists = await session.document_exists(doc_ids[0]) + print(f"\n exists({doc_ids[0][:8]}...): {exists}") + + # --- Remove a document --- + if len(doc_ids) > 1: + removed = await session.remove_document(doc_ids[1]) + print(f" remove({doc_ids[1][:8]}...): {removed}") + + # Verify removal + exists_after = await session.document_exists(doc_ids[1]) + print(f" exists after removal: {exists_after}") + + # --- List again --- + docs = await session.list_documents() + print(f"\n After removal: {len(docs)} document(s)") + + print() + + +# ────────────────────────────────────────────────────────────────── +# Section 7: Cross-Document Relationship Graph +# ────────────────────────────────────────────────────────────────── + +async def demo_graph(session: Session) -> None: + """Demonstrate the cross-document relationship graph.""" + print("=" * 60) + print(" 7. Document Graph") + print("=" * 60) + + graph = await session.get_graph() + + if graph is None or graph.is_empty(): + print(" Graph is empty (no documents or no relationships found)") + print() + return + + print(f" Nodes: {graph.node_count()}, Edges: {graph.edge_count()}") + + for did in graph.doc_ids(): + node = graph.get_node(did) + if node: + keywords = ", ".join(k.keyword for k in node.top_keywords[:5]) + neighbors = graph.get_neighbors(did) + print(f" {node.title}") + print(f" format: {node.format}, nodes: {node.node_count}") + print(f" keywords: [{keywords}]") + print(f" neighbors: {len(neighbors)}") + for edge in neighbors[:3]: + target = graph.get_node(edge.target_doc_id) + target_name = target.title if target else edge.target_doc_id[:8] + weight_str = f"weight={edge.weight:.2f}" + evidence_str = "" + if edge.evidence: + evidence_str = f", shared_keywords={edge.evidence.shared_keyword_count}" + print(f" -> {target_name} ({weight_str}{evidence_str})") + + print() + + +# ────────────────────────────────────────────────────────────────── +# 
Section 8: Event Callbacks +# ────────────────────────────────────────────────────────────────── + +async def demo_events() -> None: + """Demonstrate event callbacks with EventEmitter.""" + print("=" * 60) + print(" 8. Event Callbacks") + print("=" * 60) + + events = EventEmitter() + + @events.on_index + def on_index_event(event): + if event.event_type == IndexEventType.STARTED: + print(f" [INDEX] Started: {event.path or event.message}") + elif event.event_type == IndexEventType.COMPLETE: + print(f" [INDEX] Complete: {event.doc_id or event.message}") + elif event.event_type == IndexEventType.ERROR: + print(f" [INDEX] Error: {event.message}") + + @events.on_query + def on_query_event(event): + if event.event_type == QueryEventType.STARTED: + print(f" [QUERY] Started: {event.query}") + elif event.event_type == QueryEventType.COMPLETE: + print(f" [QUERY] Complete: {event.total_results} result(s)") + + # Create a session with the event emitter + api_key = os.environ.get("VECTORLESS_API_KEY", "sk-...") + model = os.environ.get("VECTORLESS_MODEL", "gpt-4o") + session = Session(api_key=api_key, model=model, events=events) + + # Index and query — events fire automatically + await session.index(content=ARCHITECTURE_DOC, format="markdown", name="demo_events") + await session.ask("What are the key concepts?", workspace_scope=True) + + await session.clear_all() + print() + + +# ────────────────────────────────────────────────────────────────── +# Section 9: Metrics +# ────────────────────────────────────────────────────────────────── + +async def demo_metrics(session: Session) -> None: + """Demonstrate metrics reporting.""" + print("=" * 60) + print(" 9. 
Metrics Report") + print("=" * 60) + + report = session.metrics_report() + if report: + # The report contains llm and retrieval subsections + if hasattr(report, "llm"): + llm = report.llm + print(f" LLM Metrics:") + print(f" Total calls: {getattr(llm, 'total_calls', 'N/A')}") + print(f" Total tokens: {getattr(llm, 'total_tokens', 'N/A')}") + print(f" Cache hit rate: {getattr(llm, 'cache_hit_rate', 'N/A')}") + if hasattr(report, "retrieval"): + ret = report.retrieval + print(f" Retrieval Metrics:") + print(f" Total queries: {getattr(ret, 'total_queries', 'N/A')}") + print(f" Avg latency: {getattr(ret, 'avg_latency_ms', 'N/A')} ms") + else: + print(" No metrics available") + + print() + + +# ────────────────────────────────────────────────────────────────── +# Section 10: SyncSession (Synchronous API) +# ────────────────────────────────────────────────────────────────── + +def demo_sync_session() -> None: + """Demonstrate the synchronous Session (no async/await needed).""" + print("=" * 60) + print(" 10. 
SyncSession (no async/await)") + print("=" * 60) + + api_key = os.environ.get("VECTORLESS_API_KEY", "sk-...") + model = os.environ.get("VECTORLESS_MODEL", "gpt-4o") + + # Can also use: SyncSession.from_env() + with SyncSession(api_key=api_key, model=model) as session: + # Index from content + result = session.index( + content=FINANCE_DOC, + format="markdown", + name="sync_demo", + ) + print(f" Indexed: {result.doc_id}") + + # Query + response = session.ask( + "What was the total revenue?", + doc_ids=[result.doc_id], # type: ignore[list-item] + ) + item = response.single() + if item: + print(f" Answer: {item.content[:150]}...") + + # Cleanup + session.clear_all() + print(" Cleaned up") + + print() + + +# ────────────────────────────────────────────────────────────────── +# Main +# ────────────────────────────────────────────────────────────────── + +async def main() -> None: + print() + print(" Vectorless — Session API Walkthrough") + print(" " + "-" * 38) + print() + + # 1. Create session + session = await demo_session_creation() + + # Set up sample directory + tmpdir, paths = create_sample_directory() + + try: + # 2. Indexing + doc_id_map = await demo_indexing(session, tmpdir, paths) + all_doc_ids = list(doc_id_map.values()) + + # 3. Batch indexing (clears and re-indexes) + batch_doc_ids = await demo_batch_indexing(session, paths) + all_doc_ids = batch_doc_ids if batch_doc_ids else all_doc_ids + + # 4. Querying + if all_doc_ids: + await demo_querying(session, all_doc_ids) + + # 5. Streaming query + if all_doc_ids: + await demo_streaming(session, all_doc_ids) + + # 6. Document management + await demo_document_management(session, all_doc_ids) + + # 7. Graph + await demo_graph(session) + + # 8. Events (creates its own session) + await demo_events() + + # 9. Metrics + await demo_metrics(session) + + finally: + # Cleanup + await session.clear_all() + cleanup_directory(tmpdir) + print("=" * 60) + print(" Cleanup complete.") + print("=" * 60) + + # 10. 
SyncSession (separate, runs synchronously) + demo_sync_session() + + print(" Done.") + + +if __name__ == "__main__": + asyncio.run(main()) From 69093d61622e971961726d30b70b85474a39df14 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Wed, 22 Apr 2026 09:06:52 +0800 Subject: [PATCH 02/10] feat(worker): implement UTF-8 safe string truncation with comprehensive tests Implement proper UTF-8 character boundary handling when truncating long feedback strings to prevent panics with multi-byte characters like emojis and Chinese characters. Replace unsafe byte-based slicing with ceil_char_boundary method and add extensive test coverage for various UTF-8 scenarios including ASCII, multi-byte characters, emojis, and short strings. fix(engine): improve document loading error handling during graph rebuild Enhance error reporting and handling when loading documents for graph rebuilding by tracking individual document failures, adding detailed warnings for missing or inaccessible documents, and providing more granular statistics about successful vs failed loads. refactor(understand): improve JSON parsing robustness and error messages Update JSON extraction from LLM responses to properly handle code fences with language tags and missing closing fences. Add detailed warning logs for parsing failures and fix edge cases where JSON keys start with fence identifier letters ('j', 's', 'o', 'n'). fix(dedup): handle None document names correctly in evidence deduplication Ensure proper deduplication when Evidence objects have None for doc_name by using "_unknown" placeholder, preventing incorrect deduplication between documents with explicit names and those without. perf(cache): optimize cache performance with VecDeque and poison recovery Replace Vec with VecDeque for O(1) LRU eviction operations, reducing cache maintenance overhead. 
Add poison lock recovery mechanism to maintain cache availability when worker threads panic, preventing silent failures and ensuring continued operation with stale data instead of blocking access. --- rust/src/agent/worker/mod.rs | 47 ++++++++++- rust/src/client/engine.rs | 48 +++++++++--- rust/src/query/understand.rs | 62 ++++++++++++--- rust/src/rerank/dedup.rs | 70 ++++++++++++++++- rust/src/retrieval/cache.rs | 147 +++++++++++++++++++++++++++++------ 5 files changed, 325 insertions(+), 49 deletions(-) diff --git a/rust/src/agent/worker/mod.rs b/rust/src/agent/worker/mod.rs index 803ced7..beebc89 100644 --- a/rust/src/agent/worker/mod.rs +++ b/rust/src/agent/worker/mod.rs @@ -294,7 +294,8 @@ impl<'a> Agent for Worker<'a> { emitter.emit_worker_round(ctx.doc_name, round_num, &cmd_str, success, round_elapsed); let feedback_preview = if state.last_feedback.len() > 120 { - format!("{}...", &state.last_feedback[..120]) + let boundary = state.last_feedback.ceil_char_boundary(120); + format!("{}...", &state.last_feedback[..boundary]) } else { state.last_feedback.clone() }; @@ -348,3 +349,47 @@ impl<'a> Agent for Worker<'a> { Ok(output) } } + +#[cfg(test)] +mod truncation_tests { + /// Verify that truncating feedback with multi-byte UTF-8 characters + /// never panics. This mirrors the truncation logic in the navigation loop. + #[test] + fn test_utf8_safe_truncation_ascii() { + let feedback = "a".repeat(200); + let boundary = feedback.ceil_char_boundary(120); + let truncated = &feedback[..boundary]; + assert!(truncated.len() <= 123); // 120 + "..." 
fits + assert!(truncated.is_char_boundary(truncated.len())); + } + + #[test] + fn test_utf8_safe_truncation_multibyte() { + // Each '中' is 3 bytes in UTF-8 + let feedback = "中文反馈内容测试截断安全".repeat(20); + assert!(feedback.len() > 120); + let boundary = feedback.ceil_char_boundary(120); + let truncated = &feedback[..boundary]; + assert!(truncated.len() <= 120); + assert!(truncated.is_char_boundary(truncated.len())); + } + + #[test] + fn test_utf8_safe_truncation_emoji() { + // Emojis are 4 bytes each + let feedback = "🦀🎉🚀".repeat(50); + assert!(feedback.len() > 120); + let boundary = feedback.ceil_char_boundary(120); + let truncated = &feedback[..boundary]; + assert!(truncated.len() <= 120); + assert!(truncated.is_char_boundary(truncated.len())); + } + + #[test] + fn test_utf8_safe_truncation_short_string() { + // String shorter than limit — no truncation needed + let feedback = "short feedback".to_string(); + let boundary = feedback.ceil_char_boundary(120); + assert_eq!(boundary, feedback.len()); + } +} diff --git a/rust/src/client/engine.rs b/rust/src/client/engine.rs index d32b4e6..a74a36b 100644 --- a/rust/src/client/engine.rs +++ b/rust/src/client/engine.rs @@ -40,7 +40,7 @@ use std::{collections::HashMap, sync::Arc}; use futures::StreamExt; -use tracing::info; +use tracing::{info, warn}; use crate::{ DocumentTree, Error, @@ -1004,20 +1004,44 @@ impl Engine { ); let concurrency = self.config.llm.throttle.max_concurrent_requests; - let loaded: Vec> = futures::stream::iter(doc_ids.iter().cloned()) - .map(|doc_id| { - let ws = self.workspace.clone(); - async move { ws.load(&doc_id).await.ok().flatten() } - }) - .buffer_unordered(concurrency) - .collect() - .await; + let doc_ids_clone: Vec = doc_ids.iter().cloned().collect(); + let loaded: Vec<(String, Result>)> = + futures::stream::iter(doc_ids_clone.into_iter()) + .map(|doc_id| { + let ws = self.workspace.clone(); + async move { + let result = ws.load(&doc_id).await; + (doc_id, result) + } + }) + 
.buffer_unordered(concurrency) + .collect() + .await; + + let mut failed_count = 0usize; + let mut loaded_docs: Vec = Vec::new(); + for (doc_id, result) in loaded { + match result { + Ok(Some(doc)) => loaded_docs.push(doc), + Ok(None) => { + warn!(doc_id, "Document in meta index but not in backend during graph rebuild"); + failed_count += 1; + } + Err(e) => { + warn!(doc_id, error = %e, "Failed to load document for graph rebuild"); + failed_count += 1; + } + } + } - let loaded_count = loaded.iter().filter(|d| d.is_some()).count(); - info!(loaded_count, "Documents loaded, building graph"); + info!( + loaded = loaded_docs.len(), + failed = failed_count, + "Documents loaded for graph rebuild" + ); let mut builder = crate::graph::DocumentGraphBuilder::new(self.config.graph.clone()); - for doc in loaded.into_iter().flatten() { + for doc in &loaded_docs { let keywords = Self::extract_keywords_from_doc(&doc); builder.add_document( &doc.meta.id, diff --git a/rust/src/query/understand.rs b/rust/src/query/understand.rs index 66a1857..491faf2 100644 --- a/rust/src/query/understand.rs +++ b/rust/src/query/understand.rs @@ -46,14 +46,11 @@ pub async fn understand( } let analysis = parse_analysis(&response).ok_or_else(|| { - warn!( - response = &response[..response.len().min(500)], - "Query understanding: failed to parse LLM response as JSON" - ); + let preview = &response[..response.len().min(300)]; crate::error::Error::Config(format!( "Query understanding returned unparseable response ({} chars): {}", response.len(), - &response[..response.len().min(300)] + preview )) })?; @@ -69,18 +66,37 @@ pub async fn understand( /// Parse the LLM's JSON response into a QueryAnalysis. 
fn parse_analysis(response: &str) -> Option { let trimmed = response.trim(); + // Try to extract JSON from the response (LLM may wrap it in markdown) let json_str = if trimmed.starts_with("```") { - // Strip markdown code fences - let without_start = trimmed - .trim_start_matches(|c| c == '`' || c == 'j' || c == 's' || c == 'o' || c == 'n'); - let without_end = without_start.trim_end_matches(|c| c == '`'); + // Find the first newline after the opening fence (skips language tag) + let after_fence = if let Some(nl) = trimmed.find('\n') { + &trimmed[nl + 1..] + } else { + trimmed + }; + // Strip the closing fence + let without_end = if let Some(end) = after_fence.rfind("```") { + &after_fence[..end] + } else { + after_fence + }; without_end.trim() } else { trimmed }; - serde_json::from_str(json_str).ok() + match serde_json::from_str(json_str) { + Ok(analysis) => Some(analysis), + Err(e) => { + warn!( + error = %e, + json_len = json_str.len(), + "Query understanding: JSON parse failed" + ); + None + } + } } impl QueryAnalysis { @@ -189,6 +205,32 @@ mod tests { assert!(parse_analysis("not json").is_none()); } + #[test] + fn test_parse_analysis_code_fence_no_newline() { + // Edge case: ```json{"intent":...}``` with no newline after language tag + let response = "```json\n{\"intent\":\"factual\",\"key_concepts\":[\"test\"],\"strategy_hint\":\"focused\",\"complexity\":\"simple\",\"rewritten\":null,\"sub_queries\":[]}\n```"; + let analysis = parse_analysis(response).unwrap(); + assert_eq!(analysis.intent, "factual"); + } + + #[test] + fn test_parse_analysis_code_fence_no_closing() { + // LLM sometimes omits the closing fence + let response = "```json\n{\"intent\":\"summary\",\"key_concepts\":[\"overview\"],\"strategy_hint\":\"summary\",\"complexity\":\"simple\",\"rewritten\":null,\"sub_queries\":[]}"; + let analysis = parse_analysis(response).unwrap(); + assert_eq!(analysis.intent, "summary"); + } + + #[test] + fn test_parse_analysis_keys_starting_with_fence_letters() { + 
// The old trim_start_matches(|c| 'j' | 's' | 'o' | 'n') would eat + // JSON keys starting with those letters. Verify this works correctly. + let response = r#"{"intent":"navigational","key_concepts":["journal","offset","node"],"strategy_hint":"focused","complexity":"moderate","rewritten":null,"sub_queries":[]}"#; + let analysis = parse_analysis(response).unwrap(); + assert_eq!(analysis.intent, "navigational"); + assert_eq!(analysis.key_concepts, vec!["journal", "offset", "node"]); + } + #[test] fn test_default_plan() { let plan = QueryPlan::default_for("test query", vec!["test".to_string()]); diff --git a/rust/src/rerank/dedup.rs b/rust/src/rerank/dedup.rs index 9b30f75..c54fc0a 100644 --- a/rust/src/rerank/dedup.rs +++ b/rust/src/rerank/dedup.rs @@ -31,7 +31,8 @@ pub fn dedup(evidence: &[Evidence]) -> Vec { let source_deduped: Vec<&Evidence> = quality .into_iter() .filter(|e| { - let key = format!("{}:{}", e.doc_name.as_deref().unwrap_or(""), e.source_path); + let doc_key = e.doc_name.as_deref().unwrap_or("_unknown"); + let key = format!("{}:{}", doc_key, e.source_path); seen_sources.insert(key) }) .collect(); @@ -144,4 +145,71 @@ mod tests { let b = tokenize("ccc ddd"); assert!((jaccard(&a, &b)).abs() < 0.001); } + + #[test] + fn test_source_dedup_none_doc_name() { + // Evidence with doc_name: None should use "_unknown" as doc key, + // so same source_path with None doc_name still deduplicates correctly. 
+ let evidence = vec![ + Evidence { + source_path: "root/section_a".to_string(), + node_title: "A".to_string(), + content: "content A with enough text to pass the quality filter threshold" + .to_string(), + doc_name: None, + }, + Evidence { + source_path: "root/section_a".to_string(), + node_title: "A2".to_string(), + content: "different content but same source path that should be deduped".to_string(), + doc_name: None, + }, + ]; + let result = dedup(&evidence); + assert_eq!(result.len(), 1); + } + + #[test] + fn test_source_dedup_mixed_doc_name() { + // Same source_path but different doc_name should produce different dedup keys, + // so both survive source dedup. Content must be sufficiently different too. + let evidence = vec![ + Evidence { + source_path: "root/section".to_string(), + node_title: "A".to_string(), + content: "Revenue for Q4 was twelve million dollars driven by SaaS growth in the enterprise segment".to_string(), + doc_name: Some("doc_a".to_string()), + }, + Evidence { + source_path: "root/section".to_string(), + node_title: "B".to_string(), + content: "The encryption module uses AES-256 for data at rest and TLS 1.3 for all network communication".to_string(), + doc_name: Some("doc_b".to_string()), + }, + ]; + let result = dedup(&evidence); + assert_eq!(result.len(), 2); + } + + #[test] + fn test_source_dedup_none_vs_some_doc_name() { + // None doc_name ("_unknown") and Some doc_name produce different keys, + // so both survive source dedup. Content must be sufficiently different too. 
+ let evidence = vec![ + Evidence { + source_path: "root/section".to_string(), + node_title: "A".to_string(), + content: "The database uses a log-structured merge tree with write-ahead logging for durability".to_string(), + doc_name: None, + }, + Evidence { + source_path: "root/section".to_string(), + node_title: "B".to_string(), + content: "Authentication requires Bearer tokens with automatic refresh after twenty-four hours".to_string(), + doc_name: Some("doc_x".to_string()), + }, + ]; + let result = dedup(&evidence); + assert_eq!(result.len(), 2); + } } diff --git a/rust/src/retrieval/cache.rs b/rust/src/retrieval/cache.rs index f4d9839..ecfce79 100644 --- a/rust/src/retrieval/cache.rs +++ b/rust/src/retrieval/cache.rs @@ -16,10 +16,12 @@ //! Node scores are independent of the query, so they can be shared across //! different queries on the same document. -use std::collections::HashMap; +use std::collections::{HashMap, VecDeque}; use std::sync::RwLock; use std::time::Instant; +use tracing::warn; + use crate::document::NodeId; use crate::utils::fingerprint::Fingerprint; @@ -86,7 +88,7 @@ pub struct CachedCandidate { struct L1Store { entries: HashMap, - order: Vec, // For LRU eviction + order: VecDeque, // For LRU eviction — O(1) pop_front } // ---- L2: Path Pattern Cache ---- @@ -102,7 +104,7 @@ struct L2Entry { struct L2Store { entries: HashMap, // Key: "doc_fp:node_path" - order: Vec, + order: VecDeque, } // ---- L3: Strategy Score Cache ---- @@ -118,7 +120,7 @@ struct L3Entry { struct L3Store { entries: HashMap, // Key: node content fingerprint - order: Vec, + order: VecDeque, } // ---- Public API ---- @@ -134,15 +136,15 @@ impl ReasoningCache { Self { l1: RwLock::new(L1Store { entries: HashMap::new(), - order: Vec::new(), + order: VecDeque::new(), }), l2: RwLock::new(L2Store { entries: HashMap::new(), - order: Vec::new(), + order: VecDeque::new(), }), l3: RwLock::new(L3Store { entries: HashMap::new(), - order: Vec::new(), + order: VecDeque::new(), }), config, 
} @@ -156,7 +158,7 @@ impl ReasoningCache { /// on the same document scope. pub fn l1_get(&self, query: &str, scope_fp: &Fingerprint) -> Option> { let query_fp = Fingerprint::from_str(query); - let l1 = self.l1.read().ok()?; + let l1 = read_lock(&self.l1)?; let entry = l1.entries.get(&query_fp)?; // Scope must match (same document set) if &entry.scope_fp != scope_fp { @@ -187,7 +189,7 @@ impl ReasoningCache { created_at: Instant::now(), }, ); - l1.order.push(query_fp); + l1.order.push_back(query_fp); } } @@ -199,7 +201,7 @@ impl ReasoningCache { /// return the confidence score. pub fn l2_get(&self, doc_key: &str, node_path: &str) -> Option { let key = format!("{}:{}", doc_key, node_path); - let l2 = self.l2.read().ok()?; + let l2 = read_lock(&self.l2)?; let entry = l2.entries.get(&key)?; Some(entry.confidence) } @@ -227,7 +229,7 @@ impl ReasoningCache { created_at: Instant::now(), }, ); - l2.order.push(key); + l2.order.push_back(key); } } } @@ -237,9 +239,9 @@ impl ReasoningCache { /// Useful for bootstrapping new queries on a known document. pub fn l2_top_paths(&self, doc_key: &str, n: usize) -> Vec<(String, f32)> { let prefix = format!("{}:", doc_key); - let l2 = match self.l2.read() { - Ok(guard) => guard, - Err(_) => return Vec::new(), + let l2 = match read_lock(&self.l2) { + Some(guard) => guard, + None => return Vec::new(), }; let mut paths: Vec<(String, f32)> = l2 @@ -260,7 +262,7 @@ impl ReasoningCache { /// Node scores from keyword/BM25 are content-dependent but /// query-independent, so they can be shared across queries. 
pub fn l3_get(&self, node_content_fp: &Fingerprint) -> Option<(f32, String)> { - let l3 = self.l3.read().ok()?; + let l3 = read_lock(&self.l3)?; let entry = l3.entries.get(node_content_fp)?; Some((entry.score, entry.strategy.clone())) } @@ -279,7 +281,7 @@ impl ReasoningCache { created_at: Instant::now(), }, ); - l3.order.push(node_content_fp); + l3.order.push_back(node_content_fp); } } @@ -288,9 +290,9 @@ impl ReasoningCache { /// Get cache statistics. pub fn stats(&self) -> ReasoningCacheStats { let (l1_count, l2_count, l3_count) = ( - self.l1.read().map(|g| g.entries.len()).unwrap_or(0), - self.l2.read().map(|g| g.entries.len()).unwrap_or(0), - self.l3.read().map(|g| g.entries.len()).unwrap_or(0), + read_lock(&self.l1).map(|g| g.entries.len()).unwrap_or(0), + read_lock(&self.l2).map(|g| g.entries.len()).unwrap_or(0), + read_lock(&self.l3).map(|g| g.entries.len()).unwrap_or(0), ); ReasoningCacheStats { l1_entries: l1_count, @@ -318,23 +320,20 @@ impl ReasoningCache { // ============ Eviction helpers ============ fn evict_lru_fingerprint(l1: &mut L1Store) { - if let Some(old) = l1.order.first().copied() { + if let Some(old) = l1.order.pop_front() { l1.entries.remove(&old); - l1.order.remove(0); } } fn evict_lru_string(l2: &mut L2Store) { - if let Some(old) = l2.order.first().cloned() { + if let Some(old) = l2.order.pop_front() { l2.entries.remove(&old); - l2.order.remove(0); } } fn evict_lru_fingerprint_l3(l3: &mut L3Store) { - if let Some(old) = l3.order.first().copied() { + if let Some(old) = l3.order.pop_front() { l3.entries.remove(&old); - l3.order.remove(0); } } } @@ -345,6 +344,21 @@ impl Default for ReasoningCache { } } +/// Read from a RwLock, recovering from poison by taking the guard anyway. +/// +/// A poisoned lock means another thread panicked while holding it — the data +/// is still valid, just potentially in an inconsistent state. For a cache, +/// returning stale/empty data is always preferable to failing silently. 
+fn read_lock(lock: &RwLock) -> Option> { + match lock.read() { + Ok(guard) => Some(guard), + Err(poisoned) => { + warn!("ReasoningCache: recovering from poisoned lock"); + Some(poisoned.into_inner()) + } + } +} + /// Cache statistics. #[derive(Debug, Clone)] pub struct ReasoningCacheStats { @@ -477,4 +491,87 @@ mod tests { assert!(cache.l1_get("q2", &scope).is_some()); assert!(cache.l1_get("q3", &scope).is_some()); } + + #[test] + fn test_l2_lru_eviction() { + let config = ReasoningCacheConfig { + l2_max: 2, + ..Default::default() + }; + let cache = ReasoningCache::with_config(config); + + cache.l2_record("doc", "1", 0.5); + cache.l2_record("doc", "2", 0.6); + cache.l2_record("doc", "3", 0.7); // evicts "doc:1" + + assert!(cache.l2_get("doc", "1").is_none()); + assert!(cache.l2_get("doc", "2").is_some()); + assert!(cache.l2_get("doc", "3").is_some()); + } + + #[test] + fn test_l3_lru_eviction() { + let config = ReasoningCacheConfig { + l3_max: 2, + ..Default::default() + }; + let cache = ReasoningCache::with_config(config); + + let fp1 = Fingerprint::from_str("content_a"); + let fp2 = Fingerprint::from_str("content_b"); + let fp3 = Fingerprint::from_str("content_c"); + + cache.l3_store(fp1, 0.5, "kw".into()); + cache.l3_store(fp2, 0.6, "kw".into()); + cache.l3_store(fp3, 0.7, "kw".into()); // evicts fp1 + + assert!(cache.l3_get(&fp1).is_none()); + assert!(cache.l3_get(&fp2).is_some()); + assert!(cache.l3_get(&fp3).is_some()); + } + + #[test] + fn test_poisoned_lock_recovery() { + let cache = ReasoningCache::new(); + + // Verify normal operation: store and retrieve still works + let scope = Fingerprint::from_str("doc"); + cache.l1_store("query", scope, vec![], "kw".into()); + + let scope2 = Fingerprint::from_str("doc2"); + cache.l1_store("q2", scope2, vec![], "kw".into()); + assert!(cache.l1_get("q2", &scope2).is_some()); + + // Verify stats still works (internally uses read_lock) + let stats = cache.stats(); + assert!(stats.l1_entries >= 1); + } + + #[test] + fn 
test_poisoned_lock_read_recovery() { + use std::sync::Arc; + use std::thread; + + // Create a cache and populate it + let cache = Arc::new(ReasoningCache::new()); + let scope = Fingerprint::from_str("doc"); + cache.l1_store("query", scope, vec![], "kw".into()); + + // Poison the lock from another thread + let cache_clone = Arc::clone(&cache); + let handle = thread::spawn(move || { + // This will poison the L1 lock + let _guard = cache_clone.l1.write().unwrap(); + panic!("intentional panic to poison lock"); + }); + + // Wait for the panicking thread to finish + let _ = handle.join(); + + // The lock is now poisoned. Our read_lock() should recover from it. + // l1_get uses read_lock internally + let result = cache.l1_get("query", &scope); + // Should still return data (recovered from poison) + assert!(result.is_some()); + } } From 9925d3b7ff84249f58830beb5a0a4207d277b9ef Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Wed, 22 Apr 2026 09:57:53 +0800 Subject: [PATCH 03/10] feat: add single-document reasoning challenge example Add comprehensive test example that indexes a realistic technical document and asks complex questions requiring deep reasoning across document sections, demonstrating the engine's capability beyond simple keyword matching. fix: update README tagline by removing synthesis reference Remove "Exact, not synthesized" from the header tagline in README.md to better align with current project focus and messaging. refactor: enhance logging across core engine components Add detailed logging information throughout the engine including: - Evidence evaluation counts in orchestrator - Replanning evidence metrics - Navigation planning and rounds tracking - Index persistence status updates - Query understanding initiation - Dispatcher operation flow Replace generic log messages with structured logging containing relevant context like document names, round numbers, and operation metrics for better debugging and monitoring. 
--- README.md | 2 +- rust/examples/single_doc_challenge.rs | 248 ++++++++++++++++++++++++ rust/src/agent/orchestrator/evaluate.rs | 4 + rust/src/agent/orchestrator/replan.rs | 1 + rust/src/agent/worker/mod.rs | 8 + rust/src/client/engine.rs | 10 +- rust/src/query/understand.rs | 1 + rust/src/retrieval/dispatcher.rs | 7 +- 8 files changed, 272 insertions(+), 9 deletions(-) create mode 100644 rust/examples/single_doc_challenge.rs diff --git a/README.md b/README.md index ad6de21..c8391d1 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ Vectorless

Reasoning-based Document Engine

-
Reason, don't vector · Structure, not chunks · Agents, not embeddings · Exact, not synthesized
+
Reason, don't vector · Structure, not chunks · Agents, not embeddings
[![PyPI](https://img.shields.io/pypi/v/vectorless.svg)](https://pypi.org/project/vectorless/) [![PyPI Downloads](https://static.pepy.tech/badge/vectorless/month)](https://pepy.tech/projects/vectorless) diff --git a/rust/examples/single_doc_challenge.rs b/rust/examples/single_doc_challenge.rs new file mode 100644 index 0000000..40582b2 --- /dev/null +++ b/rust/examples/single_doc_challenge.rs @@ -0,0 +1,248 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Single-document reasoning challenge. +//! +//! Indexes a realistic technical document and asks questions that require +//! the engine to navigate deep into the tree, cross-reference details +//! across distant sections, and extract information buried in nested +//! structures — not surface-level keyword matches. +//! +//! ```bash +//! LLM_API_KEY=sk-xxx LLM_MODEL=gpt-4o \ +//! LLM_ENDPOINT=https://api.openai.com/v1 \ +//! cargo run --example single_doc_challenge +//! ``` + +use vectorless::{DocumentFormat, EngineBuilder, IndexContext, QueryContext}; + +/// A research report with information scattered across sections. +/// The answers to the challenge questions require connecting dots +/// from different parts of the document, not simple keyword lookup. +const REPORT: &str = r#" +# Quantum Computing Division — Annual Research Report 2025 + +## Executive Summary + +The Quantum Computing Division achieved several milestones in fiscal year 2025. +Total division revenue reached $47.2M, representing 23% year-over-year growth. +The division employed 312 staff across four research labs as of December 2025. +Headcount grew by 18% during the year, with the majority of new hires in the +error correction and cryogenics teams. + +The board approved a $200M capital investment program spanning 2025-2028. +Phase 1 ($52M) was fully deployed in 2025, primarily in dilution refrigerator +procurement and cleanroom expansion at the Zurich facility. 
+ +## Research Labs + +### Lab A — Superconducting Qubits (Zurich) + +Lab A focuses on transmon qubit design and fabrication. The lab operates +two dilution refrigerators: FR-01 (purchased 2023, 20mK base temperature) +and FR-02 (commissioned Q3 2025, 15mK base temperature). FR-02 was the +single largest capital expenditure in 2025 at $8.7M. + +Current qubit specifications: +- Qubit count: 127 (FR-01: 64, FR-02: 63) +- Average T1 coherence time: 142 microseconds (up from 98μs in 2024) +- Average T2 coherence time: 89 microseconds +- Single-qubit gate fidelity: 99.92% +- Two-qubit gate fidelity: 99.67% +- Readout fidelity: 99.81% + +The 2025 coherence improvement was primarily driven by the transition from +aluminum to tantalum transmon junctions, which reduced two-level system (TLS) +defect density by 40%. + +### Lab B — Topological Qubits (Tokyo) + +Lab B pursues Majorana-based topological qubits using semiconductor-superconductor +nanowires. The team fabricated 12 nanowire devices during 2025, of which 3 +demonstrated measurable topological gap. This is a significant improvement +over 2024 when only 1 device out of 8 showed the gap. + +The topological gap measurement protocol requires the device temperature to +remain below 20mK throughout the 48-hour characterization cycle. Only FR-02 +in Zurich meets this requirement, so Lab B ships devices to Zurich for final +characterization — creating a logistical dependency between the two labs. + +Key metric: topological gap size averaged 0.35meV across successful devices, +compared to the theoretical target of 0.5meV. The gap-to-target ratio improved +from 48% in 2024 to 70% in 2025. + +### Lab C — Quantum Error Correction (Cambridge) + +Lab C develops surface code error correction protocols. In 2025, the team +achieved a critical milestone: below-threshold error correction on a 17-qubit +surface code patch, reducing logical error rate from 2.1×10⁻² to 3.4×10⁻³ +per correction cycle. 
+ +The threshold simulations used Lab A's measured gate fidelities as input +parameters. The below-threshold result was only possible after Lab A's T1 +coherence improvement from 98μs to 142μs — the simulation models showed +that the 98μs regime was above the error correction threshold for the 17-qubit +code, making the Lab A / Lab C dependency critical. + +Lab C also developed a new decoder algorithm called "Cascade" that reduces +classical processing latency from 1.2μs to 0.4μs per syndrome extraction cycle. +This decoder runs on an FPGA co-processor board that was custom-designed by +Lab D. + +### Lab D — Control Systems (Boston) + +Lab D designs and manufactures the classical control electronics for all qubit +types. The flagship product is the QCS-4 control system, capable of driving +up to 256 qubit channels with 14-bit DAC resolution and sub-nanosecond timing +precision. + +In 2025, Lab D delivered 4 QCS-4 units to Lab A and 2 units to Lab B. +Lab C received a modified QCS-4 variant with the integrated FPGA decoder +co-processor. The FPGA decoder board is a custom design: Xilinx Ultrascale+ +XCU26 FPGA, 400k logic cells, running at 350MHz. Lab D is the sole source +for this board — there is no commercial equivalent. + +A notable incident occurred in August 2025 when a firmware bug in the QCS-4 +DAC calibration routine caused systematic phase errors in two-qubit gate +operations. The bug was traced to an integer overflow in the calibration LUT +when operating above 4.2 GHz. The issue affected Lab A's FR-01 for 11 days +before a patched firmware was deployed. During this period, Lab A's measured +two-qubit gate fidelity temporarily dropped to 97.31%. 
+ +## Financial Summary + +| Category | 2024 | 2025 | Change | +|----------|------|------|--------| +| Revenue | $38.4M | $47.2M | +23% | +| R&D Expense | $31.6M | $38.9M | +23% | +| Capital Expenditure | $18.2M | $52.0M | +186% | +| Staff Count (Dec) | 264 | 312 | +18% | +| Patents Filed | 14 | 19 | +36% | + +Revenue breakdown by source: +- Government contracts: $19.8M (42%) +- Enterprise partnerships: $15.3M (32%) +- IP licensing: $8.6M (18%) +- Consulting services: $3.5M (8%) + +The $52M capital expenditure in 2025 included: +- FR-02 dilution refrigerator (Zurich): $8.7M +- Cleanroom expansion (Zurich): $14.2M +- Nanowire fabrication equipment (Tokyo): $6.1M +- FPGA development and QCS-4 production (Boston): $9.4M +- General infrastructure and IT: $13.6M + +## Outlook for 2026 + +Priority goals for 2026: +1. Scale to 256 superconducting qubits by Q3 (requires a third dilution + refrigerator, procurement estimated at $9-11M) +2. Achieve topological gap above 0.45meV (requires device process improvement) +3. Demonstrate below-threshold error correction on a 49-qubit surface code + (requires both 256-qubit hardware AND the Cascade decoder scaling to + larger code distances) +4. File 25+ patents +5. Grow revenue to $60M +"#; + +/// Challenge questions designed to test deep reasoning. +/// None of these can be answered by simple keyword search — +/// each requires connecting information from multiple sections. 
+const CHALLENGE_QUESTIONS: &[&str] = &[ + // Requires: cross-reference Lab B's device characterization needs with + // Lab A's FR-02 specs, then connect to the CapEx table for FR-02 cost + "How much did the only refrigerator capable of characterizing Lab B's devices cost, and where is it located?", + + // Requires: trace Lab C's below-threshold result → depends on Lab A's T1 + // improvement → depends on tantalum junction transition + "What specific materials change in another lab made Lab C's error correction milestone possible?", + + // Requires: find the firmware bug in Lab D section, then look at the + // Lab A FR-01 qubit count, then compute the impact window + "How many qubits were affected by the firmware bug, and for how many days?", + + // Requires: Lab B gap/target ratio (70%) × theoretical target (0.5meV) + // → actual gap = 0.35meV, compare with 2026 goal of 0.45meV + "What is the gap between Lab B's current topological gap achievement and the 2026 target, in meV?", + + // Requires: trace the dependency chain: 256-qubit goal → need FR-03 → + // cost $9-11M → government contracts are largest revenue source at $19.8M + "If the 2026 qubit scaling goal requires a new refrigerator, can the largest revenue source category alone cover its estimated cost?", +]; + +#[tokio::main] +async fn main() -> vectorless::Result<()> { + tracing_subscriber::fmt::init(); + + println!("=== Single-Document Reasoning Challenge ===\n"); + + let api_key = std::env::var("LLM_API_KEY").unwrap_or_else(|_| "sk-...".to_string()); + let model = std::env::var("LLM_MODEL").unwrap_or_else(|_| "gpt-4o".to_string()); + let endpoint = + std::env::var("LLM_ENDPOINT").unwrap_or_else(|_| "https://api.openai.com/v1".to_string()); + + let engine = EngineBuilder::new() + .with_key(&api_key) + .with_model(&model) + .with_endpoint(&endpoint) + .build() + .await + .map_err(|e| vectorless::Error::Config(e.to_string()))?; + + // Index (skip if already indexed — we're testing retrieval, not indexing) + 
let doc_name = "qc_report_2025"; + let doc_id = { + let existing = engine.list().await?; + if let Some(doc) = existing.iter().find(|d| d.name == doc_name) { + println!("Document already indexed, reusing: {}\n", doc.id); + doc.id.clone() + } else { + println!("Indexing research report..."); + let result = engine + .index(IndexContext::from_content(REPORT, DocumentFormat::Markdown).with_name(doc_name)) + .await?; + let id = result.doc_id().unwrap().to_string(); + println!(" doc_id: {}\n", id); + id + } + }; + + // Challenge queries + for (i, question) in CHALLENGE_QUESTIONS.iter().enumerate() { + println!("Q{}: {}", i + 1, question); + + match engine + .query(QueryContext::new(*question).with_doc_ids(vec![doc_id.clone()])) + .await + { + Ok(response) => { + if let Some(item) = response.single() { + if item.content.is_empty() { + println!(" (no answer found)\n"); + } else { + // Print first 3 lines as preview + for line in item.content.lines().take(3) { + println!(" {}", line); + } + let remaining = item.content.lines().count().saturating_sub(3); + if remaining > 0 { + println!(" ... ({} more lines)", remaining); + } + println!(" confidence: {:.2}\n", item.confidence); + } + } else { + println!(" (no results)\n"); + } + } + Err(e) => { + println!(" error: {}\n", e); + } + } + } + + // Uncomment to remove the document after testing: + // engine.remove(&doc_id).await?; + // println!("Cleaned up."); + + Ok(()) +} diff --git a/rust/src/agent/orchestrator/evaluate.rs b/rust/src/agent/orchestrator/evaluate.rs index 27c8aab..0f74436 100644 --- a/rust/src/agent/orchestrator/evaluate.rs +++ b/rust/src/agent/orchestrator/evaluate.rs @@ -34,6 +34,10 @@ pub async fn evaluate( let evidence_summary = format_evidence_summary(evidence); let (system, user) = check_sufficiency(query, &evidence_summary); + info!( + evidence = evidence.len(), + "Evaluating evidence sufficiency..." 
+ ); let response = llm .complete(&system, &user) .await diff --git a/rust/src/agent/orchestrator/replan.rs b/rust/src/agent/orchestrator/replan.rs index 57d5e24..4b423ac 100644 --- a/rust/src/agent/orchestrator/replan.rs +++ b/rust/src/agent/orchestrator/replan.rs @@ -59,6 +59,7 @@ pub async fn replan( &find_text, ); + info!(evidence = collected_evidence.len(), "Replanning dispatch targets..."); let response = llm .complete(&system, &user) .await diff --git a/rust/src/agent/worker/mod.rs b/rust/src/agent/worker/mod.rs index beebc89..d807501 100644 --- a/rust/src/agent/worker/mod.rs +++ b/rust/src/agent/worker/mod.rs @@ -128,6 +128,7 @@ impl<'a> Agent for Worker<'a> { // --- Phase 1.5: Navigation planning --- if state.remaining > 0 && !llm_budget_exhausted!() { + info!(doc = ctx.doc_name, "Generating navigation plan..."); let plan_prompt = build_plan_prompt( &query, task_ref, @@ -205,6 +206,12 @@ impl<'a> Agent for Worker<'a> { // LLM decision let round_num = config.max_rounds - state.remaining + 1; let round_start = std::time::Instant::now(); + info!( + doc = ctx.doc_name, + round = round_num, + max_rounds = config.max_rounds, + "Navigation round: calling LLM..." 
+ ); let llm_output = llm.complete(&system, &user) .await @@ -262,6 +269,7 @@ impl<'a> Agent for Worker<'a> { && !llm_budget_exhausted!() { let missing = state.missing_info.clone(); + info!(doc = ctx.doc_name, missing = %missing, "Re-planning navigation..."); let replan = build_replan_prompt(&query, task_ref, &state, ctx); let new_plan = llm.complete(&replan.0, &replan.1) diff --git a/rust/src/client/engine.rs b/rust/src/client/engine.rs index a74a36b..d4062b4 100644 --- a/rust/src/client/engine.rs +++ b/rust/src/client/engine.rs @@ -366,9 +366,15 @@ impl Engine { old_id: Option<&str>, ) -> (Vec, Vec) { let item = Self::build_index_item(&doc); + + info!( + "[index] Persisting document '{}'...", + doc.name, + ); let persisted = IndexerClient::to_persisted(doc, pipeline_options).await; if let Err(e) = self.workspace.save(&persisted).await { + warn!("[index] Failed to save document: {}", e); return ( Vec::new(), vec![FailedItem::new(source_label, e.to_string())], @@ -377,11 +383,11 @@ impl Engine { // Clean up old document after successful save if let Some(old_id) = old_id { if let Err(e) = self.workspace.remove(old_id).await { - tracing::warn!("Failed to remove old document {}: {}", old_id, e); + warn!("Failed to remove old document {}: {}", old_id, e); } } - info!("Indexed document: {}", item.doc_id); + info!("[index] Document persisted: {}", item.doc_id); (vec![item], Vec::new()) } diff --git a/rust/src/query/understand.rs b/rust/src/query/understand.rs index 491faf2..da676fc 100644 --- a/rust/src/query/understand.rs +++ b/rust/src/query/understand.rs @@ -34,6 +34,7 @@ pub async fn understand( llm: &LlmClient, ) -> crate::error::Result { let (system, user) = understand_prompt(query, keywords); + info!("Query understanding: calling LLM..."); let response = llm.complete(&system, &user).await?; if response.trim().is_empty() { diff --git a/rust/src/retrieval/dispatcher.rs b/rust/src/retrieval/dispatcher.rs index 0ea3a2e..e92766f 100644 --- 
a/rust/src/retrieval/dispatcher.rs +++ b/rust/src/retrieval/dispatcher.rs @@ -58,13 +58,8 @@ pub async fn dispatch( // Step 1: Query understanding — LLM analyzes intent, concepts, complexity. // This is required. "Model fails, we fail." — errors propagate. + info!("Starting query understanding..."); let query_plan = QueryPipeline::understand(query, llm).await?; - info!( - intent = %query_plan.intent, - complexity = %query_plan.complexity, - concepts = query_plan.key_concepts.len(), - "Query understanding complete" - ); // Step 2: Dispatch to Orchestrator with the query plan. let orchestrator = Orchestrator::new( From b108dab112e0342f2bc86c2569a6c7122cdc50c2 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Wed, 22 Apr 2026 10:13:30 +0800 Subject: [PATCH 04/10] feat: add configurable tracing and document loading logs - Configure tracing subscriber with environment filter support, allowing log level to be controlled via RUST_LOG environment variable - Add document resolution count logging to track query processing - Add document loading statistics showing loaded and failed counts --- rust/examples/single_doc_challenge.rs | 7 ++++++- rust/src/client/engine.rs | 6 ++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/rust/examples/single_doc_challenge.rs b/rust/examples/single_doc_challenge.rs index 40582b2..dba3e27 100644 --- a/rust/examples/single_doc_challenge.rs +++ b/rust/examples/single_doc_challenge.rs @@ -172,7 +172,12 @@ const CHALLENGE_QUESTIONS: &[&str] = &[ #[tokio::main] async fn main() -> vectorless::Result<()> { - tracing_subscriber::fmt::init(); + tracing_subscriber::fmt() + .with_env_filter( + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")), + ) + .init(); println!("=== Single-Document Reasoning Challenge ===\n"); diff --git a/rust/src/client/engine.rs b/rust/src/client/engine.rs index d4062b4..cc95a5f 100644 --- a/rust/src/client/engine.rs +++ 
b/rust/src/client/engine.rs @@ -423,8 +423,14 @@ impl Engine { self.with_timeout(timeout_secs, async move { let doc_ids = self.resolve_scope(&ctx.scope).await?; + info!(doc_count = doc_ids.len(), "Resolving documents for query"); let (documents, failed) = self.load_documents(&doc_ids).await?; + info!( + loaded = documents.len(), + failed = failed.len(), + "Documents loaded" + ); if documents.is_empty() { return Err(Error::Config(format!( "No documents available for query: {} failures", From b44116e47708b15fe3f9a37b7351ddbdb0bd5e77 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Wed, 22 Apr 2026 11:05:24 +0800 Subject: [PATCH 05/10] feat(agent): add relative path support to cd command with enhanced navigation Support relative paths with "/" separator in cd command (e.g., "Research Labs/Lab B") alongside existing absolute paths. Update navigation prompts to clarify path support including both relative paths like "Section/Sub" and absolute paths like "/root/Section". Add comprehensive tests for relative path navigation scenarios including success cases and partial failure handling. refactor(index): extract keywords from full content instead of samples Always extract keywords from full node content rather than falling back to content samples when summaries are empty. This ensures more comprehensive keyword coverage across documents. feat(query): enhance query understanding with detailed logging Include key concepts, strategy hints, and rewritten queries in understanding logs for better debugging and visibility into query processing decisions. feat(search): add content snippets to search results for relevance Include content snippets around matching keywords in search results to help users judge relevance. Add new content_snippet utility function that extracts context-aware text fragments centered on keywords with configurable length limits and proper UTF-8 boundary handling. Apply this enhancement to find_cross, worker execution, and planning components. 
--- rust/src/agent/prompts.rs | 6 +- rust/src/agent/tools/orchestrator.rs | 58 +++++++++++++-- rust/src/agent/tools/worker/cd.rs | 104 +++++++++++++++++++++++++++ rust/src/agent/tools/worker/ls.rs | 23 ++++-- rust/src/agent/worker/execute.rs | 64 +++++++++++++++-- rust/src/agent/worker/planning.rs | 62 ++++++++++++++++ rust/src/index/stages/reasoning.rs | 10 +-- rust/src/query/understand.rs | 4 +- 8 files changed, 300 insertions(+), 31 deletions(-) diff --git a/rust/src/agent/prompts.rs b/rust/src/agent/prompts.rs index 42699cc..e128795 100644 --- a/rust/src/agent/prompts.rs +++ b/rust/src/agent/prompts.rs @@ -115,7 +115,7 @@ pub fn worker_navigation(params: &NavigationParams) -> (String, String) { Available commands: - ls List children at current position (with summaries and leaf counts) -- cd Enter a child node (supports absolute paths like /root/Section) +- cd Enter a child node (supports relative paths like Section/Sub and absolute paths like /root/Section) - cd .. Go back to parent node - cat Read a child node's content (automatically collected as evidence) - cat Read the current node's content (useful at leaf nodes) @@ -239,8 +239,8 @@ pub fn worker_dispatch(params: &WorkerDispatchParams) -> (String, String) { "You are a document navigation assistant. You are searching inside the document \ \"{doc_name}\" for specific information. -Available commands: ls, cd , cd .., cat, cat , head , find , \ -findtree , grep , wc , pwd, check, done +Available commands: ls, cd (supports \"Section/Sub\" paths), cd .., cat, cat , \ +head , find , findtree , grep , wc , pwd, check, done SEARCH STRATEGY: - Prefer find to jump directly to relevant sections over manual ls→cd exploration. 
\ diff --git a/rust/src/agent/tools/orchestrator.rs b/rust/src/agent/tools/orchestrator.rs index 73c78ac..2dd1718 100644 --- a/rust/src/agent/tools/orchestrator.rs +++ b/rust/src/agent/tools/orchestrator.rs @@ -68,7 +68,7 @@ pub fn ls_docs(ctx: &WorkspaceContext) -> ToolResult { /// Execute `find_cross` — search keywords across all documents. /// -/// Returns formatted results showing which documents matched. +/// Returns formatted results showing which documents matched, with content snippets. pub fn find_cross(keywords: &[String], ctx: &WorkspaceContext) -> ToolResult { let results = ctx.find_cross_all(keywords); @@ -90,16 +90,15 @@ pub fn find_cross(keywords: &[String], ctx: &WorkspaceContext) -> ToolResult { let title = doc .and_then(|d| d.node_title(entry.node_id)) .unwrap_or("unknown"); - let summary = doc - .and_then(|d| d.nav_entry(entry.node_id)) - .map(|e| e.overview.as_str()) - .unwrap_or(""); output.push_str(&format!( " keyword '{}' → {} (depth {}, weight {:.2})", hit.keyword, title, entry.depth, entry.weight )); - if !summary.is_empty() { - output.push_str(&format!(" — {}", summary)); + // Include content snippet for cross-doc relevance judgment + if let Some(content) = doc.and_then(|d| d.cat(entry.node_id)) { + if let Some(snippet) = content_snippet(content, &hit.keyword, 150) { + output.push_str(&format!("\n \"{}\"", snippet)); + } } output.push('\n'); } @@ -110,6 +109,51 @@ pub fn find_cross(keywords: &[String], ctx: &WorkspaceContext) -> ToolResult { ToolResult::ok(output) } +/// Extract a short content snippet around the first occurrence of `keyword`. 
+fn content_snippet(content: &str, keyword: &str, max_len: usize) -> Option { + if content.trim().is_empty() { + return None; + } + + let keyword_lower = keyword.to_lowercase(); + let content_lower = content.to_lowercase(); + + let start = match content_lower.find(&keyword_lower) { + Some(pos) => { + let back = (max_len / 4).min(pos); + pos - back + } + None => 0, + }; + + let start = content + .char_indices() + .find(|(i, _)| *i >= start) + .map(|(i, _)| i) + .unwrap_or(0); + + let end = content + .char_indices() + .take_while(|(i, _)| *i <= start + max_len) + .last() + .map(|(i, c)| i + c.len_utf8()) + .unwrap_or(content.len()); + + let snippet = content[start..end].trim(); + if snippet.is_empty() { + return None; + } + + let mut result = snippet.to_string(); + if end < content.len() { + result.push_str("..."); + } + if start > 0 { + result = format!("...{}", result); + } + Some(result) +} + #[cfg(test)] mod tests { use super::*; diff --git a/rust/src/agent/tools/worker/cd.rs b/rust/src/agent/tools/worker/cd.rs index 765d6e2..eba325f 100644 --- a/rust/src/agent/tools/worker/cd.rs +++ b/rust/src/agent/tools/worker/cd.rs @@ -13,12 +13,18 @@ use super::super::ToolResult; /// /// Supports: /// - Relative names (child of current node): `cd "Getting Started"` +/// - Relative paths with `/`: `cd "Research Labs/Lab B"` /// - Absolute paths starting with `/`: `cd /root/Chapter 1/Section 1.2` pub fn cd(target: &str, ctx: &DocContext, state: &mut WorkerState) -> ToolResult { if target.starts_with('/') { return cd_absolute(target, ctx, state); } + // Relative path with segments: "Research Labs/Lab B" + if target.contains('/') { + return cd_relative_path(target, ctx, state); + } + match command::resolve_target_extended(target, ctx.nav_index, state.current_node, ctx.tree) { Some(node_id) => { let title = ctx.node_title(node_id).unwrap_or(target).to_string(); @@ -32,6 +38,40 @@ pub fn cd(target: &str, ctx: &DocContext, state: &mut WorkerState) -> ToolResult } } +/// Navigate 
using a relative multi-segment path (e.g., `"Research Labs/Lab B"`). +fn cd_relative_path(path: &str, ctx: &DocContext, state: &mut WorkerState) -> ToolResult { + let segments: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect(); + if segments.is_empty() { + return ToolResult::fail("Empty path.".to_string()); + } + + let mut current = state.current_node; + let mut breadcrumb = state.breadcrumb.clone(); + + for segment in &segments { + match command::resolve_target_extended(segment, ctx.nav_index, current, ctx.tree) { + Some(node_id) => { + let title = ctx.node_title(node_id).unwrap_or(*segment).to_string(); + breadcrumb.push(title); + current = node_id; + } + None => { + return ToolResult::fail(format!( + "Path segment '{}' not found at '/{}'. Use ls to see available children.", + segment, + breadcrumb.join("/") + )); + } + } + } + + state.breadcrumb = breadcrumb; + state.current_node = current; + state.visited.insert(current); + + ToolResult::ok(format!("Entered: {}", state.path_str())) +} + /// Navigate using an absolute path (e.g., `/root/Chapter 1/Section 1.2`). 
fn cd_absolute(path: &str, ctx: &DocContext, state: &mut WorkerState) -> ToolResult { let segments: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect(); @@ -155,4 +195,68 @@ mod tests { assert!(result.success); assert_eq!(state.current_node, root); } + + fn build_deep_tree() -> (DocumentTree, NavigationIndex, NodeId, NodeId, NodeId) { + // Root → "Research Labs" → "Lab B" + let mut tree = DocumentTree::new("Root", "root content"); + let root = tree.root(); + let section = tree.add_child(root, "Research Labs", "section content"); + let lab_b = tree.add_child(section, "Lab B", "lab b content"); + + let mut nav = NavigationIndex::new(); + nav.add_child_routes( + root, + vec![ChildRoute { + node_id: section, + title: "Research Labs".to_string(), + description: "Lab sections".to_string(), + leaf_count: 4, + }], + ); + nav.add_child_routes( + section, + vec![ChildRoute { + node_id: lab_b, + title: "Lab B".to_string(), + description: "Topological qubits".to_string(), + leaf_count: 1, + }], + ); + + (tree, nav, root, section, lab_b) + } + + #[test] + fn test_cd_relative_path() { + let (tree, nav, root, _, lab_b) = build_deep_tree(); + let ctx = DocContext { + tree: &tree, + nav_index: &nav, + reasoning_index: &crate::document::ReasoningIndex::default(), + doc_name: "test", + }; + let mut state = WorkerState::new(root, 8); + + let result = cd("Research Labs/Lab B", &ctx, &mut state); + assert!(result.success); + assert_eq!(state.current_node, lab_b); + assert!(state.path_str().contains("Research Labs")); + assert!(state.path_str().contains("Lab B")); + } + + #[test] + fn test_cd_relative_path_partial_fail() { + let (tree, nav, root, _, _) = build_deep_tree(); + let ctx = DocContext { + tree: &tree, + nav_index: &nav, + reasoning_index: &crate::document::ReasoningIndex::default(), + doc_name: "test", + }; + let mut state = WorkerState::new(root, 8); + + let result = cd("Research Labs/Nonexistent", &ctx, &mut state); + assert!(!result.success); + 
assert!(result.feedback.contains("Nonexistent")); + } } diff --git a/rust/src/agent/tools/worker/ls.rs b/rust/src/agent/tools/worker/ls.rs index 351ab89..1c20daa 100644 --- a/rust/src/agent/tools/worker/ls.rs +++ b/rust/src/agent/tools/worker/ls.rs @@ -32,13 +32,22 @@ pub fn ls(ctx: &DocContext, state: &WorkerState) -> ToolResult { } for (i, route) in routes.iter().enumerate() { - output.push_str(&format!( - "[{}] {} — {} ({} leaves)", - i + 1, - route.title, - route.description, - route.leaf_count - )); + if route.title == route.description { + output.push_str(&format!( + "[{}] {} ({} leaves)", + i + 1, + route.title, + route.leaf_count + )); + } else { + output.push_str(&format!( + "[{}] {} — {} ({} leaves)", + i + 1, + route.title, + route.description, + route.leaf_count + )); + } if let Some(nav) = ctx.nav_entry(route.node_id) { if !nav.question_hints.is_empty() { output.push_str(&format!( diff --git a/rust/src/agent/worker/execute.rs b/rust/src/agent/worker/execute.rs index c8e1b9a..77ff917 100644 --- a/rust/src/agent/worker/execute.rs +++ b/rust/src/agent/worker/execute.rs @@ -96,16 +96,17 @@ pub async fn execute_command( continue; } let title = ctx.node_title(entry.node_id).unwrap_or("unknown"); - let summary = ctx - .nav_entry(entry.node_id) - .map(|e| e.overview.as_str()) - .unwrap_or(""); output.push_str(&format!( " - {} (depth {}, weight {:.2})", title, entry.depth, entry.weight )); - if !summary.is_empty() { - output.push_str(&format!(" — {}", summary)); + // Include a content snippet so the LLM can judge relevance + if let Some(content) = ctx.cat(entry.node_id) { + if let Some(snippet) = + content_snippet(content, keyword, 150) + { + output.push_str(&format!("\n \"{}\"", snippet)); + } } output.push('\n'); } @@ -218,6 +219,57 @@ pub async fn execute_command( } } +/// Extract a short content snippet around the first occurrence of `keyword`. +/// +/// Returns `None` if the content is empty. 
If the keyword is not found, +/// returns the beginning of the content instead. +fn content_snippet(content: &str, keyword: &str, max_len: usize) -> Option { + if content.trim().is_empty() { + return None; + } + + let keyword_lower = keyword.to_lowercase(); + let content_lower = content.to_lowercase(); + + // Find the keyword position to center the snippet around it + let start = match content_lower.find(&keyword_lower) { + Some(pos) => { + // Back up a bit for context, but don't go negative + let back = (max_len / 4).min(pos); + pos - back + } + None => 0, + }; + + // Find a char boundary near `start` + let start = content + .char_indices() + .find(|(i, _)| *i >= start) + .map(|(i, _)| i) + .unwrap_or(0); + + let end = content + .char_indices() + .take_while(|(i, _)| *i <= start + max_len) + .last() + .map(|(i, c)| i + c.len_utf8()) + .unwrap_or(content.len()); + + let snippet = content[start..end].trim(); + if snippet.is_empty() { + return None; + } + + let mut result = snippet.to_string(); + if end < content.len() { + result.push_str("..."); + } + if start > 0 { + result = format!("...{}", result); + } + Some(result) +} + /// Truncate feedback for log output — keep first 300 chars to avoid noisy logs. fn truncate_log(s: &str) -> std::borrow::Cow<'_, str> { const MAX: usize = 300; diff --git a/rust/src/agent/worker/planning.rs b/rust/src/agent/worker/planning.rs index eb98999..2b0671a 100644 --- a/rust/src/agent/worker/planning.rs +++ b/rust/src/agent/worker/planning.rs @@ -56,6 +56,12 @@ pub fn build_plan_prompt( " - keyword '{}' → {} (depth {}, weight {:.2})\n", hit.keyword, ancestor_path, entry.depth, entry.weight )); + // Include a content snippet so the planner can judge relevance + if let Some(content) = ctx.cat(entry.node_id) { + if let Some(snippet) = content_snippet(content, &hit.keyword, 120) { + section.push_str(&format!(" \"{}\"\n", snippet)); + } + } if section.len() > PLAN_CONTEXT_BUDGET { section.push_str(" ... 
(more hits truncated)\n"); break; @@ -165,7 +171,9 @@ pub fn build_replan_prompt( /// ```text /// Keyword matches (use find to jump directly): /// - 'complex' → Performance (weight 0.85) +/// "...complexity analysis shows..." /// - 'latency' → Performance (weight 0.72) +/// "...latency benchmarks indicate..." /// ``` pub fn format_keyword_hints(keyword_hits: &[FindHit], ctx: &DocContext<'_>) -> String { if keyword_hits.is_empty() { @@ -190,6 +198,12 @@ pub fn format_keyword_hints(keyword_hits: &[FindHit], ctx: &DocContext<'_>) -> S " - '{}' → {} (weight {:.2})\n", hit.keyword, title, entry.weight )); + // Include a content snippet so the LLM can see what's there + if let Some(content) = ctx.cat(entry.node_id) { + if let Some(snippet) = content_snippet(content, &hit.keyword, 100) { + section.push_str(&format!(" \"{}\"\n", snippet)); + } + } if section.len() > 800 { section.push_str(" ... (more)\n"); return section; @@ -199,6 +213,54 @@ pub fn format_keyword_hints(keyword_hits: &[FindHit], ctx: &DocContext<'_>) -> S section } +/// Extract a short content snippet around the first occurrence of `keyword`. +/// +/// Returns `None` if the content is empty. If the keyword is not found, +/// returns the beginning of the content instead. 
+fn content_snippet(content: &str, keyword: &str, max_len: usize) -> Option { + if content.trim().is_empty() { + return None; + } + + let keyword_lower = keyword.to_lowercase(); + let content_lower = content.to_lowercase(); + + let start = match content_lower.find(&keyword_lower) { + Some(pos) => { + let back = (max_len / 4).min(pos); + pos - back + } + None => 0, + }; + + let start = content + .char_indices() + .find(|(i, _)| *i >= start) + .map(|(i, _)| i) + .unwrap_or(0); + + let end = content + .char_indices() + .take_while(|(i, _)| *i <= start + max_len) + .last() + .map(|(i, c)| i + c.len_utf8()) + .unwrap_or(content.len()); + + let snippet = content[start..end].trim(); + if snippet.is_empty() { + return None; + } + + let mut result = snippet.to_string(); + if end < content.len() { + result.push_str("..."); + } + if start > 0 { + result = format!("...{}", result); + } + Some(result) +} + /// Build the ancestor path string for a node (e.g., "root/Chapter 1/Section 1.2"). pub fn build_ancestor_path(node_id: crate::document::NodeId, ctx: &DocContext<'_>) -> String { let mut path: Vec = ctx.tree.ancestors_iter(node_id).collect(); diff --git a/rust/src/index/stages/reasoning.rs b/rust/src/index/stages/reasoning.rs index 612c0b3..d30e130 100644 --- a/rust/src/index/stages/reasoning.rs +++ b/rust/src/index/stages/reasoning.rs @@ -68,13 +68,9 @@ impl ReasoningIndexStage { Self::extract_node_keywords(&node.title, config.min_keyword_length); let summary_keywords = Self::extract_node_keywords(&node.summary, config.min_keyword_length); - let content_keywords = if node.summary.is_empty() { - // Fallback: extract from content if no summary - let content_sample: String = node.content.chars().take(500).collect(); - Self::extract_node_keywords(&content_sample, config.min_keyword_length) - } else { - Vec::new() - }; + // Always extract from content — keywords can appear anywhere + let content_keywords = + Self::extract_node_keywords(&node.content, config.min_keyword_length); 
// Title keywords get higher weight (2.0), summary (1.5), content (1.0) for kw in &title_keywords { diff --git a/rust/src/query/understand.rs b/rust/src/query/understand.rs index da676fc..9790e55 100644 --- a/rust/src/query/understand.rs +++ b/rust/src/query/understand.rs @@ -58,7 +58,9 @@ pub async fn understand( info!( intent = %analysis.intent, complexity = %analysis.complexity, - concepts = analysis.key_concepts.len(), + concepts = ?analysis.key_concepts, + strategy = %analysis.strategy_hint, + rewritten = ?analysis.rewritten, "Query understanding complete" ); Ok(analysis.into_plan(query, keywords)) From b0d88cc8ded244c2211a6784086552dd5f8c93d4 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Wed, 22 Apr 2026 11:44:22 +0800 Subject: [PATCH 06/10] feat(agent): increase worker limits and enhance find command functionality - Increase max_rounds from 8 to 15 and max_llm_calls from 15 to 25 - Update find command to support multi-word searches and provide better fallback behavior for title matching - Enhance search strategy documentation with navigation efficiency guidelines - Update all test cases to reflect new max_rounds value of 15 - Improve find command output to include content snippets when available --- rust/src/agent/config.rs | 4 +-- rust/src/agent/prompts.rs | 35 +++++++++++++++++--------- rust/src/agent/tools/worker/cat.rs | 2 +- rust/src/agent/tools/worker/cd.rs | 8 +++--- rust/src/agent/tools/worker/grep.rs | 10 ++++---- rust/src/agent/tools/worker/head.rs | 4 +-- rust/src/agent/tools/worker/ls.rs | 2 +- rust/src/agent/tools/worker/pwd.rs | 2 +- rust/src/agent/tools/worker/wc.rs | 4 +-- rust/src/agent/worker/execute.rs | 38 +++++++++++++++++++++++++++-- rust/src/agent/worker/planning.rs | 2 +- 11 files changed, 78 insertions(+), 33 deletions(-) diff --git a/rust/src/agent/config.rs b/rust/src/agent/config.rs index b276d94..56567c7 100644 --- a/rust/src/agent/config.rs +++ b/rust/src/agent/config.rs @@ -23,8 +23,8 @@ pub struct WorkerConfig { 
impl Default for WorkerConfig { fn default() -> Self { Self { - max_rounds: 8, - max_llm_calls: 15, + max_rounds: 15, + max_llm_calls: 25, } } } diff --git a/rust/src/agent/prompts.rs b/rust/src/agent/prompts.rs index e128795..11d26fb 100644 --- a/rust/src/agent/prompts.rs +++ b/rust/src/agent/prompts.rs @@ -120,7 +120,7 @@ Available commands: - cat Read a child node's content (automatically collected as evidence) - cat Read the current node's content (useful at leaf nodes) - head Preview first 20 lines of a node (does NOT collect evidence) -- find Search for a keyword in the document index +- find Search for a keyword in the document index (also supports multi-word like 'Lab C') - findtree Search for nodes by title pattern (case-insensitive) - grep Regex search across all content in current subtree - wc Show content size (lines, words, chars) @@ -129,12 +129,21 @@ Available commands: - done End navigation SEARCH STRATEGY (important — follow this priority order): -- When keyword matches are shown below, use find with the EXACT keyword from the list (single word, \ +- When keyword matches are shown below, navigate directly to the highest-weight matched node. \ +Do NOT explore other branches first — the keyword index has already identified the most relevant location. +- When find results include content snippets that answer the question, cd to that node and cat it immediately. +- Use find with the EXACT keyword from the list (single word, \ not multi-word phrases). Example: if hint shows keyword 'performance' pointing to Performance section, \ use find performance, NOT find \"performance guide\". -- Use ls when you have no keyword hints or need to discover the structure of an unknown section. +- Use ls only when you have no keyword hints or need to discover the structure of an unknown section. - Use findtree when you know a section title pattern but not the exact name. 
+NAVIGATION EFFICIENCY (critical — every round counts): +- Prefer cd with absolute paths (/root/Section/Subsection) or relative paths (Section/Sub) \ +to reach target nodes in ONE command instead of multiple cd steps. +- Do NOT ls before cd if keyword hints or find results already tell you which node to enter. +- Do NOT cd into nodes one level at a time when you can use a multi-segment path. + Rules: - Output exactly ONE command per response, nothing else. - Content from cat is automatically saved as evidence — don't re-cat the same node. @@ -239,13 +248,15 @@ pub fn worker_dispatch(params: &WorkerDispatchParams) -> (String, String) { "You are a document navigation assistant. You are searching inside the document \ \"{doc_name}\" for specific information. -Available commands: ls, cd (supports \"Section/Sub\" paths), cd .., cat, cat , \ -head , find , findtree , grep , wc , pwd, check, done +Available commands: ls, cd (supports Section/Sub paths and /root/Section absolute paths), \ +cd .., cat, cat , head , find , findtree , grep , wc , \ +pwd, check, done SEARCH STRATEGY: -- Prefer find to jump directly to relevant sections over manual ls→cd exploration. \ -Use single-word keywords, not multi-word phrases. -- Use ls when you need to discover the structure of an unknown section. +- Prefer find to jump directly to relevant sections over manual ls→cd exploration. +- When find results include content snippets that answer your task, cd to that node and cat it immediately. +- Use multi-segment paths (e.g. cd Research Labs/Lab A) to reach targets in ONE command. +- Do NOT ls before cd if find results already tell you which node to enter. - Use findtree when you know a section title pattern but not the exact name. 
Rules: @@ -396,7 +407,7 @@ mod tests { missing_info: "2024 comparison", last_feedback: "[1] Q1 Report — Q1 data (5 leaves)\n[2] Q2 Report — Q2 data (5 leaves)", remaining: 5, - max_rounds: 8, + max_rounds: 15, history: "(no history yet)", visited_titles: "(none)", plan: "", @@ -411,7 +422,7 @@ mod tests { assert!(user.contains("root/Financial Statements")); assert!(user.contains("200 chars")); assert!(user.contains("2024 comparison")); - assert!(user.contains("5/8")); + assert!(user.contains("5/15")); assert!(!user.contains("sub-task")); } @@ -425,7 +436,7 @@ mod tests { missing_info: "", last_feedback: "", remaining: 8, - max_rounds: 8, + max_rounds: 15, history: "(no history yet)", visited_titles: "(none)", plan: "", @@ -448,7 +459,7 @@ mod tests { missing_info: "", last_feedback: "", remaining: 8, - max_rounds: 8, + max_rounds: 15, history: "(no history yet)", visited_titles: "(none)", plan: "", diff --git a/rust/src/agent/tools/worker/cat.rs b/rust/src/agent/tools/worker/cat.rs index 0e13257..267771b 100644 --- a/rust/src/agent/tools/worker/cat.rs +++ b/rust/src/agent/tools/worker/cat.rs @@ -110,7 +110,7 @@ mod tests { reasoning_index: &crate::document::ReasoningIndex::default(), doc_name: "test", }; - let mut state = WorkerState::new(root, 8); + let mut state = WorkerState::new(root, 15); let result = cat("Getting Started", &ctx, &mut state); assert!(result.success); diff --git a/rust/src/agent/tools/worker/cd.rs b/rust/src/agent/tools/worker/cd.rs index eba325f..8d87483 100644 --- a/rust/src/agent/tools/worker/cd.rs +++ b/rust/src/agent/tools/worker/cd.rs @@ -171,7 +171,7 @@ mod tests { reasoning_index: &crate::document::ReasoningIndex::default(), doc_name: "test", }; - let mut state = WorkerState::new(root, 8); + let mut state = WorkerState::new(root, 15); let result = cd("Getting Started", &ctx, &mut state); assert!(result.success); @@ -188,7 +188,7 @@ mod tests { reasoning_index: &crate::document::ReasoningIndex::default(), doc_name: "test", }; - let mut 
state = WorkerState::new(root, 8); + let mut state = WorkerState::new(root, 15); cd("Getting Started", &ctx, &mut state); let result = cd_up(&ctx, &mut state); @@ -235,7 +235,7 @@ mod tests { reasoning_index: &crate::document::ReasoningIndex::default(), doc_name: "test", }; - let mut state = WorkerState::new(root, 8); + let mut state = WorkerState::new(root, 15); let result = cd("Research Labs/Lab B", &ctx, &mut state); assert!(result.success); @@ -253,7 +253,7 @@ mod tests { reasoning_index: &crate::document::ReasoningIndex::default(), doc_name: "test", }; - let mut state = WorkerState::new(root, 8); + let mut state = WorkerState::new(root, 15); let result = cd("Research Labs/Nonexistent", &ctx, &mut state); assert!(!result.success); diff --git a/rust/src/agent/tools/worker/grep.rs b/rust/src/agent/tools/worker/grep.rs index 6dc5c2c..2ebb609 100644 --- a/rust/src/agent/tools/worker/grep.rs +++ b/rust/src/agent/tools/worker/grep.rs @@ -124,7 +124,7 @@ mod tests { fn test_grep_finds_matches() { let (tree, nav, root) = build_rich_tree(); let ctx = rich_ctx!(tree, nav); - let state = WorkerState::new(root, 8); + let state = WorkerState::new(root, 15); let result = grep("revenue", &ctx, &state); assert!(result.success); @@ -136,7 +136,7 @@ mod tests { fn test_grep_regex() { let (tree, nav, root) = build_rich_tree(); let ctx = rich_ctx!(tree, nav); - let state = WorkerState::new(root, 8); + let state = WorkerState::new(root, 15); let result = grep("EBITDA|\\$\\d+", &ctx, &state); assert!(result.success); @@ -148,7 +148,7 @@ mod tests { fn test_grep_no_matches() { let (tree, nav, root) = build_rich_tree(); let ctx = rich_ctx!(tree, nav); - let state = WorkerState::new(root, 8); + let state = WorkerState::new(root, 15); let result = grep("nonexistent_term_xyz", &ctx, &state); assert!(result.success); @@ -159,7 +159,7 @@ mod tests { fn test_grep_invalid_regex() { let (tree, nav, root) = build_rich_tree(); let ctx = rich_ctx!(tree, nav); - let state = WorkerState::new(root, 
8); + let state = WorkerState::new(root, 15); let result = grep("[invalid", &ctx, &state); assert!(!result.success); @@ -170,7 +170,7 @@ mod tests { fn test_grep_subtree_only() { let (tree, nav, root) = build_rich_tree(); let ctx = rich_ctx!(tree, nav); - let mut state = WorkerState::new(root, 8); + let mut state = WorkerState::new(root, 15); crate::agent::tools::worker::cd::cd("Expenses", &ctx, &mut state); let result = grep("revenue", &ctx, &state); diff --git a/rust/src/agent/tools/worker/head.rs b/rust/src/agent/tools/worker/head.rs index 06dd443..5fefa23 100644 --- a/rust/src/agent/tools/worker/head.rs +++ b/rust/src/agent/tools/worker/head.rs @@ -98,7 +98,7 @@ mod tests { fn test_head_preview() { let (tree, nav, root) = build_rich_tree(); let ctx = rich_ctx!(tree, nav); - let state = WorkerState::new(root, 8); + let state = WorkerState::new(root, 15); let result = head("Revenue", 2, &ctx, &state); assert!(result.success); @@ -111,7 +111,7 @@ mod tests { fn test_head_not_found() { let (tree, nav, root) = build_rich_tree(); let ctx = rich_ctx!(tree, nav); - let state = WorkerState::new(root, 8); + let state = WorkerState::new(root, 15); let result = head("NonExistent", 10, &ctx, &state); assert!(!result.success); diff --git a/rust/src/agent/tools/worker/ls.rs b/rust/src/agent/tools/worker/ls.rs index 1c20daa..a00d688 100644 --- a/rust/src/agent/tools/worker/ls.rs +++ b/rust/src/agent/tools/worker/ls.rs @@ -114,7 +114,7 @@ mod tests { reasoning_index: &crate::document::ReasoningIndex::default(), doc_name: "test", }; - let state = WorkerState::new(root, 8); + let state = WorkerState::new(root, 15); let result = ls(&ctx, &state); assert!(result.success); diff --git a/rust/src/agent/tools/worker/pwd.rs b/rust/src/agent/tools/worker/pwd.rs index 4f71a7e..7adcf08 100644 --- a/rust/src/agent/tools/worker/pwd.rs +++ b/rust/src/agent/tools/worker/pwd.rs @@ -48,7 +48,7 @@ mod tests { reasoning_index: &crate::document::ReasoningIndex::default(), doc_name: "test", }; - let 
mut state = WorkerState::new(root, 8); + let mut state = WorkerState::new(root, 15); cd("API Reference", &ctx, &mut state); let result = pwd(&state); diff --git a/rust/src/agent/tools/worker/wc.rs b/rust/src/agent/tools/worker/wc.rs index ac37f29..4ea7ec0 100644 --- a/rust/src/agent/tools/worker/wc.rs +++ b/rust/src/agent/tools/worker/wc.rs @@ -87,7 +87,7 @@ mod tests { fn test_wc_stats() { let (tree, nav, root) = build_rich_tree(); let ctx = rich_ctx!(tree, nav); - let state = WorkerState::new(root, 8); + let state = WorkerState::new(root, 15); let result = wc("Revenue", &ctx, &state); assert!(result.success); @@ -101,7 +101,7 @@ mod tests { fn test_wc_not_found() { let (tree, nav, root) = build_rich_tree(); let ctx = rich_ctx!(tree, nav); - let state = WorkerState::new(root, 8); + let state = WorkerState::new(root, 15); let result = wc("NonExistent", &ctx, &state); assert!(!result.success); diff --git a/rust/src/agent/worker/execute.rs b/rust/src/agent/worker/execute.rs index 77ff917..ee4a9b0 100644 --- a/rust/src/agent/worker/execute.rs +++ b/rust/src/agent/worker/execute.rs @@ -100,7 +100,6 @@ pub async fn execute_command( " - {} (depth {}, weight {:.2})", title, entry.depth, entry.weight )); - // Include a content snippet so the LLM can judge relevance if let Some(content) = ctx.cat(entry.node_id) { if let Some(snippet) = content_snippet(content, keyword, 150) @@ -112,7 +111,42 @@ pub async fn execute_command( } output } - None => format!("No results for '{}'", keyword), + None => { + // Fallback: search node titles (like findtree) with content snippets + let pattern_lower = keyword.to_lowercase(); + let all_nodes = ctx.tree.traverse(); + let mut results = Vec::new(); + for node_id in &all_nodes { + if let Some(node) = ctx.tree.get(*node_id) { + if node.title.to_lowercase().contains(&pattern_lower) { + let depth = ctx.tree.depth(*node_id); + results.push((node.title.clone(), *node_id, depth)); + } + } + } + if results.is_empty() { + format!("No results for 
'{}' in index or titles.", keyword) + } else { + let mut output = format!( + "Results for '{}' (title match, {} found):\n", + keyword, + results.len() + ); + for (title, node_id, depth) in &results { + output.push_str(&format!( + " - {} (depth {})", + title, depth + )); + if let Some(content) = ctx.cat(*node_id) { + if let Some(snippet) = content_snippet(content, keyword, 150) { + output.push_str(&format!("\n \"{}\"", snippet)); + } + } + output.push('\n'); + } + output + } + } }; info!(doc = ctx.doc_name, keyword, feedback = %truncate_log(&feedback), "find result"); state.set_feedback(feedback); diff --git a/rust/src/agent/worker/planning.rs b/rust/src/agent/worker/planning.rs index 2b0671a..7570573 100644 --- a/rust/src/agent/worker/planning.rs +++ b/rust/src/agent/worker/planning.rs @@ -702,7 +702,7 @@ mod tests { #[test] fn test_build_replan_prompt() { let (tree, nav, root, _, _) = build_semantic_test_tree(); - let mut state = WorkerState::new(root, 8); + let mut state = WorkerState::new(root, 15); state.missing_info = "Need Q2 revenue figures".to_string(); state.add_evidence(Evidence { source_path: "root/Revenue".to_string(), From 095c918c9f189303b6a51f702088c68d4aa09091 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Wed, 22 Apr 2026 12:36:48 +0800 Subject: [PATCH 07/10] feat(agent): enhance target resolution with deep search capability Add BFS-based deep search functionality to resolve_target_extended that searches up to 4 levels deep for matching node titles. The new search hierarchy prioritizes: 1) Direct children via NavigationIndex, 2) Direct children via TreeNode titles, and 3) Deep descendant search with breadth-first traversal. Also include comprehensive test coverage for the new deep search functionality. refactor(agent): improve evidence formatting with content previews Replace character count displays with actual content excerpts in evidence summaries for both evaluation and replanning phases. 
Content is truncated to 500 characters to maintain manageable prompt sizes. Update format_evidence_summary and format_evidence_context functions to show meaningful content previews instead of just character counts. feat(agent): track collected nodes separately from visited nodes Introduce collected_nodes HashSet to distinguish between nodes that have been visited during navigation versus nodes whose content has been specifically collected via cat operations. Add has_evidence_for method to check collection status and evidence_for_check method to provide content-excerpt based evidence summaries for sufficiency checks. --- rust/src/agent/command.rs | 99 ++++++++++++++++++++++++- rust/src/agent/orchestrator/evaluate.rs | 15 ++-- rust/src/agent/orchestrator/replan.rs | 15 ++-- rust/src/agent/state.rs | 29 ++++++++ rust/src/agent/tools/worker/cat.rs | 3 +- rust/src/agent/worker/execute.rs | 4 +- 6 files changed, 144 insertions(+), 21 deletions(-) diff --git a/rust/src/agent/command.rs b/rust/src/agent/command.rs index 6d983dc..5507a1d 100644 --- a/rust/src/agent/command.rs +++ b/rust/src/agent/command.rs @@ -183,8 +183,11 @@ pub fn resolve_target( /// Resolve a cd/cat target with additional context from the tree node titles. /// -/// This extended resolver also checks against the actual tree node titles -/// (in case NavEntry titles differ from TreeNode titles). +/// Matching priority: +/// 1. Direct children via NavigationIndex (exact, case-insensitive, substring, numeric) +/// 2. Direct children via TreeNode titles (case-insensitive contains) +/// 3. Deep descendant search (BFS, up to depth 4) — enables `cd "Research Labs"` from +/// root when "Research Labs" is a grandchild behind an intermediate wrapper node. 
pub fn resolve_target_extended( target: &str, nav_index: &NavigationIndex, @@ -197,10 +200,10 @@ pub fn resolve_target_extended( return Some(id); } - // Extended: check all children by their TreeNode titles - let children: Vec = tree.children_iter(current_node).collect(); let target_lower = target.to_lowercase(); + // Extended: check all direct children by their TreeNode titles + let children: Vec = tree.children_iter(current_node).collect(); for child_id in &children { if let Some(node) = tree.get(*child_id) { if node.title.to_lowercase().contains(&target_lower) { @@ -209,6 +212,35 @@ pub fn resolve_target_extended( } } + // Deep search: BFS through descendants up to depth 4. + // Returns the shallowest match so `cd "Research Labs"` from root finds it + // at depth 1 even if another "Research Labs" exists deeper. + search_descendants(&target_lower, current_node, tree, 4) +} + +/// BFS search through descendants, returning the shallowest matching NodeId. +fn search_descendants( + target_lower: &str, + start: NodeId, + tree: &crate::document::DocumentTree, + max_depth: usize, +) -> Option { + let mut queue: Vec<(NodeId, usize)> = vec![(start, 0)]; + + while let Some((node_id, depth)) = queue.pop() { + if depth >= max_depth { + continue; + } + for child_id in tree.children_iter(node_id) { + if let Some(node) = tree.get(child_id) { + if node.title.to_lowercase().contains(target_lower) { + return Some(child_id); + } + } + queue.push((child_id, depth + 1)); + } + } + None } @@ -470,6 +502,65 @@ mod tests { assert!(resolve_target("anything", &nav_index, tree.root()).is_none()); } + #[test] + fn test_resolve_target_extended_deep_search() { + use crate::document::{ChildRoute, DocumentTree}; + + // root → "Wrapper" → "Research Labs" → "Lab B" + let mut tree = DocumentTree::new("Root", "root content"); + let root = tree.root(); + let wrapper = tree.add_child(root, "Quantum Computing Division", "wrapper"); + let labs = tree.add_child(wrapper, "Research Labs", "labs content"); 
+ let lab_b = tree.add_child(labs, "Lab B", "lab b content"); + + let mut nav = NavigationIndex::new(); + nav.add_child_routes( + root, + vec![ChildRoute { + node_id: wrapper, + title: "Quantum Computing Division".to_string(), + description: "Division".to_string(), + leaf_count: 7, + }], + ); + nav.add_child_routes( + wrapper, + vec![ChildRoute { + node_id: labs, + title: "Research Labs".to_string(), + description: "Labs".to_string(), + leaf_count: 4, + }], + ); + nav.add_child_routes( + labs, + vec![ChildRoute { + node_id: lab_b, + title: "Lab B".to_string(), + description: "Topological".to_string(), + leaf_count: 1, + }], + ); + + // "Research Labs" is a grandchild of root — deep search should find it + assert_eq!( + resolve_target_extended("Research Labs", &nav, root, &tree), + Some(labs) + ); + + // "Lab B" is a great-grandchild — deep search should find it + assert_eq!( + resolve_target_extended("Lab B", &nav, root, &tree), + Some(lab_b) + ); + + // Direct children should still work via primary resolver + assert_eq!( + resolve_target_extended("Quantum Computing Division", &nav, root, &tree), + Some(wrapper) + ); + } + #[test] fn test_parse_grep() { assert_eq!( diff --git a/rust/src/agent/orchestrator/evaluate.rs b/rust/src/agent/orchestrator/evaluate.rs index 0f74436..02c2f08 100644 --- a/rust/src/agent/orchestrator/evaluate.rs +++ b/rust/src/agent/orchestrator/evaluate.rs @@ -78,6 +78,7 @@ pub async fn evaluate( } /// Format evidence summary for sufficiency check. +/// Includes actual content (truncated) so the check LLM can evaluate relevance. 
pub fn format_evidence_summary(evidence: &[Evidence]) -> String { if evidence.is_empty() { return "(no evidence)".to_string(); @@ -86,15 +87,15 @@ pub fn format_evidence_summary(evidence: &[Evidence]) -> String { .iter() .map(|e| { let doc = e.doc_name.as_deref().unwrap_or("unknown"); - format!( - "- [{}] (from {}) {} chars", - e.node_title, - doc, - e.content.len() - ) + let content = if e.content.len() > 500 { + format!("{}...(truncated)", &e.content[..500]) + } else { + e.content.clone() + }; + format!("[{}] (from {})\n{}", e.node_title, doc, content) }) .collect::>() - .join("\n") + .join("\n\n") } #[cfg(test)] diff --git a/rust/src/agent/orchestrator/replan.rs b/rust/src/agent/orchestrator/replan.rs index 4b423ac..71d45e1 100644 --- a/rust/src/agent/orchestrator/replan.rs +++ b/rust/src/agent/orchestrator/replan.rs @@ -88,6 +88,7 @@ pub async fn replan( } /// Format collected evidence for the replan prompt. +/// Includes content excerpts so the LLM can reason about what's actually been found. fn format_evidence_context(evidence: &[Evidence]) -> String { if evidence.is_empty() { return "(no evidence collected)".to_string(); @@ -96,15 +97,15 @@ fn format_evidence_context(evidence: &[Evidence]) -> String { .iter() .map(|e| { let doc = e.doc_name.as_deref().unwrap_or("unknown"); - format!( - "- [{}] (from {}) {} chars", - e.node_title, - doc, - e.content.len() - ) + let content = if e.content.len() > 500 { + format!("{}...(truncated)", &e.content[..500]) + } else { + e.content.clone() + }; + format!("[{}] (from {})\n{}", e.node_title, doc, content) }) .collect::>() - .join("\n") + .join("\n\n") } /// Build the replan prompt. diff --git a/rust/src/agent/state.rs b/rust/src/agent/state.rs index a4c1e67..a92e06a 100644 --- a/rust/src/agent/state.rs +++ b/rust/src/agent/state.rs @@ -25,6 +25,9 @@ pub struct WorkerState { pub evidence: Vec, /// Nodes already visited (prevents redundant reads). pub visited: HashSet, + /// Nodes whose content has been collected via cat. 
Separate from visited + /// because cd-ing through a node ≠ reading its content. + pub collected_nodes: HashSet, /// Remaining navigation rounds. pub remaining: u32, /// Maximum rounds (for display in prompts). @@ -61,6 +64,7 @@ impl WorkerState { current_node: root, evidence: Vec::new(), visited: HashSet::new(), + collected_nodes: HashSet::new(), remaining: max_rounds, max_rounds, last_feedback: String::new(), @@ -118,6 +122,11 @@ impl WorkerState { self.evidence.push(evidence); } + /// Check if evidence has already been collected for a specific node. + pub fn has_evidence_for(&self, node_id: crate::document::NodeId) -> bool { + self.collected_nodes.contains(&node_id) + } + /// Push a history entry (command + result summary). /// Keeps only the last `MAX_HISTORY_ENTRIES` entries. pub fn push_history(&mut self, entry: String) { @@ -157,6 +166,26 @@ impl WorkerState { .join("\n") } + /// Evidence with actual content for sufficiency evaluation. + /// Truncates each item to 500 chars to keep the prompt manageable. + pub fn evidence_for_check(&self) -> String { + if self.evidence.is_empty() { + return "(no evidence collected yet)".to_string(); + } + self.evidence + .iter() + .map(|e| { + let content = if e.content.len() > 500 { + format!("{}...(truncated)", &e.content[..500]) + } else { + e.content.clone() + }; + format!("[{}]\n{}", e.node_title, content) + }) + .collect::>() + .join("\n\n") + } + /// Convert this state into a WorkerOutput (consuming the state), with budget flag. /// Worker returns evidence only — no answer synthesis. 
pub fn into_worker_output( diff --git a/rust/src/agent/tools/worker/cat.rs b/rust/src/agent/tools/worker/cat.rs index 267771b..78fb7b8 100644 --- a/rust/src/agent/tools/worker/cat.rs +++ b/rust/src/agent/tools/worker/cat.rs @@ -30,7 +30,7 @@ pub fn cat(target: &str, ctx: &DocContext, state: &mut WorkerState) -> ToolResul } }; - if state.visited.contains(&node_id) { + if state.has_evidence_for(node_id) { let title = ctx.node_title(node_id).unwrap_or("unknown"); return ToolResult::ok(format!( "[Already collected: {}]. Use a different target or cd to another branch.", @@ -50,6 +50,7 @@ pub fn cat(target: &str, ctx: &DocContext, state: &mut WorkerState) -> ToolResul doc_name: Some(ctx.doc_name.to_string()), }); + state.collected_nodes.insert(node_id); state.visited.insert(node_id); let preview = if content_string.len() > 500 { diff --git a/rust/src/agent/worker/execute.rs b/rust/src/agent/worker/execute.rs index ee4a9b0..39f5fc9 100644 --- a/rust/src/agent/worker/execute.rs +++ b/rust/src/agent/worker/execute.rs @@ -160,9 +160,9 @@ pub async fn execute_command( } Command::Check => { - let evidence_summary = state.evidence_summary(); + let evidence_text = state.evidence_for_check(); - let (system, user) = check_sufficiency(query, &evidence_summary); + let (system, user) = check_sufficiency(query, &evidence_text); info!( doc = ctx.doc_name, From f99db9de02083bbab34b752971ed5b60b3a8b2d5 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Wed, 22 Apr 2026 13:05:52 +0800 Subject: [PATCH 08/10] feat(agent): remove content truncation for evidence evaluation - Remove character limit truncation from evidence content in evaluation - Allow full content to be available for LLM assessment of relevance - Increase MAX_FEEDBACK_CHARS from 500 to 2000 to prevent prompt bloat while maintaining useful context fix(logging): add compact formatting to tracing subscriber --- rust/examples/single_doc_challenge.rs | 1 + rust/src/agent/config.rs | 4 ++-- 
rust/src/agent/orchestrator/evaluate.rs | 9 ++------- rust/src/agent/orchestrator/replan.rs | 9 ++------- rust/src/agent/state.rs | 10 ++-------- 5 files changed, 9 insertions(+), 24 deletions(-) diff --git a/rust/examples/single_doc_challenge.rs b/rust/examples/single_doc_challenge.rs index dba3e27..332c241 100644 --- a/rust/examples/single_doc_challenge.rs +++ b/rust/examples/single_doc_challenge.rs @@ -173,6 +173,7 @@ const CHALLENGE_QUESTIONS: &[&str] = &[ #[tokio::main] async fn main() -> vectorless::Result<()> { tracing_subscriber::fmt() + .compact() .with_env_filter( tracing_subscriber::EnvFilter::try_from_default_env() .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")), diff --git a/rust/src/agent/config.rs b/rust/src/agent/config.rs index 56567c7..0873c8c 100644 --- a/rust/src/agent/config.rs +++ b/rust/src/agent/config.rs @@ -23,8 +23,8 @@ pub struct WorkerConfig { impl Default for WorkerConfig { fn default() -> Self { Self { - max_rounds: 15, - max_llm_calls: 25, + max_rounds: 100, + max_llm_calls: 200, } } } diff --git a/rust/src/agent/orchestrator/evaluate.rs b/rust/src/agent/orchestrator/evaluate.rs index 02c2f08..88e7e07 100644 --- a/rust/src/agent/orchestrator/evaluate.rs +++ b/rust/src/agent/orchestrator/evaluate.rs @@ -78,7 +78,7 @@ pub async fn evaluate( } /// Format evidence summary for sufficiency check. -/// Includes actual content (truncated) so the check LLM can evaluate relevance. +/// Includes actual content so the check LLM can evaluate relevance. 
pub fn format_evidence_summary(evidence: &[Evidence]) -> String { if evidence.is_empty() { return "(no evidence)".to_string(); @@ -87,12 +87,7 @@ pub fn format_evidence_summary(evidence: &[Evidence]) -> String { .iter() .map(|e| { let doc = e.doc_name.as_deref().unwrap_or("unknown"); - let content = if e.content.len() > 500 { - format!("{}...(truncated)", &e.content[..500]) - } else { - e.content.clone() - }; - format!("[{}] (from {})\n{}", e.node_title, doc, content) + format!("[{}] (from {})\n{}", e.node_title, doc, e.content) }) .collect::>() .join("\n\n") diff --git a/rust/src/agent/orchestrator/replan.rs b/rust/src/agent/orchestrator/replan.rs index 71d45e1..507c171 100644 --- a/rust/src/agent/orchestrator/replan.rs +++ b/rust/src/agent/orchestrator/replan.rs @@ -88,7 +88,7 @@ pub async fn replan( } /// Format collected evidence for the replan prompt. -/// Includes content excerpts so the LLM can reason about what's actually been found. +/// Includes content so the LLM can reason about what's actually been found. fn format_evidence_context(evidence: &[Evidence]) -> String { if evidence.is_empty() { return "(no evidence collected)".to_string(); @@ -97,12 +97,7 @@ fn format_evidence_context(evidence: &[Evidence]) -> String { .iter() .map(|e| { let doc = e.doc_name.as_deref().unwrap_or("unknown"); - let content = if e.content.len() > 500 { - format!("{}...(truncated)", &e.content[..500]) - } else { - e.content.clone() - }; - format!("[{}] (from {})\n{}", e.node_title, doc, content) + format!("[{}] (from {})\n{}", e.node_title, doc, e.content) }) .collect::>() .join("\n\n") diff --git a/rust/src/agent/state.rs b/rust/src/agent/state.rs index a92e06a..e16c855 100644 --- a/rust/src/agent/state.rs +++ b/rust/src/agent/state.rs @@ -54,7 +54,7 @@ const MAX_HISTORY_ENTRIES: usize = 6; /// Maximum characters for `last_feedback` before truncation. /// Prevents large cat/grep outputs from bloating subsequent prompts. 
-const MAX_FEEDBACK_CHARS: usize = 500; +const MAX_FEEDBACK_CHARS: usize = 2000; impl WorkerState { /// Create a new state starting at the given root node. @@ -167,7 +167,6 @@ impl WorkerState { } /// Evidence with actual content for sufficiency evaluation. - /// Truncates each item to 500 chars to keep the prompt manageable. pub fn evidence_for_check(&self) -> String { if self.evidence.is_empty() { return "(no evidence collected yet)".to_string(); @@ -175,12 +174,7 @@ impl WorkerState { self.evidence .iter() .map(|e| { - let content = if e.content.len() > 500 { - format!("{}...(truncated)", &e.content[..500]) - } else { - e.content.clone() - }; - format!("[{}]\n{}", e.node_title, content) + format!("[{}]\n{}", e.node_title, e.content) }) .collect::>() .join("\n\n") From 053a9b0f4d0219fa6b4882d2a52d2d1864404244 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Wed, 22 Apr 2026 14:50:57 +0800 Subject: [PATCH 09/10] refactor(orchestrator): extract supervisor loop into separate module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Create new supervisor module to encapsulate the dispatch → evaluate → replan logic - Replace inline supervisor loop implementation with call to run_supervisor_loop function - Add SupervisorOutcome struct to return iteration count, evaluation sufficiency status, and LLM call counts - Maintain same functionality while improving code organization and testability refactor(worker): extract navigation loop into separate module - Move navigation loop logic from worker module to new navigation module - Replace inline navigation loop with run_navigation_loop function call - Split complex navigation logic into smaller helper functions for building prompts, handling parsing failures, and managing replanning - Improve code organization and maintainability feat(tools): remove content truncation in cat tool - Remove character limit and truncation logic from cat tool output - Return full content 
string instead of truncated preview - This allows complete evidence collection without size limitations --- rust/src/agent/orchestrator/mod.rs | 109 +----- rust/src/agent/orchestrator/supervisor.rs | 159 ++++++++ rust/src/agent/tools/worker/cat.rs | 12 +- rust/src/agent/worker/mod.rs | 213 ++-------- rust/src/agent/worker/navigation.rs | 454 ++++++++++++++++++++++ 5 files changed, 653 insertions(+), 294 deletions(-) create mode 100644 rust/src/agent/orchestrator/supervisor.rs create mode 100644 rust/src/agent/worker/navigation.rs diff --git a/rust/src/agent/orchestrator/mod.rs b/rust/src/agent/orchestrator/mod.rs index 9cf9615..6bb6bc2 100644 --- a/rust/src/agent/orchestrator/mod.rs +++ b/rust/src/agent/orchestrator/mod.rs @@ -12,6 +12,7 @@ mod analyze; mod dispatch; mod evaluate; mod replan; +mod supervisor; use tracing::info; @@ -22,11 +23,9 @@ use super::Agent; use super::config::{AgentConfig, Output, WorkspaceContext}; use super::events::EventEmitter; use super::state::OrchestratorState; -use super::tools::orchestrator as orch_tools; use analyze::{AnalyzeOutcome, analyze}; -use evaluate::evaluate; -use replan::replan; +use supervisor::run_supervisor_loop; /// Maximum supervisor loop iterations to prevent infinite loops. 
const MAX_SUPERVISOR_ITERATIONS: u32 = 3; @@ -131,98 +130,22 @@ impl<'a> Agent for Orchestrator<'a> { }; // --- Phase 2: Supervisor loop --- - let mut current_dispatches = initial_dispatches; - let mut iteration: u32 = 0; - let mut eval_sufficient = false; - - loop { - if iteration >= MAX_SUPERVISOR_ITERATIONS { - info!(iteration, "Supervisor loop budget exhausted"); - break; - } - - // Dispatch current plan - if !current_dispatches.is_empty() { - info!( - docs = current_dispatches.len(), - docs_list = ?current_dispatches.iter().map(|d| d.doc_idx).collect::>(), - iteration, - "Dispatching Workers" - ); - dispatch::dispatch_and_collect( - &query, - ¤t_dispatches, - ws, - &config, - &llm, - &mut state, - &emitter, - &query_plan, - ) - .await; - } - - // No evidence at all — nothing to evaluate - if state.all_evidence.is_empty() { - info!("No evidence collected from any Worker"); - break; - } - - // Skip evaluation for user-specified documents (no replan needed) - if skip_analysis { - eval_sufficient = !state.all_evidence.is_empty(); - break; - } - - // Evaluate sufficiency - let eval_result = evaluate(&query, &state.all_evidence, &llm).await?; - orch_llm_calls += 1; - - if eval_result.sufficient { - eval_sufficient = true; - info!( - evidence = state.all_evidence.len(), - iteration, "Evidence sufficient — exiting supervisor loop" - ); - break; - } - - // Insufficient — replan - info!( - evidence = state.all_evidence.len(), - missing = eval_result.missing_info.len(), - iteration, - "Evidence insufficient — replanning" - ); - - let doc_cards_text = orch_tools::ls_docs(ws).feedback; - let replan_result = replan( - &query, - &eval_result.missing_info, - &state.all_evidence, - &state.dispatched, - ws.doc_count(), - &doc_cards_text, - &llm, - ) - .await?; - orch_llm_calls += 1; - - if replan_result.dispatches.is_empty() { - info!("Replan produced no new dispatches — exiting supervisor loop"); - break; - } - - current_dispatches = replan_result.dispatches; - iteration += 1; 
- } + let outcome = run_supervisor_loop( + &query, + initial_dispatches, + ws, + &config, + &llm, + &mut state, + &emitter, + &query_plan, + skip_analysis, + ) + .await?; + orch_llm_calls += outcome.llm_calls; - // Derive confidence from supervisor loop outcome: - // - LLM evaluated sufficient on first try → high confidence - // - Needed replan rounds → lower confidence - // - No evaluation ran (skip_analysis / no evidence) → moderate let confidence = - compute_confidence(eval_sufficient, iteration, state.all_evidence.is_empty()); + compute_confidence(outcome.eval_sufficient, outcome.iteration, state.all_evidence.is_empty()); // --- Phase 3: Finalize — rerank + synthesize --- if state.all_evidence.is_empty() { diff --git a/rust/src/agent/orchestrator/supervisor.rs b/rust/src/agent/orchestrator/supervisor.rs new file mode 100644 index 0000000..9dd4a37 --- /dev/null +++ b/rust/src/agent/orchestrator/supervisor.rs @@ -0,0 +1,159 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Phase 2: Supervisor loop — dispatch → evaluate → replan. + +use tracing::info; + +use crate::llm::LlmClient; +use crate::query::QueryPlan; + +use super::super::config::{AgentConfig, WorkspaceContext}; +use super::super::events::EventEmitter; +use super::super::prompts::DispatchEntry; +use super::super::state::OrchestratorState; +use super::super::tools::orchestrator as orch_tools; +use super::dispatch; +use super::evaluate::evaluate; +use super::replan::replan; +use super::MAX_SUPERVISOR_ITERATIONS; + +/// Outcome of the supervisor loop. +pub struct SupervisorOutcome { + /// Number of replan iterations performed. + pub iteration: u32, + /// Whether the LLM evaluator judged evidence sufficient. + pub eval_sufficient: bool, + /// LLM calls consumed within the supervisor loop itself. + pub llm_calls: u32, +} + +/// Run the supervisor loop: dispatch → evaluate → replan. +/// +/// Returns a [`SupervisorOutcome`] summarizing what happened. 
+pub async fn run_supervisor_loop( + query: &str, + initial_dispatches: Vec, + ws: &WorkspaceContext<'_>, + config: &AgentConfig, + llm: &LlmClient, + state: &mut OrchestratorState, + emitter: &EventEmitter, + query_plan: &QueryPlan, + skip_analysis: bool, +) -> crate::error::Result { + let mut current_dispatches = initial_dispatches; + let mut iteration: u32 = 0; + let mut eval_sufficient = false; + let mut llm_calls: u32 = 0; + + loop { + if iteration >= MAX_SUPERVISOR_ITERATIONS { + info!(iteration, "Supervisor loop budget exhausted"); + break; + } + + // Dispatch current plan + if !current_dispatches.is_empty() { + info!( + docs = current_dispatches.len(), + docs_list = ?current_dispatches.iter().map(|d| d.doc_idx).collect::>(), + iteration, + "Dispatching Workers" + ); + dispatch::dispatch_and_collect( + query, + ¤t_dispatches, + ws, + config, + llm, + state, + emitter, + query_plan, + ) + .await; + } + + // No evidence at all — nothing to evaluate + if state.all_evidence.is_empty() { + info!("No evidence collected from any Worker"); + break; + } + + // Skip evaluation for user-specified documents (no replan needed) + if skip_analysis { + eval_sufficient = !state.all_evidence.is_empty(); + break; + } + + // Evaluate sufficiency + let eval_result = evaluate(query, &state.all_evidence, llm).await?; + llm_calls += 1; + + if eval_result.sufficient { + eval_sufficient = true; + info!( + evidence = state.all_evidence.len(), + iteration, "Evidence sufficient — exiting supervisor loop" + ); + break; + } + + // Insufficient — replan + info!( + evidence = state.all_evidence.len(), + missing = eval_result.missing_info.len(), + iteration, + "Evidence insufficient — replanning" + ); + + let doc_cards_text = orch_tools::ls_docs(ws).feedback; + let replan_result = replan( + query, + &eval_result.missing_info, + &state.all_evidence, + &state.dispatched, + ws.doc_count(), + &doc_cards_text, + llm, + ) + .await?; + llm_calls += 1; + + if replan_result.dispatches.is_empty() { + 
info!("Replan produced no new dispatches — exiting supervisor loop"); + break; + } + + current_dispatches = replan_result.dispatches; + iteration += 1; + } + + Ok(SupervisorOutcome { + iteration, + eval_sufficient, + llm_calls, + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_supervisor_outcome_fields() { + let outcome = SupervisorOutcome { + iteration: 2, + eval_sufficient: true, + llm_calls: 5, + }; + assert_eq!(outcome.iteration, 2); + assert!(outcome.eval_sufficient); + assert_eq!(outcome.llm_calls, 5); + } + + #[test] + fn test_max_iterations_constant() { + assert_eq!(MAX_SUPERVISOR_ITERATIONS, 3); + } +} diff --git a/rust/src/agent/tools/worker/cat.rs b/rust/src/agent/tools/worker/cat.rs index 78fb7b8..3792290 100644 --- a/rust/src/agent/tools/worker/cat.rs +++ b/rust/src/agent/tools/worker/cat.rs @@ -53,17 +53,7 @@ pub fn cat(target: &str, ctx: &DocContext, state: &mut WorkerState) -> ToolResul state.collected_nodes.insert(node_id); state.visited.insert(node_id); - let preview = if content_string.len() > 500 { - format!( - "{}...(truncated, {} chars total)", - &content_string[..500], - content_string.len() - ) - } else { - content_string - }; - - ToolResult::ok(format!("[Evidence collected: {}]\n{}", title, preview)) + ToolResult::ok(format!("[Evidence collected: {}]\n{}", title, content_string)) } None => ToolResult::fail(format!("No content available for '{}'.", target)), } diff --git a/rust/src/agent/worker/mod.rs b/rust/src/agent/worker/mod.rs index d807501..f0577c8 100644 --- a/rust/src/agent/worker/mod.rs +++ b/rust/src/agent/worker/mod.rs @@ -13,16 +13,15 @@ mod execute; mod format; +mod navigation; mod planning; -use tracing::{debug, info}; +use tracing::info; use super::Agent; -use super::command::Command; -use super::config::{DocContext, Step, WorkerConfig, WorkerOutput}; +use super::config::{DocContext, WorkerConfig, WorkerOutput}; use super::context::FindHit; use super::events::EventEmitter; -use 
super::prompts::{NavigationParams, worker_dispatch, worker_navigation}; use super::state::WorkerState; use super::tools::worker as tools; use crate::error::Error; @@ -30,9 +29,8 @@ use crate::llm::LlmClient; use crate::query::QueryPlan; use crate::scoring::bm25::extract_keywords; -use execute::{execute_command, parse_and_detect_failure}; -use format::format_visited_titles; -use planning::{build_plan_prompt, build_replan_prompt, format_keyword_hints}; +use navigation::run_navigation_loop; +use planning::build_plan_prompt; /// Worker agent — navigates a single document to collect evidence. /// @@ -102,19 +100,12 @@ impl<'a> Agent for Worker<'a> { ); let mut llm_calls: u32 = 0; - let max_llm = config.max_llm_calls; - - macro_rules! llm_budget_exhausted { - () => { - max_llm > 0 && llm_calls >= max_llm - }; - } // Gather keyword hits as context for LLM planning (not routing rules) let keywords = extract_keywords(&query); let index_hits: Vec = ctx.find_all(&keywords); if !index_hits.is_empty() { - debug!( + tracing::debug!( doc = ctx.doc_name, hit_count = index_hits.len(), "ReasoningIndex keyword hits available for planning" @@ -127,7 +118,7 @@ impl<'a> Agent for Worker<'a> { state.set_feedback(ls_result.feedback); // --- Phase 1.5: Navigation planning --- - if state.remaining > 0 && !llm_budget_exhausted!() { + if state.remaining > 0 && (config.max_llm_calls == 0 || llm_calls < config.max_llm_calls) { info!(doc = ctx.doc_name, "Generating navigation plan..."); let plan_prompt = build_plan_prompt( &query, @@ -160,181 +151,23 @@ impl<'a> Agent for Worker<'a> { } // --- Phase 2: Navigation loop --- - let use_dispatch_prompt = task_ref.is_some(); - let keyword_hints = format_keyword_hints(&index_hits, ctx); - - loop { - if state.remaining == 0 { - info!(doc = ctx.doc_name, "Navigation budget exhausted"); - break; - } - if llm_budget_exhausted!() { - info!( - doc = ctx.doc_name, - llm_calls, max_llm, "LLM call budget exhausted" - ); - break; - } - - // Build prompt - let 
(system, user) = if use_dispatch_prompt && state.remaining == config.max_rounds { - worker_dispatch(&super::prompts::WorkerDispatchParams { - original_query: &query, - task: task_ref.unwrap_or(&query), - doc_name: ctx.doc_name, - breadcrumb: &state.path_str(), - }) - } else { - let visited_titles = format_visited_titles(&state, ctx); - worker_navigation(&NavigationParams { - query: &query, - task: task_ref, - breadcrumb: &state.path_str(), - evidence_summary: &state.evidence_summary(), - missing_info: &state.missing_info, - last_feedback: &state.last_feedback, - remaining: state.remaining, - max_rounds: state.max_rounds, - history: &state.history_text(), - visited_titles: &visited_titles, - plan: &state.plan, - intent_context: &intent_context, - keyword_hints: &keyword_hints, - }) - }; - - // LLM decision - let round_num = config.max_rounds - state.remaining + 1; - let round_start = std::time::Instant::now(); - info!( - doc = ctx.doc_name, - round = round_num, - max_rounds = config.max_rounds, - "Navigation round: calling LLM..." 
- ); - let llm_output = - llm.complete(&system, &user) - .await - .map_err(|e| Error::LlmReasoning { - stage: "worker/navigation".to_string(), - detail: format!("Nav loop LLM call failed (round {round_num}): {e}"), - })?; - llm_calls += 1; - - // Parse command - if llm_output.trim().len() < 2 { - tracing::warn!( - doc = ctx.doc_name, - round = config.max_rounds - state.remaining + 1, - response = llm_output.trim(), - "LLM response unusually short" - ); - } - let (command, is_parse_failure) = parse_and_detect_failure(&llm_output); - if is_parse_failure { - let raw_preview = if llm_output.trim().len() > 200 { - format!("{}...", &llm_output.trim()[..200]) - } else { - llm_output.trim().to_string() - }; - state.last_feedback = format!( - "Your output was not recognized as a valid command:\n\"{}\"\n\n\ - Please output exactly one command (ls, cd, cat, head, find, findtree, grep, wc, pwd, check, or done).", - raw_preview - ); - state.push_history("(unrecognized) → parse failure".to_string()); - continue; - } - - debug!(doc = ctx.doc_name, ?command, "Parsed command"); - - let is_check = matches!(command, Command::Check); - - // Execute - let step = execute_command( - &command, - ctx, - &mut state, - &query, - &llm, - &mut llm_calls, - &emitter, - ) - .await; - - // Dynamic re-planning after insufficient check - if is_check - && !state.missing_info.is_empty() - && state.remaining >= 3 - && !llm_budget_exhausted!() - { - let missing = state.missing_info.clone(); - info!(doc = ctx.doc_name, missing = %missing, "Re-planning navigation..."); - let replan = build_replan_prompt(&query, task_ref, &state, ctx); - let new_plan = - llm.complete(&replan.0, &replan.1) - .await - .map_err(|e| Error::LlmReasoning { - stage: "worker/replan".to_string(), - detail: format!("Re-plan LLM call failed: {e}"), - })?; - llm_calls += 1; - let plan_text = new_plan.trim().to_string(); - if !plan_text.is_empty() { - info!( - doc = ctx.doc_name, - plan = %plan_text, - "Re-plan generated" - ); - 
emitter.emit_worker_replan(ctx.doc_name, &missing, plan_text.len()); - state.plan = plan_text; - } - state.missing_info.clear(); - } else if is_check && !state.missing_info.is_empty() { - state.plan.clear(); - state.missing_info.clear(); - } - - // Emit round event - let cmd_str = format!("{:?}", command); - let success = !matches!(step, Step::ForceDone(_)); - let round_elapsed = round_start.elapsed().as_millis() as u64; - emitter.emit_worker_round(ctx.doc_name, round_num, &cmd_str, success, round_elapsed); - - let feedback_preview = if state.last_feedback.len() > 120 { - let boundary = state.last_feedback.ceil_char_boundary(120); - format!("{}...", &state.last_feedback[..boundary]) - } else { - state.last_feedback.clone() - }; - state.push_history(format!("{} → {}", cmd_str, feedback_preview)); - - // Check termination - match step { - Step::Done => { - info!( - doc = ctx.doc_name, - evidence = state.evidence.len(), - "Navigation done" - ); - break; - } - Step::ForceDone(reason) => { - info!(doc = ctx.doc_name, reason = %reason, "Forced done"); - break; - } - Step::Continue => { - if !is_check { - state.dec_round(); - } - } - } - } - - let budget_exhausted = state.remaining == 0 || llm_budget_exhausted!(); + run_navigation_loop( + &query, + task_ref, + ctx, + &config, + &llm, + &mut state, + &emitter, + &index_hits, + &intent_context, + &mut llm_calls, + ) + .await?; + + let budget_exhausted = state.remaining == 0 + || (config.max_llm_calls > 0 && llm_calls >= config.max_llm_calls); - // Worker returns raw evidence — no synthesis. - // The Orchestrator owns the single synthesis/fusion point via rerank::process. 
let output = state.into_worker_output(llm_calls, budget_exhausted, ctx.doc_name); emitter.emit_worker_done( diff --git a/rust/src/agent/worker/navigation.rs b/rust/src/agent/worker/navigation.rs new file mode 100644 index 0000000..c41f0bf --- /dev/null +++ b/rust/src/agent/worker/navigation.rs @@ -0,0 +1,454 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Phase 2: Navigation loop — LLM-driven command loop until done or budget exhausted. + +use tracing::{debug, info}; + +use super::super::command::Command; +use super::super::config::{DocContext, Step, WorkerConfig}; +use super::super::context::FindHit; +use super::super::events::EventEmitter; +use super::super::prompts::{NavigationParams, worker_dispatch, worker_navigation}; +use super::super::state::WorkerState; +use super::execute::{execute_command, parse_and_detect_failure}; +use super::format::format_visited_titles; +use super::planning::{build_replan_prompt, format_keyword_hints}; +use crate::error::Error; +use crate::llm::LlmClient; + +/// Run the Phase 2 navigation loop. +/// +/// Loops until budget exhausted, `done`/`force_done`, or error. +/// Mutates `state` and `llm_calls` in place. 
+pub async fn run_navigation_loop( + query: &str, + task: Option<&str>, + ctx: &DocContext<'_>, + config: &WorkerConfig, + llm: &LlmClient, + state: &mut WorkerState, + emitter: &EventEmitter, + index_hits: &[FindHit], + intent_context: &str, + llm_calls: &mut u32, +) -> crate::error::Result<()> { + let use_dispatch_prompt = task.is_some(); + let keyword_hints = format_keyword_hints(index_hits, ctx); + let max_llm = config.max_llm_calls; + + loop { + if state.remaining == 0 { + info!(doc = ctx.doc_name, "Navigation budget exhausted"); + break; + } + if max_llm > 0 && *llm_calls >= max_llm { + info!( + doc = ctx.doc_name, + llm_calls, max_llm, "LLM call budget exhausted" + ); + break; + } + + // Build prompt + let (system, user) = build_round_prompt( + query, + task, + ctx, + state, + intent_context, + &keyword_hints, + use_dispatch_prompt, + config.max_rounds, + ); + + // LLM decision + let round_num = config.max_rounds - state.remaining + 1; + let round_start = std::time::Instant::now(); + info!( + doc = ctx.doc_name, + round = round_num, + max_rounds = config.max_rounds, + "Navigation round: calling LLM..." 
+ ); + let llm_output = + llm.complete(&system, &user) + .await + .map_err(|e| Error::LlmReasoning { + stage: "worker/navigation".to_string(), + detail: format!("Nav loop LLM call failed (round {round_num}): {e}"), + })?; + *llm_calls += 1; + + // Parse command + let (command, is_parse_failure) = handle_parse_failure(&llm_output, ctx.doc_name, state); + if is_parse_failure { + continue; + } + + debug!(doc = ctx.doc_name, ?command, "Parsed command"); + + let is_check = matches!(command, Command::Check); + + // Execute + let step = execute_command( + &command, + ctx, + state, + query, + llm, + llm_calls, + emitter, + ) + .await; + + // Dynamic re-planning after insufficient check + handle_replan( + is_check, + query, + task, + ctx, + llm, + state, + emitter, + llm_calls, + max_llm, + ) + .await?; + + // Emit round event + let cmd_str = format!("{:?}", command); + let success = !matches!(step, Step::ForceDone(_)); + let round_elapsed = round_start.elapsed().as_millis() as u64; + emitter.emit_worker_round(ctx.doc_name, round_num, &cmd_str, success, round_elapsed); + + push_round_history(state, &cmd_str); + + // Check termination + match step { + Step::Done => { + info!( + doc = ctx.doc_name, + evidence = state.evidence.len(), + "Navigation done" + ); + break; + } + Step::ForceDone(reason) => { + info!(doc = ctx.doc_name, reason = %reason, "Forced done"); + break; + } + Step::Continue => { + if !is_check { + state.dec_round(); + } + } + } + } + + Ok(()) +} + +/// Build the (system, user) prompt pair for a single navigation round. 
+fn build_round_prompt( + query: &str, + task: Option<&str>, + ctx: &DocContext<'_>, + state: &WorkerState, + intent_context: &str, + keyword_hints: &str, + use_dispatch_prompt: bool, + max_rounds: u32, +) -> (String, String) { + if use_dispatch_prompt && state.remaining == max_rounds { + worker_dispatch(&super::super::prompts::WorkerDispatchParams { + original_query: query, + task: task.unwrap_or(query), + doc_name: ctx.doc_name, + breadcrumb: &state.path_str(), + }) + } else { + let visited_titles = format_visited_titles(state, ctx); + worker_navigation(&NavigationParams { + query, + task, + breadcrumb: &state.path_str(), + evidence_summary: &state.evidence_summary(), + missing_info: &state.missing_info, + last_feedback: &state.last_feedback, + remaining: state.remaining, + max_rounds: state.max_rounds, + history: &state.history_text(), + visited_titles: &visited_titles, + plan: &state.plan, + intent_context, + keyword_hints, + }) + } +} + +/// Parse LLM output and handle parse failures. +/// +/// Returns `(command, is_parse_failure)`. On parse failure, updates state +/// with feedback and pushes a history entry. 
+fn handle_parse_failure( + llm_output: &str, + doc_name: &str, + state: &mut WorkerState, +) -> (Command, bool) { + if llm_output.trim().len() < 2 { + tracing::warn!( + doc = doc_name, + response = llm_output.trim(), + "LLM response unusually short" + ); + } + let (command, is_parse_failure) = parse_and_detect_failure(llm_output); + if is_parse_failure { + let raw_preview = if llm_output.trim().len() > 200 { + format!("{}...", &llm_output.trim()[..200]) + } else { + llm_output.trim().to_string() + }; + state.last_feedback = format!( + "Your output was not recognized as a valid command:\n\"{}\"\n\n\ + Please output exactly one command (ls, cd, cat, head, find, findtree, grep, wc, pwd, check, or done).", + raw_preview + ); + state.push_history("(unrecognized) → parse failure".to_string()); + } + (command, is_parse_failure) +} + +/// Push a round's command + feedback preview into history. +fn push_round_history(state: &mut WorkerState, cmd_str: &str) { + let feedback_preview = if state.last_feedback.len() > 120 { + let boundary = state.last_feedback.ceil_char_boundary(120); + format!("{}...", &state.last_feedback[..boundary]) + } else { + state.last_feedback.clone() + }; + state.push_history(format!("{} → {}", cmd_str, feedback_preview)); +} + +/// Dynamic re-planning after an insufficient check. +/// +/// If check returned INSUFFICIENT with enough remaining rounds and LLM budget, +/// generates a new navigation plan. Otherwise clears stale replan state. 
+async fn handle_replan( + is_check: bool, + query: &str, + task: Option<&str>, + ctx: &DocContext<'_>, + llm: &LlmClient, + state: &mut WorkerState, + emitter: &EventEmitter, + llm_calls: &mut u32, + max_llm: u32, +) -> crate::error::Result<()> { + if !is_check { + return Ok(()); + } + + if !state.missing_info.is_empty() && state.remaining >= 3 && (max_llm == 0 || *llm_calls < max_llm) { + let missing = state.missing_info.clone(); + info!(doc = ctx.doc_name, missing = %missing, "Re-planning navigation..."); + let replan = build_replan_prompt(query, task, state, ctx); + let new_plan = + llm.complete(&replan.0, &replan.1) + .await + .map_err(|e| Error::LlmReasoning { + stage: "worker/replan".to_string(), + detail: format!("Re-plan LLM call failed: {e}"), + })?; + *llm_calls += 1; + let plan_text = new_plan.trim().to_string(); + if !plan_text.is_empty() { + info!( + doc = ctx.doc_name, + plan = %plan_text, + "Re-plan generated" + ); + emitter.emit_worker_replan(ctx.doc_name, &missing, plan_text.len()); + state.plan = plan_text; + } + state.missing_info.clear(); + } else if !state.missing_info.is_empty() { + state.plan.clear(); + state.missing_info.clear(); + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::agent::config::DocContext; + use crate::agent::state::WorkerState; + use crate::document::{DocumentTree, NodeId}; + + fn test_ctx() -> (DocumentTree, NodeId) { + let tree = DocumentTree::new("Root", "root content"); + let root = tree.root(); + (tree, root) + } + + #[test] + fn test_handle_parse_failure_valid_command() { + let (tree, root) = test_ctx(); + let nav = crate::document::NavigationIndex::new(); + let ctx = DocContext { + tree: &tree, + nav_index: &nav, + reasoning_index: &crate::document::ReasoningIndex::default(), + doc_name: "test", + }; + let mut state = WorkerState::new(root, 10); + + let (cmd, is_failure) = handle_parse_failure("ls", ctx.doc_name, &mut state); + assert!(!is_failure); + assert!(matches!(cmd, Command::Ls)); + } 
+ + #[test] + fn test_handle_parse_failure_unrecognized() { + let (tree, root) = test_ctx(); + let nav = crate::document::NavigationIndex::new(); + let ctx = DocContext { + tree: &tree, + nav_index: &nav, + reasoning_index: &crate::document::ReasoningIndex::default(), + doc_name: "test", + }; + let mut state = WorkerState::new(root, 10); + + let (_cmd, is_failure) = handle_parse_failure("random garbage text", ctx.doc_name, &mut state); + assert!(is_failure); + assert!(state.last_feedback.contains("not recognized")); + assert!(state.history.last().unwrap().contains("unrecognized")); + } + + #[test] + fn test_handle_parse_failure_short_response() { + let (tree, root) = test_ctx(); + let nav = crate::document::NavigationIndex::new(); + let ctx = DocContext { + tree: &tree, + nav_index: &nav, + reasoning_index: &crate::document::ReasoningIndex::default(), + doc_name: "test", + }; + let mut state = WorkerState::new(root, 10); + + // Single character response — short but not a parse failure if it's "ls" + let (cmd, is_failure) = handle_parse_failure("ls", ctx.doc_name, &mut state); + assert!(!is_failure); + assert!(matches!(cmd, Command::Ls)); + } + + #[test] + fn test_push_round_history_short_feedback() { + let (_, root) = test_ctx(); + let mut state = WorkerState::new(root, 10); + state.last_feedback = "short feedback".to_string(); + + push_round_history(&mut state, "ls"); + assert_eq!(state.history.len(), 1); + assert!(state.history[0].contains("ls → short feedback")); + } + + #[test] + fn test_push_round_history_long_feedback() { + let (_, root) = test_ctx(); + let mut state = WorkerState::new(root, 10); + state.last_feedback = "a".repeat(200); + + push_round_history(&mut state, "cat"); + assert_eq!(state.history.len(), 1); + assert!(state.history[0].contains("cat → ")); + // Should be truncated with ... 
+ assert!(state.history[0].contains("...")); + } + + #[test] + fn test_push_round_history_respects_max_entries() { + let (_, root) = test_ctx(); + let mut state = WorkerState::new(root, 10); + state.last_feedback = "ok".to_string(); + + for i in 0..8 { + push_round_history(&mut state, &format!("cmd_{i}")); + } + // MAX_HISTORY_ENTRIES is 6, so only last 6 should remain + assert_eq!(state.history.len(), 6); + } + + #[test] + fn test_build_round_prompt_dispatch_first_round() { + let (tree, root) = test_ctx(); + let nav = crate::document::NavigationIndex::new(); + let ctx = DocContext { + tree: &tree, + nav_index: &nav, + reasoning_index: &crate::document::ReasoningIndex::default(), + doc_name: "test_doc", + }; + let mut state = WorkerState::new(root, 10); + // remaining == max_rounds means first round + assert_eq!(state.remaining, 10); + + let (system, user) = build_round_prompt( + "test query", + Some("sub-task"), + &ctx, + &state, + "factual — find answer", + "", + true, // use_dispatch_prompt + 10, + ); + assert!(system.contains("dispatch") || !system.is_empty()); + assert!(user.contains("test query") || user.contains("sub-task")); + } + + #[test] + fn test_build_round_prompt_navigation_subsequent_round() { + let (tree, root) = test_ctx(); + let nav = crate::document::NavigationIndex::new(); + let ctx = DocContext { + tree: &tree, + nav_index: &nav, + reasoning_index: &crate::document::ReasoningIndex::default(), + doc_name: "test_doc", + }; + let mut state = WorkerState::new(root, 10); + state.remaining = 8; // not first round + + let (system, _user) = build_round_prompt( + "test query", + None, + &ctx, + &state, + "factual", + "keyword hints here", + false, // use_dispatch_prompt + 10, + ); + assert!(!system.is_empty()); + } + + #[test] + fn test_utf8_safe_truncation_in_history() { + let (_, root) = test_ctx(); + let mut state = WorkerState::new(root, 10); + // Each '中' is 3 bytes in UTF-8 + state.last_feedback = "中文反馈内容测试截断安全".repeat(20); + + 
push_round_history(&mut state, "cat"); + let entry = &state.history[0]; + // Should be truncated without panicking + assert!(entry.contains("cat → ")); + assert!(entry.len() < state.last_feedback.len() + 20); + } +} From c6ad19a818b9fdea0f93c1bc6927623a9cef33eb Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Wed, 22 Apr 2026 15:17:48 +0800 Subject: [PATCH 10/10] refactor(agent): remove feedback truncation and consolidate content snippet logic BREAKING CHANGE: Removed MAX_FEEDBACK_CHARS constant and automatic truncation in set_feedback method. Feedback will now be stored as-is without size limitations. - Moved content_snippet function to tools module for shared usage - Updated all references to use the centralized content_snippet function - Increased snippet length from 150/120 to 300 characters for better context - Replaced character limit checks with entry count limits in planning - Added MAX_PLAN_ENTRIES (15), MAX_SECTION_SUMMARIES (10), and MAX_EXPANSION_ENTRIES (8) constants for better control over prompt size - Removed content preview truncation in grep tool --- rust/src/agent/state.rs | 19 +---- rust/src/agent/tools/mod.rs | 48 +++++++++++ rust/src/agent/tools/orchestrator.rs | 47 +---------- rust/src/agent/tools/worker/grep.rs | 7 +- rust/src/agent/worker/execute.rs | 55 +------------ rust/src/agent/worker/planning.rs | 114 +++++++++------------------ 6 files changed, 91 insertions(+), 199 deletions(-) diff --git a/rust/src/agent/state.rs b/rust/src/agent/state.rs index e16c855..e0deb3b 100644 --- a/rust/src/agent/state.rs +++ b/rust/src/agent/state.rs @@ -52,10 +52,6 @@ pub struct WorkerState { /// Maximum number of history entries to keep for prompt injection. const MAX_HISTORY_ENTRIES: usize = 6; -/// Maximum characters for `last_feedback` before truncation. -/// Prevents large cat/grep outputs from bloating subsequent prompts. 
-const MAX_FEEDBACK_CHARS: usize = 2000; - impl WorkerState { /// Create a new state starting at the given root node. pub fn new(root: NodeId, max_rounds: u32) -> Self { @@ -83,20 +79,9 @@ impl WorkerState { } } - /// Set feedback with automatic truncation to prevent prompt bloat. + /// Set feedback from tool execution. pub fn set_feedback(&mut self, feedback: String) { - if feedback.len() <= MAX_FEEDBACK_CHARS { - self.last_feedback = feedback; - } else { - // Find a clean truncation point (line boundary if possible) - let truncated = &feedback[..MAX_FEEDBACK_CHARS]; - let end = truncated.rfind('\n').unwrap_or(MAX_FEEDBACK_CHARS); - self.last_feedback = format!( - "{}...\n(truncated, {} chars total)", - &feedback[..end.min(MAX_FEEDBACK_CHARS)], - feedback.len() - ); - } + self.last_feedback = feedback; } /// Navigate into a child node. diff --git a/rust/src/agent/tools/mod.rs b/rust/src/agent/tools/mod.rs index af90013..c44c002 100644 --- a/rust/src/agent/tools/mod.rs +++ b/rust/src/agent/tools/mod.rs @@ -51,3 +51,51 @@ impl ToolResult { } } } + +/// Extract a content snippet around the first occurrence of `keyword`. +/// +/// Returns `None` if the content is empty. If the keyword is not found, +/// returns the beginning of the content instead. 
+pub fn content_snippet(content: &str, keyword: &str, max_len: usize) -> Option<String> {
+    if content.trim().is_empty() {
+        return None;
+    }
+
+    let keyword_lower = keyword.to_lowercase();
+    let content_lower = content.to_lowercase();
+
+    let start = match content_lower.find(&keyword_lower) {
+        Some(pos) => {
+            let back = (max_len / 4).min(pos);
+            pos - back
+        }
+        None => 0,
+    };
+
+    let start = content
+        .char_indices()
+        .find(|(i, _)| *i >= start)
+        .map(|(i, _)| i)
+        .unwrap_or(0);
+
+    let end = content
+        .char_indices()
+        .take_while(|(i, _)| *i <= start + max_len)
+        .last()
+        .map(|(i, c)| i + c.len_utf8())
+        .unwrap_or(content.len());
+
+    let snippet = content[start..end].trim();
+    if snippet.is_empty() {
+        return None;
+    }
+
+    let mut result = snippet.to_string();
+    if end < content.len() {
+        result.push_str("...");
+    }
+    if start > 0 {
+        result = format!("...{}", result);
+    }
+    Some(result)
+}
diff --git a/rust/src/agent/tools/orchestrator.rs b/rust/src/agent/tools/orchestrator.rs
index 2dd1718..96dcb11 100644
--- a/rust/src/agent/tools/orchestrator.rs
+++ b/rust/src/agent/tools/orchestrator.rs
@@ -96,7 +96,7 @@ pub fn find_cross(keywords: &[String], ctx: &WorkspaceContext) -> ToolResult {
         ));
         // Include content snippet for cross-doc relevance judgment
         if let Some(content) = doc.and_then(|d| d.cat(entry.node_id)) {
-            if let Some(snippet) = content_snippet(content, &hit.keyword, 150) {
+            if let Some(snippet) = super::content_snippet(content, &hit.keyword, 300) {
                 output.push_str(&format!("\n \"{}\"", snippet));
             }
         }
@@ -109,51 +109,6 @@
     ToolResult::ok(output)
 }
 
-/// Extract a short content snippet around the first occurrence of `keyword`.
-fn content_snippet(content: &str, keyword: &str, max_len: usize) -> Option<String> {
-    if content.trim().is_empty() {
-        return None;
-    }
-
-    let keyword_lower = keyword.to_lowercase();
-    let content_lower = content.to_lowercase();
-
-    let start = match content_lower.find(&keyword_lower) {
-        Some(pos) => {
-            let back = (max_len / 4).min(pos);
-            pos - back
-        }
-        None => 0,
-    };
-
-    let start = content
-        .char_indices()
-        .find(|(i, _)| *i >= start)
-        .map(|(i, _)| i)
-        .unwrap_or(0);
-
-    let end = content
-        .char_indices()
-        .take_while(|(i, _)| *i <= start + max_len)
-        .last()
-        .map(|(i, c)| i + c.len_utf8())
-        .unwrap_or(content.len());
-
-    let snippet = content[start..end].trim();
-    if snippet.is_empty() {
-        return None;
-    }
-
-    let mut result = snippet.to_string();
-    if end < content.len() {
-        result.push_str("...");
-    }
-    if start > 0 {
-        result = format!("...{}", result);
-    }
-    Some(result)
-}
-
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/rust/src/agent/tools/worker/grep.rs b/rust/src/agent/tools/worker/grep.rs
index 2ebb609..077b077 100644
--- a/rust/src/agent/tools/worker/grep.rs
+++ b/rust/src/agent/tools/worker/grep.rs
@@ -42,12 +42,7 @@ pub fn grep(pattern: &str, ctx: &DocContext, state: &WorkerState) -> ToolResult
                 break;
             }
             if re.is_match(line) {
-                let preview = if line.len() > 120 {
-                    format!("{}...", &line[..120])
-                } else {
-                    line.to_string()
-                };
-                output.push_str(&format!("[{}] {}\n", title, preview));
+                output.push_str(&format!("[{}] {}\n", title, line));
                 matches_found += 1;
             }
         }
diff --git a/rust/src/agent/worker/execute.rs b/rust/src/agent/worker/execute.rs
index 39f5fc9..c4fa8c1 100644
--- a/rust/src/agent/worker/execute.rs
+++ b/rust/src/agent/worker/execute.rs
@@ -102,7 +102,7 @@ pub async fn execute_command(
                 ));
                 if let Some(content) = ctx.cat(entry.node_id) {
                     if let Some(snippet) =
-                        content_snippet(content, keyword, 150)
+                        super::super::tools::content_snippet(content, keyword, 300)
                     {
                         output.push_str(&format!("\n \"{}\"", snippet));
                     }
@@ -138,7 +138,7 @@
                     title, depth
                 ));
                 if let Some(content) = ctx.cat(*node_id) {
-                    if let Some(snippet) = content_snippet(content, keyword, 150) {
+                    if let Some(snippet) = super::super::tools::content_snippet(content, keyword, 300) {
                         output.push_str(&format!("\n \"{}\"", snippet));
                     }
                 }
@@ -253,57 +253,6 @@ pub async fn execute_command(
     }
 }
 
-/// Extract a short content snippet around the first occurrence of `keyword`.
-///
-/// Returns `None` if the content is empty. If the keyword is not found,
-/// returns the beginning of the content instead.
-fn content_snippet(content: &str, keyword: &str, max_len: usize) -> Option<String> {
-    if content.trim().is_empty() {
-        return None;
-    }
-
-    let keyword_lower = keyword.to_lowercase();
-    let content_lower = content.to_lowercase();
-
-    // Find the keyword position to center the snippet around it
-    let start = match content_lower.find(&keyword_lower) {
-        Some(pos) => {
-            // Back up a bit for context, but don't go negative
-            let back = (max_len / 4).min(pos);
-            pos - back
-        }
-        None => 0,
-    };
-
-    // Find a char boundary near `start`
-    let start = content
-        .char_indices()
-        .find(|(i, _)| *i >= start)
-        .map(|(i, _)| i)
-        .unwrap_or(0);
-
-    let end = content
-        .char_indices()
-        .take_while(|(i, _)| *i <= start + max_len)
-        .last()
-        .map(|(i, c)| i + c.len_utf8())
-        .unwrap_or(content.len());
-
-    let snippet = content[start..end].trim();
-    if snippet.is_empty() {
-        return None;
-    }
-
-    let mut result = snippet.to_string();
-    if end < content.len() {
-        result.push_str("...");
-    }
-    if start > 0 {
-        result = format!("...{}", result);
-    }
-    Some(result)
-}
-
 /// Truncate feedback for log output — keep first 300 chars to avoid noisy logs.
fn truncate_log(s: &str) -> std::borrow::Cow<'_, str> { const MAX: usize = 300; diff --git a/rust/src/agent/worker/planning.rs b/rust/src/agent/worker/planning.rs index 7570573..a42bf52 100644 --- a/rust/src/agent/worker/planning.rs +++ b/rust/src/agent/worker/planning.rs @@ -13,8 +13,12 @@ use super::super::context::FindHit; use super::super::state::WorkerState; use super::format::format_visited_titles; -/// Maximum total chars for keyword + semantic sections in planning prompt. -const PLAN_CONTEXT_BUDGET: usize = 1500; +/// Maximum keyword/semantic hit entries in plan prompt. +const MAX_PLAN_ENTRIES: usize = 15; +/// Maximum section summaries in plan prompt. +const MAX_SECTION_SUMMARIES: usize = 10; +/// Maximum deep expansion entries. +const MAX_EXPANSION_ENTRIES: usize = 8; /// Build the navigation planning prompt (Phase 1.5). pub fn build_plan_prompt( @@ -39,6 +43,7 @@ pub fn build_plan_prompt( } else { let mut section = String::from("\nKeyword index matches (use these to prioritize navigation):\n"); + let mut entry_count = 0; for hit in keyword_hits { let mut entries = hit.entries.clone(); entries.sort_by(|a, b| { @@ -56,18 +61,20 @@ pub fn build_plan_prompt( " - keyword '{}' → {} (depth {}, weight {:.2})\n", hit.keyword, ancestor_path, entry.depth, entry.weight )); - // Include a content snippet so the planner can judge relevance if let Some(content) = ctx.cat(entry.node_id) { - if let Some(snippet) = content_snippet(content, &hit.keyword, 120) { + if let Some(snippet) = + super::super::tools::content_snippet(content, &hit.keyword, 300) + { section.push_str(&format!(" \"{}\"\n", snippet)); } } - if section.len() > PLAN_CONTEXT_BUDGET { - section.push_str(" ... (more hits truncated)\n"); + entry_count += 1; + if entry_count >= MAX_PLAN_ENTRIES { + section.push_str(" ... 
(more hits omitted)\n"); break; } } - if section.len() > PLAN_CONTEXT_BUDGET { + if entry_count >= MAX_PLAN_ENTRIES { break; } } @@ -76,9 +83,7 @@ pub fn build_plan_prompt( let deep_expansion = build_deep_expansion(keyword_hits, ctx); if !deep_expansion.is_empty() { - if keyword_section.len() + deep_expansion.len() <= PLAN_CONTEXT_BUDGET { - keyword_section.push_str(&deep_expansion); - } + keyword_section.push_str(&deep_expansion); } let semantic_section = build_semantic_hints(&query_keywords, &query_lower, ctx); @@ -181,6 +186,7 @@ pub fn format_keyword_hints(keyword_hits: &[FindHit], ctx: &DocContext<'_>) -> S } let mut section = String::from("Keyword matches (use find to jump directly):\n"); + let mut entry_count = 0; for hit in keyword_hits { let mut entries = hit.entries.clone(); entries.sort_by(|a, b| { @@ -198,14 +204,16 @@ pub fn format_keyword_hints(keyword_hits: &[FindHit], ctx: &DocContext<'_>) -> S " - '{}' → {} (weight {:.2})\n", hit.keyword, title, entry.weight )); - // Include a content snippet so the LLM can see what's there if let Some(content) = ctx.cat(entry.node_id) { - if let Some(snippet) = content_snippet(content, &hit.keyword, 100) { + if let Some(snippet) = + super::super::tools::content_snippet(content, &hit.keyword, 300) + { section.push_str(&format!(" \"{}\"\n", snippet)); } } - if section.len() > 800 { - section.push_str(" ... (more)\n"); + entry_count += 1; + if entry_count >= MAX_PLAN_ENTRIES { + section.push_str(" ... (more omitted)\n"); return section; } } @@ -213,54 +221,6 @@ pub fn format_keyword_hints(keyword_hits: &[FindHit], ctx: &DocContext<'_>) -> S section } -/// Extract a short content snippet around the first occurrence of `keyword`. -/// -/// Returns `None` if the content is empty. If the keyword is not found, -/// returns the beginning of the content instead. 
-fn content_snippet(content: &str, keyword: &str, max_len: usize) -> Option<String> {
-    if content.trim().is_empty() {
-        return None;
-    }
-
-    let keyword_lower = keyword.to_lowercase();
-    let content_lower = content.to_lowercase();
-
-    let start = match content_lower.find(&keyword_lower) {
-        Some(pos) => {
-            let back = (max_len / 4).min(pos);
-            pos - back
-        }
-        None => 0,
-    };
-
-    let start = content
-        .char_indices()
-        .find(|(i, _)| *i >= start)
-        .map(|(i, _)| i)
-        .unwrap_or(0);
-
-    let end = content
-        .char_indices()
-        .take_while(|(i, _)| *i <= start + max_len)
-        .last()
-        .map(|(i, c)| i + c.len_utf8())
-        .unwrap_or(content.len());
-
-    let snippet = content[start..end].trim();
-    if snippet.is_empty() {
-        return None;
-    }
-
-    let mut result = snippet.to_string();
-    if end < content.len() {
-        result.push_str("...");
-    }
-    if start > 0 {
-        result = format!("...{}", result);
-    }
-    Some(result)
-}
-
 /// Build the ancestor path string for a node (e.g., "root/Chapter 1/Section 1.2").
 pub fn build_ancestor_path(node_id: crate::document::NodeId, ctx: &DocContext<'_>) -> String {
     let mut path: Vec<NodeId> = ctx.tree.ancestors_iter(node_id).collect();
@@ -290,21 +250,18 @@ fn build_intent_signals(intent: QueryIntent, ctx: &DocContext<'_>) -> String {
     if !shortcut.document_summary.is_empty() {
         section.push_str(&format!(
             "Document summary: {}\n",
-            &shortcut.document_summary[..shortcut.document_summary.len().min(500)]
+            shortcut.document_summary
         ));
     }
+    let mut summary_count = 0;
     for ss in &shortcut.section_summaries {
-        let summary_preview = if ss.summary.len() > 200 {
-            format!("{}...", &ss.summary[..200])
-        } else {
-            ss.summary.clone()
-        };
         section.push_str(&format!(
             " - Section '{}' (depth {}): {}\n",
-            ss.title, ss.depth, summary_preview
+            ss.title, ss.depth, ss.summary
         ));
-        if section.len() > PLAN_CONTEXT_BUDGET {
-            section.push_str(" ... (more sections truncated)\n");
+        summary_count += 1;
+        if summary_count >= MAX_SECTION_SUMMARIES {
+            section.push_str(" ... (more sections omitted)\n");
             break;
         }
     }
@@ -374,7 +331,7 @@ fn build_semantic_hints(
         .collect();
 
     let mut section = String::new();
-    let budget_remaining = PLAN_CONTEXT_BUDGET.saturating_sub(section.len());
+    let mut entry_count = 0;
 
     for route in routes {
         let nav = match ctx.nav_entry(route.node_id) {
@@ -436,10 +393,11 @@
             " - Section '{}' — BM25: {:.2}{}\n",
             route.title, bm25_score, annotation_str
         );
-        if section.len() + line.len() > budget_remaining {
+        section.push_str(&line);
+        entry_count += 1;
+        if entry_count >= MAX_PLAN_ENTRIES {
             break;
         }
-        section.push_str(&line);
     }
 
     if section.is_empty() {
@@ -460,6 +418,7 @@ fn build_deep_expansion(keyword_hits: &[FindHit], ctx: &DocContext<'_>) -> Strin
     let mut seen_parents = HashSet::new();
     let mut expansion = String::new();
+    let mut expansion_count = 0;
 
     for hit in keyword_hits {
         for entry in &hit.entries {
@@ -494,12 +453,13 @@ fn build_deep_expansion(keyword_hits: &[FindHit], ctx: &DocContext<'_>) -> Strin
                 ));
             }
             expansion.push('\n');
-            if expansion.len() > 500 {
-                expansion.push_str(" ... (more expansions truncated)\n");
+            expansion_count += 1;
+            if expansion_count >= MAX_EXPANSION_ENTRIES {
+                expansion.push_str(" ... (more expansions omitted)\n");
                 break;
             }
         }
-        if expansion.len() > 500 {
+        if expansion_count >= MAX_EXPANSION_ENTRIES {
             break;
         }
     }