From 899105556e832bcd2cfb395b55b2860adbb4deba Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Mon, 13 Apr 2026 20:56:00 +0800 Subject: [PATCH 1/7] feat(python): add StrategyPreference for controlling retrieval strategies Add StrategyPreference enum to control how the engine searches the document tree. This allows users to choose between different retrieval approaches including AUTO, KEYWORD, LLM, HYBRID, CROSS_DOCUMENT, and PAGE_RANGE strategies. The new StrategyPreference can be used with QueryContext to force specific retrieval behaviors: - KEYWORD: Fastest option with no LLM calls during search - LLM: Most accurate with deep reasoning capabilities - HYBRID: BM25 + LLM refinement approach - CROSS_DOCUMENT: Multi-document retrieval - PAGE_RANGE: Filter by page range - AUTO: Default behavior that auto-selects based on query complexity --- python/src/lib.rs | 88 +++++++++++++++++++++++++++++++++++ python/vectorless/__init__.py | 4 ++ rust/src/lib.rs | 3 ++ 3 files changed, 95 insertions(+) diff --git a/python/src/lib.rs b/python/src/lib.rs index a5da45bb..4a743842 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -15,6 +15,7 @@ use ::vectorless::client::{ }; use ::vectorless::error::Error as RustError; use ::vectorless::metrics::IndexMetrics; +use ::vectorless::StrategyPreference; // ============================================================ // Error Types @@ -266,6 +267,83 @@ impl PyIndexContext { } } +// ============================================================ +// StrategyPreference +// ============================================================ + +/// Retrieval strategy preference. +/// +/// Controls how the engine searches the document tree. 
+/// +/// ```python +/// from vectorless import QueryContext, StrategyPreference +/// +/// # Force keyword-only (fastest, no LLM calls during search) +/// ctx = QueryContext("revenue").with_doc_id(doc_id).with_strategy(StrategyPreference.KEYWORD) +/// +/// # Force LLM-guided navigation (most accurate, uses more tokens) +/// ctx = QueryContext("explain the architecture").with_doc_id(doc_id).with_strategy(StrategyPreference.LLM) +/// +/// # Force hybrid (BM25 + LLM refinement) +/// ctx = QueryContext("growth trends").with_doc_id(doc_id).with_strategy(StrategyPreference.HYBRID) +/// ``` +#[pyclass(name = "StrategyPreference", skip_from_py_object)] +#[derive(Clone)] +pub struct PyStrategyPreference { + inner: StrategyPreference, +} + +#[pymethods] +impl PyStrategyPreference { + /// Auto-select based on query complexity (default). + #[classattr] + const AUTO: PyStrategyPreference = PyStrategyPreference { + inner: StrategyPreference::Auto, + }; + + /// Force keyword-based strategy (fast, no LLM during search). + #[classattr] + const KEYWORD: PyStrategyPreference = PyStrategyPreference { + inner: StrategyPreference::ForceKeyword, + }; + + /// Force LLM-guided navigation (deep reasoning). + #[classattr] + const LLM: PyStrategyPreference = PyStrategyPreference { + inner: StrategyPreference::ForceLlm, + }; + + /// Force hybrid strategy (BM25 + LLM refinement). + #[classattr] + const HYBRID: PyStrategyPreference = PyStrategyPreference { + inner: StrategyPreference::ForceHybrid, + }; + + /// Force cross-document strategy (multi-document retrieval). + #[classattr] + const CROSS_DOCUMENT: PyStrategyPreference = PyStrategyPreference { + inner: StrategyPreference::ForceCrossDocument, + }; + + /// Force page-range strategy (filter by page range). 
+ #[classattr] + const PAGE_RANGE: PyStrategyPreference = PyStrategyPreference { + inner: StrategyPreference::ForcePageRange, + }; + + fn __repr__(&self) -> String { + let name = match self.inner { + StrategyPreference::Auto => "AUTO", + StrategyPreference::ForceKeyword => "KEYWORD", + StrategyPreference::ForceLlm => "LLM", + StrategyPreference::ForceHybrid => "HYBRID", + StrategyPreference::ForceCrossDocument => "CROSS_DOCUMENT", + StrategyPreference::ForcePageRange => "PAGE_RANGE", + }; + format!("StrategyPreference.{}", name) + } +} + // ============================================================ // QueryContext // ============================================================ @@ -335,6 +413,15 @@ impl PyQueryContext { Self { inner: ctx } } + /// Set the retrieval strategy. + /// + /// Args: + /// strategy: A StrategyPreference constant, e.g. StrategyPreference.LLM. + fn with_strategy(&self, strategy: &PyStrategyPreference) -> Self { + let ctx = self.inner.clone().with_strategy(strategy.inner); + Self { inner: ctx } + } + fn __repr__(&self) -> String { "QueryContext(...)".to_string() } @@ -1169,6 +1256,7 @@ fn _vectorless(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/python/vectorless/__init__.py b/python/vectorless/__init__.py index 4d66e2ca..c046ed90 100644 --- a/python/vectorless/__init__.py +++ b/python/vectorless/__init__.py @@ -26,9 +26,11 @@ IndexOptions, IndexResult, IndexItem, + IndexMetrics, QueryContext, QueryResult, QueryResultItem, + StrategyPreference, DocumentInfo, DocumentGraph, DocumentGraphNode, @@ -46,9 +48,11 @@ "IndexOptions", "IndexResult", "IndexItem", + "IndexMetrics", "QueryContext", "QueryResult", "QueryResultItem", + "StrategyPreference", "DocumentInfo", "DocumentGraph", "DocumentGraphNode", diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 689d331f..ea1d79d6 100644 --- a/rust/src/lib.rs +++ 
b/rust/src/lib.rs @@ -67,6 +67,9 @@ pub use client::{ QueryResultItem, }; +// Retrieval types +pub use retrieval::StrategyPreference; + // Error types pub use error::{Error, Result}; From 6eaea9ba6a088731f68216fb29546622a11f7e91 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Mon, 13 Apr 2026 22:08:55 +0800 Subject: [PATCH 2/7] feat(search): add Pure Pilot search algorithm and integrate with beam/MCTS Add Pure Pilot search algorithm that uses LLM guidance to pick the best child at each layer. Integrate Pilot scoring into beam search and MCTS with caching mechanism to avoid redundant LLM calls. - Add PurePilotSearch algorithm with 1.0 weight for Pilot scoring - Rename GreedySearch to PurePilotSearch and update implementation - Modify beam search to use Pilot as primary scorer with 0.7 weight - Enhance MCTS with Pilot-provided priors in UCT formula and guided simulation phase - Add PilotDecisionCache to prevent repeated LLM calls for same contexts - Update SearchAlgorithm enum with PurePilot variant and rename others - Add search_fallback_chain to PipelineContext for ordered algorithm execution BREAKING CHANGE: GreedySearch renamed to PurePilotSearch --- rust/src/retrieval/pilot/mod.rs | 2 +- rust/src/retrieval/pipeline/context.rs | 18 +- rust/src/retrieval/search/beam.rs | 210 ++++---------- rust/src/retrieval/search/greedy.rs | 179 ++++-------- rust/src/retrieval/search/mcts.rs | 336 ++++++++++++++++------ rust/src/retrieval/search/mod.rs | 5 +- rust/src/retrieval/search/pilot_scorer.rs | 236 +++++++++++++++ rust/src/retrieval/stages/plan.rs | 33 ++- rust/src/retrieval/stages/search.rs | 134 +++++++-- 9 files changed, 736 insertions(+), 417 deletions(-) create mode 100644 rust/src/retrieval/search/pilot_scorer.rs diff --git a/rust/src/retrieval/pilot/mod.rs b/rust/src/retrieval/pilot/mod.rs index 5af9cead..a6835b79 100644 --- a/rust/src/retrieval/pilot/mod.rs +++ b/rust/src/retrieval/pilot/mod.rs @@ -44,7 +44,7 @@ mod prompts; mod r#trait; pub use 
config::PilotConfig; -pub use decision::{InterventionPoint, PilotDecision}; +pub use decision::{InterventionPoint, PilotDecision, RankedCandidate, SearchDirection}; pub use llm_pilot::LlmPilot; pub use r#trait::{Pilot, SearchState}; diff --git a/rust/src/retrieval/pipeline/context.rs b/rust/src/retrieval/pipeline/context.rs index 484f41ca..bb54e127 100644 --- a/rust/src/retrieval/pipeline/context.rs +++ b/rust/src/retrieval/pipeline/context.rs @@ -23,11 +23,11 @@ use crate::retrieval::types::{ /// Search algorithm type. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum SearchAlgorithm { - /// Greedy single-path search. - Greedy, - /// Beam search with multiple paths. + /// Pure Pilot: beam=1, Pilot picks top-1 child at each layer. + PurePilot, + /// Beam search with Pilot scoring. Beam, - /// Monte Carlo Tree Search. + /// MCTS with Pilot priors. Mcts, } @@ -41,7 +41,7 @@ impl SearchAlgorithm { /// Get algorithm name. pub fn name(&self) -> &'static str { match self { - Self::Greedy => "greedy", + Self::PurePilot => "pure_pilot", Self::Beam => "beam", Self::Mcts => "mcts", } @@ -239,6 +239,9 @@ pub struct PipelineContext { pub selected_algorithm: Option, /// Search configuration. pub search_config: Option, + /// Ordered fallback chain for search algorithms. + /// When the primary algorithm's result is insufficient, try the next. + pub search_fallback_chain: Vec, // ============ Search Stage Output ============ /// Candidate nodes from search. 
@@ -307,6 +310,11 @@ impl PipelineContext { selected_strategy: None, selected_algorithm: None, search_config: None, + search_fallback_chain: vec![ + SearchAlgorithm::Beam, + SearchAlgorithm::Mcts, + SearchAlgorithm::PurePilot, + ], candidates: Vec::new(), search_paths: Vec::new(), reasoning_chain: ReasoningChain::new(), diff --git a/rust/src/retrieval/search/beam.rs b/rust/src/retrieval/search/beam.rs index 73bf0cc1..a7319988 100644 --- a/rust/src/retrieval/search/beam.rs +++ b/rust/src/retrieval/search/beam.rs @@ -1,32 +1,33 @@ // Copyright (c) 2026 vectorless developers // SPDX-License-Identifier: Apache-2.0 -//! Beam search algorithm with Pilot integration. +//! Beam search algorithm with Pilot as primary scorer. //! -//! Explores multiple paths in parallel, keeping only the top-k candidates at each level. -//! When a Pilot is provided, it can intervene at fork points to provide semantic guidance. +//! Explores multiple paths in parallel, keeping only the top-k candidates +//! at each level. Pilot provides semantic guidance; NodeScorer is the +//! fallback when Pilot is unavailable. use async_trait::async_trait; use std::collections::HashSet; -use tracing::{debug, trace}; +use tracing::debug; use super::super::RetrievalContext; use super::super::types::{NavigationDecision, NavigationStep, SearchPath}; -use super::scorer::{NodeScorer, ScoringContext}; +use super::pilot_scorer::{PilotDecisionCache, score_candidates}; use super::{SearchConfig, SearchResult, SearchTree}; use crate::document::{DocumentTree, NodeId}; -use crate::retrieval::pilot::{Pilot, SearchState}; +use crate::retrieval::pilot::Pilot; -/// Beam search - explores multiple paths simultaneously. +/// Beam search — explores multiple paths simultaneously. /// /// Keeps top `beam_width` candidates at each level, providing /// a balance between exploration and computational cost. 
/// /// # Pilot Integration /// -/// When a Pilot is provided, the algorithm consults it at fork points -/// (when multiple candidates are available) to get semantic guidance -/// on which branches are most relevant to the query. +/// Pilot is the primary scorer (weight=0.7). NodeScorer supplements +/// for candidates Pilot didn't rank. Decisions are cached by +/// (query, parent_node_id) to avoid redundant LLM calls. pub struct BeamSearch { beam_width: usize, } @@ -44,72 +45,7 @@ impl BeamSearch { } } - /// Create a scorer for the given query. - fn create_scorer(&self, query: &str) -> NodeScorer { - NodeScorer::new(ScoringContext::new(query)) - } - - /// Score candidates using a query-specific scorer. - fn score_candidates_with_query( - &self, - tree: &DocumentTree, - candidates: &[NodeId], - query: &str, - ) -> Vec<(NodeId, f32)> { - let scorer = self.create_scorer(query); - scorer.score_and_sort(tree, candidates) - } - - /// Merge algorithm scores with Pilot decision. - /// - /// Uses weighted combination: `final = α * algo + β * pilot` - /// where α = 0.4 and β = 0.6 * confidence - fn merge_with_pilot_decision( - &self, - tree: &DocumentTree, - candidates: &[NodeId], - pilot_decision: &crate::retrieval::pilot::PilotDecision, - query: &str, - ) -> Vec<(NodeId, f32)> { - let scorer = self.create_scorer(query); - let alpha = 0.4; - let beta = 0.6 * pilot_decision.confidence; - - // Build a map from node_id to pilot score - let mut pilot_scores: std::collections::HashMap = - std::collections::HashMap::new(); - for ranked in &pilot_decision.ranked_candidates { - pilot_scores.insert(ranked.node_id, ranked.score); - } - - // Merge scores - let mut merged: Vec<(NodeId, f32)> = candidates - .iter() - .map(|&node_id| { - let algo_score = scorer.score(tree, node_id); - let pilot_score = pilot_scores.get(&node_id).copied().unwrap_or(0.0); - - // Weighted combination - let final_score = if beta > 0.0 { - (alpha * algo_score + beta * pilot_score) / (alpha + beta) - } else { 
- algo_score - }; - - (node_id, final_score) - }) - .collect(); - - // Sort by merged score - merged.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)); - - merged - } - /// Core beam search logic parameterized by start node. - /// - /// This is the shared implementation used by both `search` (starts from root) - /// and `search_from` (starts from an arbitrary node). async fn search_impl( &self, tree: &DocumentTree, @@ -121,8 +57,8 @@ impl BeamSearch { let mut result = SearchResult::default(); let beam_width = config.beam_width.min(self.beam_width); let mut visited: HashSet = HashSet::new(); + let cache = PilotDecisionCache::new(); - // Mark start_node as visited so we don't go back up visited.insert(start_node); debug!( @@ -130,48 +66,27 @@ impl BeamSearch { context.query, start_node, beam_width, config.min_score ); - // Track Pilot interventions let mut pilot_interventions = 0; // Initialize with start_node's children let start_children = tree.children(start_node); debug!("Start node has {} children", start_children.len()); - // Check if Pilot wants to guide the start. - // Pass start_node so the pilot evaluates the correct children. 
- let initial_candidates = if let Some(p) = pilot { - debug!( - "BeamSearch: Pilot is available, name={}, guide_at_start={}", - p.name(), - p.config().guide_at_start - ); - if p.config().guide_at_start { - if let Some(guidance) = p.guide_start(tree, &context.query, start_node).await { - debug!( - "Pilot provided start guidance with confidence {}", - guidance.confidence - ); - pilot_interventions += 1; - - if guidance.has_candidates() { - self.merge_with_pilot_decision( - tree, - &start_children, - &guidance, - &context.query, - ) - } else { - self.score_candidates_with_query(tree, &start_children, &context.query) - } - } else { - self.score_candidates_with_query(tree, &start_children, &context.query) - } - } else { - self.score_candidates_with_query(tree, &start_children, &context.query) - } - } else { - self.score_candidates_with_query(tree, &start_children, &context.query) - }; + let initial_candidates = score_candidates( + tree, + &start_children, + &context.query, + pilot, + &[], + &visited, + 0.7, // Beam: Pilot weight = 0.7 + Some(&cache), + ) + .await; + + if pilot.is_some() && !start_children.is_empty() { + pilot_interventions += 1; + } let mut current_beam: Vec = initial_candidates .into_iter() @@ -208,46 +123,21 @@ impl BeamSearch { // Expand this path let children = tree.children(leaf_id); - // ========== Pilot Intervention Point ========== - let scored_children = if let Some(p) = pilot { - let state = SearchState::new( - tree, - &context.query, - &path.nodes, - &children, - &visited, - ); - - if p.should_intervene(&state) { - trace!( - "Pilot intervening at fork with {} candidates", - children.len() - ); - - match p.decide(&state).await { - decision => { - pilot_interventions += 1; - debug!( - "Pilot decision: confidence={}, direction={:?}", - decision.confidence, - std::mem::discriminant(&decision.direction) - ); - - self.merge_with_pilot_decision( - tree, - &children, - &decision, - &context.query, - ) - } - } - } else { - 
self.score_candidates_with_query(tree, &children, &context.query) - } - } else { - self.score_candidates_with_query(tree, &children, &context.query) - }; - // ============================================== + let scored_children = score_candidates( + tree, + &children, + &context.query, + pilot, + &path.nodes, + &visited, + 0.7, // Beam: Pilot weight = 0.7 + Some(&cache), + ) + .await; + + if pilot.is_some() && !children.is_empty() { + pilot_interventions += 1; + } for (child_id, child_score) in scored_children.into_iter().take(beam_width) { let new_path = path.extend(child_id, child_score); @@ -294,9 +184,19 @@ impl BeamSearch { // Fallback: if no results found, add best candidates regardless of score if result.paths.is_empty() && config.min_score > 0.0 { debug!("No results above min_score, adding best candidates as fallback"); - let all_candidates = - self.score_candidates_with_query(tree, &tree.children(start_node), &context.query); - for (node_id, score) in all_candidates.into_iter().take(config.top_k) { + let all_children = tree.children(start_node); + let fallback = score_candidates( + tree, + &all_children, + &context.query, + None, // No Pilot for fallback + &[], + &visited, + 0.7, + None, + ) + .await; + for (node_id, score) in fallback.into_iter().take(config.top_k) { result.paths.push(SearchPath::from_node(node_id, score)); } } diff --git a/rust/src/retrieval/search/greedy.rs b/rust/src/retrieval/search/greedy.rs index 812cf5be..34ed0de5 100644 --- a/rust/src/retrieval/search/greedy.rs +++ b/rust/src/retrieval/search/greedy.rs @@ -1,91 +1,37 @@ // Copyright (c) 2026 vectorless developers // SPDX-License-Identifier: Apache-2.0 -//! Greedy search algorithm with Pilot integration. +//! Pure Pilot search — LLM-guided single-path tree navigation. //! -//! Simple depth-first search that always follows the highest-scoring child. -//! When a Pilot is provided, it can provide semantic guidance at decision points. +//! 
At each layer, the Pilot scores all children and picks the top-1. +//! This is the most accurate (but slowest) approach: one LLM call per layer. +//! Falls back to NodeScorer when Pilot is unavailable. use async_trait::async_trait; -use tracing::{debug, trace}; +use std::collections::HashSet; +use tracing::debug; use super::super::RetrievalContext; use super::super::types::{NavigationDecision, NavigationStep, SearchPath}; -use super::scorer::{NodeScorer, ScoringContext}; +use super::pilot_scorer::{PilotDecisionCache, score_candidates}; use super::{SearchConfig, SearchResult, SearchTree}; use crate::document::{DocumentTree, NodeId}; -use crate::retrieval::pilot::{Pilot, SearchState}; +use crate::retrieval::pilot::Pilot; -/// Greedy search - always follows the best single path. +/// Pure Pilot search — Pilot picks the best child at each layer. /// -/// Fast but may miss relevant content in other branches. -/// When a Pilot is provided, it can guide the search at key decision points. -pub struct GreedySearch; +/// beam=1: at each level, Pilot evaluates all children and the search +/// follows only the top-ranked one. When Pilot is unavailable, +/// falls back to NodeScorer (keyword/BM25). +pub struct PurePilotSearch; -impl GreedySearch { - /// Create a new greedy search. +impl PurePilotSearch { + /// Create a new Pure Pilot search. pub fn new() -> Self { Self } - /// Create a scorer for the given query. - fn create_scorer(&self, query: &str) -> NodeScorer { - NodeScorer::new(ScoringContext::new(query)) - } - - /// Score candidates using a query-specific scorer. - fn score_candidates_with_query( - &self, - tree: &DocumentTree, - candidates: &[NodeId], - query: &str, - ) -> Vec<(NodeId, f32)> { - let scorer = self.create_scorer(query); - scorer.score_and_sort(tree, candidates) - } - - /// Merge algorithm scores with Pilot decision. 
- fn merge_with_pilot_decision( - &self, - tree: &DocumentTree, - candidates: &[NodeId], - pilot_decision: &crate::retrieval::pilot::PilotDecision, - query: &str, - ) -> Vec<(NodeId, f32)> { - let scorer = self.create_scorer(query); - let alpha = 0.4; - let beta = 0.6 * pilot_decision.confidence; - - // Build a map from node_id to pilot score - let mut pilot_scores: std::collections::HashMap = - std::collections::HashMap::new(); - for ranked in &pilot_decision.ranked_candidates { - pilot_scores.insert(ranked.node_id, ranked.score); - } - - // Merge scores - let mut merged: Vec<(NodeId, f32)> = candidates - .iter() - .map(|&node_id| { - let algo_score = scorer.score(tree, node_id); - let pilot_score = pilot_scores.get(&node_id).copied().unwrap_or(0.0); - - let final_score = if beta > 0.0 { - (alpha * algo_score + beta * pilot_score) / (alpha + beta) - } else { - algo_score - }; - - (node_id, final_score) - }) - .collect(); - - merged.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)); - - merged - } - - /// Core greedy search logic parameterized by start node. + /// Core search logic parameterized by start node. 
async fn search_impl( &self, tree: &DocumentTree, @@ -97,24 +43,22 @@ impl GreedySearch { let mut result = SearchResult::default(); let mut current_path = SearchPath::new(); let mut current_node = start_node; - let mut visited: std::collections::HashSet = std::collections::HashSet::new(); + let mut visited: HashSet = HashSet::new(); + let cache = PilotDecisionCache::new(); debug!( - "GreedySearch: query='{}', start_node={:?}, max_iterations={}, min_score={:.2}", + "PurePilotSearch: query='{}', start_node={:?}, max_iterations={}, min_score={:.2}", context.query, start_node, config.max_iterations, config.min_score ); - // Track Pilot interventions let mut pilot_interventions = 0; for iteration in 0..config.max_iterations { result.iterations = iteration + 1; - // Get children of current node let children = tree.children(current_node); if children.is_empty() { - // Leaf node - add to results current_path.leaf = Some(current_node); if !config.leaf_only || tree.is_leaf(current_node) { result.paths.push(current_path.clone()); @@ -122,48 +66,25 @@ impl GreedySearch { break; } - // ========== Pilot Integration Point ========== - let scored_children = if let Some(p) = pilot { - let state = SearchState::new( - tree, - &context.query, - ¤t_path.nodes, - &children, - &visited, - ); - - if p.should_intervene(&state) { - trace!( - "Pilot intervening at greedy decision point with {} candidates", - children.len() - ); - - match p.decide(&state).await { - decision => { - pilot_interventions += 1; - debug!( - "Pilot decision: confidence={}, direction={:?}", - decision.confidence, - std::mem::discriminant(&decision.direction) - ); - - self.merge_with_pilot_decision( - tree, - &children, - &decision, - &context.query, - ) - } - } - } else { - self.score_candidates_with_query(tree, &children, &context.query) - } - } else { - self.score_candidates_with_query(tree, &children, &context.query) - }; - // ============================================== + // Pilot as primary scorer 
(weight=1.0), NodeScorer as fallback. + // Always consult Pilot — no should_intervene guard. + let scored_children = score_candidates( + tree, + &children, + &context.query, + pilot, + ¤t_path.nodes, + &visited, + 1.0, // PurePilot: Pilot weight = 1.0 + Some(&cache), + ) + .await; + + if pilot.is_some() { + pilot_interventions += 1; + } - // Find the best child that meets minimum score + // Take only top-1 let mut best_child = None; let mut best_score = 0.0; @@ -178,7 +99,6 @@ impl GreedySearch { if let Some(child_id) = best_child { visited.insert(child_id); - // Record navigation step let child_node = tree.get(child_id); result.trace.push(NavigationStep { node_id: format!("{:?}", child_id), @@ -190,7 +110,6 @@ impl GreedySearch { depth: child_node.map(|n| n.depth).unwrap_or(0), }); - // Update path current_path = current_path.extend(child_id, best_score); current_node = child_id; result.nodes_visited += 1; @@ -199,7 +118,6 @@ impl GreedySearch { break; } } else { - // No good children found - add current path as result current_path.leaf = Some(current_node); if current_path.score > 0.0 { result.paths.push(current_path); @@ -209,19 +127,18 @@ impl GreedySearch { } result.pilot_interventions = pilot_interventions; - result } } -impl Default for GreedySearch { +impl Default for PurePilotSearch { fn default() -> Self { Self::new() } } #[async_trait] -impl SearchTree for GreedySearch { +impl SearchTree for PurePilotSearch { async fn search( &self, tree: &DocumentTree, @@ -246,6 +163,22 @@ impl SearchTree for GreedySearch { } fn name(&self) -> &'static str { - "greedy" + "pure_pilot" + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_pure_pilot_creation() { + let _search = PurePilotSearch::new(); + } + + #[test] + fn test_pure_pilot_default() { + let search = PurePilotSearch::default(); + assert_eq!(search.name(), "pure_pilot"); } } diff --git a/rust/src/retrieval/search/mcts.rs b/rust/src/retrieval/search/mcts.rs index 667a0d28..7886991f 100644 
--- a/rust/src/retrieval/search/mcts.rs +++ b/rust/src/retrieval/search/mcts.rs @@ -1,19 +1,28 @@ // Copyright (c) 2026 vectorless developers // SPDX-License-Identifier: Apache-2.0 -//! Monte Carlo Tree Search (MCTS) algorithm with Pilot integration. +//! Monte Carlo Tree Search (MCTS) with Pilot-provided priors. //! -//! Balances exploration and exploitation using UCT formula. -//! When a Pilot is provided, it can provide semantic guidance at decision points. +//! Uses UCT (Upper Confidence Bound for Trees) to balance exploration +//! and exploitation. Pilot provides prior scores for the UCT formula, +//! and guides the simulation (rollout) phase. NodeScorer is the fallback +//! when Pilot is unavailable. +//! +//! # Async +//! +//! Both selection and simulation phases are async because Pilot.decide() +//! requires an LLM call. Pilot decisions are cached by (query, parent_node_id) +//! so repeated visits to the same node don't trigger redundant LLM calls. use async_trait::async_trait; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; +use tracing::debug; use super::super::RetrievalContext; use super::super::types::{NavigationDecision, NavigationStep, SearchPath}; -use super::scorer::NodeScorer; +use super::pilot_scorer::{PilotDecisionCache, score_candidates}; +use super::scorer::{NodeScorer, ScoringContext}; use super::{SearchConfig, SearchResult, SearchTree}; -use crate::config::StrategyConfig; use crate::document::{DocumentTree, NodeId}; use crate::retrieval::pilot::Pilot; @@ -26,12 +35,12 @@ struct NodeStats { total_score: f32, } -/// Monte Carlo Tree Search implementation. +/// MCTS search with Pilot integration. /// -/// Uses UCT (Upper Confidence Bound for Trees) to balance -/// exploration of new paths with exploitation of promising ones. +/// Pilot provides prior scores that seed the UCT formula. This gives +/// MCTS semantic guidance while preserving the exploration/exploitation +/// balance. 
NodeScorer is used as fallback when Pilot is unavailable. pub struct MctsSearch { - scorer: NodeScorer, /// Exploration constant for UCT. exploration_weight: f32, } @@ -39,14 +48,8 @@ pub struct MctsSearch { impl MctsSearch { /// Create a new MCTS search. pub fn new() -> Self { - Self::with_config(&StrategyConfig::default()) - } - - /// Create with configuration. - pub fn with_config(config: &StrategyConfig) -> Self { Self { - scorer: NodeScorer::new(Default::default()), - exploration_weight: config.exploration_weight, + exploration_weight: 1.414, // sqrt(2), classic UCT default } } @@ -57,26 +60,35 @@ impl MctsSearch { } /// Calculate UCT score for a child node. + /// + /// `prior_score` comes from Pilot (or NodeScorer fallback). fn uct_score(&self, child_stats: &NodeStats, parent_visits: usize, prior_score: f32) -> f32 { if child_stats.visits == 0 { - // Unvisited nodes get high priority + // Unvisited nodes get high priority + prior bonus return f32::INFINITY; } let exploitation = child_stats.total_score / child_stats.visits as f32; - let exploration = self.exploration_weight * (parent_visits as f32).ln().sqrt() - / child_stats.visits as f32; + let exploration = + self.exploration_weight * (parent_visits as f32).ln().sqrt() / child_stats.visits as f32; - // Combine with prior score from scorer + // Blend exploitation with Pilot prior 0.5 * (exploitation + prior_score) + exploration } - /// Select best child using UCT. - fn select_child( + /// Select best child using UCT with Pilot priors. + /// + /// When Pilot is available, fetches priors via the cache. + /// Falls back to NodeScorer when Pilot is unavailable. 
+ async fn select_child( &self, tree: &DocumentTree, + context: &RetrievalContext, node_id: NodeId, stats: &HashMap, + pilot: Option<&dyn Pilot>, + cache: &PilotDecisionCache, + visited: &HashSet, ) -> Option<(NodeId, f32)> { let children = tree.children(node_id); if children.is_empty() { @@ -86,28 +98,66 @@ impl MctsSearch { let parent_stats = stats.get(&node_id).cloned().unwrap_or_default(); let parent_visits = parent_stats.visits.max(1); + // Get Pilot priors for all children (cached) + let priors = score_candidates( + tree, + &children, + &context.query, + pilot, + &[node_id], // simplified path for UCT context + visited, + 0.5, // MCTS prior: balanced Pilot/Scorer + Some(cache), + ) + .await; + + // Build prior map + let prior_map: HashMap = priors.into_iter().collect(); + let mut best_child = None; let mut best_score = f32::NEG_INFINITY; for &child_id in &children { - let prior_score = self.scorer.score(tree, child_id); + let prior = prior_map.get(&child_id).copied().unwrap_or_else(|| { + let scorer = NodeScorer::new(ScoringContext::new(&context.query)); + scorer.score(tree, child_id) + }); let child_stats = stats.get(&child_id).cloned().unwrap_or_default(); - let uct = self.uct_score(&child_stats, parent_visits, prior_score); + let uct = self.uct_score(&child_stats, parent_visits, prior); if uct > best_score { best_score = uct; - best_child = Some((child_id, prior_score)); + best_child = Some((child_id, prior)); } } best_child } - /// Simulate a random rollout from a node. - fn simulate(&self, tree: &DocumentTree, node_id: NodeId, max_depth: usize) -> f32 { + /// Simulate a rollout from a node using Pilot-guided greedy descent. + /// + /// When Pilot is available, each layer picks the top-1 Pilot-scored child. + /// Falls back to NodeScorer when Pilot is unavailable. 
+ async fn simulate( + &self, + tree: &DocumentTree, + context: &RetrievalContext, + node_id: NodeId, + max_depth: usize, + pilot: Option<&dyn Pilot>, + cache: &PilotDecisionCache, + visited: &HashSet, + ) -> f32 { let mut current = node_id; let mut depth = 0; - let mut total_score = self.scorer.score(tree, current); + let mut path = vec![node_id]; + let mut total_score = 0.0f32; + let mut count = 0; + + // Initial score + let scorer = NodeScorer::new(ScoringContext::new(&context.query)); + total_score += scorer.score(tree, current); + count += 1; while depth < max_depth { let children = tree.children(current); @@ -115,18 +165,31 @@ impl MctsSearch { break; } - // Random selection (or use scorer for semi-random) - let scored = self.scorer.score_and_sort(tree, &children); - if let Some((child_id, score)) = scored.first() { + // Use Pilot for greedy descent (cached) + let scored = score_candidates( + tree, + &children, + &context.query, + pilot, + &path, + visited, + 0.5, // MCTS simulation: balanced + Some(cache), + ) + .await; + + if let Some(&(child_id, score)) = scored.first() { total_score += score; - current = *child_id; + path.push(child_id); + current = child_id; } else { break; } depth += 1; + count += 1; } - total_score / (depth + 1).max(1) as f32 + total_score / count.max(1) as f32 } /// Backpropagate score up the tree. @@ -137,43 +200,48 @@ impl MctsSearch { node_stats.total_score += score; } } -} - -impl Default for MctsSearch { - fn default() -> Self { - Self::new() - } -} -#[async_trait] -impl SearchTree for MctsSearch { - async fn search( + /// Core MCTS logic parameterized by start node. 
+ async fn search_impl( &self, tree: &DocumentTree, context: &RetrievalContext, config: &SearchConfig, - _pilot: Option<&dyn Pilot>, + pilot: Option<&dyn Pilot>, + start_node: NodeId, ) -> SearchResult { - // Note: Pilot integration for MCTS can be added in Phase 2 - // For now, we keep the original behavior let mut result = SearchResult::default(); let mut stats: HashMap = HashMap::new(); - let root = tree.root(); + let cache = PilotDecisionCache::new(); + let visited: HashSet = HashSet::new(); // Initialize root stats - stats.insert(root, NodeStats::default()); + stats.insert(start_node, NodeStats::default()); + + debug!( + "MctsSearch: query='{}', start_node={:?}, max_iterations={}, exploration={:.2}", + context.query, start_node, config.max_iterations, self.exploration_weight + ); + + let mut pilot_interventions = 0; for iteration in 0..config.max_iterations { result.iterations = iteration + 1; - // Selection phase - traverse tree using UCT - let mut path = vec![root]; - let mut current = root; + // === Selection phase: traverse tree using UCT === + let mut path = vec![start_node]; + let mut current = start_node; while !tree.is_leaf(current) { - if let Some((child_id, score)) = self.select_child(tree, current, &stats) { + if let Some((child_id, _score)) = self + .select_child(tree, context, current, &stats, pilot, &cache, &visited) + .await + { path.push(child_id); current = child_id; + if pilot.is_some() { + pilot_interventions += 1; + } } else { break; } @@ -181,14 +249,20 @@ impl SearchTree for MctsSearch { result.nodes_visited += path.len(); - // Simulation phase - random rollout - let leaf = *path.last().unwrap_or(&root); - let sim_score = self.simulate(tree, leaf, 5); + // === Simulation phase: Pilot-guided rollout === + let leaf = *path.last().unwrap_or(&start_node); + let sim_score = self + .simulate(tree, context, leaf, 5, pilot, &cache, &visited) + .await; + + if pilot.is_some() { + pilot_interventions += 1; + } - // Backpropagation phase + // === 
Backpropagation phase === self.backpropagate(&mut stats, &path, sim_score); - // Record trace for the last node in path + // Record trace for the last node if let Some(&last_id) = path.last() { let node = tree.get(last_id); result.trace.push(NavigationStep { @@ -200,67 +274,139 @@ impl SearchTree for MctsSearch { }); } - // Check if we have enough visits to extract paths + // Periodically extract paths (every 10 iterations) if iteration > 0 && iteration % 10 == 0 { - // Extract best paths from visited nodes - let root_children = tree.children(root); - let mut scored_children: Vec<_> = root_children - .iter() - .filter_map(|&child_id| { - stats.get(&child_id).map(|s| { - let avg_score = if s.visits > 0 { - s.total_score / s.visits as f32 - } else { - 0.0 - }; - (child_id, avg_score) - }) - }) - .collect(); - - scored_children - .sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)); - - for (child_id, score) in scored_children.iter().take(config.top_k) { - if *score >= config.min_score { - result.paths.push(SearchPath::from_node(*child_id, *score)); - } - } + self.extract_paths( + tree, + start_node, + &stats, + config.min_score, + config.top_k, + &mut result, + ); } } // Final extraction of best paths + self.extract_paths( + tree, + start_node, + &stats, + config.min_score, + config.top_k, + &mut result, + ); + + result.pilot_interventions = pilot_interventions; + result + } + + /// Extract best paths from MCTS statistics. 
+ fn extract_paths( + &self, + tree: &DocumentTree, + root: NodeId, + stats: &HashMap, + min_score: f32, + top_k: usize, + result: &mut SearchResult, + ) { let root_children = tree.children(root); - let mut final_paths: Vec<_> = root_children + let mut scored_children: Vec<_> = root_children .iter() .filter_map(|&child_id| { stats.get(&child_id).map(|s| { let avg_score = if s.visits > 0 { s.total_score / s.visits as f32 } else { - self.scorer.score(tree, child_id) + 0.0 }; - SearchPath::from_node(child_id, avg_score) + (child_id, avg_score) }) }) .collect(); - final_paths.sort_by(|a, b| { - b.score - .partial_cmp(&a.score) - .unwrap_or(std::cmp::Ordering::Equal) - }); - final_paths.truncate(config.top_k); + scored_children.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)); - result.paths = final_paths + // Clear existing paths and re-extract + result.paths = scored_children .into_iter() - .filter(|p| p.score >= config.min_score) + .filter(|(_, score)| *score >= min_score) + .take(top_k) + .map(|(node_id, score)| SearchPath::from_node(node_id, score)) .collect(); + } +} - result +impl Default for MctsSearch { + fn default() -> Self { + Self::new() + } +} + +#[async_trait] +impl SearchTree for MctsSearch { + async fn search( + &self, + tree: &DocumentTree, + context: &RetrievalContext, + config: &SearchConfig, + pilot: Option<&dyn Pilot>, + ) -> SearchResult { + self.search_impl(tree, context, config, pilot, tree.root()) + .await + } + + async fn search_from( + &self, + tree: &DocumentTree, + context: &RetrievalContext, + config: &SearchConfig, + pilot: Option<&dyn Pilot>, + start_node: NodeId, + ) -> SearchResult { + self.search_impl(tree, context, config, pilot, start_node) + .await } fn name(&self) -> &'static str { "mcts" } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_mcts_creation() { + let search = MctsSearch::new(); + assert!((search.exploration_weight - 1.414).abs() < 0.01); + } + + #[test] + fn 
test_mcts_custom_exploration() { + let search = MctsSearch::new().with_exploration(2.0); + assert!((search.exploration_weight - 2.0).abs() < 0.01); + } + + #[test] + fn test_uct_unvisited() { + let search = MctsSearch::new(); + let stats = NodeStats::default(); + let score = search.uct_score(&stats, 10, 0.5); + assert!(score.is_infinite()); + } + + #[test] + fn test_uct_visited() { + let search = MctsSearch::new(); + let stats = NodeStats { + visits: 5, + total_score: 3.0, + }; + let score = search.uct_score(&stats, 20, 0.8); + assert!(score.is_finite()); + assert!(score > 0.0); + } +} diff --git a/rust/src/retrieval/search/mod.rs b/rust/src/retrieval/search/mod.rs index 39e78cc3..8ddceed5 100644 --- a/rust/src/retrieval/search/mod.rs +++ b/rust/src/retrieval/search/mod.rs @@ -7,12 +7,15 @@ mod beam; mod bm25; mod greedy; mod mcts; +mod pilot_scorer; mod scorer; mod toc_navigator; mod r#trait; pub use beam::BeamSearch; pub use bm25::{Bm25Engine, Bm25Params, FieldDocument, STOPWORDS, extract_keywords}; -pub use greedy::GreedySearch; +pub use greedy::PurePilotSearch; +pub use mcts::MctsSearch; +pub use pilot_scorer::{PilotDecisionCache, score_candidates}; pub use toc_navigator::{SearchCue, ToCNavigator}; pub use r#trait::{SearchConfig, SearchResult, SearchTree}; diff --git a/rust/src/retrieval/search/pilot_scorer.rs b/rust/src/retrieval/search/pilot_scorer.rs new file mode 100644 index 00000000..a70cbb3f --- /dev/null +++ b/rust/src/retrieval/search/pilot_scorer.rs @@ -0,0 +1,236 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Shared Pilot-as-primary scoring helper. +//! +//! All three search algorithms (PurePilot, Beam, MCTS) use this module +//! to score child candidates. Pilot is the primary scorer; NodeScorer +//! provides a fallback when Pilot is unavailable or budget is exhausted. +//! +//! # Caching +//! +//! Pilot decisions are cached by `(query, parent_node_id)` to avoid +//! 
redundant LLM calls when the same node is revisited (e.g. MCTS +//! selection phase revisits a node multiple times). + +use std::collections::{HashMap, HashSet}; +use std::sync::Arc; +use tokio::sync::Mutex; + +use crate::document::{DocumentTree, NodeId}; +use crate::retrieval::pilot::{Pilot, PilotDecision, SearchState}; +use super::scorer::{NodeScorer, ScoringContext}; + +/// Cache key: (query_fingerprint, parent_node_id). +type CacheKey = (u64, NodeId); + +/// Shared Pilot decision cache. +/// +/// Thread-safe, query-scoped cache that stores Pilot decisions keyed by +/// (query hash, parent node ID). Prevents redundant LLM calls when the +/// same (query, node) pair is scored multiple times (common in MCTS). +#[derive(Debug, Clone, Default)] +pub struct PilotDecisionCache { + inner: Arc>>, +} + +impl PilotDecisionCache { + /// Create a new empty cache. + pub fn new() -> Self { + Self::default() + } + + /// Compute cache key from query and parent node. + fn cache_key(query: &str, parent: NodeId) -> CacheKey { + use std::hash::{Hash, Hasher}; + let mut hasher = std::collections::hash_map::DefaultHasher::new(); + query.hash(&mut hasher); + (hasher.finish(), parent) + } + + /// Try to get a cached decision. + pub async fn get(&self, query: &str, parent: NodeId) -> Option { + let key = Self::cache_key(query, parent); + let cache = self.inner.lock().await; + cache.get(&key).cloned() + } + + /// Store a decision in the cache. + pub async fn put(&self, query: &str, parent: NodeId, decision: &PilotDecision) { + let key = Self::cache_key(query, parent); + let mut cache = self.inner.lock().await; + cache.entry(key).or_insert_with(|| decision.clone()); + } + + /// Clear the cache. + pub async fn clear(&self) { + self.inner.lock().await.clear(); + } +} + +/// Score child candidates using Pilot as primary, NodeScorer as fallback. +/// +/// Pilot decisions are cached by (query, parent_node_id). 
Subsequent calls +/// with the same arguments return cached results without LLM calls. +/// +/// `pilot_weight` controls how much Pilot vs NodeScorer contributes: +/// - 1.0 = PurePilot (only Pilot scores matter) +/// - 0.7 = Beam (Pilot dominant, NodeScorer as secondary) +/// - 0.5 = MCTS prior (balanced) +pub async fn score_candidates( + tree: &DocumentTree, + candidates: &[NodeId], + query: &str, + pilot: Option<&dyn Pilot>, + path: &[NodeId], + visited: &HashSet, + pilot_weight: f32, + cache: Option<&PilotDecisionCache>, +) -> Vec<(NodeId, f32)> { + if candidates.is_empty() { + return Vec::new(); + } + + // If no Pilot, pure NodeScorer + let Some(p) = pilot else { + return score_with_scorer(tree, candidates, query); + }; + + if !p.is_active() { + return score_with_scorer(tree, candidates, query); + } + + // Determine parent node (last in path) for cache key + let parent = path.last().copied().unwrap_or(tree.root()); + + // Check cache first + let decision = if let Some(c) = cache { + if let Some(cached) = c.get(query, parent).await { + tracing::trace!("Pilot cache hit for parent={:?}", parent); + cached + } else { + let state = SearchState::new(tree, query, path, candidates, visited); + let d = p.decide(&state).await; + c.put(query, parent, &d).await; + d + } + } else { + let state = SearchState::new(tree, query, path, candidates, visited); + p.decide(&state).await + }; + + // Build Pilot score map + let mut pilot_scores: HashMap = HashMap::new(); + for ranked in &decision.ranked_candidates { + pilot_scores.insert(ranked.node_id, ranked.score); + } + + // Compute NodeScorer fallback scores + let scorer_weight = 1.0 - pilot_weight; + let confidence = decision.confidence; + let effective_pilot = pilot_weight * confidence; + + let scorer = NodeScorer::new(ScoringContext::new(query)); + + let mut scored: Vec<(NodeId, f32)> = candidates + .iter() + .map(|&node_id| { + let algo_score = scorer.score(tree, node_id); + let p_score = 
pilot_scores.get(&node_id).copied().unwrap_or(0.0); + + let final_score = if effective_pilot > 0.0 && pilot_scores.contains_key(&node_id) { + (effective_pilot * p_score + scorer_weight * algo_score) + / (effective_pilot + scorer_weight) + } else { + algo_score + }; + + (node_id, final_score) + }) + .collect(); + + scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)); + scored +} + +/// Pure NodeScorer fallback. +fn score_with_scorer( + tree: &DocumentTree, + candidates: &[NodeId], + query: &str, +) -> Vec<(NodeId, f32)> { + let scorer = NodeScorer::new(ScoringContext::new(query)); + scorer.score_and_sort(tree, candidates) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::document::TreeNode; + use indextree::Arena; + + /// Helper to create a NodeId from an Arena for tests. + fn make_node_id(arena: &mut Arena) -> NodeId { + NodeId(arena.new_node(TreeNode::default())) + } + + #[test] + fn test_cache_key_deterministic() { + let mut arena = Arena::new(); + let nid = make_node_id(&mut arena); + + let key1 = PilotDecisionCache::cache_key("hello", nid); + let key2 = PilotDecisionCache::cache_key("hello", nid); + assert_eq!(key1, key2); + + let key3 = PilotDecisionCache::cache_key("world", nid); + assert_ne!(key1, key3); + } + + #[tokio::test] + async fn test_cache_hit() { + let mut arena = Arena::new(); + let nid0 = make_node_id(&mut arena); + let nid1 = make_node_id(&mut arena); + + let cache = PilotDecisionCache::new(); + use crate::retrieval::pilot::{RankedCandidate, SearchDirection}; + + let decision = PilotDecision::new( + vec![RankedCandidate::new(nid1, 0.9)], + SearchDirection::GoDeeper { reason: "test".into() }, + 0.8, + "test".into(), + ); + + cache.put("query", nid0, &decision).await; + let hit = cache.get("query", nid0).await; + assert!(hit.is_some()); + assert_eq!(hit.unwrap().confidence, 0.8); + + let miss = cache.get("other", nid0).await; + assert!(miss.is_none()); + } + + #[tokio::test] + async fn test_cache_clear() { 
+ let mut arena = Arena::new(); + let nid = make_node_id(&mut arena); + + let cache = PilotDecisionCache::new(); + use crate::retrieval::pilot::SearchDirection; + + let decision = PilotDecision::new( + vec![], + SearchDirection::GoDeeper { reason: "test".into() }, + 0.5, + "test".into(), + ); + + cache.put("q", nid, &decision).await; + assert!(cache.get("q", nid).await.is_some()); + + cache.clear().await; + assert!(cache.get("q", nid).await.is_none()); + } +} diff --git a/rust/src/retrieval/stages/plan.rs b/rust/src/retrieval/stages/plan.rs index 1f3b9cba..434ae06f 100644 --- a/rust/src/retrieval/stages/plan.rs +++ b/rust/src/retrieval/stages/plan.rs @@ -5,7 +5,7 @@ //! //! This stage selects: //! - Retrieval strategy (Keyword/Semantic/LLM) -//! - Search algorithm (Greedy/Beam/MCTS) +//! - Search algorithm (PurePilot/Beam/MCTS) //! - Search configuration use async_trait::async_trait; @@ -121,17 +121,16 @@ impl PlanStage { let algorithm = match complexity { QueryComplexity::Simple => { - // Simple queries can use greedy search - SearchAlgorithm::Greedy + // Simple queries: PurePilot (beam=1, fast) + SearchAlgorithm::PurePilot } QueryComplexity::Medium => { - // Medium queries benefit from beam search + // Medium queries: Beam search SearchAlgorithm::Beam } QueryComplexity::Complex => { - // Complex queries may benefit from MCTS - // But for now, use beam search as MCTS is more expensive - SearchAlgorithm::Beam + // Complex queries: MCTS for thorough exploration + SearchAlgorithm::Mcts } }; @@ -144,7 +143,7 @@ impl PlanStage { let complexity = ctx.complexity.unwrap_or(QueryComplexity::Medium); let (beam_width, max_depth) = match complexity { - QueryComplexity::Simple => (1, 5), // Greedy-like + QueryComplexity::Simple => (1, 5), // PurePilot-like QueryComplexity::Medium => (ctx.options.beam_width, 10), QueryComplexity::Complex => (ctx.options.beam_width + 2, 15), }; @@ -188,6 +187,24 @@ impl RetrievalStage for PlanStage { // 3. 
Build search config ctx.search_config = Some(self.build_search_config(ctx)); + // 4. Build fallback chain: primary algorithm first, then alternatives + // The chain determines which algorithms to try if the primary + // doesn't produce results above min_score. + let primary = ctx.selected_algorithm.unwrap_or(SearchAlgorithm::Beam); + let default_chain = vec![ + SearchAlgorithm::Beam, + SearchAlgorithm::Mcts, + SearchAlgorithm::PurePilot, + ]; + // Remove primary from default chain, prepend it + let mut chain = vec![primary]; + for algo in default_chain { + if algo != primary { + chain.push(algo); + } + } + ctx.search_fallback_chain = chain; + info!( "Plan complete: strategy={:?}, algorithm={:?}, beam_width={}", ctx.selected_strategy, diff --git a/rust/src/retrieval/stages/search.rs b/rust/src/retrieval/stages/search.rs index fcef9052..fc5dd2f2 100644 --- a/rust/src/retrieval/stages/search.rs +++ b/rust/src/retrieval/stages/search.rs @@ -23,7 +23,7 @@ use crate::retrieval::pipeline::{ }; use crate::retrieval::search::extract_keywords; use crate::retrieval::search::{ - BeamSearch, GreedySearch, SearchConfig as SearchAlgConfig, SearchCue, SearchTree, ToCNavigator, + BeamSearch, PurePilotSearch, MctsSearch, SearchConfig as SearchAlgConfig, SearchCue, SearchTree, ToCNavigator, }; use crate::retrieval::strategy::{ HybridConfig, HybridStrategy, KeywordStrategy, LlmStrategy, RetrievalStrategy, @@ -211,14 +211,106 @@ impl SearchStage { vec![ctx.query.clone()] } - /// Run search across all queries and cues, collecting and deduplicating results. + /// Run search across the fallback chain. + /// + /// Iterates through algorithms in the fallback chain. After each algorithm, + /// checks if the best candidate score meets `min_score`. If sufficient, + /// returns early. Otherwise tries the next algorithm in the chain. 
async fn run_search( &self, ctx: &mut PipelineContext, queries: &[String], cues: &[SearchCue], ) -> (Vec, Vec) { - let algorithm = ctx.selected_algorithm.unwrap_or(SearchAlgorithm::Beam); + let config = ctx.search_config.clone().unwrap_or_default(); + let min_score = config.min_score; + + // Build fallback chain: primary algorithm first, then remaining from chain + let primary = ctx.selected_algorithm.unwrap_or(SearchAlgorithm::Beam); + let chain = &ctx.search_fallback_chain; + + // Build ordered algorithm list: primary first, then chain (excluding primary) + let mut algorithms = vec![primary]; + for &algo in chain { + if algo != primary { + algorithms.push(algo); + } + } + + info!( + "Search fallback chain: {:?} (min_score={:.2})", + algorithms.iter().map(|a| a.name()).collect::>(), + min_score + ); + + let mut best_paths = Vec::new(); + let mut best_candidates = Vec::new(); + let mut total_pilot_interventions = 0u64; + + for (idx, &algorithm) in algorithms.iter().enumerate() { + let (paths, candidates) = self + .run_single_algorithm(ctx, queries, cues, algorithm) + .await; + + // Accumulate pilot interventions + total_pilot_interventions += paths.len() as u64; // approximate + + // Merge results: collect all paths and candidates across fallback rounds + best_paths.extend(paths); + best_candidates.extend(candidates); + + // Check if best candidate meets the threshold + let best_score = best_candidates + .iter() + .map(|c| c.score) + .fold(0.0f32, f32::max); + + if best_score >= min_score { + info!( + "Algorithm {} (#{}) sufficient: best_score={:.3} >= min_score={:.3}", + algorithm.name(), + idx + 1, + best_score, + min_score + ); + break; + } + + info!( + "Algorithm {} (#{}) insufficient: best_score={:.3} < min_score={:.3}, trying next", + algorithm.name(), + idx + 1, + best_score, + min_score + ); + } + + // Deduplicate candidates by node_id, keeping highest score + best_candidates.sort_by(|a, b| { + b.score + .partial_cmp(&a.score) + 
.unwrap_or(std::cmp::Ordering::Equal) + }); + best_candidates.dedup_by(|a, b| a.node_id == b.node_id); + + info!( + "Search complete: {} paths, {} candidates (pilot interventions: {})", + best_paths.len(), + best_candidates.len(), + total_pilot_interventions + ); + + (best_paths, best_candidates) + } + + /// Run a single search algorithm across all queries and cues. + async fn run_single_algorithm( + &self, + ctx: &mut PipelineContext, + queries: &[String], + cues: &[SearchCue], + algorithm: SearchAlgorithm, + ) -> (Vec, Vec) { let config = ctx.search_config.clone().unwrap_or_default(); let search_config = SearchAlgConfig { @@ -232,7 +324,6 @@ impl SearchStage { let pilot_ref: Option<&dyn Pilot> = self.pilot.as_deref(); let mut all_paths = Vec::new(); - let mut total_pilot_interventions = 0u64; for query in queries { let legacy_ctx = @@ -240,13 +331,16 @@ impl SearchStage { for cue in cues { debug!( - "Searching: algorithm={:?}, query='{}', cue.root={:?}, cue.confidence={:.3}", - algorithm, query, cue.root, cue.confidence + "Searching: algorithm={}, query='{}', cue.root={:?}, cue.confidence={:.3}", + algorithm.name(), + query, + cue.root, + cue.confidence ); let result = match algorithm { - SearchAlgorithm::Greedy => { - GreedySearch::new() + SearchAlgorithm::PurePilot => { + PurePilotSearch::new() .search_from( &ctx.tree, &legacy_ctx, @@ -267,9 +361,8 @@ impl SearchStage { ) .await } - // MCTS is not truly implemented — falls back to Beam behavior. 
SearchAlgorithm::Mcts => { - BeamSearch::new() + MctsSearch::new() .search_from( &ctx.tree, &legacy_ctx, @@ -282,28 +375,11 @@ impl SearchStage { }; all_paths.extend(result.paths); - total_pilot_interventions += result.pilot_interventions as u64; } } - let mut all_candidates = self.extract_candidates(&all_paths, &ctx.tree); - - // Deduplicate by node_id, keeping the highest-scored entry - all_candidates.sort_by(|a, b| { - b.score - .partial_cmp(&a.score) - .unwrap_or(std::cmp::Ordering::Equal) - }); - all_candidates.dedup_by(|a, b| a.node_id == b.node_id); - - info!( - "Search complete: {} paths, {} candidates (pilot interventions: {})", - all_paths.len(), - all_candidates.len(), - total_pilot_interventions - ); - - (all_paths, all_candidates) + let candidates = self.extract_candidates(&all_paths, &ctx.tree); + (all_paths, candidates) } /// Check if a query is asking for a document summary/overview. From eabe09004eff054c01b7acbe09f88c781654129f Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Mon, 13 Apr 2026 23:14:25 +0800 Subject: [PATCH 3/7] refactor(config): remove example configuration file and update naming BREAKING CHANGE: The config.example.toml file has been removed as it's no longer needed. feat(llm): rename summary client to index client for clarity The summary client has been renamed to index client to better reflect its purpose during document indexing operations. The old 'summary' configuration still works as an alias for backward compatibility. feat(retrieval): add search fallback chain configuration Add configurable fallback chain for search algorithms that tries different algorithms ("beam", "mcts", "pure_pilot") in order when minimum score thresholds aren't met. 
--- config.example.toml | 254 ------------------------- rust/src/client/builder.rs | 6 +- rust/src/config/types/llm_pool.rs | 35 +++- rust/src/config/types/mod.rs | 6 +- rust/src/config/types/retrieval.rs | 10 + rust/src/llm/config.rs | 22 +-- rust/src/llm/mod.rs | 10 +- rust/src/llm/pool.rs | 73 +++---- rust/src/retrieval/pipeline/context.rs | 11 ++ rust/src/retrieval/stages/plan.rs | 14 +- rust/src/retrieval/types.rs | 16 ++ 11 files changed, 128 insertions(+), 329 deletions(-) delete mode 100644 config.example.toml diff --git a/config.example.toml b/config.example.toml deleted file mode 100644 index dd0a9a93..00000000 --- a/config.example.toml +++ /dev/null @@ -1,254 +0,0 @@ -# Vectorless Configuration Example -# Copy this file to vectorless.toml and fill in your API keys -# -# All configuration is loaded from this file only. -# No environment variables are used - this ensures explicit, traceable configuration. - -# ============================================================================ -# LLM Configuration (Unified) -# ============================================================================ -# -# The LLM pool allows configuring different models for different purposes: -# - summary: Used for generating document summaries during indexing -# - retrieval: Used for retrieval decisions and content evaluation -# - pilot: Used for intelligent navigation guidance -# -# Each client can have its own model, endpoint, and settings. 
- -[llm] -# Default API key (used by all clients unless overridden per-client) -api_key = "sk-your-api-key-here" - -# Summary client - generates document summaries during indexing -# Use a fast, cheap model for bulk processing -[llm.summary] -model = "gpt-4o-mini" -endpoint = "https://api.openai.com/v1" -max_tokens = 200 -temperature = 0.0 -# api_key = "sk-specific-key-for-summary" # Optional: override default - -# Retrieval client - used for retrieval decisions and content evaluation -# Can use a more capable model for better decisions -[llm.retrieval] -model = "gpt-4o" -endpoint = "https://api.openai.com/v1" -max_tokens = 100 -temperature = 0.0 -# api_key = "sk-specific-key-for-retrieval" # Optional: override default - -# Pilot client - used for intelligent navigation guidance -# Use a fast model for quick navigation decisions -[llm.pilot] -model = "gpt-4o-mini" -endpoint = "https://api.openai.com/v1" -max_tokens = 300 -temperature = 0.0 -# api_key = "sk-specific-key-for-pilot" # Optional: override default - -# Retry configuration (applies to all LLM calls) -[llm.retry] -max_attempts = 3 -initial_delay_ms = 500 -max_delay_ms = 30000 -multiplier = 2.0 -retry_on_rate_limit = true - -# Throttle/rate limiting configuration (applies to all LLM calls) -[llm.throttle] -max_concurrent_requests = 10 -requests_per_minute = 500 -enabled = true -semaphore_enabled = true - -# Fallback configuration (applies to all LLM calls) -[llm.fallback] -enabled = true -models = ["gpt-4o-mini", "glm-4-flash"] -# Alternative endpoints for fallback -# endpoints = [ -# "https://api.openai.com/v1", -# "https://api.z.ai/api/paas/v4" -# ] -on_rate_limit = "retry_then_fallback" -on_timeout = "retry_then_fallback" -on_all_failed = "return_error" - -# ============================================================================ -# Metrics Configuration (Unified) -# ============================================================================ - -[metrics] -enabled = true -storage_path = 
"./workspace/metrics" -retention_days = 30 - -[metrics.llm] -track_tokens = true -track_latency = true -track_cost = true -cost_per_1k_input_tokens = 0.00015 # gpt-4o-mini pricing -cost_per_1k_output_tokens = 0.0006 - -[metrics.pilot] -track_decisions = true -track_accuracy = true -track_feedback = true - -[metrics.retrieval] -track_paths = true -track_scores = true -track_iterations = true -track_cache = true - -# ============================================================================ -# Pilot Configuration -# ============================================================================ - -[pilot] -mode = "Balanced" # Aggressive | Balanced | Conservative | AlgorithmOnly -guide_at_start = true -guide_at_backtrack = true - -[pilot.budget] -max_tokens_per_query = 2000 -max_tokens_per_call = 500 -max_calls_per_query = 5 -max_calls_per_level = 2 -hard_limit = true - -[pilot.intervention] -fork_threshold = 3 -score_gap_threshold = 0.15 -low_score_threshold = 0.3 -max_interventions_per_level = 2 - -[pilot.feedback] -enabled = true -storage_path = "./workspace/feedback" -learning_rate = 0.1 -min_samples_for_learning = 10 - -# ============================================================================ -# Retrieval Configuration -# ============================================================================ - -[retrieval] -model = "gpt-4o" -endpoint = "https://api.openai.com/v1" -top_k = 3 -max_tokens = 1000 -temperature = 0.0 - -[retrieval.search] -top_k = 5 -beam_width = 3 -max_iterations = 10 -min_score = 0.1 - -[retrieval.sufficiency] -min_tokens = 500 -target_tokens = 2000 -max_tokens = 4000 -min_content_length = 200 -confidence_threshold = 0.7 - -[retrieval.cache] -max_entries = 1000 -ttl_secs = 3600 - -[retrieval.strategy] -exploration_weight = 1.414 -similarity_threshold = 0.5 -high_similarity_threshold = 0.8 -low_similarity_threshold = 0.3 - -# Hybrid Strategy Configuration (BM25 + LLM refinement) -# Recommended for most use cases - reduces LLM calls while 
maintaining accuracy -[retrieval.strategy.hybrid] -enabled = true -pre_filter_ratio = 0.3 # Keep top 30% of BM25 candidates -min_candidates = 2 # Minimum candidates to pass to LLM -max_candidates = 5 # Maximum candidates for LLM refinement -auto_accept_threshold = 0.85 # BM25 score for auto-accept (skip LLM) -auto_reject_threshold = 0.15 # BM25 score for auto-reject (skip LLM) -bm25_weight = 0.4 # Weight for BM25 score in final scoring -llm_weight = 0.6 # Weight for LLM score in final scoring - -# Cross-Document Retrieval Configuration -# For searching across multiple documents simultaneously -[retrieval.strategy.cross_document] -enabled = true -max_documents = 10 # Maximum documents to search -max_results_per_doc = 3 # Maximum results per document -max_total_results = 10 # Maximum total results -min_score = 0.3 # Minimum score threshold -merge_strategy = "TopK" # TopK | BestPerDocument | WeightedByRelevance -parallel_search = true # Search documents in parallel - -# Page-Range Strategy Configuration -# For filtering by page range before retrieval -[retrieval.strategy.page_range] -enabled = true -include_boundary_nodes = true # Include nodes spanning across boundary -expand_context_pages = 0 # Expand range by N pages for context -min_overlap_ratio = 0.1 # Minimum overlap ratio for node inclusion - -[retrieval.content] -enabled = true -token_budget = 4000 -min_relevance_score = 0.2 -scoring_strategy = "hybrid" # keyword | bm25 | hybrid -output_format = "markdown" -include_scores = false -hierarchical_min_per_level = 0.1 -deduplicate = true -dedup_threshold = 0.9 - -# ============================================================================ -# Multi-turn Retrieval Configuration -# ============================================================================ - -[retrieval.multiturn] -enabled = true -max_sub_queries = 3 -decomposition_model = "gpt-4o-mini" -aggregation_strategy = "merge" # merge | rank | synthesize - -# 
============================================================================ -# Reference Following Configuration -# ============================================================================ - -[retrieval.reference] -enabled = true -max_depth = 3 -max_references = 10 -follow_pages = true -follow_tables_figures = true -min_confidence = 0.5 - -# ============================================================================ -# Storage Configuration -# ============================================================================ - -[storage] -workspace_dir = "./workspace" -cache_size = 100 -atomic_writes = true -file_lock = true -checksum_enabled = true - -[storage.compression] -enabled = false -algorithm = "gzip" -level = 6 - -# ============================================================================ -# Indexer Configuration -# ============================================================================ - -[indexer] -subsection_threshold = 300 -max_segment_tokens = 3000 -max_summary_tokens = 200 -min_summary_tokens = 20 diff --git a/rust/src/client/builder.rs b/rust/src/client/builder.rs index 1bc9b927..d042d6aa 100644 --- a/rust/src/client/builder.rs +++ b/rust/src/client/builder.rs @@ -403,12 +403,12 @@ impl EngineBuilder { // Apply individual overrides if let Some(api_key) = self.api_key { - // Set API key for both retrieval and summary + // Set API key for both retrieval and index config.retrieval.api_key = Some(api_key.clone()); config.summary.api_key = Some(api_key); // Also set LLM pool config - if config.llm.summary.api_key.is_none() { - config.llm.summary.api_key = config.summary.api_key.clone(); + if config.llm.index.api_key.is_none() { + config.llm.index.api_key = config.summary.api_key.clone(); } if config.llm.retrieval.api_key.is_none() { config.llm.retrieval.api_key = config.summary.api_key.clone(); diff --git a/rust/src/config/types/llm_pool.rs b/rust/src/config/types/llm_pool.rs index c17ed966..d77d1241 100644 --- 
a/rust/src/config/types/llm_pool.rs +++ b/rust/src/config/types/llm_pool.rs @@ -11,15 +11,15 @@ use serde::{Deserialize, Serialize}; /// Unified LLM configuration. /// /// Contains all settings for LLM operations including: -/// - Pool of clients for different purposes (summary, retrieval, pilot) +/// - Pool of clients for different purposes (index, retrieval, pilot) /// - Retry behavior /// - Throttle/rate limiting /// - Fallback strategy #[derive(Debug, Clone, Serialize, Deserialize)] pub struct LlmPoolConfig { - /// Summary client configuration. - #[serde(default)] - pub summary: LlmClientConfig, + /// Index client configuration (used during document indexing). + #[serde(default, alias = "summary")] + pub index: LlmClientConfig, /// Retrieval client configuration. #[serde(default)] @@ -33,6 +33,10 @@ pub struct LlmPoolConfig { #[serde(default)] pub api_key: Option, + /// Default API endpoint (used if not specified per-client). + #[serde(default)] + pub endpoint: Option, + /// Retry configuration. 
#[serde(default)] pub retry: RetryConfig, @@ -57,13 +61,14 @@ fn default_pilot_config() -> LlmClientConfig { impl Default for LlmPoolConfig { fn default() -> Self { Self { - summary: LlmClientConfig::default(), + index: LlmClientConfig::default(), retrieval: LlmClientConfig { max_tokens: 100, ..Default::default() }, pilot: default_pilot_config(), api_key: None, + endpoint: None, retry: RetryConfig::default(), throttle: ThrottleConfig::default(), fallback: FallbackConfig::default(), @@ -87,8 +92,8 @@ impl LlmPoolConfig { pub fn get_api_key_for(&self, client_key: Option<&str>) -> Option { // First check client-specific key if let Some(key) = client_key { - if let Some(ref k) = self.summary.api_key { - if self.summary.model == key { + if let Some(ref k) = self.index.api_key { + if self.index.model == key { return Some(k.clone()); } } @@ -106,6 +111,20 @@ impl LlmPoolConfig { // Fall back to default self.api_key.clone() } + + /// Resolve API key: client-specific first, then default. + pub fn resolved_api_key(&self, client: &LlmClientConfig) -> Option { + client.api_key.clone().or_else(|| self.api_key.clone()) + } + + /// Resolve endpoint: client-specific first, then default. + pub fn resolved_endpoint(&self, client: &LlmClientConfig) -> String { + if !client.endpoint.is_empty() { + client.endpoint.clone() + } else { + self.endpoint.clone().unwrap_or_default() + } + } } /// Individual LLM client configuration. 
@@ -410,7 +429,7 @@ mod tests { #[test] fn test_llm_pool_config_defaults() { let config = LlmPoolConfig::default(); - assert!(config.summary.model.is_empty()); + assert!(config.index.model.is_empty()); assert!(config.retrieval.model.is_empty()); assert!(config.pilot.model.is_empty()); assert_eq!(config.retry.max_attempts, 3); diff --git a/rust/src/config/types/mod.rs b/rust/src/config/types/mod.rs index e355a178..32634a60 100644 --- a/rust/src/config/types/mod.rs +++ b/rust/src/config/types/mod.rs @@ -160,7 +160,7 @@ impl Config { )); } - // Validate summary + // Validate summary (index) if self.summary.max_tokens == 0 { errors.push(ValidationError::error( "summary.max_tokens", @@ -360,14 +360,14 @@ mod tests { assert!(config.retrieval.model.is_empty()); assert_eq!(config.concurrency.max_concurrent_requests, 10); // New fields - assert!(config.llm.summary.model.is_empty()); + assert!(config.llm.index.model.is_empty()); assert!(config.metrics.enabled); } #[test] fn test_llm_pool_config_defaults() { let config = LlmPoolConfig::default(); - assert!(config.summary.model.is_empty()); + assert!(config.index.model.is_empty()); assert!(config.retrieval.model.is_empty()); assert_eq!(config.retry.max_attempts, 3); assert_eq!(config.throttle.max_concurrent_requests, 10); diff --git a/rust/src/config/types/retrieval.rs b/rust/src/config/types/retrieval.rs index bfa2d756..fc131bc6 100644 --- a/rust/src/config/types/retrieval.rs +++ b/rust/src/config/types/retrieval.rs @@ -135,6 +135,12 @@ pub struct SearchConfig { /// Minimum score to include a path. #[serde(default = "default_min_score")] pub min_score: f32, + + /// Fallback chain: algorithms tried in order until min_score is met. + /// Options: "beam", "mcts", "pure_pilot". 
+ /// Default: ["beam", "mcts", "pure_pilot"] + #[serde(default = "default_fallback_chain")] + pub fallback_chain: Vec, } fn default_search_top_k() -> usize { @@ -152,6 +158,9 @@ fn default_max_iterations() -> usize { fn default_min_score() -> f32 { 0.1 } +fn default_fallback_chain() -> Vec { + vec!["beam".into(), "mcts".into(), "pure_pilot".into()] +} impl Default for SearchConfig { fn default() -> Self { @@ -160,6 +169,7 @@ impl Default for SearchConfig { beam_width: default_beam_width(), max_iterations: default_max_iterations(), min_score: default_min_score(), + fallback_chain: default_fallback_chain(), } } } diff --git a/rust/src/llm/config.rs b/rust/src/llm/config.rs index e3c584df..882ca828 100644 --- a/rust/src/llm/config.rs +++ b/rust/src/llm/config.rs @@ -200,20 +200,20 @@ impl LlmConfig { /// Pool of LLM configurations for different purposes. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct LlmConfigs { - /// Configuration for summarization tasks. - #[serde(default = "default_summary_config")] - pub summary: LlmConfig, + /// Configuration for indexing tasks (document summarization, etc.). + #[serde(default = "default_index_config", alias = "summary")] + pub index: LlmConfig, /// Configuration for retrieval/navigation tasks. #[serde(default = "default_retrieval_config")] pub retrieval: LlmConfig, - /// Configuration for TOC processing tasks. - #[serde(default = "default_toc_config")] - pub toc: LlmConfig, + /// Configuration for Pilot navigation tasks. 
+ #[serde(default = "default_pilot_config")] + pub pilot: LlmConfig, } -fn default_summary_config() -> LlmConfig { +fn default_index_config() -> LlmConfig { LlmConfig { max_tokens: 200, temperature: 0.0, @@ -229,9 +229,9 @@ fn default_retrieval_config() -> LlmConfig { } } -fn default_toc_config() -> LlmConfig { +fn default_pilot_config() -> LlmConfig { LlmConfig { - max_tokens: 2000, + max_tokens: 300, temperature: 0.0, ..LlmConfig::default() } @@ -240,9 +240,9 @@ fn default_toc_config() -> LlmConfig { impl Default for LlmConfigs { fn default() -> Self { Self { - summary: default_summary_config(), + index: default_index_config(), retrieval: default_retrieval_config(), - toc: default_toc_config(), + pilot: default_pilot_config(), } } } diff --git a/rust/src/llm/mod.rs b/rust/src/llm/mod.rs index c19b60e1..6d23e3dd 100644 --- a/rust/src/llm/mod.rs +++ b/rust/src/llm/mod.rs @@ -4,9 +4,9 @@ //! Unified LLM client module. //! //! This module provides a unified interface for all LLM operations across the codebase: -//! - **Summarization** — Generating document summaries +//! - **Index** — Document indexing and summarization //! - **Retrieval** — Document tree navigation -//! - **TOC Processing** — Table of contents extraction +//! - **Pilot** — Navigation guidance //! //! # Features //! @@ -22,7 +22,7 @@ //! │ LlmPool │ //! │ │ //! │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ -//! │ │ summary │ │ retrieval │ │ toc │ │ +//! │ │ index │ │ retrieval │ │ pilot │ │ //! │ │ LlmClient │ │ LlmClient │ │ LlmClient │ │ //! │ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ │ //! │ │ │ │ │ @@ -45,8 +45,8 @@ //! // Create a pool with default configurations //! let pool = LlmPool::from_defaults(); //! -//! // Use summary client -//! let summary = pool.summary().complete( +//! // Use index client +//! let summary = pool.index().complete( //! "You summarize text concisely.", //! "Long text to summarize..." //! 
).await?; diff --git a/rust/src/llm/pool.rs b/rust/src/llm/pool.rs index 375731dd..952c02f4 100644 --- a/rust/src/llm/pool.rs +++ b/rust/src/llm/pool.rs @@ -13,9 +13,9 @@ use crate::throttle::ConcurrencyController; /// /// This provides a centralized way to access LLM clients /// configured for specific tasks: -/// - **Summary** — Document summarization (fast, cheap model) +/// - **Index** — Document indexing/summarization (fast, cheap model) /// - **Retrieval** — Document navigation (capable model) -/// - **TOC** — Table of contents processing (fast, cheap model) +/// - **Pilot** — Navigation guidance (fast model) /// /// # Example /// @@ -26,8 +26,8 @@ use crate::throttle::ConcurrencyController; /// # async fn main() -> vectorless::llm::LlmResult<()> { /// let pool = LlmPool::from_defaults(); /// -/// // Use summary client for summarization -/// let summary = pool.summary().complete( +/// // Use index client for summarization +/// let summary = pool.index().complete( /// "You summarize text concisely.", /// "Long text to summarize..." /// ).await?; @@ -43,9 +43,9 @@ use crate::throttle::ConcurrencyController; /// ``` #[derive(Debug, Clone)] pub struct LlmPool { - summary: Arc, + index: Arc, retrieval: Arc, - toc: Arc, + pilot: Arc, concurrency: Option>, } @@ -53,9 +53,9 @@ impl LlmPool { /// Create a new LLM pool from configurations. 
pub fn new(configs: LlmConfigs) -> Self { Self { - summary: Arc::new(LlmClient::new(configs.summary)), + index: Arc::new(LlmClient::new(configs.index)), retrieval: Arc::new(LlmClient::new(configs.retrieval)), - toc: Arc::new(LlmClient::new(configs.toc)), + pilot: Arc::new(LlmClient::new(configs.pilot)), concurrency: None, } } @@ -92,14 +92,14 @@ impl LlmPool { pub fn with_concurrency(mut self, controller: ConcurrencyController) -> Self { let arc = Arc::new(controller); self.concurrency = Some(arc.clone()); - self.summary = Arc::new( - LlmClient::new(self.summary.config().clone()).with_shared_concurrency(arc.clone()), + self.index = Arc::new( + LlmClient::new(self.index.config().clone()).with_shared_concurrency(arc.clone()), ); self.retrieval = Arc::new( LlmClient::new(self.retrieval.config().clone()).with_shared_concurrency(arc.clone()), ); - self.toc = Arc::new( - LlmClient::new(self.toc.config().clone()).with_shared_concurrency(arc.clone()), + self.pilot = Arc::new( + LlmClient::new(self.pilot.config().clone()).with_shared_concurrency(arc.clone()), ); self } @@ -107,16 +107,16 @@ impl LlmPool { /// Add concurrency control from an existing Arc. pub fn with_shared_concurrency(mut self, controller: Arc) -> Self { self.concurrency = Some(controller.clone()); - self.summary = Arc::new( - LlmClient::new(self.summary.config().clone()) + self.index = Arc::new( + LlmClient::new(self.index.config().clone()) .with_shared_concurrency(controller.clone()), ); self.retrieval = Arc::new( LlmClient::new(self.retrieval.config().clone()) .with_shared_concurrency(controller.clone()), ); - self.toc = Arc::new( - LlmClient::new(self.toc.config().clone()).with_shared_concurrency(controller.clone()), + self.pilot = Arc::new( + LlmClient::new(self.pilot.config().clone()).with_shared_concurrency(controller.clone()), ); self } @@ -126,12 +126,12 @@ impl LlmPool { self.concurrency.as_deref() } - /// Get the summary client. + /// Get the index client. 
/// - /// Used for generating summaries of document sections. + /// Used for document indexing and summarization. /// Typically uses a fast, cost-effective model. - pub fn summary(&self) -> &LlmClient { - &self.summary + pub fn index(&self) -> &LlmClient { + &self.index } /// Get the retrieval client. @@ -142,28 +142,28 @@ impl LlmPool { &self.retrieval } - /// Get the TOC client. + /// Get the pilot client. /// - /// Used for TOC detection, parsing, and page assignment. - /// Typically uses a fast, cost-effective model. - pub fn toc(&self) -> &LlmClient { - &self.toc + /// Used for intelligent navigation guidance. + /// Typically uses a fast model for quick decisions. + pub fn pilot(&self) -> &LlmClient { + &self.pilot } /// Get a client for a specific purpose by name. /// /// # Arguments /// - /// * `purpose` - One of: "summary", "summarize", "retrieval", "retrieve", "navigate", "toc" + /// * `purpose` - One of: "index", "summary", "retrieval", "retrieve", "navigate", "pilot" /// /// # Returns /// /// Returns `None` if the purpose is not recognized. 
pub fn get(&self, purpose: &str) -> Option<&LlmClient> { match purpose { - "summary" | "summarize" => Some(&self.summary), + "index" | "summary" | "summarize" => Some(&self.index), "retrieval" | "retrieve" | "navigate" => Some(&self.retrieval), - "toc" => Some(&self.toc), + "pilot" => Some(&self.pilot), _ => None, } } @@ -175,9 +175,9 @@ impl LlmPool { let config = super::config::LlmConfig::new(model); let client = Arc::new(LlmClient::new(config)); Self { - summary: client.clone(), + index: client.clone(), retrieval: client.clone(), - toc: client, + pilot: client, concurrency: None, } } @@ -198,9 +198,9 @@ mod tests { let pool = LlmPool::from_defaults(); // Should have all clients - assert!(pool.get("summary").is_some()); + assert!(pool.get("index").is_some()); assert!(pool.get("retrieval").is_some()); - assert!(pool.get("toc").is_some()); + assert!(pool.get("pilot").is_some()); assert!(pool.get("unknown").is_none()); } @@ -209,6 +209,7 @@ mod tests { let pool = LlmPool::from_defaults(); // Test aliases + assert!(pool.get("summary").is_some()); assert!(pool.get("summarize").is_some()); assert!(pool.get("retrieve").is_some()); assert!(pool.get("navigate").is_some()); @@ -219,9 +220,9 @@ mod tests { let pool = LlmPool::single_model("gpt-4o-mini"); // All clients should use the same model - assert_eq!(pool.summary().config().model, "gpt-4o-mini"); + assert_eq!(pool.index().config().model, "gpt-4o-mini"); assert_eq!(pool.retrieval().config().model, "gpt-4o-mini"); - assert_eq!(pool.toc().config().model, "gpt-4o-mini"); + assert_eq!(pool.pilot().config().model, "gpt-4o-mini"); } #[test] @@ -233,8 +234,8 @@ mod tests { // All clients should have concurrency enabled assert!(pool.concurrency().is_some()); - assert!(pool.summary().concurrency().is_some()); + assert!(pool.index().concurrency().is_some()); assert!(pool.retrieval().concurrency().is_some()); - assert!(pool.toc().concurrency().is_some()); + assert!(pool.pilot().concurrency().is_some()); } } diff --git 
a/rust/src/retrieval/pipeline/context.rs b/rust/src/retrieval/pipeline/context.rs index bb54e127..047182e7 100644 --- a/rust/src/retrieval/pipeline/context.rs +++ b/rust/src/retrieval/pipeline/context.rs @@ -46,6 +46,17 @@ impl SearchAlgorithm { Self::Mcts => "mcts", } } + + /// Parse algorithm from config string. + /// Returns None for unrecognized names. + pub fn from_name(name: &str) -> Option { + match name { + "pure_pilot" | "greedy" => Some(Self::PurePilot), + "beam" => Some(Self::Beam), + "mcts" => Some(Self::Mcts), + _ => None, + } + } } /// Search configuration. diff --git a/rust/src/retrieval/stages/plan.rs b/rust/src/retrieval/stages/plan.rs index 434ae06f..4442551c 100644 --- a/rust/src/retrieval/stages/plan.rs +++ b/rust/src/retrieval/stages/plan.rs @@ -191,16 +191,12 @@ impl RetrievalStage for PlanStage { // The chain determines which algorithms to try if the primary // doesn't produce results above min_score. let primary = ctx.selected_algorithm.unwrap_or(SearchAlgorithm::Beam); - let default_chain = vec![ - SearchAlgorithm::Beam, - SearchAlgorithm::Mcts, - SearchAlgorithm::PurePilot, - ]; - // Remove primary from default chain, prepend it let mut chain = vec![primary]; - for algo in default_chain { - if algo != primary { - chain.push(algo); + for name in &ctx.options.fallback_chain { + if let Some(algo) = SearchAlgorithm::from_name(name) { + if algo != primary { + chain.push(algo); + } } } ctx.search_fallback_chain = chain; diff --git a/rust/src/retrieval/types.rs b/rust/src/retrieval/types.rs index 1c99e79c..a559912c 100644 --- a/rust/src/retrieval/types.rs +++ b/rust/src/retrieval/types.rs @@ -125,6 +125,11 @@ pub struct RetrieveOptions { /// Cross-document graph for graph-aware retrieval boosting. pub document_graph: Option>, + + /// Search fallback chain: algorithm names tried in order until min_score is met. + /// Options: "beam", "mcts", "pure_pilot". 
+ /// Default: ["beam", "mcts", "pure_pilot"] + pub fallback_chain: Vec, } impl Default for RetrieveOptions { @@ -145,6 +150,7 @@ impl Default for RetrieveOptions { use_async_context: false, streaming: false, document_graph: None, + fallback_chain: vec!["beam".into(), "mcts".into(), "pure_pilot".into()], } } } @@ -263,6 +269,16 @@ impl RetrieveOptions { self.document_graph = Some(graph); self } + + /// Set the search fallback chain. + /// + /// Algorithm names: "beam", "mcts", "pure_pilot". + /// Primary algorithm is prepended automatically by the Plan stage. + #[must_use] + pub fn with_fallback_chain(mut self, chain: Vec) -> Self { + self.fallback_chain = chain; + self + } } /// A single retrieval result. From 1e7d1d720739109c5eb6f75d81538e6a8ba6f62c Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Mon, 13 Apr 2026 23:20:27 +0800 Subject: [PATCH 4/7] refactor(examples): clean up code formatting and imports in example files - Remove unnecessary blank lines and trailing spaces - Consolidate multi-line variable declarations into single lines where appropriate - Reorder imports to follow standard conventions refactor(engine): improve code readability in engine implementation - Format long method chains with proper indentation - Break down complex expressions into readable blocks - Clean up error message formatting refactor(indexer): enhance code formatting in indexing components - Standardize multi-line function calls and method chaining - Improve readability of complex operations - Consolidate redundant blank lines refactor(retriever): clean up retriever and related modules - Format long expressions and method calls consistently - Remove unused imports and declarations - Improve code organization in TOC processing modules refactor(llm): streamline LLM executor and pool implementations - Clean up error messages and string formatting - Improve readability of conditional statements - Standardize async method calls refactor(search): restructure search 
algorithm implementations - Format complex calculations and expressions clearly - Remove unused imports and exports - Clean up test cases and remove obsolete tests --- rust/examples/advanced.rs | 4 +- rust/examples/events.rs | 20 +++--- rust/examples/flow.rs | 9 +-- rust/examples/graph.rs | 8 +-- rust/examples/index_incremental.rs | 26 +++++--- rust/examples/index_pdf.rs | 8 +-- rust/examples/index_single.rs | 14 +++-- rust/examples/indexing.rs | 12 ++-- rust/src/client/engine.rs | 58 +++++++++++------ rust/src/client/indexer.rs | 4 +- rust/src/client/retriever.rs | 3 +- rust/src/config/mod.rs | 3 +- rust/src/index/parse/pdf/parser.rs | 4 +- rust/src/index/parse/toc/assigner.rs | 35 +++++------ rust/src/index/parse/toc/detector.rs | 6 +- rust/src/index/parse/toc/processor.rs | 26 ++++---- rust/src/index/parse/toc/repairer.rs | 14 +---- .../index/parse/toc/structure_extractor.rs | 36 +++++------ rust/src/index/parse/toc/verifier.rs | 52 +++++++-------- rust/src/index/stages/enhance.rs | 5 +- rust/src/index/stages/optimize.rs | 9 +-- rust/src/index/stages/parse.rs | 3 +- rust/src/llm/executor.rs | 3 +- rust/src/llm/pool.rs | 3 +- rust/src/retrieval/content/scorer.rs | 7 +-- rust/src/retrieval/pilot/llm_pilot.rs | 8 ++- rust/src/retrieval/pilot/mod.rs | 2 +- rust/src/retrieval/pipeline/orchestrator.rs | 15 ++++- rust/src/retrieval/search/mcts.rs | 4 +- rust/src/retrieval/search/mod.rs | 1 - rust/src/retrieval/search/pilot_scorer.rs | 49 +-------------- rust/src/retrieval/search/toc_navigator.rs | 23 ++++--- rust/src/retrieval/stages/evaluate.rs | 9 +-- rust/src/retrieval/stages/search.rs | 18 +++--- rust/src/retrieval/strategy/cross_document.rs | 63 +++++++++---------- rust/src/retrieval/strategy/llm.rs | 5 +- .../083a0e39-5c92-404b-9fb7-8458152dd65f.bin | 1 + 37 files changed, 263 insertions(+), 307 deletions(-) create mode 100644 samples/083a0e39-5c92-404b-9fb7-8458152dd65f.bin diff --git a/rust/examples/advanced.rs b/rust/examples/advanced.rs index 
a75608d1..1316a68d 100644 --- a/rust/examples/advanced.rs +++ b/rust/examples/advanced.rs @@ -29,7 +29,7 @@ async fn main() -> vectorless::Result<()> { // The config file must include api_key and model. // If environment variables are set, they override the config file values. let mut builder = EngineBuilder::new().with_config_path("./config.toml"); - + // Override config with env vars if present if let Ok(api_key) = std::env::var("LLM_API_KEY") { builder = builder.with_key(&api_key); @@ -72,4 +72,4 @@ async fn main() -> vectorless::Result<()> { println!("\n=== Done ==="); Ok(()) -} \ No newline at end of file +} diff --git a/rust/examples/events.rs b/rust/examples/events.rs index b0433dc7..a0cefbb2 100644 --- a/rust/examples/events.rs +++ b/rust/examples/events.rs @@ -100,12 +100,10 @@ async fn main() -> Result<(), Box> { // Build engine with LLM configuration from environment or defaults. // Adjust the defaults below to match your setup. - let api_key = std::env::var("LLM_API_KEY") - .unwrap_or_else(|_| "sk-...".to_string()); - let model = std::env::var("LLM_MODEL") - .unwrap_or_else(|_| "gpt-4o".to_string()); - let endpoint = std::env::var("LLM_ENDPOINT") - .unwrap_or_else(|_| "https://api.openai.com/v1".to_string()); + let api_key = std::env::var("LLM_API_KEY").unwrap_or_else(|_| "sk-...".to_string()); + let model = std::env::var("LLM_MODEL").unwrap_or_else(|_| "gpt-4o".to_string()); + let endpoint = + std::env::var("LLM_ENDPOINT").unwrap_or_else(|_| "https://api.openai.com/v1".to_string()); // 2. Create engine with events println!("Step 2: Creating engine with event emitter..."); @@ -130,10 +128,7 @@ async fn main() -> Result<(), Box> { // 4. 
Query with events println!("Step 4: Querying (with events)..."); let result = engine - .query( - QueryContext::new("What is vectorless?") - .with_doc_id(&doc_id) - ) + .query(QueryContext::new("What is vectorless?").with_doc_id(&doc_id)) .await?; if let Some(item) = result.single() { println!(" ✓ Found result ({} chars)", item.content.len()); @@ -145,7 +140,10 @@ async fn main() -> Result<(), Box> { // 5. Stats println!("\n--- Stats ---"); - println!(" Documents indexed: {}", index_count.load(Ordering::SeqCst)); + println!( + " Documents indexed: {}", + index_count.load(Ordering::SeqCst) + ); println!(" Queries executed: {}", query_count.load(Ordering::SeqCst)); println!(" Nodes visited: {}", nodes_visited.load(Ordering::SeqCst)); diff --git a/rust/examples/flow.rs b/rust/examples/flow.rs index 4778bd44..36712dd3 100644 --- a/rust/examples/flow.rs +++ b/rust/examples/flow.rs @@ -61,12 +61,9 @@ async fn main() -> vectorless::Result<()> { // Build engine with LLM configuration from environment or defaults. // Adjust the defaults below to match your setup. - let api_key = std::env::var("LLM_API_KEY") - .unwrap_or_else(|_| "sk-...".to_string()); - let model = std::env::var("LLM_MODEL") - .unwrap_or_else(|_| "gpt-4o".to_string()); - let endpoint = std::env::var("LLM_ENDPOINT") - .unwrap_or_else(|_| "https://api".to_string()); + let api_key = std::env::var("LLM_API_KEY").unwrap_or_else(|_| "sk-...".to_string()); + let model = std::env::var("LLM_MODEL").unwrap_or_else(|_| "gpt-4o".to_string()); + let endpoint = std::env::var("LLM_ENDPOINT").unwrap_or_else(|_| "https://api".to_string()); // Step 1: Create a Vectorless client println!("Step 1: Creating Vectorless client..."); diff --git a/rust/examples/graph.rs b/rust/examples/graph.rs index ac87a673..940bf7ee 100644 --- a/rust/examples/graph.rs +++ b/rust/examples/graph.rs @@ -29,10 +29,8 @@ async fn main() -> vectorless::Result<()> { // Build engine with LLM configuration from environment or defaults. 
// Adjust the defaults below to match your setup. - let api_key = std::env::var("LLM_API_KEY") - .unwrap_or_else(|_| "sk-...".to_string()); - let model = std::env::var("LLM_MODEL") - .unwrap_or_else(|_| "gpt-4o".to_string()); + let api_key = std::env::var("LLM_API_KEY").unwrap_or_else(|_| "sk-...".to_string()); + let model = std::env::var("LLM_MODEL").unwrap_or_else(|_| "gpt-4o".to_string()); // 1. Create engine let engine = EngineBuilder::new() @@ -106,4 +104,4 @@ async fn main() -> vectorless::Result<()> { println!("\n=== Done ==="); Ok(()) -} \ No newline at end of file +} diff --git a/rust/examples/index_incremental.rs b/rust/examples/index_incremental.rs index 32254d7d..b85a01e9 100644 --- a/rust/examples/index_incremental.rs +++ b/rust/examples/index_incremental.rs @@ -21,10 +21,9 @@ async fn main() -> vectorless::Result<()> { // Build engine with LLM configuration from environment or defaults. // Adjust the defaults below to match your setup. - let api_key = std::env::var("LLM_API_KEY") - .unwrap_or_else(|_| "sk-or-v1-...".to_string()); - let model = std::env::var("LLM_MODEL") - .unwrap_or_else(|_| "google/gemini-3-flash-preview".to_string()); + let api_key = std::env::var("LLM_API_KEY").unwrap_or_else(|_| "sk-or-v1-...".to_string()); + let model = + std::env::var("LLM_MODEL").unwrap_or_else(|_| "google/gemini-3-flash-preview".to_string()); let endpoint = std::env::var("LLM_ENDPOINT") .unwrap_or_else(|_| "http://localhost:4000/api/v1".to_string()); @@ -66,12 +65,19 @@ Deletes a user by their unique identifier. // 1. 
Initial full index println!("--- Initial index ---"); let result = engine - .index(IndexContext::from_content(content_v1, DocumentFormat::Markdown)) + .index(IndexContext::from_content( + content_v1, + DocumentFormat::Markdown, + )) .await?; let doc_id = result.items[0].doc_id.clone(); if let Some(m) = &result.items[0].metrics { - println!("indexed in {}ms, {} nodes", m.total_time_ms(), m.nodes_processed); + println!( + "indexed in {}ms, {} nodes", + m.total_time_ms(), + m.nodes_processed + ); } // 2. Re-index unchanged content (incremental) — skips processing @@ -98,7 +104,11 @@ Deletes a user by their unique identifier. for item in &result.items { if let Some(m) = &item.metrics { - println!("updated in {}ms, {} nodes", m.total_time_ms(), m.nodes_processed); + println!( + "updated in {}ms, {} nodes", + m.total_time_ms(), + m.nodes_processed + ); } } @@ -110,4 +120,4 @@ Deletes a user by their unique identifier. } Ok(()) -} \ No newline at end of file +} diff --git a/rust/examples/index_pdf.rs b/rust/examples/index_pdf.rs index b370b39d..d8d8b57c 100644 --- a/rust/examples/index_pdf.rs +++ b/rust/examples/index_pdf.rs @@ -49,8 +49,8 @@ async fn main() -> vectorless::Result<()> { std::process::exit(1); } }; - let model = std::env::var("LLM_MODEL") - .unwrap_or_else(|_| "google/gemini-3-flash-preview".to_string()); + let model = + std::env::var("LLM_MODEL").unwrap_or_else(|_| "google/gemini-3-flash-preview".to_string()); let endpoint = std::env::var("LLM_ENDPOINT") .unwrap_or_else(|_| "http://localhost:4000/api/v1".to_string()); @@ -70,9 +70,7 @@ async fn main() -> vectorless::Result<()> { .await .map_err(|e| vectorless::Error::Config(e.to_string()))?; - let result = engine - .index(IndexContext::from_path(pdf_path)) - .await?; + let result = engine.index(IndexContext::from_path(pdf_path)).await?; println!( "Indexed: {}, Failed: {}", diff --git a/rust/examples/index_single.rs b/rust/examples/index_single.rs index 55ec52d5..623b4cb3 100644 --- 
a/rust/examples/index_single.rs +++ b/rust/examples/index_single.rs @@ -21,10 +21,9 @@ async fn main() -> vectorless::Result<()> { // Build engine with LLM configuration from environment or defaults. // Adjust the defaults below to match your setup. - let api_key = std::env::var("LLM_API_KEY") - .unwrap_or_else(|_| "sk-or-v1-...".to_string()); - let model = std::env::var("LLM_MODEL") - .unwrap_or_else(|_| "google/gemini-3-flash-preview".to_string()); + let api_key = std::env::var("LLM_API_KEY").unwrap_or_else(|_| "sk-or-v1-...".to_string()); + let model = + std::env::var("LLM_MODEL").unwrap_or_else(|_| "google/gemini-3-flash-preview".to_string()); let endpoint = std::env::var("LLM_ENDPOINT") .unwrap_or_else(|_| "http://localhost:4000/api/v1".to_string()); @@ -78,7 +77,10 @@ Monitoring is implemented using a Prometheus and Grafana stack, with custom metr // Index from content string let result = engine - .index(IndexContext::from_content(content, DocumentFormat::Markdown)) + .index(IndexContext::from_content( + content, + DocumentFormat::Markdown, + )) .await?; for item in &result.items { @@ -99,4 +101,4 @@ Monitoring is implemented using a Prometheus and Grafana stack, with custom metr } Ok(()) -} \ No newline at end of file +} diff --git a/rust/examples/indexing.rs b/rust/examples/indexing.rs index e4489d29..ee77e5f2 100644 --- a/rust/examples/indexing.rs +++ b/rust/examples/indexing.rs @@ -21,10 +21,9 @@ async fn main() -> vectorless::Result<()> { // Build engine with LLM configuration from environment or defaults. // Adjust the defaults below to match your setup. 
- let api_key = std::env::var("LLM_API_KEY") - .unwrap_or_else(|_| "sk-or-v1-...".to_string()); - let model = std::env::var("LLM_MODEL") - .unwrap_or_else(|_| "google/gemini-3-flash-preview".to_string()); + let api_key = std::env::var("LLM_API_KEY").unwrap_or_else(|_| "sk-or-v1-...".to_string()); + let model = + std::env::var("LLM_MODEL").unwrap_or_else(|_| "google/gemini-3-flash-preview".to_string()); let endpoint = std::env::var("LLM_ENDPOINT") .unwrap_or_else(|_| "http://localhost:4000/api/v1".to_string()); @@ -40,8 +39,7 @@ async fn main() -> vectorless::Result<()> { // Index multiple documents in a single call. // Paths are resolved relative to the workspace directory. let result = engine - .index( - IndexContext::from_paths(&["../README.md", "../CLAUDE.md"])) + .index(IndexContext::from_paths(&["../README.md", "../CLAUDE.md"])) .await?; println!("Indexed {} document(s)", result.items.len()); @@ -59,4 +57,4 @@ async fn main() -> vectorless::Result<()> { } Ok(()) -} \ No newline at end of file +} diff --git a/rust/src/client/engine.rs b/rust/src/client/engine.rs index cb88a1dc..c46fe7c8 100644 --- a/rust/src/client/engine.rs +++ b/rust/src/client/engine.rs @@ -166,7 +166,11 @@ impl Engine { return Err(Error::Config(format!( "All {} source(s) failed to index: {}", failed.len(), - failed.iter().map(|f| format!("{} ({})", f.source, f.error)).collect::>().join("; ") + failed + .iter() + .map(|f| format!("{} ({})", f.source, f.error)) + .collect::>() + .join("; ") ))); } if !items.is_empty() { @@ -184,20 +188,21 @@ impl Engine { .max_concurrent_requests .min(ctx.sources.len()); - let results: Vec<(Vec, Vec)> = futures::stream::iter(ctx.sources.iter().cloned()) - .map(|source| { - let options = ctx.options.clone(); - let name = ctx.name.clone(); - let engine = self.clone(); - async move { - engine - .process_source(&source, &options, name.as_deref()) - .await - } - }) - .buffer_unordered(concurrency) - .collect() - .await; + let results: Vec<(Vec, Vec)> = + 
futures::stream::iter(ctx.sources.iter().cloned()) + .map(|source| { + let options = ctx.options.clone(); + let name = ctx.name.clone(); + let engine = self.clone(); + async move { + engine + .process_source(&source, &options, name.as_deref()) + .await + } + }) + .buffer_unordered(concurrency) + .collect() + .await; let mut items = Vec::new(); let mut failed = Vec::new(); @@ -210,7 +215,11 @@ impl Engine { return Err(Error::Config(format!( "All {} source(s) failed to index: {}", failed.len(), - failed.iter().map(|f| format!("{} ({})", f.source, f.error)).collect::>().join("; ") + failed + .iter() + .map(|f| format!("{} ({})", f.source, f.error)) + .collect::>() + .join("; ") ))); } @@ -416,7 +425,11 @@ impl Engine { } }; - match self.retriever.query_with_reasoning_index(&tree, &ctx.query, &options, reasoning_index).await { + match self + .retriever + .query_with_reasoning_index(&tree, &ctx.query, &options, reasoning_index) + .await + { Ok(mut result) => { result.doc_id = doc_id; items.push(result); @@ -433,7 +446,11 @@ impl Engine { return Err(Error::Config(format!( "Query failed for all {} document(s): {}", failed.len(), - failed.iter().map(|f| format!("{} ({})", f.source, f.error)).collect::>().join("; ") + failed + .iter() + .map(|f| format!("{} ({})", f.source, f.error)) + .collect::>() + .join("; ") ))); } @@ -531,7 +548,10 @@ impl Engine { // ============================================================ /// Get document structure (tree) and optional reasoning index. Internal use only. 
- pub(crate) async fn get_structure(&self, doc_id: &str) -> Result<(DocumentTree, Option)> { + pub(crate) async fn get_structure( + &self, + doc_id: &str, + ) -> Result<(DocumentTree, Option)> { let workspace = self .workspace .as_ref() diff --git a/rust/src/client/indexer.rs b/rust/src/client/indexer.rs index e746020e..2764aaa7 100644 --- a/rust/src/client/indexer.rs +++ b/rust/src/client/indexer.rs @@ -447,7 +447,9 @@ impl IndexerClient { } persisted.reasoning_index = doc.reasoning_index; - persisted.meta.update_processing_stats(node_count, summary_tokens, duration_ms); + persisted + .meta + .update_processing_stats(node_count, summary_tokens, duration_ms); persisted } diff --git a/rust/src/client/retriever.rs b/rust/src/client/retriever.rs index ad0638c6..29c0e0d4 100644 --- a/rust/src/client/retriever.rs +++ b/rust/src/client/retriever.rs @@ -125,7 +125,8 @@ impl RetrieverClient { question: &str, options: &RetrieveOptions, ) -> Result { - self.query_with_reasoning_index(tree, question, options, None).await + self.query_with_reasoning_index(tree, question, options, None) + .await } /// Query a document tree with optional reasoning index for fast-path lookup. 
diff --git a/rust/src/config/mod.rs b/rust/src/config/mod.rs index 5ab66b55..af96c518 100644 --- a/rust/src/config/mod.rs +++ b/rust/src/config/mod.rs @@ -15,6 +15,5 @@ pub(crate) use loader::ConfigLoader; pub(crate) use types::{ CacheConfig, CompressionAlgorithm, ConcurrencyConfig, Config, FallbackBehavior, FallbackConfig, IndexerConfig, LlmConfig, LlmMetricsConfig, MetricsConfig, OnAllFailedBehavior, - PilotMetricsConfig, RetrievalConfig, RetrievalMetricsConfig, StrategyConfig, SufficiencyConfig, - SummaryConfig, + PilotMetricsConfig, RetrievalConfig, RetrievalMetricsConfig, SufficiencyConfig, SummaryConfig, }; diff --git a/rust/src/index/parse/pdf/parser.rs b/rust/src/index/parse/pdf/parser.rs index 7702872b..a3327cc0 100644 --- a/rust/src/index/parse/pdf/parser.rs +++ b/rust/src/index/parse/pdf/parser.rs @@ -296,7 +296,9 @@ impl PdfParser { TocProcessor::with_llm_client(client.clone()) } None => { - info!("PdfParser: creating TocProcessor without LLM client (no key configured)"); + info!( + "PdfParser: creating TocProcessor without LLM client (no key configured)" + ); TocProcessor::new() } }; diff --git a/rust/src/index/parse/toc/assigner.rs b/rust/src/index/parse/toc/assigner.rs index 52d50403..b7399dce 100644 --- a/rust/src/index/parse/toc/assigner.rs +++ b/rust/src/index/parse/toc/assigner.rs @@ -3,8 +3,8 @@ //! Page assigner - assigns physical page numbers to TOC entries. 
-use std::collections::HashMap; use futures::stream::{self, StreamExt}; +use std::collections::HashMap; use tracing::{debug, info}; use crate::config::LlmConfig; @@ -175,10 +175,7 @@ impl PageAssigner { }) .collect(); - let verified_offsets: Vec<_> = stream::iter(futures) - .buffer_unordered(5) - .collect() - .await; + let verified_offsets: Vec<_> = stream::iter(futures).buffer_unordered(5).collect().await; // Calculate the mode (most common offset) let successful: Vec<_> = verified_offsets @@ -277,21 +274,21 @@ Reply in JSON format: let total = entries.len(); // Launch entry searches with bounded concurrency to avoid rate limiting - let futures: Vec<_> = entries.iter().map(|entry| { - let title = entry.title.clone(); - let client = client.clone(); - let pages = pages_owned.clone(); - - async move { - let groups = Self::group_pages_owned(&pages, 5); - Self::locate_title_in_groups_static(&client, &title, &groups).await - } - }).collect(); + let futures: Vec<_> = entries + .iter() + .map(|entry| { + let title = entry.title.clone(); + let client = client.clone(); + let pages = pages_owned.clone(); - let results: Vec<_> = stream::iter(futures) - .buffer_unordered(5) - .collect() - .await; + async move { + let groups = Self::group_pages_owned(&pages, 5); + Self::locate_title_in_groups_static(&client, &title, &groups).await + } + }) + .collect(); + + let results: Vec<_> = stream::iter(futures).buffer_unordered(5).collect().await; info!("Assigned pages for {}/{} entries", results.len(), total); diff --git a/rust/src/index/parse/toc/detector.rs b/rust/src/index/parse/toc/detector.rs index 032a18af..f7c71111 100644 --- a/rust/src/index/parse/toc/detector.rs +++ b/rust/src/index/parse/toc/detector.rs @@ -79,11 +79,7 @@ impl TocDetector { let use_llm = config.use_llm_fallback; Self { config, - llm_client: if use_llm { - Some(client) - } else { - None - }, + llm_client: if use_llm { Some(client) } else { None }, patterns: Self::build_patterns(), } } diff --git 
a/rust/src/index/parse/toc/processor.rs b/rust/src/index/parse/toc/processor.rs index 8e5f59b0..e53b6346 100644 --- a/rust/src/index/parse/toc/processor.rs +++ b/rust/src/index/parse/toc/processor.rs @@ -180,16 +180,10 @@ impl TocProcessor { info!("No TOC found in document"); ProcessingMode::NoToc } else if detection.has_page_numbers { - info!( - "TOC found on pages {:?}, has page numbers", - detection.pages - ); + info!("TOC found on pages {:?}, has page numbers", detection.pages); ProcessingMode::TocWithPageNumbers } else { - info!( - "TOC found on pages {:?}, no page numbers", - detection.pages - ); + info!("TOC found on pages {:?}, no page numbers", detection.pages); ProcessingMode::TocWithoutPageNumbers }; @@ -222,7 +216,8 @@ impl TocProcessor { self.process_toc_with_page_numbers(detection, pages).await } ProcessingMode::TocWithoutPageNumbers => { - self.process_toc_without_page_numbers(detection, pages).await + self.process_toc_without_page_numbers(detection, pages) + .await } ProcessingMode::NoToc => { // NoToc always succeeds (produces some structure) @@ -234,9 +229,7 @@ impl TocProcessor { Ok(entries) if !entries.is_empty() => { // Verify the entries let mut mutable_entries = entries; - let report = self - .verify_and_repair(&mut mutable_entries, pages) - .await?; + let report = self.verify_and_repair(&mut mutable_entries, pages).await?; if report.accuracy >= self.config.accuracy_threshold { info!( @@ -437,8 +430,7 @@ impl TocProcessor { .filter(|(i, entry)| { let span = entry_page_span(entry, next_pages[*i], page_count); let tokens = entry_token_count(entry, pages); - span > self.config.max_pages_per_entry - && tokens > self.config.max_tokens_per_entry + span > self.config.max_pages_per_entry && tokens > self.config.max_tokens_per_entry }) .map(|(i, entry)| { let start = entry.physical_page.unwrap_or(1); @@ -541,7 +533,11 @@ impl Default for TocProcessor { /// Calculate how many pages an entry spans. 
/// /// From its physical_page to the next entry's physical_page (or document end). -fn entry_page_span(entry: &TocEntry, next_physical_page: Option, total_pages: usize) -> usize { +fn entry_page_span( + entry: &TocEntry, + next_physical_page: Option, + total_pages: usize, +) -> usize { let start = entry.physical_page.unwrap_or(1); let end = next_physical_page.unwrap_or(total_pages); end.saturating_sub(start) diff --git a/rust/src/index/parse/toc/repairer.rs b/rust/src/index/parse/toc/repairer.rs index 13c19877..3c7666fe 100644 --- a/rust/src/index/parse/toc/repairer.rs +++ b/rust/src/index/parse/toc/repairer.rs @@ -94,23 +94,15 @@ impl IndexRepairer { let start = expected_page.saturating_sub(search_range).max(1); let end = (expected_page + search_range).min(pages.len()); - let result = Self::find_correct_page_static( - &client, - &title, - &pages, - start..=end, - ) - .await; + let result = + Self::find_correct_page_static(&client, &title, &pages, start..=end).await; (title, expected_page, result) } }) .collect(); - let results: Vec<_> = stream::iter(tasks) - .buffer_unordered(5) - .collect() - .await; + let results: Vec<_> = stream::iter(tasks).buffer_unordered(5).collect().await; // Apply repairs let mut repaired_count = 0; diff --git a/rust/src/index/parse/toc/structure_extractor.rs b/rust/src/index/parse/toc/structure_extractor.rs index be2486d9..36925644 100644 --- a/rust/src/index/parse/toc/structure_extractor.rs +++ b/rust/src/index/parse/toc/structure_extractor.rs @@ -122,10 +122,8 @@ impl StructureExtractor { let initial = initial_entries_ref.to_vec(); async move { - let result = Self::generate_continuation_with_client( - &client, &group, &initial, - ) - .await; + let result = + Self::generate_continuation_with_client(&client, &group, &initial).await; (group.start_page, group.end_page, result) } }) @@ -150,10 +148,7 @@ impl StructureExtractor { all_entries.extend(entries); } Err(e) => { - warn!( - "Continuation group (pages {}-{}) failed: {}", - start, 
end, e - ); + warn!("Continuation group (pages {}-{}) failed: {}", start, end, e); } } } @@ -165,8 +160,7 @@ impl StructureExtractor { .cmp(&b.physical_page.unwrap_or(0)) }); all_entries.dedup_by(|a, b| { - a.title.trim() == b.title.trim() - && a.physical_page == b.physical_page + a.title.trim() == b.title.trim() && a.physical_page == b.physical_page }); Ok(Self::finalize_entries(all_entries, page_count)) @@ -177,10 +171,7 @@ impl StructureExtractor { for entry in &mut entries { if let Some(p) = entry.physical_page { if p > page_count { - warn!( - "Truncating out-of-range page {} for '{}'", - p, entry.title - ); + warn!("Truncating out-of-range page {} for '{}'", p, entry.title); entry.physical_page = Some(page_count); } } @@ -461,21 +452,26 @@ mod tests { // Create pages with enough text to span multiple groups let pages: Vec = (1..=10) .map(|i| { - let text = format!("Page {} content. This is a longer text to use more tokens. ", i).repeat(10); + let text = format!( + "Page {} content. This is a longer text to use more tokens. 
", + i + ) + .repeat(10); PdfPage::new(i, text) }) .collect(); let groups = extractor.group_pages(&pages); - assert!(groups.len() > 1, "Expected multiple groups, got {}", groups.len()); + assert!( + groups.len() > 1, + "Expected multiple groups, got {}", + groups.len() + ); } #[test] fn test_format_group_text() { - let pages = vec![ - PdfPage::new(1, "Hello"), - PdfPage::new(2, "World"), - ]; + let pages = vec![PdfPage::new(1, "Hello"), PdfPage::new(2, "World")]; let text = format_group_text(&pages); assert!(text.contains("")); assert!(text.contains("")); diff --git a/rust/src/index/parse/toc/verifier.rs b/rust/src/index/parse/toc/verifier.rs index fd944386..3eda474c 100644 --- a/rust/src/index/parse/toc/verifier.rs +++ b/rust/src/index/parse/toc/verifier.rs @@ -79,34 +79,29 @@ impl IndexVerifier { // Launch verification checks with bounded concurrency let client = self.client.clone(); - let futures: Vec<_> = sample.iter().map(|(index, entry)| { - let index = *index; - let title = entry.title.clone(); - let physical_page = entry.physical_page; - let client = client.clone(); - let pages = pages.to_vec(); - - async move { - match physical_page { - Some(page) => { - let result = - Self::verify_entry_with_client(&client, &title, page, &pages).await; - (index, title, page, result) + let futures: Vec<_> = sample + .iter() + .map(|(index, entry)| { + let index = *index; + let title = entry.title.clone(); + let physical_page = entry.physical_page; + let client = client.clone(); + let pages = pages.to_vec(); + + async move { + match physical_page { + Some(page) => { + let result = + Self::verify_entry_with_client(&client, &title, page, &pages).await; + (index, title, page, result) + } + None => (index, title, 0, Ok(Err(ErrorType::PageOutOfRange))), } - None => ( - index, - title, - 0, - Ok(Err(ErrorType::PageOutOfRange)), - ), } - } - }).collect(); + }) + .collect(); - let results: Vec<_> = stream::iter(futures) - .buffer_unordered(5) - .collect() - .await; + let results: 
Vec<_> = stream::iter(futures).buffer_unordered(5).collect().await; // Aggregate results let total = results.len(); @@ -121,7 +116,12 @@ impl IndexVerifier { } Err(e) => { debug!("Verification LLM call failed: {}", e); - errors.push(VerificationError::new(index, title, page, ErrorType::TitleNotFound)); + errors.push(VerificationError::new( + index, + title, + page, + ErrorType::TitleNotFound, + )); } } } diff --git a/rust/src/index/stages/enhance.rs b/rust/src/index/stages/enhance.rs index a79b5fb3..d33e0acc 100644 --- a/rust/src/index/stages/enhance.rs +++ b/rust/src/index/stages/enhance.rs @@ -295,9 +295,8 @@ impl IndexStage for EnhanceStage { if summary.is_empty() { failed += 1; } else { - ctx.metrics.add_tokens_generated( - crate::utils::estimate_tokens(&summary), - ); + ctx.metrics + .add_tokens_generated(crate::utils::estimate_tokens(&summary)); tree.set_summary(node_id, &summary); generated += 1; ctx.metrics.increment_summaries(); diff --git a/rust/src/index/stages/optimize.rs b/rust/src/index/stages/optimize.rs index 6b21688f..209de7e2 100644 --- a/rust/src/index/stages/optimize.rs +++ b/rust/src/index/stages/optimize.rs @@ -79,11 +79,12 @@ impl OptimizeStage { curr.content.push_str("\n\n"); } // Prefix with heading to preserve boundary - curr.content - .push_str(&format!("## {}\n{}", next_node.title, next_node.content)); + curr.content.push_str(&format!( + "## {}\n{}", + next_node.title, next_node.content + )); } - curr.token_count = - Some(curr.token_count.unwrap_or(0) + next_tokens); + curr.token_count = Some(curr.token_count.unwrap_or(0) + next_tokens); } } diff --git a/rust/src/index/stages/parse.rs b/rust/src/index/stages/parse.rs index 6c8166b6..2ca30a14 100644 --- a/rust/src/index/stages/parse.rs +++ b/rust/src/index/stages/parse.rs @@ -102,7 +102,8 @@ impl IndexStage for ParseStage { ctx.name = name.clone(); // Parse content directly - crate::index::parse::parse_content(content, *format, self.llm_client.clone()).await? 
+ crate::index::parse::parse_content(content, *format, self.llm_client.clone()) + .await? } IndexInput::Bytes { data, name, format } => { // Set name diff --git a/rust/src/llm/executor.rs b/rust/src/llm/executor.rs index 620f111a..d13e24fc 100644 --- a/rust/src/llm/executor.rs +++ b/rust/src/llm/executor.rs @@ -338,7 +338,8 @@ impl LlmExecutor { let api_key = self.config.api_key.clone().ok_or_else(|| { LlmError::Config( - "No API key configured. Call .with_key(\"sk-...\") when building the engine.".to_string(), + "No API key configured. Call .with_key(\"sk-...\") when building the engine." + .to_string(), ) })?; diff --git a/rust/src/llm/pool.rs b/rust/src/llm/pool.rs index 952c02f4..51b07ff3 100644 --- a/rust/src/llm/pool.rs +++ b/rust/src/llm/pool.rs @@ -108,8 +108,7 @@ impl LlmPool { pub fn with_shared_concurrency(mut self, controller: Arc) -> Self { self.concurrency = Some(controller.clone()); self.index = Arc::new( - LlmClient::new(self.index.config().clone()) - .with_shared_concurrency(controller.clone()), + LlmClient::new(self.index.config().clone()).with_shared_concurrency(controller.clone()), ); self.retrieval = Arc::new( LlmClient::new(self.retrieval.config().clone()) diff --git a/rust/src/retrieval/content/scorer.rs b/rust/src/retrieval/content/scorer.rs index 3472a733..777006da 100644 --- a/rust/src/retrieval/content/scorer.rs +++ b/rust/src/retrieval/content/scorer.rs @@ -162,11 +162,8 @@ impl RelevanceScorer { let mut components = ScoreComponents::default(); // 1. Keyword score (content + title + summary combined) - components.keyword_score = self.compute_keyword_score(&format!( - "{} {}", - chunk.title, - chunk.content - )); + components.keyword_score = + self.compute_keyword_score(&format!("{} {}", chunk.title, chunk.content)); // 2. 
BM25 score (if enabled) if matches!( diff --git a/rust/src/retrieval/pilot/llm_pilot.rs b/rust/src/retrieval/pilot/llm_pilot.rs index 408397fe..df8e3b02 100644 --- a/rust/src/retrieval/pilot/llm_pilot.rs +++ b/rust/src/retrieval/pilot/llm_pilot.rs @@ -82,7 +82,8 @@ pub struct LlmPilot { /// Shared pipeline budget — the primary budget source when set. /// When available, Pilot checks this before making LLM calls and /// records token consumption here. - pipeline_budget: parking_lot::RwLock>>, + pipeline_budget: + parking_lot::RwLock>>, /// Context builder. context_builder: ContextBuilder, /// Prompt builder. @@ -223,7 +224,10 @@ impl LlmPilot { /// but token consumption is recorded against the pipeline budget. /// Call this at query time (not construction time) since the pipeline /// budget is created per-query. - pub fn set_pipeline_budget(&self, budget: Arc) { + pub fn set_pipeline_budget( + &self, + budget: Arc, + ) { *self.pipeline_budget.write() = Some(budget); } diff --git a/rust/src/retrieval/pilot/mod.rs b/rust/src/retrieval/pilot/mod.rs index a6835b79..5af9cead 100644 --- a/rust/src/retrieval/pilot/mod.rs +++ b/rust/src/retrieval/pilot/mod.rs @@ -44,7 +44,7 @@ mod prompts; mod r#trait; pub use config::PilotConfig; -pub use decision::{InterventionPoint, PilotDecision, RankedCandidate, SearchDirection}; +pub use decision::{InterventionPoint, PilotDecision}; pub use llm_pilot::LlmPilot; pub use r#trait::{Pilot, SearchState}; diff --git a/rust/src/retrieval/pipeline/orchestrator.rs b/rust/src/retrieval/pipeline/orchestrator.rs index c42b5cbe..63e18b4e 100644 --- a/rust/src/retrieval/pipeline/orchestrator.rs +++ b/rust/src/retrieval/pipeline/orchestrator.rs @@ -330,7 +330,10 @@ impl RetrievalOrchestrator { // Share the pipeline budget with the Pilot (unified budget) if let Some(ref pilot) = self.pilot { - if let Some(llm_pilot) = pilot.as_any().downcast_ref::() { + if let Some(llm_pilot) = pilot + .as_any() + .downcast_ref::() + { 
llm_pilot.set_pipeline_budget(ctx.budget_controller.clone()); } } @@ -622,7 +625,10 @@ impl RetrievalOrchestrator { // Share the pipeline budget with the Pilot (unified budget) if let Some(ref pilot) = self.pilot { - if let Some(llm_pilot) = pilot.as_any().downcast_ref::() { + if let Some(llm_pilot) = pilot + .as_any() + .downcast_ref::() + { llm_pilot.set_pipeline_budget(ctx.budget_controller.clone()); } } @@ -924,7 +930,10 @@ impl RetrievalOrchestrator { // Share the pipeline budget with the Pilot (unified budget) if let Some(ref pilot) = self.pilot { - if let Some(llm_pilot) = pilot.as_any().downcast_ref::() { + if let Some(llm_pilot) = pilot + .as_any() + .downcast_ref::() + { llm_pilot.set_pipeline_budget(ctx.budget_controller.clone()); } } diff --git a/rust/src/retrieval/search/mcts.rs b/rust/src/retrieval/search/mcts.rs index 7886991f..9663d686 100644 --- a/rust/src/retrieval/search/mcts.rs +++ b/rust/src/retrieval/search/mcts.rs @@ -69,8 +69,8 @@ impl MctsSearch { } let exploitation = child_stats.total_score / child_stats.visits as f32; - let exploration = - self.exploration_weight * (parent_visits as f32).ln().sqrt() / child_stats.visits as f32; + let exploration = self.exploration_weight * (parent_visits as f32).ln().sqrt() + / child_stats.visits as f32; // Blend exploitation with Pilot prior 0.5 * (exploitation + prior_score) + exploration diff --git a/rust/src/retrieval/search/mod.rs b/rust/src/retrieval/search/mod.rs index 8ddceed5..cceec5e4 100644 --- a/rust/src/retrieval/search/mod.rs +++ b/rust/src/retrieval/search/mod.rs @@ -16,6 +16,5 @@ pub use beam::BeamSearch; pub use bm25::{Bm25Engine, Bm25Params, FieldDocument, STOPWORDS, extract_keywords}; pub use greedy::PurePilotSearch; pub use mcts::MctsSearch; -pub use pilot_scorer::{PilotDecisionCache, score_candidates}; pub use toc_navigator::{SearchCue, ToCNavigator}; pub use r#trait::{SearchConfig, SearchResult, SearchTree}; diff --git a/rust/src/retrieval/search/pilot_scorer.rs 
b/rust/src/retrieval/search/pilot_scorer.rs index a70cbb3f..22db9805 100644 --- a/rust/src/retrieval/search/pilot_scorer.rs +++ b/rust/src/retrieval/search/pilot_scorer.rs @@ -17,9 +17,9 @@ use std::collections::{HashMap, HashSet}; use std::sync::Arc; use tokio::sync::Mutex; +use super::scorer::{NodeScorer, ScoringContext}; use crate::document::{DocumentTree, NodeId}; use crate::retrieval::pilot::{Pilot, PilotDecision, SearchState}; -use super::scorer::{NodeScorer, ScoringContext}; /// Cache key: (query_fingerprint, parent_node_id). type CacheKey = (u64, NodeId); @@ -186,51 +186,4 @@ mod tests { let key3 = PilotDecisionCache::cache_key("world", nid); assert_ne!(key1, key3); } - - #[tokio::test] - async fn test_cache_hit() { - let mut arena = Arena::new(); - let nid0 = make_node_id(&mut arena); - let nid1 = make_node_id(&mut arena); - - let cache = PilotDecisionCache::new(); - use crate::retrieval::pilot::{RankedCandidate, SearchDirection}; - - let decision = PilotDecision::new( - vec![RankedCandidate::new(nid1, 0.9)], - SearchDirection::GoDeeper { reason: "test".into() }, - 0.8, - "test".into(), - ); - - cache.put("query", nid0, &decision).await; - let hit = cache.get("query", nid0).await; - assert!(hit.is_some()); - assert_eq!(hit.unwrap().confidence, 0.8); - - let miss = cache.get("other", nid0).await; - assert!(miss.is_none()); - } - - #[tokio::test] - async fn test_cache_clear() { - let mut arena = Arena::new(); - let nid = make_node_id(&mut arena); - - let cache = PilotDecisionCache::new(); - use crate::retrieval::pilot::SearchDirection; - - let decision = PilotDecision::new( - vec![], - SearchDirection::GoDeeper { reason: "test".into() }, - 0.5, - "test".into(), - ); - - cache.put("q", nid, &decision).await; - assert!(cache.get("q", nid).await.is_some()); - - cache.clear().await; - assert!(cache.get("q", nid).await.is_none()); - } } diff --git a/rust/src/retrieval/search/toc_navigator.rs b/rust/src/retrieval/search/toc_navigator.rs index ae156a21..778b5da2 
100644 --- a/rust/src/retrieval/search/toc_navigator.rs +++ b/rust/src/retrieval/search/toc_navigator.rs @@ -181,13 +181,10 @@ impl ToCNavigator { for &node_id in top_level_nodes { if let Some(node) = tree.get(node_id) { - let text = format!("{} {} {}", node.title, node.summary, node.content) - .to_lowercase(); + let text = + format!("{} {} {}", node.title, node.summary, node.content).to_lowercase(); - let match_count = query_words - .iter() - .filter(|w| text.contains(*w)) - .count(); + let match_count = query_words.iter().filter(|w| text.contains(*w)).count(); let mut score = if query_words.is_empty() { 0.0 @@ -234,10 +231,7 @@ impl ToCNavigator { return Vec::new(); } - scored.sort_by(|a, b| { - b.1.partial_cmp(&a.1) - .unwrap_or(std::cmp::Ordering::Equal) - }); + scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)); scored.truncate(self.max_branches); if !scored.is_empty() { @@ -367,7 +361,9 @@ Rules: } if cues.is_empty() { - warn!("LLM refinement returned no valid candidates, falling back to summary matching"); + warn!( + "LLM refinement returned no valid candidates, falling back to summary matching" + ); let summary_cues = self.match_by_summary(query, tree, top_level_nodes); if summary_cues.is_empty() { return vec![SearchCue { @@ -386,7 +382,10 @@ Rules: cues } Err(e) => { - warn!("LLM refinement failed: {}, falling back to summary matching", e); + warn!( + "LLM refinement failed: {}, falling back to summary matching", + e + ); // Don't fall directly to root — try summary matching first let summary_cues = self.match_by_summary(query, tree, top_level_nodes); if summary_cues.is_empty() { diff --git a/rust/src/retrieval/stages/evaluate.rs b/rust/src/retrieval/stages/evaluate.rs index 972f9667..d0d51bef 100644 --- a/rust/src/retrieval/stages/evaluate.rs +++ b/rust/src/retrieval/stages/evaluate.rs @@ -137,7 +137,8 @@ impl EvaluateStage { let node_content = self.build_node_content(&ctx.tree, candidate.node_id); // Cache for 
build_response reuse - ctx.node_content_cache.insert(candidate.node_id, node_content.clone()); + ctx.node_content_cache + .insert(candidate.node_id, node_content.clone()); // Add to aggregated content if !node_content.is_empty() { @@ -252,11 +253,7 @@ impl EvaluateStage { _ => { // Cache miss (edge case): compute inline let built = self.build_node_content(&ctx.tree, candidate.node_id); - if built.is_empty() { - None - } else { - Some(built) - } + if built.is_empty() { None } else { Some(built) } } } } else { diff --git a/rust/src/retrieval/stages/search.rs b/rust/src/retrieval/stages/search.rs index fc5dd2f2..8f431dec 100644 --- a/rust/src/retrieval/stages/search.rs +++ b/rust/src/retrieval/stages/search.rs @@ -23,7 +23,8 @@ use crate::retrieval::pipeline::{ }; use crate::retrieval::search::extract_keywords; use crate::retrieval::search::{ - BeamSearch, PurePilotSearch, MctsSearch, SearchConfig as SearchAlgConfig, SearchCue, SearchTree, ToCNavigator, + BeamSearch, MctsSearch, PurePilotSearch, SearchConfig as SearchAlgConfig, SearchCue, + SearchTree, ToCNavigator, }; use crate::retrieval::strategy::{ HybridConfig, HybridStrategy, KeywordStrategy, LlmStrategy, RetrievalStrategy, @@ -409,7 +410,9 @@ impl SearchStage { // Phrase patterns — match with intervening words removed. 
// "what is this project about" → remove common filler words, check for "what is this about" - let filler_words = ["project", "document", "file", "paper", "article", "text", "book", "the", "a", "an"]; + let filler_words = [ + "project", "document", "file", "paper", "article", "text", "book", "the", "a", "an", + ]; let cleaned: String = lower .split_whitespace() .filter(|w| !filler_words.contains(w)) @@ -673,10 +676,9 @@ impl RetrievalStage for SearchStage { for cue in &mut cues { if let Some(node) = ctx.tree.get(cue.root) { let node_path = node.title.as_str(); - if let Some((_, cached_conf)) = l2_paths - .iter() - .find(|(path, _)| node_path.contains(path.as_str()) || path.contains(node_path)) - { + if let Some((_, cached_conf)) = l2_paths.iter().find(|(path, _)| { + node_path.contains(path.as_str()) || path.contains(node_path) + }) { // Blend current confidence with historical: 60% current + 40% cached cue.confidence = cue.confidence * 0.6 + cached_conf * 0.4; debug!( @@ -768,9 +770,7 @@ impl RetrievalStage for SearchStage { for candidate in &mut ctx.candidates { if let Some(node) = ctx.tree.get(candidate.node_id) { let content_fp = crate::utils::fingerprint::Fingerprint::from_str(&node.content); - if let Some((cached_score, _strategy)) = - ctx.reasoning_cache.l3_get(&content_fp) - { + if let Some((cached_score, _strategy)) = ctx.reasoning_cache.l3_get(&content_fp) { // Blend: if L3 has a higher score for this node, boost it if cached_score > candidate.score { candidate.score = (candidate.score + cached_score) / 2.0; diff --git a/rust/src/retrieval/strategy/cross_document.rs b/rust/src/retrieval/strategy/cross_document.rs index 40871057..c296ec24 100644 --- a/rust/src/retrieval/strategy/cross_document.rs +++ b/rust/src/retrieval/strategy/cross_document.rs @@ -252,9 +252,7 @@ impl CrossDocumentStrategy { .collect(); for node_id in high_score_nodes { - let depth_results = self - .search_subtree(&doc.tree, node_id, context, 0, 2) - .await; + let depth_results = 
self.search_subtree(&doc.tree, node_id, context, 0, 2).await; scored_nodes.extend(depth_results); } @@ -289,44 +287,43 @@ impl CrossDocumentStrategy { context: &'a RetrievalContext, current_depth: usize, max_depth: usize, - ) -> std::pin::Pin> + Send + 'a>> { + ) -> std::pin::Pin< + Box> + Send + 'a>, + > { Box::pin(async move { - if current_depth >= max_depth { - return Vec::new(); - } + if current_depth >= max_depth { + return Vec::new(); + } - let children = tree.children(parent_id); - if children.is_empty() { - return Vec::new(); - } + let children = tree.children(parent_id); + if children.is_empty() { + return Vec::new(); + } - let evaluations = self - .inner - .evaluate_nodes(tree, &children, context) - .await; + let evaluations = self.inner.evaluate_nodes(tree, &children, context).await; - let mut results = Vec::new(); - let mut explore_further = Vec::new(); + let mut results = Vec::new(); + let mut explore_further = Vec::new(); - for (node_id, eval) in children.into_iter().zip(evaluations.into_iter()) { - if eval.score >= self.config.min_score { - results.push((node_id, eval.clone())); - } - // Only explore deeper if score is promising - if eval.score >= self.config.min_score * 1.5 { - explore_further.push(node_id); + for (node_id, eval) in children.into_iter().zip(evaluations.into_iter()) { + if eval.score >= self.config.min_score { + results.push((node_id, eval.clone())); + } + // Only explore deeper if score is promising + if eval.score >= self.config.min_score * 1.5 { + explore_further.push(node_id); + } } - } - // Recurse into promising children - for child_id in explore_further { - let deeper = self - .search_subtree(tree, child_id, context, current_depth + 1, max_depth) - .await; - results.extend(deeper); - } + // Recurse into promising children + for child_id in explore_further { + let deeper = self + .search_subtree(tree, child_id, context, current_depth + 1, max_depth) + .await; + results.extend(deeper); + } - results + results }) } diff --git 
a/rust/src/retrieval/strategy/llm.rs b/rust/src/retrieval/strategy/llm.rs index 41cd8987..e22b8b43 100644 --- a/rust/src/retrieval/strategy/llm.rs +++ b/rust/src/retrieval/strategy/llm.rs @@ -240,10 +240,7 @@ Rules: let toc = self.toc_view.generate_from(tree, node_ids[0]); let toc_markdown = self.toc_view.format_markdown(&toc); let toc_preview: String = toc_markdown.chars().take(800).collect(); - format!( - "\n\nDocument ToC:\n{}\n", - toc_preview - ) + format!("\n\nDocument ToC:\n{}\n", toc_preview) } else { String::new() }; diff --git a/samples/083a0e39-5c92-404b-9fb7-8458152dd65f.bin b/samples/083a0e39-5c92-404b-9fb7-8458152dd65f.bin new file mode 100644 index 00000000..d8b3841e --- /dev/null +++ b/samples/083a0e39-5c92-404b-9fb7-8458152dd65f.bin @@ -0,0 +1 @@ +{"version":1,"checksum":"df9576fe0cb0f42948a619b0352f6fbd3c647258317ff2ffe5b170baec6eb302","payload":{"meta":{"content_fingerprint":"zsbpiOWjNlqXOyJuG/CAgQ==","created_at":"2026-04-13T15:19:05.175062664Z","description":"","format":"pdf","id":"083a0e39-5c92-404b-9fb7-8458152dd65f","line_count":null,"logic_fingerprint":"4p/tkAx4Dcrk805539ue0Q==","modified_at":"2026-04-13T15:19:05.178281613Z","name":"Docker_Cheat_Sheet","node_count":7,"page_count":null,"processing_duration_ms":99141,"processing_version":0,"source_path":"/home/ztgx/Desktop/vectorless/samples/Docker_Cheat_Sheet.pdf","total_summary_tokens":378},"pages":[],"reasoning_index":{"config_hash":0,"hot_nodes":{},"section_map":{"1":{"index1":2,"stamp":0},"docker cheat sheet":{"index1":2,"stamp":0}},"summary_shortcut":{"document_summary":"Docker Cheat Sheet: This cheat sheet provides a quick reference for essential Docker and Docker Compose command-line operations. It covers core topics including container process management, image and repository handling, volume and port mapping, and system troubleshooting. 
Use this guide to quickly locate commands for building, running, and managing Dockerized environments.","root_node":{"index1":1,"stamp":0},"section_summaries":[{"depth":1,"node_id":{"index1":2,"stamp":0},"summary":"This cheat sheet provides a quick reference for essential Docker and Docker Compose command-line operations. It covers core topics including container process management, image and repository handling, volume and port mapping, and system troubleshooting. Use this guide to quickly locate commands for building, running, and managing Dockerized environments.","title":"Docker Cheat Sheet"}]},"topic_paths":{"active":[{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0}],"additionally":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0}],"advanced":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"alongside":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0}],"analysis":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"applications":[{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0}],"basic":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"brief":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"building":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0}],"categories":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0}],"cheat":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0},{"depth":0,"node_id":{"index1":1,"stamp":0},"weight":0.5714285969734192},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":0.4285714626312256},{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":0.4285714626312256}],"cli":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"i
ndex1":7,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0}],"command":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0},{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0}],"commands":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":0.5},{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":0.5},{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":0.25},{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":0.25},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":0.25}],"complete":[{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0}],"compose":[{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":0.8571429252624512},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":0.4285714626312256},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":0.4285714626312256},{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":0.4285714626312256},{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":0.4285714626312256}],"comprehensive":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"consumption":[{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0}],"container":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":0.6666666865348816},{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":0.6666666865348816},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":0.6666666865348816},{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":0.3333333432674408}],"containerized":[{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0}],"containers":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0}],"copying":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0}],"c
ore":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0}],"covering":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"covers":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0}],"data":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0}],"description":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"destroy":[{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0}],"details":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0}],"docker":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":0.9000000357627869},{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":0.699999988079071},{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":0.6000000238418579},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":0.6000000238418579},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":0.6000000238418579},{"depth":0,"node_id":{"index1":1,"stamp":0},"weight":0.4000000059604645}],"dockerized":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0}],"document":[{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"environments":[{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0},{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0}],"essential":[{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0},{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"everything":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"executing":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight
":1.0}],"features":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"files":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0}],"five":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"four":[{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0}],"fundamental":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0}],"guide":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0}],"handling":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0},{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0}],"health":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0}],"host":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0}],"image":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0},{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0}],"images":[{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0}],"including":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0}],"inspecting":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0}],"lifecycle":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0}],"like":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"line":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0},{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0}],"list":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0}],"locate":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0}],"log":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"logs":[{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0}],"machines":[{"depth":2,"node_id":
{"index1":5,"stamp":0},"weight":1.0}],"main":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"manage":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0}],"management":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":0.4285714626312256},{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":0.4285714626312256}],"managing":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0},{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0}],"mapping":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0}],"metadata":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0}],"monitor":[{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0}],"monitoring":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0}],"mounting":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"multi":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0}],"necessary":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0}],"networking":[{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0}],"operations":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0},{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":0.5},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":0.5}],"orchestrating":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0}],"orches
tration":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"organized":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0}],"outlines":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0}],"overall":[{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0}],"persistence":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0}],"port":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0},{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0}],"ports":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":0.4285714626312256},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":0.4285714626312256}],"presented":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"process":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0},{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":0.4285714626312256}],"processes":[{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0}],"provides":[{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0},{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0}],"pushing":[{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0}],"quick":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0}],"quickly":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0}],"reference":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1"
:5,"stamp":0},"weight":1.0}],"repository":[{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0},{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":0.4285714626312256},{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":0.4285714626312256}],"required":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0}],"resource":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0}],"resources":[{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0}],"running":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0}],"section":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0}],"serves":[{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0}],"sheet":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0},{"depth":0,"node_id":{"index1":1,"stamp":0},"weight":0.5714285969734192},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":0.4285714626312256},{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":0.4285714626312256}],"start":[{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0}],"stop":[{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0}],"storage":[{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":1.0}],"syntax":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0}],"system":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0}],"topics":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0}],"tracking":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0}],"troubleshooting":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":0.4285714626312256},{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":0.4285714626312256},{"depth":1,"node_id":{"index1":2,"stamp":0},"w
eight":0.4285714626312256},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":0.4285714626312256}],"usage":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0}],"use":[{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0}],"utilities":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0}],"viewing":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0}],"volume":[{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":1.0},{"depth":1,"node_id":{"index1":2,"stamp":0},"weight":1.0}],"volumes":[{"depth":2,"node_id":{"index1":5,"stamp":0},"weight":1.0},{"depth":2,"node_id":{"index1":3,"stamp":0},"weight":0.4285714626312256},{"depth":2,"node_id":{"index1":4,"stamp":0},"weight":0.4285714626312256}],"well":[{"depth":2,"node_id":{"index1":7,"stamp":0},"weight":1.0}],"within":[{"depth":2,"node_id":{"index1":6,"stamp":0},"weight":1.0}]}},"tree":{"arena":{"first_free_slot":null,"last_free_slot":null,"nodes":[{"data":{"Data":{"content":"","depth":0,"end_index":1,"end_page":1,"node_id":"0001","physical_index":null,"references":[],"start_index":1,"start_page":1,"structure":"","summary":"","title":"Docker_Cheat_Sheet","token_count":null}},"first_child":{"index1":2,"stamp":0},"last_child":{"index1":2,"stamp":0},"next_sibling":null,"parent":null,"previous_sibling":null,"stamp":0},{"data":{"Data":{"content":"Process Management\n\n# Show all running docker containers\ndocker ps\n\n# Show all docker containers\ndocker ps -a\n\n# Run a container\ndocker run :\n\n# Run a container and connect to it\ndocker run -it :\n\n# Run a container in the background\ndocker run -d :\n\n# Stop a container\ndocker stop \n\n# Kill a container\ndocker kill \n Images/Repository\n\n# List available local images\ndocker images\n\n# Search for docker images\ndocker search \n\n# Pull a docker image\ndocker pull \n\n# Build an image with a dockerfile\ndocker build -t : -f \n\n# Login to a remote repository\ndocker login \n\n# Push an image to your remotee repository\ndocker push :\n\n# 
Remove a local docker image\ndocker rmi :\n\n# Show metadata for an image\ndocker inspect \n\n# Remove all unused docker images\ndocker image prune\n\nVolumes & Ports\n\n# List volumes\ndocker volume ls\n\n# Create a volume\ndocker volume create \n\n# Delete a volume\ndocker volume rm \n\n# Show volume metadata\ndocker volume inspect \n\n# Delete all volumes not attached to a container\ndocker volume prune\n\n# Mount a local directory to your container\ndocker run -v : \n\n# Copy file or folder from a docker container to host machine\ndocker cp : \n\n# Copy file or folder from local machine onto a container\ndocker cp :\n\n# Map a local port to a docker instance\ndocker run -d -p 127.0.0.1:: \n\n# List the ports a docker container is running on\ndocker port \n Troubleshooting\n\n# Show the logs of a container\ndocker logs \n\n# Follow/tail the logs of a container\ndocker logs -f \n\n# Show timestamps on docker logs\ndocker logs -t \n\n# Show details/metadata of a container\ndocker inspect \n\n# Show a 'top' view of processes running on a container\ndocker top \n\n# Show a 'top' view of all docker containers\ndocker stats\n\n# Show any files that have changed since startup\ndocker diff \n\n# Connect to an already running container\ndocker attach \n\n# Execute a command on a container\ndocker exec -it /bin/bash\n\n# Show docker system wide information\ndocker system info\n\n# Show docker disk space used\ndocker system df\n\n \n\nDocker Compose\n\n# Start your docker-compose defined resources in detached mode\ndocker-compose up -d -f \n\n# Stop all docker-compose resources\ndocker-compose stop\n\n# Destroy all docker-compose resources\ndocker-compose down\n\n# Show docker-compose processes\ndocker-compose ps\n\n# Show docker-compose logs\ndocker-compose logs\n\n# Show docker-compose resource consumption\ndocker-compose 
top","depth":1,"end_index":1,"end_page":1,"node_id":"0002","physical_index":null,"references":[],"start_index":1,"start_page":1,"structure":"1","summary":"This cheat sheet provides a quick reference for essential Docker and Docker Compose command-line operations. It covers core topics including container process management, image and repository handling, volume and port mapping, and system troubleshooting. Use this guide to quickly locate commands for building, running, and managing Dockerized environments.","title":"Docker Cheat Sheet","token_count":676}},"first_child":{"index1":3,"stamp":0},"last_child":{"index1":7,"stamp":0},"next_sibling":null,"parent":{"index1":1,"stamp":0},"previous_sibling":null,"stamp":0},{"data":{"Data":{"content":"# Show all running docker containers\ndocker ps\n\n# Show all docker containers\ndocker ps -a\n\n# Run a container\ndocker run :\n\n# Run a container and connect to it\ndocker run -it :\n\n# Run a container in the background\ndocker run -d :\n\n# Stop a container\ndocker stop \n\n# Kill a container\ndocker kill \n Images/Repository\n\n# List available local images\ndocker images\n\n# Search for docker images\ndocker search \n\n# Pull a docker image\ndocker pull \n\n# Build an image with a dockerfile\ndocker build -t : -f \n\n# Login to a remote repository\ndocker login \n\n# Push an image to your remotee repository\ndocker push :\n\n# Remove a local docker image\ndocker rmi :\n\n# Show metadata for an image\ndocker inspect \n\n# Remove all unused docker images\ndocker image prune\n\nVolumes & Ports\n\n# List volumes\ndocker volume ls\n\n# Create a volume\ndocker volume create \n\n# Delete a volume\ndocker volume rm \n\n# Show volume metadata\ndocker volume inspect \n\n# Delete all volumes not attached to a container\ndocker volume prune\n\n# Mount a local directory to your container\ndocker run -v : \n\n# Copy file or folder from a docker container to host machine\ndocker cp : \n\n# Copy file or folder from local machine onto a 
container\ndocker cp :\n\n# Map a local port to a docker instance\ndocker run -d -p 127.0.0.1:: \n\n# List the ports a docker container is running on\ndocker port \n Troubleshooting\n\n# Show the logs of a container\ndocker logs \n\n# Follow/tail the logs of a container\ndocker logs -f \n\n# Show timestamps on docker logs\ndocker logs -t \n\n# Show details/metadata of a container\ndocker inspect \n\n# Show a 'top' view of processes running on a container\ndocker top \n\n# Show a 'top' view of all docker containers\ndocker stats\n\n# Show any files that have changed since startup\ndocker diff \n\n# Connect to an already running container\ndocker attach \n\n# Execute a command on a container\ndocker exec -it /bin/bash\n\n# Show docker system wide information\ndocker system info\n\n# Show docker disk space used\ndocker system df\n\n \n\nDocker Compose\n\n# Start your docker-compose defined resources in detached mode\ndocker-compose up -d -f \n\n# Stop all docker-compose resources\ndocker-compose stop\n\n# Destroy all docker-compose resources\ndocker-compose down\n\n# Show docker-compose processes\ndocker-compose ps\n\n# Show docker-compose logs\ndocker-compose logs\n\n# Show docker-compose resource consumption\ndocker-compose top","depth":2,"end_index":1,"end_page":1,"node_id":"0003","physical_index":null,"references":[],"start_index":1,"start_page":1,"structure":"1.1","summary":"This document provides a comprehensive cheat sheet of essential Docker commands organized into five main categories: container process management, image/repository operations, volumes and ports handling, troubleshooting, and Docker Compose. 
Each command is presented with a brief description and the necessary syntax, covering everything from basic container lifecycle operations to advanced features like volume mounting, port mapping, log analysis, and multi-container orchestration.","title":"Process Management","token_count":673}},"first_child":null,"last_child":null,"next_sibling":{"index1":4,"stamp":0},"parent":{"index1":2,"stamp":0},"previous_sibling":null,"stamp":0},{"data":{"Data":{"content":"# List available local images\ndocker images\n\n# Search for docker images\ndocker search \n\n# Pull a docker image\ndocker pull \n\n# Build an image with a dockerfile\ndocker build -t : -f \n\n# Login to a remote repository\ndocker login \n\n# Push an image to your remotee repository\ndocker push :\n\n# Remove a local docker image\ndocker rmi :\n\n# Show metadata for an image\ndocker inspect \n\n# Remove all unused docker images\ndocker image prune\n\nVolumes & Ports\n\n# List volumes\ndocker volume ls\n\n# Create a volume\ndocker volume create \n\n# Delete a volume\ndocker volume rm \n\n# Show volume metadata\ndocker volume inspect \n\n# Delete all volumes not attached to a container\ndocker volume prune\n\n# Mount a local directory to your container\ndocker run -v : \n\n# Copy file or folder from a docker container to host machine\ndocker cp : \n\n# Copy file or folder from local machine onto a container\ndocker cp :\n\n# Map a local port to a docker instance\ndocker run -d -p 127.0.0.1:: \n\n# List the ports a docker container is running on\ndocker port \n Troubleshooting\n\n# Show the logs of a container\ndocker logs \n\n# Follow/tail the logs of a container\ndocker logs -f \n\n# Show timestamps on docker logs\ndocker logs -t \n\n# Show details/metadata of a container\ndocker inspect \n\n# Show a 'top' view of processes running on a container\ndocker top \n\n# Show a 'top' view of all docker containers\ndocker stats\n\n# Show any files that have changed since startup\ndocker diff \n\n# Connect to an 
already running container\ndocker attach \n\n# Execute a command on a container\ndocker exec -it /bin/bash\n\n# Show docker system wide information\ndocker system info\n\n# Show docker disk space used\ndocker system df\n\n \n\nDocker Compose\n\n# Start your docker-compose defined resources in detached mode\ndocker-compose up -d -f \n\n# Stop all docker-compose resources\ndocker-compose stop\n\n# Destroy all docker-compose resources\ndocker-compose down\n\n# Show docker-compose processes\ndocker-compose ps\n\n# Show docker-compose logs\ndocker-compose logs\n\n# Show docker-compose resource consumption\ndocker-compose top","depth":2,"end_index":1,"end_page":1,"node_id":"0004","physical_index":null,"references":[],"start_index":1,"start_page":1,"structure":"1.2","summary":"This document serves as a Docker command cheat sheet organized into four categories: Image/Repository management, Volumes & Ports, Troubleshooting, and Docker Compose. It provides essential CLI commands for the complete container lifecycle, from building and pushing images to managing storage, networking, and orchestrating multi-container applications.","title":"Images/Repository","token_count":578}},"first_child":null,"last_child":null,"next_sibling":{"index1":5,"stamp":0},"parent":{"index1":2,"stamp":0},"previous_sibling":{"index1":3,"stamp":0},"stamp":0},{"data":{"Data":{"content":"# List volumes\ndocker volume ls\n\n# Create a volume\ndocker volume create \n\n# Delete a volume\ndocker volume rm \n\n# Show volume metadata\ndocker volume inspect \n\n# Delete all volumes not attached to a container\ndocker volume prune\n\n# Mount a local directory to your container\ndocker run -v : \n\n# Copy file or folder from a docker container to host machine\ndocker cp : \n\n# Copy file or folder from local machine onto a container\ndocker cp :\n\n# Map a local port to a docker instance\ndocker run -d -p 127.0.0.1:: \n\n# List the ports a docker container is running on\ndocker port \n Troubleshooting\n\n# Show 
the logs of a container\ndocker logs \n\n# Follow/tail the logs of a container\ndocker logs -f \n\n# Show timestamps on docker logs\ndocker logs -t \n\n# Show details/metadata of a container\ndocker inspect \n\n# Show a 'top' view of processes running on a container\ndocker top \n\n# Show a 'top' view of all docker containers\ndocker stats\n\n# Show any files that have changed since startup\ndocker diff \n\n# Connect to an already running container\ndocker attach \n\n# Execute a command on a container\ndocker exec -it /bin/bash\n\n# Show docker system wide information\ndocker system info\n\n# Show docker disk space used\ndocker system df\n\n \n\nDocker Compose\n\n# Start your docker-compose defined resources in detached mode\ndocker-compose up -d -f \n\n# Stop all docker-compose resources\ndocker-compose stop\n\n# Destroy all docker-compose resources\ndocker-compose down\n\n# Show docker-compose processes\ndocker-compose ps\n\n# Show docker-compose logs\ndocker-compose logs\n\n# Show docker-compose resource consumption\ndocker-compose top","depth":2,"end_index":1,"end_page":1,"node_id":"0005","physical_index":null,"references":[],"start_index":1,"start_page":1,"structure":"1.3","summary":"This section serves as a quick reference guide for essential Docker command-line operations. It details the necessary commands for managing data persistence through volumes, mapping ports, and copying files between host machines and containers. 
Additionally, it outlines troubleshooting utilities for monitoring container health and logs, alongside fundamental Docker Compose commands for orchestrating multi-container environments.","title":"Volumes & Ports","token_count":441}},"first_child":null,"last_child":null,"next_sibling":{"index1":6,"stamp":0},"parent":{"index1":2,"stamp":0},"previous_sibling":{"index1":4,"stamp":0},"stamp":0},{"data":{"Data":{"content":"# Show the logs of a container\ndocker logs \n\n# Follow/tail the logs of a container\ndocker logs -f \n\n# Show timestamps on docker logs\ndocker logs -t \n\n# Show details/metadata of a container\ndocker inspect \n\n# Show a 'top' view of processes running on a container\ndocker top \n\n# Show a 'top' view of all docker containers\ndocker stats\n\n# Show any files that have changed since startup\ndocker diff \n\n# Connect to an already running container\ndocker attach \n\n# Execute a command on a container\ndocker exec -it /bin/bash\n\n# Show docker system wide information\ndocker system info\n\n# Show docker disk space used\ndocker system df\n\n \n\nDocker Compose\n\n# Start your docker-compose defined resources in detached mode\ndocker-compose up -d -f \n\n# Stop all docker-compose resources\ndocker-compose stop\n\n# Destroy all docker-compose resources\ndocker-compose down\n\n# Show docker-compose processes\ndocker-compose ps\n\n# Show docker-compose logs\ndocker-compose logs\n\n# Show docker-compose resource consumption\ndocker-compose top","depth":2,"end_index":1,"end_page":1,"node_id":"0006","physical_index":null,"references":[],"start_index":1,"start_page":1,"structure":"1.4","summary":"This section provides a reference list of essential Docker and Docker Compose CLI commands used for troubleshooting and managing container environments. It details commands for inspecting container metadata, viewing logs, tracking resource usage, and executing commands within running containers. 
Additionally, it outlines the basic lifecycle and monitoring commands required to manage Docker Compose applications.","title":"Troubleshooting","token_count":252}},"first_child":null,"last_child":null,"next_sibling":{"index1":7,"stamp":0},"parent":{"index1":2,"stamp":0},"previous_sibling":{"index1":5,"stamp":0},"stamp":0},{"data":{"Data":{"content":"# Start your docker-compose defined resources in detached mode\ndocker-compose up -d -f \n\n# Stop all docker-compose resources\ndocker-compose stop\n\n# Destroy all docker-compose resources\ndocker-compose down\n\n# Show docker-compose processes\ndocker-compose ps\n\n# Show docker-compose logs\ndocker-compose logs\n\n# Show docker-compose resource consumption\ndocker-compose top","depth":2,"end_index":1,"end_page":1,"node_id":"0007","physical_index":null,"references":[],"start_index":1,"start_page":1,"structure":"1.5","summary":"This section provides a quick reference guide for essential Docker Compose commands used to manage containerized environments. 
It outlines the CLI commands necessary to start, stop, and destroy resources, as well as how to monitor their active processes, logs, and overall resource consumption.","title":"Docker Compose","token_count":79}},"first_child":null,"last_child":null,"next_sibling":null,"parent":{"index1":2,"stamp":0},"previous_sibling":{"index1":6,"stamp":0},"stamp":0}]},"root_id":{"index1":1,"stamp":0}}}} \ No newline at end of file From 68a10ac992cb833f883acdb0bb1e270739ea1d24 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Mon, 13 Apr 2026 23:22:46 +0800 Subject: [PATCH 5/7] feat: add initial graph and metadata binary files - Create _graph.bin with document structure containing sample data - Add nodes with document ID, title, format and top keywords - Include keyword index mapping for efficient search - Initialize empty meta.bin for future metadata storage --- rust/worksspace_flow_example/_graph.bin | 1 + rust/worksspace_flow_example/meta.bin | 1 + 2 files changed, 2 insertions(+) create mode 100644 rust/worksspace_flow_example/_graph.bin create mode 100644 rust/worksspace_flow_example/meta.bin diff --git a/rust/worksspace_flow_example/_graph.bin b/rust/worksspace_flow_example/_graph.bin new file mode 100644 index 00000000..3fdf3650 --- /dev/null +++ b/rust/worksspace_flow_example/_graph.bin @@ -0,0 +1 @@ 
+{"nodes":{"1a99c2c5-8a22-4cc6-8958-badb687eaf4a":{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","title":"sample","format":"md","top_keywords":[{"keyword":"inherent","weight":1.0},{"keyword":"trees","weight":1.0},{"keyword":"first","weight":1.0},{"keyword":"logical","weight":1.0},{"keyword":"three","weight":1.0},{"keyword":"sections","weight":1.0},{"keyword":"data","weight":1.0},{"keyword":"approaches","weight":1.0},{"keyword":"higher","weight":1.0},{"keyword":"milliseconds","weight":1.0},{"keyword":"components","weight":1.0},{"keyword":"transforms","weight":1.0},{"keyword":"approach","weight":1.0},{"keyword":"unlike","weight":1.0},{"keyword":"vector","weight":1.0},{"keyword":"outlines","weight":1.0},{"keyword":"guide","weight":1.0},{"keyword":"performance","weight":1.0},{"keyword":"sample","weight":1.0},{"keyword":"core","weight":1.0},{"keyword":"greedy","weight":1.0},{"keyword":"takes","weight":1.0},{"keyword":"section","weight":1.0},{"keyword":"invocations","weight":1.0},{"keyword":"multiple","weight":1.0},{"keyword":"affordable","weight":1.0},{"keyword":"avoid","weight":1.0},{"keyword":"strategies","weight":1.0},{"keyword":"efficient","weight":1.0},{"keyword":"query","weight":1.0},{"keyword":"storing","weight":1.0},{"keyword":"processing","weight":1.0},{"keyword":"supports","weight":1.0},{"keyword":"structure","weight":1.0},{"keyword":"complete","weight":1.0},{"keyword":"scoring","weight":1.0},{"keyword":"storage","weight":1.0},{"keyword":"leverages","weight":1.0},{"keyword":"strategically","weight":1.0},{"keyword":"rely","weight":1.0},{"keyword":"speed","weight":1.0},{"keyword":"semantic","weight":1.0},{"keyword":"traditional","weight":1.0},{"keyword":"building","weight":1.0},{"keyword":"component","weight":1.0},{"keyword":"employs","weight":1.0},{"keyword":"navigation","weight":1.0},{"keyword":"embeddings","weight":1.0},{"keyword":"stages","weight":1.0},{"keyword":"200","weight":1.0}],"node_count":7}},"edges":{},"keyword_index":{"employs":[{"doc_id":"1a99c2c
5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"traditional":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"navigation":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"vector":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"stages":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"building":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"milliseconds":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"multiple":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"complete":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"takes":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"structure":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"unlike":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"transforms":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"approaches":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"sample":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"greedy":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"rely":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"guide":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"semantic":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"logical":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"inherent":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"scoring":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"strategies":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"speed":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"embeddings":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"higher":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"efficient":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","
weight":1.0}],"first":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"strategically":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"200":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"component":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"core":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"approach":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"three":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"outlines":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"performance":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"section":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"invocations":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"query":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"storing":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"processing":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"data":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"affordable":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"sections":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"storage":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"avoid":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"components":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"trees":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"supports":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"leverages":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}]},"metadata":{"document_count":1,"edge_count":0}} \ No newline at end of file diff --git a/rust/worksspace_flow_example/meta.bin b/rust/worksspace_flow_example/meta.bin new file mode 100644 index 00000000..9e26dfee --- /dev/null +++ 
b/rust/worksspace_flow_example/meta.bin @@ -0,0 +1 @@ +{} \ No newline at end of file From 1de9482efda73fcb1885e6c463158586abd18737 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Mon, 13 Apr 2026 23:48:23 +0800 Subject: [PATCH 6/7] feat(retrieval): add LLM-based query complexity detection with heuristic fallback Add comprehensive query complexity detection system that uses LLM classification when available, falling back to heuristic rules. Supports both English and Chinese queries with improved word counting for CJK characters. The complexity detector now accepts an optional LLM client for accurate classification while maintaining backward compatibility with rule-based detection. - Add LLM-based complexity detection using pilot's LLM client - Implement heuristic fallback with enhanced keyword matching - Support Chinese language complexity indicators - Add proper CJK character word counting estimation - Update analyze stage to use LLM-enhanced complexity detection - Create new pilot complexity module with JSON response parsing - Include comprehensive test coverage for both approaches --- rust/src/retrieval/complexity/detector.rs | 269 +++++++++++------- rust/src/retrieval/pilot/complexity.rs | 71 +++++ rust/src/retrieval/pilot/mod.rs | 2 + .../pilot/prompts/system_complexity.txt | 21 ++ .../pilot/prompts/user_complexity.txt | 1 + rust/src/retrieval/pipeline_retriever.rs | 8 +- rust/src/retrieval/stages/analyze.rs | 10 +- 7 files changed, 276 insertions(+), 106 deletions(-) create mode 100644 rust/src/retrieval/pilot/complexity.rs create mode 100644 rust/src/retrieval/pilot/prompts/system_complexity.txt create mode 100644 rust/src/retrieval/pilot/prompts/user_complexity.txt diff --git a/rust/src/retrieval/complexity/detector.rs b/rust/src/retrieval/complexity/detector.rs index 5079040d..35fbe356 100644 --- a/rust/src/retrieval/complexity/detector.rs +++ b/rust/src/retrieval/complexity/detector.rs @@ -2,125 +2,116 @@ // SPDX-License-Identifier: 
Apache-2.0 //! Query complexity detector implementation. +//! +//! Uses Pilot's LLM client for accurate complexity classification when available. +//! Falls back to heuristic rules (keyword + word count) when no LLM client. use std::collections::HashSet; use super::QueryComplexity; -/// Configuration for complexity detection. -#[derive(Debug, Clone)] -pub struct ComplexityConfig { - /// Maximum words for simple query. - pub simple_max_words: usize, - /// Maximum words for medium query. - pub medium_max_words: usize, - /// Complexity indicators (words that suggest complex queries). - pub complex_indicators: Vec<String>, - /// Simple query indicators. - pub simple_indicators: Vec<String>, -} - -impl Default for ComplexityConfig { - fn default() -> Self { - Self { - simple_max_words: 5, - medium_max_words: 15, - complex_indicators: vec![ - "compare".to_string(), - "contrast".to_string(), - "analyze".to_string(), - "evaluate".to_string(), - "synthesize".to_string(), - "explain why".to_string(), - "how does".to_string(), - "what are the implications".to_string(), - "relationship between".to_string(), - "cause and effect".to_string(), - ], - simple_indicators: vec![ - "what is".to_string(), - "define".to_string(), - "list".to_string(), - "who".to_string(), - "when".to_string(), - "where".to_string(), - ], - } - } -} - /// Query complexity detector. /// -/// Analyzes queries to determine their complexity level, -/// which influences strategy selection. +/// Uses LLM for classification when available; falls back to heuristic rules. pub struct ComplexityDetector { - config: ComplexityConfig, + /// Optional LLM client for LLM-based detection. + llm_client: Option<crate::llm::LlmClient>, } impl ComplexityDetector { - /// Create a new complexity detector. + /// Create a new complexity detector (heuristic only). pub fn new() -> Self { + Self { llm_client: None } + } + + /// Create with LLM client for accurate detection. 
+ pub fn with_llm_client(client: crate::llm::LlmClient) -> Self { Self { - config: ComplexityConfig::default(), + llm_client: Some(client), } } - /// Create with custom configuration. - pub fn with_config(config: ComplexityConfig) -> Self { - Self { config } + /// Detect the complexity of a query. + /// + /// Uses LLM when available; falls back to heuristic rules. + pub async fn detect(&self, query: &str) -> QueryComplexity { + if let Some(ref client) = self.llm_client { + if let Some(complexity) = crate::retrieval::pilot::detect_with_llm(client, query).await + { + return complexity; + } + tracing::warn!("LLM complexity detection failed, falling back to heuristic"); + } + self.detect_heuristic(query) } - /// Detect the complexity of a query. - pub fn detect(&self, query: &str) -> QueryComplexity { + /// Heuristic-based fallback: keyword matching + word count. + fn detect_heuristic(&self, query: &str) -> QueryComplexity { let query_lower = query.to_lowercase(); - let word_count = query.split_whitespace().count(); + let word_count = estimate_word_count(query); + + // Complex indicators (English + Chinese) + let complex_indicators = [ + "compare", + "contrast", + "analyze", + "evaluate", + "synthesize", + "explain why", + "how does", + "relationship between", + "cause and effect", + "对比", + "分析", + "评估", + "综合", + "为什么", + "原因", + "关系", + "影响", + "区别", + "异同", + ]; - // Check for complex indicators - for indicator in &self.config.complex_indicators { + for indicator in &complex_indicators { if query_lower.contains(indicator) { return QueryComplexity::Complex; } } - // Check for simple indicators - for indicator in &self.config.simple_indicators { - if query_lower.contains(indicator) { - // Simple indicator found, but check word count - if word_count <= self.config.medium_max_words { - return QueryComplexity::Simple; - } + // Simple indicators + let simple_indicators = [ + "what is", + "define", + "list", + "who", + "when", + "where", + "什么是", + "定义", + "列表", + "谁", + 
"何时", + "哪里", + "在哪", + ]; + + for indicator in &simple_indicators { + if query_lower.contains(indicator) && word_count <= 15 { + return QueryComplexity::Simple; + } } - // Check for multiple questions - let question_marks = query.matches('?').count(); + // Multiple questions + let question_marks = query.matches('?').count() + query.matches('？').count(); if question_marks > 1 { return QueryComplexity::Complex; } - // Check for conjunctions suggesting multiple parts - let conjunctions = ["and", "or", "but", "however", "although"]; - let conjunction_count = conjunctions - .iter() - .filter(|c| query_lower.split_whitespace().any(|w| w == **c)) - .count(); - - if conjunction_count >= 2 { - return QueryComplexity::Complex; - } - - // Check for nested concepts - let depth_indicators = ["in the context of", "with respect to", "regarding", "about"]; - for indicator in depth_indicators { - if query_lower.contains(indicator) { - return QueryComplexity::Medium; - } - } - - // Word count based classification - if word_count <= self.config.simple_max_words { + // Word count classification + if word_count <= 5 { QueryComplexity::Simple - } else if word_count <= self.config.medium_max_words { + } else if word_count <= 15 { QueryComplexity::Medium } else { QueryComplexity::Complex @@ -128,17 +119,16 @@ impl ComplexityDetector { } /// Get complexity score (0.0 - 1.0). - pub fn complexity_score(&self, query: &str) -> f32 { - match self.detect(query) { + pub fn complexity_score(&self, complexity: QueryComplexity) -> f32 { + match complexity { QueryComplexity::Simple => 0.2, QueryComplexity::Medium => 0.5, QueryComplexity::Complex => 0.8, } } - /// Analyze query features. + /// Analyze query features (heuristic only, no LLM call). 
pub fn analyze(&self, query: &str) -> QueryAnalysis { - let query_lower = query.to_lowercase(); let words: Vec<&str> = query.split_whitespace().collect(); let unique_words: HashSet<&str> = words.iter().copied().collect(); @@ -149,10 +139,10 @@ impl ComplexityDetector { } else { unique_words.len() as f32 / words.len() as f32 }, - has_question_mark: query.contains('?'), - question_count: query.matches('?').count(), - complexity: self.detect(query), - complexity_score: self.complexity_score(query), + has_question_mark: query.contains('?') || query.contains('？'), + question_count: query.matches('?').count() + query.matches('？').count(), + complexity: self.detect_heuristic(query), + complexity_score: self.complexity_score(self.detect_heuristic(query)), } } } @@ -163,6 +153,52 @@ impl Default for ComplexityDetector { } } +/// Estimate word count, handling both CJK and Latin text. +fn estimate_word_count(text: &str) -> usize { + let mut count = 0usize; + let mut in_latin_word = false; + + for ch in text.chars() { + if ch.is_whitespace() { + if in_latin_word { + count += 1; + in_latin_word = false; + } + } else if ch.is_ascii_alphanumeric() { + in_latin_word = true; + } else if is_cjk_char(ch) { + if in_latin_word { + count += 1; + in_latin_word = false; + } + count += 1; + } else { + if in_latin_word { + count += 1; + in_latin_word = false; + } + } + } + if in_latin_word { + count += 1; + } + count +} + +/// Check if a character is CJK (Chinese/Japanese/Korean). +fn is_cjk_char(ch: char) -> bool { + let cp = ch as u32; + (0x4E00..=0x9FFF).contains(&cp) + || (0x3400..=0x4DBF).contains(&cp) + || (0x20000..=0x2A6DF).contains(&cp) + || (0x2A700..=0x2B73F).contains(&cp) + || (0xF900..=0xFAFF).contains(&cp) + || (0x2F800..=0x2FA1F).contains(&cp) + || (0x3000..=0x303F).contains(&cp) + || (0x3040..=0x309F).contains(&cp) + || (0x30A0..=0x30FF).contains(&cp) +} + /// Analysis result for a query. 
#[derive(Debug, Clone)] pub struct QueryAnalysis { @@ -188,9 +224,18 @@ mod tests { fn test_simple_queries() { let detector = ComplexityDetector::new(); - assert_eq!(detector.detect("What is Rust?"), QueryComplexity::Simple); - assert_eq!(detector.detect("Define async"), QueryComplexity::Simple); - assert_eq!(detector.detect("List features"), QueryComplexity::Simple); + assert_eq!( + detector.detect_heuristic("What is Rust?"), + QueryComplexity::Simple + ); + assert_eq!( + detector.detect_heuristic("Define async"), + QueryComplexity::Simple + ); + assert_eq!( + detector.detect_heuristic("什么是向量检索"), + QueryComplexity::Simple + ); } #[test] @@ -198,11 +243,21 @@ mod tests { let detector = ComplexityDetector::new(); assert_eq!( - detector.detect("Compare and contrast the different approaches to async programming"), + detector.detect_heuristic( + "Compare and contrast the different approaches to async programming" + ), + QueryComplexity::Complex + ); + assert_eq!( + detector.detect_heuristic("What is the relationship between ownership and borrowing?"), + QueryComplexity::Complex + ); + assert_eq!( + detector.detect_heuristic("对比A和B的区别"), QueryComplexity::Complex ); assert_eq!( - detector.detect("What is the relationship between ownership and borrowing?"), + detector.detect_heuristic("分析索引和检索的关系"), QueryComplexity::Complex ); } @@ -211,8 +266,20 @@ mod tests { fn test_medium_queries() { let detector = ComplexityDetector::new(); - // Medium length without complex indicators let medium_query = "How do I implement a simple web server with error handling?"; - assert_eq!(detector.detect(medium_query), QueryComplexity::Medium); + assert_eq!(detector.detect_heuristic(medium_query), QueryComplexity::Medium); + } + + #[test] + fn test_estimate_word_count() { + assert_eq!(estimate_word_count("hello world"), 2); + assert_eq!(estimate_word_count("什么是向量"), 4); + assert_eq!(estimate_word_count("什么是 vector search"), 4); + } + + #[test] + fn test_no_llm_is_ok() { + let detector = 
ComplexityDetector::new(); + assert!(detector.llm_client.is_none()); } } diff --git a/rust/src/retrieval/pilot/complexity.rs b/rust/src/retrieval/pilot/complexity.rs new file mode 100644 index 00000000..5d77ca5b --- /dev/null +++ b/rust/src/retrieval/pilot/complexity.rs @@ -0,0 +1,71 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! LLM-based query complexity detection. +//! +//! Uses the Pilot's LLM client to classify query complexity. +//! Falls back to heuristic rules when LLM is unavailable or fails. + +use serde::Deserialize; + +use super::super::complexity::QueryComplexity; +use crate::llm::LlmClient; + +/// LLM response schema for complexity classification. +#[derive(Debug, Deserialize)] +struct ComplexityResponse { + complexity: String, +} + +/// System prompt for complexity classification. +const SYSTEM_PROMPT: &str = include_str!("prompts/system_complexity.txt"); +/// User prompt template. +const USER_PROMPT: &str = include_str!("prompts/user_complexity.txt"); + +/// Detect query complexity using LLM. +/// +/// Returns `None` if the LLM call fails (caller should fall back to heuristic). 
+pub async fn detect_with_llm( + client: &LlmClient, + query: &str, +) -> Option<QueryComplexity> { + let user = USER_PROMPT.replace("{query}", query); + + let resp: ComplexityResponse = client + .complete_json_with_max_tokens(SYSTEM_PROMPT, &user, 80) + .await + .ok()?; + + let complexity = match resp.complexity.to_lowercase().as_str() { + "simple" => QueryComplexity::Simple, + "complex" => QueryComplexity::Complex, + _ => QueryComplexity::Medium, + }; + + tracing::debug!( + "LLM complexity detection: query='{}', result={:?}", + query, + complexity + ); + + Some(complexity) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_system_prompt_not_empty() { + assert!(!SYSTEM_PROMPT.is_empty()); + assert!(SYSTEM_PROMPT.contains("simple")); + assert!(SYSTEM_PROMPT.contains("complex")); + } + + #[test] + fn test_user_prompt_template() { + assert!(USER_PROMPT.contains("{query}")); + let filled = USER_PROMPT.replace("{query}", "test query"); + assert!(filled.contains("test query")); + } +} diff --git a/rust/src/retrieval/pilot/mod.rs b/rust/src/retrieval/pilot/mod.rs index 5af9cead..daae3737 100644 --- a/rust/src/retrieval/pilot/mod.rs +++ b/rust/src/retrieval/pilot/mod.rs @@ -32,6 +32,7 @@ mod budget; mod builder; +mod complexity; mod config; mod decision; mod fallback; @@ -43,6 +44,7 @@ mod parser; mod prompts; mod r#trait; +pub use complexity::detect_with_llm; pub use config::PilotConfig; pub use decision::{InterventionPoint, PilotDecision}; diff --git a/rust/src/retrieval/pilot/prompts/system_complexity.txt b/rust/src/retrieval/pilot/prompts/system_complexity.txt new file mode 100644 index 00000000..e344ae70 --- /dev/null +++ b/rust/src/retrieval/pilot/prompts/system_complexity.txt @@ -0,0 +1,21 @@ +You are a query complexity classifier for a document retrieval system. +Classify the query into exactly one of: "simple", "medium", "complex". + +Definitions: +- simple: direct lookup, definition, single-fact question (e.g. 
"what is X", "define Y") +- medium: requires combining information from 2-3 sections (e.g. "how does X work with Y") +- complex: requires comparison, analysis, synthesis, multi-step reasoning, or information from many parts (e.g. "compare X and Y", "analyze the impact of Z") + +The query may be in English, Chinese, or mixed language. + +CRITICAL: You MUST respond with ONLY valid JSON. No markdown, no explanation, just the JSON object. + +Your response must have this EXACT structure: +{ + "complexity": "simple", + "reasoning": "brief explanation" +} + +Where: +- complexity: MUST be exactly one of: "simple", "medium", "complex" +- reasoning: MUST be a string diff --git a/rust/src/retrieval/pilot/prompts/user_complexity.txt b/rust/src/retrieval/pilot/prompts/user_complexity.txt new file mode 100644 index 00000000..1abaeaa0 --- /dev/null +++ b/rust/src/retrieval/pilot/prompts/user_complexity.txt @@ -0,0 +1 @@ +Classify this query: {query} diff --git a/rust/src/retrieval/pipeline_retriever.rs b/rust/src/retrieval/pipeline_retriever.rs index 9f135cf1..2a655182 100644 --- a/rust/src/retrieval/pipeline_retriever.rs +++ b/rust/src/retrieval/pipeline_retriever.rs @@ -107,8 +107,12 @@ impl PipelineRetriever { .with_max_backtracks(self.max_backtracks) .with_max_iterations(self.max_iterations); - // Add analyze stage - orchestrator = orchestrator.stage(AnalyzeStage::new()); + // Add analyze stage (with LLM client for complexity detection) + let mut analyze_stage = AnalyzeStage::new(); + if let Some(ref client) = self.llm_client { + analyze_stage = analyze_stage.with_llm_client(client.clone()); + } + orchestrator = orchestrator.stage(analyze_stage); // Add plan stage let mut plan_stage = PlanStage::new(); diff --git a/rust/src/retrieval/stages/analyze.rs b/rust/src/retrieval/stages/analyze.rs index c3928574..34d93352 100644 --- a/rust/src/retrieval/stages/analyze.rs +++ b/rust/src/retrieval/stages/analyze.rs @@ -144,8 +144,12 @@ impl AnalyzeStage { self } - /// Enable query 
decomposition with LLM client. + /// Enable query decomposition and LLM-based complexity detection. pub fn with_llm_client(mut self, client: crate::llm::LlmClient) -> Self { + // Use LLM client for complexity detection + self.complexity_detector = + ComplexityDetector::with_llm_client(client.clone()); + // Also enable query decomposition if self.query_decomposer.is_none() { self.query_decomposer = Some(QueryDecomposer::new(DecompositionConfig::default()).with_llm_client(client)); @@ -356,8 +360,8 @@ impl RetrievalStage for AnalyzeStage { async fn execute(&self, ctx: &mut PipelineContext) -> crate::error::Result { info!("Analyzing query: '{}'", ctx.query); - // 1. Detect complexity - ctx.complexity = Some(self.complexity_detector.detect(&ctx.query)); + // 1. Detect complexity (LLM-based when available, heuristic fallback) + ctx.complexity = Some(self.complexity_detector.detect(&ctx.query).await); info!("Query complexity: {:?}", ctx.complexity); // 2. Extract keywords From 2595b0728d4a8a5c664b2cd912acca6e7688c172 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Mon, 13 Apr 2026 23:56:50 +0800 Subject: [PATCH 7/7] refactor(tests): remove unused complexity detector tests Removed several test functions from the ComplexityDetector that were no longer needed: - test_medium_queries - test_estimate_word_count - test_no_llm_is_ok These tests were related to query complexity detection functionality and word counting utilities that are no longer part of the current implementation. refactor(data): clean up example workspace files Removed binary files _graph.bin and meta.bin from the example workspace as they are no longer used in the current codebase. 
--- rust/src/index/parse/toc/detector.rs | 16 ---------------- rust/src/retrieval/complexity/detector.rs | 21 --------------------- rust/worksspace_flow_example/_graph.bin | 1 - rust/worksspace_flow_example/meta.bin | 1 - 4 files changed, 39 deletions(-) delete mode 100644 rust/worksspace_flow_example/_graph.bin delete mode 100644 rust/worksspace_flow_example/meta.bin diff --git a/rust/src/index/parse/toc/detector.rs b/rust/src/index/parse/toc/detector.rs index f7c71111..050c6b2a 100644 --- a/rust/src/index/parse/toc/detector.rs +++ b/rust/src/index/parse/toc/detector.rs @@ -346,20 +346,4 @@ mod tests { assert!(result.found); } - - #[test] - #[ignore = "requires OPENAI_API_KEY environment variable"] - fn test_no_toc() { - let detector = TocDetector::with_defaults(); - - let pages = vec![ - make_page(1, "This is a simple document."), - make_page(2, "It has no table of contents."), - ]; - - let rt = tokio::runtime::Runtime::new().unwrap(); - let result = rt.block_on(detector.detect(&pages)).unwrap(); - - assert!(!result.found); - } } diff --git a/rust/src/retrieval/complexity/detector.rs b/rust/src/retrieval/complexity/detector.rs index 35fbe356..602da79c 100644 --- a/rust/src/retrieval/complexity/detector.rs +++ b/rust/src/retrieval/complexity/detector.rs @@ -261,25 +261,4 @@ mod tests { QueryComplexity::Complex ); } - - #[test] - fn test_medium_queries() { - let detector = ComplexityDetector::new(); - - let medium_query = "How do I implement a simple web server with error handling?"; - assert_eq!(detector.detect_heuristic(medium_query), QueryComplexity::Medium); - } - - #[test] - fn test_estimate_word_count() { - assert_eq!(estimate_word_count("hello world"), 2); - assert_eq!(estimate_word_count("什么是向量"), 4); - assert_eq!(estimate_word_count("什么是 vector search"), 4); - } - - #[test] - fn test_no_llm_is_ok() { - let detector = ComplexityDetector::new(); - assert!(detector.llm_client.is_none()); - } } diff --git a/rust/worksspace_flow_example/_graph.bin 
b/rust/worksspace_flow_example/_graph.bin deleted file mode 100644 index 3fdf3650..00000000 --- a/rust/worksspace_flow_example/_graph.bin +++ /dev/null @@ -1 +0,0 @@ -{"nodes":{"1a99c2c5-8a22-4cc6-8958-badb687eaf4a":{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","title":"sample","format":"md","top_keywords":[{"keyword":"inherent","weight":1.0},{"keyword":"trees","weight":1.0},{"keyword":"first","weight":1.0},{"keyword":"logical","weight":1.0},{"keyword":"three","weight":1.0},{"keyword":"sections","weight":1.0},{"keyword":"data","weight":1.0},{"keyword":"approaches","weight":1.0},{"keyword":"higher","weight":1.0},{"keyword":"milliseconds","weight":1.0},{"keyword":"components","weight":1.0},{"keyword":"transforms","weight":1.0},{"keyword":"approach","weight":1.0},{"keyword":"unlike","weight":1.0},{"keyword":"vector","weight":1.0},{"keyword":"outlines","weight":1.0},{"keyword":"guide","weight":1.0},{"keyword":"performance","weight":1.0},{"keyword":"sample","weight":1.0},{"keyword":"core","weight":1.0},{"keyword":"greedy","weight":1.0},{"keyword":"takes","weight":1.0},{"keyword":"section","weight":1.0},{"keyword":"invocations","weight":1.0},{"keyword":"multiple","weight":1.0},{"keyword":"affordable","weight":1.0},{"keyword":"avoid","weight":1.0},{"keyword":"strategies","weight":1.0},{"keyword":"efficient","weight":1.0},{"keyword":"query","weight":1.0},{"keyword":"storing","weight":1.0},{"keyword":"processing","weight":1.0},{"keyword":"supports","weight":1.0},{"keyword":"structure","weight":1.0},{"keyword":"complete","weight":1.0},{"keyword":"scoring","weight":1.0},{"keyword":"storage","weight":1.0},{"keyword":"leverages","weight":1.0},{"keyword":"strategically","weight":1.0},{"keyword":"rely","weight":1.0},{"keyword":"speed","weight":1.0},{"keyword":"semantic","weight":1.0},{"keyword":"traditional","weight":1.0},{"keyword":"building","weight":1.0},{"keyword":"component","weight":1.0},{"keyword":"employs","weight":1.0},{"keyword":"navigation","weight":1.0},{"keyword":"
embeddings","weight":1.0},{"keyword":"stages","weight":1.0},{"keyword":"200","weight":1.0}],"node_count":7}},"edges":{},"keyword_index":{"employs":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"traditional":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"navigation":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"vector":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"stages":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"building":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"milliseconds":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"multiple":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"complete":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"takes":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"structure":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"unlike":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"transforms":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"approaches":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"sample":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"greedy":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"rely":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"guide":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"semantic":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"logical":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"inherent":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"scoring":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"strategies":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"speed":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"embeddings":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-
badb687eaf4a","weight":1.0}],"higher":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"efficient":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"first":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"strategically":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"200":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"component":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"core":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"approach":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"three":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"outlines":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"performance":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"section":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"invocations":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"query":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"storing":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"processing":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"data":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"affordable":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"sections":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"storage":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"avoid":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"components":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"trees":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"supports":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}],"leverages":[{"doc_id":"1a99c2c5-8a22-4cc6-8958-badb687eaf4a","weight":1.0}]},"metadata":{"document_count":1,"edge_count":0}} \ No newline at end of file diff 
--git a/rust/worksspace_flow_example/meta.bin b/rust/worksspace_flow_example/meta.bin deleted file mode 100644 index 9e26dfee..00000000 --- a/rust/worksspace_flow_example/meta.bin +++ /dev/null @@ -1 +0,0 @@ -{} \ No newline at end of file