Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion rust/src/index/summary/strategy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
use async_trait::async_trait;

use crate::document::{DocumentTree, NodeId};
use crate::llm::{LlmClient, LlmResult};
use crate::llm::memo::{MemoKey, MemoStore, MemoValue};
use crate::llm::{LlmClient, LlmResult};
use crate::utils::fingerprint::Fingerprint;

/// Configuration for summary strategies.
Expand Down
2 changes: 1 addition & 1 deletion rust/src/llm/executor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ use async_openai::types::chat::{
use super::config::LlmConfig;
use super::error::{LlmError, LlmResult};
use super::fallback::{FallbackChain, FallbackStep};
use crate::metrics::MetricsHub;
use super::throttle::ConcurrencyController;
use crate::metrics::MetricsHub;

/// Unified executor for LLM operations.
///
Expand Down
3 changes: 2 additions & 1 deletion rust/src/llm/memo/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,8 @@ impl MemoStore {
}

// Restore stats
self.stats.load_from(data.stats.hits, data.stats.misses, data.stats.tokens_saved);
self.stats
.load_from(data.stats.hits, data.stats.misses, data.stats.tokens_saved);

info!(
"Loaded memo store with {} entries from {:?}",
Expand Down
2 changes: 1 addition & 1 deletion rust/src/llm/pool.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ use std::sync::Arc;
use super::client::LlmClient;
use super::config::LlmConfig;
use super::fallback::{FallbackChain, FallbackConfig};
use crate::metrics::MetricsHub;
use super::throttle::ConcurrencyController;
use crate::metrics::MetricsHub;

/// Pool of LLM clients for different purposes.
///
Expand Down
3 changes: 1 addition & 2 deletions rust/src/retrieval/complexity/detector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,7 @@ impl ComplexityDetector {
}

let result = if let Some(ref client) = self.llm_client {
if let Some(complexity) =
crate::retrieval::pilot::detect_with_llm(client, query).await
if let Some(complexity) = crate::retrieval::pilot::detect_with_llm(client, query).await
{
complexity
} else {
Expand Down
35 changes: 16 additions & 19 deletions rust/src/retrieval/decompose.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@
use serde::{Deserialize, Serialize};
use tracing::{debug, info};

use crate::llm::{LlmClient, LlmExecutor};
use crate::llm::memo::{MemoKey, MemoOpType, MemoStore, MemoValue};
use crate::llm::{LlmClient, LlmExecutor};
use crate::utils::fingerprint::Fingerprint;

/// Sub-query resulting from decomposition.
Expand Down Expand Up @@ -269,30 +269,27 @@ impl QueryDecomposer {
info!("Decomposing complex query: '{}'", query);

// Try LLM-based decomposition if available
let result = if self.config.use_llm && (self.llm_client.is_some() || self.llm_executor.is_some()) {
match self.llm_decompose(query).await {
Ok(result) => result,
Err(e) => {
debug!(
"LLM decomposition failed, falling back to rule-based: {}",
e
);
self.rule_based_decompose(query)?
let result =
if self.config.use_llm && (self.llm_client.is_some() || self.llm_executor.is_some()) {
match self.llm_decompose(query).await {
Ok(result) => result,
Err(e) => {
debug!(
"LLM decomposition failed, falling back to rule-based: {}",
e
);
self.rule_based_decompose(query)?
}
}
}
} else {
self.rule_based_decompose(query)?
};
} else {
self.rule_based_decompose(query)?
};

// Cache the result
if let Some(ref store) = self.memo_store {
let cache_key = Self::build_cache_key(query);
if let Ok(json) = serde_json::to_value(&CachedDecomposition::from_result(&result)) {
store.put_with_tokens(
cache_key,
MemoValue::Json(json),
(query.len() / 4) as u64,
);
store.put_with_tokens(cache_key, MemoValue::Json(json), (query.len() / 4) as u64);
}
}

Expand Down
2 changes: 1 addition & 1 deletion rust/src/retrieval/pilot/llm_pilot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ use std::sync::Arc;
use tracing::{debug, info, warn};

use crate::document::{DocumentTree, NodeId};
use crate::llm::{LlmClient, LlmExecutor};
use crate::llm::memo::{MemoKey, MemoStore, MemoValue};
use crate::llm::{LlmClient, LlmExecutor};
use crate::utils::fingerprint::Fingerprint;

use super::budget::BudgetController;
Expand Down
3 changes: 2 additions & 1 deletion rust/src/retrieval/stages/analyze.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,8 @@ impl AnalyzeStage {
self.complexity_detector = detector;

// Also enable query decomposition
let mut decomposer = QueryDecomposer::new(DecompositionConfig::default()).with_llm_client(client);
let mut decomposer =
QueryDecomposer::new(DecompositionConfig::default()).with_llm_client(client);
if let Some(ref store) = self.memo_store {
decomposer = decomposer.with_memo_store(store.clone());
}
Expand Down
6 changes: 1 addition & 5 deletions rust/src/retrieval/sufficiency/llm_judge.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,11 +171,7 @@ Be conservative - only mark as sufficient if you're confident the content answer
if let Some(ref store) = self.memo_store {
let cache_key = self.build_cache_key(query, content);
let tokens = (prompt.len() / 4) as u64;
store.put_with_tokens(
cache_key,
MemoValue::Text(format!("{:?}", result)),
tokens,
);
store.put_with_tokens(cache_key, MemoValue::Text(format!("{:?}", result)), tokens);
}

result
Expand Down
Loading