Skip to content

Commit c17c210

Browse files
committed
Refactor: Rearchitect search into focused modules
- Split `indexing/search.rs` (2361 lines) into `search/{types,index,engine,query}.rs` with clear boundaries: `engine.rs` is pure (no I/O), `types.rs` is pure data, `query.rs` handles DB operations
- Move AI pipeline from `commands/` to `search/ai/{prompt,parser,mappings,query_builder}.rs`
- Split `SearchDialog.svelte` (1552 lines) into orchestrator + `AiSearchRow`, `SearchInputArea`, `SearchResults`
- Update all CLAUDE.md files, `architecture.md`, and path references
1 parent b4572f3 commit c17c210

30 files changed

Lines changed: 3277 additions & 2500 deletions

apps/desktop/coverage-allowlist.json

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,9 +205,18 @@
205205
"settings/sections/AiSection.svelte": {
206206
"reason": "UI section, depends on Tauri commands and event listeners"
207207
},
208+
"search/AiSearchRow.svelte": {
209+
"reason": "Search sub-component, UI only, depends on parent orchestration"
210+
},
208211
"search/SearchDialog.svelte": {
209212
"reason": "Search dialog UI, depends on Tauri commands and event listeners"
210213
},
214+
"search/SearchInputArea.svelte": {
215+
"reason": "Search sub-component, UI only, depends on parent orchestration and Tauri tooltip action"
216+
},
217+
"search/SearchResults.svelte": {
218+
"reason": "Search sub-component, UI only, display and formatting"
219+
},
211220
"tauri-commands/search.ts": {
212221
"reason": "Tauri command wrappers, tested via integration"
213222
},

apps/desktop/src-tauri/src/ai/manager.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -961,7 +961,10 @@ fn spawn_and_track_server(m: &mut ManagerState) -> Result<(u32, u16), String> {
961961
let model = get_model_by_id(&m.state.installed_model_id).unwrap_or_else(get_default_model);
962962
let port = find_available_port().ok_or("No available port")?;
963963

964-
log::debug!("AI server: starting llama-server on port {port} with context size {}", m.context_size);
964+
log::debug!(
965+
"AI server: starting llama-server on port {port} with context size {}",
966+
m.context_size
967+
);
965968

966969
// Belt-and-suspenders: stop any stale llama-servers before spawning a new one
967970
kill_stale_llama_servers(&m.ai_dir);

apps/desktop/src-tauri/src/ai/process.rs

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -138,11 +138,7 @@ pub fn kill_stale_llama_servers(ai_dir: &Path) {
138138
let binary_path = ai_dir.join(LLAMA_SERVER_BINARY);
139139
let binary_str = binary_path.to_string_lossy();
140140

141-
if let Ok(output) = std::process::Command::new("pgrep")
142-
.arg("-f")
143-
.arg(&*binary_str)
144-
.output()
145-
{
141+
if let Ok(output) = std::process::Command::new("pgrep").arg("-f").arg(&*binary_str).output() {
146142
let stdout = String::from_utf8_lossy(&output.stdout);
147143
for line in stdout.lines() {
148144
if let Ok(pid) = line.trim().parse::<u32>() {

apps/desktop/src-tauri/src/commands/CLAUDE.md

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,7 @@ immediately to business-logic modules. No significant logic lives here.
2525
| `indexing.rs` | Drive index | `start_drive_index`, `stop_drive_index`, `get_index_status`, `get_dir_stats`, `get_dir_stats_batch`, `clear_drive_index`, `set_indexing_enabled`, `get_index_debug_status` (dev-only extended stats). Uses `State<IndexManagerState>`. |
2626
| `clipboard.rs` | Clipboard file ops | `copy_files_to_clipboard`, `cut_files_to_clipboard`, `read_clipboard_files`, `clear_clipboard_cut_state`. macOS uses NSPasteboard via `clipboard::pasteboard`; non-macOS stubs return errors. |
2727
| `crash_reporter.rs` | Crash reporting | `check_pending_crash_report`, `dismiss_crash_report`, `send_crash_report`. Delegates to `crash_reporter` module. Send is skipped in dev/CI. |
28-
| `search.rs` | Drive search | `prepare_search_index`, `search_files`, `release_search_index`, `translate_search_query`, `parse_search_scope`. Thin wrappers over `indexing::search` module. Post-filters directory sizes after `fill_directory_sizes`. AI search uses single-pass classification prompt → `ai_response_parser` → `ai_query_builder` pipeline. |
29-
| `ai_response_parser.rs` | AI search parser | Key-value line parser for LLM classification responses. Validates enum fields, extracts keywords. Fallback keyword extraction when LLM fails. |
30-
| `ai_query_builder.rs` | AI search builder | Maps parsed LLM enums (type, time, size, scope) into `SearchQuery` fields. Merges keywords + type into single regex pattern. Deterministic date/size computation. |
28+
| `search.rs` | Drive search | Thin IPC wrappers over `search` module. `resolve_ai_backend` for AI provider config. Post-filters directory sizes after `fill_directory_sizes`. |
3129
| `sync_status.rs` | Cloud sync status | `get_sync_status` — macOS delegates to `file_system::sync_status`; non-macOS returns empty map via `#[cfg]` on the function itself (not the module). |
3230

3331
## Key decisions

apps/desktop/src-tauri/src/commands/mod.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
//! Tauri commands module.
22
3-
pub mod ai_query_builder;
4-
pub mod ai_response_parser;
53
pub mod clipboard;
64
pub mod crash_reporter;
75
pub mod e2e;

apps/desktop/src-tauri/src/commands/search.rs

Lines changed: 7 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
//! IPC commands for drive search.
22
//!
3-
//! Thin wrappers around `indexing::search` module functions, exposed to the frontend via Tauri commands.
3+
//! Thin wrappers around `search` module functions, exposed to the frontend via Tauri commands.
44
55
use std::sync::Arc;
66
use std::sync::atomic::{AtomicBool, Ordering};
@@ -9,14 +9,13 @@ use serde::Serialize;
99

1010
use crate::ai::client::{AiBackend, ChatCompletionOptions};
1111
use crate::indexing::get_read_pool;
12-
use crate::indexing::search::{
12+
use crate::search::{
1313
self, DIALOG_OPEN, ParsedScope, SEARCH_INDEX, SearchIndexState, SearchQuery, SearchResult, drop_search_index,
1414
fill_directory_sizes, start_backstop_timer, start_idle_timer, touch_activity,
1515
};
1616

17-
use super::ai_query_builder;
18-
use super::ai_response_parser;
1917
use crate::indexing::writer::WRITER_GENERATION;
18+
use crate::search::ai::{self, query_builder as ai_query_builder};
2019

2120
#[derive(Debug, Clone, Serialize)]
2221
#[serde(rename_all = "camelCase")]
@@ -313,76 +312,6 @@ pub struct TranslateDisplay {
313312
pub case_sensitive: Option<bool>,
314313
}
315314

316-
/// Converts an ISO date string (YYYY-MM-DD) to a unix timestamp (seconds since epoch).
317-
pub(crate) fn iso_date_to_timestamp(date_str: &str) -> Result<u64, String> {
318-
let format = time::macros::format_description!("[year]-[month]-[day]");
319-
let date = time::Date::parse(date_str, &format).map_err(|e| format!("Invalid date '{date_str}': {e}"))?;
320-
let datetime = date.with_hms(0, 0, 0).expect("midnight is always valid");
321-
let timestamp = datetime.assume_utc().unix_timestamp();
322-
if timestamp < 0 {
323-
return Err(format!("Date '{date_str}' is before unix epoch"));
324-
}
325-
Ok(timestamp as u64)
326-
}
327-
328-
// ── AI search classification prompt ──────────────────────────────────
329-
330-
/// Classification prompt for the LLM. The LLM classifies intent into predefined
331-
/// enums and extracts filename keywords. Rust handles all structural/technical work.
332-
/// `{TODAY}` is replaced at runtime.
333-
const CLASSIFICATION_PROMPT: &str = "\
334-
Extract search parameters from the user's file search query.
335-
Return one field per line. Omit fields that don't apply.
336-
337-
keywords: filename words, space-separated, in the user's language
338-
type: photos|screenshots|videos|documents|presentations|archives|music|\
339-
code|rust|python|javascript|typescript|go|java|config|logs|fonts|\
340-
databases|xcode|shell-scripts|ssh-keys|docker-compose|env-files|none
341-
time: today|yesterday|this_week|last_week|this_month|last_month|\
342-
this_quarter|last_quarter|this_year|last_year|last_3_months|last_6_months|\
343-
recent|old|YYYY|YYYY..YYYY
344-
size: empty|tiny|small|large|huge|>NUMBERmb|>NUMBERgb|<NUMBERmb
345-
scope: downloads|documents|desktop|dotfiles|PATH
346-
exclude: dirname1 dirname2
347-
folders: yes|no
348-
note: brief limitation caveat if query involves unfilterable concepts
349-
350-
Rules:
351-
- \"keywords\" = words likely in FILENAMES. Not descriptions.
352-
- Use singular forms for keywords (contract, not contracts).
353-
- \"I name them X\" / \"I mark them as X\" → keywords: X (not the descriptive words)
354-
- Only set `time` when the user explicitly mentions a time period (yesterday, last week, recent, 2024, etc.). Never default to recent/today.
355-
- Prefer `type` over `keywords` for well-known file categories. Don't put the type name in keywords.
356-
- Don't put the file format in keywords when using a type. \"PDF documents\" → type: documents. \"sqlite databases\" → type: databases.
357-
- If the user wants ONLY a specific format (not all files of that category), use the format as keyword without type: \"HEIC photos I haven't converted\" → keywords: .heic / note: can't determine conversion status
358-
- \"not in X\" / \"but not in X\" / \"excluding X\" / \"except in X\" → ALWAYS use exclude: X
359-
- \"ssh keys\"/\"env files\"/\"docker compose\"/\"shell scripts\" → type handles this, no keywords needed
360-
- For content/semantic queries (\"photos of my cat\"), set type + add a note
361-
362-
Examples:
363-
\"recent invoices, I mark them rymd\" → keywords: rymd / type: documents / time: recent
364-
\"\u{5927}\u{304d}\u{306a}\u{52d5}\u{753b}\u{3092}\u{524a}\u{9664}\u{3057}\u{305f}\u{3044}\" → type: videos / size: large / note: can't determine safe to delete
365-
\"node_modules folders taking up space\" → keywords: node_modules / folders: yes / size: large
366-
\"screenshots from this week\" → type: screenshots / time: this_week
367-
\"package.json not in node_modules\" → keywords: package.json / exclude: node_modules
368-
\"empty folders\" → folders: yes / size: empty
369-
\"ssh keys\" → type: ssh-keys
370-
\"foton fr\u{00e5}n f\u{00f6}rra veckan\" → type: photos / time: last_week
371-
\"that rust file with the websocket server\" → keywords: websocket / type: rust
372-
\"old xcode projects\" → type: xcode / time: old
373-
\"contracts I signed in the last 6 months\" → keywords: contract / type: documents / time: last_6_months / note: \"signed\" is not filterable
374-
\"shell scripts in my dotfiles\" → type: shell-scripts / scope: dotfiles
375-
\"HEIC photos I haven't converted\" → keywords: .heic / note: can't determine conversion status
376-
377-
Today: {TODAY}.";
378-
379-
fn build_classification_prompt() -> String {
380-
let today = time::OffsetDateTime::now_utc().date();
381-
let format = time::macros::format_description!("[year]-[month]-[day]");
382-
let today_str = today.format(&format).expect("date format always succeeds");
383-
CLASSIFICATION_PROMPT.replace("{TODAY}", &today_str)
384-
}
385-
386315
/// Resolves the AI backend from the current provider configuration.
387316
fn resolve_ai_backend() -> Result<AiBackend, String> {
388317
let provider = crate::ai::manager::get_provider();
@@ -415,7 +344,7 @@ fn resolve_ai_backend() -> Result<AiBackend, String> {
415344
#[tauri::command]
416345
pub async fn translate_search_query(natural_query: String) -> Result<TranslateResult, String> {
417346
let backend = resolve_ai_backend()?;
418-
let system_prompt = build_classification_prompt();
347+
let system_prompt = ai::build_classification_prompt();
419348

420349
log::debug!(
421350
"AI search: classification prompt ({} chars), query={natural_query:?}",
@@ -448,16 +377,16 @@ pub async fn translate_search_query(natural_query: String) -> Result<TranslateRe
448377
log::debug!("AI search: raw response: {response:?}");
449378

450379
// Parse key-value response
451-
let parsed = ai_response_parser::parse_llm_response(&response);
380+
let parsed = ai::parse_llm_response(&response);
452381

453382
// Fallback: if parser returned nothing useful, use raw query keywords
454383
let parsed = if parsed.is_empty() {
455384
log::info!("AI search: LLM returned empty/garbage response, falling back to raw keywords");
456-
let fallback_kw = ai_response_parser::fallback_keywords(&natural_query);
385+
let fallback_kw = ai::fallback_keywords(&natural_query);
457386
if fallback_kw.is_empty() {
458387
parsed
459388
} else {
460-
ai_response_parser::ParsedLlmResponse {
389+
ai::ParsedLlmResponse {
461390
keywords: Some(fallback_kw),
462391
..Default::default()
463392
}
@@ -483,27 +412,6 @@ pub async fn translate_search_query(natural_query: String) -> Result<TranslateRe
483412
mod tests {
484413
use super::*;
485414

486-
#[test]
487-
fn test_iso_date_to_timestamp() {
488-
// 2025-01-01 00:00:00 UTC = 1735689600
489-
let ts = iso_date_to_timestamp("2025-01-01").unwrap();
490-
assert_eq!(ts, 1_735_689_600);
491-
}
492-
493-
#[test]
494-
fn test_iso_date_to_timestamp_mid_year() {
495-
// 2026-06-15 00:00:00 UTC = 1781481600
496-
let ts = iso_date_to_timestamp("2026-06-15").unwrap();
497-
assert_eq!(ts, 1_781_481_600);
498-
}
499-
500-
#[test]
501-
fn test_iso_date_to_timestamp_invalid() {
502-
assert!(iso_date_to_timestamp("not-a-date").is_err());
503-
assert!(iso_date_to_timestamp("2025-13-01").is_err());
504-
assert!(iso_date_to_timestamp("2025-01-32").is_err());
505-
}
506-
507415
#[test]
508416
fn test_translate_result_serialization() {
509417
let result = TranslateResult {
@@ -539,26 +447,4 @@ mod tests {
539447
assert!(json.contains("patternType"));
540448
assert!(json.contains("2025-01-01"));
541449
}
542-
543-
#[test]
544-
fn test_classification_prompt_contains_date() {
545-
let prompt = build_classification_prompt();
546-
assert!(prompt.contains("Today:"));
547-
assert!(prompt.contains("Extract search parameters"));
548-
// Should contain a date in YYYY-MM-DD format
549-
assert!(prompt.contains("20")); // Year starts with 20
550-
}
551-
552-
#[test]
553-
fn test_classification_prompt_contains_type_enums() {
554-
let prompt = build_classification_prompt();
555-
assert!(prompt.contains("photos|screenshots|videos"));
556-
assert!(prompt.contains("shell-scripts|ssh-keys|docker-compose|env-files"));
557-
}
558-
559-
#[test]
560-
fn test_classification_prompt_contains_time_enums() {
561-
let prompt = build_classification_prompt();
562-
assert!(prompt.contains("last_3_months|last_6_months"));
563-
}
564450
}

apps/desktop/src-tauri/src/indexing/CLAUDE.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ Full design: `docs/specs/drive-indexing/plan.md`
2121
- **reconciler.rs** -- Buffers FSEvents during scan (capped at 500K events; overflow sets `buffer_overflow` flag forcing full rescan), replays after scan completes using event IDs to skip stale events. Processes live events for file creates/removes/modifies using integer-keyed write messages (`UpsertEntryV2`, `DeleteEntryById`, `DeleteSubtreeById`, `PropagateDeltaById`). Resolves filesystem paths to entry IDs via `store::resolve_path()` using a read connection passed by callers. Key functions (`process_fs_event`, `emit_dir_updated`) are `pub(super)` so `mod.rs` can call them directly during cold-start replay. `reconcile_subtree()` handles MustScanSubDirs by diffing filesystem vs DB directory-by-directory instead of delete-then-reinsert, making it safe to interrupt at any point.
2222
- **firmlinks.rs** -- Parses `/usr/share/firmlinks`, builds prefix map, normalizes paths. Converts `/System/Volumes/Data/Users/foo` to `/Users/foo`.
2323
- **verifier.rs** -- Per-navigation background readdir diff. On each directory navigation, `trigger_verification()` (called from `streaming.rs` and `operations.rs` after enrichment) is fully fire-and-forget: it spawns a task that acquires the `INDEXING` lock (never blocking the navigation thread), checks dedup/debounce via static `VerifierState` (in-flight set + recent timestamps), then spawns a second async task that: (1) reads DB children via `ReadPool`, (2) reads disk via `read_dir` (filtering through `scanner::should_exclude`), (3) diffs by normalized name, sending `UpsertEntryV2`/`DeleteEntryById`/`DeleteSubtreeById`/`PropagateDeltaById` corrections to the writer. New directories are flushed then scanned via `scan_subtree` with delta propagation. Debounce: 30s per path, max 2 concurrent verifications. Only runs after initial scan is complete (checks `scanning` flag). `invalidate()` clears state on shutdown/clear.
24-
- **search.rs** -- In-memory search index for whole-drive file search. Lazily loads all entries from the index DB into a `Vec<SearchEntry>` for fast parallel scanning with rayon. Filenames are arena-allocated: all names are concatenated into a single `SearchIndex.names: String` buffer, and each `SearchEntry` stores `name_offset: u32` + `name_len: u16` instead of an owned `String`. During load, `row.get_ref(col).as_str()` borrows directly from SQLite's internal buffer (zero per-row heap allocations), then pushes into the arena. `name_folded` is NOT stored in the search index — instead, the search pattern is NFD-normalized at query time on macOS (APFS filenames are already NFD). `SearchIndex::name(&self, entry)` retrieves a `&str` slice from the arena. `search()` is a pure function: compiles glob/regex patterns, parallel-filters entries, sorts by recency. Global `SEARCH_INDEX` state with `Arc<SearchIndex>`, idle timer (5 min after dialog close), backstop timer (10 min with no activity), and load cancellation via `AtomicBool` checked every 100K rows. `WRITER_GENERATION` in writer.rs tracks mutations; stale indexes are detected on search. Scope filtering: `SearchQuery` accepts optional `include_paths` (absolute paths — search only within these subtrees) and `exclude_dir_names` (directory names/patterns to exclude at any depth). Include paths are resolved to entry IDs via `store::resolve_path()` (SQLite indexed lookups, microseconds) at the call site before `search()`, stored in `include_path_ids`. `prepare_scope_filter()` reads pre-resolved IDs and compiles exclude patterns as regexes. `ScopeFilter::matches()` walks the ancestor chain via `id_to_index` (O(1) per level) after all other filters pass. `parse_scope()` parses a user-typed comma-separated scope string (with quoting, escaping, `~` expansion, `!` excludes) into a `ParsedScope` struct. 
IPC commands in `commands/search.rs`: `prepare_search_index` (emits `search-index-ready` event when load completes), `search_files`, `release_search_index`, `translate_search_query` (AI natural language → structured query), `parse_search_scope` (scope string → structured `ParsedScope`).
24+
**Search**: Moved to its own top-level module. See `src-tauri/src/search/CLAUDE.md`.
2525

2626
IPC commands in `commands/indexing.rs` -- thin wrappers over `IndexManager` methods.
2727

apps/desktop/src-tauri/src/indexing/mod.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ mod enrichment;
99
mod event_loop;
1010
mod events;
1111
pub mod firmlinks;
12-
pub(crate) mod search;
1312
pub mod store;
1413
pub mod writer;
1514

apps/desktop/src-tauri/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ mod network;
104104
mod permissions;
105105
#[cfg(target_os = "linux")]
106106
mod permissions_linux;
107+
pub mod search;
107108
mod settings;
108109
#[cfg(target_os = "macos")]
109110
mod updater;

apps/desktop/src-tauri/src/mcp/executor.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -678,8 +678,8 @@ fn execute_dialog_close<R: Runtime>(app: &AppHandle<R>, dialog_type: &str, path:
678678

679679
// ── Search tools ──────────────────────────────────────────────────────
680680

681-
use crate::indexing::search::PatternType;
682-
use crate::indexing::search::{
681+
use crate::search::PatternType;
682+
use crate::search::{
683683
self, DIALOG_OPEN, SEARCH_INDEX, SearchIndexState, SearchQuery, SearchResult, fill_directory_sizes, format_size,
684684
format_timestamp, summarize_query,
685685
};
@@ -919,13 +919,13 @@ async fn execute_search(params: &Value) -> ToolResult {
919919
let modified_after = params
920920
.get("modified_after")
921921
.and_then(|v| v.as_str())
922-
.map(crate::commands::search::iso_date_to_timestamp)
922+
.map(search::ai::iso_date_to_timestamp)
923923
.transpose()
924924
.map_err(ToolError::invalid_params)?;
925925
let modified_before = params
926926
.get("modified_before")
927927
.and_then(|v| v.as_str())
928-
.map(crate::commands::search::iso_date_to_timestamp)
928+
.map(search::ai::iso_date_to_timestamp)
929929
.transpose()
930930
.map_err(ToolError::invalid_params)?;
931931
let is_directory = match params.get("type").and_then(|v| v.as_str()) {
@@ -1263,7 +1263,7 @@ mod tests {
12631263

12641264
#[test]
12651265
fn test_format_search_results_with_entries() {
1266-
use crate::indexing::search::SearchResultEntry;
1266+
use crate::search::SearchResultEntry;
12671267
let result = SearchResult {
12681268
entries: vec![SearchResultEntry {
12691269
name: "test.pdf".to_string(),
@@ -1285,7 +1285,7 @@ mod tests {
12851285

12861286
#[test]
12871287
fn test_format_search_results_directory_trailing_slash() {
1288-
use crate::indexing::search::SearchResultEntry;
1288+
use crate::search::SearchResultEntry;
12891289
let result = SearchResult {
12901290
entries: vec![SearchResultEntry {
12911291
name: "Projects".to_string(),

0 commit comments

Comments
 (0)