Skip to content

Commit 3908641

Browse files
committed
Refactor/indexing: structural cleanup
- Extract `process_message` match arms into 9 named `handle_*` functions in `writer.rs`, turning the 370-line match block into a compact dispatcher.
- Split `mod.rs` (1,951 lines) into `mod.rs` (454 non-test lines) + `manager.rs` (775 lines), and move `DebugStats` to `events.rs`.
- Add `metadata.rs` with a `MetadataSnapshot` struct and `extract_metadata()`, replacing scattered tuple-based extraction across scanner, reconciler, verifier, and event_loop.
- Extract shared `compute_bottom_up()` in `aggregator.rs`, replacing 3 duplicated bottom-up loops. Fix the N+1 query in the backfill path.
- Unify the `name_folded` column across all platforms in `store.rs`, eliminating 12 `#[cfg]` blocks. Schema bumped to v9.
- Replace all `thread::sleep` calls with `flush_blocking()` in writer and scanner tests (46 sleeps removed).
1 parent 5959eaa commit 3908641

12 files changed

Lines changed: 1725 additions & 1713 deletions

File tree

apps/desktop/src-tauri/src/indexing/CLAUDE.md

Lines changed: 13 additions & 11 deletions
Large diffs are not rendered by default.

apps/desktop/src-tauri/src/indexing/aggregator.rs

Lines changed: 95 additions & 111 deletions
Original file line numberDiff line numberDiff line change
@@ -147,40 +147,7 @@ fn compute_and_write(
147147
let compute_report_interval = (dir_count / 100).max(1000).min(dir_count.max(1)) as usize;
148148

149149
on_progress(AggregationProgress::new(AggregationPhase::Computing, 0, dir_count));
150-
let mut computed: HashMap<i64, DirStatsById> = HashMap::with_capacity(sorted.len());
151-
152-
for (i, &dir_id) in sorted.iter().enumerate() {
153-
let (logical_size_sum, physical_size_sum, file_count, child_dir_count) =
154-
direct_stats.get(&dir_id).copied().unwrap_or((0, 0, 0, 0));
155-
156-
let mut recursive_logical_size = logical_size_sum;
157-
let mut recursive_physical_size = physical_size_sum;
158-
let mut recursive_file_count = file_count;
159-
let mut recursive_dir_count = child_dir_count;
160-
161-
// Add already-computed recursive stats from child directories
162-
if let Some(children) = child_dirs_map.get(&dir_id) {
163-
for &child_id in children {
164-
if let Some(child_stats) = computed.get(&child_id) {
165-
recursive_logical_size += child_stats.recursive_logical_size;
166-
recursive_physical_size += child_stats.recursive_physical_size;
167-
recursive_file_count += child_stats.recursive_file_count;
168-
recursive_dir_count += child_stats.recursive_dir_count;
169-
}
170-
}
171-
}
172-
173-
computed.insert(
174-
dir_id,
175-
DirStatsById {
176-
entry_id: dir_id,
177-
recursive_logical_size,
178-
recursive_physical_size,
179-
recursive_file_count,
180-
recursive_dir_count,
181-
},
182-
);
183-
150+
let computed = compute_bottom_up(&sorted, direct_stats, child_dirs_map, None, |i| {
184151
if (i + 1) % compute_report_interval == 0 {
185152
on_progress(AggregationProgress::new(
186153
AggregationPhase::Computing,
@@ -193,7 +160,7 @@ fn compute_and_write(
193160
start.elapsed().as_secs_f64()
194161
);
195162
}
196-
}
163+
});
197164

198165
// Batch-write all computed stats in chunks of 1000
199166
log::debug!("Aggregation: writing {} dir_stats rows to DB...", computed.len());
@@ -220,6 +187,62 @@ fn compute_and_write(
220187
Ok(count)
221188
}
222189

190+
/// Bottom-up aggregation over a topologically sorted list of directory IDs.
191+
///
192+
/// For each directory (leaves first), sums direct children stats from `direct_stats`,
193+
/// then adds recursive stats from already-computed child directories. When
194+
/// `existing_stats` is provided, falls back to it for children not yet in the
195+
/// computed map (used by `backfill_missing_dir_stats` where some children already
196+
/// have DB rows). Calls `on_iter(index)` after each directory for progress reporting.
197+
fn compute_bottom_up(
198+
sorted_ids: &[i64],
199+
direct_stats: &ChildrenStatsMap,
200+
child_dirs: &HashMap<i64, Vec<i64>>,
201+
existing_stats: Option<&HashMap<i64, DirStatsById>>,
202+
mut on_iter: impl FnMut(usize),
203+
) -> HashMap<i64, DirStatsById> {
204+
let mut computed: HashMap<i64, DirStatsById> = HashMap::with_capacity(sorted_ids.len());
205+
206+
for (i, &dir_id) in sorted_ids.iter().enumerate() {
207+
let (logical_size_sum, physical_size_sum, file_count, child_dir_count) =
208+
direct_stats.get(&dir_id).copied().unwrap_or((0, 0, 0, 0));
209+
210+
let mut recursive_logical_size = logical_size_sum;
211+
let mut recursive_physical_size = physical_size_sum;
212+
let mut recursive_file_count = file_count;
213+
let mut recursive_dir_count = child_dir_count;
214+
215+
if let Some(children) = child_dirs.get(&dir_id) {
216+
for &child_id in children {
217+
let child_stats = computed
218+
.get(&child_id)
219+
.or_else(|| existing_stats.and_then(|m| m.get(&child_id)));
220+
if let Some(cs) = child_stats {
221+
recursive_logical_size += cs.recursive_logical_size;
222+
recursive_physical_size += cs.recursive_physical_size;
223+
recursive_file_count += cs.recursive_file_count;
224+
recursive_dir_count += cs.recursive_dir_count;
225+
}
226+
}
227+
}
228+
229+
computed.insert(
230+
dir_id,
231+
DirStatsById {
232+
entry_id: dir_id,
233+
recursive_logical_size,
234+
recursive_physical_size,
235+
recursive_file_count,
236+
recursive_dir_count,
237+
},
238+
);
239+
240+
on_iter(i);
241+
}
242+
243+
computed
244+
}
245+
223246
/// Compute `dir_stats` for directories under `root` only (bottom-up).
224247
///
225248
/// Called after a subtree scan completes. Resolves the root path to an entry ID,
@@ -257,40 +280,7 @@ pub fn compute_subtree_aggregates(conn: &Connection, root: &str) -> Result<u64,
257280

258281
// Topological sort: leaves first
259282
let sorted = topological_sort_bottom_up(&dir_entries);
260-
261-
let mut computed: HashMap<i64, DirStatsById> = HashMap::with_capacity(sorted.len());
262-
263-
for &dir_id in &sorted {
264-
let (logical_size_sum, physical_size_sum, file_count, child_dir_count) =
265-
direct_stats.get(&dir_id).copied().unwrap_or((0, 0, 0, 0));
266-
267-
let mut recursive_logical_size = logical_size_sum;
268-
let mut recursive_physical_size = physical_size_sum;
269-
let mut recursive_file_count = file_count;
270-
let mut recursive_dir_count = child_dir_count;
271-
272-
if let Some(children) = child_dirs_map.get(&dir_id) {
273-
for &child_id in children {
274-
if let Some(child_stats) = computed.get(&child_id) {
275-
recursive_logical_size += child_stats.recursive_logical_size;
276-
recursive_physical_size += child_stats.recursive_physical_size;
277-
recursive_file_count += child_stats.recursive_file_count;
278-
recursive_dir_count += child_stats.recursive_dir_count;
279-
}
280-
}
281-
}
282-
283-
computed.insert(
284-
dir_id,
285-
DirStatsById {
286-
entry_id: dir_id,
287-
recursive_logical_size,
288-
recursive_physical_size,
289-
recursive_file_count,
290-
recursive_dir_count,
291-
},
292-
);
293-
}
283+
let computed = compute_bottom_up(&sorted, &direct_stats, &child_dirs_map, None, |_| {});
294284

295285
// Batch-write all computed stats
296286
log::debug!(
@@ -339,6 +329,10 @@ pub fn backfill_missing_dir_stats(conn: &Connection) -> Result<u64, IndexStoreEr
339329
let direct_stats = bulk_get_children_stats_by_id(conn)?;
340330
let child_dirs_map = bulk_get_child_dir_ids(conn)?;
341331

332+
// Bulk-load existing dir_stats so the bottom-up pass can use them as
333+
// fallback for children that already have stats (avoids N+1 queries).
334+
let existing_stats = bulk_get_all_dir_stats(conn)?;
335+
342336
// Topological sort all dirs (we need correct ordering)
343337
let sorted = topological_sort_bottom_up(&all_dir_entries);
344338

@@ -349,48 +343,11 @@ pub fn backfill_missing_dir_stats(conn: &Connection) -> Result<u64, IndexStoreEr
349343
// We need to compute all because a missing dir's stats depend on its
350344
// children (which might have existing stats in the DB or might also be
351345
// missing).
352-
let mut computed: HashMap<i64, DirStatsById> = HashMap::with_capacity(sorted.len());
353-
let mut to_write: Vec<DirStatsById> = Vec::with_capacity(count);
354-
355-
for &dir_id in &sorted {
356-
let (logical_size_sum, physical_size_sum, file_count, child_dir_count) =
357-
direct_stats.get(&dir_id).copied().unwrap_or((0, 0, 0, 0));
358-
359-
let mut recursive_logical_size = logical_size_sum;
360-
let mut recursive_physical_size = physical_size_sum;
361-
let mut recursive_file_count = file_count;
362-
let mut recursive_dir_count = child_dir_count;
363-
364-
if let Some(children) = child_dirs_map.get(&dir_id) {
365-
for &child_id in children {
366-
// Prefer freshly computed stats, fall back to existing DB stats
367-
if let Some(child_stats) = computed.get(&child_id) {
368-
recursive_logical_size += child_stats.recursive_logical_size;
369-
recursive_physical_size += child_stats.recursive_physical_size;
370-
recursive_file_count += child_stats.recursive_file_count;
371-
recursive_dir_count += child_stats.recursive_dir_count;
372-
} else if let Ok(Some(db_stats)) = IndexStore::get_dir_stats_by_id(conn, child_id) {
373-
recursive_logical_size += db_stats.recursive_logical_size;
374-
recursive_physical_size += db_stats.recursive_physical_size;
375-
recursive_file_count += db_stats.recursive_file_count;
376-
recursive_dir_count += db_stats.recursive_dir_count;
377-
}
378-
}
379-
}
380-
381-
let stats = DirStatsById {
382-
entry_id: dir_id,
383-
recursive_logical_size,
384-
recursive_physical_size,
385-
recursive_file_count,
386-
recursive_dir_count,
387-
};
388-
389-
if missing_set.contains(&dir_id) {
390-
to_write.push(stats.clone());
391-
}
392-
computed.insert(dir_id, stats);
393-
}
346+
let computed = compute_bottom_up(&sorted, &direct_stats, &child_dirs_map, Some(&existing_stats), |_| {});
347+
let to_write: Vec<DirStatsById> = computed
348+
.into_values()
349+
.filter(|s| missing_set.contains(&s.entry_id))
350+
.collect();
394351

395352
// Batch-write only the missing stats
396353
for chunk in to_write.chunks(1000) {
@@ -548,6 +505,33 @@ fn bulk_get_child_dir_ids(conn: &Connection) -> Result<HashMap<i64, Vec<i64>>, I
548505
Ok(map)
549506
}
550507

508+
/// Bulk-load all existing `dir_stats` rows into a map keyed by `entry_id`.
509+
///
510+
/// Used by `backfill_missing_dir_stats` so the bottom-up pass can fall back to
511+
/// existing stats for children that already have rows (avoiding N+1 queries).
512+
fn bulk_get_all_dir_stats(conn: &Connection) -> Result<HashMap<i64, DirStatsById>, IndexStoreError> {
513+
let mut stmt = conn.prepare(
514+
"SELECT entry_id, recursive_logical_size, recursive_physical_size,
515+
recursive_file_count, recursive_dir_count
516+
FROM dir_stats",
517+
)?;
518+
let rows = stmt.query_map([], |row| {
519+
Ok(DirStatsById {
520+
entry_id: row.get(0)?,
521+
recursive_logical_size: row.get(1)?,
522+
recursive_physical_size: row.get(2)?,
523+
recursive_file_count: row.get(3)?,
524+
recursive_dir_count: row.get(4)?,
525+
})
526+
})?;
527+
let mut map = HashMap::new();
528+
for row in rows {
529+
let stats = row?;
530+
map.insert(stats.entry_id, stats);
531+
}
532+
Ok(map)
533+
}
534+
551535
/// Load direct children stats scoped to a subtree via recursive CTE.
552536
///
553537
/// Returns a map: `parent_id -> (logical_size_sum, physical_size_sum, file_count, dir_count)`.

apps/desktop/src-tauri/src/indexing/event_loop.rs

Lines changed: 6 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -990,31 +990,18 @@ fn verify_affected_dirs(affected_paths: &HashSet<String>, writer: &IndexWriter)
990990

991991
let is_dir = metadata.is_dir();
992992
let is_symlink = metadata.is_symlink();
993-
994-
let (logical_size, physical_size, modified_at) = if is_dir || is_symlink {
995-
(None, None, reconciler::entry_modified_at(&metadata))
996-
} else {
997-
reconciler::entry_size_and_mtime(&metadata)
998-
};
999-
1000-
#[cfg(unix)]
1001-
let (inode, nlink) = {
1002-
use std::os::unix::fs::MetadataExt;
1003-
(Some(metadata.ino()), Some(metadata.nlink()))
1004-
};
1005-
#[cfg(not(unix))]
1006-
let (inode, nlink) = (None, None);
993+
let snap = super::metadata::extract_metadata(&metadata, is_dir, is_symlink);
1007994

1008995
let _ = writer.send(WriteMessage::UpsertEntryV2 {
1009996
parent_id: *parent_id,
1010997
name,
1011998
is_directory: is_dir,
1012999
is_symlink,
1013-
logical_size,
1014-
physical_size,
1015-
modified_at,
1016-
inode,
1017-
nlink,
1000+
logical_size: snap.logical_size,
1001+
physical_size: snap.physical_size,
1002+
modified_at: snap.modified_at,
1003+
inode: snap.inode,
1004+
nlink: snap.nlink,
10181005
});
10191006

10201007
// UpsertEntryV2 auto-propagates deltas in the writer.

0 commit comments

Comments
 (0)