Skip to content

Commit 87de136

Browse files
committed
Prevent the app from loading slowly
1 parent a16849f commit 87de136

2 files changed

Lines changed: 85 additions & 48 deletions

File tree

apps/desktop/src-tauri/src/indexing/CLAUDE.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ Key test files are alongside each module (test functions within `#[cfg(test)]` b
9797

9898
**Writer-side delete-with-propagation**: `DeleteEntry` and `DeleteSubtree` handlers in the writer automatically read old data before deleting and propagate accurate negative deltas. This means every deletion -- replay, live, verification -- gets correct dir_stats updates without callers needing to send separate `PropagateDelta` messages. `delete_subtree` and `propagate_delta` have no internal transactions, so they're safe inside the replay's `BEGIN IMMEDIATE` transaction.
9999

100-
**Post-replay verification is bidirectional**: `verify_affected_dirs` checks both directions: (1) stale entries in DB but not on disk (sends `DeleteEntry`/`DeleteSubtree`), and (2) missing entries on disk but not in DB (sends `UpsertEntry` + `PropagateDelta` for files, collects directory paths for `scan_subtree`). New directories are scanned and their subtree totals propagated up the ancestor chain. The `GLOBAL_INDEX_STORE` mutex guard is scoped to avoid holding it across `.await` points (the guard is not `Send`).
100+
**Post-replay verification is bidirectional**: `verify_affected_dirs` checks both directions: (1) stale entries that are in the DB but no longer on disk (sends `DeleteEntry`/`DeleteSubtree`), and (2) entries that are on disk but missing from the DB (sends `UpsertEntry` + `PropagateDelta` for files, collects directory paths for `scan_subtree`). New directories are scanned and their subtree totals propagated up the ancestor chain. It uses a two-phase pattern to avoid blocking `enrich_entries_with_index`: Phase 1 holds the `GLOBAL_INDEX_STORE` lock briefly for bulk SQLite reads into a `HashMap`; Phase 2 does all disk I/O (hundreds of `readdir`/`exists`/`symlink_metadata` calls) without holding any lock.
101101

102102
**Schema version mismatch drops the DB**: If `schema_version` in meta doesn't match what the code expects, the entire DB is deleted and rebuilt. No migration path (it's a cache, not user data).
103103

apps/desktop/src-tauri/src/indexing/mod.rs

Lines changed: 84 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1056,29 +1056,43 @@ async fn run_background_verification(
10561056
// scan_subtree computes aggregates within the subtree but doesn't propagate
10571057
// upward. Read the computed dir_stats and send PropagateDelta.
10581058
if !verify_result.new_dir_paths.is_empty() {
1059-
// Scope the mutex guard so it's dropped before the .await below
1060-
{
1059+
// Brief lock: batch-read dir_stats, then release before sending writes
1060+
let dir_stats_snapshot: Vec<(String, Option<DirStats>)> = {
10611061
let guard = GLOBAL_INDEX_STORE.lock();
10621062
if let Ok(ref guard) = guard
10631063
&& let Some(store) = guard.as_ref()
10641064
{
1065-
for dir_path in &verify_result.new_dir_paths {
1066-
if let Ok(Some(stats)) = store.get_dir_stats(dir_path) {
1067-
let _ = writer.send(WriteMessage::PropagateDelta {
1068-
path: PathBuf::from(dir_path),
1069-
size_delta: stats.recursive_size as i64,
1070-
file_count_delta: stats.recursive_file_count as i32,
1071-
dir_count_delta: (stats.recursive_dir_count as i32) + 1,
1072-
});
1073-
} else {
1074-
let _ = writer.send(WriteMessage::PropagateDelta {
1075-
path: PathBuf::from(dir_path),
1076-
size_delta: 0,
1077-
file_count_delta: 0,
1078-
dir_count_delta: 1,
1079-
});
1080-
}
1065+
let refs: Vec<&str> = verify_result.new_dir_paths.iter().map(String::as_str).collect();
1066+
match store.get_dir_stats_batch(&refs) {
1067+
Ok(batch) => verify_result
1068+
.new_dir_paths
1069+
.iter()
1070+
.cloned()
1071+
.zip(batch)
1072+
.collect(),
1073+
Err(_) => Vec::new(),
10811074
}
1075+
} else {
1076+
Vec::new()
1077+
}
1078+
// guard dropped here
1079+
};
1080+
1081+
for (dir_path, stats_opt) in &dir_stats_snapshot {
1082+
if let Some(stats) = stats_opt {
1083+
let _ = writer.send(WriteMessage::PropagateDelta {
1084+
path: PathBuf::from(dir_path),
1085+
size_delta: stats.recursive_size as i64,
1086+
file_count_delta: stats.recursive_file_count as i32,
1087+
dir_count_delta: (stats.recursive_dir_count as i32) + 1,
1088+
});
1089+
} else {
1090+
let _ = writer.send(WriteMessage::PropagateDelta {
1091+
path: PathBuf::from(dir_path),
1092+
size_delta: 0,
1093+
file_count_delta: 0,
1094+
dir_count_delta: 1,
1095+
});
10821096
}
10831097
}
10841098

@@ -1117,49 +1131,72 @@ struct VerifyResult {
11171131
/// "parent directory modified" without individual removal events. Similarly,
11181132
/// new children may not get individual creation events.
11191133
///
1120-
/// For each affected parent this function:
1134+
/// Two-phase approach to minimize `GLOBAL_INDEX_STORE` lock hold time:
1135+
///
1136+
/// **Phase 1 (lock held briefly):** Read all DB children for every affected
1137+
/// parent into an in-memory `HashMap`, then drop the lock. Only SQLite reads,
1138+
/// no disk I/O.
1139+
///
1140+
/// **Phase 2 (no lock):** Walk the `HashMap`, check the filesystem
1141+
/// (`Path::exists`, `read_dir`, `symlink_metadata`), and send corrections to
1142+
/// the writer channel:
11211143
/// 1. **Stale entries**: DB children that no longer exist on disk get
11221144
/// `DeleteEntry`/`DeleteSubtree` (auto-propagates deltas).
11231145
/// 2. **Missing entries**: Disk children not in DB get `UpsertEntry`.
11241146
/// New files also get `PropagateDelta`. New directories are collected
11251147
/// in `new_dir_paths` for the caller to scan via `scan_subtree`.
11261148
fn verify_affected_dirs(affected_paths: &std::collections::HashSet<String>, writer: &IndexWriter) -> VerifyResult {
1127-
let guard = match GLOBAL_INDEX_STORE.lock() {
1128-
Ok(g) => g,
1129-
Err(_) => {
1130-
return VerifyResult {
1131-
stale_count: 0,
1132-
new_file_count: 0,
1133-
new_dir_paths: Vec::new(),
1134-
};
1135-
}
1136-
};
1137-
let store = match guard.as_ref() {
1138-
Some(s) => s,
1139-
None => {
1140-
return VerifyResult {
1141-
stale_count: 0,
1142-
new_file_count: 0,
1143-
new_dir_paths: Vec::new(),
1144-
};
1149+
// ── Phase 1: Bulk-read DB state under the lock ───────────────────
1150+
let db_snapshot: std::collections::HashMap<String, Vec<store::ScannedEntry>> = {
1151+
let guard = match GLOBAL_INDEX_STORE.lock() {
1152+
Ok(g) => g,
1153+
Err(_) => {
1154+
return VerifyResult {
1155+
stale_count: 0,
1156+
new_file_count: 0,
1157+
new_dir_paths: Vec::new(),
1158+
};
1159+
}
1160+
};
1161+
let store = match guard.as_ref() {
1162+
Some(s) => s,
1163+
None => {
1164+
return VerifyResult {
1165+
stale_count: 0,
1166+
new_file_count: 0,
1167+
new_dir_paths: Vec::new(),
1168+
};
1169+
}
1170+
};
1171+
1172+
let mut snapshot = std::collections::HashMap::with_capacity(affected_paths.len());
1173+
for parent_path in affected_paths {
1174+
match store.list_entries_by_parent(parent_path) {
1175+
Ok(entries) => {
1176+
snapshot.insert(parent_path.clone(), entries);
1177+
}
1178+
Err(_) => {
1179+
// Insert empty vec so Phase 2 still checks disk for new entries
1180+
snapshot.insert(parent_path.clone(), Vec::new());
1181+
}
1182+
}
11451183
}
1184+
snapshot
1185+
// guard dropped here — lock released before any disk I/O
11461186
};
11471187

1188+
// ── Phase 2: Filesystem checks without the lock ──────────────────
11481189
let mut stale_count = 0u64;
11491190
let mut new_file_count = 0u64;
11501191
let mut new_dir_paths = Vec::<String>::new();
11511192

1152-
for parent_path in affected_paths {
1153-
let db_children = match store.list_entries_by_parent(parent_path) {
1154-
Ok(entries) => entries,
1155-
Err(_) => continue,
1156-
};
1157-
1193+
for (parent_path, db_children) in &db_snapshot {
11581194
// Build a set of known DB child paths for fast lookup
1159-
let db_child_paths: std::collections::HashSet<&str> = db_children.iter().map(|c| c.path.as_str()).collect();
1195+
let db_child_paths: std::collections::HashSet<&str> =
1196+
db_children.iter().map(|c| c.path.as_str()).collect();
11601197

1161-
// Phase 1: detect stale entries (in DB but not on disk)
1162-
for child in &db_children {
1198+
// Detect stale entries (in DB but not on disk)
1199+
for child in db_children {
11631200
if !Path::new(&child.path).exists() {
11641201
if child.is_directory {
11651202
let _ = writer.send(WriteMessage::DeleteSubtree(child.path.clone()));
@@ -1170,7 +1207,7 @@ fn verify_affected_dirs(affected_paths: &std::collections::HashSet<String>, writ
11701207
}
11711208
}
11721209

1173-
// Phase 2: detect missing entries (on disk but not in DB)
1210+
// Detect missing entries (on disk but not in DB)
11741211
let read_dir = match std::fs::read_dir(parent_path) {
11751212
Ok(rd) => rd,
11761213
Err(_) => continue,

0 commit comments

Comments
 (0)