Skip to content

Commit fdadfc8

Browse files
committed
Drive index: keep min_subtree_epoch honest under live mutation
The live-watch and verifier paths (where the local index spends ~all its life) now maintain coverage correctly: `propagate_delta_by_id` carries `min_subtree_epoch` through a pure size/count delta unchanged (so a file write into a complete dir no longer flips it to a lower bound), and a new `propagate_min_subtree_epoch` re-min walk fires only on tree-shape changes — new dir (ancestors drop to 0), delete/delete-subtree (a gone incomplete child can raise coverage), and cross-parent move (both chains). `reconcile_subtree` (the real live fill path) now marks every dir it lists and re-mins up; the verifier lifts ancestor coverage after a subtree scan. Regression pinned: a live file-add keeps an exact dir exact.
1 parent c4b20c9 commit fdadfc8

7 files changed

Lines changed: 796 additions & 10 deletions

File tree

apps/desktop/src-tauri/src/indexing/DETAILS.md

Lines changed: 6 additions & 2 deletions
Large diffs are not rendered by default.

apps/desktop/src-tauri/src/indexing/reconciler.rs

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -403,6 +403,16 @@ pub(super) fn reconcile_subtree(
403403
let mut removed: u64 = 0;
404404
let mut updated: u64 = 0;
405405

406+
// The epoch every dir we successfully list this pass is stamped with. A
407+
// reconcile *stamps* with the current epoch; it never bumps it. Read once.
408+
let epoch = IndexStore::read_current_epoch(conn).unwrap_or(1);
409+
// Every dir whose direct contents we successfully list (incl. empty), so we
410+
// can `MarkDirsListed` them after the walk and lift ancestor coverage.
411+
// Without this, a reconcile-discovered subtree stays `listed_epoch = 0`
412+
// forever and drags every ancestor to incomplete — the exact local-live-path
413+
// regression this milestone guards against.
414+
let mut listed_dir_ids: Vec<i64> = Vec::new();
415+
406416
let root_str = firmlinks::normalize_path(&root.to_string_lossy());
407417
let root_id = match store::resolve_path(conn, &root_str) {
408418
Ok(Some(id)) => id,
@@ -495,6 +505,9 @@ pub(super) fn reconcile_subtree(
495505
Some(c) => c,
496506
None => continue,
497507
};
508+
// We successfully listed this dir's direct contents (an empty listing
509+
// still counts). Stamp it at the current epoch after the walk.
510+
listed_dir_ids.push(dir_id);
498511

499512
let db_children =
500513
IndexStore::list_children_on(dir_id, conn).map_err(|e| format!("list_children_on({dir_id}): {e}"))?;
@@ -600,6 +613,27 @@ pub(super) fn reconcile_subtree(
600613
}
601614
}
602615

616+
// Stamp every dir we listed at the current epoch, then lift ancestor
617+
// coverage. The walk collected ids shallow→deep (BFS), so recompute
618+
// deepest-first: a parent's `min_subtree_epoch` reads its children's stored
619+
// values, which must already reflect this pass. `propagate_min_subtree_epoch`
620+
// short-circuits once a value stabilizes, so the repeated up-walks are cheap.
621+
if !listed_dir_ids.is_empty() {
622+
// Chunk under SQLite's bound-parameter ceiling (+1 for the epoch param).
623+
const MARK_CHUNK: usize = 900;
624+
for chunk in listed_dir_ids.chunks(MARK_CHUNK) {
625+
if let Err(e) = writer.send(WriteMessage::MarkDirsListed {
626+
ids: chunk.to_vec(),
627+
epoch,
628+
}) {
629+
log::warn!("reconcile_subtree: failed to send MarkDirsListed: {e}");
630+
}
631+
}
632+
for dir_id in listed_dir_ids.iter().rev() {
633+
let _ = writer.send(WriteMessage::PropagateMinSubtreeEpoch(*dir_id));
634+
}
635+
}
636+
603637
Ok(ReconcileSummary {
604638
added,
605639
removed,
@@ -1873,6 +1907,69 @@ mod tests {
18731907
assert_eq!(children.len(), 2, "both child files should be indexed");
18741908
}
18751909

1910+
/// A reconcile-discovered subtree must be stamped `listed_epoch = current`
1911+
/// for every dir it lists (including empty ones), and ancestor coverage must
1912+
/// lift. Without the mark, the subtree stays `listed_epoch = 0` forever and
1913+
/// drags ancestors to incomplete — the exact local-live-path regression M1
1914+
/// guards against.
1915+
#[test]
1916+
fn reconcile_subtree_marks_listed_dirs_at_current_epoch() {
1917+
let (writer, dir, conn) = setup_test_writer();
1918+
let db_path = dir.path().join("test-reconciler.db");
1919+
1920+
// Stamp the volume's current epoch at 5.
1921+
{
1922+
let wconn = IndexStore::open_write_connection(&db_path).unwrap();
1923+
IndexStore::update_meta(&wconn, "current_epoch", "5").unwrap();
1924+
}
1925+
1926+
// On disk: a new tree with a child dir (non-empty) and an empty dir.
1927+
let test_dir = non_excluded_tempdir();
1928+
let new_dir = test_dir.path().join("tree");
1929+
std::fs::create_dir(&new_dir).unwrap();
1930+
std::fs::create_dir(new_dir.join("sub")).unwrap();
1931+
std::fs::write(new_dir.join("sub").join("f.txt"), "x").unwrap();
1932+
std::fs::create_dir(new_dir.join("empty")).unwrap();
1933+
1934+
// Only the parent of `tree` is in the DB (mimics must_scan_sub_dirs).
1935+
ensure_path_in_db(&db_path, &test_dir.path().to_string_lossy(), &writer);
1936+
1937+
let cancelled = AtomicBool::new(false);
1938+
reconcile_subtree(&new_dir, &conn, &writer, &cancelled).unwrap();
1939+
writer.flush_blocking().unwrap();
1940+
writer.shutdown();
1941+
1942+
let store = IndexStore::open(&db_path).unwrap();
1943+
let rconn = store.read_conn();
1944+
let resolve = |p: &Path| {
1945+
store::resolve_path(rconn, &p.to_string_lossy())
1946+
.unwrap()
1947+
.unwrap_or_else(|| panic!("{} should be in DB", p.display()))
1948+
};
1949+
let tree_id = resolve(&new_dir);
1950+
let sub_id = resolve(&new_dir.join("sub"));
1951+
let empty_id = resolve(&new_dir.join("empty"));
1952+
1953+
// Every listed dir (including the empty one) is stamped at epoch 5.
1954+
for (label, id) in [("tree", tree_id), ("sub", sub_id), ("empty", empty_id)] {
1955+
assert_eq!(
1956+
IndexStore::get_listed_epoch_by_id(rconn, id).unwrap(),
1957+
Some(5),
1958+
"{label} must be listed at the current epoch"
1959+
);
1960+
}
1961+
1962+
// Coverage lifted: the whole reconciled subtree is complete at epoch 5.
1963+
assert_eq!(
1964+
IndexStore::get_dir_stats_by_id(rconn, tree_id)
1965+
.unwrap()
1966+
.unwrap()
1967+
.min_subtree_epoch,
1968+
5,
1969+
"tree's min_subtree_epoch lifts to 5 (fully listed)"
1970+
);
1971+
}
1972+
18761973
/// Bug 2: after a MustScanSubDirs rescan completes, pending queued rescans
18771974
/// should be started automatically. Previously the spawned task set
18781975
/// rescan_active=false but never called start_next_rescan, so queued paths

apps/desktop/src-tauri/src/indexing/store.rs

Lines changed: 108 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1134,13 +1134,54 @@ impl IndexStore {
11341134
}
11351135

11361136
/// Read an entry's `listed_epoch` by id. `None` if the entry doesn't exist.
1137-
#[cfg(test)]
11381137
pub fn get_listed_epoch_by_id(conn: &Connection, id: i64) -> Result<Option<u64>, IndexStoreError> {
11391138
let mut stmt = conn.prepare_cached("SELECT listed_epoch FROM entries WHERE id = ?1")?;
11401139
let result = stmt.query_row(params![id], |row| row.get::<_, u64>(0)).optional()?;
11411140
Ok(result)
11421141
}
11431142

1143+
/// Recompute a directory's `min_subtree_epoch` from its own `listed_epoch`
1144+
/// and every child directory's stored `min_subtree_epoch`, taking the
1145+
/// 0-absorbing `min` (any `0` ⇒ `0`).
1146+
///
1147+
/// This is the per-dir step of `delta::propagate_min_subtree_epoch`, kept in
1148+
/// the store as a single SQL pass so the walk does one round trip per
1149+
/// ancestor instead of N+1. A dir with no `entries` row is treated as
1150+
/// unlisted (`0`). The 0-absorbing semantics differ from a plain `MIN()`:
1151+
/// SQL `MIN` over `{1, 0}` is `0` (correct here by luck), but `MIN` over an
1152+
/// empty child set is `NULL`, and `MIN` ignoring NULLs could mask a missing
1153+
/// `dir_stats` row — so the CTE coalesces a missing child stat to `0`.
1154+
pub fn recompute_min_subtree_epoch(conn: &Connection, dir_id: i64) -> Result<u64, IndexStoreError> {
1155+
// Own listed_epoch (0 if the row is gone).
1156+
let own: u64 = conn
1157+
.prepare_cached("SELECT listed_epoch FROM entries WHERE id = ?1")?
1158+
.query_row(params![dir_id], |row| row.get::<_, u64>(0))
1159+
.optional()?
1160+
.unwrap_or(0);
1161+
if own == 0 {
1162+
return Ok(0);
1163+
}
1164+
1165+
// 0-absorbing min over child dirs' min_subtree_epoch. A child dir with no
1166+
// dir_stats row counts as 0 (unknown coverage), so LEFT JOIN + COALESCE.
1167+
// If ANY child is 0, MIN is 0. No child dirs ⇒ MIN over empty ⇒ NULL ⇒
1168+
// keep `own` (a listed-but-childless dir is fully covered at its epoch).
1169+
let child_min: Option<u64> = conn
1170+
.prepare_cached(
1171+
"SELECT MIN(COALESCE(ds.min_subtree_epoch, 0))
1172+
FROM entries c
1173+
LEFT JOIN dir_stats ds ON ds.entry_id = c.id
1174+
WHERE c.parent_id = ?1 AND c.is_directory = 1",
1175+
)?
1176+
.query_row(params![dir_id], |row| row.get::<_, Option<u64>>(0))?;
1177+
1178+
Ok(match child_min {
1179+
None => own, // no child dirs: covered at own epoch
1180+
Some(0) => 0, // some child subtree is unlisted
1181+
Some(cm) => own.min(cm), // weakest link across self + children
1182+
})
1183+
}
1184+
11441185
/// Get all directory paths from the entries table.
11451186
///
11461187
/// Reconstructs full paths from the integer-keyed tree for backward compat.
@@ -1484,6 +1525,72 @@ mod tests {
14841525
assert_eq!(IndexStore::read_current_epoch(&conn).unwrap(), 2);
14851526
}
14861527

1528+
/// `recompute_min_subtree_epoch`: the 0-absorbing min over the dir's own
1529+
/// `listed_epoch` and every child dir's stored `min_subtree_epoch`.
1530+
#[test]
1531+
fn recompute_min_subtree_epoch_cases() {
1532+
let (store, _dir) = open_temp_store();
1533+
let conn = IndexStore::open_write_connection(store.db_path()).unwrap();
1534+
1535+
// An unlisted dir (listed_epoch = 0) is always 0, regardless of children.
1536+
let unlisted = insert_entry(&conn, ROOT_ID, "unlisted", true, None);
1537+
assert_eq!(IndexStore::recompute_min_subtree_epoch(&conn, unlisted).unwrap(), 0);
1538+
1539+
// A listed dir with NO child dirs is covered at its own epoch.
1540+
let leaf = insert_entry(&conn, ROOT_ID, "leaf", true, None);
1541+
IndexStore::mark_dirs_listed(&conn, &[leaf], 5).unwrap();
1542+
assert_eq!(
1543+
IndexStore::recompute_min_subtree_epoch(&conn, leaf).unwrap(),
1544+
5,
1545+
"listed-childless ⇒ own epoch"
1546+
);
1547+
1548+
// A listed parent with one complete child (epoch 4) and one incomplete
1549+
// child (epoch 0) ⇒ 0 (the 0 absorbs).
1550+
let parent = insert_entry(&conn, ROOT_ID, "parent", true, None);
1551+
IndexStore::mark_dirs_listed(&conn, &[parent], 9).unwrap();
1552+
let complete = insert_entry(&conn, parent, "complete", true, None);
1553+
let incomplete = insert_entry(&conn, parent, "incomplete", true, None);
1554+
IndexStore::upsert_dir_stats_by_id(
1555+
&conn,
1556+
&[
1557+
DirStatsById {
1558+
entry_id: complete,
1559+
min_subtree_epoch: 4,
1560+
..Default::default()
1561+
},
1562+
DirStatsById {
1563+
entry_id: incomplete,
1564+
min_subtree_epoch: 0,
1565+
..Default::default()
1566+
},
1567+
],
1568+
)
1569+
.unwrap();
1570+
assert_eq!(
1571+
IndexStore::recompute_min_subtree_epoch(&conn, parent).unwrap(),
1572+
0,
1573+
"an incomplete child absorbs to 0"
1574+
);
1575+
1576+
// With both children complete (4 and 6), the parent is the weakest link
1577+
// across self (9) and children ⇒ 4.
1578+
IndexStore::upsert_dir_stats_by_id(
1579+
&conn,
1580+
&[DirStatsById {
1581+
entry_id: incomplete,
1582+
min_subtree_epoch: 6,
1583+
..Default::default()
1584+
}],
1585+
)
1586+
.unwrap();
1587+
assert_eq!(
1588+
IndexStore::recompute_min_subtree_epoch(&conn, parent).unwrap(),
1589+
4,
1590+
"weakest link = min(own=9, 4, 6) = 4"
1591+
);
1592+
}
1593+
14871594
/// A schema-version mismatch drops + rebuilds; the rebuilt DB still has the
14881595
/// new v13 columns (a write/read round-trip through them succeeds).
14891596
#[test]

apps/desktop/src-tauri/src/indexing/verifier.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,12 @@ async fn verify_and_correct(dir_path: &str, writer: &IndexWriter) -> Vec<String>
436436
file_count_delta: stats.recursive_file_count as i32,
437437
dir_count_delta: stats.recursive_dir_count as i32,
438438
});
439+
// `scan_subtree` stamped the new dir's `listed_epoch` and its
440+
// `ComputeSubtreeAggregates` set its `min_subtree_epoch`, but the
441+
// `UpsertEntryV2` that created it earlier dropped every ancestor's
442+
// coverage to 0. Recompute up from the parent so a now-fully-listed
443+
// subtree lifts ancestor coverage back to exact.
444+
let _ = writer.send(WriteMessage::PropagateMinSubtreeEpoch(*p_id));
439445
}
440446
}
441447

0 commit comments

Comments
 (0)