@@ -147,40 +147,7 @@ fn compute_and_write(
147147 let compute_report_interval = ( dir_count / 100 ) . max ( 1000 ) . min ( dir_count. max ( 1 ) ) as usize ;
148148
149149 on_progress ( AggregationProgress :: new ( AggregationPhase :: Computing , 0 , dir_count) ) ;
150- let mut computed: HashMap < i64 , DirStatsById > = HashMap :: with_capacity ( sorted. len ( ) ) ;
151-
152- for ( i, & dir_id) in sorted. iter ( ) . enumerate ( ) {
153- let ( logical_size_sum, physical_size_sum, file_count, child_dir_count) =
154- direct_stats. get ( & dir_id) . copied ( ) . unwrap_or ( ( 0 , 0 , 0 , 0 ) ) ;
155-
156- let mut recursive_logical_size = logical_size_sum;
157- let mut recursive_physical_size = physical_size_sum;
158- let mut recursive_file_count = file_count;
159- let mut recursive_dir_count = child_dir_count;
160-
161- // Add already-computed recursive stats from child directories
162- if let Some ( children) = child_dirs_map. get ( & dir_id) {
163- for & child_id in children {
164- if let Some ( child_stats) = computed. get ( & child_id) {
165- recursive_logical_size += child_stats. recursive_logical_size ;
166- recursive_physical_size += child_stats. recursive_physical_size ;
167- recursive_file_count += child_stats. recursive_file_count ;
168- recursive_dir_count += child_stats. recursive_dir_count ;
169- }
170- }
171- }
172-
173- computed. insert (
174- dir_id,
175- DirStatsById {
176- entry_id : dir_id,
177- recursive_logical_size,
178- recursive_physical_size,
179- recursive_file_count,
180- recursive_dir_count,
181- } ,
182- ) ;
183-
150+ let computed = compute_bottom_up ( & sorted, direct_stats, child_dirs_map, None , |i| {
184151 if ( i + 1 ) % compute_report_interval == 0 {
185152 on_progress ( AggregationProgress :: new (
186153 AggregationPhase :: Computing ,
@@ -193,7 +160,7 @@ fn compute_and_write(
193160 start. elapsed( ) . as_secs_f64( )
194161 ) ;
195162 }
196- }
163+ } ) ;
197164
198165 // Batch-write all computed stats in chunks of 1000
199166 log:: debug!( "Aggregation: writing {} dir_stats rows to DB..." , computed. len( ) ) ;
@@ -220,6 +187,62 @@ fn compute_and_write(
220187 Ok ( count)
221188}
222189
190+ /// Bottom-up aggregation over a topologically sorted list of directory IDs.
191+ ///
192+ /// For each directory (leaves first), sums direct children stats from `direct_stats`,
193+ /// then adds recursive stats from already-computed child directories. When
194+ /// `existing_stats` is provided, falls back to it for children not yet in the
195+ /// computed map (used by `backfill_missing_dir_stats` where some children already
196+ /// have DB rows). Calls `on_iter(index)` after each directory for progress reporting.
197+ fn compute_bottom_up (
198+ sorted_ids : & [ i64 ] ,
199+ direct_stats : & ChildrenStatsMap ,
200+ child_dirs : & HashMap < i64 , Vec < i64 > > ,
201+ existing_stats : Option < & HashMap < i64 , DirStatsById > > ,
202+ mut on_iter : impl FnMut ( usize ) ,
203+ ) -> HashMap < i64 , DirStatsById > {
204+ let mut computed: HashMap < i64 , DirStatsById > = HashMap :: with_capacity ( sorted_ids. len ( ) ) ;
205+
206+ for ( i, & dir_id) in sorted_ids. iter ( ) . enumerate ( ) {
207+ let ( logical_size_sum, physical_size_sum, file_count, child_dir_count) =
208+ direct_stats. get ( & dir_id) . copied ( ) . unwrap_or ( ( 0 , 0 , 0 , 0 ) ) ;
209+
210+ let mut recursive_logical_size = logical_size_sum;
211+ let mut recursive_physical_size = physical_size_sum;
212+ let mut recursive_file_count = file_count;
213+ let mut recursive_dir_count = child_dir_count;
214+
215+ if let Some ( children) = child_dirs. get ( & dir_id) {
216+ for & child_id in children {
217+ let child_stats = computed
218+ . get ( & child_id)
219+ . or_else ( || existing_stats. and_then ( |m| m. get ( & child_id) ) ) ;
220+ if let Some ( cs) = child_stats {
221+ recursive_logical_size += cs. recursive_logical_size ;
222+ recursive_physical_size += cs. recursive_physical_size ;
223+ recursive_file_count += cs. recursive_file_count ;
224+ recursive_dir_count += cs. recursive_dir_count ;
225+ }
226+ }
227+ }
228+
229+ computed. insert (
230+ dir_id,
231+ DirStatsById {
232+ entry_id : dir_id,
233+ recursive_logical_size,
234+ recursive_physical_size,
235+ recursive_file_count,
236+ recursive_dir_count,
237+ } ,
238+ ) ;
239+
240+ on_iter ( i) ;
241+ }
242+
243+ computed
244+ }
245+
223246/// Compute `dir_stats` for directories under `root` only (bottom-up).
224247///
225248/// Called after a subtree scan completes. Resolves the root path to an entry ID,
@@ -257,40 +280,7 @@ pub fn compute_subtree_aggregates(conn: &Connection, root: &str) -> Result<u64,
257280
258281 // Topological sort: leaves first
259282 let sorted = topological_sort_bottom_up ( & dir_entries) ;
260-
261- let mut computed: HashMap < i64 , DirStatsById > = HashMap :: with_capacity ( sorted. len ( ) ) ;
262-
263- for & dir_id in & sorted {
264- let ( logical_size_sum, physical_size_sum, file_count, child_dir_count) =
265- direct_stats. get ( & dir_id) . copied ( ) . unwrap_or ( ( 0 , 0 , 0 , 0 ) ) ;
266-
267- let mut recursive_logical_size = logical_size_sum;
268- let mut recursive_physical_size = physical_size_sum;
269- let mut recursive_file_count = file_count;
270- let mut recursive_dir_count = child_dir_count;
271-
272- if let Some ( children) = child_dirs_map. get ( & dir_id) {
273- for & child_id in children {
274- if let Some ( child_stats) = computed. get ( & child_id) {
275- recursive_logical_size += child_stats. recursive_logical_size ;
276- recursive_physical_size += child_stats. recursive_physical_size ;
277- recursive_file_count += child_stats. recursive_file_count ;
278- recursive_dir_count += child_stats. recursive_dir_count ;
279- }
280- }
281- }
282-
283- computed. insert (
284- dir_id,
285- DirStatsById {
286- entry_id : dir_id,
287- recursive_logical_size,
288- recursive_physical_size,
289- recursive_file_count,
290- recursive_dir_count,
291- } ,
292- ) ;
293- }
283+ let computed = compute_bottom_up ( & sorted, & direct_stats, & child_dirs_map, None , |_| { } ) ;
294284
295285 // Batch-write all computed stats
296286 log:: debug!(
@@ -339,6 +329,10 @@ pub fn backfill_missing_dir_stats(conn: &Connection) -> Result<u64, IndexStoreEr
339329 let direct_stats = bulk_get_children_stats_by_id ( conn) ?;
340330 let child_dirs_map = bulk_get_child_dir_ids ( conn) ?;
341331
332+ // Bulk-load existing dir_stats so the bottom-up pass can use them as
333+ // fallback for children that already have stats (avoids N+1 queries).
334+ let existing_stats = bulk_get_all_dir_stats ( conn) ?;
335+
342336 // Topological sort all dirs (we need correct ordering)
343337 let sorted = topological_sort_bottom_up ( & all_dir_entries) ;
344338
@@ -349,48 +343,11 @@ pub fn backfill_missing_dir_stats(conn: &Connection) -> Result<u64, IndexStoreEr
349343 // We need to compute all because a missing dir's stats depend on its
350344 // children (which might have existing stats in the DB or might also be
351345 // missing).
352- let mut computed: HashMap < i64 , DirStatsById > = HashMap :: with_capacity ( sorted. len ( ) ) ;
353- let mut to_write: Vec < DirStatsById > = Vec :: with_capacity ( count) ;
354-
355- for & dir_id in & sorted {
356- let ( logical_size_sum, physical_size_sum, file_count, child_dir_count) =
357- direct_stats. get ( & dir_id) . copied ( ) . unwrap_or ( ( 0 , 0 , 0 , 0 ) ) ;
358-
359- let mut recursive_logical_size = logical_size_sum;
360- let mut recursive_physical_size = physical_size_sum;
361- let mut recursive_file_count = file_count;
362- let mut recursive_dir_count = child_dir_count;
363-
364- if let Some ( children) = child_dirs_map. get ( & dir_id) {
365- for & child_id in children {
366- // Prefer freshly computed stats, fall back to existing DB stats
367- if let Some ( child_stats) = computed. get ( & child_id) {
368- recursive_logical_size += child_stats. recursive_logical_size ;
369- recursive_physical_size += child_stats. recursive_physical_size ;
370- recursive_file_count += child_stats. recursive_file_count ;
371- recursive_dir_count += child_stats. recursive_dir_count ;
372- } else if let Ok ( Some ( db_stats) ) = IndexStore :: get_dir_stats_by_id ( conn, child_id) {
373- recursive_logical_size += db_stats. recursive_logical_size ;
374- recursive_physical_size += db_stats. recursive_physical_size ;
375- recursive_file_count += db_stats. recursive_file_count ;
376- recursive_dir_count += db_stats. recursive_dir_count ;
377- }
378- }
379- }
380-
381- let stats = DirStatsById {
382- entry_id : dir_id,
383- recursive_logical_size,
384- recursive_physical_size,
385- recursive_file_count,
386- recursive_dir_count,
387- } ;
388-
389- if missing_set. contains ( & dir_id) {
390- to_write. push ( stats. clone ( ) ) ;
391- }
392- computed. insert ( dir_id, stats) ;
393- }
346+ let computed = compute_bottom_up ( & sorted, & direct_stats, & child_dirs_map, Some ( & existing_stats) , |_| { } ) ;
347+ let to_write: Vec < DirStatsById > = computed
348+ . into_values ( )
349+ . filter ( |s| missing_set. contains ( & s. entry_id ) )
350+ . collect ( ) ;
394351
395352 // Batch-write only the missing stats
396353 for chunk in to_write. chunks ( 1000 ) {
@@ -548,6 +505,33 @@ fn bulk_get_child_dir_ids(conn: &Connection) -> Result<HashMap<i64, Vec<i64>>, I
548505 Ok ( map)
549506}
550507
508+ /// Bulk-load all existing `dir_stats` rows into a map keyed by `entry_id`.
509+ ///
510+ /// Used by `backfill_missing_dir_stats` so the bottom-up pass can fall back to
511+ /// existing stats for children that already have rows (avoiding N+1 queries).
///
/// The query has no `WHERE` clause, so every row of `dir_stats` is read and
/// held in the returned map at once.
///
/// Returns an error if preparing the statement, starting the query, or reading
/// any single row fails; the first failing row aborts the load.
512+ fn bulk_get_all_dir_stats ( conn : & Connection ) -> Result < HashMap < i64 , DirStatsById > , IndexStoreError > {
// The SELECT column order below must stay in sync with the positional
// `row.get(0..=4)` calls in the row-mapping closure.
513+ let mut stmt = conn. prepare (
514+ "SELECT entry_id, recursive_logical_size, recursive_physical_size,
515+ recursive_file_count, recursive_dir_count
516+ FROM dir_stats" ,
517+ ) ?;
// Map each result row to a `DirStatsById`; a column read failure surfaces as
// that row's error rather than being dropped.
518+ let rows = stmt. query_map ( [ ] , |row| {
519+ Ok ( DirStatsById {
520+ entry_id : row. get ( 0 ) ?,
521+ recursive_logical_size : row. get ( 1 ) ?,
522+ recursive_physical_size : row. get ( 2 ) ?,
523+ recursive_file_count : row. get ( 3 ) ?,
524+ recursive_dir_count : row. get ( 4 ) ?,
525+ } )
526+ } ) ?;
// No capacity hint: the row count is not known before iterating.
527+ let mut map = HashMap :: new ( ) ;
528+ for row in rows {
// `?` stops at the first row that failed to map.
529+ let stats = row?;
// If two rows shared an `entry_id`, the later one would replace the earlier.
// NOTE(review): presumably `entry_id` is unique in `dir_stats` — confirm against the schema.
530+ map. insert ( stats. entry_id , stats) ;
531+ }
532+ Ok ( map)
533+ }
534+
551535/// Load direct children stats scoped to a subtree via recursive CTE.
552536///
553537/// Returns a map: `parent_id -> (logical_size_sum, physical_size_sum, file_count, dir_count)`.
0 commit comments