Skip to content

Commit 31cf5fd

Browse files
committed
File viewer: cap search matches at 10,000
- Prevents unbounded memory growth and IPC serialization cost when a common term matches millions of times in a huge file
- Backends skip match-finding after the cap is reached but keep scanning for accurate progress
- Frontend shows "10,000+" when capped
1 parent 2ee490e commit 31cf5fd

11 files changed

Lines changed: 211 additions & 78 deletions

File tree

apps/desktop/src-tauri/src/commands/ui.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ pub fn set_menu_context<R: Runtime>(app: AppHandle<R>, context: String) -> Resul
241241
for (id, entry) in menu_state.items.lock_ignore_poison().iter() {
242242
// Close tab stays enabled: on_menu_event has special logic to close the focused
243243
// non-main window when main isn't focused (standard ⌘W behavior on macOS).
244-
if id == &CLOSE_TAB_ID {
244+
if id == CLOSE_TAB_ID {
245245
continue;
246246
}
247247
let is_app = matches!(menu_id_to_command(id), Some((_, CommandScope::App)));

apps/desktop/src-tauri/src/file_viewer/byte_seek.rs

Lines changed: 46 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ use log::debug;
1818
use memchr::memchr;
1919

2020
use super::{
21-
BackendCapabilities, FileViewerBackend, LineChunk, MAX_BACKWARD_SCAN, SearchMatch, SeekTarget, ViewerError,
21+
BackendCapabilities, FileViewerBackend, LineChunk, MAX_BACKWARD_SCAN, MAX_SEARCH_MATCHES, SearchMatch, SeekTarget,
22+
ViewerError,
2223
};
2324

2425
pub struct ByteSeekBackend {
@@ -223,6 +224,7 @@ impl FileViewerBackend for ByteSeekBackend {
223224
let mut line_number: usize = 0;
224225
let mut scanned: u64 = 0;
225226
let mut leftover = Vec::new();
227+
let mut limit_reached = false;
226228

227229
loop {
228230
if cancel.load(Ordering::Relaxed) {
@@ -253,22 +255,30 @@ impl FileViewerBackend for ByteSeekBackend {
253255
}
254256

255257
if let Some(nl_pos) = memchr(b'\n', &data[pos..]) {
256-
let line_bytes = &data[pos..pos + nl_pos];
257-
let line = String::from_utf8_lossy(line_bytes);
258-
let line_lower = line.to_lowercase();
259-
260-
let mut search_start = 0;
261-
while let Some(match_pos) = line_lower[search_start..].find(&query_lower) {
262-
let col_bytes = search_start + match_pos;
263-
let col_utf16: usize = line_lower[..col_bytes].chars().map(|c| c.len_utf16()).sum();
264-
let len_utf16: usize = query_lower.chars().map(|c| c.len_utf16()).sum();
265-
let mut matches = results.lock_ignore_poison();
266-
matches.push(SearchMatch {
267-
line: line_number,
268-
column: col_utf16,
269-
length: len_utf16,
270-
});
271-
search_start = col_bytes + query_lower.len();
258+
// Only do the expensive match-finding if we haven't hit the limit
259+
if !limit_reached {
260+
let line_bytes = &data[pos..pos + nl_pos];
261+
let line = String::from_utf8_lossy(line_bytes);
262+
let line_lower = line.to_lowercase();
263+
264+
let mut search_start = 0;
265+
while let Some(match_pos) = line_lower[search_start..].find(&query_lower) {
266+
let col_bytes = search_start + match_pos;
267+
let col_utf16: usize =
268+
line_lower[..col_bytes].chars().map(|c| c.len_utf16()).sum();
269+
let len_utf16: usize = query_lower.chars().map(|c| c.len_utf16()).sum();
270+
let mut matches = results.lock_ignore_poison();
271+
matches.push(SearchMatch {
272+
line: line_number,
273+
column: col_utf16,
274+
length: len_utf16,
275+
});
276+
if matches.len() >= MAX_SEARCH_MATCHES {
277+
limit_reached = true;
278+
break;
279+
}
280+
search_start = col_bytes + query_lower.len();
281+
}
272282
}
273283

274284
scanned += (nl_pos + 1) as u64;
@@ -287,20 +297,25 @@ impl FileViewerBackend for ByteSeekBackend {
287297

288298
// Handle last line without newline
289299
if !leftover.is_empty() {
290-
let line = String::from_utf8_lossy(&leftover);
291-
let line_lower = line.to_lowercase();
292-
let mut search_start = 0;
293-
while let Some(match_pos) = line_lower[search_start..].find(&query_lower) {
294-
let col_bytes = search_start + match_pos;
295-
let col_utf16: usize = line_lower[..col_bytes].chars().map(|c| c.len_utf16()).sum();
296-
let len_utf16: usize = query_lower.chars().map(|c| c.len_utf16()).sum();
297-
let mut matches = results.lock_ignore_poison();
298-
matches.push(SearchMatch {
299-
line: line_number,
300-
column: col_utf16,
301-
length: len_utf16,
302-
});
303-
search_start = col_bytes + query_lower.len();
300+
if !limit_reached {
301+
let line = String::from_utf8_lossy(&leftover);
302+
let line_lower = line.to_lowercase();
303+
let mut search_start = 0;
304+
while let Some(match_pos) = line_lower[search_start..].find(&query_lower) {
305+
let col_bytes = search_start + match_pos;
306+
let col_utf16: usize = line_lower[..col_bytes].chars().map(|c| c.len_utf16()).sum();
307+
let len_utf16: usize = query_lower.chars().map(|c| c.len_utf16()).sum();
308+
let mut matches = results.lock_ignore_poison();
309+
matches.push(SearchMatch {
310+
line: line_number,
311+
column: col_utf16,
312+
length: len_utf16,
313+
});
314+
if matches.len() >= MAX_SEARCH_MATCHES {
315+
break;
316+
}
317+
search_start = col_bytes + query_lower.len();
318+
}
304319
}
305320
scanned += leftover.len() as u64;
306321
}

apps/desktop/src-tauri/src/file_viewer/byte_seek_test.rs

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use std::sync::Mutex;
66
use std::sync::atomic::AtomicBool;
77

88
use super::byte_seek::ByteSeekBackend;
9-
use super::{FileViewerBackend, SearchMatch, SeekTarget};
9+
use super::{FileViewerBackend, MAX_SEARCH_MATCHES, SearchMatch, SeekTarget};
1010

1111
fn create_test_dir(name: &str) -> PathBuf {
1212
let dir = std::env::temp_dir().join(format!("cmdr_viewer_byte_{}", name));
@@ -341,3 +341,27 @@ fn empty_file() {
341341

342342
cleanup(&dir);
343343
}
344+
345+
#[test]
346+
fn search_caps_at_match_limit() {
347+
let dir = create_test_dir("search_cap");
348+
let line_count = MAX_SEARCH_MATCHES + 1000;
349+
let content = "aa\n".repeat(line_count);
350+
let file = write_test_file(&dir, "test.txt", &content);
351+
352+
let backend = ByteSeekBackend::open(&file).unwrap();
353+
let cancel = AtomicBool::new(false);
354+
let results: Mutex<Vec<SearchMatch>> = Mutex::new(Vec::new());
355+
let progress: Mutex<u64> = Mutex::new(0);
356+
357+
let scanned = backend.search("a", &cancel, &results, &progress).unwrap();
358+
let matches = results.lock().unwrap();
359+
360+
// Should cap at exactly MAX_SEARCH_MATCHES
361+
assert_eq!(matches.len(), MAX_SEARCH_MATCHES);
362+
// But should still scan the entire file for progress
363+
assert_eq!(scanned, backend.total_bytes());
364+
assert_eq!(*progress.lock().unwrap(), backend.total_bytes());
365+
366+
cleanup(&dir);
367+
}

apps/desktop/src-tauri/src/file_viewer/full_load.rs

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use std::path::Path;
88
use std::sync::Mutex;
99
use std::sync::atomic::{AtomicBool, Ordering};
1010

11-
use super::{BackendCapabilities, FileViewerBackend, LineChunk, SearchMatch, SeekTarget, ViewerError};
11+
use super::{BackendCapabilities, FileViewerBackend, LineChunk, MAX_SEARCH_MATCHES, SearchMatch, SeekTarget, ViewerError};
1212

1313
pub struct FullLoadBackend {
1414
lines: Vec<String>,
@@ -133,25 +133,32 @@ impl FileViewerBackend for FullLoadBackend {
133133
) -> Result<u64, ViewerError> {
134134
let query_lower = query.to_lowercase();
135135
let mut scanned: u64 = 0;
136+
let mut limit_reached = false;
136137

137138
for (line_idx, line) in self.lines.iter().enumerate() {
138139
if cancel.load(Ordering::Relaxed) {
139140
break;
140141
}
141142

142-
let line_lower = line.to_lowercase();
143-
let mut search_start = 0;
144-
while let Some(pos) = line_lower[search_start..].find(&query_lower) {
145-
let col_bytes = search_start + pos;
146-
let col_utf16: usize = line_lower[..col_bytes].chars().map(|c| c.len_utf16()).sum();
147-
let len_utf16: usize = query_lower.chars().map(|c| c.len_utf16()).sum();
148-
let mut matches = results.lock_ignore_poison();
149-
matches.push(SearchMatch {
150-
line: line_idx,
151-
column: col_utf16,
152-
length: len_utf16,
153-
});
154-
search_start = col_bytes + query_lower.len();
143+
if !limit_reached {
144+
let line_lower = line.to_lowercase();
145+
let mut search_start = 0;
146+
while let Some(pos) = line_lower[search_start..].find(&query_lower) {
147+
let col_bytes = search_start + pos;
148+
let col_utf16: usize = line_lower[..col_bytes].chars().map(|c| c.len_utf16()).sum();
149+
let len_utf16: usize = query_lower.chars().map(|c| c.len_utf16()).sum();
150+
let mut matches = results.lock_ignore_poison();
151+
matches.push(SearchMatch {
152+
line: line_idx,
153+
column: col_utf16,
154+
length: len_utf16,
155+
});
156+
if matches.len() >= MAX_SEARCH_MATCHES {
157+
limit_reached = true;
158+
break;
159+
}
160+
search_start = col_bytes + query_lower.len();
161+
}
155162
}
156163

157164
scanned += line.len() as u64 + 1; // +1 for newline

apps/desktop/src-tauri/src/file_viewer/full_load_test.rs

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use std::sync::Mutex;
66
use std::sync::atomic::AtomicBool;
77

88
use super::full_load::FullLoadBackend;
9-
use super::{FileViewerBackend, SearchMatch, SeekTarget};
9+
use super::{FileViewerBackend, MAX_SEARCH_MATCHES, SearchMatch, SeekTarget};
1010

1111
fn create_test_dir(name: &str) -> PathBuf {
1212
let dir = std::env::temp_dir().join(format!("cmdr_viewer_full_{}", name));
@@ -299,3 +299,24 @@ fn single_line_no_newline() {
299299
let chunk = backend.get_lines(&SeekTarget::Line(0), 10).unwrap();
300300
assert_eq!(chunk.lines, vec!["just one line"]);
301301
}
302+
303+
#[test]
304+
fn search_caps_at_match_limit() {
305+
// Each line is "aa", which matches "a" twice, so MAX/2 lines would already reach the limit; MAX + 1000 lines exceeds it comfortably
306+
let line_count = MAX_SEARCH_MATCHES + 1000;
307+
let content = "aa\n".repeat(line_count);
308+
let backend = FullLoadBackend::from_content(&content, "test.txt");
309+
310+
let cancel = AtomicBool::new(false);
311+
let results: Mutex<Vec<SearchMatch>> = Mutex::new(Vec::new());
312+
let progress: Mutex<u64> = Mutex::new(0);
313+
314+
backend.search("a", &cancel, &results, &progress).unwrap();
315+
let matches = results.lock().unwrap();
316+
317+
// Should cap at exactly MAX_SEARCH_MATCHES
318+
assert_eq!(matches.len(), MAX_SEARCH_MATCHES);
319+
// Progress should cover the full file (FullLoad scanned bytes may overshoot by 1
320+
// due to the trailing empty line from split('\n') getting +1 for a non-existent newline)
321+
assert!(*progress.lock().unwrap() >= backend.total_bytes());
322+
}

apps/desktop/src-tauri/src/file_viewer/line_index.rs

Lines changed: 40 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ use crate::ignore_poison::IgnorePoison;
1616
use memchr::memchr;
1717

1818
use super::{
19-
BackendCapabilities, FileViewerBackend, INDEX_CHECKPOINT_INTERVAL, LineChunk, SearchMatch, SeekTarget, ViewerError,
19+
BackendCapabilities, FileViewerBackend, INDEX_CHECKPOINT_INTERVAL, LineChunk, MAX_SEARCH_MATCHES, SearchMatch,
20+
SeekTarget, ViewerError,
2021
};
2122

2223
/// A checkpoint in the line index: (line_number, byte_offset).
@@ -245,6 +246,7 @@ impl FileViewerBackend for LineIndexBackend {
245246
let mut line_number: usize = 0;
246247
let mut scanned: u64 = 0;
247248
let mut leftover = Vec::new();
249+
let mut limit_reached = false;
248250

249251
loop {
250252
if cancel.load(Ordering::Relaxed) {
@@ -274,20 +276,26 @@ impl FileViewerBackend for LineIndexBackend {
274276
}
275277

276278
if let Some(nl_pos) = memchr(b'\n', &data[pos..]) {
277-
let line_bytes = &data[pos..pos + nl_pos];
278-
let line = String::from_utf8_lossy(line_bytes);
279-
let line_lower = line.to_lowercase();
280-
281-
let mut search_start = 0;
282-
while let Some(match_pos) = line_lower[search_start..].find(&query_lower) {
283-
let col = search_start + match_pos;
284-
let mut matches = results.lock_ignore_poison();
285-
matches.push(SearchMatch {
286-
line: line_number,
287-
column: col,
288-
length: query.len(),
289-
});
290-
search_start = col + 1;
279+
if !limit_reached {
280+
let line_bytes = &data[pos..pos + nl_pos];
281+
let line = String::from_utf8_lossy(line_bytes);
282+
let line_lower = line.to_lowercase();
283+
284+
let mut search_start = 0;
285+
while let Some(match_pos) = line_lower[search_start..].find(&query_lower) {
286+
let col = search_start + match_pos;
287+
let mut matches = results.lock_ignore_poison();
288+
matches.push(SearchMatch {
289+
line: line_number,
290+
column: col,
291+
length: query.len(),
292+
});
293+
if matches.len() >= MAX_SEARCH_MATCHES {
294+
limit_reached = true;
295+
break;
296+
}
297+
search_start = col + 1;
298+
}
291299
}
292300

293301
scanned += (nl_pos + 1) as u64;
@@ -305,18 +313,23 @@ impl FileViewerBackend for LineIndexBackend {
305313

306314
// Handle last line
307315
if !leftover.is_empty() {
308-
let line = String::from_utf8_lossy(&leftover);
309-
let line_lower = line.to_lowercase();
310-
let mut search_start = 0;
311-
while let Some(match_pos) = line_lower[search_start..].find(&query_lower) {
312-
let col = search_start + match_pos;
313-
let mut matches = results.lock_ignore_poison();
314-
matches.push(SearchMatch {
315-
line: line_number,
316-
column: col,
317-
length: query.len(),
318-
});
319-
search_start = col + 1;
316+
if !limit_reached {
317+
let line = String::from_utf8_lossy(&leftover);
318+
let line_lower = line.to_lowercase();
319+
let mut search_start = 0;
320+
while let Some(match_pos) = line_lower[search_start..].find(&query_lower) {
321+
let col = search_start + match_pos;
322+
let mut matches = results.lock_ignore_poison();
323+
matches.push(SearchMatch {
324+
line: line_number,
325+
column: col,
326+
length: query.len(),
327+
});
328+
if matches.len() >= MAX_SEARCH_MATCHES {
329+
break;
330+
}
331+
search_start = col + 1;
332+
}
320333
}
321334
scanned += leftover.len() as u64;
322335
}

apps/desktop/src-tauri/src/file_viewer/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,11 @@ const INDEX_CHECKPOINT_INTERVAL: usize = 256;
3535
/// Maximum bytes to scan backward when seeking by byte offset.
3636
const MAX_BACKWARD_SCAN: usize = 8192;
3737

38+
/// Maximum number of matches stored during search. Once reached, the search keeps scanning
39+
/// (for accurate progress) but stops accumulating matches. Prevents unbounded memory growth
40+
/// and IPC serialization cost when a common term matches millions of times in a huge file.
41+
const MAX_SEARCH_MATCHES: usize = 10_000;
42+
3843
/// Where to seek in the file.
3944
#[derive(Debug, Clone)]
4045
pub enum SeekTarget {

apps/desktop/src-tauri/src/file_viewer/session.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ use super::byte_seek::ByteSeekBackend;
1818
use super::full_load::FullLoadBackend;
1919
use super::line_index::LineIndexBackend;
2020
use super::{
21-
BackendCapabilities, FULL_LOAD_THRESHOLD, FileViewerBackend, LineChunk, SearchMatch, SeekTarget, ViewerError,
21+
BackendCapabilities, FULL_LOAD_THRESHOLD, FileViewerBackend, LineChunk, MAX_SEARCH_MATCHES, SearchMatch, SeekTarget,
22+
ViewerError,
2223
};
2324

2425
/// Which backend strategy is active for a session.
@@ -74,6 +75,9 @@ pub struct SearchPollResult {
7475
pub matches: Vec<SearchMatch>,
7576
pub total_bytes: u64,
7677
pub bytes_scanned: u64,
78+
/// True when the match list was capped at MAX_SEARCH_MATCHES. The search kept scanning
79+
/// (for progress) but stopped storing new matches.
80+
pub match_limit_reached: bool,
7781
}
7882

7983
/// Internal state for an active search.
@@ -362,17 +366,20 @@ pub fn search_poll(session_id: &str) -> Result<SearchPollResult, ViewerError> {
362366
matches: Vec::new(),
363367
total_bytes,
364368
bytes_scanned: 0,
369+
match_limit_reached: false,
365370
}),
366371
Some(search) => {
367372
let status = search.status.lock_ignore_poison().clone();
368373
let matches = search.matches.lock_ignore_poison().clone();
369374
let bytes_scanned = *search.bytes_scanned.lock_ignore_poison();
375+
let match_limit_reached = matches.len() >= MAX_SEARCH_MATCHES;
370376

371377
Ok(SearchPollResult {
372378
status,
373379
matches,
374380
total_bytes,
375381
bytes_scanned,
382+
match_limit_reached,
376383
})
377384
}
378385
}

0 commit comments

Comments
 (0)