File viewer: fix search nav in ByteSeek mode

vdavid · vdavid · commit d15ecded13be · 2026-03-03T22:56:54.000+01:00
- Add `byte_offset` to `SearchMatch` so the frontend knows the byte position of the start of the line containing each match
- `scrollToMatch` converts that byte offset to a scroll position when the exact line count is unknown (ByteSeek mode), fixing the drift caused by estimated line coordinates
diff --git a/apps/desktop/src-tauri/src/file_viewer/CLAUDE.md b/apps/desktop/src-tauri/src/file_viewer/CLAUDE.md
@@ -48,6 +48,9 @@ if file_size < 1MB {
 **Decision**: `SearchMatch.column` and `.length` use UTF-16 code units instead of byte or char offsets.
 **Why**: The frontend is JavaScript, where `String.prototype.length` and `String.prototype.substring()` count UTF-16 code units. If the backend returned byte offsets or Unicode scalar offsets, the frontend would need to convert on every match highlight, which is error-prone for text with emoji or CJK characters. Matching the JS string model eliminates an entire class of off-by-one bugs in the highlight rendering.
 
+**Decision**: `SearchMatch.byte_offset` stores the byte offset of the line start for each match.
+**Why**: In ByteSeek mode (when line indexing timed out), search returns exact line numbers, but the virtual scroll works in estimated line coordinates and fetches by fraction (`line / estimatedTotalLines`). The byte offset lets the frontend compute the matching scroll position via `(byteOffset / totalBytes) * estimatedTotalLines`, which is the inverse of the fraction the virtual scroll uses for fetching. Without this, navigating to a search match scrolls to the wrong part of the file.
+
 **Decision**: Sparse checkpoints every 256 lines instead of indexing every line.
 **Why**: Indexing every line in a 100M-line file would need ~800 MB of offset data (8 bytes each). At 256-line intervals, the same file needs ~3 MB. The trade-off is that seeking to a specific line requires reading forward up to 255 lines from the nearest checkpoint, which takes <1ms on any modern disk — well within the 16ms frame budget for 60fps scrolling.
 
diff --git a/apps/desktop/src-tauri/src/file_viewer/byte_seek.rs b/apps/desktop/src-tauri/src/file_viewer/byte_seek.rs
@@ -223,6 +223,7 @@ impl FileViewerBackend for ByteSeekBackend {
         let mut buf = vec![0u8; chunk_size];
         let mut line_number: usize = 0;
         let mut scanned: u64 = 0;
+        let mut line_byte_offset: u64 = 0;
         let mut leftover = Vec::new();
         let mut limit_reached = false;
 
@@ -263,14 +264,14 @@ impl FileViewerBackend for ByteSeekBackend {
                     let mut search_start = 0;
                     while let Some(match_pos) = line_lower[search_start..].find(&query_lower) {
                         let col_bytes = search_start + match_pos;
-                        let col_utf16: usize =
-                            line_lower[..col_bytes].chars().map(|c| c.len_utf16()).sum();
+                        let col_utf16: usize = line_lower[..col_bytes].chars().map(|c| c.len_utf16()).sum();
                         let len_utf16: usize = query_lower.chars().map(|c| c.len_utf16()).sum();
                         let mut matches = results.lock_ignore_poison();
                         matches.push(SearchMatch {
                             line: line_number,
                             column: col_utf16,
                             length: len_utf16,
+                            byte_offset: line_byte_offset,
                         });
                         if matches.len() >= MAX_SEARCH_MATCHES {
                             limit_reached = true;
@@ -281,6 +282,7 @@ impl FileViewerBackend for ByteSeekBackend {
 
                     scanned += (nl_pos + 1) as u64;
                     pos += nl_pos + 1;
+                    line_byte_offset = scanned;
                     line_number += 1;
                 } else {
                     // Incomplete line — save as leftover for next iteration
@@ -307,6 +309,7 @@ impl FileViewerBackend for ByteSeekBackend {
                     line: line_number,
                     column: col_utf16,
                     length: len_utf16,
+                    byte_offset: line_byte_offset,
                 });
                 if matches.len() >= MAX_SEARCH_MATCHES {
                     break;
diff --git a/apps/desktop/src-tauri/src/file_viewer/byte_seek_test.rs b/apps/desktop/src-tauri/src/file_viewer/byte_seek_test.rs
@@ -180,7 +180,10 @@ fn search_finds_matches() {
     assert_eq!(matches.len(), 2);
     assert_eq!(matches[0].line, 0);
     assert_eq!(matches[0].column, 0);
+    assert_eq!(matches[0].byte_offset, 0); // First line starts at byte 0
     assert_eq!(matches[1].line, 2);
+    // "hello world\n" = 12 bytes, "foo bar\n" = 8 bytes → line 2 starts at byte 20
+    assert_eq!(matches[1].byte_offset, 20);
 
     // Progress should equal total bytes after search completes
     assert_eq!(*progress.lock().unwrap(), backend.total_bytes());
diff --git a/apps/desktop/src-tauri/src/file_viewer/full_load.rs b/apps/desktop/src-tauri/src/file_viewer/full_load.rs
@@ -8,7 +8,9 @@ use std::path::Path;
 use std::sync::Mutex;
 use std::sync::atomic::{AtomicBool, Ordering};
 
-use super::{BackendCapabilities, FileViewerBackend, LineChunk, MAX_SEARCH_MATCHES, SearchMatch, SeekTarget, ViewerError};
+use super::{
+    BackendCapabilities, FileViewerBackend, LineChunk, MAX_SEARCH_MATCHES, SearchMatch, SeekTarget, ViewerError,
+};
 
 pub struct FullLoadBackend {
     lines: Vec<String>,
@@ -151,6 +153,7 @@ impl FileViewerBackend for FullLoadBackend {
                     line: line_idx,
                     column: col_utf16,
                     length: len_utf16,
+                    byte_offset: self.line_offsets[line_idx],
                 });
                 if matches.len() >= MAX_SEARCH_MATCHES {
                     limit_reached = true;
diff --git a/apps/desktop/src-tauri/src/file_viewer/full_load_test.rs b/apps/desktop/src-tauri/src/file_viewer/full_load_test.rs
@@ -140,8 +140,11 @@ fn search_finds_matches() {
     assert_eq!(matches.len(), 2);
     assert_eq!(matches[0].line, 0);
     assert_eq!(matches[0].column, 0);
+    assert_eq!(matches[0].byte_offset, 0); // First line starts at byte 0
     assert_eq!(matches[1].line, 2);
     assert_eq!(matches[1].column, 0);
+    // "hello world\n" = 12 bytes, "foo bar\n" = 8 bytes → line 2 starts at byte 20
+    assert_eq!(matches[1].byte_offset, 20);
     assert!(scanned > 0);
     // Progress should equal total bytes after search completes
     assert_eq!(*progress.lock().unwrap(), scanned);
diff --git a/apps/desktop/src-tauri/src/file_viewer/line_index.rs b/apps/desktop/src-tauri/src/file_viewer/line_index.rs
@@ -245,6 +245,7 @@ impl FileViewerBackend for LineIndexBackend {
         let mut buf = vec![0u8; chunk_size];
         let mut line_number: usize = 0;
         let mut scanned: u64 = 0;
+        let mut line_byte_offset: u64 = 0;
         let mut leftover = Vec::new();
         let mut limit_reached = false;
 
@@ -289,6 +290,7 @@ impl FileViewerBackend for LineIndexBackend {
                             line: line_number,
                             column: col,
                             length: query.len(),
+                            byte_offset: line_byte_offset,
                         });
                         if matches.len() >= MAX_SEARCH_MATCHES {
                             limit_reached = true;
@@ -299,6 +301,7 @@ impl FileViewerBackend for LineIndexBackend {
 
                     scanned += (nl_pos + 1) as u64;
                     pos += nl_pos + 1;
+                    line_byte_offset = scanned;
                     line_number += 1;
                 } else {
                     leftover.extend_from_slice(&data[pos..]);
@@ -322,6 +325,7 @@ impl FileViewerBackend for LineIndexBackend {
                     line: line_number,
                     column: col,
                     length: query.len(),
+                    byte_offset: line_byte_offset,
                 });
                 if matches.len() >= MAX_SEARCH_MATCHES {
                     break;
diff --git a/apps/desktop/src-tauri/src/file_viewer/line_index_test.rs b/apps/desktop/src-tauri/src/file_viewer/line_index_test.rs
@@ -194,7 +194,10 @@ fn search_finds_matches() {
 
     assert_eq!(matches.len(), 2);
     assert_eq!(matches[0].line, 0);
+    assert_eq!(matches[0].byte_offset, 0); // First line starts at byte 0
     assert_eq!(matches[1].line, 2);
+    // "hello world\n" = 12 bytes, "foo bar\n" = 8 bytes → line 2 starts at byte 20
+    assert_eq!(matches[1].byte_offset, 20);
 
     cleanup(&dir);
 }
diff --git a/apps/desktop/src-tauri/src/file_viewer/mod.rs b/apps/desktop/src-tauri/src/file_viewer/mod.rs
@@ -74,6 +74,10 @@ pub struct SearchMatch {
     pub column: usize,
     /// Length in UTF-16 code units (matches JS string indexing).
     pub length: usize,
+    /// Byte offset of the start of the line containing this match.
+    /// Used by the frontend to scroll accurately in ByteSeek mode where line numbers
+    /// don't map to the virtual scroll coordinate system.
+    pub byte_offset: u64,
 }
 
 /// What a backend can do.
diff --git a/apps/desktop/src-tauri/src/file_viewer/session.rs b/apps/desktop/src-tauri/src/file_viewer/session.rs
@@ -18,8 +18,8 @@ use super::byte_seek::ByteSeekBackend;
 use super::full_load::FullLoadBackend;
 use super::line_index::LineIndexBackend;
 use super::{
-    BackendCapabilities, FULL_LOAD_THRESHOLD, FileViewerBackend, LineChunk, MAX_SEARCH_MATCHES, SearchMatch, SeekTarget,
-    ViewerError,
+    BackendCapabilities, FULL_LOAD_THRESHOLD, FileViewerBackend, LineChunk, MAX_SEARCH_MATCHES, SearchMatch,
+    SeekTarget, ViewerError,
 };
 
 /// Which backend strategy is active for a session.
diff --git a/apps/desktop/src-tauri/src/file_viewer/session_test.rs b/apps/desktop/src-tauri/src/file_viewer/session_test.rs
@@ -5,8 +5,8 @@ use std::path::{Path, PathBuf};
 use std::thread;
 use std::time::Duration;
 
-use super::{FULL_LOAD_THRESHOLD, MAX_SEARCH_MATCHES};
 use super::session::{self, SearchStatus};
+use super::{FULL_LOAD_THRESHOLD, MAX_SEARCH_MATCHES};
 
 fn create_test_dir(name: &str) -> PathBuf {
     let dir = std::env::temp_dir().join(format!("cmdr_viewer_session_{}", name));
diff --git a/apps/desktop/src/lib/tauri-commands/file-viewer.ts b/apps/desktop/src/lib/tauri-commands/file-viewer.ts
@@ -46,6 +46,8 @@ export interface ViewerSearchMatch {
     line: number
     column: number
     length: number
+    /** Byte offset of the line start. Used for accurate scroll positioning in ByteSeek mode. */
+    byteOffset: number
 }
 
 /** Result from polling search progress. */
diff --git a/apps/desktop/src/routes/viewer/+page.svelte b/apps/desktop/src/routes/viewer/+page.svelte
@@ -263,8 +263,9 @@
             const fetchFrom = Math.max(0, from - BUFFER_LINES)
             const fetchCount = Math.min(FETCH_BATCH, to - fetchFrom + BUFFER_LINES * 2)
 
-            // Decide seek type based on backend capabilities
-            const supportsLineSeek = capabilities?.supportsLineSeek ?? false
+            // Decide seek type: use line seek when we know exact line count (LineIndex/FullLoad),
+            // fraction seek when we only have estimates (ByteSeek)
+            const supportsLineSeek = totalLines !== null
             const seekType = supportsLineSeek ? 'line' : 'fraction'
             const seekValue = supportsLineSeek ? fetchFrom : fetchFrom / estimatedTotalLines()
 
@@ -455,8 +456,18 @@
 
     function scrollToMatch(match: ViewerSearchMatch) {
         if (!contentRef) return
-        const targetScroll = match.line * effectiveLineHeight - viewportHeight / 2
-        contentRef.scrollTop = Math.max(0, targetScroll)
+        let targetLine: number
+        if (totalLines !== null) {
+            // LineIndex/FullLoad: line numbers are exact, use directly
+            targetLine = match.line
+        } else {
+            // ByteSeek: line numbers don't match the virtual scroll coordinate system.
+            // Convert via byte offset: (byteOffset / totalBytes) * estimatedTotalLines
+            targetLine = totalBytes > 0 ? (match.byteOffset / totalBytes) * estimatedTotalLines() : match.line
+        }
+        const targetScroll = targetLine * effectiveLineHeight - viewportHeight / 2
+        const finalScroll = Math.max(0, targetScroll)
+        contentRef.scrollTop = finalScroll
     }
 
     // Debounce search input
@@ -1018,6 +1029,7 @@
     .file-content {
         flex: 1;
         overflow: auto;
+        overflow-anchor: none; /* Virtual scroll manages scroll position programmatically */
         font-family: var(--font-mono);
         font-size: var(--font-size-sm);
         line-height: 1.5;

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -194,7 +194,10 @@ fn search_finds_matches() {` |
| `194` | `194` |  |
| `195` | `195` | `assert_eq!(matches.len(), 2);` |
| `196` | `196` | `assert_eq!(matches[0].line, 0);` |
|  | `197` | `+ assert_eq!(matches[0].byte_offset, 0); // First line starts at byte 0` |
| `197` | `198` | `assert_eq!(matches[1].line, 2);` |
|  | `199` | `+ // "hello world\n" = 12 bytes, "foo bar\n" = 8 bytes → line 2 starts at byte 20` |
|  | `200` | `+ assert_eq!(matches[1].byte_offset, 20);` |
| `198` | `201` |  |
| `199` | `202` | `cleanup(&dir);` |
| `200` | `203` | `}` |