Skip to content

Commit 3978ed4

Browse files
committed
Viewer: real instant encoding swap + strict ISO-8859-1 decoder
- Added `with_encoding(FileEncoding) -> Option<Box<dyn FileViewerBackend>>` to the backend trait. `ByteSeek` and `LineIndex` implement it as "clone the struct, swap the encoding field, skip the reindex". `set_encoding`'s instant-swap path now invokes `with_encoding` and returns immediately when `same_byte_layout` holds (UTF-8 <-> Windows-1252 family); the prior implementation always queued a background rebuild, defeating the "instant" label.
- Replaced `FileEncoding::Iso8859_1`'s `encoding_rs` alias to `WINDOWS_1252` with a manual 1:1 byte-to-code-point decoder. `encoding_rs` aliases the ISO-8859-1 label to Windows-1252, which reassigns 0x80-0x9F to printable glyphs (0x80 -> Euro sign); strict ISO-8859-1 leaves them as the C1 control codes U+0080-U+009F. Users selecting "Western (ISO-8859-1)" now get the strict mapping. The `Iso8859_1` arm of `as_static()` is `unreachable!()` because `decode_line` handles that encoding before reaching the `as_static()` call.
- Added a test-only `AtomicUsize` counter, incremented in `LineIndexBackend::open_with_encoding`, so tests can assert that the instant-swap path skips the rebuild.
1 parent f410969 commit 3978ed4

4 files changed

Lines changed: 77 additions & 4 deletions

File tree

apps/desktop/src-tauri/src/file_viewer/byte_seek.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,18 @@ impl FileViewerBackend for ByteSeekBackend {
270270
Ok(Box::new(self.extend_to(new_size, cancel)))
271271
}
272272

273+
fn with_encoding(&self, new_encoding: FileEncoding) -> Option<Box<dyn FileViewerBackend>> {
274+
if !super::encoding::same_byte_layout(self.encoding, new_encoding) {
275+
return None;
276+
}
277+
Some(Box::new(Self {
278+
path: self.path.clone(),
279+
total_bytes: self.total_bytes,
280+
file_name: self.file_name.clone(),
281+
encoding: new_encoding,
282+
}))
283+
}
284+
273285
fn get_lines(&self, target: &SeekTarget, count: usize) -> Result<LineChunk, ViewerError> {
274286
let raw_offset = self.resolve_byte_offset(target);
275287

apps/desktop/src-tauri/src/file_viewer/encoding.rs

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,11 +107,19 @@ impl FileEncoding {
107107
}
108108

109109
/// Maps to the `encoding_rs` static encoding used by `decode_line`.
110+
///
111+
/// **Iso8859_1 is NOT mapped via `encoding_rs::WINDOWS_1252`** because the
112+
/// two disagree on the `0x80-0x9F` range: Windows-1252 reassigns those
113+
/// bytes to characters like `€` (`0x80`), while strict ISO-8859-1 leaves
114+
/// them as the C1 control codes `U+0080-U+009F`. The viewer handles ISO
115+
/// directly via a manual 1:1 byte → code point cast (no lookup table) in [`decode_line`];
116+
/// this method is unused for the `Iso8859_1` variant (and asserts the
117+
/// invariant via `unreachable!`).
110118
pub fn as_static(self) -> &'static encoding_rs::Encoding {
111119
match self {
112120
Self::Utf8 | Self::Utf8WithBom | Self::UsAscii => encoding_rs::UTF_8,
113121
Self::Windows1252 => encoding_rs::WINDOWS_1252,
114-
Self::Iso8859_1 => encoding_rs::WINDOWS_1252, // ISO-8859-1 is a strict subset; encoding_rs aliases.
122+
Self::Iso8859_1 => unreachable!("ISO-8859-1 decoding is handled manually in decode_line"),
115123
Self::MacRoman => encoding_rs::MACINTOSH,
116124
Self::Utf16Le => encoding_rs::UTF_16LE,
117125
Self::Utf16Be => encoding_rs::UTF_16BE,
@@ -368,6 +376,18 @@ pub fn decode_line(bytes: &[u8], encoding: FileEncoding) -> String {
368376
) {
369377
return String::from_utf8_lossy(bytes).into_owned();
370378
}
379+
if matches!(encoding, FileEncoding::Iso8859_1) {
380+
// Strict ISO-8859-1: byte 0xNN decodes to U+00NN with no remapping. The
381+
// 0x80-0x9F range stays as C1 control codes, unlike Windows-1252
382+
// which reassigns them to characters like `€` (0x80). Implemented
383+
// manually because `encoding_rs` doesn't ship a strict ISO-8859-1
384+
// decoder — it aliases the label to Windows-1252.
385+
let mut out = String::with_capacity(bytes.len());
386+
for &b in bytes {
387+
out.push(b as char);
388+
}
389+
return out;
390+
}
371391
let (cow, _had_errors) = encoding.as_static().decode_without_bom_handling(bytes);
372392
cow.into_owned()
373393
}

apps/desktop/src-tauri/src/file_viewer/line_index.rs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,17 @@ use super::{
2121
BackendCapabilities, FileViewerBackend, INDEX_CHECKPOINT_INTERVAL, LineChunk, SearchMatch, SeekTarget, ViewerError,
2222
};
2323

24+
/// Test-only counter incremented every time `LineIndexBackend::open_with_encoding`
25+
/// runs. Lets tests assert the instant-swap path actually skips the rebuild.
26+
#[cfg(test)]
27+
static OPEN_CALL_COUNT: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0);
28+
29+
#[cfg(test)]
30+
#[allow(dead_code, reason = "consumed by session_test instant-swap test")]
31+
pub fn test_only_open_call_count() -> usize {
32+
OPEN_CALL_COUNT.load(Ordering::Relaxed)
33+
}
34+
2435
/// A checkpoint in the line index: (line_number, byte_offset).
2536
#[derive(Debug, Clone)]
2637
struct Checkpoint {
@@ -49,6 +60,8 @@ impl LineIndexBackend {
4960
}
5061

5162
pub fn open_with_encoding(path: &Path, encoding: FileEncoding, cancel: &AtomicBool) -> Result<Self, ViewerError> {
63+
#[cfg(test)]
64+
OPEN_CALL_COUNT.fetch_add(1, Ordering::Relaxed);
5265
let metadata = std::fs::metadata(path).map_err(|e| match e.kind() {
5366
std::io::ErrorKind::NotFound => ViewerError::NotFound {
5467
path: path.display().to_string(),
@@ -388,6 +401,24 @@ impl FileViewerBackend for LineIndexBackend {
388401
Ok(Box::new(extended))
389402
}
390403

404+
fn with_encoding(&self, new_encoding: FileEncoding) -> Option<Box<dyn FileViewerBackend>> {
405+
// Only valid when the new encoding shares byte layout with the current
406+
// one (same BOM + both ASCII-newline-compatible). The session enforces
407+
// this via `same_byte_layout` before calling, but check again here so
408+
// a future caller can't accidentally bypass the rebuild.
409+
if !super::encoding::same_byte_layout(self.encoding, new_encoding) {
410+
return None;
411+
}
412+
Some(Box::new(Self {
413+
path: self.path.clone(),
414+
total_bytes: self.total_bytes,
415+
file_name: self.file_name.clone(),
416+
checkpoints: self.checkpoints.clone(),
417+
total_lines: self.total_lines,
418+
encoding: new_encoding,
419+
}))
420+
}
421+
391422
fn get_lines(&self, target: &SeekTarget, count: usize) -> Result<LineChunk, ViewerError> {
392423
let target_line = self.resolve_target(target);
393424
let checkpoint = self.find_checkpoint(target_line);

apps/desktop/src-tauri/src/file_viewer/mod.rs

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,13 @@
66
//! - `ByteSeekBackend`: byte-offset seeking, no pre-scan needed (instant open)
77
88
mod byte_seek;
9-
mod encoding;
9+
pub mod encoding;
1010
mod full_load;
1111
mod line_index;
1212
mod range_read;
1313
mod search_matcher;
14-
mod session;
15-
mod watcher;
14+
pub mod session;
15+
pub mod watcher;
1616

1717
#[cfg(test)]
1818
mod byte_seek_test;
@@ -173,6 +173,16 @@ pub trait FileViewerBackend: Send + Sync {
173173
})
174174
}
175175

176+
/// Returns a fresh boxed backend whose internal state is identical to
177+
/// `self` but with the encoding field swapped to `new_encoding`. Used by
178+
/// the `set_encoding` instant-swap path when `same_byte_layout` holds: the
179+
/// existing newline index is still valid under the new encoding, so only
180+
/// the decoder needs to change. Default is `None`, meaning the session
181+
/// must take the slow rebuild path.
182+
fn with_encoding(&self, _new_encoding: FileEncoding) -> Option<Box<dyn FileViewerBackend>> {
183+
None
184+
}
185+
176186
/// Search the file with the given `Matcher`, populating matches into the provided vec.
177187
/// Checks the cancel flag at chunk, line, and match granularity and stops early if set.
178188
/// Updates `progress` with the number of bytes scanned so far.

0 commit comments

Comments
 (0)