From a010633cbe5a63add7b6ecad74fe7b02946e3f80 Mon Sep 17 00:00:00 2001 From: mukunda katta Date: Fri, 15 May 2026 13:08:07 -0700 Subject: [PATCH] fix: list non-ascii as octal bytes --- src/sed/processor.rs | 9 +++++++-- tests/by-util/test_sed.rs | 9 +++++++++ tests/fixtures/sed/output/list_unicode | 8 +++++--- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/sed/processor.rs b/src/sed/processor.rs index 45d6b266..e5ad6daa 100644 --- a/src/sed/processor.rs +++ b/src/sed/processor.rs @@ -392,8 +392,13 @@ fn list(output: &mut OutputBuffer, line: &IOChunk, max_width: usize) -> UResult< '\t' => Cow::Borrowed(r"\t"), c if c.is_ascii_control() => Cow::Owned(format!("\\{:03o}", ch as u8)), c if c == ' ' || c.is_ascii_graphic() => Cow::Borrowed(ch.encode_utf8(&mut char_buff)), - c if (c as u32) <= 0xFFFF => Cow::Owned(format!("\\u{:04X}", c as u32)), - _ => Cow::Owned(format!("\\U{:08X}", ch as u32)), + _ => Cow::Owned( + ch.to_string() + .as_bytes() + .iter() + .map(|b| format!("\\{b:03o}")) + .collect(), + ), }; // See if folding is required before adding out_str and terminator. diff --git a/tests/by-util/test_sed.rs b/tests/by-util/test_sed.rs index b31fed76..6b9335f6 100644 --- a/tests/by-util/test_sed.rs +++ b/tests/by-util/test_sed.rs @@ -829,6 +829,15 @@ check_output!(list_ascii, ["-n", "l 60", "input/ascii"]); check_output!(list_empty, ["-n", "l 60", "input/empty"]); check_output!(list_unicode, ["l 60", "input/unicode"]); +#[test] +fn list_non_ascii_uses_octal_bytes() { + new_ucmd!() + .arg("l") + .pipe_in("cÃ🧰y\n") + .succeeds() + .stdout_is("c\\303\\203\\360\\237\\247\\260y$\ncÃ🧰y\n"); +} + //////////////////////////////////////////////////////////// // In-place editing #[test] diff --git a/tests/fixtures/sed/output/list_unicode b/tests/fixtures/sed/output/list_unicode index f15bfa2c..711d605d 100644 --- a/tests/fixtures/sed/output/list_unicode +++ b/tests/fixtures/sed/output/list_unicode @@ -1,4 +1,6 @@ -Hello World or \u039A\u03B1\u03BB\u03B7\u03BC\u03AD\u03C1\ -\u03B1 \u03BA\u03CC\u03C3\u03BC\u03B5 or \u3053\u3093\u306B\ -\u3061\u306F \u4E16\u754C \U0001F600$ +Hello World or \316\232\316\261\316\273\316\267\316\274\ +\316\255\317\201\316\261 \316\272\317\214\317\203\316\274\ +\316\265 or \343\201\223\343\202\223\343\201\253\ +\343\201\241\343\201\257 \344\270\226\347\225\214 \ +\360\237\230\200$ Hello World or Καλημέρα κόσμε or こんにちは 世界 😀