Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 30 additions & 2 deletions src/uucore/src/lib/features/i18n/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,24 @@ pub enum UEncoding {
// This ensures real locales like "en-US" won't match
const DEFAULT_LOCALE: Locale = locale!("und");

/// Determine the encoding on Windows from the system ANSI code page.
///
/// Yields `UEncoding::Utf8` when the active code page is 65001 (UTF-8) and
/// `UEncoding::Ascii` for every other code page.
///
/// This mirrors the GNU gnulib approach, where `locale_charset()` calls
/// `GetACP()` on Windows.
#[cfg(target_os = "windows")]
fn get_windows_encoding() -> UEncoding {
    /// Windows code page identifier for UTF-8.
    const UTF8_CODE_PAGE: u32 = 65001;

    unsafe extern "system" {
        fn GetACP() -> u32;
    }

    // SAFETY: `GetACP` is declared with no arguments and returns a plain `u32`,
    // so no pointers or buffers cross the FFI boundary.
    match unsafe { GetACP() } {
        UTF8_CODE_PAGE => UEncoding::Utf8,
        _ => UEncoding::Ascii,
    }
}

/// Look at 3 environment variables in the following order
///
/// 1. LC_ALL
Expand Down Expand Up @@ -70,8 +88,18 @@ pub fn get_locale_from_env(locale_name: &str) -> (Locale, UEncoding) {
return (locale, encoding);
}
}
// Default POSIX locale representing LC_ALL=C
(DEFAULT_LOCALE, UEncoding::Ascii)
// No locale environment variables set.
// On Windows, check the system ANSI code page to determine encoding,
// matching GNU coreutils' approach (locale_charset -> GetACP).
#[cfg(target_os = "windows")]
{
(DEFAULT_LOCALE, get_windows_encoding())
}
#[cfg(not(target_os = "windows"))]
{
// Default POSIX locale representing LC_ALL=C
(DEFAULT_LOCALE, UEncoding::Ascii)
}
}

/// Get the collating locale from the environment
Expand Down
73 changes: 73 additions & 0 deletions tests/by-util/test_ls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7236,6 +7236,79 @@ fn test_ls_a_dotdot_no_error_on_wasi() {
.no_stderr();
}

/// Checks that `ls` derives its output encoding from the locale environment.
///
/// A non-ASCII file name must not appear verbatim under C/POSIX/non-UTF-8
/// locales, and must appear unchanged under UTF-8 locales.
#[cfg(not(any(target_vendor = "apple", target_os = "windows", target_os = "openbsd")))]
mod locale_encoding {
    use uutests::util::TestScenario;
    use uutests::util_name;

    /// The non-ASCII file name created by every test in this module.
    const NON_ASCII_NAME: &str = "é";

    /// Runs `ls --quoting-style=shell-escape` with `LC_ALL` set to `locale`
    /// in a directory holding one file named [`NON_ASCII_NAME`].
    ///
    /// With `expect_utf8` set, the name must show up verbatim in stdout
    /// (UTF-8 locale); otherwise it must be absent from stdout (ASCII locale,
    /// where the character is escaped).
    fn check_locale(locale: &str, expect_utf8: bool) {
        let scenario = TestScenario::new(util_name!());
        let fixtures = &scenario.fixtures;
        let file_name = uucore::os_str_from_bytes(NON_ASCII_NAME.as_bytes())
            .expect("should be valid Unicode");
        fixtures.touch(file_name);

        let output = scenario
            .ucmd()
            .env("LC_ALL", locale)
            .arg("--quoting-style=shell-escape")
            .succeeds();

        // ASCII-only locales: the raw character must not reach stdout.
        if !expect_utf8 {
            output.stdout_does_not_contain(NON_ASCII_NAME);
            return;
        }

        // UTF-8 locales: the name is printed as-is.
        output.stdout_contains(NON_ASCII_NAME);
    }

    #[test]
    fn test_ls_locale_c_escapes_non_ascii() {
        check_locale("C", false);
    }

    #[test]
    fn test_ls_locale_posix_escapes_non_ascii() {
        check_locale("POSIX", false);
    }

    #[test]
    fn test_ls_locale_utf8_suffix_shows_non_ascii() {
        check_locale("en_US.UTF-8", true);
    }

    #[test]
    fn test_ls_locale_utf8_lowercase_shows_non_ascii() {
        check_locale("en_US.utf8", true);
    }

    #[test]
    fn test_ls_locale_iso8859_escapes_non_ascii() {
        check_locale("en_US.ISO-8859-1", false);
    }

    #[test]
    fn test_ls_locale_no_encoding_suffix_escapes_non_ascii() {
        check_locale("en_US", false);
    }
}

/// On Windows, `ls` must print a non-ASCII file name unchanged and emit
/// nothing on stderr.
///
/// Intended to cover the case where the system ANSI code page is UTF-8
/// (ACP 65001).
/// NOTE(review): the test neither sets nor checks the code page itself —
/// confirm the CI environment provides it.
#[cfg(target_os = "windows")]
#[test]
fn test_ls_windows_non_ascii_filename() {
    const FILE_NAME: &str = "文件1";

    let scenario = TestScenario::new(util_name!());
    let fixtures = &scenario.fixtures;
    fixtures.touch(FILE_NAME);

    let output = scenario.ucmd().succeeds();
    output.stdout_contains(FILE_NAME).no_stderr();
}

#[test]
#[cfg(target_os = "wasi")]
fn test_ls_al_no_capabilities_insufficient_on_wasi() {
Expand Down
Loading