Skip to content

Commit

Permalink
utf16
Browse files Browse the repository at this point in the history
  • Loading branch information
robertbastian committed Apr 9, 2024
1 parent 61baed4 commit 1d9b898
Show file tree
Hide file tree
Showing 6 changed files with 160 additions and 24 deletions.
8 changes: 4 additions & 4 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -209,10 +209,10 @@ icu_benchmark_macros = { path = "tools/benchmark/macros" }

# The version here can either be a `version = ".."` spec or `git = "https://github.com/rust-diplomat/diplomat", rev = ".."`
# Diplomat must be published preceding a new ICU4X release but may use git versions in between
diplomat = { git = "https://github.com/rust-diplomat/diplomat.git", rev = "e78da92310f8a06f64f5d0ea3f4a19db392c840b" }
diplomat-runtime = { git = "https://github.com/rust-diplomat/diplomat.git", rev = "e78da92310f8a06f64f5d0ea3f4a19db392c840b" }
diplomat_core = { git = "https://github.com/rust-diplomat/diplomat.git", rev = "e78da92310f8a06f64f5d0ea3f4a19db392c840b" }
diplomat-tool = { git = "https://github.com/rust-diplomat/diplomat.git", rev = "e78da92310f8a06f64f5d0ea3f4a19db392c840b" }
diplomat = { git = "https://github.com/rust-diplomat/diplomat.git", rev = "ae808b4200415775f1d4858c3a0f3b7d0da03500" }
diplomat-runtime = { git = "https://github.com/rust-diplomat/diplomat.git", rev = "ae808b4200415775f1d4858c3a0f3b7d0da03500" }
diplomat_core = { git = "https://github.com/rust-diplomat/diplomat.git", rev = "ae808b4200415775f1d4858c3a0f3b7d0da03500" }
diplomat-tool = { git = "https://github.com/rust-diplomat/diplomat.git", rev = "ae808b4200415775f1d4858c3a0f3b7d0da03500" }

# Size optimized builds
[profile.release-opt-size]
Expand Down
10 changes: 5 additions & 5 deletions ffi/capi/bindings/dart/ListFormatter.g.dart

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

44 changes: 41 additions & 3 deletions ffi/capi/src/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,15 +136,53 @@ pub mod ffi {
#[diplomat::rust_link(icu::list::ListFormatter::format, FnInStruct)]
#[diplomat::rust_link(icu::list::ListFormatter::format_to_string, FnInStruct, hidden)]
#[diplomat::rust_link(icu::list::FormattedList, Struct, hidden)]
#[diplomat::attr(*, rename = "format")]
#[diplomat::attr(dart, disable)]
#[diplomat::skip_if_ast]
pub fn format2(
pub fn format_valid_utf8(
&self,
list: &[&str],
write: &mut DiplomatWriteable,
) -> Result<(), ICU4XError> {
self.0.format(list.iter()).write_to(write)?;
Ok(())
}

#[diplomat::rust_link(icu::list::ListFormatter::format, FnInStruct)]
#[diplomat::rust_link(icu::list::ListFormatter::format_to_string, FnInStruct, hidden)]
#[diplomat::rust_link(icu::list::FormattedList, Struct, hidden)]
#[diplomat::attr(dart, disable)]
#[diplomat::skip_if_ast]
pub fn format_utf8(
&self,
list: &[&DiplomatStr],
write: &mut DiplomatWriteable,
) -> Result<(), ICU4XError> {
self.0
.format(list.iter().filter_map(|&b| core::str::from_utf8(b).ok()))
.format(
list.iter()
.copied()
.map(writeable::utf::PotentiallyInvalidUtf8),
)
.write_to(write)?;
Ok(())
}

#[diplomat::rust_link(icu::list::ListFormatter::format, FnInStruct)]
#[diplomat::rust_link(icu::list::ListFormatter::format_to_string, FnInStruct, hidden)]
#[diplomat::rust_link(icu::list::FormattedList, Struct, hidden)]
#[diplomat::attr(dart, rename = "format")]
#[diplomat::skip_if_ast]
pub fn format_utf16(
    &self,
    list: &[&DiplomatStr16],
    write: &mut DiplomatWriteable,
) -> Result<(), ICU4XError> {
    // Wrap every UTF-16 slice in the lossy `Writeable` adapter so that the
    // list elements can be handed to the formatter without pre-validation.
    let elements = list
        .iter()
        .copied()
        .map(writeable::utf::PotentiallyInvalidUtf16);

    // Build the formatted list, then stream it into the caller's buffer.
    let formatted = self.0.format(elements);
    formatted.write_to(write)?;
    Ok(())
}
Expand Down
29 changes: 21 additions & 8 deletions utils/writeable/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ mod cmp;
mod either;
mod impls;
mod ops;
pub mod utf;

use alloc::borrow::Cow;
use alloc::string::String;
Expand Down Expand Up @@ -397,19 +398,21 @@ macro_rules! assert_writeable_eq {
($actual_writeable:expr, $expected_str:expr, $($arg:tt)+) => {{
let actual_writeable = &$actual_writeable;
let (actual_str, _) = $crate::writeable_to_parts_for_test(actual_writeable).unwrap();
let actual_len = actual_str.len();
assert_eq!(actual_str, $expected_str, $($arg)*);
assert_eq!(actual_str, $crate::Writeable::write_to_string(actual_writeable), $($arg)+);
let length_hint = $crate::Writeable::writeable_length_hint(actual_writeable);
let lower = length_hint.0;
assert!(
length_hint.0 <= actual_str.len(),
"hint lower bound {} larger than actual length {}: {}",
length_hint.0, actual_str.len(), format!($($arg)*),
lower <= actual_len,
"hint lower bound {lower} larger than actual length {actual_len}: {}",
format!($($arg)*),
);
if let Some(upper) = length_hint.1 {
assert!(
actual_str.len() <= upper,
"hint upper bound {} smaller than actual length {}: {}",
length_hint.0, actual_str.len(), format!($($arg)*),
actual_len <= upper,
"hint upper bound {upper} smaller than actual length {actual_len}: {}",
format!($($arg)*),
);
}
assert_eq!(actual_writeable.to_string(), $expected_str);
Expand All @@ -425,13 +428,23 @@ macro_rules! assert_writeable_parts_eq {
($actual_writeable:expr, $expected_str:expr, $expected_parts:expr, $($arg:tt)+) => {{
let actual_writeable = &$actual_writeable;
let (actual_str, actual_parts) = $crate::writeable_to_parts_for_test(actual_writeable).unwrap();
let actual_len = actual_str.len();
assert_eq!(actual_str, $expected_str, $($arg)+);
assert_eq!(actual_str, $crate::Writeable::write_to_string(actual_writeable), $($arg)+);
assert_eq!(actual_parts, $expected_parts, $($arg)+);
let length_hint = $crate::Writeable::writeable_length_hint(actual_writeable);
assert!(length_hint.0 <= actual_str.len(), $($arg)+);
let lower = length_hint.0;
assert!(
lower <= actual_len,
"hint lower bound {lower} larger than actual length {actual_len}: {}",
format!($($arg)*),
);
if let Some(upper) = length_hint.1 {
assert!(actual_str.len() <= upper, $($arg)+);
assert!(
actual_len <= upper,
"hint upper bound {upper} smaller than actual length {actual_len}: {}",
format!($($arg)*),
);
}
assert_eq!(actual_writeable.to_string(), $expected_str);
}};
Expand Down
85 changes: 85 additions & 0 deletions utils/writeable/src/utf.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use crate::{impl_display_with_writeable, LengthHint, Writeable};

use core::fmt;

/// A wrapper that implements [`Writeable`] for a `&[u8]` holding potentially ill-formed UTF-8.
///
/// Well-formed parts are written unchanged; each ill-formed sequence is written as
/// U+FFFD REPLACEMENT CHARACTER, according to the
/// [WHATWG Encoding Standard](https://encoding.spec.whatwg.org/#utf-8-decoder).
#[derive(Debug)]
pub struct PotentiallyInvalidUtf8<'a>(pub &'a [u8]);

impl Writeable for PotentiallyInvalidUtf8<'_> {
fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> fmt::Result {
let mut remaining = self.0;
while remaining.len() > 0 {
match core::str::from_utf8(remaining) {
Ok(str) => {
return sink.write_str(str);
}
Err(e) => {
let (str, r) = remaining.split_at(e.valid_up_to());
sink.write_str(unsafe { core::str::from_utf8_unchecked(str) })?;
sink.write_char(char::REPLACEMENT_CHARACTER)?;
match e.error_len() {
None => remaining = &[],
Some(l) => remaining = &r[l..],
}
}
}
}

Ok(())
}

fn writeable_length_hint(&self) -> crate::LengthHint {
// In the worst case, every byte becomes a replacement character
LengthHint::at_most(self.0.len() * 3)
}
}

// NOTE(review): judging by the macro's name, this presumably derives `Display`
// for the wrapper from its `Writeable` impl — confirm against the macro definition.
impl_display_with_writeable!(PotentiallyInvalidUtf8<'_>);

/// A wrapper that implements [`Writeable`] for a `&[u16]` holding potentially ill-formed UTF-16.
///
/// Well-formed code units are transcoded; each unpaired surrogate is written as
/// U+FFFD REPLACEMENT CHARACTER, according to the
/// [WHATWG Encoding Standard](https://encoding.spec.whatwg.org/#shared-utf-16-decoder).
#[derive(Debug)]
pub struct PotentiallyInvalidUtf16<'a>(pub &'a [u16]);

impl Writeable for PotentiallyInvalidUtf16<'_> {
fn write_to<W: fmt::Write + ?Sized>(&self, sink: &mut W) -> fmt::Result {
for c in core::char::decode_utf16(self.0.iter().copied()) {
if let Ok(c) = c {
sink.write_char(c)?;
} else {
sink.write_char(char::REPLACEMENT_CHARACTER)?;
}
}
Ok(())
}

fn writeable_length_hint(&self) -> LengthHint {
LengthHint::undefined() // todo
}
}

// NOTE(review): judging by the macro's name, this presumably derives `Display`
// for the wrapper from its `Writeable` impl — confirm against the macro definition.
impl_display_with_writeable!(PotentiallyInvalidUtf16<'_>);

#[cfg(test)]
mod test {
    use super::*;
    use crate::assert_writeable_eq;

    // U+FFFD is spelled as an escape throughout so that the expectations do
    // not depend on how the source file itself is encoded or rendered.

    #[test]
    fn test_utf8() {
        // Well-formed input, including the empty slice, passes through unchanged.
        assert_writeable_eq!(PotentiallyInvalidUtf8(b""), "");
        assert_writeable_eq!(PotentiallyInvalidUtf8(b"Foo Bar"), "Foo Bar");
        // A byte that can never occur in UTF-8.
        assert_writeable_eq!(PotentiallyInvalidUtf8(b"Foo\xFDBar"), "Foo\u{FFFD}Bar");
        // A multi-byte sequence cut off by the end of the input.
        assert_writeable_eq!(PotentiallyInvalidUtf8(b"Foo\xE2\x82"), "Foo\u{FFFD}");
        // A multi-byte prefix followed by a non-continuation byte.
        assert_writeable_eq!(PotentiallyInvalidUtf8(b"\xE2\x82Bar"), "\u{FFFD}Bar");
        // Consecutive invalid bytes are replaced one by one.
        assert_writeable_eq!(PotentiallyInvalidUtf8(b"\xFF\xFF"), "\u{FFFD}\u{FFFD}");
    }

    #[test]
    fn test_utf16() {
        // A well-formed surrogate pair.
        assert_writeable_eq!(PotentiallyInvalidUtf16(&[0xD83E, 0xDD73]), "\u{1F973}");
        // The same surrogates separated by a space are both unpaired.
        assert_writeable_eq!(
            PotentiallyInvalidUtf16(&[0xD83E, 0x20, 0xDD73]),
            "\u{FFFD} \u{FFFD}"
        );
        // A high surrogate at the very end of the input.
        assert_writeable_eq!(PotentiallyInvalidUtf16(&[0x48, 0xD83E]), "H\u{FFFD}");
        // A lone low surrogate.
        assert_writeable_eq!(PotentiallyInvalidUtf16(&[0xDD73]), "\u{FFFD}");
    }
}

0 comments on commit 1d9b898

Please sign in to comment.