diff --git a/Cargo.lock b/Cargo.lock index 6f2e15bc4ec..8911551ad7d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -679,7 +679,7 @@ dependencies = [ [[package]] name = "diplomat" version = "0.7.0" -source = "git+https://github.com/rust-diplomat/diplomat.git?rev=e78da92310f8a06f64f5d0ea3f4a19db392c840b#e78da92310f8a06f64f5d0ea3f4a19db392c840b" +source = "git+https://github.com/rust-diplomat/diplomat.git?rev=ae808b4200415775f1d4858c3a0f3b7d0da03500#ae808b4200415775f1d4858c3a0f3b7d0da03500" dependencies = [ "diplomat_core", "proc-macro2", @@ -697,7 +697,7 @@ dependencies = [ [[package]] name = "diplomat-runtime" version = "0.7.0" -source = "git+https://github.com/rust-diplomat/diplomat.git?rev=e78da92310f8a06f64f5d0ea3f4a19db392c840b#e78da92310f8a06f64f5d0ea3f4a19db392c840b" +source = "git+https://github.com/rust-diplomat/diplomat.git?rev=ae808b4200415775f1d4858c3a0f3b7d0da03500#ae808b4200415775f1d4858c3a0f3b7d0da03500" dependencies = [ "log", ] @@ -705,7 +705,7 @@ dependencies = [ [[package]] name = "diplomat-tool" version = "0.7.0" -source = "git+https://github.com/rust-diplomat/diplomat.git?rev=e78da92310f8a06f64f5d0ea3f4a19db392c840b#e78da92310f8a06f64f5d0ea3f4a19db392c840b" +source = "git+https://github.com/rust-diplomat/diplomat.git?rev=ae808b4200415775f1d4858c3a0f3b7d0da03500#ae808b4200415775f1d4858c3a0f3b7d0da03500" dependencies = [ "askama", "clap", @@ -725,7 +725,7 @@ dependencies = [ [[package]] name = "diplomat_core" version = "0.7.0" -source = "git+https://github.com/rust-diplomat/diplomat.git?rev=e78da92310f8a06f64f5d0ea3f4a19db392c840b#e78da92310f8a06f64f5d0ea3f4a19db392c840b" +source = "git+https://github.com/rust-diplomat/diplomat.git?rev=ae808b4200415775f1d4858c3a0f3b7d0da03500#ae808b4200415775f1d4858c3a0f3b7d0da03500" dependencies = [ "displaydoc", "either", diff --git a/Cargo.toml b/Cargo.toml index 63a61188bb1..7b214103d20 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -209,10 +209,10 @@ icu_benchmark_macros = { path = "tools/benchmark/macros" } # The 
version here can either be a `version = ".."` spec or `git = "https://github.com/rust-diplomat/diplomat", rev = ".."` # Diplomat must be published preceding a new ICU4X release but may use git versions in between -diplomat = { git = "https://github.com/rust-diplomat/diplomat.git", rev = "e78da92310f8a06f64f5d0ea3f4a19db392c840b" } -diplomat-runtime = { git = "https://github.com/rust-diplomat/diplomat.git", rev = "e78da92310f8a06f64f5d0ea3f4a19db392c840b" } -diplomat_core = { git = "https://github.com/rust-diplomat/diplomat.git", rev = "e78da92310f8a06f64f5d0ea3f4a19db392c840b" } -diplomat-tool = { git = "https://github.com/rust-diplomat/diplomat.git", rev = "e78da92310f8a06f64f5d0ea3f4a19db392c840b" } +diplomat = { git = "https://github.com/rust-diplomat/diplomat.git", rev = "ae808b4200415775f1d4858c3a0f3b7d0da03500" } +diplomat-runtime = { git = "https://github.com/rust-diplomat/diplomat.git", rev = "ae808b4200415775f1d4858c3a0f3b7d0da03500" } +diplomat_core = { git = "https://github.com/rust-diplomat/diplomat.git", rev = "ae808b4200415775f1d4858c3a0f3b7d0da03500" } +diplomat-tool = { git = "https://github.com/rust-diplomat/diplomat.git", rev = "ae808b4200415775f1d4858c3a0f3b7d0da03500" } # Size optimized builds [profile.release-opt-size] diff --git a/ffi/capi/bindings/dart/ListFormatter.g.dart b/ffi/capi/bindings/dart/ListFormatter.g.dart index d11f441fd74..3d0ea8f2ef0 100644 --- a/ffi/capi/bindings/dart/ListFormatter.g.dart +++ b/ffi/capi/bindings/dart/ListFormatter.g.dart @@ -66,9 +66,9 @@ final class ListFormatter implements ffi.Finalizable { /// Throws [Error] on failure. 
String format(core.List list) { final temp = ffi2.Arena(); - final listView = list.utf8View; + final listView = list.utf16View; final writeable = _Writeable(); - final result = _ICU4XListFormatter_format2(_ffi, listView.allocIn(temp), listView.length, writeable._ffi); + final result = _ICU4XListFormatter_format_utf16(_ffi, listView.allocIn(temp), listView.length, writeable._ffi); temp.releaseAll(); if (!result.isOk) { throw Error.values.firstWhere((v) => v._ffi == result.union.err); @@ -97,7 +97,7 @@ external _ResultOpaqueInt32 _ICU4XListFormatter_create_or_with_length(ffi.Pointe // ignore: non_constant_identifier_names external _ResultOpaqueInt32 _ICU4XListFormatter_create_unit_with_length(ffi.Pointer provider, ffi.Pointer locale, int length); -@meta.ResourceIdentifier('ICU4XListFormatter_format2') -@ffi.Native<_ResultVoidInt32 Function(ffi.Pointer, ffi.Pointer<_SliceUtf8>, ffi.Size, ffi.Pointer)>(isLeaf: true, symbol: 'ICU4XListFormatter_format2') +@meta.ResourceIdentifier('ICU4XListFormatter_format_utf16') +@ffi.Native<_ResultVoidInt32 Function(ffi.Pointer, ffi.Pointer<_SliceUtf16>, ffi.Size, ffi.Pointer)>(isLeaf: true, symbol: 'ICU4XListFormatter_format_utf16') // ignore: non_constant_identifier_names -external _ResultVoidInt32 _ICU4XListFormatter_format2(ffi.Pointer self, ffi.Pointer<_SliceUtf8> listData, int listLength, ffi.Pointer writeable); +external _ResultVoidInt32 _ICU4XListFormatter_format_utf16(ffi.Pointer self, ffi.Pointer<_SliceUtf16> listData, int listLength, ffi.Pointer writeable); diff --git a/ffi/capi/src/list.rs b/ffi/capi/src/list.rs index 55f124c4a49..b75439fc2ba 100644 --- a/ffi/capi/src/list.rs +++ b/ffi/capi/src/list.rs @@ -136,15 +136,53 @@ pub mod ffi { #[diplomat::rust_link(icu::list::ListFormatter::format, FnInStruct)] #[diplomat::rust_link(icu::list::ListFormatter::format_to_string, FnInStruct, hidden)] #[diplomat::rust_link(icu::list::FormattedList, Struct, hidden)] - #[diplomat::attr(*, rename = "format")] + #[diplomat::attr(dart, 
disable)] #[diplomat::skip_if_ast] - pub fn format2( + pub fn format_valid_utf8( + &self, + list: &[&str], + write: &mut DiplomatWriteable, + ) -> Result<(), ICU4XError> { + self.0.format(list.iter()).write_to(write)?; + Ok(()) + } + + #[diplomat::rust_link(icu::list::ListFormatter::format, FnInStruct)] + #[diplomat::rust_link(icu::list::ListFormatter::format_to_string, FnInStruct, hidden)] + #[diplomat::rust_link(icu::list::FormattedList, Struct, hidden)] + #[diplomat::attr(dart, disable)] + #[diplomat::skip_if_ast] + pub fn format_utf8( &self, list: &[&DiplomatStr], write: &mut DiplomatWriteable, ) -> Result<(), ICU4XError> { self.0 - .format(list.iter().filter_map(|&b| core::str::from_utf8(b).ok())) + .format( + list.iter() + .copied() + .map(writeable::utf::PotentiallyInvalidUtf8), + ) + .write_to(write)?; + Ok(()) + } + + #[diplomat::rust_link(icu::list::ListFormatter::format, FnInStruct)] + #[diplomat::rust_link(icu::list::ListFormatter::format_to_string, FnInStruct, hidden)] + #[diplomat::rust_link(icu::list::FormattedList, Struct, hidden)] + #[diplomat::attr(dart, rename = "format")] + #[diplomat::skip_if_ast] + pub fn format_utf16( + &self, + list: &[&DiplomatStr16], + write: &mut DiplomatWriteable, + ) -> Result<(), ICU4XError> { + self.0 + .format( + list.iter() + .copied() + .map(writeable::utf::PotentiallyInvalidUtf16), + ) .write_to(write)?; Ok(()) } diff --git a/utils/writeable/src/lib.rs b/utils/writeable/src/lib.rs index 7d52288945a..3a3f275cb0c 100644 --- a/utils/writeable/src/lib.rs +++ b/utils/writeable/src/lib.rs @@ -71,6 +71,7 @@ mod cmp; mod either; mod impls; mod ops; +pub mod utf; use alloc::borrow::Cow; use alloc::string::String; @@ -397,19 +398,21 @@ macro_rules! 
assert_writeable_eq { ($actual_writeable:expr, $expected_str:expr, $($arg:tt)+) => {{ let actual_writeable = &$actual_writeable; let (actual_str, _) = $crate::writeable_to_parts_for_test(actual_writeable).unwrap(); + let actual_len = actual_str.len(); assert_eq!(actual_str, $expected_str, $($arg)*); assert_eq!(actual_str, $crate::Writeable::write_to_string(actual_writeable), $($arg)+); let length_hint = $crate::Writeable::writeable_length_hint(actual_writeable); + let lower = length_hint.0; assert!( - length_hint.0 <= actual_str.len(), - "hint lower bound {} larger than actual length {}: {}", - length_hint.0, actual_str.len(), format!($($arg)*), + lower <= actual_len, + "hint lower bound {lower} larger than actual length {actual_len}: {}", + format!($($arg)*), ); if let Some(upper) = length_hint.1 { assert!( - actual_str.len() <= upper, - "hint upper bound {} smaller than actual length {}: {}", - length_hint.0, actual_str.len(), format!($($arg)*), + actual_len <= upper, + "hint upper bound {upper} smaller than actual length {actual_len}: {}", + format!($($arg)*), ); } assert_eq!(actual_writeable.to_string(), $expected_str); @@ -425,13 +428,23 @@ macro_rules! 
assert_writeable_parts_eq { ($actual_writeable:expr, $expected_str:expr, $expected_parts:expr, $($arg:tt)+) => {{ let actual_writeable = &$actual_writeable; let (actual_str, actual_parts) = $crate::writeable_to_parts_for_test(actual_writeable).unwrap(); + let actual_len = actual_str.len(); assert_eq!(actual_str, $expected_str, $($arg)+); assert_eq!(actual_str, $crate::Writeable::write_to_string(actual_writeable), $($arg)+); assert_eq!(actual_parts, $expected_parts, $($arg)+); let length_hint = $crate::Writeable::writeable_length_hint(actual_writeable); - assert!(length_hint.0 <= actual_str.len(), $($arg)+); + let lower = length_hint.0; + assert!( + lower <= actual_len, + "hint lower bound {lower} larger than actual length {actual_len}: {}", + format!($($arg)*), + ); if let Some(upper) = length_hint.1 { - assert!(actual_str.len() <= upper, $($arg)+); + assert!( + actual_len <= upper, + "hint upper bound {upper} smaller than actual length {actual_len}: {}", + format!($($arg)*), + ); } assert_eq!(actual_writeable.to_string(), $expected_str); }}; diff --git a/utils/writeable/src/utf.rs b/utils/writeable/src/utf.rs new file mode 100644 index 00000000000..da62768b4d5 --- /dev/null +++ b/utils/writeable/src/utf.rs @@ -0,0 +1,85 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::{impl_display_with_writeable, LengthHint, Writeable}; + +use core::fmt; + +/// Implements [`Writeable`] for [`&[u8]`] according to the [WHATWG Encoding Standard]( +/// https://encoding.spec.whatwg.org/#utf-8-decoder). 
+#[derive(Debug)]
+pub struct PotentiallyInvalidUtf8<'a>(pub &'a [u8]);
+
+impl Writeable for PotentiallyInvalidUtf8<'_> {
+    /// Writes the wrapped bytes to `sink`, emitting one U+FFFD REPLACEMENT
+    /// CHARACTER for every invalid or truncated sequence reported by
+    /// [`core::str::from_utf8`].
+    fn write_to<W: fmt::Write + ?Sized>(&self, sink: &mut W) -> fmt::Result {
+        let mut remaining = self.0;
+        while !remaining.is_empty() {
+            match core::str::from_utf8(remaining) {
+                // Everything that is left is valid, so write it in one go.
+                Ok(valid) => {
+                    return sink.write_str(valid);
+                }
+                Err(e) => {
+                    let (valid, rest) = remaining.split_at(e.valid_up_to());
+                    // SAFETY: `Utf8Error::valid_up_to` is the length of the prefix
+                    // of `remaining` that `from_utf8` just verified as valid UTF-8.
+                    sink.write_str(unsafe { core::str::from_utf8_unchecked(valid) })?;
+                    sink.write_char(char::REPLACEMENT_CHARACTER)?;
+                    match e.error_len() {
+                        // The input ended in the middle of a sequence; nothing is left.
+                        None => remaining = &[],
+                        // Resume right after the invalid sequence, which starts at
+                        // the beginning of `rest` and is `invalid_len` bytes long.
+                        Some(invalid_len) => remaining = &rest[invalid_len..],
+                    }
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Returns the bounds on the number of UTF-8 bytes [`Self::write_to`] emits.
+    fn writeable_length_hint(&self) -> LengthHint {
+        // Lower bound: valid bytes are copied one-to-one, and every invalid
+        // sequence (1 to 3 bytes) is replaced by the 3-byte U+FFFD, so the output
+        // is never shorter than the input.
+        // Upper bound: in the worst case, every byte becomes a replacement
+        // character. Saturate rather than overflow on absurdly large inputs.
+        LengthHint::between(self.0.len(), self.0.len().saturating_mul(3))
+    }
+}
+
+impl_display_with_writeable!(PotentiallyInvalidUtf8<'_>);
+
+/// Implements [`Writeable`] for [`&[u16]`] according to the [WHATWG Encoding Standard](
+/// https://encoding.spec.whatwg.org/#shared-utf-16-decoder).
+///
+/// Every unpaired surrogate is written as U+FFFD REPLACEMENT CHARACTER.
+#[derive(Debug)]
+pub struct PotentiallyInvalidUtf16<'a>(pub &'a [u16]);
+
+impl Writeable for PotentiallyInvalidUtf16<'_> {
+    /// Decodes the wrapped code units and writes the resulting characters to
+    /// `sink`, substituting U+FFFD for each unit that fails to decode.
+    fn write_to<W: fmt::Write + ?Sized>(&self, sink: &mut W) -> fmt::Result {
+        for decoded in core::char::decode_utf16(self.0.iter().copied()) {
+            sink.write_char(decoded.unwrap_or(char::REPLACEMENT_CHARACTER))?;
+        }
+        Ok(())
+    }
+
+    /// Returns the bounds on the number of UTF-8 bytes [`Self::write_to`] emits.
+    fn writeable_length_hint(&self) -> LengthHint {
+        // Lower bound: an ASCII code unit becomes a single byte.
+        // Upper bound: a code unit becomes at most 3 bytes. That covers a BMP
+        // character, an unpaired surrogate replaced by U+FFFD, and a surrogate
+        // pair, which takes 4 bytes for 2 units.
+        LengthHint::between(self.0.len(), self.0.len().saturating_mul(3))
+    }
+}
+
+impl_display_with_writeable!(PotentiallyInvalidUtf16<'_>);
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::assert_writeable_eq;
+
+    #[test]
+    fn test_utf8() {
+        assert_writeable_eq!(PotentiallyInvalidUtf8(b"Foo Bar"), "Foo Bar");
+        assert_writeable_eq!(PotentiallyInvalidUtf8(b"Foo\xFDBar"), "Foo�Bar");
+        // Empty input writes nothing.
+        assert_writeable_eq!(PotentiallyInvalidUtf8(b""), "");
+        // A sequence cut off by the end of the input yields a single replacement.
+        assert_writeable_eq!(PotentiallyInvalidUtf8(b"Foo\xF0\x9F"), "Foo\u{FFFD}");
+    }
+
+    #[test]
+    fn test_utf16() {
+        assert_writeable_eq!(PotentiallyInvalidUtf16(&[0xD83E, 0xDD73]), "🥳");
+        assert_writeable_eq!(PotentiallyInvalidUtf16(&[0xD83E, 0x20, 0xDD73]), "� �");
+        // Empty input writes nothing.
+        assert_writeable_eq!(PotentiallyInvalidUtf16(&[]), "");
+    }
+}