From 9a88c695dcec7f98ce97dcc9fb1973b1a7903f21 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Tue, 13 Feb 2024 17:36:27 -0800 Subject: [PATCH 1/2] Implement Ord and PartialOrd on DataLocale --- components/locid/src/langid.rs | 24 ++++++++++++++++++++++++ provider/core/src/request.rs | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/components/locid/src/langid.rs b/components/locid/src/langid.rs index 22f0f8956d5..ba1bd78f3e8 100644 --- a/components/locid/src/langid.rs +++ b/components/locid/src/langid.rs @@ -146,6 +146,30 @@ impl LanguageIdentifier { variants: subtags::Variants::new(), }; + /// Borrows the fields of this [`LanguageIdentifier`] in a tuple. + pub fn as_tuple( + &self, + ) -> ( + subtags::Language, + Option, + Option, + &subtags::Variants, + ) { + (self.language, self.script, self.region, &self.variants) + } + + /// Takes the fields of this [`LanguageIdentifier`] and returns them as a tuple. + pub fn into_tuple( + self, + ) -> ( + subtags::Language, + Option, + Option, + subtags::Variants, + ) { + (self.language, self.script, self.region, self.variants) + } + /// This is a best-effort operation that performs all available levels of canonicalization. /// /// At the moment the operation will normalize casing and the separator, but in the future diff --git a/provider/core/src/request.rs b/provider/core/src/request.rs index 3f21ae7eeec..ac0d7af1659 100644 --- a/provider/core/src/request.rs +++ b/provider/core/src/request.rs @@ -246,6 +246,22 @@ impl FromStr for DataLocale { } } +impl PartialOrd for DataLocale { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for DataLocale { + fn cmp(&self, other: &Self) -> Ordering { + self.langid + .as_tuple() + .cmp(&other.langid.as_tuple()) + .then_with(|| self.keywords.cmp(&other.keywords)) + .then_with(|| self.aux.cmp(&other.aux)) + } +} + impl DataLocale { /// Compare this [`DataLocale`] with BCP-47 bytes. /// @@ -755,7 +771,7 @@ impl DataLocale { /// ``` /// /// [`Keywords`]: unicode_ext::Keywords -#[derive(Debug, PartialEq, Clone, Eq, Hash)] +#[derive(Debug, PartialEq, Clone, Eq, Hash, PartialOrd, Ord)] #[cfg(feature = "experimental")] pub struct AuxiliaryKeys { value: AuxiliaryKeysInner, @@ -809,6 +825,22 @@ impl Hash for AuxiliaryKeysInner { } } +#[cfg(feature = "experimental")] +impl PartialOrd for AuxiliaryKeysInner { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option { + self.deref().partial_cmp(other.deref()) + } +} + +#[cfg(feature = "experimental")] +impl Ord for AuxiliaryKeysInner { + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + self.deref().cmp(other.deref()) + } +} + #[cfg(feature = "experimental")] writeable::impl_display_with_writeable!(AuxiliaryKeys); From 83f19f8592ed4e3faedc99b47c35fca5d8f458e0 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Tue, 13 Feb 2024 17:36:59 -0800 Subject: [PATCH 2/2] Draft sketch of ResolvedLocaleAdapter --- provider/adapters/src/lib.rs | 2 + provider/adapters/src/resolved.rs | 73 +++++++++++++++++++++++++ provider/blob/src/blob_data_provider.rs | 12 ++-- provider/core/src/any.rs | 24 +++++--- provider/core/src/request.rs | 3 + provider/core/src/serde/mod.rs | 8 ++- provider/fs/src/fs_data_provider.rs | 8 ++- 7 files changed, 116 insertions(+), 14 deletions(-) create mode 100644 provider/adapters/src/resolved.rs diff --git a/provider/adapters/src/lib.rs b/provider/adapters/src/lib.rs index 81987f87785..e96efd3d286 100644 --- a/provider/adapters/src/lib.rs +++ b/provider/adapters/src/lib.rs @@ -8,6 +8,7 @@ //! - Use the [`either`] module to choose between multiple provider types at runtime. //! - Use the [`filter`] module to programmatically reject certain data requests. //! - Use the [`fallback`] module to automatically resolve arbitrary locales for data loading. +//! - Use the [`resolved`] module to determine the supported or resolved locale for a data request. // https://github.com/unicode-org/icu4x/blob/main/docs/process/boilerplate.md#library-annotations #![cfg_attr(not(any(test, feature = "std")), no_std)] @@ -34,3 +35,4 @@ pub mod fallback; pub mod filter; pub mod fork; mod helpers; +pub mod resolved; diff --git a/provider/adapters/src/resolved.rs b/provider/adapters/src/resolved.rs new file mode 100644 index 00000000000..12de0488be9 --- /dev/null +++ b/provider/adapters/src/resolved.rs @@ -0,0 +1,73 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use alloc::collections::{BTreeMap, BTreeSet}; +use core::cell::RefCell; + +use icu_provider::prelude::*; + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +struct ResolvedLocaleInfo { + pub requested_locale: DataLocale, + pub resolved_locale: Option, +} + +/// TODO: Docs +#[derive(Debug)] +pub struct ResolvedLocaleAdapter

{ + inner: P, + resolved_locales: RefCell>, + drop_payloads: bool, +} + +impl

ResolvedLocaleAdapter

{ + pub fn into_inner(self) -> P { + self.inner + } + + pub fn clear(&mut self) { + self.resolved_locales.borrow_mut().clear() + } + + pub fn take_resolved_locale_for_key(&mut self, key: DataKey) -> Option { + self.resolved_locales + .borrow_mut() + .remove(&key) + .and_then(|info| info.resolved_locale) + } + + pub fn take_all_resolved_locales(&mut self) -> BTreeSet { + let map = self.resolved_locales.take(); + map.into_iter() + .filter_map(|(_, info)| info.resolved_locale) + .collect() + } + + pub fn saw_last_resort_fallback(&self) -> bool { + self.resolved_locales.borrow().values().any(|info| { + info.resolved_locale + .as_ref() + .map(|l| l.is_langid_und()) + .unwrap_or(false) + }) + } +} + +impl BufferProvider for ResolvedLocaleAdapter

{ + fn load_buffer( + &self, + key: DataKey, + req: DataRequest, + ) -> Result, DataError> { + let mut response = self.inner.load_buffer(key, req)?; + self.resolved_locales.borrow_mut().insert( + key, + ResolvedLocaleInfo { + requested_locale: req.locale.clone(), + resolved_locale: response.metadata.locale.take(), + }, + ); + Ok(response) + } +} diff --git a/provider/blob/src/blob_data_provider.rs b/provider/blob/src/blob_data_provider.rs index 960af2d5fd3..2150b3f9c01 100644 --- a/provider/blob/src/blob_data_provider.rs +++ b/provider/blob/src/blob_data_provider.rs @@ -123,10 +123,14 @@ impl BufferProvider for BlobDataProvider { metadata.buffer_format = Some(BufferFormat::Postcard1); Ok(DataResponse { metadata, - payload: Some(DataPayload::from_yoked_buffer( - self.data - .try_map_project_cloned(|blob, _| blob.load(key, req))?, - )), + payload: if req.metadata.drop_payload { + None + } else { + Some(DataPayload::from_yoked_buffer( + self.data + .try_map_project_cloned(|blob, _| blob.load(key, req))?, + )) + }, }) } } diff --git a/provider/core/src/any.rs b/provider/core/src/any.rs index 243055d2123..b0db3779e4c 100644 --- a/provider/core/src/any.rs +++ b/provider/core/src/any.rs @@ -437,10 +437,7 @@ where { #[inline] fn load(&self, req: DataRequest) -> Result, DataError> { - self.0 - .load_any(M::KEY, req)? - .downcast() - .map_err(|e| e.with_req(M::KEY, req)) + self.load_data(M::KEY, req) } } @@ -454,10 +451,21 @@ where { #[inline] fn load_data(&self, key: DataKey, req: DataRequest) -> Result, DataError> { - self.0 - .load_any(key, req)? - .downcast() - .map_err(|e| e.with_req(key, req)) + let any_response = AnyProvider::load_any(self.0, key, req)?; + Ok(DataResponse { + metadata: any_response.metadata, + payload: any_response + .payload + .and_then(|p| { + if req.metadata.drop_payload { + None + } else { + Some(p.downcast()) + } + }) + .transpose() + .map_err(|e| e.with_req(key, req))?, + }) } } diff --git a/provider/core/src/request.rs b/provider/core/src/request.rs index ac0d7af1659..f6e00a94c4a 100644 --- a/provider/core/src/request.rs +++ b/provider/core/src/request.rs @@ -52,6 +52,9 @@ impl fmt::Display for DataRequest<'_> { pub struct DataRequestMetadata { /// Silent requests do not log errors. This can be used for exploratory querying, such as fallbacks. pub silent: bool, + /// Whether to drop the payload from the [`DataResponse`](crate::DataResponse). This can be used + /// for exploratory queries where the returned data is not of interest. + pub drop_payload: bool, } /// A locale type optimized for use in fallbacking and the ICU4X data pipeline. diff --git a/provider/core/src/serde/mod.rs b/provider/core/src/serde/mod.rs index edd827c3121..e527d66965b 100644 --- a/provider/core/src/serde/mod.rs +++ b/provider/core/src/serde/mod.rs @@ -173,7 +173,13 @@ where metadata: buffer_response.metadata, payload: buffer_response .payload - .map(|p| p.into_deserialized(buffer_format)) + .and_then(|p| { + if req.metadata.drop_payload { + None + } else { + Some(p.into_deserialized(buffer_format)) + } + }) .transpose() .map_err(|e| e.with_req(key, req))?, }) diff --git a/provider/fs/src/fs_data_provider.rs b/provider/fs/src/fs_data_provider.rs index a78d0205e62..699c9edbb9a 100644 --- a/provider/fs/src/fs_data_provider.rs +++ b/provider/fs/src/fs_data_provider.rs @@ -85,9 +85,15 @@ impl BufferProvider for FsDataProvider { if !Path::new(&path).exists() { return Err(DataErrorKind::MissingLocale.with_req(key, req)); } - let buffer = fs::read(&path).map_err(|e| DataError::from(e).with_path_context(&path))?; let mut metadata = DataResponseMetadata::default(); metadata.buffer_format = Some(self.manifest.buffer_format); + if req.metadata.drop_payload { + return Ok(DataResponse { + metadata, + payload: None, + }); + } + let buffer = fs::read(&path).map_err(|e| DataError::from(e).with_path_context(&path))?; Ok(DataResponse { metadata, payload: Some(DataPayload::from_owned_buffer(buffer.into_boxed_slice())),