diff --git a/components/locale_canonicalizer/src/locale_canonicalizer.rs b/components/locale_canonicalizer/src/locale_canonicalizer.rs index 48596da81c7..1a8fafaaa03 100644 --- a/components/locale_canonicalizer/src/locale_canonicalizer.rs +++ b/components/locale_canonicalizer/src/locale_canonicalizer.rs @@ -165,8 +165,8 @@ fn uts35_check_language_rules( locale: &mut Locale, alias_data: &DataPayload, ) -> CanonicalizationResult { - let maybe_lang: Option> = locale.id.language.into(); - if let Some(lang) = maybe_lang { + if !locale.id.language.is_empty() { + let lang: TinyAsciiStr<3> = locale.id.language.into(); let replacement = if lang.len() == 2 { alias_data .get() @@ -557,14 +557,24 @@ impl LocaleCanonicalizer { return CanonicalizationResult::Unmodified; } - if let Some(language) = langid.language.into() { + if !langid.language.is_empty() { if let Some(region) = langid.region { - maximize_locale!(langid, data.language_region, language, region.into()); + maximize_locale!( + langid, + data.language_region, + langid.language.into(), + region.into() + ); } if let Some(script) = langid.script { - maximize_locale!(langid, data.language_script, language, script.into()); + maximize_locale!( + langid, + data.language_script, + langid.language.into(), + script.into() + ); } - maximize_locale!(langid, data.language, language); + maximize_locale!(langid, data.language, langid.language.into()); } if let Some(script) = langid.script { if let Some(region) = langid.region { diff --git a/components/locid/src/subtags/language.rs b/components/locid/src/subtags/language.rs index dd4cdbad71f..26f0d9938e0 100644 --- a/components/locid/src/subtags/language.rs +++ b/components/locid/src/subtags/language.rs @@ -6,7 +6,7 @@ use crate::parser::errors::ParserError; use core::fmt; use core::ops::RangeInclusive; use core::str::FromStr; -use tinystr::{tinystr, TinyAsciiStr}; +use tinystr::TinyAsciiStr; /// A language subtag (examples: `"en"`, `"csb"`, `"zh"`, `"und"`, etc.) /// @@ -38,11 +38,13 @@ use tinystr::{tinystr, TinyAsciiStr}; /// but that form has not been used and ICU4X does not support it right now. /// /// [`unicode_language_id`]: https://unicode.org/reports/tr35/#unicode_language_id -#[derive(Default, Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord, Copy)] -pub struct Language(Option>); +#[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord, Copy)] +pub struct Language(TinyAsciiStr<{ *LANGUAGE_LENGTH.end() }>); const LANGUAGE_LENGTH: RangeInclusive = 2..=3; -const UND_VALUE: TinyAsciiStr<3> = tinystr!(3, "und"); +// TODO(#348): Change this to invoke a const function. +// Safe because "und" is a valid language subtag +const UND: Language = Language(unsafe { TinyAsciiStr::from_bytes_unchecked(*b"und") }); impl Language { /// A constructor which takes a utf8 slice, parses it and @@ -73,11 +75,7 @@ impl Language { let value = s.to_ascii_lowercase(); - if value == UND_VALUE { - Ok(Self(None)) - } else { - Ok(Self(Some(value))) - } + Ok(Self(value)) } /// Deconstructs the [`Language`] into raw format to be consumed @@ -95,8 +93,8 @@ impl Language { /// let lang = unsafe { Language::from_raw_unchecked(raw) }; /// assert_eq!(lang, "en"); /// ``` - pub fn into_raw(self) -> Option<[u8; 3]> { - self.0.as_ref().map(TinyAsciiStr::all_bytes).copied() + pub fn into_raw(self) -> [u8; 3] { + *self.0.all_bytes() } /// Constructor which takes a raw value returned by @@ -119,11 +117,8 @@ impl Language { /// /// This function accepts a [`[u8; 3]`] that is expected to be a valid [`TinyAsciiStr<3>`] /// representing a [`Language`] subtag in canonical syntax. - pub const unsafe fn from_raw_unchecked(v: Option<[u8; 3]>) -> Self { - Self(match v { - Some(v) => Some(TinyAsciiStr::from_bytes_unchecked(v)), - None => None, - }) + pub const unsafe fn from_raw_unchecked(v: [u8; 3]) -> Self { + Self(TinyAsciiStr::from_bytes_unchecked(v)) } /// Returns the default undefined language "und". Same as [`default()`](Default::default()), but is `const`. @@ -139,7 +134,7 @@ impl Language { /// ``` #[inline] pub const fn und() -> Self { - Self(None) + UND } /// A helper function for displaying @@ -158,11 +153,12 @@ impl Language { /// /// `Notice`: For many use cases, such as comparison, /// [`Language`] implements [`PartialEq`]`<&`[`str`]`>` which allows for direct comparisons. + #[inline] pub fn as_str(&self) -> &str { - self.0.as_deref().unwrap_or("und") + self.0.as_str() } - /// Resets the [`Language`] subtag to an empty one. + /// Resets the [`Language`] subtag to an empty one (equal to `"und"`). /// /// # Examples /// @@ -178,11 +174,12 @@ impl Language { /// /// assert_eq!(lang.as_str(), "und"); /// ``` + #[inline] pub fn clear(&mut self) { - self.0.take(); + *self = UND } - /// Tests if the [`Language`] subtag is empty. + /// Tests if the [`Language`] subtag is empty (equal to `"und"`). /// /// # Examples /// @@ -198,8 +195,9 @@ impl Language { /// /// assert_eq!(lang.is_empty(), true); /// ``` + #[inline] pub fn is_empty(self) -> bool { - self.0.is_none() + self == UND } } @@ -224,7 +222,7 @@ impl writeable::Writeable for Language { #[inline] fn write_len(&self) -> writeable::LengthHint { - writeable::LengthHint::exact(self.0.map_or(3, |t| t.len())) + writeable::LengthHint::exact(self.0.len()) } } @@ -247,8 +245,14 @@ impl<'l> From<&'l Language> for &'l str { } } -impl From for Option> { +impl From for TinyAsciiStr<3> { fn from(input: Language) -> Self { - input.0.map(Into::into) + input.0 + } +} + +impl Default for Language { + fn default() -> Language { + Language::und() } } diff --git a/provider/cldr/src/transform/locale_canonicalizer/aliases.rs b/provider/cldr/src/transform/locale_canonicalizer/aliases.rs index a726ebde77c..db0ef0183ee 100644 --- a/provider/cldr/src/transform/locale_canonicalizer/aliases.rs +++ b/provider/cldr/src/transform/locale_canonicalizer/aliases.rs @@ -138,8 +138,8 @@ impl From<&cldr_serde::aliases::Resource> for AliasesV1 { continue; } - let maybe_lang: Option> = langid.language.into(); - if let Some(lang) = maybe_lang { + if !langid.language.is_empty() { + let lang: TinyAsciiStr<3> = langid.language.into(); if langid.region.is_none() && langid.variants.is_empty() { // Relatively few aliases exist for two character language identifiers, // so we store them separately to not slow down canonicalization of @@ -275,9 +275,9 @@ fn test_rules_cmp() { assert_eq!(union_size(&rules[3]), 2); rules.sort_unstable_by(rules_cmp); - assert_eq!(rules[0], "und-hepburn-heploc"); - assert_eq!(rules[1], "en-GB"); - assert_eq!(rules[2], "fr-CA"); + assert_eq!(rules[0], "en-GB"); + assert_eq!(rules[1], "fr-CA"); + assert_eq!(rules[2], "und-hepburn-heploc"); assert_eq!(rules[3], "CA"); } diff --git a/provider/cldr/src/transform/locale_canonicalizer/likely_subtags.rs b/provider/cldr/src/transform/locale_canonicalizer/likely_subtags.rs index 5a98852e4d0..38518626c44 100644 --- a/provider/cldr/src/transform/locale_canonicalizer/likely_subtags.rs +++ b/provider/cldr/src/transform/locale_canonicalizer/likely_subtags.rs @@ -108,7 +108,8 @@ impl From<&cldr_serde::likely_subtags::Resource> for LikelySubtagsV1 { }; for entry in other.supplemental.likely_subtags.iter() { - if let Some(lang) = entry.0.language.into() { + if !entry.0.language.is_empty() { + let lang = entry.0.language.into(); if let Some(script) = entry.0.script { language_script.insert((lang, script.into()), extract_result(entry)); } else if let Some(region) = entry.0.region { diff --git a/provider/testdata/data/json/locale_canonicalizer/aliases@1.json b/provider/testdata/data/json/locale_canonicalizer/aliases@1.json index 5192a7c731e..5c31e7fb05a 100644 --- a/provider/testdata/data/json/locale_canonicalizer/aliases@1.json +++ b/provider/testdata/data/json/locale_canonicalizer/aliases@1.json @@ -1,10 +1,6 @@ { "language": [], "language_variants": [ - [ - "und-hepburn-heploc", - "und-alalc97" - ], [ "aa-saaho", "ssy" @@ -29,6 +25,10 @@ "no-nynorsk", "nn" ], + [ + "und-hepburn-heploc", + "und-alalc97" + ], [ "zh-guoyu", "zh" diff --git a/provider/testdata/data/testdata.postcard b/provider/testdata/data/testdata.postcard index e90ef6ee1e5..c592f3c5830 100644 Binary files a/provider/testdata/data/testdata.postcard and b/provider/testdata/data/testdata.postcard differ