Merge branch 'main' into segiter

unicode-org · May 3, 2023 · 20a4655 · 20a4655
2 parents 0716570 + 1a24d05
commit 20a4655
Show file tree

Hide file tree

Showing 64 changed files with 594 additions and 486 deletions.
diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml
@@ -7,8 +7,6 @@ name:                           Coverage
 on:
   push:
     branches: [ main ]
-  pull_request:
-    branches: '*'
 
 jobs:
   test:

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,20 +1,34 @@
 # Changelog
 
-
-## icu4x 1.2.1 & 1.2.2 (Apr 17, 2023)
+## icu4x 1.2.x (Apr 17, 2023)
 
 Note: A subset of crates received patch releases in the 1.2 stream.
 
-- `icu_capi`
+- `databake`: 0.1.5
+  - Fixed [#3356](https://github.com/unicode-org/icu4x/pull/3356), adding `allow` for clippy false-positives
+- `icu_capi` 1.2.1
   - Fixed [#3344](https://github.com/unicode-org/icu4x/pull/3344), `buffer_provider` feature accidentally pulling in extra crates
+- `icu_capi` 1.2.2
+  - Use `intptr_t` instead of `ssize_t` for portability ([diplomat #326](https://github.com/rust-diplomat/diplomat/issues/326))
+
 - `icu_datagen` 1.2.1
   - Fixed [#3339](https://github.com/unicode-org/icu4x/pull/3339), incorrect Cargo features
-- `icu_datagen` 1.2.2
-  - Fixed [#3354](https://github.com/unicode-org/icu4x/pull/3354), ability to run datagen without support for `icu_provider_fs::FsDataProvider`
-- `icu_locid_transform`
+- `icu_datagen` 1.2.3
+  - Fixed [#3355](https://github.com/unicode-org/icu4x/pull/3355), adding MSRV annotations to generated code
+  - Fixed [#3369](https://github.com/unicode-org/icu4x/pull/3369), making datagen call `rustfmt` directly instead of using the `rust-format` dependency
+- `icu_datagen` 1.2.4
+  - Remove dependency on `clap`'s `"cargo"` feature to better support non-Cargo users (#3388)
+- `icu_datagen` 1.2.5
+  - Remove runtime dependency on segmenter data pulled from the cargo cache (#3391)
+- `icu_locid_transform` 1.2.1
   - Fixed [#3332](https://github.com/unicode-org/icu4x/issues/3332), missing `+?Sized` bound
-- `icu_segmenter`
+- `icu_segmenter` 1.2.1
   - Fixed [#3341](https://github.com/unicode-org/icu4x/pull/3341), incorrect results on some strings with mixed scripts
+- `icu_provider` 1.2.1
+  - Do not autoenable `postcard/use-std` ([#3376](https://github.com/unicode-org/icu4x/pull/3376))
+- `icu_datetime` 1.2.1
+  - Remove superfluous `JapaneseErasV1Marker` provider bounds on `TypedZonedDateTimeFormatter` ([#3379](https://github.com/unicode-org/icu4x/pull/3379))
+
 
 ## icu4x 1.2 (Apr 13, 2023)
 

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -204,8 +204,17 @@ The following PR has one non-blocking review, one blocking review, one approval,
 
 ## Licenses
 
-See the file called [LICENSE](LICENSE) for terms applying to your contribution.
-When creating the PR, you will be asked to [sign the CLA](https://cla-assistant.io/unicode-org/icu4x).
+### Contributor License Agreement
+
+In order to contribute to this project, the Unicode Consortium must have on file a Contributor License Agreement (CLA) covering your contributions, either an individual or a corporate CLA. Pull Requests will not be merged until the correct CLA is signed. Which version needs to be signed depends on who owns the contribution being made: you as the individual making the contribution or your employer. _It is your responsibility to determine whether your contribution is owned by your employer._ Please review [The Unicode Consortium Intellectual Property, Licensing, and Technical Contribution Policies][policies] for further guidance on which CLA to sign, as well as other information and guidelines regarding the Consortium’s licensing and technical contribution policies and procedures.
+
+- **Individual CLA**: If you have determined that the Individual CLA is appropriate, just open a Pull Request and you will have the opportunity to click to accept the Individual CLA.
+
+- **Corporate CLA**: If you have determined that a Corporate CLA is appropriate, please check the [public list of Corporate CLAs][unicode-corporate-clas] that the Consortium has on file. If your employer has already signed a CLA, then just open a Pull Request and you will have the opportunity to click to confirm that your employer has already signed a CLA. If your employer has not already signed a CLA, you will need to arrange for your employer to sign the Corporate CLA, as described in [How to Sign a Unicode CLA][signing].
+
+Unless otherwise noted in the [LICENSE](./LICENSE) file, this project is released under the free and open-source [Unicode License][unicode-license], also known as Unicode, Inc. License Agreement - Data Files and Software.
+
+SPDX-License-Identifier: Unicode-DFS-2016
 
 ### New files
 
@@ -277,3 +286,7 @@ _(followed by the original boilerplate from Unicode data)_
 Please discuss first.
 
 [style_guide]: docs/process/style_guide.md
+[policies]: https://www.unicode.org/policies/licensing_policy.html
+[unicode-corporate-clas]: https://www.unicode.org/policies/corporate-cla-list/
+[signing]: https://www.unicode.org/policies/licensing_policy.html#signing
+[unicode-license]: https://www.unicode.org/license.txt
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/components/datetime/Cargo.toml b/components/datetime/Cargo.toml
@@ -5,7 +5,7 @@
 [package]
 name = "icu_datetime"
 description = "API for formatting date and time to user readable textual representation"
-version = "1.2.0"
+version = "1.2.1"
 authors = ["The ICU4X Project Developers"]
 edition = "2021"
 readme = "README.md"

diff --git a/components/datetime/src/zoned_datetime.rs b/components/datetime/src/zoned_datetime.rs
@@ -4,7 +4,7 @@
 
 use alloc::string::String;
 use core::marker::PhantomData;
-use icu_calendar::provider::{JapaneseErasV1Marker, WeekDataV1Marker};
+use icu_calendar::provider::WeekDataV1Marker;
 use icu_decimal::provider::DecimalSymbolsV1Marker;
 use icu_plurals::provider::OrdinalV1Marker;
 use icu_provider::prelude::*;
@@ -147,7 +147,6 @@ impl<C: CldrCalendar> TypedZonedDateTimeFormatter<C> {
             + DataProvider<provider::time_zones::MetazoneSpecificNamesShortV1Marker>
             + DataProvider<OrdinalV1Marker>
             + DataProvider<DecimalSymbolsV1Marker>
-            + DataProvider<JapaneseErasV1Marker>
             + ?Sized,
     {
         let patterns = PatternSelector::for_options_experimental(
@@ -232,7 +231,6 @@ impl<C: CldrCalendar> TypedZonedDateTimeFormatter<C> {
             + DataProvider<provider::time_zones::MetazoneSpecificNamesShortV1Marker>
             + DataProvider<OrdinalV1Marker>
             + DataProvider<DecimalSymbolsV1Marker>
-            + DataProvider<JapaneseErasV1Marker>
             + ?Sized,
     {
         let patterns = PatternSelector::for_options(

diff --git a/components/plurals/README.md b/components/plurals/README.md
diff --git a/components/plurals/src/lib.rs b/components/plurals/src/lib.rs
@@ -4,7 +4,7 @@
 
 //! Determine the plural category appropriate for a given number in a given language.
 //!
-//! This module is published as its own crate ([`icu_plural`](https://docs.rs/icu_plural/latest/icu_plural/))
+//! This module is published as its own crate ([`icu_plurals`](https://docs.rs/icu_plurals/latest/icu_plurals/))
 //! and as part of the [`icu`](https://docs.rs/icu/latest/icu/) crate. See the latter for more details on the ICU4X project.
 //!
 //! For example in English, when constructing a message

diff --git a/components/segmenter/Cargo.toml b/components/segmenter/Cargo.toml
@@ -38,7 +38,7 @@ zerovec = { version = "0.9.4", path = "../../utils/zerovec", features = ["yoke"]
 databake = { version = "0.1.3", path = "../../utils/databake", optional = true, features = ["derive"] }
 serde = { version = "1.0", default-features = false, features = ["derive", "alloc"], optional = true }
 
-num-traits = { version = "0.2", default-features = false, features = ["libm"], optional = true }
+libm = { version = "0.2", default-features = false, optional = true }
 
 [dev-dependencies]
 criterion = "0.4"
@@ -56,7 +56,7 @@ default = ["auto"]
 std = ["icu_collections/std", "icu_locid/std", "icu_provider/std"]
 serde = ["dep:serde", "zerovec/serde", "icu_collections/serde", "icu_provider/serde"]
 datagen = ["serde", "dep:databake", "zerovec/databake", "icu_collections/databake"]
-lstm = ["dep:num-traits"]
+lstm = ["dep:libm"]
 auto = ["lstm"] # Enabled try_new_auto_unstable constructors
 
 [lib]

diff --git a/components/segmenter/src/complex/dictionary.rs b/components/segmenter/src/complex/dictionary.rs
@@ -7,7 +7,6 @@ use crate::indices::Utf16Indices;
 use crate::provider::*;
 use core::str::CharIndices;
 use icu_collections::char16trie::{Char16Trie, TrieResult};
-use icu_provider::prelude::*;
 
 /// A trait for dictionary-based iterators
 trait DictionaryType<'l, 's> {
@@ -144,14 +143,11 @@ pub(super) struct DictionarySegmenter<'l> {
 
 impl<'l> DictionarySegmenter<'l> {
     pub(super) fn new(
-        dict: &'l DataPayload<UCharDictionaryBreakDataV1Marker>,
-        grapheme: &'l DataPayload<GraphemeClusterBreakDataV1Marker>,
+        dict: &'l UCharDictionaryBreakDataV1<'l>,
+        grapheme: &'l RuleBreakDataV1<'l>,
     ) -> Self {
         // TODO: no way to verify trie data
-        Self {
-            dict: dict.get(),
-            grapheme: grapheme.get(),
-        }
+        Self { dict, grapheme }
     }
 
     /// Create a dictionary based break iterator for an `str` (a UTF-8 string).
@@ -181,7 +177,8 @@ impl<'l> DictionarySegmenter<'l> {
 #[cfg(feature = "serde")]
 mod tests {
     use super::*;
-    use crate::{provider::DictionaryForWordOnlyAutoV1Marker, LineSegmenter, WordSegmenter};
+    use crate::{LineSegmenter, WordSegmenter};
+    use icu_provider::prelude::*;
     use icu_provider_adapters::fork::ForkByKeyProvider;
     use icu_provider_fs::FsDataProvider;
     use std::path::PathBuf;
@@ -211,19 +208,16 @@ mod tests {
     #[test]
     fn cj_dictionary_test() {
         let provider = get_segmenter_testdata_provider();
-        let dict_payload: DataPayload<crate::provider::UCharDictionaryBreakDataV1Marker> =
-            DataProvider::<DictionaryForWordOnlyAutoV1Marker>::load(
-                &icu_testdata::buffer().as_deserializing(),
-                DataRequest {
-                    locale: &icu_locid::locale!("ja").into(),
-                    metadata: Default::default(),
-                },
-            )
+        let dict_payload: DataPayload<DictionaryForWordOnlyAutoV1Marker> = provider
+            .as_deserializing()
+            .load(DataRequest {
+                locale: &icu_locid::locale!("ja").into(),
+                metadata: Default::default(),
+            })
             .unwrap()
             .take_payload()
-            .unwrap()
-            .cast();
-        let grph_payload: DataPayload<crate::provider::GraphemeClusterBreakDataV1Marker> = provider
+            .unwrap();
+        let grph_payload: DataPayload<GraphemeClusterBreakDataV1Marker> = provider
             .as_deserializing()
             .load(DataRequest {
                 locale: &icu_locid::locale!("ja").into(),
@@ -234,7 +228,7 @@ mod tests {
             .unwrap();
         let word_segmenter =
             WordSegmenter::try_new_dictionary_with_buffer_provider(&provider).unwrap();
-        let dict_segmenter = DictionarySegmenter::new(&dict_payload, &grph_payload);
+        let dict_segmenter = DictionarySegmenter::new(dict_payload.get(), grph_payload.get());
 
         // Match case
         let s = "龟山岛龟山岛";