Skip to content

Commit

Permalink
Move the icu4x-datagen binary into its own crate (#5011)
Browse files Browse the repository at this point in the history
  • Loading branch information
robertbastian authored Jun 12, 2024
1 parent 12be3b6 commit 5f8de9c
Show file tree
Hide file tree
Showing 23 changed files with 255 additions and 143 deletions.
34 changes: 28 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ members = [
"provider/datagen",
"provider/fs",
"provider/registry",
"provider/icu4x-datagen",

# Baked data
"provider/data/calendar",
Expand Down Expand Up @@ -89,13 +90,14 @@ members = [
"tools/ffi_coverage",
"tools/testdata-scripts",
"tools/md-tests",
"tools/graveyard",
]
# Note: Workspaces in subdirectories, such as tutorials/crates, are
# implicitly excluded from the main workspace.
exclude = [
# Tutorials are tested outside the workspace to simulate external users
"tutorials",
# Don't publish the graveyard
"tools/graveyard"
]

[workspace.package]
Expand Down
2 changes: 1 addition & 1 deletion components/datetime/tests/data/gen.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/sh
cargo run -p icu_datagen -- \
cargo run -p icu4x-datagen -- \
--markers "datetime/gregory/datelengths@1" "datetime/gregory/datesymbols@1" "datetime/timelengths@1" "datetime/timesymbols@1" "decimal/symbols@1" "time_zone/formats@1" "time_zone/specific_short@1" \
--locales en \
--format blob2 \
Expand Down
2 changes: 1 addition & 1 deletion components/experimental/tests/transliterate/data/gen.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/sh
cargo run -p icu_datagen --features experimental_components -- \
cargo run -p icu4x-datagen --features experimental_components -- \
--markers "transliterator/rules@1" \
--locales full \
--runtime-fallback-location external \
Expand Down
37 changes: 1 addition & 36 deletions provider/datagen/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ include = [
"LICENSE",
"README.md",
]
default-run = "icu4x-datagen"

authors.workspace = true
categories.workspace = true
Expand Down Expand Up @@ -93,19 +92,14 @@ twox-hash = { workspace = true, optional = true }
ureq = { workspace = true, optional = true }
zip = { workspace = true, features = ["deflate"], optional = true }

# "bin" feature
clap = { workspace = true, optional = true, features = ["derive"] }
eyre = { workspace = true, optional = true }
simple_logger = { workspace = true, optional = true }

[dev-dependencies]
elsa = { workspace = true }
icu = { path = "../../components/icu" }
postcard = { workspace = true }
simple_logger = { workspace = true }

[features]
default = ["bin", "use_wasm", "networking", "rayon", "fs_exporter", "blob_exporter", "baked_exporter", "provider"]
default = ["use_wasm", "networking", "rayon", "fs_exporter", "blob_exporter", "baked_exporter", "provider"]
provider = [
"icu_calendar/datagen",
"icu_casemap/datagen",
Expand Down Expand Up @@ -144,24 +138,7 @@ provider = [
]
baked_exporter = ["dep:icu_provider_baked"]
blob_exporter = ["dep:icu_provider_blob"]
blob_input = [
"dep:icu_provider_blob",
"icu_calendar/datagen",
"icu_casemap/datagen",
"icu_collator/datagen",
"icu_datetime/datagen",
"icu_decimal/datagen",
"icu_list/datagen",
"icu_locale/datagen",
"icu_normalizer/datagen",
"icu_plurals/datagen",
"icu_properties/datagen",
"icu_segmenter/datagen",
"icu_timezone/datagen",
"icu_experimental?/datagen",
]
fs_exporter = ["dep:icu_provider_fs"]
bin = ["dep:clap", "dep:eyre", "dep:simple_logger"]
rayon = ["dep:rayon"]
# Use wasm for building codepointtries
use_wasm = ["icu_codepointtrie_builder?/wasm"]
Expand All @@ -184,21 +161,9 @@ experimental_components = [
"icu/experimental",
]

[[bin]]
name = "icu4x-datagen"
path = "src/bin/icu4x-datagen.rs"
required-features = ["bin"]

[package.metadata.cargo-all-features]
# We don't need working CPT builders for check
skip_feature_sets = [["use_icu4c"], ["use_wasm"]]
skip_optional_dependencies = true
always_include_features = [
# Always include the experimental deps because we want to all-or-nothing them
# and the nothing case is already tested in regular check CI
"experimental_components",
# The binary requires either provider or blob_input to build, and blob_input is cheaper
"blob_input"
]
# We have a *lot* of features here
max_combination_size = 2
28 changes: 2 additions & 26 deletions provider/datagen/README.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

48 changes: 2 additions & 46 deletions provider/datagen/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,12 @@
#![allow(clippy::needless_doctest_main)]
//! `icu_datagen` is a library to generate data files that can be used in ICU4X data providers.
//!
//! Data files can be generated either programmatically (i.e. in `build.rs`), or through a
//! command-line utility.
//!
//! For command-line usage, see the [`icu4x-datagen` binary](https://crates.io/crates/icu4x-datagen).
//!
//! Also see our [datagen tutorial](https://github.com/unicode-org/icu4x/blob/main/tutorials/data_management.md).
//!
//! # Examples
//!
//! ## Rust API
//!
//! ```no_run
//! use icu_datagen::blob_exporter::*;
//! use icu_datagen::prelude::*;
Expand All @@ -32,22 +28,6 @@
//! .unwrap();
//! ```
//!
//! ## Command line
//!
//! The command line interface can be installed through Cargo.
//!
//! ```bash
//! $ cargo install icu_datagen
//! ```
//!
//! Once the tool is installed, you can invoke it like this:
//!
//! ```bash
//! $ icu4x-datagen --markers all --locales de en-AU --format blob --out data.postcard
//! ```
//!
//! More details can be found by running `--help`.
//!
//! # Cargo features
//!
//! This crate has a lot of dependencies, some of which are not required for all operating modes. These default Cargo features
Expand All @@ -68,13 +48,9 @@
//! * enables parallelism during export
//! * `use_wasm` / `use_icu4c`
//! * see the documentation on [`icu_codepointtrie_builder`](icu_codepointtrie_builder#build-configuration)
//! * `bin`
//! * required by the CLI and enabled by default to make `cargo install` work
//! * `icu_experimental`
//! * `experimental_components`
//! * enables data generation for markers defined in the unstable `icu_experimental` crate
//! * note that this feature affects the behaviour of `all_markers`
//!
//! The meta-feature `experimental_components` is available to activate all experimental components.

#![cfg_attr(
not(test),
Expand Down Expand Up @@ -212,26 +188,6 @@ macro_rules! cb {
Some(Ok(marker)) => Some(marker)
}
}

// Exported helper macro (hidden from rustdoc) that takes a provider type and
// forwards it to `icu_provider::make_exportable_provider!` together with a
// list of data markers.
//
// The marker list always begins with `HelloWorldV1Marker`, followed by every
// `$marker`, and then every `$emarker`; each `$emarker` entry is only compiled
// in when the `experimental_components` Cargo feature is enabled.
//
// NOTE(review): `$marker` and `$emarker` are not bound by this macro's own
// `($ty:ty)` matcher; presumably they are repetition variables supplied by an
// enclosing macro that is not fully visible here — confirm against the
// surrounding definition.
#[macro_export]
#[doc(hidden)] // macro
macro_rules! make_exportable_provider {
($ty:ty) => {
icu_provider::make_exportable_provider!(
$ty,
[
icu_provider::hello_world::HelloWorldV1Marker,
$(
$marker,
)+
$(
#[cfg(feature = "experimental_components")]
$emarker,
)+
]
);
}
}
}
}
icu_registry::registry!(cb);
Expand Down
2 changes: 1 addition & 1 deletion provider/datagen/src/transform/cldr/calendar/japanese.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ impl DatagenProvider {
return Err(DataError::custom(
"Era data has changed! This can be for two reasons: Either the CLDR locale data for Japanese eras has \
changed in an incompatible way, or there is a new Japanese era. Run \
`ICU4X_SKIP_JAPANESE_INTEGRITY_CHECK=1 cargo run -p icu_datagen -- --markers calendar/japanext@1 --format dir --syntax json \
`ICU4X_SKIP_JAPANESE_INTEGRITY_CHECK=1 cargo run -p icu4x-datagen -- --markers calendar/japanext@1 --format dir --syntax json \
--out provider/datagen/data/japanese-golden --pretty --overwrite` in the icu4x repo and inspect the diff to \
check which situation it is. If a new era has been introduced, commit the diff, if not, it's likely that japanese.rs \
in icu_datagen will need to be updated to handle the data changes."
Expand Down
Loading

0 comments on commit 5f8de9c

Please sign in to comment.