Skip to content

Commit

Permalink
Move DatagenProvider into its own crate (#5016)
Browse files Browse the repository at this point in the history
#4721 

`provider/datagen/src/{provider.rs, provider/, transform/}` move to
`provider/bikeshed/src`.

Stubdata moves to `provider/data/{component}/stubdata`, as it's a
quality-of-life thing and not required for testing or reviewing the
`DatagenProvider` (also, `../stubdata` is a much nicer value for
`ICU4X_DATA_DIR` than `../../../bikeshed/tests/data/baked`). JSON
testdata moves to `provider/bikeshed/data/debug`, as it's used only for
debugging/reviewing, and shouldn't be in `tests/data`, which is data
that actually affects tests.
  • Loading branch information
robertbastian authored Jun 20, 2024
1 parent d96b58e commit b4fe46a
Show file tree
Hide file tree
Showing 11,522 changed files with 3,839 additions and 3,349 deletions.
The diff you're trying to view is too large. We only load the first 3000 changed files.
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ To build all code paths, improve build times in VSCode, and prevent locking the
"rust-analyzer.cargo.features": "all",
"rust-analyzer.cargo.extraEnv": {
"CARGO_TARGET_DIR": "${workspaceFolder}/target/vscode",
"ICU4X_DATA_DIR": "../../../datagen/tests/data/baked"
"ICU4X_DATA_DIR": "../stubdata"
}
}
```
Expand Down
33 changes: 27 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ members = [
# Provider
"provider/adapters",
"provider/baked",
"provider/bikeshed",
"provider/blob",
"provider/core",
"provider/core/macros",
Expand Down Expand Up @@ -151,6 +152,7 @@ icu_harfbuzz = { version = "~0.2.0", path = "ffi/harfbuzz", default-features = f

# Provider
icu_datagen = { version = "~1.5.0", path = "provider/datagen", default-features = false }
icu_datagen_bikeshed = { version = "~1.5.0", path = "provider/bikeshed", default-features = false }
icu_provider = { version = "~1.5.0", path = "provider/core", default-features = false }
icu_provider_macros = { version = "~1.5.0", path = "provider/core/macros", default-features = false }
icu_provider_adapters = { version = "~1.5.0", path = "provider/adapters", default-features = false }
Expand Down
2 changes: 1 addition & 1 deletion Makefile.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ PINNED_CI_NIGHTLY = { value = "nightly-2023-08-08", condition = { env_not_set =
[tasks.quick]
description = "Run quick version of all lints and builds (useful before pushing to GitHub)"
category = "ICU4X Development"
env = { "ICU4X_DATA_DIR" = "${CARGO_MAKE_WORKSPACE_WORKING_DIRECTORY}/provider/datagen/tests/data/baked" }
env = { "ICU4X_DATA_DIR" = "../stubdata" }
run_task.name = [
"ci-job-fmt",
"ci-job-clippy",
Expand Down
2 changes: 2 additions & 0 deletions provider/bikeshed/.gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
data/** linguist-generated=true
tests/data/** linguist-generated=true
100 changes: 100 additions & 0 deletions provider/bikeshed/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# This file is part of ICU4X. For terms of use, please see the file
# called LICENSE at the top level of the ICU4X source tree
# (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

[package]
name = "icu_datagen_bikeshed"
description = "A data provider based on CLDR and ICU data."
license = "Unicode-3.0"
include = [
"data/**/*",
"!data/debug/**/*",
"src/**/*",
"examples/**/*",
"benches/**/*",
"tests/**/*",
"Cargo.toml",
"LICENSE",
"README.md",
]

authors.workspace = true
categories.workspace = true
edition.workspace = true
homepage.workspace = true
repository.workspace = true
rust-version.workspace = true
version.workspace = true

[package.metadata.docs.rs]
all-features = true

[dependencies]

# ICU components
icu = { workspace = true, features = ["datagen"] }

# ICU infrastructure
calendrical_calculations = { workspace = true }
icu_codepointtrie_builder = { workspace = true }
icu_collections = { workspace = true, features = ["serde"] }
icu_pattern = { workspace = true, features = ["alloc"] }
icu_provider = { workspace = true, features = ["std", "logging", "datagen"]}
icu_provider_adapters = { workspace = true }
icu_registry = { workspace = true }
litemap = { workspace = true, features = ["serde"] }
tinystr = { workspace = true, features = ["alloc", "serde", "zerovec"] }
writeable = { workspace = true }
zerotrie = { workspace = true, features = ["alloc"] }
zerovec = { workspace = true, features = ["serde", "yoke"] }

# External dependencies
displaydoc = { workspace = true }
either = { workspace = true }
elsa = { workspace = true }
itertools = { workspace = true }
log = { workspace = true }
ndarray = { workspace = true }
serde = { workspace = true, features = ["derive", "alloc"] }
serde_json = { workspace = true }
serde-aux = { workspace = true }
toml = { workspace = true }
twox-hash = { workspace = true }
zip = { workspace = true, features = ["deflate"] }

# `networking` feature
ureq = { workspace = true, optional = true}

# `experimental` feature
icu_experimental = { workspace = true, features = ["datagen"], optional = true }
num-bigint = { workspace = true, optional = true }
num-rational = { workspace = true, optional = true }
num-traits = { workspace = true, optional = true }

[dev-dependencies]
postcard = { workspace = true, features = ["alloc"] }
icu_datagen = { workspace = true, features = ["experimental", "fs_exporter", "baked_exporter", "rayon"] }
icu_provider = { workspace = true, features = ["deserialize_postcard_1"] }
icu_segmenter = { path = "../../components/segmenter", features = ["lstm"] }
simple_logger = { workspace = true }

[features]
default = ["use_wasm", "networking"]
# Use wasm for building codepointtries
use_wasm = ["icu_codepointtrie_builder/wasm"]
# Use local ICU4C libraries for building codepointtries
# (has no effect if `use_wasm` is also enabled).
# If neither `use_wasm` nor `use_icu4c` is enabled,
# rule-based segmenter data will not be generated.
use_icu4c = ["icu_codepointtrie_builder/icu4c"]
networking = ["dep:ureq"]
experimental = [
"icu/experimental",
"dep:num-bigint",
"dep:num-rational",
"dep:num-traits",
]

[package.metadata.cargo-all-features]
# We don't need working CPT builders for check
skip_feature_sets = [["use_icu4c"], ["use_wasm"]]
46 changes: 46 additions & 0 deletions provider/bikeshed/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
UNICODE LICENSE V3

COPYRIGHT AND PERMISSION NOTICE

Copyright © 2020-2024 Unicode, Inc.

NOTICE TO USER: Carefully read the following legal agreement. BY
DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR
SOFTWARE, YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT
DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE.

Permission is hereby granted, free of charge, to any person obtaining a
copy of data files and any associated documentation (the "Data Files") or
software and any associated documentation (the "Software") to deal in the
Data Files or Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, and/or sell
copies of the Data Files or Software, and to permit persons to whom the
Data Files or Software are furnished to do so, provided that either (a)
this copyright and permission notice appear with all copies of the Data
Files or Software, or (b) this copyright and permission notice appear in
associated Documentation.

THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
THIRD PARTY RIGHTS.

IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA
FILES OR SOFTWARE.

Except as contained in this notice, the name of a copyright holder shall
not be used in advertising or otherwise to promote the sale, use or other
dealings in these Data Files or Software without prior written
authorization of the copyright holder.

SPDX-License-Identifier: Unicode-3.0


Portions of ICU4X may have been adapted from ICU4C and/or ICU4J.
ICU 1.8.1 to ICU 57.1 © 1995-2016 International Business Machines Corporation and others.
24 changes: 24 additions & 0 deletions provider/bikeshed/README.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit b4fe46a

Please sign in to comment.