Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add BlobSchema V2 with ZeroTrie #4207

Merged
merged 10 commits into from
Nov 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/artifacts-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ jobs:
- components/segmenter
- experimental/transliterate
- experimental/zerotrie
- provider/blob
- utils/fixed_decimal
- utils/litemap
- utils/tinystr
Expand Down
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions provider/blob/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,17 @@ postcard = { version = "1.0.0", default-features = false, features = ["alloc"] }
serde = { version = "1.0", default-features = false, features = ["alloc"] }
writeable = {workspace = true }
zerovec = { workspace = true, features = ["serde", "yoke"] }
zerotrie = { workspace = true, features = ["serde", "zerovec"] }

log = { version = "0.4", optional = true }

[dev-dependencies]
icu_locid = { path = "../../components/locid", features = ["serde"] }
icu_datagen = { path = "../../provider/datagen", features = ["networking"] }

[target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies]
criterion = "0.4"
robertbastian marked this conversation as resolved.
Show resolved Hide resolved

[features]
std = ["icu_provider/std"]
export = [
Expand All @@ -40,4 +44,10 @@ export = [
"postcard/alloc",
"std",
"zerovec/serde",
"zerotrie/alloc",
"zerotrie/litemap",
]

[[bench]]
name = "blob_version_bench"
harness = false
62 changes: 62 additions & 0 deletions provider/blob/benches/blob_version_bench.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

extern crate alloc;

use criterion::{black_box, criterion_group, criterion_main, Criterion};
use icu_provider::datagen::IterableDataProvider;
use icu_provider::hello_world::*;
use icu_provider::prelude::*;
use icu_provider_blob::BlobDataProvider;

const BLOB_V1: &[u8] = include_bytes!("../tests/data/v1.postcard");
const BLOB_V2: &[u8] = include_bytes!("../tests/data/v2.postcard");

fn blob_version_bench(c: &mut Criterion) {
c.bench_function("provider/construct/v1", |b| {
b.iter(|| BlobDataProvider::try_new_from_static_blob(black_box(BLOB_V1)).unwrap());
});
c.bench_function("provider/construct/v2", |b| {
b.iter(|| BlobDataProvider::try_new_from_static_blob(black_box(BLOB_V1)).unwrap());
});

let hello_world_provider = HelloWorldProvider;
let locales = hello_world_provider.supported_locales().unwrap();

c.bench_function("provider/read/v1", |b| {
let provider = BlobDataProvider::try_new_from_static_blob(black_box(BLOB_V1)).unwrap();
b.iter(|| {
for locale in black_box(&locales).iter() {
black_box(&provider)
.load_buffer(
HelloWorldV1Marker::KEY,
DataRequest {
locale,
metadata: Default::default(),
},
)
.unwrap();
}
});
});
c.bench_function("provider/read/v2", |b| {
let provider = BlobDataProvider::try_new_from_static_blob(black_box(BLOB_V2)).unwrap();
b.iter(|| {
for locale in black_box(&locales).iter() {
black_box(&provider)
.load_buffer(
HelloWorldV1Marker::KEY,
DataRequest {
locale,
metadata: Default::default(),
},
)
.unwrap();
}
});
});
}

criterion_group!(benches, blob_version_bench,);
criterion_main!(benches);
133 changes: 77 additions & 56 deletions provider/blob/src/blob_data_provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use crate::blob_schema::{BlobSchema, BlobSchemaV1};
use crate::blob_schema::BlobSchema;
use alloc::boxed::Box;
use icu_provider::buf::BufferFormat;
use icu_provider::prelude::*;
Expand Down Expand Up @@ -38,7 +38,7 @@ use yoke::*;
/// // Read an ICU4X data blob dynamically:
/// let blob = std::fs::read(concat!(
/// env!("CARGO_MANIFEST_DIR"),
/// "/tests/data/hello_world.postcard",
/// "/tests/data/v2.postcard",
/// ))
/// .expect("Reading pre-computed postcard buffer");
///
Expand Down Expand Up @@ -69,7 +69,7 @@ use yoke::*;
/// // Read an ICU4X data blob statically:
/// const HELLO_WORLD_BLOB: &[u8] = include_bytes!(concat!(
/// env!("CARGO_MANIFEST_DIR"),
/// "/tests/data/hello_world.postcard"
/// "/tests/data/v2.postcard"
/// ));
///
/// // Create a DataProvider from it:
Expand All @@ -87,7 +87,7 @@ use yoke::*;
/// ```
#[derive(Clone)]
pub struct BlobDataProvider {
data: Yoke<BlobSchemaV1<'static>, Option<Cart>>,
data: Yoke<BlobSchema<'static>, Option<Cart>>,
}

impl core::fmt::Debug for BlobDataProvider {
Expand All @@ -103,7 +103,7 @@ impl BlobDataProvider {
pub fn try_new_from_blob(blob: Box<[u8]>) -> Result<Self, DataError> {
Ok(Self {
data: Cart::try_make_yoke(blob, |bytes| {
BlobSchema::deserialize_v1(&mut postcard::Deserializer::from_bytes(bytes))
BlobSchema::deserialize_and_check(&mut postcard::Deserializer::from_bytes(bytes))
})?,
})
}
Expand All @@ -112,7 +112,7 @@ impl BlobDataProvider {
/// [`try_new_from_blob`](BlobDataProvider::try_new_from_blob) and is allocation-free.
pub fn try_new_from_static_blob(blob: &'static [u8]) -> Result<Self, DataError> {
Ok(Self {
data: Yoke::new_owned(BlobSchema::deserialize_v1(
data: Yoke::new_owned(BlobSchema::deserialize_and_check(
&mut postcard::Deserializer::from_bytes(blob),
)?),
})
Expand Down Expand Up @@ -150,61 +150,82 @@ mod test {

#[test]
fn test_empty() {
let mut blob: Vec<u8> = Vec::new();

{
let mut exporter = BlobExporter::new_with_sink(Box::new(&mut blob));

exporter.flush(HelloWorldV1Marker::KEY).unwrap();

exporter.close().unwrap();
for version in [1, 2] {
let mut blob: Vec<u8> = Vec::new();

{
robertbastian marked this conversation as resolved.
Show resolved Hide resolved
let mut exporter = if version == 1 {
BlobExporter::new_with_sink(Box::new(&mut blob))
} else {
BlobExporter::new_v2_with_sink(Box::new(&mut blob))
};

exporter.flush(HelloWorldV1Marker::KEY).unwrap();

exporter.close().unwrap();
}

let provider = BlobDataProvider::try_new_from_blob(blob.into()).unwrap();

assert!(
matches!(
provider.load_buffer(HelloWorldV1Marker::KEY, Default::default()),
Err(DataError {
kind: DataErrorKind::MissingLocale,
..
})
),
"(version: {version})"
);
}

let provider = BlobDataProvider::try_new_from_blob(blob.into()).unwrap();

assert!(matches!(
provider.load_buffer(HelloWorldV1Marker::KEY, Default::default()),
Err(DataError {
kind: DataErrorKind::MissingLocale,
..
})
));
}

#[test]
fn test_singleton() {
let mut blob: Vec<u8> = Vec::new();

{
let mut exporter = BlobExporter::new_with_sink(Box::new(&mut blob));

exporter.flush(HelloSingletonV1Marker::KEY).unwrap();

exporter.close().unwrap();
for version in [1, 2] {
let mut blob: Vec<u8> = Vec::new();

{
let mut exporter = if version == 1 {
BlobExporter::new_with_sink(Box::new(&mut blob))
} else {
BlobExporter::new_v2_with_sink(Box::new(&mut blob))
};

exporter.flush(HelloSingletonV1Marker::KEY).unwrap();

exporter.close().unwrap();
}

let provider = BlobDataProvider::try_new_from_blob(blob.into()).unwrap();

assert!(
matches!(
provider.load_buffer(
HelloSingletonV1Marker::KEY,
DataRequest {
locale: &icu_locid::locale!("de").into(),
metadata: Default::default()
}
),
Err(DataError {
kind: DataErrorKind::ExtraneousLocale,
..
})
),
"(version: {version})"
);

assert!(
matches!(
provider.load_buffer(HelloSingletonV1Marker::KEY, Default::default()),
Err(DataError {
kind: DataErrorKind::MissingLocale,
..
})
),
"(version: {version})"
);
}

let provider = BlobDataProvider::try_new_from_blob(blob.into()).unwrap();

assert!(matches!(
provider.load_buffer(
HelloSingletonV1Marker::KEY,
DataRequest {
locale: &icu_locid::locale!("de").into(),
metadata: Default::default()
}
),
Err(DataError {
kind: DataErrorKind::ExtraneousLocale,
..
})
));

assert!(matches!(
provider.load_buffer(HelloSingletonV1Marker::KEY, Default::default()),
Err(DataError {
kind: DataErrorKind::MissingLocale,
..
})
));
}
}
Loading