Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add BlobSchema V2 with ZeroTrie #4207

Merged
merged 10 commits into from
Nov 2, 2023
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions provider/blob/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,14 @@ postcard = { version = "1.0.0", default-features = false, features = ["alloc"] }
serde = { version = "1.0", default-features = false, features = ["alloc"] }
writeable = {workspace = true }
zerovec = { workspace = true, features = ["serde", "yoke"] }
zerotrie = { workspace = true, features = ["serde", "zerovec"] }

log = { version = "0.4", optional = true }

[dev-dependencies]
icu_locid = { path = "../../components/locid", features = ["serde"] }
icu_datagen = { path = "../../provider/datagen", features = ["networking"] }
criterion = "0.4"
robertbastian marked this conversation as resolved.
Show resolved Hide resolved

[features]
std = ["icu_provider/std"]
Expand All @@ -40,4 +42,10 @@ export = [
"postcard/alloc",
"std",
"zerovec/serde",
"zerotrie/alloc",
"zerotrie/litemap",
]

[[bench]]
name = "blob_version_bench"
harness = false
58 changes: 58 additions & 0 deletions provider/blob/benches/blob_version_bench.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

extern crate alloc;

use criterion::{black_box, criterion_group, criterion_main, Criterion};
use icu_provider::datagen::IterableDataProvider;
use icu_provider::hello_world::*;
use icu_provider::prelude::*;
use icu_provider_blob::BlobDataProvider;

const BLOB_V1: &[u8] = include_bytes!("../tests/data/v1.postcard");
const BLOB_V2: &[u8] = include_bytes!("../tests/data/v2.postcard");

fn blob_version_bench(c: &mut Criterion) {
c.bench_function("provider/construct/v1", |b| {
b.iter(|| BlobDataProvider::try_new_from_static_blob(black_box(BLOB_V1)).unwrap());
});
c.bench_function("provider/construct/v2", |b| {
b.iter(|| BlobDataProvider::try_new_from_static_blob(black_box(BLOB_V1)).unwrap());
});

let hello_world_provider = HelloWorldProvider;
let locales = hello_world_provider.supported_locales().unwrap();

c.bench_function("provider/read/v1", |b| {
let provider = BlobDataProvider::try_new_from_static_blob(black_box(BLOB_V1)).unwrap();
b.iter(|| {
for locale in black_box(&locales).iter() {
let _: DataResponse<HelloWorldV1Marker> = black_box(&provider)
.as_deserializing()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: I'd prefer benching this as a BufferProvider without deserialization

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

.load(DataRequest {
locale,
metadata: Default::default(),
})
.unwrap();
}
});
});
c.bench_function("provider/read/v2", |b| {
let provider = BlobDataProvider::try_new_from_static_blob(black_box(BLOB_V2)).unwrap();
b.iter(|| {
for locale in black_box(&locales).iter() {
let _: DataResponse<HelloWorldV1Marker> = black_box(&provider)
.as_deserializing()
.load(DataRequest {
locale,
metadata: Default::default(),
})
.unwrap();
}
});
});
}

criterion_group!(benches, blob_version_bench,);
criterion_main!(benches);
133 changes: 81 additions & 52 deletions provider/blob/src/blob_data_provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use crate::blob_schema::{BlobSchema, BlobSchemaV1};
use crate::blob_schema::BlobSchema;
use alloc::boxed::Box;
use icu_provider::buf::BufferFormat;
use icu_provider::prelude::*;
Expand Down Expand Up @@ -38,7 +38,7 @@ use yoke::*;
/// // Read an ICU4X data blob dynamically:
/// let blob = std::fs::read(concat!(
/// env!("CARGO_MANIFEST_DIR"),
/// "/tests/data/hello_world.postcard",
/// "/tests/data/v2.postcard",
/// ))
/// .expect("Reading pre-computed postcard buffer");
///
Expand Down Expand Up @@ -69,7 +69,7 @@ use yoke::*;
/// // Read an ICU4X data blob statically:
/// const HELLO_WORLD_BLOB: &[u8] = include_bytes!(concat!(
/// env!("CARGO_MANIFEST_DIR"),
/// "/tests/data/hello_world.postcard"
/// "/tests/data/v2.postcard"
/// ));
///
/// // Create a DataProvider from it:
Expand All @@ -87,7 +87,7 @@ use yoke::*;
/// ```
#[derive(Clone)]
pub struct BlobDataProvider {
data: Yoke<BlobSchemaV1<'static>, Option<Cart>>,
data: Yoke<BlobSchema<'static>, Option<Cart>>,
}

impl core::fmt::Debug for BlobDataProvider {
Expand All @@ -103,7 +103,7 @@ impl BlobDataProvider {
pub fn try_new_from_blob(blob: Box<[u8]>) -> Result<Self, DataError> {
Ok(Self {
data: Cart::try_make_yoke(blob, |bytes| {
BlobSchema::deserialize_v1(&mut postcard::Deserializer::from_bytes(bytes))
BlobSchema::deserialize_and_check(&mut postcard::Deserializer::from_bytes(bytes))
})?,
})
}
Expand All @@ -112,7 +112,7 @@ impl BlobDataProvider {
/// [`try_new_from_blob`](BlobDataProvider::try_new_from_blob) and is allocation-free.
pub fn try_new_from_static_blob(blob: &'static [u8]) -> Result<Self, DataError> {
Ok(Self {
data: Yoke::new_owned(BlobSchema::deserialize_v1(
data: Yoke::new_owned(BlobSchema::deserialize_and_check(
&mut postcard::Deserializer::from_bytes(blob),
)?),
})
Expand Down Expand Up @@ -150,61 +150,90 @@ mod test {

#[test]
fn test_empty() {
let mut blob: Vec<u8> = Vec::new();
let mut version = 1;

{
let mut exporter = BlobExporter::new_with_sink(Box::new(&mut blob));
while version <= 2 {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
let mut version = 1;
{
let mut exporter = BlobExporter::new_with_sink(Box::new(&mut blob));
while version <= 2 {
for version in [1, 2] {

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

let mut blob: Vec<u8> = Vec::new();

exporter.flush(HelloWorldV1Marker::KEY).unwrap();
{
robertbastian marked this conversation as resolved.
Show resolved Hide resolved
let mut exporter = if version == 1 {
BlobExporter::new_with_sink(Box::new(&mut blob))
} else {
BlobExporter::new_v2_with_sink(Box::new(&mut blob))
};

exporter.close().unwrap();
}
exporter.flush(HelloWorldV1Marker::KEY).unwrap();

exporter.close().unwrap();
}

let provider = BlobDataProvider::try_new_from_blob(blob.into()).unwrap();
let provider = BlobDataProvider::try_new_from_blob(blob.into()).unwrap();

assert!(matches!(
provider.load_buffer(HelloWorldV1Marker::KEY, Default::default()),
Err(DataError {
kind: DataErrorKind::MissingLocale,
..
})
));
assert!(
matches!(
provider.load_buffer(HelloWorldV1Marker::KEY, Default::default()),
Err(DataError {
kind: DataErrorKind::MissingLocale,
..
})
),
"(version: {version})"
);

version += 1;
}
}

#[test]
fn test_singleton() {
let mut blob: Vec<u8> = Vec::new();

{
let mut exporter = BlobExporter::new_with_sink(Box::new(&mut blob));

exporter.flush(HelloSingletonV1Marker::KEY).unwrap();

exporter.close().unwrap();
let mut version = 1;

while version <= 2 {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

let mut blob: Vec<u8> = Vec::new();

{
let mut exporter = if version == 1 {
BlobExporter::new_with_sink(Box::new(&mut blob))
} else {
BlobExporter::new_v2_with_sink(Box::new(&mut blob))
};

exporter.flush(HelloSingletonV1Marker::KEY).unwrap();

exporter.close().unwrap();
}

let provider = BlobDataProvider::try_new_from_blob(blob.into()).unwrap();

assert!(
matches!(
provider.load_buffer(
HelloSingletonV1Marker::KEY,
DataRequest {
locale: &icu_locid::locale!("de").into(),
metadata: Default::default()
}
),
Err(DataError {
kind: DataErrorKind::ExtraneousLocale,
..
})
),
"(version: {version})"
);

assert!(
matches!(
provider.load_buffer(HelloSingletonV1Marker::KEY, Default::default()),
Err(DataError {
kind: DataErrorKind::MissingLocale,
..
})
),
"(version: {version})"
);

version += 1;
}

let provider = BlobDataProvider::try_new_from_blob(blob.into()).unwrap();

assert!(matches!(
provider.load_buffer(
HelloSingletonV1Marker::KEY,
DataRequest {
locale: &icu_locid::locale!("de").into(),
metadata: Default::default()
}
),
Err(DataError {
kind: DataErrorKind::ExtraneousLocale,
..
})
));

assert!(matches!(
provider.load_buffer(HelloSingletonV1Marker::KEY, Default::default()),
Err(DataError {
kind: DataErrorKind::MissingLocale,
..
})
));
}
}
Loading