Commit 2f2f70e

mv

robertbastian committed Jul 3, 2024
1 parent 3d0bd17 commit 2f2f70e
Showing 5 changed files with 178 additions and 97 deletions.
80 changes: 51 additions & 29 deletions provider/baked/src/binary_search.rs
@@ -9,64 +9,86 @@ use databake::*;
use icu_provider::prelude::*;

#[cfg(feature = "export")]
pub fn bake(
pub(crate) fn bake(
marker_bake: &TokenStream,
reqs_to_idents: Vec<(DataIdentifierCow, proc_macro2::Ident)>,
mut ids_to_idents: Vec<(DataIdentifierCow, proc_macro2::Ident)>,
idents_to_bakes: Vec<(proc_macro2::Ident, TokenStream)>,
) -> TokenStream {
let mut ids_to_idents = reqs_to_idents
.into_iter()
.map(|(id, ident)| {
(
(id.marker_attributes.to_string(), id.locale.to_string()),
quote!(#ident),
)
})
.collect::<Vec<_>>();

ids_to_idents.sort_by(|(a, _), (b, _)| a.cmp(b));
) -> (TokenStream, usize) {
let mut size = 0;

let idents_to_bakes = idents_to_bakes.into_iter().map(|(ident, bake)| {
quote! {
const #ident: &S = &#bake;
}
// Data.0 is a fat pointer
size += core::mem::size_of::<&[()]>();

// The idents are references
size += ids_to_idents.len() * core::mem::size_of::<&()>();

ids_to_idents.sort_by_cached_key(|(id, _)| {
(
id.marker_attributes.as_str().to_string(),
id.locale.to_string(),
)
});

let (ty, reqs_to_idents) = if ids_to_idents.iter().all(|((a, _), _)| a.is_empty()) {
let (ty, id_bakes_to_idents) = if ids_to_idents
.iter()
.all(|(id, _)| id.marker_attributes.is_empty())
{
// Only DataLocales
size += ids_to_idents.len() * core::mem::size_of::<&str>();
(
quote! { icu_provider_baked::binary_search::Locale },
ids_to_idents
.iter()
.map(|((_, l), i)| quote!((#l, #i)))
.map(|(id, ident)| {
let k = id.locale.to_string();
quote!((#k, #ident))
})
.collect::<Vec<_>>(),
)
} else if ids_to_idents.iter().all(|((_, l), _)| *l == "und") {
} else if ids_to_idents.iter().all(|(id, _)| id.locale.is_und()) {
// Only marker attributes
size += ids_to_idents.len() * core::mem::size_of::<&str>();
(
quote! { icu_provider_baked::binary_search::Attributes },
ids_to_idents
.iter()
.map(|((a, _), i)| quote!((#a, #i)))
.map(|(id, ident)| {
let k = id.marker_attributes.as_str();
quote!((#k, #ident))
})
.collect(),
)
} else {
size += ids_to_idents.len() * 2 * core::mem::size_of::<&str>();
(
quote! { icu_provider_baked::binary_search::AttributesAndLocale },
ids_to_idents
.iter()
.map(|((a, l), i)| quote!(((#a, #l), #i)))
.map(|(id, ident)| {
let k0 = id.marker_attributes.as_str();
let k1 = id.locale.to_string();
quote!(((#k0, #k1), #ident))
})
.collect(),
)
};

quote! {
icu_provider_baked::binary_search::Data<#ty, #marker_bake> = {
type S = <#marker_bake as icu_provider::DynamicDataMarker>::Yokeable;
#(#idents_to_bakes)*
icu_provider_baked::binary_search::Data(&[#(#reqs_to_idents,)*])
let idents_to_bakes = idents_to_bakes.into_iter().map(|(ident, bake)| {
quote! {
const #ident: &S = &#bake;
}
}
});

(
quote! {
icu_provider_baked::binary_search::Data<#ty, #marker_bake> = {
type S = <#marker_bake as icu_provider::DynamicDataMarker>::Yokeable;
#(#idents_to_bakes)*
icu_provider_baked::binary_search::Data(&[#(#id_bakes_to_idents,)*])
}
},
size,
)
}

pub struct Data<K: BinarySearchKey, M: DataMarker>(pub &'static [(K::Type, &'static M::Yokeable)]);
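For context, a back-of-envelope check (not part of the commit) of the `size` accounting that `bake` performs above, assuming a 64-bit target and the two-string `AttributesAndLocale` key variant. The total reproduces the `1096B, 27 identifiers` row in the test fingerprints file below:

```rust
fn main() {
    let identifiers = 27;
    // Data.0 is a fat pointer to the entry slice.
    let mut size = core::mem::size_of::<&[()]>(); // 16
    // Each entry holds a reference to a (deduplicated) data struct...
    size += identifiers * core::mem::size_of::<&()>(); // 27 * 8 = 216
    // ...plus two &str keys: marker attributes and locale.
    size += identifiers * 2 * core::mem::size_of::<&str>(); // 27 * 2 * 16 = 864
    assert_eq!(size, 1096);
}
```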
128 changes: 99 additions & 29 deletions provider/baked/src/export.rs
@@ -96,6 +96,7 @@ use icu_provider::export::*;
use icu_provider::prelude::*;
use std::collections::HashSet;
use std::collections::{BTreeMap, BTreeSet, HashMap};
use std::fmt::Write as _;
use std::fs::File;
use std::io::Write;
use std::path::Path;
@@ -162,12 +163,20 @@ pub struct BakedExporter {
HashMap<DataPayload<ExportMarker>, HashSet<DataIdentifierCow<'static>>>,
>,
>,
/// (marker, file name) pairs to wire up in mod.rs. This is populated by `flush` and consumed by `close`.
impl_data: Mutex<BTreeMap<DataMarkerInfo, SyncTokenStream>>,
/// file names and statistics to be consumed by `close`.
impl_data: Mutex<BTreeMap<DataMarkerInfo, (SyncTokenStream, Statistics)>>,
// List of dependencies used by baking.
dependencies: CrateEnv,
}

#[derive(Default)]
pub struct Statistics {
pub structs_total_size: usize,
pub structs_count: usize,
pub lookup_struct_size: usize,
pub identifiers_count: usize,
}

impl std::fmt::Debug for BakedExporter {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("BakedExporter")
@@ -248,8 +257,9 @@ impl BakedExporter {
};

if !self.use_separate_crates {
// Don't search the whole file, there should be a macro in the first 300 bytes
if formatted[..300].contains("macro_rules!") || formatted[..100].contains("include!") {
// Don't search the whole file, there should be a macro in the first 1000 bytes
if formatted[..1000].contains("macro_rules!") || formatted[..1000].contains("include!")
{
// Formatted, otherwise it'd be `macro_rules !`
formatted = formatted
.replace("icu_", "icu::")
@@ -298,16 +308,43 @@ impl BakedExporter {
fn write_impl_macros(
&self,
marker: DataMarkerInfo,
stats: Statistics,
body: TokenStream,
iterable_body: TokenStream,
) -> Result<(), DataError> {
let marker_unqualified = bake_marker(marker).into_iter().last().unwrap().to_string();

let doc = format!(
" Implement `DataProvider<{}>` on the given struct using the data",
marker_unqualified
let &Statistics {
structs_total_size,
structs_count,
lookup_struct_size,
identifiers_count,
} = &stats;

let mut doc = format!(
" Implement `DataProvider<{marker_unqualified}>` on the given struct using the data\n \
hardcoded in this file. This allows the struct to be used with\n \
`icu`'s `_unstable` constructors."
);

if structs_count > 0 {
let _infallible = write!(&mut doc, "\n\n Using this implementation will embed the following data in the binary's data segment:\n ");

if marker.is_singleton {
let _infallible = write!(
&mut doc,
"* {structs_total_size}B[^1] for the singleton data struct\n "
);
} else {
let _infallible = write!(&mut doc, "* {lookup_struct_size}B[^1] for the lookup data structure ({identifiers_count} data identifiers)\n ");
let _infallible = write!(&mut doc, "* {structs_total_size}B[^1] for the actual data ({structs_count} unique structs)\n ");
};
let _infallible = write!(
&mut doc,
"\n [^1]: these numbers can be smaller in practice due to linker deduplication"
);
}

let ident = marker_unqualified.to_snake_case();

let macro_ident = format!("impl_{ident}",).parse::<TokenStream>().unwrap();
@@ -323,8 +360,6 @@ impl BakedExporter {
Path::new(&format!("{ident}.rs.data")),
quote! {
#[doc = #doc]
/// hardcoded in this file. This allows the struct to be used with
/// `icu`'s `_unstable` constructors.
#[doc(hidden)] // macro
#[macro_export]
macro_rules! #prefixed_macro_ident {
Expand All @@ -343,7 +378,10 @@ impl BakedExporter {
},
)?;

self.impl_data.lock().expect("poison").insert(marker, ident);
self.impl_data
.lock()
.expect("poison")
.insert(marker, (ident, stats));
Ok(())
}
}
@@ -390,7 +428,14 @@ impl DataExporter for BakedExporter {

let bake = payload.tokenize(&self.dependencies);

self.write_impl_macros(marker, quote! {
let stats = Statistics {
structs_total_size: payload.baked_size(),
structs_count: 1,
identifiers_count: 1,
lookup_struct_size: 0,
};

self.write_impl_macros(marker, stats, quote! {
#maybe_msrv
impl $provider {
// Exposing singleton structs as consts allows us to get rid of fallibility
Expand Down Expand Up @@ -439,6 +484,7 @@ impl DataExporter for BakedExporter {
if deduplicated_values.is_empty() {
self.write_impl_macros(
marker,
Default::default(),
quote! {
#maybe_msrv
impl icu_provider::DataProvider<#marker_bake> for $provider {
@@ -461,27 +507,33 @@ impl DataExporter for BakedExporter {
)
} else {
let mut idents_to_bakes = Vec::new();
let mut stats = Statistics::default();

let ids_to_idents = deduplicated_values
.iter()
.flat_map(|(payload, ids)| {
let ident = ids
let min_id = ids
.iter()
.map(|id| {
format!("_{}_{}", id.marker_attributes.as_str(), id.locale)
.chars()
.map(|ch| {
if ch == '-' {
'_'
} else {
ch.to_ascii_uppercase()
}
})
.collect::<String>()
})
.min()
.min_by_key(|id| (id.marker_attributes.as_str(), id.locale.to_string()))
.unwrap();
let ident = proc_macro2::Ident::new(&ident, proc_macro2::Span::call_site());

let ident = proc_macro2::Ident::new(
&format!("_{}_{}", min_id.marker_attributes.as_str(), min_id.locale)
.chars()
.map(|ch| {
if ch == '-' {
'_'
} else {
ch.to_ascii_uppercase()
}
})
.collect::<String>(),
proc_macro2::Span::call_site(),
);

stats.structs_count += 1;
stats.identifiers_count += ids.len();
stats.structs_total_size += payload.baked_size();

idents_to_bakes.push((ident.clone(), payload.tokenize(&self.dependencies)));
ids.iter().map(move |id| (id.clone(), ident.clone()))
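(Aside: the closure above names each deduplicated const after the lexicographically smallest identifier that uses it. A minimal standalone sketch of that mangling, with a hypothetical `mangle` helper:)

```rust
// '-' becomes '_', everything else is uppercased; the leading '_' keeps
// the result a valid Rust identifier even when the attributes are empty.
fn mangle(marker_attributes: &str, locale: &str) -> String {
    format!("_{marker_attributes}_{locale}")
        .chars()
        .map(|ch| if ch == '-' { '_' } else { ch.to_ascii_uppercase() })
        .collect()
}

fn main() {
    assert_eq!(mangle("", "en-US"), "__EN_US");
    assert_eq!(mangle("reverse", "und"), "_REVERSE_UND");
}
```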
@@ -503,7 +555,10 @@ impl DataExporter for BakedExporter {
.parse::<TokenStream>()
.unwrap();

let data = crate::binary_search::bake(&marker_bake, ids_to_idents, idents_to_bakes);
let (data, lookup_struct_size) =
crate::binary_search::bake(&marker_bake, ids_to_idents, idents_to_bakes);

stats.lookup_struct_size = lookup_struct_size;

let search = if !self.use_internal_fallback
|| deduplicated_values
@@ -544,6 +599,7 @@ impl DataExporter for BakedExporter {

self.write_impl_macros(
marker,
stats,
quote! {
#maybe_msrv
impl $provider {
@@ -588,11 +644,11 @@ impl DataExporter for BakedExporter {

let marker_bakes = data.keys().copied().map(bake_marker);

let file_paths = data.values().map(|i| format!("{i}.rs.data"));
let file_paths = data.values().map(|(i, _)| format!("{i}.rs.data"));

let macro_idents = data
.values()
.map(|i| format!("impl_{i}").parse::<TokenStream>().unwrap());
.map(|(i, _)| format!("impl_{i}").parse::<TokenStream>().unwrap());

// mod.rs is the interface for built-in data. It exposes one macro per marker.
self.write_to_file(
Expand Down Expand Up @@ -660,6 +716,20 @@ impl DataExporter for BakedExporter {
},
)?;

// TODO: Return the statistics instead of writing them out.
let mut file = crlify::BufWriterWithLineEndingFix::new(std::fs::File::create(
self.mod_directory.join("fingerprints.csv"),
)?);
for (marker, (_, stats)) in data {
if !marker.is_singleton {
writeln!(
&mut file,
"{marker:?}, <lookup>, {}B, {} identifiers",
stats.lookup_struct_size, stats.identifiers_count
)?;
}
}

self.print_deps();

Ok(())
1 change: 1 addition & 0 deletions provider/baked/tests/data/fingerprints.csv
@@ -0,0 +1 @@
core/helloworld@1, <lookup>, 1096B, 27 identifiers
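The row format follows the `writeln!` in `close()` above: `{marker:?}, <lookup>, {size}B, {n} identifiers`. A sketch of a hypothetical consumer, e.g. a CI size-regression check (not part of this commit):

```rust
// Parse one fingerprints.csv row into (marker, lookup size in bytes, identifier count).
fn parse(row: &str) -> Option<(String, usize, usize)> {
    let mut cols = row.split(", ");
    let marker = cols.next()?.to_string();
    let _kind = cols.next()?; // always "<lookup>" in rows written by this commit
    let size = cols.next()?.strip_suffix('B')?.parse().ok()?;
    let ids = cols.next()?.strip_suffix(" identifiers")?.parse().ok()?;
    Some((marker, size, ids))
}

fn main() {
    let row = "core/helloworld@1, <lookup>, 1096B, 27 identifiers";
    assert_eq!(parse(row), Some(("core/helloworld@1".into(), 1096, 27)));
}
```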
6 changes: 6 additions & 0 deletions provider/baked/tests/data/hello_world_v1_marker.rs.data
@@ -2,6 +2,12 @@
/// Implement `DataProvider<HelloWorldV1Marker>` on the given struct using the data
/// hardcoded in this file. This allows the struct to be used with
/// `icu`'s `_unstable` constructors.
///
/// Using this implementation will embed the following data in the binary's data segment:
/// * 1096B[^1] for the lookup data structure (27 data identifiers)
/// * 1100B[^1] for the actual data (27 unique structs)
///
/// [^1]: these numbers can be smaller in practice due to linker deduplication
#[doc(hidden)]
#[macro_export]
macro_rules! __impl_hello_world_v1_marker {
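For orientation, a hypothetical sketch of how a generated file like this is consumed; the `Baked` type name and the include path are illustrative assumptions, not part of this commit:

```rust
// Implement DataProvider<HelloWorldV1Marker> on a local type using the
// macro baked into the data file.
struct Baked;
include!("data/hello_world_v1_marker.rs.data");
__impl_hello_world_v1_marker!(Baked);
```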