From 027e3e31ec076d8006f1ba8bfebacda28338a8f3 Mon Sep 17 00:00:00 2001 From: Garret Rieger Date: Wed, 22 Oct 2025 01:28:24 +0000 Subject: [PATCH 1/2] Add method to FontHelper to get the size of the outline data for a set of glyphs. --- common/font_helper.cc | 29 ++++++++++++++++++++++++++ common/font_helper.h | 4 ++++ common/font_helper_test.cc | 42 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 75 insertions(+) diff --git a/common/font_helper.cc b/common/font_helper.cc index 11741a8f..e1779ff3 100644 --- a/common/font_helper.cc +++ b/common/font_helper.cc @@ -13,6 +13,7 @@ #include "hb-ot.h" #include "hb-subset.h" #include "hb.h" +#include "common/try.h" using absl::btree_set; using absl::flat_hash_map; @@ -47,6 +48,7 @@ bool FontHelper::HasWideGvar(const hb_face_t* face) { return (((uint8_t)gvar.str()[gvar_flags_offset]) & 0x01); } + absl::StatusOr FontHelper::GlyfData(const hb_face_t* face, uint32_t gid) { auto loca = Loca(face); @@ -124,6 +126,33 @@ FontData FontHelper::Cff2Data(hb_face_t* face, uint32_t gid) { return data; } +StatusOr FontHelper::TotalGlyphData(hb_face_t* face, const GlyphSet& gids) { + auto tags = FontHelper::GetTags(face); + + uint32_t total = 0; + for (uint32_t gid : gids) { + // TODO(grieger): write this using indexed data readers instead so we can + // avoid the per glyph setup overhead incurred by the *Data() methods. + if (tags.contains(FontHelper::kGlyf)) { + total += TRY(FontHelper::GlyfData(face, gid)).size(); + } + + if (tags.contains(FontHelper::kGvar)) { + total += TRY(FontHelper::GvarData(face, gid)).size(); + } + + if (tags.contains(FontHelper::kCFF)) { + total += FontHelper::CffData(face, gid).size(); + } + + if (tags.contains(FontHelper::kCFF2)) { + total += FontHelper::Cff2Data(face, gid).size(); + } + } + + return total; +} + Status FontHelper::Cff2GetCharstrings(hb_face_t* face, FontData& non_charstrings, FontData& charstrings) { diff --git a/common/font_helper.h b/common/font_helper.h index 1bf061f0..bd986320 100644 --- a/common/font_helper.h +++ b/common/font_helper.h @@ -145,6 +145,10 @@ class FontHelper { static FontData Cff2Data(hb_face_t* face, uint32_t gid); + // Counts up the total size of all glyph data (gvar, glyf, cff, cff2) + // for the provided set of gids. + static absl::StatusOr TotalGlyphData(hb_face_t* face, const GlyphSet& gids); + static absl::Status Cff2GetCharstrings(hb_face_t* face, FontData& non_charstrings, FontData& charstrings); diff --git a/common/font_helper_test.cc b/common/font_helper_test.cc index ce373317..6958dbcb 100644 --- a/common/font_helper_test.cc +++ b/common/font_helper_test.cc @@ -618,6 +618,48 @@ TEST_F(FontHelperTest, GlyfData_ShortOverflowSynthetic) { ASSERT_EQ(*data, expected); } +TEST_F(FontHelperTest, TotalGlyphData_GlyfGvar) { + auto size = FontHelper::TotalGlyphData(roboto_vf.get(), GlyphSet {78, 83, 95}); + ASSERT_TRUE(size.ok()) << size.status(); + + uint32_t expected = + FontHelper::GlyfData(roboto_vf.get(), 78)->size() + + FontHelper::GlyfData(roboto_vf.get(), 83)->size() + + FontHelper::GlyfData(roboto_vf.get(), 95)->size() + + FontHelper::GvarData(roboto_vf.get(), 78)->size() + + FontHelper::GvarData(roboto_vf.get(), 83)->size() + + FontHelper::GvarData(roboto_vf.get(), 95)->size(); + + ASSERT_GT(*size, 0); + ASSERT_EQ(*size, expected); +} + +TEST_F(FontHelperTest, TotalGlyphData_Cff) { + auto size = FontHelper::TotalGlyphData(noto_sans_jp_otf.get(), GlyphSet {78, 83, 95}); + ASSERT_TRUE(size.ok()) << size.status(); + + uint32_t expected = + FontHelper::CffData(noto_sans_jp_otf.get(), 78).size() + + FontHelper::CffData(noto_sans_jp_otf.get(), 83).size() + + FontHelper::CffData(noto_sans_jp_otf.get(), 95).size(); + + ASSERT_GT(*size, 0); + ASSERT_EQ(*size, expected); +} + +TEST_F(FontHelperTest, TotalGlyphData_Cff2) { + auto size = FontHelper::TotalGlyphData(noto_sans_vf_jp_otf.get(), GlyphSet {34, 35, 46}); + ASSERT_TRUE(size.ok()) << size.status(); + + uint32_t expected = + FontHelper::Cff2Data(noto_sans_vf_jp_otf.get(), 34).size() + + FontHelper::Cff2Data(noto_sans_vf_jp_otf.get(), 35).size() + + FontHelper::Cff2Data(noto_sans_vf_jp_otf.get(), 46).size(); + + ASSERT_GT(*size, 0); + ASSERT_EQ(*size, expected); +} + // TODO test BuildFont... } // namespace common From 84df320863fbaf1bc8d57d99a8a3aa6bbe69558f Mon Sep 17 00:00:00 2001 From: Garret Rieger Date: Wed, 22 Oct 2025 17:45:07 +0000 Subject: [PATCH 2/2] When brotli quality is set to 0 estimate compressed size instead of doing the compression. --- common/font_helper.cc | 9 +-- common/font_helper.h | 3 +- common/font_helper_test.cc | 34 +++++------ ift/encoder/BUILD | 15 +++++ ift/encoder/estimated_patch_size_cache.cc | 60 +++++++++++++++++++ ift/encoder/estimated_patch_size_cache.h | 47 +++++++++++++++ .../estimated_patch_size_cache_test.cc | 46 ++++++++++++++ ift/encoder/patch_size_cache.h | 1 + ift/encoder/segmentation_context.h | 15 ++++- util/segmenter_config.proto | 4 ++ 10 files changed, 210 insertions(+), 24 deletions(-) create mode 100644 ift/encoder/estimated_patch_size_cache.cc create mode 100644 ift/encoder/estimated_patch_size_cache.h create mode 100644 ift/encoder/estimated_patch_size_cache_test.cc diff --git a/common/font_helper.cc b/common/font_helper.cc index e1779ff3..8f5465df 100644 --- a/common/font_helper.cc +++ b/common/font_helper.cc @@ -10,13 +10,14 @@ #include "common/hb_set_unique_ptr.h" #include "common/indexed_data_reader.h" #include "common/int_set.h" +#include "common/try.h" #include "hb-ot.h" #include "hb-subset.h" #include "hb.h" -#include "common/try.h" using absl::btree_set; using absl::flat_hash_map; +using absl::flat_hash_set; using absl::Status; using absl::StatusOr; using absl::StrCat; @@ -48,7 +49,6 @@ bool FontHelper::HasWideGvar(const hb_face_t* face) { return (((uint8_t)gvar.str()[gvar_flags_offset]) & 0x01); } - absl::StatusOr FontHelper::GlyfData(const hb_face_t* face, uint32_t gid) { auto loca = Loca(face); @@ -126,8 +126,9 @@ FontData FontHelper::Cff2Data(hb_face_t* face, uint32_t gid) { return data; } -StatusOr FontHelper::TotalGlyphData(hb_face_t* face, const GlyphSet& gids) { - auto tags = FontHelper::GetTags(face); +StatusOr FontHelper::TotalGlyphData(hb_face_t* face, + const GlyphSet& gids) { + flat_hash_set tags = FontHelper::GetTags(face); uint32_t total = 0; for (uint32_t gid : gids) { diff --git a/common/font_helper.h b/common/font_helper.h index bd986320..18ccce06 100644 --- a/common/font_helper.h +++ b/common/font_helper.h @@ -147,7 +147,8 @@ class FontHelper { // Counts up the total size of all glyph data (gvar, glyf, cff, cff2) // for the provided set of gids. - static absl::StatusOr TotalGlyphData(hb_face_t* face, const GlyphSet& gids); + static absl::StatusOr TotalGlyphData(hb_face_t* face, + const GlyphSet& gids); static absl::Status Cff2GetCharstrings(hb_face_t* face, FontData& non_charstrings, diff --git a/common/font_helper_test.cc b/common/font_helper_test.cc index 6958dbcb..7aaaf093 100644 --- a/common/font_helper_test.cc +++ b/common/font_helper_test.cc @@ -619,42 +619,42 @@ TEST_F(FontHelperTest, GlyfData_ShortOverflowSynthetic) { } TEST_F(FontHelperTest, TotalGlyphData_GlyfGvar) { - auto size = FontHelper::TotalGlyphData(roboto_vf.get(), GlyphSet {78, 83, 95}); + auto size = FontHelper::TotalGlyphData(roboto_vf.get(), GlyphSet{78, 83, 95}); ASSERT_TRUE(size.ok()) << size.status(); - uint32_t expected = - FontHelper::GlyfData(roboto_vf.get(), 78)->size() + - FontHelper::GlyfData(roboto_vf.get(), 83)->size() + - FontHelper::GlyfData(roboto_vf.get(), 95)->size() + - FontHelper::GvarData(roboto_vf.get(), 78)->size() + - FontHelper::GvarData(roboto_vf.get(), 83)->size() + - FontHelper::GvarData(roboto_vf.get(), 95)->size(); + uint32_t expected = FontHelper::GlyfData(roboto_vf.get(), 78)->size() + + FontHelper::GlyfData(roboto_vf.get(), 83)->size() + + FontHelper::GlyfData(roboto_vf.get(), 95)->size() + + FontHelper::GvarData(roboto_vf.get(), 78)->size() + + FontHelper::GvarData(roboto_vf.get(), 83)->size() + + FontHelper::GvarData(roboto_vf.get(), 95)->size(); ASSERT_GT(*size, 0); ASSERT_EQ(*size, expected); } TEST_F(FontHelperTest, TotalGlyphData_Cff) { - auto size = FontHelper::TotalGlyphData(noto_sans_jp_otf.get(), GlyphSet {78, 83, 95}); + auto size = + FontHelper::TotalGlyphData(noto_sans_jp_otf.get(), GlyphSet{78, 83, 95}); ASSERT_TRUE(size.ok()) << size.status(); - uint32_t expected = - FontHelper::CffData(noto_sans_jp_otf.get(), 78).size() + - FontHelper::CffData(noto_sans_jp_otf.get(), 83).size() + - FontHelper::CffData(noto_sans_jp_otf.get(), 95).size(); + uint32_t expected = FontHelper::CffData(noto_sans_jp_otf.get(), 78).size() + + FontHelper::CffData(noto_sans_jp_otf.get(), 83).size() + + FontHelper::CffData(noto_sans_jp_otf.get(), 95).size(); ASSERT_GT(*size, 0); ASSERT_EQ(*size, expected); } TEST_F(FontHelperTest, TotalGlyphData_Cff2) { - auto size = FontHelper::TotalGlyphData(noto_sans_vf_jp_otf.get(), GlyphSet {34, 35, 46}); + auto size = FontHelper::TotalGlyphData(noto_sans_vf_jp_otf.get(), + GlyphSet{34, 35, 46}); ASSERT_TRUE(size.ok()) << size.status(); uint32_t expected = - FontHelper::Cff2Data(noto_sans_vf_jp_otf.get(), 34).size() + - FontHelper::Cff2Data(noto_sans_vf_jp_otf.get(), 35).size() + - FontHelper::Cff2Data(noto_sans_vf_jp_otf.get(), 46).size(); + FontHelper::Cff2Data(noto_sans_vf_jp_otf.get(), 34).size() + + FontHelper::Cff2Data(noto_sans_vf_jp_otf.get(), 35).size() + + FontHelper::Cff2Data(noto_sans_vf_jp_otf.get(), 46).size(); ASSERT_GT(*size, 0); ASSERT_EQ(*size, expected); diff --git a/ift/encoder/BUILD b/ift/encoder/BUILD index d802abc5..a3980240 100644 --- a/ift/encoder/BUILD +++ b/ift/encoder/BUILD @@ -63,6 +63,8 @@ cc_library( "candidate_merge.cc", "candidate_merge.h", "patch_size_cache.h", + "estimated_patch_size_cache.h", + "estimated_patch_size_cache.cc", "merge_strategy.h", "merge_strategy.cc", "merger.cc", @@ -253,4 +255,17 @@ cc_test( "@abseil-cpp//absl/types:span", "@googletest//:gtest_main", ], +) + +cc_test( + name = "estimated_patch_size_cache_test", + srcs = ["estimated_patch_size_cache_test.cc"], + data = [ + "//common:testdata", + ], + deps = [ + ":segmentation_context", + "//common", + "@googletest//:gtest_main", + ], ) \ No newline at end of file diff --git a/ift/encoder/estimated_patch_size_cache.cc b/ift/encoder/estimated_patch_size_cache.cc new file mode 100644 index 00000000..7ef33f97 --- /dev/null +++ b/ift/encoder/estimated_patch_size_cache.cc @@ -0,0 +1,60 @@ +#include "ift/encoder/estimated_patch_size_cache.h" + +#include "common/int_set.h" +#include "common/try.h" + +using absl::StatusOr; +using absl::flat_hash_set; +using common::GlyphSet; + +namespace ift::encoder { + +StatusOr EstimatedPatchSizeCache::GetPatchSize(const GlyphSet& gids) { + auto it = cache_.find(gids); + if (it != cache_.end()) { + return it->second; + } + + flat_hash_set tags = common::FontHelper::GetTags(face_.get()); + uint32_t table_count = (tags.contains(common::FontHelper::kCFF) ? 1 : 0) + + (tags.contains(common::FontHelper::kCFF2) ? 1 : 0) + + (tags.contains(common::FontHelper::kGlyf) ? 1 : 0) + + (tags.contains(common::FontHelper::kGvar) ? 1 : 0); + + uint32_t gid_width = (gids.size() > 255) ? 3 : 2; + + uint32_t header_size = 1 + 7 * 4; + uint32_t uncompressed_stream_size = + 5 + gids.size() * gid_width + // glyph ids + 4 * table_count + // table tags + 4 * (gids.size() * table_count + 1); // data offsets + + uncompressed_stream_size += + TRY(common::FontHelper::TotalGlyphData(face_.get(), gids)); + + uint32_t size = header_size + (uint32_t)((double)uncompressed_stream_size * + compression_ratio_); + cache_[gids] = size; + return size; +} + +StatusOr EstimatedPatchSizeCache::EstimateCompressionRatio( + hb_face_t* original_face) { + PatchSizeCacheImpl patch_sizes(original_face, 11); + + uint32_t glyph_count = hb_face_get_glyph_count(original_face); + if (glyph_count == 0) { + return 0.0; + } + + common::GlyphSet gids; + gids.insert_range(0, glyph_count - 1); + + double uncompressed_size = + TRY(common::FontHelper::TotalGlyphData(original_face, gids)); + double compressed_size = TRY(patch_sizes.GetPatchSize(gids)); + + return compressed_size / uncompressed_size; +} + +} // namespace ift::encoder \ No newline at end of file diff --git a/ift/encoder/estimated_patch_size_cache.h b/ift/encoder/estimated_patch_size_cache.h new file mode 100644 index 00000000..5349a69c --- /dev/null +++ b/ift/encoder/estimated_patch_size_cache.h @@ -0,0 +1,47 @@ +#ifndef IFT_ENCODER_ESTIMATED_PATCH_SIZE_CACHE_H_ +#define IFT_ENCODER_ESTIMATED_PATCH_SIZE_CACHE_H_ + +#include +#include "absl/status/statusor.h" +#include "common/font_data.h" +#include "common/int_set.h" +#include "ift/encoder/patch_size_cache.h" + +namespace ift::encoder { + +// Estimates the size of a glyph keyed patch using a fixed compression ratio. +// Does not actually run the brotli compression. +// +// The fixed compression ratio is determined by looking at the compression ratio +// of glyph data in the provided original_face. +class EstimatedPatchSizeCache : public PatchSizeCache { + public: + static absl::StatusOr> New(hb_face_t* face) { + double compression_ratio = TRY(EstimateCompressionRatio(face)); + return std::unique_ptr(new EstimatedPatchSizeCache(face, compression_ratio)); + } + + absl::StatusOr GetPatchSize(const common::GlyphSet& gids) override; + + double CompressionRatio() const { + return compression_ratio_; + } + + private: + explicit EstimatedPatchSizeCache(hb_face_t* original_face, + double compression_ratio) + : face_(common::make_hb_face(hb_face_reference(original_face))), + compression_ratio_(compression_ratio), + cache_() {} + + static absl::StatusOr EstimateCompressionRatio( + hb_face_t* original_face); + + common::hb_face_unique_ptr face_; + double compression_ratio_; + absl::flat_hash_map cache_; +}; + +} // namespace ift::encoder + +#endif // IFT_ENCODER_ESTIMATED_PATCH_SIZE_CACHE_H_ diff --git a/ift/encoder/estimated_patch_size_cache_test.cc b/ift/encoder/estimated_patch_size_cache_test.cc new file mode 100644 index 00000000..c72e5ba7 --- /dev/null +++ b/ift/encoder/estimated_patch_size_cache_test.cc @@ -0,0 +1,46 @@ +#include "ift/encoder/estimated_patch_size_cache.h" + +#include "common/font_helper.h" +#include "common/int_set.h" +#include "gtest/gtest.h" + +#include "common/font_data.h" + +using common::hb_face_unique_ptr; +using common::make_hb_face; +using common::hb_blob_unique_ptr; +using common::make_hb_blob; +using common::FontHelper; +using common::GlyphSet; + +namespace ift::encoder { + +class EstimatedPatchSizeCacheTest : public ::testing::Test { + protected: + EstimatedPatchSizeCacheTest() : roboto(make_hb_face(nullptr)) { + hb_blob_unique_ptr blob = make_hb_blob( + hb_blob_create_from_file("common/testdata/Roboto-Regular.ttf")); + roboto = make_hb_face(hb_face_create(blob.get(), 0)); + } + + + double CompressionRatio(GlyphSet gids, double expected_compression_ratio) { + uint32_t raw_outline_size = + *FontHelper::TotalGlyphData(roboto.get(), gids); + double fixed_size = 1 + 7 * 4; // header + fixed_size += (double) (5 + gids.size() * 2 + 4 + (gids.size() + 1)*4) * expected_compression_ratio; // glyph patches header + auto estimated = *EstimatedPatchSizeCache::New(roboto.get()); + uint32_t compressed_size = *estimated->GetPatchSize(gids); + return (double) (compressed_size - fixed_size) / (double) raw_outline_size; + } + + hb_face_unique_ptr roboto; +}; + +TEST_F(EstimatedPatchSizeCacheTest, PatchSize) { + // There should be a consistent compression ratio between patches. + ASSERT_NEAR(this->CompressionRatio(GlyphSet {44, 47, 49}, 0.457), 0.46, 0.01); + ASSERT_NEAR(CompressionRatio(GlyphSet {45, 48, 50, 51, 52, 53}, 0.457), 0.46, 0.01); +} + +} // namespace ift::encoder \ No newline at end of file diff --git a/ift/encoder/patch_size_cache.h b/ift/encoder/patch_size_cache.h index 2b81669b..b95474cc 100644 --- a/ift/encoder/patch_size_cache.h +++ b/ift/encoder/patch_size_cache.h @@ -2,6 +2,7 @@ #define IFT_ENCODER_PATCH_SIZE_CACHE_H_ #include +#include #include "absl/container/flat_hash_map.h" #include "absl/status/statusor.h" diff --git a/ift/encoder/segmentation_context.h b/ift/encoder/segmentation_context.h index dc6ade93..2608428d 100644 --- a/ift/encoder/segmentation_context.h +++ b/ift/encoder/segmentation_context.h @@ -8,6 +8,7 @@ #include "common/font_data.h" #include "common/int_set.h" #include "common/try.h" +#include "ift/encoder/estimated_patch_size_cache.h" #include "ift/encoder/glyph_closure_cache.h" #include "ift/encoder/glyph_condition_set.h" #include "ift/encoder/glyph_groupings.h" @@ -43,9 +44,9 @@ class SegmentationContext { const std::vector& segments, uint32_t brotli_quality, uint32_t init_font_brotli_quality) - : patch_size_cache(new PatchSizeCacheImpl(face, brotli_quality)), + : patch_size_cache(NewPatchSizeCache(face, brotli_quality)), patch_size_cache_for_init_font( - new PatchSizeCacheImpl(face, init_font_brotli_quality)), + NewPatchSizeCache(face, init_font_brotli_quality)), glyph_closure_cache(face), original_face(common::make_hb_face(hb_face_reference(face))), segmentation_info_(segments, initial_segment, glyph_closure_cache), @@ -175,6 +176,16 @@ class SegmentationContext { // too small to be worthwhile. absl::StatusOr ComputeSegmentCutoff() const; + static std::unique_ptr NewPatchSizeCache(hb_face_t* face, uint32_t brotli_quality) { + if (brotli_quality == 0) { + auto cache = EstimatedPatchSizeCache::New(face); + if (cache.ok()) { + return std::move(*cache); + } + } + return std::unique_ptr(new PatchSizeCacheImpl(face, brotli_quality)); + } + public: // Caches and logging std::unique_ptr patch_size_cache; diff --git a/util/segmenter_config.proto b/util/segmenter_config.proto index 1b8bdaaf..937b7562 100644 --- a/util/segmenter_config.proto +++ b/util/segmenter_config.proto @@ -29,6 +29,10 @@ message SegmenterConfig { // When generating compressed patches (to evaluate their size) this is the brotli quality // level used. Segmentation is typically bottle necked on brotli compression so higher values // increase segmentation times, but yield more accurate results. + // + // If quality is set to '0' this disables brotli compression and instead estimates the + // effect of compression using a fixed compression ratio calculated based on how well + // the glyph data in the input font compresses. uint32 brotli_quality = 5 [default = 8]; // During processing to determine which segments to move into the initial font this is