diff --git a/common/font_helper.cc b/common/font_helper.cc index 11741a8..8f5465d 100644 --- a/common/font_helper.cc +++ b/common/font_helper.cc @@ -10,12 +10,14 @@ #include "common/hb_set_unique_ptr.h" #include "common/indexed_data_reader.h" #include "common/int_set.h" +#include "common/try.h" #include "hb-ot.h" #include "hb-subset.h" #include "hb.h" using absl::btree_set; using absl::flat_hash_map; +using absl::flat_hash_set; using absl::Status; using absl::StatusOr; using absl::StrCat; @@ -124,6 +126,34 @@ FontData FontHelper::Cff2Data(hb_face_t* face, uint32_t gid) { return data; } +StatusOr FontHelper::TotalGlyphData(hb_face_t* face, + const GlyphSet& gids) { + flat_hash_set tags = FontHelper::GetTags(face); + + uint32_t total = 0; + for (uint32_t gid : gids) { + // TODO(grieger): write this using indexed data readers instead so we can + // avoid the per glyph setup overhead incurred by the *Data() methods. + if (tags.contains(FontHelper::kGlyf)) { + total += TRY(FontHelper::GlyfData(face, gid)).size(); + } + + if (tags.contains(FontHelper::kGvar)) { + total += TRY(FontHelper::GvarData(face, gid)).size(); + } + + if (tags.contains(FontHelper::kCFF)) { + total += FontHelper::CffData(face, gid).size(); + } + + if (tags.contains(FontHelper::kCFF2)) { + total += FontHelper::Cff2Data(face, gid).size(); + } + } + + return total; +} + Status FontHelper::Cff2GetCharstrings(hb_face_t* face, FontData& non_charstrings, FontData& charstrings) { diff --git a/common/font_helper.h b/common/font_helper.h index 1bf061f..18ccce0 100644 --- a/common/font_helper.h +++ b/common/font_helper.h @@ -145,6 +145,11 @@ class FontHelper { static FontData Cff2Data(hb_face_t* face, uint32_t gid); + // Counts up the total size of all glyph data (gvar, glyf, cff, cff2) + // for the provided set of gids. + static absl::StatusOr TotalGlyphData(hb_face_t* face, + const GlyphSet& gids); + static absl::Status Cff2GetCharstrings(hb_face_t* face, FontData& non_charstrings, FontData& charstrings); diff --git a/common/font_helper_test.cc b/common/font_helper_test.cc index ce37331..7aaaf09 100644 --- a/common/font_helper_test.cc +++ b/common/font_helper_test.cc @@ -618,6 +618,48 @@ TEST_F(FontHelperTest, GlyfData_ShortOverflowSynthetic) { ASSERT_EQ(*data, expected); } +TEST_F(FontHelperTest, TotalGlyphData_GlyfGvar) { + auto size = FontHelper::TotalGlyphData(roboto_vf.get(), GlyphSet{78, 83, 95}); + ASSERT_TRUE(size.ok()) << size.status(); + + uint32_t expected = FontHelper::GlyfData(roboto_vf.get(), 78)->size() + + FontHelper::GlyfData(roboto_vf.get(), 83)->size() + + FontHelper::GlyfData(roboto_vf.get(), 95)->size() + + FontHelper::GvarData(roboto_vf.get(), 78)->size() + + FontHelper::GvarData(roboto_vf.get(), 83)->size() + + FontHelper::GvarData(roboto_vf.get(), 95)->size(); + + ASSERT_GT(*size, 0); + ASSERT_EQ(*size, expected); +} + +TEST_F(FontHelperTest, TotalGlyphData_Cff) { + auto size = + FontHelper::TotalGlyphData(noto_sans_jp_otf.get(), GlyphSet{78, 83, 95}); + ASSERT_TRUE(size.ok()) << size.status(); + + uint32_t expected = FontHelper::CffData(noto_sans_jp_otf.get(), 78).size() + + FontHelper::CffData(noto_sans_jp_otf.get(), 83).size() + + FontHelper::CffData(noto_sans_jp_otf.get(), 95).size(); + + ASSERT_GT(*size, 0); + ASSERT_EQ(*size, expected); +} + +TEST_F(FontHelperTest, TotalGlyphData_Cff2) { + auto size = FontHelper::TotalGlyphData(noto_sans_vf_jp_otf.get(), + GlyphSet{34, 35, 46}); + ASSERT_TRUE(size.ok()) << size.status(); + + uint32_t expected = + FontHelper::Cff2Data(noto_sans_vf_jp_otf.get(), 34).size() + + FontHelper::Cff2Data(noto_sans_vf_jp_otf.get(), 35).size() + + FontHelper::Cff2Data(noto_sans_vf_jp_otf.get(), 46).size(); + + ASSERT_GT(*size, 0); + ASSERT_EQ(*size, expected); +} + // TODO test BuildFont... } // namespace common diff --git a/ift/encoder/BUILD b/ift/encoder/BUILD index d802abc..a398024 100644 --- a/ift/encoder/BUILD +++ b/ift/encoder/BUILD @@ -63,6 +63,8 @@ cc_library( "candidate_merge.cc", "candidate_merge.h", "patch_size_cache.h", + "estimated_patch_size_cache.h", + "estimated_patch_size_cache.cc", "merge_strategy.h", "merge_strategy.cc", "merger.cc", @@ -253,4 +255,17 @@ cc_test( "@abseil-cpp//absl/types:span", "@googletest//:gtest_main", ], +) + +cc_test( + name = "estimated_patch_size_cache_test", + srcs = ["estimated_patch_size_cache_test.cc"], + data = [ + "//common:testdata", + ], + deps = [ + ":segmentation_context", + "//common", + "@googletest//:gtest_main", + ], ) \ No newline at end of file diff --git a/ift/encoder/estimated_patch_size_cache.cc b/ift/encoder/estimated_patch_size_cache.cc new file mode 100644 index 0000000..7ef33f9 --- /dev/null +++ b/ift/encoder/estimated_patch_size_cache.cc @@ -0,0 +1,60 @@ +#include "ift/encoder/estimated_patch_size_cache.h" + +#include "common/int_set.h" +#include "common/try.h" + +using absl::StatusOr; +using absl::flat_hash_set; +using common::GlyphSet; + +namespace ift::encoder { + +StatusOr EstimatedPatchSizeCache::GetPatchSize(const GlyphSet& gids) { + auto it = cache_.find(gids); + if (it != cache_.end()) { + return it->second; + } + + flat_hash_set tags = common::FontHelper::GetTags(face_.get()); + uint32_t table_count = (tags.contains(common::FontHelper::kCFF) ? 1 : 0) + + (tags.contains(common::FontHelper::kCFF2) ? 1 : 0) + + (tags.contains(common::FontHelper::kGlyf) ? 1 : 0) + + (tags.contains(common::FontHelper::kGvar) ? 1 : 0); + + uint32_t gid_width = (gids.size() > 255) ? 3 : 2; + + uint32_t header_size = 1 + 7 * 4; + uint32_t uncompressed_stream_size = + 5 + gids.size() * gid_width + // glyph ids + 4 * table_count + // table tags + 4 * (gids.size() * table_count + 1); // data offsets + + uncompressed_stream_size += + TRY(common::FontHelper::TotalGlyphData(face_.get(), gids)); + + uint32_t size = header_size + (uint32_t)((double)uncompressed_stream_size * + compression_ratio_); + cache_[gids] = size; + return size; +} + +StatusOr EstimatedPatchSizeCache::EstimateCompressionRatio( + hb_face_t* original_face) { + PatchSizeCacheImpl patch_sizes(original_face, 11); + + uint32_t glyph_count = hb_face_get_glyph_count(original_face); + if (glyph_count == 0) { + return 0.0; + } + + common::GlyphSet gids; + gids.insert_range(0, glyph_count - 1); + + double uncompressed_size = + TRY(common::FontHelper::TotalGlyphData(original_face, gids)); + double compressed_size = TRY(patch_sizes.GetPatchSize(gids)); + + return compressed_size / uncompressed_size; +} + +} // namespace ift::encoder \ No newline at end of file diff --git a/ift/encoder/estimated_patch_size_cache.h b/ift/encoder/estimated_patch_size_cache.h new file mode 100644 index 0000000..5349a69 --- /dev/null +++ b/ift/encoder/estimated_patch_size_cache.h @@ -0,0 +1,47 @@ +#ifndef IFT_ENCODER_ESTIMATED_PATCH_SIZE_CACHE_H_ +#define IFT_ENCODER_ESTIMATED_PATCH_SIZE_CACHE_H_ + +#include +#include "absl/status/statusor.h" +#include "common/font_data.h" +#include "common/int_set.h" +#include "ift/encoder/patch_size_cache.h" + +namespace ift::encoder { + +// Estimates the size of a glyph keyed patch using a fixed compression ratio. +// Does not actually run the brotli compression. +// +// The fixed compression ratio is determined by looking at the compression ratio +// of glyph data in the provided original_face. +class EstimatedPatchSizeCache : public PatchSizeCache { + public: + static absl::StatusOr> New(hb_face_t* face) { + double compression_ratio = TRY(EstimateCompressionRatio(face)); + return std::unique_ptr(new EstimatedPatchSizeCache(face, compression_ratio)); + } + + absl::StatusOr GetPatchSize(const common::GlyphSet& gids) override; + + double CompressionRatio() const { + return compression_ratio_; + } + + private: + explicit EstimatedPatchSizeCache(hb_face_t* original_face, + double compression_ratio) + : face_(common::make_hb_face(hb_face_reference(original_face))), + compression_ratio_(compression_ratio), + cache_() {} + + static absl::StatusOr EstimateCompressionRatio( + hb_face_t* original_face); + + common::hb_face_unique_ptr face_; + double compression_ratio_; + absl::flat_hash_map cache_; +}; + +} // namespace ift::encoder + +#endif // IFT_ENCODER_ESTIMATED_PATCH_SIZE_CACHE_H_ diff --git a/ift/encoder/estimated_patch_size_cache_test.cc b/ift/encoder/estimated_patch_size_cache_test.cc new file mode 100644 index 0000000..c72e5ba --- /dev/null +++ b/ift/encoder/estimated_patch_size_cache_test.cc @@ -0,0 +1,46 @@ +#include "ift/encoder/estimated_patch_size_cache.h" + +#include "common/font_helper.h" +#include "common/int_set.h" +#include "gtest/gtest.h" + +#include "common/font_data.h" + +using common::hb_face_unique_ptr; +using common::make_hb_face; +using common::hb_blob_unique_ptr; +using common::make_hb_blob; +using common::FontHelper; +using common::GlyphSet; + +namespace ift::encoder { + +class EstimatedPatchSizeCacheTest : public ::testing::Test { + protected: + EstimatedPatchSizeCacheTest() : roboto(make_hb_face(nullptr)) { + hb_blob_unique_ptr blob = make_hb_blob( + hb_blob_create_from_file("common/testdata/Roboto-Regular.ttf")); + roboto = make_hb_face(hb_face_create(blob.get(), 0)); + } + + + double CompressionRatio(GlyphSet gids, double expected_compression_ratio) { + uint32_t raw_outline_size = + *FontHelper::TotalGlyphData(roboto.get(), gids); + double fixed_size = 1 + 7 * 4; // header + fixed_size += (double) (5 + gids.size() * 2 + 4 + (gids.size() + 1)*4) * expected_compression_ratio; // glyph patches header + auto estimated = *EstimatedPatchSizeCache::New(roboto.get()); + uint32_t compressed_size = *estimated->GetPatchSize(gids); + return (double) (compressed_size - fixed_size) / (double) raw_outline_size; + } + + hb_face_unique_ptr roboto; +}; + +TEST_F(EstimatedPatchSizeCacheTest, PatchSize) { + // There should be a consistent compression ratio between patches. + ASSERT_NEAR(this->CompressionRatio(GlyphSet {44, 47, 49}, 0.457), 0.46, 0.01); + ASSERT_NEAR(CompressionRatio(GlyphSet {45, 48, 50, 51, 52, 53}, 0.457), 0.46, 0.01); +} + +} // namespace ift::encoder \ No newline at end of file diff --git a/ift/encoder/patch_size_cache.h b/ift/encoder/patch_size_cache.h index 2b81669..b95474c 100644 --- a/ift/encoder/patch_size_cache.h +++ b/ift/encoder/patch_size_cache.h @@ -2,6 +2,7 @@ #define IFT_ENCODER_PATCH_SIZE_CACHE_H_ #include +#include #include "absl/container/flat_hash_map.h" #include "absl/status/statusor.h" diff --git a/ift/encoder/segmentation_context.h b/ift/encoder/segmentation_context.h index dc6ade9..2608428 100644 --- a/ift/encoder/segmentation_context.h +++ b/ift/encoder/segmentation_context.h @@ -8,6 +8,7 @@ #include "common/font_data.h" #include "common/int_set.h" #include "common/try.h" +#include "ift/encoder/estimated_patch_size_cache.h" #include "ift/encoder/glyph_closure_cache.h" #include "ift/encoder/glyph_condition_set.h" #include "ift/encoder/glyph_groupings.h" @@ -43,9 +44,9 @@ class SegmentationContext { const std::vector& segments, uint32_t brotli_quality, uint32_t init_font_brotli_quality) - : patch_size_cache(new PatchSizeCacheImpl(face, brotli_quality)), + : patch_size_cache(NewPatchSizeCache(face, brotli_quality)), patch_size_cache_for_init_font( - new PatchSizeCacheImpl(face, init_font_brotli_quality)), + NewPatchSizeCache(face, init_font_brotli_quality)), glyph_closure_cache(face), original_face(common::make_hb_face(hb_face_reference(face))), segmentation_info_(segments, initial_segment, glyph_closure_cache), @@ -175,6 +176,16 @@ class SegmentationContext { // too small to be worthwhile. absl::StatusOr ComputeSegmentCutoff() const; + static std::unique_ptr NewPatchSizeCache(hb_face_t* face, uint32_t brotli_quality) { + if (brotli_quality == 0) { + auto cache = EstimatedPatchSizeCache::New(face); + if (cache.ok()) { + return std::move(*cache); + } + } + return std::unique_ptr(new PatchSizeCacheImpl(face, brotli_quality)); + } + public: // Caches and logging std::unique_ptr patch_size_cache; diff --git a/util/segmenter_config.proto b/util/segmenter_config.proto index 1b8bdaa..937b756 100644 --- a/util/segmenter_config.proto +++ b/util/segmenter_config.proto @@ -29,6 +29,10 @@ message SegmenterConfig { // When generating compressed patches (to evaluate their size) this is the brotli quality // level used. Segmentation is typically bottle necked on brotli compression so higher values // increase segmentation times, but yield more accurate results. + // + // If quality is set to '0' this disables brotli compression and instead estimates the + // effect of compression using a fixed compression ratio calculated based on how well + // the glyph data in the input font compresses. uint32 brotli_quality = 5 [default = 8]; // During processing to determine which segments to move into the initial font this is