Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 37 additions & 16 deletions ift/encoder/closure_glyph_segmenter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <algorithm>
#include <cstdint>
#include <optional>
#include <string>
#include <vector>

#include "absl/container/btree_map.h"
Expand Down Expand Up @@ -193,11 +194,10 @@ struct SegmentOrdering {
};

static std::vector<Segment> PreGroupSegments(
const btree_map<SegmentSet, MergeStrategy>& merge_groups,
const std::vector<SegmentOrdering>& ordering,
const std::vector<SubsetDefinition>& subset_definitions,
std::vector<uint32_t>& segment_index_map
) {
const btree_map<SegmentSet, MergeStrategy>& merge_groups,
const std::vector<SegmentOrdering>& ordering,
const std::vector<SubsetDefinition>& subset_definitions,
std::vector<uint32_t>& segment_index_map) {
segment_index_map.resize(subset_definitions.size());
std::vector<Segment> segments;

Expand All @@ -206,9 +206,10 @@ static std::vector<Segment> PreGroupSegments(
auto merge_group_it = merge_groups.begin();
auto ordering_it = ordering.begin();

while (ordering_it != ordering.end()) {
while (ordering_it != ordering.end()) {
const auto& o = *ordering_it;
if (o.group_index != last_group_index && merge_group_it != merge_groups.end()) {
if (o.group_index != last_group_index &&
merge_group_it != merge_groups.end()) {
merge_group_it++;
}

Expand All @@ -217,11 +218,11 @@ static std::vector<Segment> PreGroupSegments(
strategy = &(merge_group_it->second);
}

Segment segment = Segment{subset_definitions[o.original_index], o.probability};
Segment segment =
Segment{subset_definitions[o.original_index], o.probability};
ordering_it++;

if (strategy == nullptr ||
strategy->PreClosureGroupSize() <= 1 ||
if (strategy == nullptr || strategy->PreClosureGroupSize() <= 1 ||
o.probability.Max() > strategy->PreClosureProbabilityThreshold()) {
segment_index_map[o.original_index] = i;
} else {
Expand All @@ -232,15 +233,18 @@ static std::vector<Segment> PreGroupSegments(
break;
}

segment.Definition().Union(subset_definitions[ordering_it->original_index]);
segment.Definition().Union(
subset_definitions[ordering_it->original_index]);
segment_index_map[ordering_it->original_index] = i;

ordering_it++;
remaining--;
}

if (strategy->UseCosts()) {
segment.SetProbability(strategy->ProbabilityCalculator()->ComputeProbability(segment.Definition()));
segment.SetProbability(
strategy->ProbabilityCalculator()->ComputeProbability(
segment.Definition()));
}
}

Expand Down Expand Up @@ -328,7 +332,8 @@ static StatusOr<std::vector<Segment>> ToOrderedSegments(

// maps from index in subset_definitions to the new ordering.
std::vector<uint32_t> segment_index_map;
std::vector<Segment> segments = PreGroupSegments(merge_groups, ordering, subset_definitions, segment_index_map);
std::vector<Segment> segments = PreGroupSegments(
merge_groups, ordering, subset_definitions, segment_index_map);
VLOG(0) << segments.size() << " segments after pregrouping.";

btree_map<SegmentSet, MergeStrategy> new_merge_groups;
Expand All @@ -344,8 +349,13 @@ static StatusOr<std::vector<Segment>> ToOrderedSegments(
remapped_full.insert(s_prime);
}

std::string name = std::to_string(group_index);
if (strategy.Name().has_value()) {
name = *strategy.Name();
}

VLOG(0) << " Merge group " << group_index << " has " << remapped.size() << " segments.";
VLOG(0) << " Merge group " << name << " has " << remapped.size()
<< " segments.";
group_index++;

if (!new_merge_groups.insert(std::make_pair(remapped, std::move(strategy)))
Expand Down Expand Up @@ -392,6 +402,7 @@ StatusOr<GlyphSegmentation> ClosureGlyphSegmenter::CodepointToGlyphSegments(
const std::vector<SubsetDefinition>& subset_definitions,
btree_map<SegmentSet, MergeStrategy> merge_groups,
bool place_fallback_in_init) const {

for (const auto& [segments, strategy] : merge_groups) {
if (strategy.UseCosts()) {
TRYV(CheckForDisjointCodepoints(subset_definitions, segments));
Expand Down Expand Up @@ -448,8 +459,13 @@ StatusOr<GlyphSegmentation> ClosureGlyphSegmenter::CodepointToGlyphSegments(

// ### Iteratively merge segments and incrementally reprocess affected data.
size_t merger_index = 0;
std::string merger_name = std::to_string(merger_index);
if (mergers[merger_index].Strategy().Name().has_value()) {
merger_name = *mergers[merger_index].Strategy().Name();
}

segment_index_t last_merged_segment_index = 0;
VLOG(0) << "Starting merge selection for merge group " << merger_index
VLOG(0) << "Starting merge selection for merge group " << merger_name
<< std::endl
<< " " << mergers[merger_index].NumInscopeSegments()
<< " inscope segments, " << mergers[merger_index].NumCutoffSegments()
Expand All @@ -461,8 +477,13 @@ StatusOr<GlyphSegmentation> ClosureGlyphSegmenter::CodepointToGlyphSegments(

if (!merged.has_value()) {
merger_index++;

if (merger_index < mergers.size()) {
VLOG(0) << "Merge group finished, starting next group " << merger_index
std::string merger_name = std::to_string(merger_index);
if (mergers[merger_index].Strategy().Name().has_value()) {
merger_name = *mergers[merger_index].Strategy().Name();
}
VLOG(0) << "Merge group finished, starting next group " << merger_name
<< std::endl
<< " " << mergers[merger_index].NumInscopeSegments()
<< " inscope segments, "
Expand Down
18 changes: 5 additions & 13 deletions ift/encoder/closure_glyph_segmenter_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1427,19 +1427,11 @@ if (s0 AND s2) then p2
)");
}


TEST_F(ClosureGlyphSegmenterTest, MultipleMergeGroups_PreGrouping) {
UnicodeFrequencies freq{
{{' ', ' '}, 100},
{{'d', 'd'}, 100},
{{'a', 'a'}, 60},
{{'e', 'e'}, 30},
{{'b', 'b'}, 29},
{{'f', 'f'}, 28},
{{'c', 'c'}, 10},
{{'g', 'g'}, 9},
{{'h', 'h'}, 5},
{{'i', 'i'}, 1}, // 8
{{' ', ' '}, 100}, {{'d', 'd'}, 100}, {{'a', 'a'}, 60}, {{'e', 'e'}, 30},
{{'b', 'b'}, 29}, {{'f', 'f'}, 28}, {{'c', 'c'}, 10}, {{'g', 'g'}, 9},
{{'h', 'h'}, 5}, {{'i', 'i'}, 1}, // 8
};

MergeStrategy costs = *MergeStrategy::CostBased(std::move(freq), 0, 1);
Expand Down Expand Up @@ -1473,8 +1465,8 @@ TEST_F(ClosureGlyphSegmenterTest, MultipleMergeGroups_PreGrouping) {
// Group 1
{'d'},
{'a'},
{'e', 'b', 'f'}, // pre merge
{'c', 'g'}, // pre merge
{'e', 'b', 'f'}, // pre merge
{'c', 'g'}, // pre merge
// Shared
{'h'},
{'i'},
Expand Down
2 changes: 1 addition & 1 deletion ift/encoder/estimated_patch_size_cache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
#include "common/int_set.h"
#include "common/try.h"

using absl::StatusOr;
using absl::flat_hash_set;
using absl::StatusOr;
using common::GlyphSet;

namespace ift::encoder {
Expand Down
8 changes: 4 additions & 4 deletions ift/encoder/estimated_patch_size_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define IFT_ENCODER_ESTIMATED_PATCH_SIZE_CACHE_H_

#include <memory>

#include "absl/status/statusor.h"
#include "common/font_data.h"
#include "common/int_set.h"
Expand All @@ -18,14 +19,13 @@ class EstimatedPatchSizeCache : public PatchSizeCache {
public:
static absl::StatusOr<std::unique_ptr<PatchSizeCache>> New(hb_face_t* face) {
double compression_ratio = TRY(EstimateCompressionRatio(face));
return std::unique_ptr<PatchSizeCache>(new EstimatedPatchSizeCache(face, compression_ratio));
return std::unique_ptr<PatchSizeCache>(
new EstimatedPatchSizeCache(face, compression_ratio));
}

absl::StatusOr<uint32_t> GetPatchSize(const common::GlyphSet& gids) override;

double CompressionRatio() const {
return compression_ratio_;
}
double CompressionRatio() const { return compression_ratio_; }

private:
explicit EstimatedPatchSizeCache(hb_face_t* original_face,
Expand Down
27 changes: 13 additions & 14 deletions ift/encoder/estimated_patch_size_cache_test.cc
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
#include "ift/encoder/estimated_patch_size_cache.h"

#include "common/font_data.h"
#include "common/font_helper.h"
#include "common/int_set.h"
#include "gtest/gtest.h"

#include "common/font_data.h"

using common::hb_face_unique_ptr;
using common::make_hb_face;
using common::hb_blob_unique_ptr;
using common::make_hb_blob;
using common::FontHelper;
using common::GlyphSet;
using common::hb_blob_unique_ptr;
using common::hb_face_unique_ptr;
using common::make_hb_blob;
using common::make_hb_face;

namespace ift::encoder {

Expand All @@ -23,24 +22,24 @@ class EstimatedPatchSizeCacheTest : public ::testing::Test {
roboto = make_hb_face(hb_face_create(blob.get(), 0));
}


double CompressionRatio(GlyphSet gids, double expected_compression_ratio) {
uint32_t raw_outline_size =
*FontHelper::TotalGlyphData(roboto.get(), gids);
double fixed_size = 1 + 7 * 4; // header
fixed_size += (double) (5 + gids.size() * 2 + 4 + (gids.size() + 1)*4) * expected_compression_ratio; // glyph patches header
uint32_t raw_outline_size = *FontHelper::TotalGlyphData(roboto.get(), gids);
double fixed_size = 1 + 7 * 4; // header
fixed_size += (double)(5 + gids.size() * 2 + 4 + (gids.size() + 1) * 4) *
expected_compression_ratio; // glyph patches header
auto estimated = *EstimatedPatchSizeCache::New(roboto.get());
uint32_t compressed_size = *estimated->GetPatchSize(gids);
return (double) (compressed_size - fixed_size) / (double) raw_outline_size;
return (double)(compressed_size - fixed_size) / (double)raw_outline_size;
}

hb_face_unique_ptr roboto;
};

TEST_F(EstimatedPatchSizeCacheTest, PatchSize) {
// There should be a consistent compression ratio between patches.
ASSERT_NEAR(this->CompressionRatio(GlyphSet {44, 47, 49}, 0.457), 0.46, 0.01);
ASSERT_NEAR(CompressionRatio(GlyphSet {45, 48, 50, 51, 52, 53}, 0.457), 0.46, 0.01);
ASSERT_NEAR(this->CompressionRatio(GlyphSet{44, 47, 49}, 0.457), 0.46, 0.01);
ASSERT_NEAR(CompressionRatio(GlyphSet{45, 48, 50, 51, 52, 53}, 0.457), 0.46,
0.01);
}

} // namespace ift::encoder
15 changes: 9 additions & 6 deletions ift/encoder/glyph_segmentation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,11 @@

#include <cstdint>
#include <cstdio>
#include <optional>
#include <sstream>

#include "absl/container/btree_map.h"
#include "absl/container/btree_set.h"
#include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h"
#include "absl/status/statusor.h"
#include "common/font_helper.h"
#include "common/int_set.h"
Expand Down Expand Up @@ -144,17 +142,22 @@ ProtoType TagsToSetProto(const btree_set<hb_tag_t>& set) {
return values;
}

// Populates `segment_proto` from a subset definition: the definition's
// codepoints are written to the proto's codepoints field and its feature tags
// to the proto's features field. Both fields are overwritten; other fields of
// `segment_proto` are not touched here.
void GlyphSegmentation::SubsetDefinitionToSegment(const SubsetDefinition& def,
                                                  SegmentProto& segment_proto) {
  auto& codepoints_out = *segment_proto.mutable_codepoints();
  codepoints_out = ToSetProto<Codepoints>(def.codepoints);

  auto& features_out = *segment_proto.mutable_features();
  features_out = TagsToSetProto<Features>(def.feature_tags);
}

SegmentationPlan GlyphSegmentation::ToSegmentationPlanProto() const {
SegmentationPlan config;

uint32_t set_index = 0;
for (const auto& s : Segments()) {
if (!s.Empty()) {
SegmentProto segment_proto;
(*segment_proto.mutable_codepoints()) =
ToSetProto<Codepoints>(s.codepoints);
(*segment_proto.mutable_features()) =
TagsToSetProto<Features>(s.feature_tags);
SubsetDefinitionToSegment(s, segment_proto);
(*config.mutable_segments())[set_index++] = segment_proto;
} else {
set_index++;
Expand Down
3 changes: 3 additions & 0 deletions ift/encoder/glyph_segmentation.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ class GlyphSegmentation {
return init_font_segment_;
};

static void SubsetDefinitionToSegment(const SubsetDefinition& def,
SegmentProto& segment_proto);

SegmentationPlan ToSegmentationPlanProto() const;

static absl::Status GroupsToSegmentation(
Expand Down
17 changes: 14 additions & 3 deletions ift/encoder/merge_strategy.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,18 @@ class MergeStrategy {
bool UseCosts() const { return use_costs_; }
bool UsePatchMerges() const { return use_patch_merges_; }

std::optional<absl::string_view> Name() const {
if (name_.has_value()) {
return name_;
} else {
return std::nullopt;
}
}

// Sets the name of this strategy; Name() returns it afterwards.
// `name` is a by-value sink parameter, so it is moved into the member instead
// of being copied a second time.
void SetName(std::string name) { name_ = std::move(name); }

uint32_t NetworkOverheadCost() const { return network_overhead_cost_; }
uint32_t MinimumGroupSize() const { return min_group_size_; }
uint32_t PatchSizeMinBytes() const { return patch_size_min_bytes_; }
Expand Down Expand Up @@ -145,9 +157,7 @@ class MergeStrategy {
return init_font_merge_probability_threshold_;
}

uint32_t PreClosureGroupSize() const {
return pre_closure_group_size_;
}
uint32_t PreClosureGroupSize() const { return pre_closure_group_size_; }

double PreClosureProbabilityThreshold() const {
return pre_closure_probability_threshold_;
Expand Down Expand Up @@ -193,6 +203,7 @@ class MergeStrategy {
patch_size_max_bytes_(patch_size_max_bytes),
probability_calculator_(nullptr) {}

std::optional<std::string> name_ = std::nullopt;
bool use_costs_;
uint32_t network_overhead_cost_;
uint32_t min_group_size_;
Expand Down
1 change: 1 addition & 0 deletions util/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ proto_library(
"//visibility:public",
],
deps = [
":segmentation_plan_proto",
":common_proto",
],
)
Expand Down
Loading
Loading