Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 69 additions & 10 deletions ift/encoder/closure_glyph_segmenter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,10 @@
#include "common/int_set.h"
#include "common/try.h"
#include "common/woff2.h"
#include "ift/encoder/activation_condition.h"
#include "ift/encoder/glyph_segmentation.h"
#include "ift/encoder/merge_strategy.h"
#include "ift/encoder/merger.h"
#include "ift/encoder/patch_size_cache.h"
#include "ift/encoder/segment.h"
#include "ift/encoder/segmentation_context.h"
#include "ift/encoder/subset_definition.h"
#include "ift/encoder/types.h"
Expand Down Expand Up @@ -193,6 +192,66 @@ struct SegmentOrdering {
}
};

static std::vector<Segment> PreGroupSegments(
const btree_map<SegmentSet, MergeStrategy>& merge_groups,
const std::vector<SegmentOrdering>& ordering,
const std::vector<SubsetDefinition>& subset_definitions,
std::vector<uint32_t>& segment_index_map
) {
segment_index_map.resize(subset_definitions.size());
std::vector<Segment> segments;

unsigned i = 0;
unsigned last_group_index = 0;
auto merge_group_it = merge_groups.begin();
auto ordering_it = ordering.begin();

while (ordering_it != ordering.end()) {
const auto& o = *ordering_it;
if (o.group_index != last_group_index && merge_group_it != merge_groups.end()) {
merge_group_it++;
}

const MergeStrategy* strategy = nullptr;
if (merge_group_it != merge_groups.end()) {
strategy = &(merge_group_it->second);
}

Segment segment = Segment{subset_definitions[o.original_index], o.probability};
ordering_it++;

if (strategy == nullptr ||
strategy->PreClosureGroupSize() <= 1 ||
o.probability.Max() > strategy->PreClosureProbabilityThreshold()) {
segment_index_map[o.original_index] = i;
} else {
uint32_t remaining = strategy->PreClosureGroupSize() - 1;
while (remaining > 0) {
if (ordering_it == ordering.end() ||
ordering_it->group_index != o.group_index) {
break;
}

segment.Definition().Union(subset_definitions[ordering_it->original_index]);
segment_index_map[ordering_it->original_index] = i;

ordering_it++;
remaining--;
}

if (strategy->UseCosts()) {
segment.SetProbability(strategy->ProbabilityCalculator()->ComputeProbability(segment.Definition()));
}
}

last_group_index = o.group_index;
segments.push_back(segment);
i++;
}

return segments;
}

// Converts the input subset definitions to a sorted list of segments, remaps
// the merge_groups segment set keys to reflect the ordering changes.
static StatusOr<std::vector<Segment>> ToOrderedSegments(
Expand Down Expand Up @@ -268,16 +327,12 @@ static StatusOr<std::vector<Segment>> ToOrderedSegments(
std::sort(ordering.begin(), ordering.end());

// maps from index in subset_definitions to the new ordering.
std::vector<uint32_t> segment_index_map(subset_definitions.size());
std::vector<Segment> segments;
unsigned i = 0;
for (const auto& ordering : ordering) {
segments.push_back(Segment{subset_definitions[ordering.original_index],
ordering.probability});
segment_index_map[ordering.original_index] = i++;
}
std::vector<uint32_t> segment_index_map;
std::vector<Segment> segments = PreGroupSegments(merge_groups, ordering, subset_definitions, segment_index_map);
VLOG(0) << segments.size() << " segments after pregrouping.";

btree_map<SegmentSet, MergeStrategy> new_merge_groups;
group_index = 0;
for (auto& [segments, strategy] : merge_groups) {
SegmentSet remapped;
SegmentSet remapped_full;
Expand All @@ -289,6 +344,10 @@ static StatusOr<std::vector<Segment>> ToOrderedSegments(
remapped_full.insert(s_prime);
}


VLOG(0) << " Merge group " << group_index << " has " << remapped.size() << " segments.";
group_index++;

if (!new_merge_groups.insert(std::make_pair(remapped, std::move(strategy)))
.second) {
return absl::InvalidArgumentError(
Expand Down
72 changes: 72 additions & 0 deletions ift/encoder/closure_glyph_segmenter_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1427,6 +1427,78 @@ if (s0 AND s2) then p2
)");
}


// Checks that pre-grouping merges low probability segments of a cost based
// merge group prior to closure analysis, while leaving high probability
// segments and segments shared between merge groups untouched.
TEST_F(ClosureGlyphSegmenterTest, MultipleMergeGroups_PreGrouping) {
  UnicodeFrequencies frequencies{
      {{' ', ' '}, 100},
      {{'d', 'd'}, 100},
      {{'a', 'a'}, 60},
      {{'e', 'e'}, 30},
      {{'b', 'b'}, 29},
      {{'f', 'f'}, 28},
      {{'c', 'c'}, 10},
      {{'g', 'g'}, 9},
      {{'h', 'h'}, 5},
      {{'i', 'i'}, 1},  // 'i' is input segment 8
  };

  // Cost based strategy that pre-groups up to three segments at a time.
  MergeStrategy cost_strategy =
      *MergeStrategy::CostBased(std::move(frequencies), 0, 1);
  cost_strategy.SetPreClosureProbabilityThreshold(0.55);
  cost_strategy.SetPreClosureGroupSize(3);

  // Input segments 7 and 8 ('h' and 'i') belong to both merge groups.
  btree_map<SegmentSet, MergeStrategy> groups{
      {{0, 1, 2, 3, 4, 5, 6, 7, 8}, cost_strategy},
      {{7, 8}, MergeStrategy::Heuristic(1)},
  };

  auto result = segmenter.CodepointToGlyphSegments(roboto.get(), {},
                                                   {
                                                       {'a'},
                                                       {'b'},
                                                       {'c'},
                                                       {'d'},
                                                       {'e'},
                                                       {'f'},
                                                       {'g'},
                                                       {'h'},
                                                       {'i'},
                                                   },
                                                   groups, false);
  ASSERT_TRUE(result.ok()) << result.status();

  // 'd' and 'a' sit above the pre-grouping threshold, so they stay alone.
  // 'e', 'b', 'f', 'c' and 'g' are below it and get grouped in sets of 3.
  // 'h' and 'i' are shared between merge groups and so are not pre-grouped.
  std::vector<SubsetDefinition> expected = {
      // Group 1
      {'d'},
      {'a'},
      {'e', 'b', 'f'},  // pre merge
      {'c', 'g'},       // pre merge
      // Shared
      {'h'},
      {'i'},
  };
  ASSERT_EQ(result->Segments(), expected);
  ASSERT_EQ(result->ToString(),
            R"(initial font: { gid0 }
p0: { gid72 }
p1: { gid69 }
p2: { gid70, gid73, gid74 }
p3: { gid71, gid75 }
p4: { gid76 }
p5: { gid77 }
p6: { gid444, gid446 }
if (s0) then p0
if (s1) then p1
if (s2) then p2
if (s3) then p3
if (s4) then p4
if (s5) then p5
if (s2 AND s5) then p6
)");
}

// TODO(garretrieger): test that segments are excluded by init font segment. ie.
// if a segment is present in the init font then it should be cleared out in the
// segmentation.
Expand Down
51 changes: 41 additions & 10 deletions ift/encoder/glyph_closure_cache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "common/int_set.h"
#include "common/try.h"
#include "ift/encoder/requested_segmentation_information.h"
#include "ift/encoder/subset_definition.h"
#include "ift/encoder/types.h"

using absl::Status;
Expand Down Expand Up @@ -63,6 +64,37 @@ StatusOr<GlyphSet> GlyphClosureCache::CodepointsToOrGids(
return or_gids;
}

// Produces the subset definition made of everything except the segments named
// in segment_ids, i.e. the init font segment together with every segment that
// is not listed.
//
// segmentation_info: source of the segments, the init font segment and the
//                    precomputed full definition.
// segment_ids:       ids of the segments to leave out.
// combined:          union of the definitions of the segments in segment_ids;
//                    only consulted on the subtraction path below.
SubsetDefinition ComputExceptSegment(
    const RequestedSegmentationInformation& segmentation_info,
    const SegmentSet& segment_ids, const SubsetDefinition& combined) {
  const auto& all_segments = segmentation_info.Segments();

  if (segmentation_info.SegmentsAreDisjoint() &&
      (segment_ids.size() == 1 ||
       segment_ids.size() < (all_segments.size() / 2))) {
    // Optimized for disjoint input segments and a smallish number of segment
    // ids: start from the full definition and remove the excluded part.
    SubsetDefinition remainder = segmentation_info.FullDefinition();
    remainder.Subtract(combined);
    return remainder;
  }

  // General approach, works even when segments are not disjoint: union the
  // init font segment with every segment whose id is not in segment_ids.
  SegmentSet kept_ids = segment_ids;
  kept_ids.invert();

  const uint32_t segment_count = all_segments.size();
  SubsetDefinition remainder = segmentation_info.InitFontSegment();
  for (segment_index_t id : kept_ids) {
    if (id >= segment_count) {
      // The inverted set continues past the last real segment; stop there.
      break;
    }
    remainder.Union(all_segments[id].Definition());
  }

  return remainder;
}

Status GlyphClosureCache::AnalyzeSegment(
const RequestedSegmentationInformation& segmentation_info,
const SegmentSet& segment_ids, GlyphSet& and_gids, GlyphSet& or_gids,
Expand Down Expand Up @@ -95,20 +127,19 @@ Status GlyphClosureCache::AnalyzeSegment(
// * I - D: the activation conditions for these glyphs is s_i OR …
// Where … is one or more additional segments.
// * D intersection I: the activation conditions for these glyphs is only s_i
SubsetDefinition except_segment = segmentation_info.InitFontSegment();
for (uint32_t s = 0; s < segmentation_info.Segments().size(); s++) {
if (segment_ids.contains(s)) {
continue;
}
except_segment.Union(segmentation_info.Segments()[s].Definition());

SubsetDefinition
combined; // This is the subset definition of the unions of segment_ids.
for (segment_index_t s_id : segment_ids) {
combined.Union(segmentation_info.Segments()[s_id].Definition());
}

SubsetDefinition except_segment =
ComputExceptSegment(segmentation_info, segment_ids, combined);
auto B_except_segment_closure = TRY(GlyphClosure(except_segment));

SubsetDefinition only_segment = segmentation_info.InitFontSegment();
for (segment_index_t s_id : segment_ids) {
only_segment.Union(segmentation_info.Segments()[s_id].Definition());
}
SubsetDefinition only_segment = combined;
only_segment.Union(segmentation_info.InitFontSegment());

auto I_only_segment_closure = TRY(GlyphClosure(only_segment));
I_only_segment_closure.subtract(segmentation_info.InitFontGlyphs());
Expand Down
19 changes: 19 additions & 0 deletions ift/encoder/merge_strategy.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,22 @@ class MergeStrategy {
return init_font_merge_probability_threshold_;
}

// Number of segments that may be combined into one segment by pre-grouping.
// Returns the stored value as is; values <= 1 mean pre-grouping is not
// applied by the closure glyph segmenter.
uint32_t PreClosureGroupSize() const { return pre_closure_group_size_; }

// Probability threshold for pre-grouping. The closure glyph segmenter leaves
// segments whose maximum probability is above this value out of pre-grouping.
double PreClosureProbabilityThreshold() const { return pre_closure_probability_threshold_; }

// Sets the value reported by PreClosureGroupSize(). The value is stored
// without validation.
void SetPreClosureGroupSize(uint32_t value) { pre_closure_group_size_ = value; }

// Sets the value reported by PreClosureProbabilityThreshold(). The value is
// stored without validation.
void SetPreClosureProbabilityThreshold(double value) { pre_closure_probability_threshold_ = value; }

// Stores the init font merge threshold; std::nullopt leaves it unset.
void SetInitFontMergeThreshold(std::optional<double> value) { init_font_merge_threshold_ = value; }
Expand Down Expand Up @@ -187,6 +203,9 @@ class MergeStrategy {
std::optional<double> init_font_merge_probability_threshold_ = std::nullopt;
bool use_patch_merges_ = false;

uint32_t pre_closure_group_size_ = 1;
double pre_closure_probability_threshold_ = 0.0;

std::shared_ptr<freq::ProbabilityCalculator> probability_calculator_;
};

Expand Down
18 changes: 18 additions & 0 deletions ift/encoder/requested_segmentation_information.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,24 @@ RequestedSegmentationInformation::RequestedSegmentationInformation(
GlyphClosureCache& closure_cache)
: segments_(std::move(segments)), init_font_segment_() {
ReassignInitSubset(closure_cache, std::move(init_font_segment));

segments_disjoint_ = true;

full_definition_ = init_font_segment_;
for (const auto& s : segments_) {
const auto& def = s.Definition();
if (segments_disjoint_) {
for (hb_tag_t tag : def.feature_tags) {
if (full_definition_.feature_tags.contains(tag)) {
segments_disjoint_ = false;
}
}
segments_disjoint_ =
segments_disjoint_ &&
!full_definition_.codepoints.intersects(def.codepoints);
}
full_definition_.Union(s.Definition());
}
}

} // namespace ift::encoder
6 changes: 6 additions & 0 deletions ift/encoder/requested_segmentation_information.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,10 @@ class RequestedSegmentationInformation {

// Read-only access to the stored full closure glyph set.
const common::GlyphSet& FullClosure() const {
  return full_closure_;
}

// Read-only access to the full subset definition: the init font segment
// unioned with the definition of every segment, as computed at construction.
const SubsetDefinition& FullDefinition() const {
  return full_definition_;
}

// True when, at construction, no segment shared a codepoint or feature tag
// with the init font segment or with any segment preceding it.
bool SegmentsAreDisjoint() const {
  return segments_disjoint_;
}

// Read-only access to the list of segments held by this object.
const std::vector<Segment>& Segments() const {
  return segments_;
}

const std::vector<SubsetDefinition> SegmentSubsetDefinitions() const {
Expand Down Expand Up @@ -125,8 +129,10 @@ class RequestedSegmentationInformation {

std::vector<Segment> segments_;
SubsetDefinition init_font_segment_;
SubsetDefinition full_definition_;
common::GlyphSet init_font_glyphs_;
common::GlyphSet full_closure_;
bool segments_disjoint_;
};

} // namespace ift::encoder
Expand Down
6 changes: 4 additions & 2 deletions ift/encoder/segmentation_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,14 +176,16 @@ class SegmentationContext {
// too small to be worthwhile.
absl::StatusOr<segment_index_t> ComputeSegmentCutoff() const;

// Creates the patch size cache to use for `face`.
//
// When brotli_quality is 0 an EstimatedPatchSizeCache is tried first and
// returned if it was created successfully. In every other case (non-zero
// brotli_quality, or the estimated cache could not be created) a
// PatchSizeCacheImpl constructed from face and brotli_quality is returned.
static std::unique_ptr<PatchSizeCache> NewPatchSizeCache(
    hb_face_t* face, uint32_t brotli_quality) {
  if (brotli_quality == 0) {
    auto cache = EstimatedPatchSizeCache::New(face);
    if (cache.ok()) {
      // Move the owned cache out of the status wrapper into the return value.
      return std::move(*cache);
    }
  }
  return std::unique_ptr<PatchSizeCache>(
      new PatchSizeCacheImpl(face, brotli_quality));
}

public:
Expand Down
Loading
Loading