Skip to content

Commit 7d1c5b2

Browse files
committed
Added typed variants (glyph, codepoint, segment) of IntSet.
Helps make it clearer what the contents of the sets are.
1 parent 25ca1e6 commit 7d1c5b2

15 files changed

+217
-166
lines changed

common/font_helper.cc

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "common/font_data.h"
1010
#include "common/hb_set_unique_ptr.h"
1111
#include "common/indexed_data_reader.h"
12+
#include "common/int_set.h"
1213
#include "hb-ot.h"
1314
#include "hb-subset.h"
1415
#include "hb.h"
@@ -215,9 +216,9 @@ StatusOr<uint32_t> FontHelper::GvarSharedTupleCount(const hb_face_t* face) {
215216
return *count;
216217
}
217218

218-
IntSet FontHelper::GidsToUnicodes(hb_face_t* face, const IntSet& gids) {
219+
CodepointSet FontHelper::GidsToUnicodes(hb_face_t* face, const GlyphSet& gids) {
219220
auto gid_to_unicode = FontHelper::GidToUnicodeMap(face);
220-
IntSet result;
221+
CodepointSet result;
221222
for (uint32_t gid : gids) {
222223
auto unicode = gid_to_unicode.find(gid);
223224
if (unicode != gid_to_unicode.end()) {
@@ -243,10 +244,10 @@ flat_hash_map<uint32_t, uint32_t> FontHelper::GidToUnicodeMap(hb_face_t* face) {
243244
return gid_to_unicode;
244245
}
245246

246-
IntSet FontHelper::ToCodepointsSet(hb_face_t* face) {
247+
CodepointSet FontHelper::ToCodepointsSet(hb_face_t* face) {
247248
hb_set_unique_ptr codepoints = make_hb_set();
248249
hb_face_collect_unicodes(face, codepoints.get());
249-
return IntSet(codepoints);
250+
return CodepointSet(codepoints);
250251
}
251252

252253
absl::flat_hash_set<hb_tag_t> FontHelper::GetTags(hb_face_t* face) {

common/font_helper.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -193,9 +193,9 @@ class FontHelper {
193193
static absl::flat_hash_map<uint32_t, uint32_t> GidToUnicodeMap(
194194
hb_face_t* face);
195195

196-
static IntSet GidsToUnicodes(hb_face_t* face, const IntSet& gids);
196+
static CodepointSet GidsToUnicodes(hb_face_t* face, const GlyphSet& gids);
197197

198-
static IntSet ToCodepointsSet(hb_face_t* face);
198+
static CodepointSet ToCodepointsSet(hb_face_t* face);
199199

200200
static absl::flat_hash_set<hb_tag_t> GetTags(hb_face_t* face);
201201
static std::vector<hb_tag_t> GetOrderedTags(hb_face_t* face);

common/int_set.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,31 @@ class IntSet {
272272
hb_set_unique_ptr set_;
273273
};
274274

275+
// Typed variants
276+
class GlyphSet : public IntSet {
277+
public:
278+
GlyphSet() : IntSet(){};
279+
GlyphSet(std::initializer_list<hb_codepoint_t> values) : IntSet(values) {}
280+
explicit GlyphSet(const hb_set_t* set) : IntSet(set) {}
281+
explicit GlyphSet(const hb_set_unique_ptr& set) : IntSet(set) {}
282+
};
283+
284+
class CodepointSet : public IntSet {
285+
public:
286+
CodepointSet() : IntSet(){};
287+
CodepointSet(std::initializer_list<hb_codepoint_t> values) : IntSet(values) {}
288+
explicit CodepointSet(const hb_set_t* set) : IntSet(set) {}
289+
explicit CodepointSet(const hb_set_unique_ptr& set) : IntSet(set) {}
290+
};
291+
292+
class SegmentSet : public IntSet {
293+
public:
294+
SegmentSet() : IntSet(){};
295+
SegmentSet(std::initializer_list<hb_codepoint_t> values) : IntSet(values) {}
296+
explicit SegmentSet(const hb_set_t* set) : IntSet(set) {}
297+
explicit SegmentSet(const hb_set_unique_ptr& set) : IntSet(set) {}
298+
};
299+
275300
} // namespace common
276301

277302
#endif

ift/encoder/closure_glyph_segmenter.cc

Lines changed: 55 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,16 @@ using absl::flat_hash_set;
2626
using absl::Status;
2727
using absl::StatusOr;
2828
using absl::StrCat;
29+
using common::CodepointSet;
2930
using common::CompatId;
3031
using common::FontData;
3132
using common::FontHelper;
33+
using common::GlyphSet;
3234
using common::hb_set_unique_ptr;
3335
using common::IntSet;
3436
using common::make_hb_face;
3537
using common::make_hb_set;
38+
using common::SegmentSet;
3639
using ift::GlyphKeyedDiff;
3740

3841
namespace ift::encoder {
@@ -53,26 +56,26 @@ namespace ift::encoder {
5356

5457
class GlyphConditions {
5558
public:
56-
GlyphConditions() : and_segments(make_hb_set()), or_segments(make_hb_set()) {}
57-
IntSet and_segments;
58-
IntSet or_segments;
59+
GlyphConditions() : and_segments(), or_segments() {}
60+
SegmentSet and_segments;
61+
SegmentSet or_segments;
5962

60-
void RemoveSegments(const IntSet& segments) {
63+
void RemoveSegments(const SegmentSet& segments) {
6164
and_segments.subtract(segments);
6265
or_segments.subtract(segments);
6366
}
6467
};
6568

6669
class SegmentationContext;
6770

68-
Status AnalyzeSegment(SegmentationContext& context, const IntSet& codepoints,
69-
IntSet& and_gids, IntSet& or_gids,
70-
IntSet& exclusive_gids);
71+
Status AnalyzeSegment(SegmentationContext& context,
72+
const CodepointSet& codepoints, GlyphSet& and_gids,
73+
GlyphSet& or_gids, GlyphSet& exclusive_gids);
7174

7275
class SegmentationContext {
7376
public:
74-
SegmentationContext(hb_face_t* face, const IntSet& initial_segment,
75-
const std::vector<IntSet>& codepoint_segments)
77+
SegmentationContext(hb_face_t* face, const CodepointSet& initial_segment,
78+
const std::vector<CodepointSet>& codepoint_segments)
7679
: preprocessed_face(make_hb_face(hb_subset_preprocess(face))),
7780
original_face(make_hb_face(hb_face_reference(face))),
7881
segments(),
@@ -108,7 +111,7 @@ class SegmentationContext {
108111
fallback_segments = {};
109112
}
110113

111-
StatusOr<IntSet> GlyphClosure(const IntSet& codepoints) {
114+
StatusOr<GlyphSet> GlyphClosure(const CodepointSet& codepoints) {
112115
auto it = glyph_closure_cache.find(codepoints);
113116
if (it != glyph_closure_cache.end()) {
114117
glyph_closure_cache_hit++;
@@ -140,9 +143,9 @@ class SegmentationContext {
140143
hb_map_values(new_to_old, gids.get());
141144
hb_subset_plan_destroy(plan);
142145

143-
glyph_closure_cache.insert(std::pair(codepoints, IntSet(gids)));
146+
glyph_closure_cache.insert(std::pair(codepoints, GlyphSet(gids)));
144147

145-
return IntSet(gids);
148+
return GlyphSet(gids);
146149
}
147150

148151
void LogClosureCount(absl::string_view operation) {
@@ -167,17 +170,17 @@ class SegmentationContext {
167170
<< " misses)";
168171
}
169172

170-
StatusOr<IntSet> CodepointsToOrGids(const IntSet& codepoints) {
173+
StatusOr<GlyphSet> CodepointsToOrGids(const CodepointSet& codepoints) {
171174
auto it = code_point_set_to_or_gids_cache.find(codepoints);
172175
if (it != code_point_set_to_or_gids_cache.end()) {
173176
code_point_set_to_or_gids_cache_hit++;
174177
return it->second;
175178
}
176179

177180
code_point_set_to_or_gids_cache_miss++;
178-
IntSet and_gids;
179-
IntSet or_gids;
180-
IntSet exclusive_gids;
181+
GlyphSet and_gids;
182+
GlyphSet or_gids;
183+
GlyphSet exclusive_gids;
181184
TRYV(AnalyzeSegment(*this, codepoints, and_gids, or_gids, exclusive_gids));
182185

183186
code_point_set_to_or_gids_cache.insert(std::pair(codepoints, or_gids));
@@ -187,12 +190,12 @@ class SegmentationContext {
187190
// Init
188191
common::hb_face_unique_ptr preprocessed_face;
189192
common::hb_face_unique_ptr original_face;
190-
std::vector<IntSet> segments;
193+
std::vector<CodepointSet> segments;
191194

192-
IntSet initial_codepoints;
193-
IntSet all_codepoints;
194-
IntSet full_closure;
195-
IntSet initial_closure;
195+
CodepointSet initial_codepoints;
196+
CodepointSet all_codepoints;
197+
GlyphSet full_closure;
198+
GlyphSet initial_closure;
196199

197200
uint32_t patch_size_min_bytes = 0;
198201
uint32_t patch_size_max_bytes = UINT32_MAX;
@@ -201,27 +204,27 @@ class SegmentationContext {
201204
std::vector<GlyphConditions> gid_conditions;
202205

203206
// Phase 2
204-
IntSet unmapped_glyphs;
205-
btree_map<IntSet, IntSet> and_glyph_groups;
206-
btree_map<IntSet, IntSet> or_glyph_groups;
207-
IntSet fallback_segments;
207+
GlyphSet unmapped_glyphs;
208+
btree_map<SegmentSet, GlyphSet> and_glyph_groups;
209+
btree_map<SegmentSet, GlyphSet> or_glyph_groups;
210+
SegmentSet fallback_segments;
208211

209212
// Caches and logging
210-
flat_hash_map<IntSet, IntSet> glyph_closure_cache;
213+
flat_hash_map<CodepointSet, GlyphSet> glyph_closure_cache;
211214
uint32_t glyph_closure_cache_hit = 0;
212215
uint32_t glyph_closure_cache_miss = 0;
213216

214-
flat_hash_map<IntSet, IntSet> code_point_set_to_or_gids_cache;
217+
flat_hash_map<CodepointSet, GlyphSet> code_point_set_to_or_gids_cache;
215218
uint32_t code_point_set_to_or_gids_cache_hit = 0;
216219
uint32_t code_point_set_to_or_gids_cache_miss = 0;
217220

218221
uint32_t closure_count_cumulative = 0;
219222
uint32_t closure_count_delta = 0;
220223
};
221224

222-
Status AnalyzeSegment(SegmentationContext& context, const IntSet& codepoints,
223-
IntSet& and_gids, IntSet& or_gids,
224-
IntSet& exclusive_gids) {
225+
Status AnalyzeSegment(SegmentationContext& context,
226+
const CodepointSet& codepoints, GlyphSet& and_gids,
227+
GlyphSet& or_gids, GlyphSet& exclusive_gids) {
225228
if (codepoints.empty()) {
226229
// Skip empty sets, they will never contribute any conditions.
227230
return absl::OkStatus();
@@ -251,16 +254,16 @@ Status AnalyzeSegment(SegmentationContext& context, const IntSet& codepoints,
251254
// * I - D: the activation conditions for these glyphs is s_i OR …
252255
// Where … is one or more additional segments.
253256
// * D intersection I: the activation conditions for these glyphs is only s_i
254-
IntSet except_segment = context.all_codepoints;
257+
CodepointSet except_segment = context.all_codepoints;
255258
except_segment.subtract(codepoints);
256259
auto B_except_segment_closure = TRY(context.GlyphClosure(except_segment));
257260

258-
IntSet only_segment = context.initial_codepoints;
261+
CodepointSet only_segment = context.initial_codepoints;
259262
only_segment.union_set(codepoints);
260263
auto I_only_segment_closure = TRY(context.GlyphClosure(only_segment));
261264
I_only_segment_closure.subtract(context.initial_closure);
262265

263-
IntSet D_dropped = context.full_closure;
266+
GlyphSet D_dropped = context.full_closure;
264267
D_dropped.subtract(B_except_segment_closure);
265268

266269
and_gids.union_set(D_dropped);
@@ -276,10 +279,11 @@ Status AnalyzeSegment(SegmentationContext& context, const IntSet& codepoints,
276279
}
277280

278281
Status AnalyzeSegment(SegmentationContext& context,
279-
segment_index_t segment_index, const IntSet& codepoints) {
280-
IntSet and_gids;
281-
IntSet or_gids;
282-
IntSet exclusive_gids;
282+
segment_index_t segment_index,
283+
const CodepointSet& codepoints) {
284+
GlyphSet and_gids;
285+
GlyphSet or_gids;
286+
GlyphSet exclusive_gids;
283287
TRYV(AnalyzeSegment(context, codepoints, and_gids, or_gids, exclusive_gids));
284288

285289
for (uint32_t and_gid : exclusive_gids) {
@@ -300,7 +304,7 @@ Status AnalyzeSegment(SegmentationContext& context,
300304
}
301305

302306
Status GroupGlyphs(SegmentationContext& context) {
303-
IntSet fallback_segments_set;
307+
SegmentSet fallback_segments_set;
304308
for (segment_index_t s = 0; s < context.segments.size(); s++) {
305309
if (context.segments[s].empty()) {
306310
// Ignore empty segments.
@@ -329,12 +333,12 @@ Status GroupGlyphs(SegmentationContext& context) {
329333
// conditions that were not detected. Therefore we need to rule out the
330334
// presence of these additional conditions if an or group is able to be used.
331335
for (auto& [or_group, glyphs] : context.or_glyph_groups) {
332-
IntSet all_other_codepoints = context.all_codepoints;
336+
CodepointSet all_other_codepoints = context.all_codepoints;
333337
for (uint32_t s : or_group) {
334338
all_other_codepoints.subtract(context.segments[s]);
335339
}
336340

337-
IntSet or_gids = TRY(context.CodepointsToOrGids(all_other_codepoints));
341+
GlyphSet or_gids = TRY(context.CodepointsToOrGids(all_other_codepoints));
338342

339343
// Any "OR" glyphs associated with all other codepoints have some additional
340344
// conditions to activate so we can't safely include them into this or
@@ -376,24 +380,24 @@ void MergeSegments(const SegmentationContext& context, const IntSet& segments,
376380
}
377381

378382
StatusOr<uint32_t> EstimatePatchSize(SegmentationContext& context,
379-
const IntSet& codepoints) {
380-
IntSet and_gids;
381-
IntSet or_gids;
382-
IntSet exclusive_gids;
383+
const CodepointSet& codepoints) {
384+
GlyphSet and_gids;
385+
GlyphSet or_gids;
386+
GlyphSet exclusive_gids;
383387
TRYV(AnalyzeSegment(context, codepoints, and_gids, or_gids, exclusive_gids));
384388
return PatchSizeBytes(context.original_face.get(), exclusive_gids);
385389
}
386390

387391
StatusOr<bool> TryMerge(SegmentationContext& context,
388392
segment_index_t base_segment_index,
389-
const IntSet& segments) {
393+
const SegmentSet& segments) {
390394
// Create a merged segment, and remove all of the others
391-
IntSet to_merge_segments = segments;
395+
SegmentSet to_merge_segments = segments;
392396
to_merge_segments.erase(base_segment_index);
393397

394398
uint32_t size_before = context.segments[base_segment_index].size();
395399

396-
IntSet merged_codepoints = context.segments[base_segment_index];
400+
CodepointSet merged_codepoints = context.segments[base_segment_index];
397401
MergeSegments(context, to_merge_segments, merged_codepoints);
398402

399403
uint32_t new_patch_size = TRY(EstimatePatchSize(context, merged_codepoints));
@@ -446,7 +450,7 @@ StatusOr<bool> TryMergingACompositeCondition(
446450
continue;
447451
}
448452

449-
IntSet triggering_segments = next_condition->TriggeringSegments();
453+
SegmentSet triggering_segments = next_condition->TriggeringSegments();
450454
if (!triggering_segments.contains(base_segment_index)) {
451455
next_condition++;
452456
continue;
@@ -485,7 +489,7 @@ StatusOr<bool> TryMergingABaseSegment(
485489
continue;
486490
}
487491

488-
IntSet triggering_segments = next_condition->TriggeringSegments();
492+
SegmentSet triggering_segments = next_condition->TriggeringSegments();
489493
if (!TRY(TryMerge(context, base_segment_index, triggering_segments))) {
490494
next_condition++;
491495
continue;
@@ -606,8 +610,8 @@ Status ValidateSegmentation(const SegmentationContext& context,
606610
}
607611

608612
StatusOr<GlyphSegmentation> ClosureGlyphSegmenter::CodepointToGlyphSegments(
609-
hb_face_t* face, IntSet initial_segment,
610-
std::vector<IntSet> codepoint_segments, uint32_t patch_size_min_bytes,
613+
hb_face_t* face, CodepointSet initial_segment,
614+
std::vector<CodepointSet> codepoint_segments, uint32_t patch_size_min_bytes,
611615
uint32_t patch_size_max_bytes) const {
612616
SegmentationContext context(face, initial_segment, codepoint_segments);
613617
context.patch_size_min_bytes = patch_size_min_bytes;

ift/encoder/closure_glyph_segmenter.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ class ClosureGlyphSegmenter {
3131
* initial ift font.
3232
*/
3333
absl::StatusOr<GlyphSegmentation> CodepointToGlyphSegments(
34-
hb_face_t* face, common::IntSet initial_segment,
35-
std::vector<common::IntSet> codepoint_segments,
34+
hb_face_t* face, common::CodepointSet initial_segment,
35+
std::vector<common::CodepointSet> codepoint_segments,
3636
uint32_t patch_size_min_bytes = 0,
3737
uint32_t patch_size_max_bytes = UINT32_MAX) const;
3838
};

0 commit comments

Comments
 (0)