diff --git a/brotli/brotli_font_diff.cc b/brotli/brotli_font_diff.cc index 02526fed..e6dca822 100644 --- a/brotli/brotli_font_diff.cc +++ b/brotli/brotli_font_diff.cc @@ -6,14 +6,14 @@ #include "brotli/hmtx_differ.h" #include "brotli/loca_differ.h" #include "brotli/table_range.h" -#include "common/hb_set_unique_ptr.h" +#include "common/int_set.h" namespace brotli { using absl::Span; using absl::Status; using common::FontData; -using common::hb_set_unique_ptr; +using common::IntSet; static bool HasTable(hb_face_t* face, hb_tag_t tag) { hb_blob_t* table = hb_face_reference_table(face, tag); @@ -49,7 +49,7 @@ class DiffDriver { public: DiffDriver(hb_subset_plan_t* base_plan, hb_face_t* base_face, hb_subset_plan_t* derived_plan, hb_face_t* derived_face, - const hb_set_t* custom_diff_tables, BrotliStream& stream) + const IntSet& custom_diff_tables, BrotliStream& stream) : out(stream), base_new_to_old(hb_subset_plan_new_to_old_glyph_mapping(base_plan)), derived_old_to_new( @@ -77,8 +77,7 @@ class DiffDriver { constexpr hb_tag_t LOCA = HB_TAG('l', 'o', 'c', 'a'); constexpr hb_tag_t GLYF = HB_TAG('g', 'l', 'y', 'f'); - hb_tag_t tag = HB_SET_VALUE_INVALID; - while (hb_set_next(custom_diff_tables, &tag)) { + for (hb_tag_t tag : custom_diff_tables) { switch (tag) { case HMTX: if (HasTable(base_face, derived_face, HMTX) && @@ -229,8 +228,8 @@ class DiffDriver { } }; -void BrotliFontDiff::SortForDiff(const hb_set_t* immutable_tables, - const hb_set_t* custom_diff_tables, +void BrotliFontDiff::SortForDiff(const IntSet& immutable_tables, + const IntSet& custom_diff_tables, const hb_face_t* original_face, hb_face_t* face_builder) { // Place generic diff tables, @@ -244,21 +243,19 @@ void BrotliFontDiff::SortForDiff(const hb_set_t* immutable_tables, num_tables)) { for (unsigned i = 0; i < num_tables; ++i) { hb_tag_t tag = table_tags[i]; - if (!hb_set_has(immutable_tables, tag) && - !hb_set_has(custom_diff_tables, tag)) { + if (!immutable_tables.contains(tag) && + !custom_diff_tables.contains(tag)) { table_order.push_back(tag); } } offset += num_tables; } - hb_codepoint_t tag = HB_SET_VALUE_INVALID; - while (hb_set_next(immutable_tables, &tag)) { + for (hb_codepoint_t tag : immutable_tables) { table_order.push_back(tag); } - tag = HB_SET_VALUE_INVALID; - while (hb_set_next(custom_diff_tables, &tag)) { + for (hb_codepoint_t tag : custom_diff_tables) { table_order.push_back(tag); } @@ -287,15 +284,13 @@ Status BrotliFontDiff::Diff(hb_subset_plan_t* base_plan, hb_blob_t* base, unsigned base_end_offset = 0; DiffDriver diff_driver(base_plan, base_face, derived_plan, derived_face, - custom_diff_tables_.get(), out); + custom_diff_tables_, out); - const hb_set_t* tag_sets[] = {immutable_tables_.get(), - custom_diff_tables_.get()}; + const IntSet* tag_sets[] = {&immutable_tables_, &custom_diff_tables_}; unsigned base_region_sizes[] = {0, 0}; unsigned i = 0; - for (const hb_set_t* set : tag_sets) { - hb_tag_t tag = HB_SET_VALUE_INVALID; - while (hb_set_next(set, &tag)) { + for (const IntSet* set : tag_sets) { + for (hb_tag_t tag : *set) { if (!HasTable(derived_face, tag)) { continue; } diff --git a/brotli/brotli_font_diff.h b/brotli/brotli_font_diff.h index a57c7eba..a8889bb3 100644 --- a/brotli/brotli_font_diff.h +++ b/brotli/brotli_font_diff.h @@ -3,7 +3,7 @@ #include "absl/status/status.h" #include "common/font_data.h" -#include "common/hb_set_unique_ptr.h" +#include "common/int_set.h" #include "hb-subset.h" namespace brotli { @@ -16,23 +16,23 @@ class BrotliFontDiff { public: // Sorts the tables in face_builder into the order expected by the font // differ. - static void SortForDiff(const hb_set_t* immutable_tables, - const hb_set_t* custom_diff_tables, + static void SortForDiff(const common::IntSet& immutable_tables, + const common::IntSet& custom_diff_tables, const hb_face_t* original_face, hb_face_t* face_builder /* IN/OUT */); - BrotliFontDiff(const hb_set_t* immutable_tables, - const hb_set_t* custom_diff_tables) - : immutable_tables_(hb_set_copy(immutable_tables), &hb_set_destroy), - custom_diff_tables_(hb_set_copy(custom_diff_tables), &hb_set_destroy) {} + BrotliFontDiff(const common::IntSet& immutable_tables, + const common::IntSet& custom_diff_tables) + : immutable_tables_(immutable_tables), + custom_diff_tables_(custom_diff_tables) {} absl::Status Diff(hb_subset_plan_t* base_plan, hb_blob_t* base, hb_subset_plan_t* derived_plan, hb_blob_t* derived, common::FontData* patch) const; private: - common::hb_set_unique_ptr immutable_tables_; - common::hb_set_unique_ptr custom_diff_tables_; + common::IntSet immutable_tables_; + common::IntSet custom_diff_tables_; }; } // namespace brotli diff --git a/brotli/brotli_font_diff_test.cc b/brotli/brotli_font_diff_test.cc index 071f4d2e..1b38ba0c 100644 --- a/brotli/brotli_font_diff_test.cc +++ b/brotli/brotli_font_diff_test.cc @@ -2,7 +2,7 @@ #include "absl/types/span.h" #include "common/brotli_binary_patch.h" -#include "common/hb_set_unique_ptr.h" +#include "common/int_set.h" #include "gtest/gtest.h" #include "hb-subset.h" @@ -12,8 +12,7 @@ using absl::Span; using absl::Status; using common::BrotliBinaryPatch; using common::FontData; -using common::hb_set_unique_ptr; -using common::make_hb_set; +using common::IntSet; const std::string kTestDataDir = "common/testdata/"; @@ -47,10 +46,10 @@ class BrotliFontDiffTest : public ::testing::Test { input = hb_subset_input_create_or_fail(); - immutable_tables = make_hb_set(); + immutable_tables = IntSet{}; custom_tables = - make_hb_set(4, HB_TAG('g', 'l', 'y', 'f'), HB_TAG('l', 'o', 'c', 'a'), - HB_TAG('h', 'm', 't', 'x'), HB_TAG('v', 'm', 't', 'x')); + IntSet{HB_TAG('g', 'l', 'y', 'f'), HB_TAG('l', 'o', 'c', 'a'), + HB_TAG('h', 'm', 't', 'x'), HB_TAG('v', 'm', 't', 'x')}; } void TearDown() override { @@ -72,12 +71,11 @@ class BrotliFontDiffTest : public ::testing::Test { } void SortTables(hb_face_t* face, hb_face_t* subset) { - BrotliFontDiff::SortForDiff(immutable_tables.get(), custom_tables.get(), - face, subset); + BrotliFontDiff::SortForDiff(immutable_tables, custom_tables, face, subset); } - hb_set_unique_ptr immutable_tables = make_hb_set(); - hb_set_unique_ptr custom_tables = make_hb_set(); + IntSet immutable_tables; + IntSet custom_tables; hb_face_t* roboto; hb_face_t* noto_sans_jp; @@ -101,7 +99,7 @@ TEST_F(BrotliFontDiffTest, Diff) { FontData derived(derived_face); ASSERT_TRUE(derived_plan); - BrotliFontDiff differ(immutable_tables.get(), custom_tables.get()); + BrotliFontDiff differ(immutable_tables, custom_tables); FontData patch; ASSERT_EQ( differ.Diff(base_plan, base_blob, derived_plan, derived_blob, &patch), @@ -136,7 +134,7 @@ TEST_F(BrotliFontDiffTest, DiffRetainGids) { FontData derived(derived_face); ASSERT_TRUE(derived_plan); - BrotliFontDiff differ(immutable_tables.get(), custom_tables.get()); + BrotliFontDiff differ(immutable_tables, custom_tables); FontData patch; ASSERT_EQ( differ.Diff(base_plan, base_blob, derived_plan, derived_blob, &patch), @@ -175,7 +173,7 @@ TEST_F(BrotliFontDiffTest, LongLoca) { FontData derived(derived_face); ASSERT_TRUE(derived_plan); - BrotliFontDiff differ(immutable_tables.get(), custom_tables.get()); + BrotliFontDiff differ(immutable_tables, custom_tables); FontData patch; ASSERT_EQ( differ.Diff(base_plan, base_blob, derived_plan, derived_blob, &patch), @@ -213,7 +211,7 @@ TEST_F(BrotliFontDiffTest, ShortToLongLoca) { FontData derived(derived_face); ASSERT_TRUE(derived_plan); - BrotliFontDiff differ(immutable_tables.get(), custom_tables.get()); + BrotliFontDiff differ(immutable_tables, custom_tables); FontData patch; ASSERT_EQ( differ.Diff(base_plan, base_blob, derived_plan, derived_blob, &patch), @@ -235,8 +233,9 @@ TEST_F(BrotliFontDiffTest, WithImmutableTables) { HB_TAG('G', 'S', 'U', 'B')); hb_set_add(hb_subset_input_set(input, HB_SUBSET_SETS_NO_SUBSET_TABLE_TAG), HB_TAG('G', 'P', 'O', 'S')); - hb_set_add(immutable_tables.get(), HB_TAG('G', 'S', 'U', 'B')); - hb_set_add(immutable_tables.get(), HB_TAG('G', 'P', 'O', 'S')); + + immutable_tables.insert(HB_TAG('G', 'S', 'U', 'B')); + immutable_tables.insert(HB_TAG('G', 'P', 'O', 'S')); hb_set_add_range(hb_subset_input_unicode_set(input), 0x41, 0x5A); hb_subset_plan_t* base_plan = hb_subset_plan_create_or_fail(roboto, input); @@ -254,7 +253,7 @@ TEST_F(BrotliFontDiffTest, WithImmutableTables) { FontData derived(derived_face); ASSERT_TRUE(derived_plan); - BrotliFontDiff differ(immutable_tables.get(), custom_tables.get()); + BrotliFontDiff differ(immutable_tables, custom_tables); FontData patch; ASSERT_EQ( differ.Diff(base_plan, base_blob, derived_plan, derived_blob, &patch), diff --git a/common/font_helper.cc b/common/font_helper.cc index c8929f10..11741a8f 100644 --- a/common/font_helper.cc +++ b/common/font_helper.cc @@ -9,6 +9,7 @@ #include "common/font_data.h" #include "common/hb_set_unique_ptr.h" #include "common/indexed_data_reader.h" +#include "common/int_set.h" #include "hb-ot.h" #include "hb-subset.h" #include "hb.h" @@ -215,10 +216,9 @@ StatusOr FontHelper::GvarSharedTupleCount(const hb_face_t* face) { return *count; } -btree_set FontHelper::GidsToUnicodes( - hb_face_t* face, const btree_set& gids) { +CodepointSet FontHelper::GidsToUnicodes(hb_face_t* face, const GlyphSet& gids) { auto gid_to_unicode = FontHelper::GidToUnicodeMap(face); - btree_set result; + CodepointSet result; for (uint32_t gid : gids) { auto unicode = gid_to_unicode.find(gid); if (unicode != gid_to_unicode.end()) { @@ -244,17 +244,10 @@ flat_hash_map FontHelper::GidToUnicodeMap(hb_face_t* face) { return gid_to_unicode; } -btree_set FontHelper::ToCodepointsSet(hb_face_t* face) { +CodepointSet FontHelper::ToCodepointsSet(hb_face_t* face) { hb_set_unique_ptr codepoints = make_hb_set(); hb_face_collect_unicodes(face, codepoints.get()); - - btree_set result; - hb_codepoint_t cp = HB_SET_VALUE_INVALID; - while (hb_set_next(codepoints.get(), &cp)) { - result.insert(cp); - } - - return result; + return CodepointSet(codepoints); } absl::flat_hash_set FontHelper::GetTags(hb_face_t* face) { diff --git a/common/font_helper.h b/common/font_helper.h index 65577402..1bf061f0 100644 --- a/common/font_helper.h +++ b/common/font_helper.h @@ -12,6 +12,7 @@ #include "absl/strings/string_view.h" #include "common/axis_range.h" #include "common/font_data.h" +#include "common/int_set.h" #include "hb.h" namespace common { @@ -192,10 +193,9 @@ class FontHelper { static absl::flat_hash_map GidToUnicodeMap( hb_face_t* face); - static absl::btree_set GidsToUnicodes( - hb_face_t* face, const absl::btree_set& gids); + static CodepointSet GidsToUnicodes(hb_face_t* face, const GlyphSet& gids); - static absl::btree_set ToCodepointsSet(hb_face_t* face); + static CodepointSet ToCodepointsSet(hb_face_t* face); static absl::flat_hash_set GetTags(hb_face_t* face); static std::vector GetOrderedTags(hb_face_t* face); diff --git a/common/hb_set_unique_ptr.cc b/common/hb_set_unique_ptr.cc index 68f097a1..d0a3dbc9 100644 --- a/common/hb_set_unique_ptr.cc +++ b/common/hb_set_unique_ptr.cc @@ -1,13 +1,9 @@ #include "common/hb_set_unique_ptr.h" #include -#include #include "hb.h" -using absl::btree_set; -using absl::flat_hash_set; - namespace common { hb_set_unique_ptr make_hb_set() { @@ -18,14 +14,6 @@ hb_set_unique_ptr make_hb_set(hb_set_t* set) { return hb_set_unique_ptr(set, &hb_set_destroy); } -hb_set_unique_ptr make_hb_set(const absl::flat_hash_set& int_set) { - hb_set_unique_ptr out = make_hb_set(); - for (uint32_t v : int_set) { - hb_set_add(out.get(), v); - } - return out; -} - hb_set_unique_ptr make_hb_set(int length, ...) { hb_set_unique_ptr result = make_hb_set(); va_list values; @@ -54,22 +42,4 @@ hb_set_unique_ptr make_hb_set_from_ranges(int number_of_ranges, ...) { return result; } -flat_hash_set to_hash_set(const hb_set_t* set) { - flat_hash_set out; - hb_codepoint_t v = HB_SET_VALUE_INVALID; - while (hb_set_next(set, &v)) { - out.insert(v); - } - return out; -} - -btree_set to_btree_set(const hb_set_t* set) { - btree_set out; - hb_codepoint_t v = HB_SET_VALUE_INVALID; - while (hb_set_next(set, &v)) { - out.insert(v); - } - return out; -} - } // namespace common diff --git a/common/hb_set_unique_ptr.h b/common/hb_set_unique_ptr.h index 9b20d1c5..579c8de4 100644 --- a/common/hb_set_unique_ptr.h +++ b/common/hb_set_unique_ptr.h @@ -3,8 +3,6 @@ #include -#include "absl/container/btree_set.h" -#include "absl/container/flat_hash_set.h" #include "hb.h" namespace common { @@ -16,18 +14,12 @@ hb_set_unique_ptr make_hb_set(); // Takes ownership of set hb_set_unique_ptr make_hb_set(hb_set_t* set); -hb_set_unique_ptr make_hb_set(const absl::flat_hash_set& int_set); - hb_set_unique_ptr make_hb_set(int length, ...); hb_set_unique_ptr make_hb_set_from_ranges(int number_of_ranges, ...); hb_set_unique_ptr make_hb_set(int length, ...); -absl::flat_hash_set to_hash_set(const hb_set_t* set); - -absl::btree_set to_btree_set(const hb_set_t* set); - } // namespace common #endif // COMMON_HB_SET_UNIQUE_PTR_H_ diff --git a/common/int_set.h b/common/int_set.h index bf2f77f1..45a00583 100644 --- a/common/int_set.h +++ b/common/int_set.h @@ -1,8 +1,11 @@ #ifndef COMMON_INT_SET #define COMMON_INT_SET +#include #include +#include +#include "absl/types/span.h" #include "common/hb_set_unique_ptr.h" namespace common { @@ -75,17 +78,17 @@ class IntSet { IntSet(std::initializer_list values) : set_(make_hb_set()) { for (auto v : values) { - this->add(v); + this->insert(v); } } - IntSet(const hb_set_t* set) : set_(make_hb_set()) { + explicit IntSet(const hb_set_t* set) : set_(make_hb_set()) { // We always keep exclusive ownership of the internal set, so copy the // contents of the input set instead of referencing it. hb_set_union(set_.get(), set); } - IntSet(const hb_set_unique_ptr& set) : set_(make_hb_set()) { + explicit IntSet(const hb_set_unique_ptr& set) : set_(make_hb_set()) { // We always keep exclusive ownership of the internal set, so copy the // contents of the input set instead of referencing it. hb_set_union(set_.get(), set.get()); @@ -167,12 +170,34 @@ class IntSet { return end(); // Calls const end() } - void add(hb_codepoint_t codepoint) { hb_set_add(set_.get(), codepoint); } + void insert(hb_codepoint_t codepoint) { hb_set_add(set_.get(), codepoint); } - void add_range(hb_codepoint_t start, hb_codepoint_t end) { + void insert_range(hb_codepoint_t start, hb_codepoint_t end) { hb_set_add_range(set_.get(), start, end); } + // Optimized insert that takes an array of sorted values + void insert_sorted_array(absl::Span sorted_values) { + hb_set_add_sorted_array(set_.get(), sorted_values.data(), + sorted_values.size()); + } + + std::vector to_vector() const { + std::vector values; + auto size = this->size(); + values.resize(size); + hb_set_next_many(set_.get(), HB_SET_VALUE_INVALID, values.data(), size); + return values; + } + + template + void insert(It start, It end) { + while (start != end) { + insert(*start); + ++start; + } + } + bool contains(hb_codepoint_t codepoint) const { return hb_set_has(set_.get(), codepoint); } @@ -197,7 +222,14 @@ class IntSet { return value; } - void erase(hb_codepoint_t codepoint) { hb_set_del(set_.get(), codepoint); } + size_t erase(hb_codepoint_t codepoint) { + bool has = contains(codepoint); + if (has) { + hb_set_del(set_.get(), codepoint); + return 1; + } + return 0; + } size_t size() const { return hb_set_get_population(set_.get()); } @@ -211,6 +243,8 @@ class IntSet { hb_set_union(set_.get(), other.set_.get()); } + void union_into(hb_set_t* other) const { hb_set_union(other, set_.get()); } + // Compute the intersection of this and other, store the result in this set. void intersect(const IntSet& other) { hb_set_intersect(set_.get(), other.set_.get()); @@ -238,6 +272,31 @@ class IntSet { hb_set_unique_ptr set_; }; +// Typed variants +class GlyphSet : public IntSet { + public: + GlyphSet() : IntSet(){}; + GlyphSet(std::initializer_list values) : IntSet(values) {} + explicit GlyphSet(const hb_set_t* set) : IntSet(set) {} + explicit GlyphSet(const hb_set_unique_ptr& set) : IntSet(set) {} +}; + +class CodepointSet : public IntSet { + public: + CodepointSet() : IntSet(){}; + CodepointSet(std::initializer_list values) : IntSet(values) {} + explicit CodepointSet(const hb_set_t* set) : IntSet(set) {} + explicit CodepointSet(const hb_set_unique_ptr& set) : IntSet(set) {} +}; + +class SegmentSet : public IntSet { + public: + SegmentSet() : IntSet(){}; + SegmentSet(std::initializer_list values) : IntSet(values) {} + explicit SegmentSet(const hb_set_t* set) : IntSet(set) {} + explicit SegmentSet(const hb_set_unique_ptr& set) : IntSet(set) {} +}; + } // namespace common #endif \ No newline at end of file diff --git a/common/int_set_test.cc b/common/int_set_test.cc index 0ececb93..d463f998 100644 --- a/common/int_set_test.cc +++ b/common/int_set_test.cc @@ -2,6 +2,8 @@ #include +#include "absl/container/btree_set.h" +#include "absl/container/flat_hash_set.h" #include "absl/hash/hash_testing.h" #include "common/hb_set_unique_ptr.h" #include "gtest/gtest.h" @@ -14,10 +16,10 @@ TEST_F(IntSetTest, BasicOperations) { IntSet set; ASSERT_TRUE(set.empty()); - set.add(5); - set.add(7); - set.add(7); - set.add(8); + set.insert(5); + set.insert(7); + set.insert(7); + set.insert(8); ASSERT_FALSE(set.contains(4)); ASSERT_TRUE(set.contains(5)); @@ -245,15 +247,45 @@ TEST_F(IntSetTest, MinMax) { ASSERT_EQ(*b.max(), 11); } -TEST_F(IntSetTest, AddRange) { +TEST_F(IntSetTest, InsertRange) { IntSet a{7, 8, 11}; - a.add_range(10, 15); + a.insert_range(10, 15); IntSet expected{7, 8, 10, 11, 12, 13, 14, 15}; ASSERT_EQ(a, expected); } +TEST_F(IntSetTest, InsertSorted) { + IntSet a{7, 8, 11}; + + hb_codepoint_t sorted[]{9, 11, 13, 14}; + a.insert_sorted_array(sorted); + + IntSet expected{7, 8, 9, 11, 13, 14}; + ASSERT_EQ(a, expected); +} + +TEST_F(IntSetTest, InsertIterator) { + IntSet a{7, 8, 11}; + + std::vector b{5, 15, 21}; + + a.insert(b.begin(), b.end()); + IntSet expected{5, 7, 8, 11, 15, 21}; + + ASSERT_EQ(a, expected); +} + +TEST_F(IntSetTest, ToVector) { + IntSet empty{}; + IntSet a{7, 8, 11}; + + ASSERT_EQ(empty.to_vector(), std::vector()); + std::vector expected{7, 8, 11}; + ASSERT_EQ(a.to_vector(), expected); +} + TEST_F(IntSetTest, IsSubsetOf) { IntSet empty; IntSet a{7, 8}; @@ -280,6 +312,15 @@ TEST_F(IntSetTest, Union) { a.union_set(b); ASSERT_EQ(a, expected); + + hb_set_unique_ptr c = make_hb_set(1, 7); + + b.union_into(c.get()); + + ASSERT_TRUE(hb_set_has(c.get(), 7)); + ASSERT_TRUE(hb_set_has(c.get(), 8)); + ASSERT_TRUE(hb_set_has(c.get(), 11)); + ASSERT_EQ(hb_set_get_population(c.get()), 3); } TEST_F(IntSetTest, Intersect) { diff --git a/common/sparse_bit_set.cc b/common/sparse_bit_set.cc index e3b821f0..91889798 100644 --- a/common/sparse_bit_set.cc +++ b/common/sparse_bit_set.cc @@ -9,12 +9,14 @@ #include "common/bit_input_buffer.h" #include "common/bit_output_buffer.h" #include "common/branch_factor.h" +#include "common/int_set.h" #include "hb.h" namespace common { using absl::StatusOr; using absl::string_view; +using common::IntSet; using std::string; using std::unordered_map; using std::vector; @@ -52,12 +54,8 @@ uint8_t ValuesPerBitLog2ForLayer(uint32_t layer, uint32_t tree_depth, } StatusOr SparseBitSet::Decode(string_view sparse_bit_set, - hb_set_t* out) { + IntSet& out) { // TODO(garretrieger): ignore values beyond unicode max as required by spec. - - if (!out) { - return absl::InvalidArgumentError("out is null."); - } if (sparse_bit_set.empty()) { return sparse_bit_set; } @@ -100,8 +98,7 @@ StatusOr SparseBitSet::Decode(string_view sparse_bit_set, // This is a completely filled node encoded as a zero! uint32_t leaf_node_base = node_base * node_base_factor; // Add to the set now; range additions are efficient. - hb_set_add_range(out, leaf_node_base, - leaf_node_base + leaf_node_size - 1); + out.insert_range(leaf_node_base, leaf_node_base + leaf_node_size - 1); } else { // It's a normally encoded node. for (uint32_t bit_index = 0u; bit_index < kBFNodeSize[branch_factor]; @@ -132,8 +129,7 @@ StatusOr SparseBitSet::Decode(string_view sparse_bit_set, next_level_node_bases.clear(); } if (!pending_codepoints.empty()) { - hb_set_add_sorted_array(out, pending_codepoints.data(), - pending_codepoints.size()); + out.insert_sorted_array(pending_codepoints); } return bits.Remaining(); @@ -688,34 +684,29 @@ string EncodeSet(const vector& codepoints, BranchFactor branch_factor, return bit_buffer.to_string(); } -string SparseBitSet::Encode(const hb_set_t& set, BranchFactor branch_factor) { - uint32_t tree_depth = TreeDepthFor(hb_set_get_max(&set), branch_factor); +string SparseBitSet::Encode(const IntSet& set, BranchFactor branch_factor) { + uint32_t tree_depth = + TreeDepthFor(set.max().value_or(HB_SET_VALUE_INVALID), branch_factor); if (tree_depth > kBFMaxDepth[branch_factor] && branch_factor == BF2) { // It's possible for uint32_t::MAX to exceed the max tree depth on BF2, // upgrade to 4 in that case. branch_factor = BF4; } - uint32_t size = hb_set_get_population(&set); - if (size == 0) { + if (set.empty()) { return string{0b00000000}; } - vector codepoints; - codepoints.resize(size); - hb_set_next_many(&set, HB_SET_VALUE_INVALID, codepoints.data(), size); + vector codepoints = set.to_vector(); vector filled_twigs; FindFilledTwigs(codepoints, branch_factor, filled_twigs); return EncodeSet(codepoints, branch_factor, filled_twigs); } -string SparseBitSet::Encode(const hb_set_t& set) { - uint32_t size = hb_set_get_population(&set); - if (size == 0) { +string SparseBitSet::Encode(const IntSet& set) { + if (set.empty()) { return ""; } - vector codepoints; - codepoints.resize(size); - hb_set_next_many(&set, HB_SET_VALUE_INVALID, codepoints.data(), size); + vector codepoints = set.to_vector(); vector filled_twigs; BranchFactor branch_factor = ChooseBranchFactor(codepoints, filled_twigs); diff --git a/common/sparse_bit_set.h b/common/sparse_bit_set.h index 08264519..f2599abd 100644 --- a/common/sparse_bit_set.h +++ b/common/sparse_bit_set.h @@ -4,7 +4,7 @@ #include "absl/status/statusor.h" #include "absl/strings/string_view.h" #include "common/branch_factor.h" -#include "hb.h" +#include "common/int_set.h" namespace common { @@ -49,15 +49,16 @@ class SparseBitSet { // items are appended to any existing items in out. Returns a sub string of // 'sparse_bit_set' with the consumed bytes removed. static absl::StatusOr Decode( - absl::string_view sparse_bit_set, hb_set_t* out); + absl::string_view sparse_bit_set, IntSet& out); // Encode a set of integers into a sparse bit set binary blob. - static std::string Encode(const hb_set_t& set, BranchFactor branch_factor); + static std::string Encode(const common::IntSet& set, + BranchFactor branch_factor); /* * Encode a set of integers into a sparse bit set binary blob. * The optimal branch_factor will be estimated and used automatically. */ - static std::string Encode(const hb_set_t& set); + static std::string Encode(const common::IntSet& set); }; } // namespace common diff --git a/common/sparse_bit_set_test.cc b/common/sparse_bit_set_test.cc index 8a511d1b..5a85bcf8 100644 --- a/common/sparse_bit_set_test.cc +++ b/common/sparse_bit_set_test.cc @@ -4,7 +4,7 @@ #include #include "absl/status/status.h" -#include "common/hb_set_unique_ptr.h" +#include "common/int_set.h" #include "gtest/gtest.h" #include "hb.h" @@ -19,38 +19,38 @@ using std::vector; class SparseBitSetTest : public ::testing::Test { protected: - static void TestEncodeDecode(hb_set_unique_ptr set, int expected_size) { - TestEncodeDecode(set.get(), BF8, expected_size); + static void TestEncodeDecode(const IntSet& set, int expected_size) { + TestEncodeDecode(set, BF8, expected_size); } - static void TestEncodeDecode(hb_set_unique_ptr set, BranchFactor bf) { - TestEncodeDecode(set.get(), bf, 0); + static void TestEncodeDecode(const IntSet& set, BranchFactor bf) { + TestEncodeDecode(set, bf, 0); } - static void TestEncodeDecode(hb_set_t *set, BranchFactor bf, + static void TestEncodeDecode(const IntSet& set, BranchFactor bf, int expected_size) { - string encoded_bits = SparseBitSet::Encode(*set, bf); + string encoded_bits = SparseBitSet::Encode(set, bf); if (expected_size) { EXPECT_EQ(encoded_bits.size(), expected_size); } - hb_set_unique_ptr decoded = make_hb_set(); - auto sc = SparseBitSet::Decode(encoded_bits, decoded.get()); + IntSet decoded; + auto sc = SparseBitSet::Decode(encoded_bits, decoded); if (!sc.ok()) { string bits = Bits(encoded_bits, bf); EXPECT_EQ("Decode worked", "Unable to decode bits: " + bits); } EXPECT_EQ(absl::OkStatus(), sc.status()); - if (!hb_set_is_equal(set, decoded.get())) { + if (set != decoded) { string set_in = SetContents(set); string encoded_bit_str = Bits(encoded_bits, bf); - string set_out = SetContents(decoded.get()); + string set_out = SetContents(decoded); printf("In: %s\nBits: %s\nOut: %s\n", encoded_bit_str.c_str(), encoded_bit_str.c_str(), set_out.c_str()); } - EXPECT_TRUE(hb_set_is_equal(set, decoded.get())); + EXPECT_EQ(set, decoded); } - static string Bits(const string &s, BranchFactor bf) { + static string Bits(const string& s, BranchFactor bf) { if (s.empty()) { return ""; } @@ -83,13 +83,13 @@ class SparseBitSetTest : public ::testing::Test { return first8_bits + " " + result; } - static string Bits(hb_set_unique_ptr set, BranchFactor bf) { - string bits = SparseBitSet::Encode(*set, bf); + static string Bits(const IntSet& set, BranchFactor bf) { + string bits = SparseBitSet::Encode(set, bf); return Bits(bits, bf); } - static string Bits(hb_set_unique_ptr set) { - string bits = SparseBitSet::Encode(*set); + static string Bits(const IntSet& set) { + string bits = SparseBitSet::Encode(set); char bfc = bits[0] & 0b11; BranchFactor bf; if (bfc == 0) { @@ -104,7 +104,7 @@ class SparseBitSetTest : public ::testing::Test { return Bits(bits, bf); } - static string FromChars(const string &s) { + static string FromChars(const string& s) { string result; if (s.empty()) { return result; @@ -131,9 +131,9 @@ class SparseBitSetTest : public ::testing::Test { return result; } - static string SetContents(hb_set_t *set) { + static string SetContents(const IntSet& set) { std::vector results; - for (hb_codepoint_t cp = HB_SET_VALUE_INVALID; hb_set_next(set, &cp);) { + for (hb_codepoint_t cp : set) { results.push_back(cp); } string results_str; @@ -147,136 +147,130 @@ class SparseBitSetTest : public ::testing::Test { return results_str; } - static string FromBits(const string &s) { - hb_set_unique_ptr set = make_hb_set(); + static string FromBits(const string& s) { + IntSet set; EXPECT_EQ(absl::OkStatus(), - SparseBitSet::Decode(FromChars(s), set.get()).status()); - return SetContents(set.get()); + SparseBitSet::Decode(FromChars(s), set).status()); + return SetContents(set); } - static hb_set_unique_ptr Set(const vector> &pairs) { - hb_set_unique_ptr set = make_hb_set(); + static IntSet Set(const vector>& pairs) { + IntSet set; for (pair pair : pairs) { for (int i = pair.first; i <= pair.second; i++) { - hb_set_add(set.get(), i); + set.insert(i); } } return set; } }; -TEST_F(SparseBitSetTest, DecodeNullSet) { - EXPECT_TRUE(absl::IsInvalidArgument( - SparseBitSet::Decode(string(), nullptr).status())); -} - TEST_F(SparseBitSetTest, DecodeAppends) { - hb_set_unique_ptr set = make_hb_set(1, 42); - ASSERT_EQ( - SparseBitSet::Decode(string{0b00000101, 0b00000001}, set.get()).status(), - // ^ d1 bf8 ^ - absl::OkStatus()); - hb_set_unique_ptr expected = make_hb_set(2, 0, 42); - EXPECT_TRUE(hb_set_is_equal(expected.get(), set.get())); + IntSet set{42}; + ASSERT_EQ(SparseBitSet::Decode(string{0b00000101, 0b00000001}, set).status(), + // ^ d1 bf8 ^ + absl::OkStatus()); + IntSet expected{0, 42}; + EXPECT_EQ(expected, set); } TEST_F(SparseBitSetTest, DecodeInvalid) { // The encoded set here is truncated and missing 2 bytes. string encoded{0b00001010, 0b01010101, 0b00000001, 0b00000001}; // ^ d2 bf8 ^ - hb_set_unique_ptr set = make_hb_set(); - EXPECT_TRUE(absl::IsInvalidArgument( - SparseBitSet::Decode(encoded, set.get()).status())); + IntSet set; + EXPECT_TRUE( + absl::IsInvalidArgument(SparseBitSet::Decode(encoded, set).status())); } TEST_F(SparseBitSetTest, RemainingOnDecode) { - auto set = make_hb_set(4, 5, 12, 17, 38); - std::string encoded = SparseBitSet::Encode(*set); + IntSet set{5, 12, 17, 38}; + std::string encoded = SparseBitSet::Encode(set); encoded.append("abcd"); - auto s = SparseBitSet::Decode(encoded, set.get()); + auto s = SparseBitSet::Decode(encoded, set); ASSERT_TRUE(s.ok()) << s.status(); ASSERT_EQ(*s, "abcd"); } TEST_F(SparseBitSetTest, RemainingOnDecode_Empty) { - auto set = make_hb_set(4, 5, 12, 17, 38); - std::string encoded = SparseBitSet::Encode(*set); + IntSet set{5, 12, 17, 38}; + std::string encoded = SparseBitSet::Encode(set); - auto s = SparseBitSet::Decode(encoded, set.get()); + auto s = SparseBitSet::Decode(encoded, set); ASSERT_TRUE(s.ok()) << s.status(); ASSERT_EQ(*s, ""); } -TEST_F(SparseBitSetTest, EncodeEmpty) { TestEncodeDecode(make_hb_set(), 0); } +TEST_F(SparseBitSetTest, EncodeEmpty) { TestEncodeDecode(IntSet{}, 0); } TEST_F(SparseBitSetTest, EncodeOneLayer) { - TestEncodeDecode(make_hb_set(1, 0), 2); - TestEncodeDecode(make_hb_set(1, 7), 2); - TestEncodeDecode(make_hb_set(2, 2, 5), 2); - TestEncodeDecode(make_hb_set(8, 0, 1, 2, 3, 4, 5, 6, 7), 2); + TestEncodeDecode(IntSet{0}, 2); + TestEncodeDecode(IntSet{7}, 2); + TestEncodeDecode(IntSet{2, 5}, 2); + TestEncodeDecode(IntSet{0, 1, 2, 3, 4, 5, 6, 7}, 2); } TEST_F(SparseBitSetTest, EncodeTwoLayers) { - TestEncodeDecode(make_hb_set(1, 63), 3); - TestEncodeDecode(make_hb_set(2, 0, 63), 4); - TestEncodeDecode(make_hb_set(3, 2, 5, 60), 4); - TestEncodeDecode(make_hb_set(5, 0, 30, 31, 33, 63), 6); + TestEncodeDecode(IntSet{63}, 3); + TestEncodeDecode(IntSet{0, 63}, 4); + TestEncodeDecode(IntSet{2, 5, 60}, 4); + TestEncodeDecode(IntSet{0, 30, 31, 33, 63}, 6); } TEST_F(SparseBitSetTest, EncodeManyLayers) { - TestEncodeDecode(make_hb_set(2, 10, 49596), 12); - TestEncodeDecode(make_hb_set(3, 10, 49595, 49596), 12); - TestEncodeDecode(make_hb_set(3, 10, 49588, 49596), 13); + TestEncodeDecode(IntSet{10, 49596}, 12); + TestEncodeDecode(IntSet{10, 49595, 49596}, 12); + TestEncodeDecode(IntSet{10, 49588, 49596}, 13); } TEST_F(SparseBitSetTest, Encode2BitSingleNode) { - EXPECT_EQ("00|100000 10 00 00 00", Bits(make_hb_set(1, 0), BF2)); + EXPECT_EQ("00|100000 10 00 00 00", Bits(IntSet{0}, BF2)); } TEST_F(SparseBitSetTest, Encode2BitMisc) { EXPECT_EQ("00|110000 11 01 10 11 11 00 00 00", - Bits(make_hb_set(4, 2, 3, 4, 5), BF2)); + Bits(IntSet{2, 3, 4, 5}, BF2)); } TEST_F(SparseBitSetTest, Encode4BitEmpty) { - EXPECT_EQ("00|000000 ", Bits(make_hb_set(0, 0), BF4)); + EXPECT_EQ("00|000000 ", Bits(IntSet{}, BF4)); } TEST_F(SparseBitSetTest, Encode4BitSingleNode) { - EXPECT_EQ("10|100000 1000 0000", Bits(make_hb_set(1, 0), BF4)); + EXPECT_EQ("10|100000 1000 0000", Bits(IntSet{0}, BF4)); // ^bf4 d1^ - EXPECT_EQ("10|100000 0100 0000", Bits(make_hb_set(1, 1), BF4)); - EXPECT_EQ("10|100000 0010 0000", Bits(make_hb_set(1, 2), BF4)); - EXPECT_EQ("10|100000 0001 0000", Bits(make_hb_set(1, 3), BF4)); + EXPECT_EQ("10|100000 0100 0000", Bits(IntSet{1}, BF4)); + EXPECT_EQ("10|100000 0010 0000", Bits(IntSet{2}, BF4)); + EXPECT_EQ("10|100000 0001 0000", Bits(IntSet{3}, BF4)); - EXPECT_EQ("10|100000 1100 0000", Bits(make_hb_set(2, 0, 1), BF4)); - EXPECT_EQ("10|100000 0011 0000", Bits(make_hb_set(2, 2, 3), BF4)); + EXPECT_EQ("10|100000 1100 0000", Bits(IntSet{0, 1}, BF4)); + EXPECT_EQ("10|100000 0011 0000", Bits(IntSet{2, 3}, BF4)); - EXPECT_EQ("10|100000 1111 0000", Bits(make_hb_set(4, 0, 1, 2, 3), BF4)); + EXPECT_EQ("10|100000 1111 0000", Bits(IntSet{0, 1, 2, 3}, BF4)); } TEST_F(SparseBitSetTest, Encode4BitMultipleNodes) { - EXPECT_EQ("10|010000 0100 1000", Bits(make_hb_set(1, 4), BF4)); - EXPECT_EQ("10|010000 0100 0100", Bits(make_hb_set(1, 5), BF4)); - EXPECT_EQ("10|010000 0100 0010", Bits(make_hb_set(1, 6), BF4)); - EXPECT_EQ("10|010000 0100 0001", Bits(make_hb_set(1, 7), BF4)); + EXPECT_EQ("10|010000 0100 1000", Bits(IntSet{4}, BF4)); + EXPECT_EQ("10|010000 0100 0100", Bits(IntSet{5}, BF4)); + EXPECT_EQ("10|010000 0100 0010", Bits(IntSet{6}, BF4)); + EXPECT_EQ("10|010000 0100 0001", Bits(IntSet{7}, BF4)); // ^bf4 d2^ - EXPECT_EQ("10|010000 0010 1000", Bits(make_hb_set(1, 8), BF4)); - EXPECT_EQ("10|010000 0010 0100", Bits(make_hb_set(1, 9), BF4)); - EXPECT_EQ("10|010000 0010 0010", Bits(make_hb_set(1, 10), BF4)); - EXPECT_EQ("10|010000 0010 0001", Bits(make_hb_set(1, 11), BF4)); + EXPECT_EQ("10|010000 0010 1000", Bits(IntSet{8}, BF4)); + EXPECT_EQ("10|010000 0010 0100", Bits(IntSet{9}, BF4)); + EXPECT_EQ("10|010000 0010 0010", Bits(IntSet{10}, BF4)); + EXPECT_EQ("10|010000 0010 0001", Bits(IntSet{11}, BF4)); - EXPECT_EQ("10|010000 0001 1000", Bits(make_hb_set(1, 12), BF4)); - EXPECT_EQ("10|010000 0001 0100", Bits(make_hb_set(1, 13), BF4)); - EXPECT_EQ("10|010000 0001 0010", Bits(make_hb_set(1, 14), BF4)); - EXPECT_EQ("10|010000 0001 0001", Bits(make_hb_set(1, 15), BF4)); + EXPECT_EQ("10|010000 0001 1000", Bits(IntSet{12}, BF4)); + EXPECT_EQ("10|010000 0001 0100", Bits(IntSet{13}, BF4)); + EXPECT_EQ("10|010000 0001 0010", Bits(IntSet{14}, BF4)); + EXPECT_EQ("10|010000 0001 0001", Bits(IntSet{15}, BF4)); - EXPECT_EQ("10|010000 1100 1000 1000 0000", Bits(make_hb_set(2, 0, 4), BF4)); - EXPECT_EQ("10|010000 0011 1000 1000 0000", Bits(make_hb_set(2, 8, 12), BF4)); + EXPECT_EQ("10|010000 1100 1000 1000 0000", Bits(IntSet{0, 4}, BF4)); + EXPECT_EQ("10|010000 0011 1000 1000 0000", Bits(IntSet{8, 12}, BF4)); } TEST_F(SparseBitSetTest, Encode8) { @@ -284,7 +278,7 @@ TEST_F(SparseBitSetTest, Encode8) { "01|110000 " // bf8, d3 "10000100 10001000 10000000 00100000 01000000 00010000", - Bits(make_hb_set(3, 2, 33, 323), BF8)); + Bits(IntSet{2, 33, 323}, BF8)); } TEST_F(SparseBitSetTest, LeafNodesNeverFilled) { @@ -404,7 +398,7 @@ TEST_F(SparseBitSetTest, DecodeFilledLeaf) { TEST_F(SparseBitSetTest, MostlyFilledExampleTranscode) { string bits = - SparseBitSet::Encode(*Set({{0, 115}, {117, 217}, {219, 255}}), BF4); + SparseBitSet::Encode(Set({{0, 115}, {117, 217}, {219, 255}}), BF4); string bits_str = Bits(bits, BF4); EXPECT_EQ( "10|001000 " // BF=4, tree height = 4 (values 0..255). @@ -419,11 +413,10 @@ TEST_F(SparseBitSetTest, MostlyFilledExampleTranscode) { // ^ missing 116 ^ missing 218 // padding bits_str); - hb_set_unique_ptr decoded = make_hb_set(); - EXPECT_EQ(absl::OkStatus(), - SparseBitSet::Decode(bits, decoded.get()).status()); - hb_set_unique_ptr expected = Set({{0, 115}, {117, 217}, {219, 255}}); - EXPECT_TRUE(hb_set_is_equal(expected.get(), decoded.get())); + IntSet decoded; + EXPECT_EQ(absl::OkStatus(), SparseBitSet::Decode(bits, decoded).status()); + IntSet expected = Set({{0, 115}, {117, 217}, {219, 255}}); + EXPECT_EQ(expected, decoded); } TEST_F(SparseBitSetTest, OneMissingValue2) { @@ -465,105 +458,101 @@ TEST_F(SparseBitSetTest, RandomSets) { unsigned int seed = 42; for (int i = 0; i < 5000; i++) { int size = rand_r(&seed) % 6000; - hb_set_unique_ptr input = make_hb_set(); + IntSet input; for (int j = 0; j < size; j++) { - hb_set_add(input.get(), rand_r(&seed) % 2048); + input.insert(rand_r(&seed) % 2048); } for (BranchFactor bf : {BF2, BF4, BF8, BF32}) { - string bit_set = SparseBitSet::Encode(*input, bf); - hb_set_unique_ptr output = make_hb_set(); + string bit_set = SparseBitSet::Encode(input, bf); + IntSet output; EXPECT_EQ(absl::OkStatus(), - SparseBitSet::Decode(bit_set, output.get()).status()); - EXPECT_TRUE(hb_set_is_equal(input.get(), output.get())); + SparseBitSet::Decode(bit_set, output).status()); + EXPECT_EQ(input, output); } } } TEST_F(SparseBitSetTest, DepthLimits2) { - hb_set_unique_ptr output = make_hb_set(); + IntSet output; // Depth 31 is OK. - EXPECT_EQ( - absl::OkStatus(), - SparseBitSet::Decode(FromChars("00|111110 00"), output.get()).status()); + EXPECT_EQ(absl::OkStatus(), + SparseBitSet::Decode(FromChars("00|111110 00"), output).status()); } TEST_F(SparseBitSetTest, DepthLimits4) { - hb_set_unique_ptr output = make_hb_set(); + IntSet output; // Depth 16 is OK. - EXPECT_EQ(absl::OkStatus(), - SparseBitSet::Decode(FromChars("10|000010 00000000"), output.get()) - .status()); + EXPECT_EQ( + absl::OkStatus(), + SparseBitSet::Decode(FromChars("10|000010 00000000"), output).status()); } TEST_F(SparseBitSetTest, DepthLimits8) { - hb_set_unique_ptr output = make_hb_set(); + IntSet output; // Depth 11 is OK. - EXPECT_EQ(absl::OkStatus(), - SparseBitSet::Decode(FromChars("01|110100 00000000"), output.get()) - .status()); + EXPECT_EQ( + absl::OkStatus(), + SparseBitSet::Decode(FromChars("01|110100 00000000"), output).status()); // Depth 12 is too much. EXPECT_TRUE(absl::IsInvalidArgument( - SparseBitSet::Decode(FromChars("01|001100 00000000"), output.get()) - .status())); + SparseBitSet::Decode(FromChars("01|001100 00000000"), output).status())); } TEST_F(SparseBitSetTest, DepthLimits32) { - hb_set_unique_ptr output = make_hb_set(); + IntSet output; // Depth 7 is OK. - EXPECT_EQ(absl::OkStatus(), - SparseBitSet::Decode( - FromChars("11|111000 00000000000000000000000000000000"), - output.get()) - .status()); + EXPECT_EQ( + absl::OkStatus(), + SparseBitSet::Decode( + FromChars("11|111000 00000000000000000000000000000000"), output) + .status()); // Depth 8 is too much. EXPECT_TRUE(absl::IsInvalidArgument( SparseBitSet::Decode( - FromChars("11|000100 00000000000000000000000000000000"), - output.get()) + FromChars("11|000100 00000000000000000000000000000000"), output) .status())); } TEST_F(SparseBitSetTest, Entire32BitRange) { for (BranchFactor bf : {BF2, BF4, BF8, BF32}) { - TestEncodeDecode(make_hb_set(1, 0xFFFFFFFE), bf); + TestEncodeDecode(IntSet{0xFFFFFFFE}, bf); } EXPECT_EQ( "10|000010 " "0001 0001 0001 0001 0001 0001 0001 0001 0001 0001 0001 0001 0001 0001 " "0001 0010", - Bits(make_hb_set(1, 0xFFFFFFFE), BF4)); + Bits(IntSet{0xFFFFFFFE}, BF4)); EXPECT_EQ( "01|110100 " "00010000 00000001 00000001 00000001 00000001 00000001 00000001 00000001 " "00000001 00000001 00000010", - Bits(make_hb_set(1, 0xFFFFFFFE), BF8)); + Bits(IntSet{0xFFFFFFFE}, BF8)); EXPECT_EQ( "11|111000 " "00010000000000000000000000000000 00000000000000000000000000000001 " "00000000000000000000000000000001 00000000000000000000000000000001 " "00000000000000000000000000000001 00000000000000000000000000000001 " "00000000000000000000000000000010", - Bits(make_hb_set(1, 0xFFFFFFFE), BF32)); + Bits(IntSet{0xFFFFFFFE}, BF32)); } TEST_F(SparseBitSetTest, ChooseBranchFactor) { - EXPECT_EQ("11|100000 10101010101010101010101010101010", - Bits(make_hb_set(16, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, - 26, 28, 30))); + EXPECT_EQ( + "11|100000 10101010101010101010101010101010", + Bits(IntSet{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30})); EXPECT_EQ( "10|110000 1100 1111 1111 1010 1010 1010 1010 1010 1010 1010 1010 0000", - Bits(make_hb_set(16, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, - 28, 30), + Bits(IntSet{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}, BF4)); - EXPECT_EQ("01|010000 11110000 10101010 10101010 10101010 10101010", - Bits(make_hb_set(16, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, - 26, 28, 30), - BF8)); - EXPECT_EQ("11|100000 10101010101010101010101010101010", - Bits(make_hb_set(16, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, - 26, 28, 30), - BF32)); + EXPECT_EQ( + "01|010000 11110000 10101010 10101010 10101010 10101010", + Bits(IntSet{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}, + BF8)); + EXPECT_EQ( + "11|100000 10101010101010101010101010101010", + Bits(IntSet{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}, + BF32)); EXPECT_EQ( "00|010100 " @@ -681,10 +670,10 @@ TEST_F(SparseBitSetTest, ChooseBranchFactor) { } TEST_F(SparseBitSetTest, RegressionTest32BitRanges) { - TestEncodeDecode(make_hb_set(2, 1, 2546490705), BF2); - TestEncodeDecode(make_hb_set(2, 1, 2546490705), BF4); - TestEncodeDecode(make_hb_set(2, 1, 2546490705), BF8); - TestEncodeDecode(make_hb_set(2, 1, 2546490705), BF32); + TestEncodeDecode(IntSet{1, 2546490705}, BF2); + TestEncodeDecode(IntSet{1, 2546490705}, BF4); + TestEncodeDecode(IntSet{1, 2546490705}, BF8); + TestEncodeDecode(IntSet{1, 2546490705}, BF32); } } // namespace common diff --git a/ift/client/fontations_client.cc b/ift/client/fontations_client.cc index 2bbff8ad..5e350def 100644 --- a/ift/client/fontations_client.cc +++ b/ift/client/fontations_client.cc @@ -7,6 +7,7 @@ #include "absl/status/status.h" #include "common/axis_range.h" #include "common/font_data.h" +#include "common/int_set.h" #include "ift/encoder/encoder.h" using absl::btree_set; @@ -15,6 +16,7 @@ using absl::Status; using absl::StatusOr; using common::AxisRange; using common::FontData; +using common::IntSet; using common::make_hb_blob; using common::make_hb_face; using ift::encoder::Encoder; @@ -133,9 +135,9 @@ Status ToGraph(const Encoder::Encoding& encoding, graph& out, } StatusOr ExtendWithDesignSpace( - const Encoder::Encoding& encoding, btree_set codepoints, - btree_set feature_tags, - flat_hash_map design_space, + const Encoder::Encoding& encoding, const IntSet& codepoints, + const btree_set& feature_tags, + const flat_hash_map& design_space, btree_set* applied_uris) { auto font_path_str = WriteFontToDisk(encoding); if (!font_path_str.ok()) { @@ -201,7 +203,7 @@ StatusOr ExtendWithDesignSpace( } StatusOr Extend(const Encoder::Encoding& encoding, - absl::btree_set codepoints) { + const IntSet& codepoints) { absl::flat_hash_map design_space; return ExtendWithDesignSpace(encoding, codepoints, {}, design_space, nullptr); } diff --git a/ift/client/fontations_client.h b/ift/client/fontations_client.h index 6fbae473..9c4bfa15 100644 --- a/ift/client/fontations_client.h +++ b/ift/client/fontations_client.h @@ -11,6 +11,7 @@ #include "absl/status/status.h" #include "common/axis_range.h" #include "common/font_data.h" +#include "common/int_set.h" #include "ift/encoder/encoder.h" namespace ift::client { @@ -33,14 +34,14 @@ absl::Status ToGraph(const ift::encoder::Encoder::Encoding& encoding, */ absl::StatusOr ExtendWithDesignSpace( const ift::encoder::Encoder::Encoding& encoding, - absl::btree_set codepoints, - absl::btree_set feature_tags, - absl::flat_hash_map design_space, + const common::IntSet& codepoints, + const absl::btree_set& feature_tags, + const absl::flat_hash_map& design_space, absl::btree_set* applied_uris = nullptr); absl::StatusOr Extend( const ift::encoder::Encoder::Encoding& encoding, - absl::btree_set codepoints); + const common::IntSet& codepoints); } // namespace ift::client diff --git a/ift/encoder/closure_glyph_segmenter.cc b/ift/encoder/closure_glyph_segmenter.cc index 98bff1e9..8350691c 100644 --- a/ift/encoder/closure_glyph_segmenter.cc +++ b/ift/encoder/closure_glyph_segmenter.cc @@ -1,5 +1,7 @@ #include "ift/encoder/closure_glyph_segmenter.h" +#include + #include "absl/container/btree_map.h" #include "absl/container/btree_set.h" #include "absl/container/flat_hash_map.h" @@ -12,7 +14,9 @@ #include "common/font_data.h" #include "common/font_helper.h" #include "common/hb_set_unique_ptr.h" +#include "common/int_set.h" #include "common/try.h" +#include "ift/encoder/glyph_segmentation.h" #include "ift/glyph_keyed_diff.h" using absl::btree_map; @@ -22,14 +26,16 @@ using absl::flat_hash_set; using absl::Status; using absl::StatusOr; using absl::StrCat; +using common::CodepointSet; using common::CompatId; using common::FontData; using common::FontHelper; +using common::GlyphSet; using common::hb_set_unique_ptr; +using common::IntSet; using common::make_hb_face; using common::make_hb_set; -using common::to_btree_set; -using common::to_hash_set; +using common::SegmentSet; using ift::GlyphKeyedDiff; namespace ift::encoder { @@ -50,53 +56,49 @@ namespace ift::encoder { class GlyphConditions { public: - GlyphConditions() : and_segments(make_hb_set()), or_segments(make_hb_set()) {} - hb_set_unique_ptr and_segments; - hb_set_unique_ptr or_segments; + GlyphConditions() : and_segments(), or_segments() {} + SegmentSet and_segments; + SegmentSet or_segments; - void RemoveSegments(const hb_set_t* segments) { - hb_set_subtract(and_segments.get(), segments); - hb_set_subtract(or_segments.get(), segments); + void RemoveSegments(const SegmentSet& segments) { + and_segments.subtract(segments); + or_segments.subtract(segments); } }; class SegmentationContext; -Status AnalyzeSegment(SegmentationContext& context, const hb_set_t* codepoints, - hb_set_t* and_gids, hb_set_t* or_gids, - hb_set_t* exclusive_gids); +Status AnalyzeSegment(SegmentationContext& context, + const CodepointSet& codepoints, GlyphSet& and_gids, + GlyphSet& or_gids, GlyphSet& exclusive_gids); class SegmentationContext { public: - SegmentationContext( - hb_face_t* face, const flat_hash_set& initial_segment, - const std::vector>& codepoint_segments) + SegmentationContext(hb_face_t* face, const CodepointSet& initial_segment, + const std::vector& codepoint_segments) : preprocessed_face(make_hb_face(hb_subset_preprocess(face))), original_face(make_hb_face(hb_face_reference(face))), segments(), - initial_codepoints(make_hb_set(initial_segment)), - all_codepoints(make_hb_set()), - full_closure(make_hb_set()), - initial_closure(make_hb_set()) { + initial_codepoints(initial_segment), + all_codepoints(), + full_closure(), + initial_closure() { + all_codepoints.union_set(initial_codepoints); for (const auto& s : codepoint_segments) { - segments.push_back(make_hb_set(s)); - } - - hb_set_union(all_codepoints.get(), initial_codepoints.get()); - for (const auto& s : segments) { - hb_set_union(all_codepoints.get(), s.get()); + segments.push_back(s); + all_codepoints.union_set(s); } { - auto closure = GlyphClosure(initial_codepoints.get()); + auto closure = GlyphClosure(initial_codepoints); if (closure.ok()) { - initial_closure.reset(closure->release()); + initial_closure = std::move(*closure); } } - auto closure = GlyphClosure(all_codepoints.get()); + auto closure = GlyphClosure(all_codepoints); if (closure.ok()) { - full_closure.reset(closure->release()); + full_closure = std::move(*closure); } gid_conditions.resize(hb_face_get_glyph_count(original_face.get())); @@ -109,15 +111,11 @@ class SegmentationContext { fallback_segments = {}; } - StatusOr GlyphClosure(const hb_set_t* codepoints) { - auto cache_key = to_hash_set(codepoints); - - auto it = glyph_closure_cache.find(cache_key); + StatusOr GlyphClosure(const CodepointSet& codepoints) { + auto it = glyph_closure_cache.find(codepoints); if (it != glyph_closure_cache.end()) { glyph_closure_cache_hit++; - hb_set_unique_ptr result = make_hb_set(); - hb_set_union(result.get(), it->second.get()); - return result; + return it->second; } glyph_closure_cache_miss++; @@ -129,7 +127,7 @@ class SegmentationContext { return absl::InternalError("Closure subset configuration failed."); } - hb_set_union(hb_subset_input_unicode_set(input), codepoints); + codepoints.union_into(hb_subset_input_unicode_set(input)); // TODO(garretrieger): configure features (and other settings) appropriately // based on the IFT default feature list. @@ -145,11 +143,9 @@ class SegmentationContext { hb_map_values(new_to_old, gids.get()); hb_subset_plan_destroy(plan); - hb_set_unique_ptr cached_gids = make_hb_set(); - hb_set_union(cached_gids.get(), gids.get()); - glyph_closure_cache.insert(std::pair(cache_key, std::move(cached_gids))); + glyph_closure_cache.insert(std::pair(codepoints, GlyphSet(gids))); - return gids; + return GlyphSet(gids); } void LogClosureCount(absl::string_view operation) { @@ -174,37 +170,32 @@ class SegmentationContext { << " misses)"; } - StatusOr CodepointsToOrGids(const hb_set_t* codepoints) { - auto hash_set_codepoints = common::to_hash_set(codepoints); - - auto it = code_point_set_to_or_gids_cache.find(hash_set_codepoints); + StatusOr CodepointsToOrGids(const CodepointSet& codepoints) { + auto it = code_point_set_to_or_gids_cache.find(codepoints); if (it != code_point_set_to_or_gids_cache.end()) { code_point_set_to_or_gids_cache_hit++; - return it->second.get(); + return it->second; } code_point_set_to_or_gids_cache_miss++; - hb_set_unique_ptr and_gids = make_hb_set(); - hb_set_unique_ptr or_gids = make_hb_set(); - hb_set_unique_ptr exclusive_gids = make_hb_set(); - TRYV(AnalyzeSegment(*this, codepoints, and_gids.get(), or_gids.get(), - exclusive_gids.get())); - - const hb_set_t* or_gids_ptr = or_gids.get(); - code_point_set_to_or_gids_cache.insert( - std::pair(hash_set_codepoints, std::move(or_gids))); - return or_gids_ptr; + GlyphSet and_gids; + GlyphSet or_gids; + GlyphSet exclusive_gids; + TRYV(AnalyzeSegment(*this, codepoints, and_gids, or_gids, exclusive_gids)); + + code_point_set_to_or_gids_cache.insert(std::pair(codepoints, or_gids)); + return or_gids; } // Init common::hb_face_unique_ptr preprocessed_face; common::hb_face_unique_ptr original_face; - std::vector segments; + std::vector segments; - hb_set_unique_ptr initial_codepoints; - hb_set_unique_ptr all_codepoints; - hb_set_unique_ptr full_closure; - hb_set_unique_ptr initial_closure; + CodepointSet initial_codepoints; + CodepointSet all_codepoints; + GlyphSet full_closure; + GlyphSet initial_closure; uint32_t patch_size_min_bytes = 0; uint32_t patch_size_max_bytes = UINT32_MAX; @@ -213,18 +204,17 @@ class SegmentationContext { std::vector gid_conditions; // Phase 2 - btree_set unmapped_glyphs; - btree_map, btree_set> and_glyph_groups; - btree_map, btree_set> or_glyph_groups; - btree_set fallback_segments; + GlyphSet unmapped_glyphs; + btree_map and_glyph_groups; + btree_map or_glyph_groups; + SegmentSet fallback_segments; // Caches and logging - flat_hash_map, hb_set_unique_ptr> glyph_closure_cache; + flat_hash_map glyph_closure_cache; uint32_t glyph_closure_cache_hit = 0; uint32_t glyph_closure_cache_miss = 0; - flat_hash_map, hb_set_unique_ptr> - code_point_set_to_or_gids_cache; + flat_hash_map code_point_set_to_or_gids_cache; uint32_t code_point_set_to_or_gids_cache_hit = 0; uint32_t code_point_set_to_or_gids_cache_miss = 0; @@ -232,10 +222,10 @@ class SegmentationContext { uint32_t closure_count_delta = 0; }; -Status AnalyzeSegment(SegmentationContext& context, const hb_set_t* codepoints, - hb_set_t* and_gids, hb_set_t* or_gids, - hb_set_t* exclusive_gids) { - if (hb_set_is_empty(codepoints)) { +Status AnalyzeSegment(SegmentationContext& context, + const CodepointSet& codepoints, GlyphSet& and_gids, + GlyphSet& or_gids, GlyphSet& exclusive_gids) { + if (codepoints.empty()) { // Skip empty sets, they will never contribute any conditions. return absl::OkStatus(); } @@ -264,67 +254,59 @@ Status AnalyzeSegment(SegmentationContext& context, const hb_set_t* codepoints, // * I - D: the activation conditions for these glyphs is s_i OR … // Where … is one or more additional segments. // * D intersection I: the activation conditions for these glyphs is only s_i - hb_set_unique_ptr except_segment = make_hb_set(); - hb_set_union(except_segment.get(), context.all_codepoints.get()); - hb_set_subtract(except_segment.get(), codepoints); - auto B_except_segment_closure = - TRY(context.GlyphClosure(except_segment.get())); + CodepointSet except_segment = context.all_codepoints; + except_segment.subtract(codepoints); + auto B_except_segment_closure = TRY(context.GlyphClosure(except_segment)); - hb_set_unique_ptr only_segment = make_hb_set(); - hb_set_union(only_segment.get(), context.initial_codepoints.get()); - hb_set_union(only_segment.get(), codepoints); - auto I_only_segment_closure = TRY(context.GlyphClosure(only_segment.get())); - hb_set_subtract(I_only_segment_closure.get(), context.initial_closure.get()); + CodepointSet only_segment = context.initial_codepoints; + only_segment.union_set(codepoints); + auto I_only_segment_closure = TRY(context.GlyphClosure(only_segment)); + I_only_segment_closure.subtract(context.initial_closure); - hb_set_unique_ptr D_dropped = make_hb_set(); - hb_set_union(D_dropped.get(), context.full_closure.get()); - hb_set_subtract(D_dropped.get(), B_except_segment_closure.get()); + GlyphSet D_dropped = context.full_closure; + D_dropped.subtract(B_except_segment_closure); - hb_set_union(and_gids, D_dropped.get()); - hb_set_subtract(and_gids, I_only_segment_closure.get()); + and_gids.union_set(D_dropped); + and_gids.subtract(I_only_segment_closure); - hb_set_union(or_gids, I_only_segment_closure.get()); - hb_set_subtract(or_gids, D_dropped.get()); + or_gids.union_set(I_only_segment_closure); + or_gids.subtract(D_dropped); - hb_set_union(exclusive_gids, I_only_segment_closure.get()); - hb_set_intersect(exclusive_gids, D_dropped.get()); + exclusive_gids.union_set(I_only_segment_closure); + exclusive_gids.intersect(D_dropped); return absl::OkStatus(); } Status AnalyzeSegment(SegmentationContext& context, segment_index_t segment_index, - const hb_set_t* codepoints) { - hb_set_unique_ptr and_gids = make_hb_set(); - hb_set_unique_ptr or_gids = make_hb_set(); - hb_set_unique_ptr exclusive_gids = make_hb_set(); - TRYV(AnalyzeSegment(context, codepoints, and_gids.get(), or_gids.get(), - exclusive_gids.get())); - - hb_codepoint_t and_gid = HB_SET_VALUE_INVALID; - while (hb_set_next(exclusive_gids.get(), &and_gid)) { + const CodepointSet& codepoints) { + GlyphSet and_gids; + GlyphSet or_gids; + GlyphSet exclusive_gids; + TRYV(AnalyzeSegment(context, codepoints, and_gids, or_gids, exclusive_gids)); + + for (uint32_t and_gid : exclusive_gids) { // TODO(garretrieger): if we are assigning an exclusive gid there should be // no other and segments, check and error if this is violated. - hb_set_add(context.gid_conditions[and_gid].and_segments.get(), - segment_index); + context.gid_conditions[and_gid].and_segments.insert(segment_index); } - while (hb_set_next(and_gids.get(), &and_gid)) { - hb_set_add(context.gid_conditions[and_gid].and_segments.get(), - segment_index); + + for (uint32_t and_gid : and_gids) { + context.gid_conditions[and_gid].and_segments.insert(segment_index); } - hb_codepoint_t or_gid = HB_SET_VALUE_INVALID; - while (hb_set_next(or_gids.get(), &or_gid)) { - hb_set_add(context.gid_conditions[or_gid].or_segments.get(), segment_index); + for (uint32_t or_gid : or_gids) { + context.gid_conditions[or_gid].or_segments.insert(segment_index); } return absl::OkStatus(); } Status GroupGlyphs(SegmentationContext& context) { - btree_set fallback_segments_set; + SegmentSet fallback_segments_set; for (segment_index_t s = 0; s < context.segments.size(); s++) { - if (hb_set_is_empty(context.segments[s].get())) { + if (context.segments[s].empty()) { // Ignore empty segments. continue; } @@ -333,19 +315,16 @@ Status GroupGlyphs(SegmentationContext& context) { for (glyph_id_t gid = 0; gid < context.gid_conditions.size(); gid++) { const auto& condition = context.gid_conditions[gid]; - if (!hb_set_is_empty(condition.and_segments.get())) { - auto set = to_btree_set(condition.and_segments.get()); - context.and_glyph_groups[set].insert(gid); + if (!condition.and_segments.empty()) { + context.and_glyph_groups[condition.and_segments].insert(gid); } - if (!hb_set_is_empty(condition.or_segments.get())) { - auto set = to_btree_set(condition.or_segments.get()); - context.or_glyph_groups[set].insert(gid); + if (!condition.or_segments.empty()) { + context.or_glyph_groups[condition.or_segments].insert(gid); } - if (hb_set_is_empty(condition.and_segments.get()) && - hb_set_is_empty(condition.or_segments.get()) && - !hb_set_has(context.initial_closure.get(), gid) && - hb_set_has(context.full_closure.get(), gid)) { + if (condition.and_segments.empty() && condition.or_segments.empty() && + !context.initial_closure.contains(gid) && + context.full_closure.contains(gid)) { context.unmapped_glyphs.insert(gid); } } @@ -354,20 +333,17 @@ Status GroupGlyphs(SegmentationContext& context) { // conditions that were not detected. Therefore we need to rule out the // presence of these additional conditions if an or group is able to be used. for (auto& [or_group, glyphs] : context.or_glyph_groups) { - hb_set_unique_ptr all_other_codepoints = make_hb_set(); - hb_set_union(all_other_codepoints.get(), context.all_codepoints.get()); + CodepointSet all_other_codepoints = context.all_codepoints; for (uint32_t s : or_group) { - hb_set_subtract(all_other_codepoints.get(), context.segments[s].get()); + all_other_codepoints.subtract(context.segments[s]); } - const hb_set_t* or_gids = - TRY(context.CodepointsToOrGids(all_other_codepoints.get())); + GlyphSet or_gids = TRY(context.CodepointsToOrGids(all_other_codepoints)); // Any "OR" glyphs associated with all other codepoints have some additional // conditions to activate so we can't safely include them into this or // condition. They are instead moved to the set of unmapped glyphs. - uint32_t gid = HB_SET_VALUE_INVALID; - while (hb_set_next(or_gids, &gid)) { + for (uint32_t gid : or_gids) { if (glyphs.erase(gid) > 0) { context.unmapped_glyphs.insert(gid); } @@ -386,7 +362,7 @@ Status GroupGlyphs(SegmentationContext& context) { } StatusOr PatchSizeBytes(hb_face_t* original_face, - const absl::btree_set& gids) { + const IntSet& gids) { FontData font_data(original_face); CompatId id; // Since this is just an estimate and we don't need ultra precise numbers run @@ -396,68 +372,57 @@ StatusOr PatchSizeBytes(hb_face_t* original_face, return patch_data.size(); } -void MergeSegments(const SegmentationContext& context, const hb_set_t* segments, - hb_set_t* base) { - segment_index_t next = HB_SET_VALUE_INVALID; - while (hb_set_next(segments, &next)) { - const hb_set_t* codepoints = context.segments[next].get(); - hb_set_union(base, codepoints); +void MergeSegments(const SegmentationContext& context, const IntSet& segments, + IntSet& base) { + for (uint32_t next : segments) { + base.union_set(context.segments[next]); } } StatusOr EstimatePatchSize(SegmentationContext& context, - const hb_set_t* codepoints) { - hb_set_unique_ptr and_gids = make_hb_set(); - hb_set_unique_ptr or_gids = make_hb_set(); - hb_set_unique_ptr exclusive_gids = make_hb_set(); - TRYV(AnalyzeSegment(context, codepoints, and_gids.get(), or_gids.get(), - exclusive_gids.get())); - - auto btree_gids = to_btree_set(exclusive_gids.get()); - return PatchSizeBytes(context.original_face.get(), btree_gids); + const CodepointSet& codepoints) { + GlyphSet and_gids; + GlyphSet or_gids; + GlyphSet exclusive_gids; + TRYV(AnalyzeSegment(context, codepoints, and_gids, or_gids, exclusive_gids)); + return PatchSizeBytes(context.original_face.get(), exclusive_gids); } StatusOr TryMerge(SegmentationContext& context, segment_index_t base_segment_index, - const hb_set_t* segments) { + const SegmentSet& segments) { // Create a merged segment, and remove all of the others - hb_set_unique_ptr to_merge_segments = make_hb_set(hb_set_copy(segments)); - hb_set_del(to_merge_segments.get(), base_segment_index); - - uint32_t size_before = - hb_set_get_population(context.segments[base_segment_index].get()); - hb_set_unique_ptr merged_codepoints = make_hb_set(); - hb_set_union(merged_codepoints.get(), - context.segments[base_segment_index].get()); - MergeSegments(context, to_merge_segments.get(), merged_codepoints.get()); - - uint32_t new_patch_size = - TRY(EstimatePatchSize(context, merged_codepoints.get())); + SegmentSet to_merge_segments = segments; + to_merge_segments.erase(base_segment_index); + + uint32_t size_before = context.segments[base_segment_index].size(); + + CodepointSet merged_codepoints = context.segments[base_segment_index]; + MergeSegments(context, to_merge_segments, merged_codepoints); + + uint32_t new_patch_size = TRY(EstimatePatchSize(context, merged_codepoints)); if (new_patch_size > context.patch_size_max_bytes) { return false; } - hb_set_union(context.segments[base_segment_index].get(), - merged_codepoints.get()); - uint32_t size_after = - hb_set_get_population(context.segments[base_segment_index].get()); + context.segments[base_segment_index].union_set(merged_codepoints); + uint32_t size_after = context.segments[base_segment_index].size(); VLOG(0) << " Merged " << size_before << " codepoints up to " << size_after << " codepoints for segment " << base_segment_index << ". New patch size " << new_patch_size << " bytes."; - segment_index_t segment_index = HB_SET_VALUE_INVALID; - while (hb_set_next(to_merge_segments.get(), &segment_index)) { + for (segment_index_t segment_index : to_merge_segments) { // To avoid changing the indices of other segments set the ones we're // removing to empty sets. That effectively disables them. - hb_set_clear(context.segments[segment_index].get()); + context.segments[segment_index].clear(); } // Remove all segments we touched here from gid_conditions so they can be // recalculated. - hb_set_add(to_merge_segments.get(), base_segment_index); + to_merge_segments.insert(base_segment_index); for (auto& condition : context.gid_conditions) { - condition.RemoveSegments(to_merge_segments.get()); + condition.RemoveSegments(to_merge_segments); } return true; @@ -485,15 +450,13 @@ StatusOr TryMergingACompositeCondition( continue; } - hb_set_unique_ptr triggering_segments = make_hb_set(); - next_condition->TriggeringSegments(triggering_segments.get()); - if (!hb_set_has(triggering_segments.get(), base_segment_index)) { + SegmentSet triggering_segments = next_condition->TriggeringSegments(); + if (!triggering_segments.contains(base_segment_index)) { next_condition++; continue; } - if (!TRY( - TryMerge(context, base_segment_index, triggering_segments.get()))) { + if (!TRY(TryMerge(context, base_segment_index, triggering_segments))) { next_condition++; continue; } @@ -526,11 +489,8 @@ StatusOr TryMergingABaseSegment( continue; } - hb_set_unique_ptr triggering_segments = make_hb_set(); - next_condition->TriggeringSegments(triggering_segments.get()); - - if (!TRY( - TryMerge(context, base_segment_index, triggering_segments.get()))) { + SegmentSet triggering_segments = next_condition->TriggeringSegments(); + if (!TRY(TryMerge(context, base_segment_index, triggering_segments))) { next_condition++; continue; } @@ -576,7 +536,6 @@ StatusOr IsPatchTooSmall(SegmentationContext& context, StatusOr> MergeNextBaseSegment( SegmentationContext& context, const GlyphSegmentation& candidate_segmentation, uint32_t start_segment) { - hb_set_unique_ptr triggering_patches = make_hb_set(); for (auto condition = candidate_segmentation.Conditions().begin(); condition != candidate_segmentation.Conditions().end(); condition++) { if (!condition->IsExclusive()) { @@ -624,26 +583,25 @@ StatusOr> MergeNextBaseSegment( */ Status ValidateSegmentation(const SegmentationContext& context, const GlyphSegmentation& segementation) { - hb_set_unique_ptr visited = make_hb_set(); + IntSet visited; for (const auto& [id, gids] : segementation.GidSegments()) { for (glyph_id_t gid : gids) { - if (hb_set_has(context.initial_closure.get(), gid)) { + if (context.initial_closure.contains(gid)) { return absl::FailedPreconditionError( "Initial font glyph is present in a patch."); } - if (hb_set_has(visited.get(), gid)) { + if (visited.contains(gid)) { return absl::FailedPreconditionError( "Glyph segments are not disjoint."); } - hb_set_add(visited.get(), gid); + visited.insert(gid); } } - hb_set_unique_ptr full_minus_initial = make_hb_set(); - hb_set_union(full_minus_initial.get(), context.full_closure.get()); - hb_set_subtract(full_minus_initial.get(), context.initial_closure.get()); + IntSet full_minus_initial = context.full_closure; + full_minus_initial.subtract(context.initial_closure); - if (!hb_set_is_equal(full_minus_initial.get(), visited.get())) { + if (full_minus_initial != visited) { return absl::FailedPreconditionError( "Not all glyphs in the full closure have been placed."); } @@ -652,9 +610,9 @@ Status ValidateSegmentation(const SegmentationContext& context, } StatusOr ClosureGlyphSegmenter::CodepointToGlyphSegments( - hb_face_t* face, flat_hash_set initial_segment, - std::vector> codepoint_segments, - uint32_t patch_size_min_bytes, uint32_t patch_size_max_bytes) const { + hb_face_t* face, CodepointSet initial_segment, + std::vector codepoint_segments, uint32_t patch_size_min_bytes, + uint32_t patch_size_max_bytes) const { SegmentationContext context(face, initial_segment, codepoint_segments); context.patch_size_min_bytes = patch_size_min_bytes; context.patch_size_max_bytes = patch_size_max_bytes; @@ -662,7 +620,7 @@ StatusOr ClosureGlyphSegmenter::CodepointToGlyphSegments( VLOG(0) << "Forming initial segmentation plan."; segment_index_t segment_index = 0; for (const auto& segment : context.segments) { - TRYV(AnalyzeSegment(context, segment_index, segment.get())); + TRYV(AnalyzeSegment(context, segment_index, segment)); segment_index++; } context.LogClosureCount("Inital segment analysis"); @@ -672,9 +630,9 @@ StatusOr ClosureGlyphSegmenter::CodepointToGlyphSegments( context.ResetGroupings(); TRYV(GroupGlyphs(context)); - GlyphSegmentation segmentation( - to_btree_set(context.initial_codepoints.get()), - to_btree_set(context.initial_closure.get()), context.unmapped_glyphs); + GlyphSegmentation segmentation(context.initial_codepoints, + context.initial_closure, + context.unmapped_glyphs); segmentation.CopySegments(context.segments); TRYV(GlyphSegmentation::GroupsToSegmentation( @@ -701,7 +659,7 @@ StatusOr ClosureGlyphSegmenter::CodepointToGlyphSegments( VLOG(0) << "Re-analyzing segment " << last_merged_segment_index << " due to merge."; TRYV(AnalyzeSegment(context, last_merged_segment_index, - context.segments[last_merged_segment_index].get())); + context.segments[last_merged_segment_index])); } return absl::InternalError("unreachable"); diff --git a/ift/encoder/closure_glyph_segmenter.h b/ift/encoder/closure_glyph_segmenter.h index 7e4b572d..a5ca7e66 100644 --- a/ift/encoder/closure_glyph_segmenter.h +++ b/ift/encoder/closure_glyph_segmenter.h @@ -2,8 +2,8 @@ #ifndef IFT_ENCODER_CLOSURE_GLYPH_SEGMENTER_H_ #define IFT_ENCODER_CLOSURE_GLYPH_SEGMENTER_H_ -#include "absl/container/flat_hash_set.h" #include "absl/status/statusor.h" +#include "common/int_set.h" #include "ift/encoder/glyph_segmentation.h" namespace ift::encoder { @@ -31,8 +31,8 @@ class ClosureGlyphSegmenter { * initial ift font. */ absl::StatusOr CodepointToGlyphSegments( - hb_face_t* face, absl::flat_hash_set initial_segment, - std::vector> codepoint_segments, + hb_face_t* face, common::CodepointSet initial_segment, + std::vector codepoint_segments, uint32_t patch_size_min_bytes = 0, uint32_t patch_size_max_bytes = UINT32_MAX) const; }; diff --git a/ift/encoder/closure_glyph_segmenter_test.cc b/ift/encoder/closure_glyph_segmenter_test.cc index 6f54e82c..02e06de3 100644 --- a/ift/encoder/closure_glyph_segmenter_test.cc +++ b/ift/encoder/closure_glyph_segmenter_test.cc @@ -1,12 +1,13 @@ #include "ift/encoder/closure_glyph_segmenter.h" -#include "absl/container/btree_set.h" #include "common/font_data.h" +#include "common/int_set.h" #include "gtest/gtest.h" -using absl::btree_set; +using common::CodepointSet; using common::FontData; using common::hb_face_unique_ptr; +using common::IntSet; using common::make_hb_face; namespace ift::encoder { @@ -41,7 +42,7 @@ TEST_F(ClosureGlyphSegmenterTest, SimpleSegmentation) { segmenter.CodepointToGlyphSegments(roboto.get(), {'a'}, {{'b'}, {'c'}}); ASSERT_TRUE(segmentation.ok()) << segmentation.status(); - std::vector> expected_segments = {{'b'}, {'c'}}; + std::vector expected_segments = {{'b'}, {'c'}}; ASSERT_EQ(segmentation->Segments(), expected_segments); ASSERT_EQ(segmentation->ToString(), @@ -58,7 +59,7 @@ TEST_F(ClosureGlyphSegmenterTest, AndCondition) { segmenter.CodepointToGlyphSegments(roboto.get(), {'a'}, {{'f'}, {'i'}}); ASSERT_TRUE(segmentation.ok()) << segmentation.status(); - std::vector> expected_segments = {{'f'}, {'i'}}; + std::vector expected_segments = {{'f'}, {'i'}}; ASSERT_EQ(segmentation->Segments(), expected_segments); ASSERT_EQ(segmentation->ToString(), @@ -77,7 +78,7 @@ TEST_F(ClosureGlyphSegmenterTest, OrCondition) { {{0xc1}, {0x106}}); ASSERT_TRUE(segmentation.ok()) << segmentation.status(); - std::vector> expected_segments = {{0xc1}, {0x106}}; + std::vector expected_segments = {{0xc1}, {0x106}}; ASSERT_EQ(segmentation->Segments(), expected_segments); ASSERT_EQ(segmentation->ToString(), @@ -101,7 +102,7 @@ TEST_F(ClosureGlyphSegmenterTest, MergeBase_ViaConditions) { {{'a', 'b', 'd'}, {'e', 'f'}, {'j', 'k', 'm', 'n'}, {'i', 'l'}}, 370); ASSERT_TRUE(segmentation.ok()) << segmentation.status(); - std::vector> expected_segments = { + std::vector expected_segments = { {'a', 'b', 'd'}, {'e', 'f', 'i', 'l'}, {'j', 'k', 'm', 'n'}, {}}; ASSERT_EQ(segmentation->Segments(), expected_segments); @@ -124,7 +125,7 @@ TEST_F(ClosureGlyphSegmenterTest, MergeBases) { {{'a', 'b', 'd'}, {'e', 'f'}, {'j', 'k'}, {'m', 'n', 'o', 'p'}}, 370); ASSERT_TRUE(segmentation.ok()) << segmentation.status(); - std::vector> expected_segments = { + std::vector expected_segments = { {'a', 'b', 'd'}, {'e', 'f', 'j', 'k'}, {}, @@ -153,7 +154,7 @@ TEST_F(ClosureGlyphSegmenterTest, MergeBases_MaxSize) { 700); ASSERT_TRUE(segmentation.ok()) << segmentation.status(); - std::vector> expected_segments = { + std::vector expected_segments = { {'a', 'b', 'd'}, {'e', 'f', 'j', 'k'}, {'m', 'n', 'o', 'p'}, {}}; ASSERT_EQ(segmentation->Segments(), expected_segments); @@ -173,8 +174,7 @@ TEST_F(ClosureGlyphSegmenterTest, MixedAndOr) { roboto.get(), {'a'}, {{'f', 0xc1}, {'i', 0x106}}); ASSERT_TRUE(segmentation.ok()) << segmentation.status(); - std::vector> expected_segments = {{'f', 0xc1}, - {'i', 0x106}}; + std::vector expected_segments = {{'f', 0xc1}, {'i', 0x106}}; ASSERT_EQ(segmentation->Segments(), expected_segments); ASSERT_EQ(segmentation->ToString(), diff --git a/ift/encoder/condition.h b/ift/encoder/condition.h index a096da0f..00f17ff4 100644 --- a/ift/encoder/condition.h +++ b/ift/encoder/condition.h @@ -4,6 +4,7 @@ #include #include +#include "common/int_set.h" #include "ift/encoder/subset_definition.h" namespace ift::encoder { @@ -18,7 +19,7 @@ namespace ift::encoder { */ struct Condition { SubsetDefinition subset_definition; - absl::btree_set child_conditions; + common::IntSet child_conditions; bool conjunctive = false; std::optional activated_patch_id = std::nullopt; diff --git a/ift/encoder/encoder.cc b/ift/encoder/encoder.cc index 4e72b4b3..f8dbda23 100644 --- a/ift/encoder/encoder.cc +++ b/ift/encoder/encoder.cc @@ -16,7 +16,7 @@ #include "common/compat_id.h" #include "common/font_data.h" #include "common/font_helper.h" -#include "common/hb_set_unique_ptr.h" +#include "common/int_set.h" #include "common/try.h" #include "common/woff2.h" #include "hb-subset.h" @@ -29,7 +29,6 @@ using absl::btree_set; using absl::flat_hash_map; -using absl::flat_hash_set; using absl::Status; using absl::StatusOr; using absl::StrCat; @@ -41,7 +40,7 @@ using common::FontData; using common::FontHelper; using common::hb_blob_unique_ptr; using common::hb_face_unique_ptr; -using common::hb_set_unique_ptr; +using common::IntSet; using common::make_hb_blob; using common::make_hb_face; using common::make_hb_set; @@ -106,12 +105,6 @@ StatusOr Encoder::FullyExpandedSubset( return CutSubset(context, face_.get(), all, false); } -bool is_subset(const flat_hash_set& a, - const flat_hash_set& b) { - return std::all_of(b.begin(), b.end(), - [&a](const uint32_t& v) { return a.count(v) > 0; }); -} - std::vector Encoder::OutgoingEdges( const SubsetDefinition& base_subset, uint32_t choose) const { std::vector remaining_subsets; @@ -146,8 +139,7 @@ SubsetDefinition Encoder::Combine(const SubsetDefinition& s1, return result; } -Status Encoder::AddGlyphDataPatch(uint32_t id, - const absl::btree_set& gids) { +Status Encoder::AddGlyphDataPatch(uint32_t id, const IntSet& gids) { if (!face_) { return absl::FailedPreconditionError("Encoder must have a face set."); } @@ -304,7 +296,7 @@ Status Encoder::EnsureGlyphKeyedPatchesPopulated( return absl::OkStatus(); } - flat_hash_set reachable_segments; + IntSet reachable_segments; for (const auto& condition : glyph_patch_conditions_) { if (condition.activated_patch_id.has_value()) { reachable_segments.insert(*condition.activated_patch_id); @@ -604,7 +596,7 @@ StatusOr Encoder::GenerateBaseGvar( // Generate a CFF2 CharStrings index that retains glyph ids, but contains // glyph data from face only for gids. absl::StatusOr GenerateCharStringsTable(hb_face_t* face, - const hb_set_t* gids) { + const IntSet& gids) { // Create the per glyph data and offsets std::string charstrings_per_glyph; @@ -613,7 +605,7 @@ absl::StatusOr GenerateCharStringsTable(hb_face_t* face, std::vector offsets; for (uint32_t gid = 0; gid < glyph_count; gid++) { offsets.push_back(current_offset); - if (!hb_set_has(gids, gid)) { + if (!gids.contains(gid)) { continue; } @@ -696,9 +688,9 @@ StatusOr Encoder::GenerateBaseCff2( int index = -1; uint32_t old_gid = HB_MAP_VALUE_INVALID; uint32_t new_gid = HB_MAP_VALUE_INVALID; - hb_set_unique_ptr gids = make_hb_set(); + IntSet gids; while (hb_map_next(old_to_new, &index, &old_gid, &new_gid)) { - hb_set_add(gids.get(), old_gid); + gids.insert(old_gid); } hb_subset_plan_destroy(plan); @@ -712,7 +704,7 @@ StatusOr Encoder::GenerateBaseCff2( // This charstring table includes charstring data from "instance_face" for all // glyphs in "gids". std::string charstrings = - TRY(GenerateCharStringsTable(instance_face.get(), gids.get())); + TRY(GenerateCharStringsTable(instance_face.get(), gids)); // Step 5: assemble the composite table. std::string composite_table = instance_non_charstrings.string() + charstrings; diff --git a/ift/encoder/encoder.h b/ift/encoder/encoder.h index af6072d5..0c75e44f 100644 --- a/ift/encoder/encoder.h +++ b/ift/encoder/encoder.h @@ -6,11 +6,11 @@ #include "absl/container/btree_set.h" #include "absl/container/flat_hash_map.h" -#include "absl/container/flat_hash_set.h" #include "absl/status/statusor.h" #include "absl/strings/string_view.h" #include "common/compat_id.h" #include "common/font_data.h" +#include "common/int_set.h" #include "hb-subset.h" #include "ift/encoder/condition.h" #include "ift/encoder/subset_definition.h" @@ -55,8 +55,7 @@ class Encoder { * An id is provided which uniquely identifies this segment and can be used to * specify dependencies against this segment. */ - absl::Status AddGlyphDataPatch(uint32_t patch_id, - const absl::btree_set& gids); + absl::Status AddGlyphDataPatch(uint32_t patch_id, const common::IntSet& gids); /* * Adds a condition which may trigger the inclusion of a glyph data patch. @@ -239,7 +238,7 @@ class Encoder { common::CompatId& compat_id) const; common::hb_face_unique_ptr face_; - absl::btree_map> glyph_data_patches_; + absl::btree_map glyph_data_patches_; std::vector glyph_patch_conditions_; SubsetDefinition base_subset_; diff --git a/ift/encoder/encoder_test.cc b/ift/encoder/encoder_test.cc index 8b279eea..a2f204aa 100644 --- a/ift/encoder/encoder_test.cc +++ b/ift/encoder/encoder_test.cc @@ -10,7 +10,6 @@ #include "absl/container/btree_map.h" #include "absl/container/btree_set.h" -#include "absl/container/flat_hash_set.h" #include "absl/strings/string_view.h" #include "absl/types/span.h" #include "common/axis_range.h" @@ -18,7 +17,7 @@ #include "common/brotli_binary_patch.h" #include "common/font_data.h" #include "common/font_helper.h" -#include "common/hb_set_unique_ptr.h" +#include "common/int_set.h" #include "gtest/gtest.h" #include "ift/client/fontations_client.h" #include "ift/encoder/subset_definition.h" @@ -29,7 +28,6 @@ using absl::btree_map; using absl::btree_set; using absl::flat_hash_map; -using absl::flat_hash_set; using absl::Span; using absl::Status; using absl::StatusOr; @@ -38,9 +36,11 @@ using absl::string_view; using common::AxisRange; using common::BinaryPatch; using common::BrotliBinaryPatch; +using common::CodepointSet; using common::FontData; using common::FontHelper; -using common::hb_set_unique_ptr; +using common::GlyphSet; +using common::IntSet; using common::make_hb_set; using ift::client::ToGraph; using ift::proto::DEFAULT_ENCODING; @@ -71,19 +71,18 @@ class EncoderTest : public ::testing::Test { noto_sans_jp = from_file("ift/testdata/NotoSansJP-Regular.subset.ttf"); auto face = noto_sans_jp.face(); - hb_set_unique_ptr init = make_hb_set(); - hb_set_add_range(init.get(), 0, hb_face_get_glyph_count(face.get()) - 1); - hb_set_unique_ptr excluded = make_hb_set(); - hb_set_add_sorted_array(excluded.get(), testdata::TEST_SEGMENT_1, - std::size(testdata::TEST_SEGMENT_1)); - hb_set_add_sorted_array(excluded.get(), testdata::TEST_SEGMENT_2, - std::size(testdata::TEST_SEGMENT_2)); - hb_set_add_sorted_array(excluded.get(), testdata::TEST_SEGMENT_3, - std::size(testdata::TEST_SEGMENT_3)); - hb_set_add_sorted_array(excluded.get(), testdata::TEST_SEGMENT_4, - std::size(testdata::TEST_SEGMENT_4)); - hb_set_subtract(init.get(), excluded.get()); - segment_0_gids = common::to_btree_set(init.get()); + GlyphSet init; + init.insert_range(0, hb_face_get_glyph_count(face.get()) - 1); + + GlyphSet excluded; + excluded.insert_sorted_array(testdata::TEST_SEGMENT_1); + excluded.insert_sorted_array(testdata::TEST_SEGMENT_2); + excluded.insert_sorted_array(testdata::TEST_SEGMENT_3); + excluded.insert_sorted_array(testdata::TEST_SEGMENT_4); + + init.subtract(excluded); + + segment_0_gids = init; segment_1_gids = TestSegment1(); segment_2_gids = TestSegment2(); segment_3_gids = TestSegment3(); @@ -102,17 +101,17 @@ class EncoderTest : public ::testing::Test { FontData vf_font; FontData noto_sans_jp; - btree_set segment_0_gids; - btree_set segment_1_gids; - btree_set segment_2_gids; - btree_set segment_3_gids; - btree_set segment_4_gids; + GlyphSet segment_0_gids; + GlyphSet segment_1_gids; + GlyphSet segment_2_gids; + GlyphSet segment_3_gids; + GlyphSet segment_4_gids; - btree_set segment_0_cps; - btree_set segment_1_cps; - btree_set segment_2_cps; - btree_set segment_3_cps; - btree_set segment_4_cps; + CodepointSet segment_0_cps; + CodepointSet segment_1_cps; + CodepointSet segment_2_cps; + CodepointSet segment_3_cps; + CodepointSet segment_4_cps; uint32_t chunk0_cp = 0x47; uint32_t chunk1_cp = 0xb7; @@ -130,22 +129,6 @@ class EncoderTest : public ::testing::Test { return result; } - btree_set ToCodepointsSet(const FontData& font_data) { - hb_face_t* face = font_data.reference_face(); - - hb_set_unique_ptr codepoints = make_hb_set(); - hb_face_collect_unicodes(face, codepoints.get()); - hb_face_destroy(face); - - btree_set result; - hb_codepoint_t cp = HB_SET_VALUE_INVALID; - while (hb_set_next(codepoints.get(), &cp)) { - result.insert(cp); - } - - return result; - } - std::string GetVarInfo(const FontData& font_data) { auto face = font_data.face(); constexpr uint32_t max_axes = 5; @@ -188,10 +171,10 @@ StatusOr PatchHasGvar(const flat_hash_map& patches, TEST_F(EncoderTest, OutgoingEdges) { Encoder encoder; - encoder.AddNonGlyphDataSegment(flat_hash_set{1, 2}); - encoder.AddNonGlyphDataSegment(flat_hash_set{3, 4}); - encoder.AddNonGlyphDataSegment(flat_hash_set{5, 6}); - encoder.AddNonGlyphDataSegment(flat_hash_set{7, 8}); + encoder.AddNonGlyphDataSegment(IntSet{1, 2}); + encoder.AddNonGlyphDataSegment(IntSet{3, 4}); + encoder.AddNonGlyphDataSegment(IntSet{5, 6}); + encoder.AddNonGlyphDataSegment(IntSet{7, 8}); SubsetDefinition s1{1, 2}; SubsetDefinition s2{3, 4}; @@ -261,7 +244,7 @@ TEST_F(EncoderTest, OutgoingEdges_DesignSpace_PointToRange) { base.design_space[kWght] = AxisRange::Point(300); Encoder encoder; - encoder.AddNonGlyphDataSegment(flat_hash_set{3, 4}); + encoder.AddNonGlyphDataSegment(IntSet{3, 4}); encoder.AddDesignSpaceSegment({{kWght, *AxisRange::Range(300, 400)}}); SubsetDefinition s1{3, 4}; @@ -282,7 +265,7 @@ TEST_F(EncoderTest, OutgoingEdges_DesignSpace_AddAxis_1) { base.design_space[kWght] = *AxisRange::Range(200, 500); Encoder encoder; - encoder.AddNonGlyphDataSegment(flat_hash_set{3, 4}); + encoder.AddNonGlyphDataSegment(IntSet{3, 4}); encoder.AddDesignSpaceSegment({{kWdth, *AxisRange::Range(300, 400)}}); SubsetDefinition s1{3, 4}; @@ -303,7 +286,7 @@ TEST_F(EncoderTest, OutgoingEdges_DesignSpace_AddAxis_OverlappingAxisRange) { base.design_space[kWght] = *AxisRange::Range(200, 500); Encoder encoder; - encoder.AddNonGlyphDataSegment(flat_hash_set{3, 4}); + encoder.AddNonGlyphDataSegment(IntSet{3, 4}); encoder.AddDesignSpaceSegment({ {kWght, *AxisRange::Range(300, 700)}, {kWdth, *AxisRange::Range(300, 400)}, @@ -389,13 +372,13 @@ TEST_F(EncoderTest, DontClobberBaseSubset) { auto s = encoder.AddGlyphDataPatch(1, segment_1_gids); ASSERT_TRUE(s.ok()) << s; - s = encoder.SetBaseSubset(flat_hash_set{}); + s = encoder.SetBaseSubset(IntSet{}); ASSERT_TRUE(s.ok()) << s; - s = encoder.SetBaseSubset(flat_hash_set{1}); + s = encoder.SetBaseSubset(IntSet{1}); ASSERT_TRUE(s.ok()) << s; - s = encoder.SetBaseSubset(flat_hash_set{}); + s = encoder.SetBaseSubset(IntSet{}); ASSERT_TRUE(absl::IsFailedPrecondition(s)) << s; } @@ -404,7 +387,7 @@ TEST_F(EncoderTest, Encode_OneSubset) { hb_face_t* face = font.reference_face(); encoder.SetFace(face); - auto s = encoder.SetBaseSubset(flat_hash_set{'a', 'd'}); + auto s = encoder.SetBaseSubset(IntSet{'a', 'd'}); ASSERT_TRUE(s.ok()) << s; auto encoding = encoder.Encode(); hb_face_destroy(face); @@ -420,11 +403,11 @@ TEST_F(EncoderTest, Encode_OneSubset) { } TEST_F(EncoderTest, Encode_TwoSubsets) { - absl::flat_hash_set s1 = {'b', 'c'}; + IntSet s1 = {'b', 'c'}; Encoder encoder; hb_face_t* face = font.reference_face(); encoder.SetFace(face); - auto s = encoder.SetBaseSubset(flat_hash_set{'a', 'd'}); + auto s = encoder.SetBaseSubset(IntSet{'a', 'd'}); ASSERT_TRUE(s.ok()) << s; encoder.AddNonGlyphDataSegment(s1); @@ -442,11 +425,11 @@ TEST_F(EncoderTest, Encode_TwoSubsets) { } TEST_F(EncoderTest, Encode_TwoSubsetsAndOptionalFeature) { - absl::flat_hash_set s1 = {'B', 'C'}; + IntSet s1 = {'B', 'C'}; Encoder encoder; hb_face_t* face = full_font.reference_face(); encoder.SetFace(face); - auto s = encoder.SetBaseSubset(flat_hash_set{'A', 'D'}); + auto s = encoder.SetBaseSubset(IntSet{'A', 'D'}); ASSERT_TRUE(s.ok()) << s; encoder.AddNonGlyphDataSegment(s1); encoder.AddFeatureGroupSegment({HB_TAG('c', '2', 's', 'c')}); @@ -470,12 +453,12 @@ TEST_F(EncoderTest, Encode_TwoSubsetsAndOptionalFeature) { } TEST_F(EncoderTest, Encode_ThreeSubsets) { - absl::flat_hash_set s1 = {'b'}; - absl::flat_hash_set s2 = {'c'}; + IntSet s1 = {'b'}; + IntSet s2 = {'c'}; Encoder encoder; hb_face_t* face = font.reference_face(); encoder.SetFace(face); - auto s = encoder.SetBaseSubset(flat_hash_set{'a'}); + auto s = encoder.SetBaseSubset(IntSet{'a'}); ASSERT_TRUE(s.ok()) << s; encoder.AddNonGlyphDataSegment(s1); encoder.AddNonGlyphDataSegment(s2); @@ -500,12 +483,12 @@ TEST_F(EncoderTest, Encode_ThreeSubsets) { } TEST_F(EncoderTest, Encode_ThreeSubsets_WithOverlaps) { - absl::flat_hash_set s1 = {'b', 'c'}; - absl::flat_hash_set s2 = {'b', 'd'}; + IntSet s1 = {'b', 'c'}; + IntSet s2 = {'b', 'd'}; Encoder encoder; hb_face_t* face = font.reference_face(); encoder.SetFace(face); - auto s = encoder.SetBaseSubset(flat_hash_set{'a'}); + auto s = encoder.SetBaseSubset(IntSet{'a'}); ASSERT_TRUE(s.ok()) << s; encoder.AddNonGlyphDataSegment(s1); encoder.AddNonGlyphDataSegment(s2); @@ -539,7 +522,7 @@ TEST_F(EncoderTest, Encode_ThreeSubsets_VF) { auto s = encoder.SetBaseSubsetFromDef(base_def); ASSERT_TRUE(s.ok()) << s; - encoder.AddNonGlyphDataSegment(flat_hash_set{'b'}); + encoder.AddNonGlyphDataSegment(IntSet{'b'}); encoder.AddDesignSpaceSegment({{kWdth, *AxisRange::Range(75.0f, 100.0f)}}); auto encoding = encoder.Encode(); @@ -582,13 +565,13 @@ TEST_F(EncoderTest, Encode_ThreeSubsets_Mixed) { s.Update(encoder.AddGlyphDataPatchCondition(Condition::SimpleCondition( SubsetDefinition::Codepoints(segment_4_cps), 4))); - flat_hash_set base_subset; + IntSet base_subset; base_subset.insert(segment_0_cps.begin(), segment_0_cps.end()); base_subset.insert(segment_1_cps.begin(), segment_1_cps.end()); base_subset.insert(segment_2_cps.begin(), segment_2_cps.end()); s.Update(encoder.SetBaseSubset(base_subset)); - flat_hash_set extension_segment; + IntSet extension_segment; extension_segment.insert(segment_3_cps.begin(), segment_3_cps.end()); extension_segment.insert(segment_4_cps.begin(), segment_4_cps.end()); encoder.AddNonGlyphDataSegment(extension_segment); @@ -598,7 +581,8 @@ TEST_F(EncoderTest, Encode_ThreeSubsets_Mixed) { auto encoding = encoder.Encode(); ASSERT_TRUE(encoding.ok()) << encoding.status(); - auto cps = ToCodepointsSet(encoding->init_font); + auto face = encoding->init_font.face(); + auto cps = FontHelper::ToCodepointsSet(face.get()); ASSERT_TRUE(cps.contains(chunk0_cp)); ASSERT_TRUE(cps.contains(chunk1_cp)); ASSERT_TRUE(cps.contains(chunk2_cp)); @@ -640,22 +624,17 @@ TEST_F(EncoderTest, Encode_ThreeSubsets_Mixed_VF) { s.Update(encoder.AddGlyphDataPatch(1, {41, 42, 43, 44})); ASSERT_TRUE(s.ok()) << s; - s.Update(encoder.AddGlyphDataPatchCondition(Condition::SimpleCondition( - SubsetDefinition::Codepoints( - flat_hash_set{0x41, 0x42, 0x43, 0x44}), - 0))); - s.Update(encoder.AddGlyphDataPatchCondition(Condition::SimpleCondition( - SubsetDefinition::Codepoints( - flat_hash_set{0x45, 0x46, 0x47, 0x48}), - 1))); + s.Update(encoder.AddGlyphDataPatchCondition( + Condition::SimpleCondition(SubsetDefinition{0x41, 0x42, 0x43, 0x44}, 0))); + s.Update(encoder.AddGlyphDataPatchCondition( + Condition::SimpleCondition(SubsetDefinition{0x45, 0x46, 0x47, 0x48}, 1))); SubsetDefinition base_subset; base_subset.design_space[kWdth] = AxisRange::Point(100.0f); base_subset.design_space[kWght] = AxisRange::Point(300.0f); s.Update(encoder.SetBaseSubsetFromDef(base_subset)); - flat_hash_set extension_segment = {0x41, 0x42, 0x43, 0x44, - 0x45, 0x46, 0x47, 0x48}; + IntSet extension_segment = {0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48}; encoder.AddNonGlyphDataSegment(extension_segment); encoder.AddDesignSpaceSegment({{kWght, *AxisRange::Range(200.0f, 700.0f)}}); @@ -719,12 +698,12 @@ TEST_F(EncoderTest, Encode_ThreeSubsets_Mixed_WithFeatureMappings) { ASSERT_TRUE(s.ok()) << s; // Partitions {0, 1}, {2, 3, 4}, +ccmp - flat_hash_set base_subset; + IntSet base_subset; base_subset.insert(segment_0_cps.begin(), segment_0_cps.end()); base_subset.insert(segment_1_cps.begin(), segment_1_cps.end()); s.Update(encoder.SetBaseSubset(base_subset)); - flat_hash_set extension_segment; + IntSet extension_segment; extension_segment.insert(segment_2_cps.begin(), segment_2_cps.end()); extension_segment.insert(segment_3_cps.begin(), segment_3_cps.end()); extension_segment.insert(segment_4_cps.begin(), segment_4_cps.end()); @@ -746,13 +725,13 @@ TEST_F(EncoderTest, Encode_ThreeSubsets_Mixed_WithFeatureMappings) { } TEST_F(EncoderTest, Encode_FourSubsets) { - absl::flat_hash_set s1 = {'b'}; - absl::flat_hash_set s2 = {'c'}; - absl::flat_hash_set s3 = {'d'}; + IntSet s1 = {'b'}; + IntSet s2 = {'c'}; + IntSet s3 = {'d'}; Encoder encoder; hb_face_t* face = font.reference_face(); encoder.SetFace(face); - auto s = encoder.SetBaseSubset(flat_hash_set{'a'}); + auto s = encoder.SetBaseSubset(IntSet{'a'}); ASSERT_TRUE(s.ok()) << s; encoder.AddNonGlyphDataSegment(s1); encoder.AddNonGlyphDataSegment(s2); @@ -777,13 +756,13 @@ TEST_F(EncoderTest, Encode_FourSubsets) { } TEST_F(EncoderTest, Encode_FourSubsets_WithJumpAhead) { - absl::flat_hash_set s1 = {'b'}; - absl::flat_hash_set s2 = {'c'}; - absl::flat_hash_set s3 = {'d'}; + IntSet s1 = {'b'}; + IntSet s2 = {'c'}; + IntSet s3 = {'d'}; Encoder encoder; hb_face_t* face = font.reference_face(); encoder.SetFace(face); - auto s = encoder.SetBaseSubset(flat_hash_set{'a'}); + auto s = encoder.SetBaseSubset(IntSet{'a'}); ASSERT_TRUE(s.ok()) << s; encoder.AddNonGlyphDataSegment(s1); encoder.AddNonGlyphDataSegment(s2); @@ -824,7 +803,7 @@ TEST_F(EncoderTest, Encode_ComplicatedActivationConditions) { hb_face_t* face = font.reference_face(); encoder.SetFace(face); - auto s = encoder.SetBaseSubset(flat_hash_set{}); + auto s = encoder.SetBaseSubset(IntSet{}); s.Update(encoder.AddGlyphDataPatch(1, {69})); // a s.Update(encoder.AddGlyphDataPatch(2, {70})); // b s.Update(encoder.AddGlyphDataPatch(3, {71})); // c @@ -832,30 +811,28 @@ TEST_F(EncoderTest, Encode_ComplicatedActivationConditions) { s.Update(encoder.AddGlyphDataPatch(5, {50})); s.Update(encoder.AddGlyphDataPatch(6, {60})); - encoder.AddNonGlyphDataSegment(flat_hash_set{'a', 'b', 'c', 'd'}); + encoder.AddNonGlyphDataSegment(IntSet{'a', 'b', 'c', 'd'}); // 0 - s.Update(encoder.AddGlyphDataPatchCondition(Condition::SimpleCondition( - SubsetDefinition::Codepoints(flat_hash_set{'b'}), 2))); + s.Update(encoder.AddGlyphDataPatchCondition( + Condition::SimpleCondition(SubsetDefinition{'b'}, 2))); // 1 - s.Update(encoder.AddGlyphDataPatchCondition(Condition::SimpleCondition( - SubsetDefinition::Codepoints(flat_hash_set{'c'}), 4))); + s.Update(encoder.AddGlyphDataPatchCondition( + Condition::SimpleCondition(SubsetDefinition{'c'}, 4))); { // 2 Condition condition; condition.activated_patch_id = std::nullopt; - condition.subset_definition = - SubsetDefinition::Codepoints(flat_hash_set{'a'}); + condition.subset_definition = SubsetDefinition{'a'}; s.Update(encoder.AddGlyphDataPatchCondition(condition)); } { // 3 Condition condition; condition.activated_patch_id = std::nullopt; - condition.subset_definition = - SubsetDefinition::Codepoints(flat_hash_set{'d'}); + condition.subset_definition = SubsetDefinition{'d'}; s.Update(encoder.AddGlyphDataPatchCondition(condition)); } diff --git a/ift/encoder/glyph_segmentation.cc b/ift/encoder/glyph_segmentation.cc index ee8d459a..6ffc433d 100644 --- a/ift/encoder/glyph_segmentation.cc +++ b/ift/encoder/glyph_segmentation.cc @@ -10,6 +10,7 @@ #include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" #include "absl/status/statusor.h" +#include "common/int_set.h" using absl::btree_map; using absl::btree_set; @@ -18,16 +19,17 @@ using absl::Span; using absl::Status; using absl::StatusOr; using absl::StrCat; +using common::CodepointSet; +using common::GlyphSet; +using common::IntSet; +using common::SegmentSet; namespace ift::encoder { Status GlyphSegmentation::GroupsToSegmentation( - const btree_map, btree_set>& - and_glyph_groups, - const btree_map, btree_set>& - or_glyph_groups, - const btree_set& fallback_group, - GlyphSegmentation& segmentation) { + const btree_map& and_glyph_groups, + const btree_map& or_glyph_groups, + const SegmentSet& fallback_group, GlyphSegmentation& segmentation) { patch_id_t next_id = 0; // Map segments into patch ids @@ -91,22 +93,22 @@ GlyphSegmentation::ActivationCondition::exclusive_segment( } GlyphSegmentation::ActivationCondition -GlyphSegmentation::ActivationCondition::and_segments( - const absl::btree_set& segments, patch_id_t activated) { +GlyphSegmentation::ActivationCondition::and_segments(const SegmentSet& segments, + patch_id_t activated) { ActivationCondition conditions; conditions.activated_ = activated; for (auto id : segments) { - conditions.conditions_.push_back({id}); + conditions.conditions_.push_back(SegmentSet{id}); } return conditions; } GlyphSegmentation::ActivationCondition -GlyphSegmentation::ActivationCondition::or_segments( - const absl::btree_set& segments, patch_id_t activated, - bool is_fallback) { +GlyphSegmentation::ActivationCondition::or_segments(const SegmentSet& segments, + patch_id_t activated, + bool is_fallback) { ActivationCondition conditions; conditions.activated_ = activated; conditions.conditions_.push_back(segments); @@ -117,8 +119,7 @@ GlyphSegmentation::ActivationCondition::or_segments( GlyphSegmentation::ActivationCondition GlyphSegmentation::ActivationCondition::composite_condition( - absl::Span> groups, - patch_id_t activated) { + absl::Span groups, patch_id_t activated) { ActivationCondition conditions; conditions.activated_ = activated; for (const auto& group : groups) { @@ -243,8 +244,7 @@ bool GlyphSegmentation::ActivationCondition::operator<( StatusOr> GlyphSegmentation::ActivationConditionsToConditionEntries( Span conditions, - const absl::flat_hash_map>& segments) { + const absl::flat_hash_map& segments) { // TODO(garretrieger): extend this to work with segments that are // SubsetDefinition's instead of just codepoints. This would allow for // features and other things to be worked into conditions. @@ -319,8 +319,7 @@ GlyphSegmentation::ActivationConditionsToConditionEntries( // written in phase one. When writing an entry if the triggering group is the // only one in the condition then that condition can utilize the entry (just // like in Phase 1). - flat_hash_map, uint32_t> - segment_group_to_entry_index; + flat_hash_map segment_group_to_entry_index; for (auto condition = remaining_conditions.begin(); condition != remaining_conditions.end();) { bool remove = false; @@ -388,7 +387,7 @@ GlyphSegmentation::ActivationConditionsToConditionEntries( } template -ProtoType ToSetProto(const btree_set& set) { +ProtoType ToSetProto(const IntSet& set) { ProtoType values; for (uint32_t v : set) { values.add_values(v); @@ -437,10 +436,10 @@ EncoderConfig GlyphSegmentation::ToConfigProto() const { } void GlyphSegmentation::CopySegments( - const std::vector& segments) { + const std::vector& segments) { segments_.clear(); for (const auto& set : segments) { - segments_.push_back(common::to_btree_set(set.get())); + segments_.push_back(set); } } diff --git a/ift/encoder/glyph_segmentation.h b/ift/encoder/glyph_segmentation.h index 88804bf1..6ffe3ce1 100644 --- a/ift/encoder/glyph_segmentation.h +++ b/ift/encoder/glyph_segmentation.h @@ -6,11 +6,9 @@ #include "absl/container/btree_map.h" #include "absl/container/btree_set.h" -#include "absl/container/flat_hash_set.h" #include "absl/status/statusor.h" #include "absl/types/span.h" -#include "common/hb_set_unique_ptr.h" -#include "hb.h" +#include "common/int_set.h" #include "ift/encoder/condition.h" #include "util/encoder_config.pb.h" @@ -50,35 +48,33 @@ class GlyphSegmentation { /* * Constructs a condition that activates when the input intersects(patch_1) - * AND ... AND inersects(patch_n). + * AND ... AND inersects(segment_n). */ - static ActivationCondition and_segments( - const absl::btree_set& ids, patch_id_t activated); + static ActivationCondition and_segments(const common::SegmentSet& ids, + patch_id_t activated); /* * Constructs a condition that activates when the input intersects - * (patch_1) OR ... OR inersects(patch_n). + * (segment_1) OR ... OR inersects(segment_n). */ - static ActivationCondition or_segments( - const absl::btree_set& ids, patch_id_t activated, - bool is_fallback = false); + static ActivationCondition or_segments(const common::SegmentSet& ids, + patch_id_t activated, + bool is_fallback = false); /* * Constructs a condition that activates when the input intersects: * (s1 OR ..) AND (si OR ...) AND ... */ static ActivationCondition composite_condition( - absl::Span> groups, - patch_id_t activated); + absl::Span groups, patch_id_t activated); /* - * This condition is activated if every set of patch ids intersects the - * input subset definition. ie. input subset def intersects {p_1, p_2} AND + * This condition is activated if every set of segments intersects the + * input subset definition. ie. input subset def intersects {s_1, s_2} AND * input subset def intersects {...} AND ... - * which is effectively: (p_1 OR p_2) AND ... + * which is effectively: (s_1 OR s_2) AND ... */ - const absl::Span> conditions() - const { + const absl::Span conditions() const { return conditions_; } @@ -88,12 +84,14 @@ class GlyphSegmentation { * Populates out with the set of patch ids that are part of this condition * (excluding the activated patch) */ - void TriggeringSegments(hb_set_t* out) const { + common::SegmentSet TriggeringSegments() const { + common::SegmentSet out; for (auto g : conditions_) { for (auto segment_id : g) { - hb_set_add(out, segment_id); + out.insert(segment_id); } } + return out; } /* @@ -125,13 +123,13 @@ class GlyphSegmentation { bool is_fallback_ = false; bool is_exclusive_ = false; - std::vector> conditions_; + std::vector conditions_; patch_id_t activated_; }; - GlyphSegmentation(absl::btree_set init_font_codepoints, - absl::btree_set init_font_glyphs, - absl::btree_set unmapped_glyphs) + GlyphSegmentation(common::CodepointSet init_font_codepoints, + common::GlyphSet init_font_glyphs, + common::GlyphSet unmapped_glyphs) : init_font_codepoints_(init_font_codepoints), init_font_glyphs_(init_font_glyphs), unmapped_glyphs_(unmapped_glyphs) {} @@ -143,8 +141,8 @@ class GlyphSegmentation { static absl::StatusOr> ActivationConditionsToConditionEntries( absl::Span conditions, - const absl::flat_hash_map>& segments); + const absl::flat_hash_map& + segments); /* * Returns a human readable string representation of this segmentation and @@ -166,7 +164,7 @@ class GlyphSegmentation { * * Segment indices in conditions refer to a set of codepoints here. */ - const std::vector>& Segments() const { + const std::vector& Segments() const { return segments_; } @@ -174,8 +172,7 @@ class GlyphSegmentation { * The list of glyphs in each patch. The key in the map is an id used to * identify the patch within the activation conditions. */ - const absl::btree_map>& GidSegments() - const { + const absl::btree_map& GidSegments() const { return patches_; } @@ -186,43 +183,41 @@ class GlyphSegmentation { * TODO(garretrieger): instead of treating them separately generate a catch * all patch that contains the unmapped glyphs. */ - const absl::btree_set& UnmappedGlyphs() const { - return unmapped_glyphs_; - }; + const common::GlyphSet& UnmappedGlyphs() const { return unmapped_glyphs_; }; /* * These glyphs should be included in the initial font. */ - const absl::btree_set& InitialFontGlyphs() const { + const common::GlyphSet& InitialFontGlyphs() const { return init_font_glyphs_; }; /* * These codepoints should be included in the initial font. */ - const absl::btree_set& InitialFontCodepoints() const { + const common::CodepointSet& InitialFontCodepoints() const { return init_font_codepoints_; }; EncoderConfig ToConfigProto() const; static absl::Status GroupsToSegmentation( - const absl::btree_map, - absl::btree_set>& and_glyph_groups, - const absl::btree_map, - absl::btree_set>& or_glyph_groups, - const absl::btree_set& fallback_group, + const absl::btree_map& + and_glyph_groups, + const absl::btree_map& + or_glyph_groups, + const common::SegmentSet& fallback_group, GlyphSegmentation& segmentation); - void CopySegments(const std::vector& segments); + void CopySegments(const std::vector& segments); private: - absl::btree_set init_font_codepoints_; - absl::btree_set init_font_glyphs_; - absl::btree_set unmapped_glyphs_; + common::CodepointSet init_font_codepoints_; + common::GlyphSet init_font_glyphs_; + common::GlyphSet unmapped_glyphs_; absl::btree_set conditions_; - std::vector> segments_; - absl::btree_map> patches_; + std::vector segments_; + absl::btree_map patches_; }; } // namespace ift::encoder diff --git a/ift/encoder/glyph_segmentation_test.cc b/ift/encoder/glyph_segmentation_test.cc index 12f89037..920d1b3e 100644 --- a/ift/encoder/glyph_segmentation_test.cc +++ b/ift/encoder/glyph_segmentation_test.cc @@ -5,13 +5,15 @@ #include #include "common/font_data.h" +#include "common/int_set.h" #include "gtest/gtest.h" #include "ift/encoder/closure_glyph_segmenter.h" #include "ift/encoder/condition.h" -using absl::btree_set; +using common::CodepointSet; using common::FontData; using common::hb_face_unique_ptr; +using common::IntSet; using common::make_hb_face; using google::protobuf::TextFormat; @@ -42,13 +44,12 @@ class GlyphSegmentationTest : public ::testing::Test { }; TEST_F(GlyphSegmentationTest, ActivationConditionsToEncoderConditions) { - absl::flat_hash_map> - segments = { - {1, {'a', 'b'}}, - {2, {'c'}}, - {3, {'d', 'e', 'f'}}, - {4, {'g'}}, - }; + absl::flat_hash_map segments = { + {1, {'a', 'b'}}, + {2, {'c'}}, + {3, {'d', 'e', 'f'}}, + {4, {'g'}}, + }; std::vector activation_conditions = { GlyphSegmentation::ActivationCondition::exclusive_segment(2, 2), diff --git a/ift/encoder/subset_definition.cc b/ift/encoder/subset_definition.cc index afe99ba3..3ade6f36 100644 --- a/ift/encoder/subset_definition.cc +++ b/ift/encoder/subset_definition.cc @@ -2,9 +2,11 @@ #include "absl/container/btree_set.h" #include "common/font_helper.h" +#include "common/int_set.h" using absl::btree_set; using common::FontHelper; +using common::IntSet; using ift::proto::PatchMap; namespace ift::encoder { @@ -12,13 +14,8 @@ namespace ift::encoder { void PrintTo(const SubsetDefinition& def, std::ostream* os) { *os << "[{"; - btree_set sorted; - for (uint32_t cp : def.codepoints) { - sorted.insert(cp); - } - bool first = true; - for (uint32_t cp : sorted) { + for (uint32_t cp : def.codepoints) { if (!first) { *os << ", "; } @@ -85,19 +82,16 @@ design_space_t subtract(const design_space_t& a, const design_space_t& b) { } void SubsetDefinition::Subtract(const SubsetDefinition& other) { - codepoints = subtract(codepoints, other.codepoints); - gids = subtract(gids, other.gids); + codepoints.subtract(other.codepoints); + gids.subtract(other.codepoints); feature_tags = subtract(feature_tags, other.feature_tags); design_space = subtract(design_space, other.design_space); } void SubsetDefinition::Union(const SubsetDefinition& other) { - std::copy(other.codepoints.begin(), other.codepoints.end(), - std::inserter(codepoints, codepoints.begin())); - std::copy(other.gids.begin(), other.gids.end(), - std::inserter(gids, gids.begin())); - std::copy(other.feature_tags.begin(), other.feature_tags.end(), - std::inserter(feature_tags, feature_tags.begin())); + codepoints.union_set(other.codepoints); + gids.union_set(other.gids); + feature_tags.insert(other.feature_tags.begin(), other.feature_tags.end()); for (const auto& [tag, range] : other.design_space) { auto existing = design_space.find(tag); @@ -120,10 +114,7 @@ void SubsetDefinition::Union(const SubsetDefinition& other) { void SubsetDefinition::ConfigureInput(hb_subset_input_t* input, hb_face_t* face) const { - hb_set_t* unicodes = hb_subset_input_unicode_set(input); - for (hb_codepoint_t cp : codepoints) { - hb_set_add(unicodes, cp); - } + codepoints.union_into(hb_subset_input_unicode_set(input)); hb_set_t* features = hb_subset_input_set(input, HB_SUBSET_SETS_LAYOUT_FEATURE_TAG); @@ -140,11 +131,7 @@ void SubsetDefinition::ConfigureInput(hb_subset_input_t* input, return; } - hb_set_t* gids_set = hb_subset_input_glyph_set(input); - hb_set_add(gids_set, 0); - for (hb_codepoint_t gid : gids) { - hb_set_add(gids_set, gid); - } + gids.union_into(hb_subset_input_glyph_set(input)); } PatchMap::Coverage SubsetDefinition::ToCoverage() const { diff --git a/ift/encoder/subset_definition.h b/ift/encoder/subset_definition.h index ceb778e2..b228b07f 100644 --- a/ift/encoder/subset_definition.h +++ b/ift/encoder/subset_definition.h @@ -8,6 +8,7 @@ #include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" #include "common/axis_range.h" +#include "common/int_set.h" #include "hb-subset.h" #include "ift/proto/patch_map.h" @@ -23,17 +24,16 @@ struct SubsetDefinition { } } - template - static SubsetDefinition Codepoints(const T& codepoints) { + static SubsetDefinition Codepoints(const common::CodepointSet& codepoints) { SubsetDefinition def; - def.codepoints.insert(codepoints.begin(), codepoints.end()); + def.codepoints = codepoints; return def; } friend void PrintTo(const SubsetDefinition& point, std::ostream* os); - absl::flat_hash_set codepoints; - absl::flat_hash_set gids; + common::CodepointSet codepoints; + common::GlyphSet gids; absl::btree_set feature_tags; design_space_t design_space; diff --git a/ift/glyph_keyed_diff.cc b/ift/glyph_keyed_diff.cc index b14f3889..c0d30dfb 100644 --- a/ift/glyph_keyed_diff.cc +++ b/ift/glyph_keyed_diff.cc @@ -11,6 +11,7 @@ #include "common/compat_id.h" #include "common/font_data.h" #include "common/font_helper.h" +#include "common/int_set.h" #include "common/try.h" #include "ift/proto/ift_table.h" #include "ift/proto/patch_map.h" @@ -27,6 +28,7 @@ using common::FontData; using common::FontHelper; using common::hb_blob_unique_ptr; using common::hb_face_unique_ptr; +using common::IntSet; using common::make_hb_blob; using common::make_hb_face; using ift::proto::IFTTable; @@ -34,8 +36,7 @@ using ift::proto::PatchMap; namespace ift { -StatusOr GlyphKeyedDiff::CreatePatch( - const btree_set& gids) const { +StatusOr GlyphKeyedDiff::CreatePatch(const IntSet& gids) const { // TODO(garretrieger): use write macros that check for overflows. std::string patch; FontHelper::WriteUInt32(HB_TAG('i', 'f', 'g', 'k'), patch); // Format Tag @@ -46,7 +47,7 @@ StatusOr GlyphKeyedDiff::CreatePatch( "There must be at least one gid in the requested patch."); } - uint32_t max_gid = *std::max_element(gids.begin(), gids.end()); + uint32_t max_gid = *gids.max(); if (max_gid > (1 << 24) - 1) { return absl::InvalidArgumentError("Larger then 24 bit gid requested."); } @@ -122,8 +123,8 @@ struct Cff2DataOperator { }; template -Status PopulateTableData(const absl::btree_set& gids, - uint32_t offset_bias, Operator glyph_data_lookup, +Status PopulateTableData(const IntSet& gids, uint32_t offset_bias, + Operator glyph_data_lookup, std::string& per_glyph_data, std::string& offset_data) { for (auto gid : gids) { @@ -138,8 +139,8 @@ Status PopulateTableData(const absl::btree_set& gids, return absl::OkStatus(); } -StatusOr GlyphKeyedDiff::CreateDataStream( - const btree_set& gids, bool u16_gids) const { +StatusOr GlyphKeyedDiff::CreateDataStream(const IntSet& gids, + bool u16_gids) const { // check for unsupported tags. for (auto tag : tags_) { if (tag != FontHelper::kGlyf && tag != FontHelper::kGvar && diff --git a/ift/glyph_keyed_diff.h b/ift/glyph_keyed_diff.h index 118de609..fb3cda44 100644 --- a/ift/glyph_keyed_diff.h +++ b/ift/glyph_keyed_diff.h @@ -6,6 +6,7 @@ #include "common/brotli_binary_diff.h" #include "common/compat_id.h" #include "common/font_data.h" +#include "common/int_set.h" namespace ift { @@ -21,11 +22,11 @@ class GlyphKeyedDiff { brotli_diff_(quality) {} absl::StatusOr CreatePatch( - const absl::btree_set& gids) const; + const common::IntSet& gids) const; private: - absl::StatusOr CreateDataStream( - const absl::btree_set& gids, bool u16_gids) const; + absl::StatusOr CreateDataStream(const common::IntSet& gids, + bool u16_gids) const; const common::FontData& font_; common::CompatId base_compat_id_; diff --git a/ift/glyph_keyed_diff_test.cc b/ift/glyph_keyed_diff_test.cc index 23ab5e82..e63a396b 100644 --- a/ift/glyph_keyed_diff_test.cc +++ b/ift/glyph_keyed_diff_test.cc @@ -7,12 +7,12 @@ #include "common/compat_id.h" #include "common/font_data.h" #include "common/font_helper.h" +#include "common/int_set.h" #include "gtest/gtest.h" #include "hb-subset.h" #include "hb.h" #include "ift/proto/ift_table.h" -using absl::flat_hash_set; using absl::StatusOr; using absl::StrCat; using absl::string_view; @@ -23,6 +23,7 @@ using common::FontHelper; using common::hb_blob_unique_ptr; using common::hb_face_unique_ptr; using common::hb_font_unique_ptr; +using common::IntSet; using common::make_hb_blob; using common::make_hb_face; using common::make_hb_font; @@ -158,7 +159,7 @@ class GlyphKeyedDiffTest : public ::testing::Test { return FontData(make_hb_blob(hb_blob_create_from_file(filename))); } - FontData Subset(const FontData& font, flat_hash_set gids) { + FontData Subset(const FontData& font, IntSet gids) { hb_face_unique_ptr face = font.face(); hb_subset_input_t* input = hb_subset_input_create_or_fail(); for (uint32_t gid : gids) { diff --git a/ift/integration_test.cc b/ift/integration_test.cc index c035d643..57ab8453 100644 --- a/ift/integration_test.cc +++ b/ift/integration_test.cc @@ -2,12 +2,11 @@ #include #include -#include "absl/container/flat_hash_set.h" #include "absl/strings/str_cat.h" #include "common/axis_range.h" #include "common/font_data.h" #include "common/font_helper.h" -#include "common/hb_set_unique_ptr.h" +#include "common/int_set.h" #include "common/try.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -21,16 +20,16 @@ using absl::btree_set; using absl::flat_hash_map; -using absl::flat_hash_set; using absl::Status; using absl::StatusOr; using absl::StrCat; using common::AxisRange; using common::FontData; using common::FontHelper; +using common::GlyphSet; using common::hb_blob_unique_ptr; using common::hb_face_unique_ptr; -using common::hb_set_unique_ptr; +using common::IntSet; using common::make_hb_blob; using common::make_hb_face; using common::make_hb_set; @@ -96,26 +95,23 @@ class IntegrationTest : public ::testing::Test { roboto_vf_.set(blob.get()); } - StatusOr> InitEncoderForMixedMode(Encoder& encoder) { + StatusOr InitEncoderForMixedMode(Encoder& encoder) { auto face = noto_sans_jp_.face(); - hb_set_unique_ptr init = make_hb_set(); - hb_set_add_range(init.get(), 0, hb_face_get_glyph_count(face.get()) - 1); - hb_set_unique_ptr excluded = make_hb_set(); - hb_set_add_sorted_array(excluded.get(), testdata::TEST_SEGMENT_1, - std::size(testdata::TEST_SEGMENT_1)); - hb_set_add_sorted_array(excluded.get(), testdata::TEST_SEGMENT_2, - std::size(testdata::TEST_SEGMENT_2)); - hb_set_add_sorted_array(excluded.get(), testdata::TEST_SEGMENT_3, - std::size(testdata::TEST_SEGMENT_3)); - hb_set_add_sorted_array(excluded.get(), testdata::TEST_SEGMENT_4, - std::size(testdata::TEST_SEGMENT_4)); - hb_set_subtract(init.get(), excluded.get()); - auto init_segment = common::to_btree_set(init.get()); + GlyphSet init; + init.insert_range(0, hb_face_get_glyph_count(face.get()) - 1); + + GlyphSet excluded; + excluded.insert_sorted_array(testdata::TEST_SEGMENT_1); + excluded.insert_sorted_array(testdata::TEST_SEGMENT_2); + excluded.insert_sorted_array(testdata::TEST_SEGMENT_3); + excluded.insert_sorted_array(testdata::TEST_SEGMENT_4); + + init.subtract(excluded); encoder.SetFace(face.get()); - auto sc = encoder.AddGlyphDataPatch(0, init_segment); + auto sc = encoder.AddGlyphDataPatch(0, init); sc.Update(encoder.AddGlyphDataPatch(1, TestSegment1())); sc.Update(encoder.AddGlyphDataPatch(2, TestSegment2())); sc.Update(encoder.AddGlyphDataPatch(3, TestSegment3())); @@ -125,7 +121,7 @@ class IntegrationTest : public ::testing::Test { return sc; } - return init_segment; + return init; } Status InitEncoderForMixedModeCff(Encoder& encoder) { @@ -152,63 +148,54 @@ class IntegrationTest : public ::testing::Test { return absl::OkStatus(); } - StatusOr> InitEncoderForVfMixedMode(Encoder& encoder) { + StatusOr InitEncoderForVfMixedMode(Encoder& encoder) { auto face = noto_sans_vf_.face(); encoder.SetFace(face.get()); - hb_set_unique_ptr init = make_hb_set(); - hb_set_add_range(init.get(), 0, hb_face_get_glyph_count(face.get()) - 1); - hb_set_unique_ptr excluded = make_hb_set(); - hb_set_add_sorted_array(excluded.get(), testdata::TEST_VF_SEGMENT_1, - std::size(testdata::TEST_VF_SEGMENT_1)); - hb_set_add_sorted_array(excluded.get(), testdata::TEST_VF_SEGMENT_2, - std::size(testdata::TEST_VF_SEGMENT_2)); - hb_set_add_sorted_array(excluded.get(), testdata::TEST_VF_SEGMENT_3, - std::size(testdata::TEST_VF_SEGMENT_3)); - hb_set_add_sorted_array(excluded.get(), testdata::TEST_VF_SEGMENT_4, - std::size(testdata::TEST_VF_SEGMENT_4)); - hb_set_subtract(init.get(), excluded.get()); - auto init_segment = common::to_btree_set(init.get()); - - auto sc = encoder.AddGlyphDataPatch(0, init_segment); + GlyphSet init; + init.insert_range(0, hb_face_get_glyph_count(face.get()) - 1); + + GlyphSet excluded; + excluded.insert_sorted_array(testdata::TEST_VF_SEGMENT_1); + excluded.insert_sorted_array(testdata::TEST_VF_SEGMENT_2); + excluded.insert_sorted_array(testdata::TEST_VF_SEGMENT_3); + excluded.insert_sorted_array(testdata::TEST_VF_SEGMENT_4); + + init.subtract(excluded); + + auto sc = encoder.AddGlyphDataPatch(0, init); sc.Update(encoder.AddGlyphDataPatch(1, TestVfSegment1())); sc.Update(encoder.AddGlyphDataPatch(2, TestVfSegment2())); sc.Update(encoder.AddGlyphDataPatch(3, TestVfSegment3())); sc.Update(encoder.AddGlyphDataPatch(4, TestVfSegment4())); - return init_segment; + return init; } - StatusOr> InitEncoderForMixedModeFeatureTest( - Encoder& encoder) { + StatusOr InitEncoderForMixedModeFeatureTest(Encoder& encoder) { auto face = feature_test_.face(); encoder.SetFace(face.get()); - hb_set_unique_ptr init = make_hb_set(); - hb_set_add_range(init.get(), 0, hb_face_get_glyph_count(face.get()) - 1); - hb_set_unique_ptr excluded = make_hb_set(); - hb_set_add_sorted_array(excluded.get(), testdata::TEST_FEATURE_SEGMENT_1, - std::size(testdata::TEST_FEATURE_SEGMENT_1)); - hb_set_add_sorted_array(excluded.get(), testdata::TEST_FEATURE_SEGMENT_2, - std::size(testdata::TEST_FEATURE_SEGMENT_2)); - hb_set_add_sorted_array(excluded.get(), testdata::TEST_FEATURE_SEGMENT_3, - std::size(testdata::TEST_FEATURE_SEGMENT_3)); - hb_set_add_sorted_array(excluded.get(), testdata::TEST_FEATURE_SEGMENT_4, - std::size(testdata::TEST_FEATURE_SEGMENT_4)); - hb_set_add_sorted_array(excluded.get(), testdata::TEST_FEATURE_SEGMENT_5, - std::size(testdata::TEST_FEATURE_SEGMENT_5)); - hb_set_add_sorted_array(excluded.get(), testdata::TEST_FEATURE_SEGMENT_6, - std::size(testdata::TEST_FEATURE_SEGMENT_6)); - hb_set_subtract(init.get(), excluded.get()); - auto init_segment = common::to_btree_set(init.get()); - - auto sc = encoder.AddGlyphDataPatch(0, init_segment); + GlyphSet init; + init.insert_range(0, hb_face_get_glyph_count(face.get()) - 1); + + GlyphSet excluded; + excluded.insert_sorted_array(testdata::TEST_FEATURE_SEGMENT_1); + excluded.insert_sorted_array(testdata::TEST_FEATURE_SEGMENT_2); + excluded.insert_sorted_array(testdata::TEST_FEATURE_SEGMENT_3); + excluded.insert_sorted_array(testdata::TEST_FEATURE_SEGMENT_4); + excluded.insert_sorted_array(testdata::TEST_FEATURE_SEGMENT_5); + excluded.insert_sorted_array(testdata::TEST_FEATURE_SEGMENT_6); + + init.subtract(excluded); + + auto sc = encoder.AddGlyphDataPatch(0, init); sc.Update(encoder.AddGlyphDataPatch(1, TestFeatureSegment1())); sc.Update(encoder.AddGlyphDataPatch(2, TestFeatureSegment2())); sc.Update(encoder.AddGlyphDataPatch(3, TestFeatureSegment3())); sc.Update(encoder.AddGlyphDataPatch(4, TestFeatureSegment4())); sc.Update(encoder.AddGlyphDataPatch(5, TestFeatureSegment5())); sc.Update(encoder.AddGlyphDataPatch(6, TestFeatureSegment6())); - return init_segment; + return init; } Status InitEncoderForTableKeyed(Encoder& encoder) { @@ -315,11 +302,11 @@ TEST_F(IntegrationTest, TableKeyedOnly) { auto sc = InitEncoderForTableKeyed(encoder); ASSERT_TRUE(sc.ok()) << sc; - sc = encoder.SetBaseSubset(flat_hash_set{0x41, 0x42, 0x43}); - encoder.AddNonGlyphDataSegment(flat_hash_set{0x45, 0x46, 0x47}); - encoder.AddNonGlyphDataSegment(flat_hash_set{0x48, 0x49, 0x4A}); - encoder.AddNonGlyphDataSegment(flat_hash_set{0x4B, 0x4C, 0x4D}); - encoder.AddNonGlyphDataSegment(flat_hash_set{0x4E, 0x4F, 0x50}); + sc = encoder.SetBaseSubset(IntSet{0x41, 0x42, 0x43}); + encoder.AddNonGlyphDataSegment(IntSet{0x45, 0x46, 0x47}); + encoder.AddNonGlyphDataSegment(IntSet{0x48, 0x49, 0x4A}); + encoder.AddNonGlyphDataSegment(IntSet{0x4B, 0x4C, 0x4D}); + encoder.AddNonGlyphDataSegment(IntSet{0x4E, 0x4F, 0x50}); ASSERT_TRUE(sc.ok()) << sc; auto encoding = encoder.Encode(); @@ -356,11 +343,11 @@ TEST_F(IntegrationTest, TableKeyedMultiple) { auto sc = InitEncoderForTableKeyed(encoder); ASSERT_TRUE(sc.ok()) << sc; - sc = encoder.SetBaseSubset(flat_hash_set{0x41, 0x42, 0x43}); - encoder.AddNonGlyphDataSegment(flat_hash_set{0x45, 0x46, 0x47}); - encoder.AddNonGlyphDataSegment(flat_hash_set{0x48, 0x49, 0x4A}); - encoder.AddNonGlyphDataSegment(flat_hash_set{0x4B, 0x4C, 0x4D}); - encoder.AddNonGlyphDataSegment(flat_hash_set{0x4E, 0x4F, 0x50}); + sc = encoder.SetBaseSubset(IntSet{0x41, 0x42, 0x43}); + encoder.AddNonGlyphDataSegment(IntSet{0x45, 0x46, 0x47}); + encoder.AddNonGlyphDataSegment(IntSet{0x48, 0x49, 0x4A}); + encoder.AddNonGlyphDataSegment(IntSet{0x4B, 0x4C, 0x4D}); + encoder.AddNonGlyphDataSegment(IntSet{0x4E, 0x4F, 0x50}); ASSERT_TRUE(sc.ok()) << sc; auto encoding = encoder.Encode(); @@ -397,12 +384,12 @@ TEST_F(IntegrationTest, TableKeyedWithOverlaps) { auto sc = InitEncoderForTableKeyed(encoder); ASSERT_TRUE(sc.ok()) << sc; - sc = encoder.SetBaseSubset(flat_hash_set{0x41, 0x42, 0x43}); - encoder.AddNonGlyphDataSegment(flat_hash_set{ - 0x45, 0x46, 0x47, 0x48}); // 0x48 is in two subsets - encoder.AddNonGlyphDataSegment(flat_hash_set{0x48, 0x49, 0x4A}); - encoder.AddNonGlyphDataSegment(flat_hash_set{0x4B, 0x4C, 0x4D}); - encoder.AddNonGlyphDataSegment(flat_hash_set{0x4E, 0x4F, 0x50}); + sc = encoder.SetBaseSubset(IntSet{0x41, 0x42, 0x43}); + encoder.AddNonGlyphDataSegment( + IntSet{0x45, 0x46, 0x47, 0x48}); // 0x48 is in two subsets + encoder.AddNonGlyphDataSegment(IntSet{0x48, 0x49, 0x4A}); + encoder.AddNonGlyphDataSegment(IntSet{0x4B, 0x4C, 0x4D}); + encoder.AddNonGlyphDataSegment(IntSet{0x4E, 0x4F, 0x50}); ASSERT_TRUE(sc.ok()) << sc; auto encoding = encoder.Encode(); @@ -451,8 +438,8 @@ TEST_F(IntegrationTest, TableKeyed_DesignSpaceAugmentation_IgnoresDesignSpace) { def.design_space[kWdth] = AxisRange::Point(100.0f); sc = encoder.SetBaseSubsetFromDef(def); - encoder.AddNonGlyphDataSegment(flat_hash_set{'d', 'e', 'f'}); - encoder.AddNonGlyphDataSegment(flat_hash_set{'h', 'i', 'j'}); + encoder.AddNonGlyphDataSegment(IntSet{'d', 'e', 'f'}); + encoder.AddNonGlyphDataSegment(IntSet{'h', 'i', 'j'}); encoder.AddDesignSpaceSegment({{kWdth, *AxisRange::Range(75.0f, 100.0f)}}); ASSERT_TRUE(sc.ok()) << sc; @@ -461,10 +448,12 @@ TEST_F(IntegrationTest, TableKeyed_DesignSpaceAugmentation_IgnoresDesignSpace) { auto encoded_face = encoding->init_font.face(); auto codepoints = FontHelper::ToCodepointsSet(encoded_face.get()); - ASSERT_THAT(codepoints, IsSupersetOf({'a', 'b', 'c'})); - ASSERT_THAT(codepoints, AllOf(Not(Contains('d')), Not(Contains('e')), - Not(Contains('f')), Not(Contains('h')), - Not(Contains('i')), Not(Contains('j')))); + btree_set codepoints_btree; + codepoints_btree.insert(codepoints.begin(), codepoints.end()); + ASSERT_THAT(codepoints_btree, IsSupersetOf({'a', 'b', 'c'})); + ASSERT_THAT(codepoints_btree, AllOf(Not(Contains('d')), Not(Contains('e')), + Not(Contains('f')), Not(Contains('h')), + Not(Contains('i')), Not(Contains('j')))); auto ds = FontHelper::GetDesignSpace(encoded_face.get()); flat_hash_map expected_ds{ @@ -483,9 +472,11 @@ TEST_F(IntegrationTest, TableKeyed_DesignSpaceAugmentation_IgnoresDesignSpace) { ASSERT_EQ(*ds, expected_ds); codepoints = FontHelper::ToCodepointsSet(extended_face.get()); - ASSERT_THAT(codepoints, IsSupersetOf({'a', 'b', 'c', 'd', 'e', 'f'})); - ASSERT_THAT(codepoints, AllOf(Not(Contains('h')), Not(Contains('i')), - Not(Contains('j')))); + codepoints_btree.clear(); + codepoints_btree.insert(codepoints.begin(), codepoints.end()); + ASSERT_THAT(codepoints_btree, IsSupersetOf({'a', 'b', 'c', 'd', 'e', 'f'})); + ASSERT_THAT(codepoints_btree, AllOf(Not(Contains('h')), Not(Contains('i')), + Not(Contains('j')))); } TEST_F(IntegrationTest, SharedBrotli_DesignSpaceAugmentation) { @@ -497,8 +488,8 @@ TEST_F(IntegrationTest, SharedBrotli_DesignSpaceAugmentation) { def.design_space[kWdth] = AxisRange::Point(100.0f); sc = encoder.SetBaseSubsetFromDef(def); - encoder.AddNonGlyphDataSegment(flat_hash_set{'d', 'e', 'f'}); - encoder.AddNonGlyphDataSegment(flat_hash_set{'h', 'i', 'j'}); + encoder.AddNonGlyphDataSegment(IntSet{'d', 'e', 'f'}); + encoder.AddNonGlyphDataSegment(IntSet{'h', 'i', 'j'}); encoder.AddDesignSpaceSegment({{kWdth, *AxisRange::Range(75.0f, 100.0f)}}); ASSERT_TRUE(sc.ok()) << sc; @@ -507,10 +498,12 @@ TEST_F(IntegrationTest, SharedBrotli_DesignSpaceAugmentation) { auto encoded_face = encoding->init_font.face(); auto codepoints = FontHelper::ToCodepointsSet(encoded_face.get()); - ASSERT_THAT(codepoints, IsSupersetOf({'a', 'b', 'c'})); - ASSERT_THAT(codepoints, AllOf(Not(Contains('d')), Not(Contains('e')), - Not(Contains('f')), Not(Contains('h')), - Not(Contains('i')), Not(Contains('j')))); + btree_set codepoints_btree; + codepoints_btree.insert(codepoints.begin(), codepoints.end()); + ASSERT_THAT(codepoints_btree, IsSupersetOf({'a', 'b', 'c'})); + ASSERT_THAT(codepoints_btree, AllOf(Not(Contains('d')), Not(Contains('e')), + Not(Contains('f')), Not(Contains('h')), + Not(Contains('i')), Not(Contains('j')))); auto ds = FontHelper::GetDesignSpace(encoded_face.get()); flat_hash_map expected_ds{ @@ -532,10 +525,12 @@ TEST_F(IntegrationTest, SharedBrotli_DesignSpaceAugmentation) { ASSERT_EQ(*ds, expected_ds); codepoints = FontHelper::ToCodepointsSet(extended_face.get()); - ASSERT_THAT(codepoints, IsSupersetOf({'a', 'b', 'c'})); - ASSERT_THAT(codepoints, AllOf(Not(Contains('d')), Not(Contains('e')), - Not(Contains('f')), Not(Contains('h')), - Not(Contains('i')), Not(Contains('j')))); + codepoints_btree.clear(); + codepoints_btree.insert(codepoints.begin(), codepoints.end()); + ASSERT_THAT(codepoints_btree, IsSupersetOf({'a', 'b', 'c'})); + ASSERT_THAT(codepoints_btree, AllOf(Not(Contains('d')), Not(Contains('e')), + Not(Contains('f')), Not(Contains('h')), + Not(Contains('i')), Not(Contains('j')))); // Try extending the updated font again. encoding->init_font.shallow_copy(*extended); @@ -544,7 +539,9 @@ TEST_F(IntegrationTest, SharedBrotli_DesignSpaceAugmentation) { extended_face = extended->face(); codepoints = FontHelper::ToCodepointsSet(extended_face.get()); - ASSERT_THAT(codepoints, IsSupersetOf({'a', 'b', 'c', 'd', 'e', 'f'})); + codepoints_btree.clear(); + codepoints_btree.insert(codepoints.begin(), codepoints.end()); + ASSERT_THAT(codepoints_btree, IsSupersetOf({'a', 'b', 'c', 'd', 'e', 'f'})); ds = FontHelper::GetDesignSpace(extended_face.get()); expected_ds = { @@ -569,7 +566,7 @@ TEST_F(IntegrationTest, MixedMode) { auto segment_3 = FontHelper::GidsToUnicodes(face.get(), TestSegment3()); auto segment_4 = FontHelper::GidsToUnicodes(face.get(), TestSegment4()); - flat_hash_set base; + IntSet base; base.insert(segment_0.begin(), segment_0.end()); base.insert(segment_1.begin(), segment_1.end()); auto sc = encoder.SetBaseSubset(base); @@ -751,14 +748,14 @@ TEST_F(IntegrationTest, MixedMode_CompositeConditions) { auto segment_2 = FontHelper::GidsToUnicodes(face.get(), TestSegment2()); auto segment_3 = FontHelper::GidsToUnicodes(face.get(), TestSegment3()); auto segment_4 = FontHelper::GidsToUnicodes(face.get(), TestSegment4()); - flat_hash_set all; + IntSet all; all.insert(segment_1.begin(), segment_1.end()); all.insert(segment_2.begin(), segment_2.end()); all.insert(segment_3.begin(), segment_3.end()); all.insert(segment_4.begin(), segment_4.end()); // target paritions: {}, {{1}, {2}, {3, 4}} - auto sc = encoder.SetBaseSubset(flat_hash_set{}); + auto sc = encoder.SetBaseSubset(IntSet{}); encoder.AddNonGlyphDataSegment(all); ASSERT_TRUE(sc.ok()) << sc; @@ -1024,8 +1021,8 @@ TEST_F(IntegrationTest, MixedMode_SequentialDependentPatches) { auto segment_4 = FontHelper::GidsToUnicodes(face.get(), TestSegment4()); // target paritions: {{0, 1}, {2}, {3}, {4}} - btree_set segment_0_and_1 = segment_0; - segment_0_and_1.insert(segment_1.begin(), segment_1.end()); + IntSet segment_0_and_1 = segment_0; + segment_0_and_1.union_set(segment_1); auto sc = encoder.SetBaseSubset(segment_0_and_1); encoder.AddNonGlyphDataSegment(segment_2); encoder.AddNonGlyphDataSegment(segment_3); @@ -1223,20 +1220,17 @@ TEST_F(IntegrationTest, MixedMode_Cff) { auto sc = InitEncoderForMixedModeCff(encoder); ASSERT_TRUE(sc.ok()) << sc; - ASSERT_TRUE(encoder.SetBaseSubset(btree_set()).ok()); + ASSERT_TRUE(encoder.SetBaseSubset(IntSet{}).ok()); - auto all_codepoints = - btree_set{'A', 'B', 'H', 'I', 'J', 'M', 'N', 'Z'}; + IntSet all_codepoints{'A', 'B', 'H', 'I', 'J', 'M', 'N', 'Z'}; auto face = noto_sans_jp_cff_.face(); encoder.AddNonGlyphDataSegment(all_codepoints); // Setup activations for patches 1 and 2 - sc.Update(encoder.AddGlyphDataPatchCondition(Condition::SimpleCondition( - SubsetDefinition::Codepoints(btree_set{'A', 'B', 'M', 'N'}), - 1))); - sc.Update(encoder.AddGlyphDataPatchCondition(Condition::SimpleCondition( - SubsetDefinition::Codepoints(btree_set{'H', 'I', 'J', 'Z'}), - 2))); + sc.Update(encoder.AddGlyphDataPatchCondition( + Condition::SimpleCondition(SubsetDefinition{'A', 'B', 'M', 'N'}, 1))); + sc.Update(encoder.AddGlyphDataPatchCondition( + Condition::SimpleCondition(SubsetDefinition{'H', 'I', 'J', 'Z'}, 2))); auto encoding = encoder.Encode(); ASSERT_TRUE(encoding.ok()) << encoding.status(); @@ -1289,18 +1283,17 @@ TEST_F(IntegrationTest, MixedMode_Cff2) { auto sc = InitEncoderForMixedModeCff2(encoder); ASSERT_TRUE(sc.ok()) << sc; - ASSERT_TRUE(encoder.SetBaseSubset(btree_set()).ok()); + ASSERT_TRUE(encoder.SetBaseSubset(IntSet{}).ok()); - auto all_codepoints = btree_set{'A', 'B', 'C', 'M', 'N', 'P', 'Z'}; + IntSet all_codepoints{'A', 'B', 'C', 'M', 'N', 'P', 'Z'}; auto face = noto_sans_jp_cff2_.face(); encoder.AddNonGlyphDataSegment(all_codepoints); // Setup activations for patches 1 and 2 - sc.Update(encoder.AddGlyphDataPatchCondition(Condition::SimpleCondition( - SubsetDefinition::Codepoints(btree_set{'A', 'B', 'C'}), 1))); - sc.Update(encoder.AddGlyphDataPatchCondition(Condition::SimpleCondition( - SubsetDefinition::Codepoints(btree_set{'M', 'N', 'P', 'Z'}), - 2))); + sc.Update(encoder.AddGlyphDataPatchCondition( + Condition::SimpleCondition(SubsetDefinition{'A', 'B', 'C'}, 1))); + sc.Update(encoder.AddGlyphDataPatchCondition( + Condition::SimpleCondition(SubsetDefinition{'M', 'N', 'P', 'Z'}, 2))); auto encoding = encoder.Encode(); ASSERT_TRUE(encoding.ok()) << encoding.status(); @@ -1314,7 +1307,7 @@ TEST_F(IntegrationTest, MixedMode_Cff2) { auto codepoints = FontHelper::ToCodepointsSet(encoded_face.get()); // Last gid (Z) is always included in initial font to force correct glyph // count in CFF/CFF2. - ASSERT_EQ(codepoints, btree_set{'Z'}); + ASSERT_EQ(codepoints, IntSet{'Z'}); auto extended = Extend(*encoding, {'B'}); ASSERT_TRUE(extended.ok()) << extended.status(); diff --git a/ift/proto/format_2_patch_map.cc b/ift/proto/format_2_patch_map.cc index 7e7632f0..acdc623b 100644 --- a/ift/proto/format_2_patch_map.cc +++ b/ift/proto/format_2_patch_map.cc @@ -9,7 +9,7 @@ #include "common/compat_id.h" #include "common/font_helper.h" #include "common/font_helper_macros.h" -#include "common/hb_set_unique_ptr.h" +#include "common/int_set.h" #include "common/sparse_bit_set.h" #include "ift/proto/ift_table.h" #include "ift/proto/patch_encoding.h" @@ -23,7 +23,7 @@ using absl::StrCat; using absl::string_view; using common::CompatId; using common::FontHelper; -using common::hb_set_unique_ptr; +using common::IntSet; using common::make_hb_set; using common::SparseBitSet; @@ -232,12 +232,12 @@ void EncodeCodepoints(uint8_t bias_bytes, const PatchMap::Coverage& coverage, uint32_t max_bias = (1 << ((uint32_t)bias_bytes) * 8) - 1; uint32_t bias = std::min(coverage.SmallestCodepoint(), max_bias); - hb_set_unique_ptr biased_set = make_hb_set(); + IntSet biased_set; for (uint32_t cp : coverage.codepoints) { - hb_set_add(biased_set.get(), cp - bias); + biased_set.insert(cp - bias); } - std::string sparse_bit_set = SparseBitSet::Encode(*biased_set); + std::string sparse_bit_set = SparseBitSet::Encode(biased_set); if (bias_bytes == 2) { FontHelper::WriteUInt16(bias, out); diff --git a/ift/proto/ift_table.cc b/ift/proto/ift_table.cc index 51176fcc..7d4c1d12 100644 --- a/ift/proto/ift_table.cc +++ b/ift/proto/ift_table.cc @@ -1,7 +1,6 @@ #include "ift/proto/ift_table.h" #include -#include #include #include @@ -12,7 +11,6 @@ #include "absl/strings/string_view.h" #include "common/compat_id.h" #include "common/font_helper.h" -#include "common/hb_set_unique_ptr.h" #include "common/sparse_bit_set.h" #include "common/try.h" #include "hb.h" @@ -26,8 +24,6 @@ using absl::StrCat; using common::CompatId; using common::FontData; using common::FontHelper; -using common::hb_set_unique_ptr; -using common::make_hb_set; using common::SparseBitSet; namespace ift::proto { diff --git a/ift/proto/ift_table_test.cc b/ift/proto/ift_table_test.cc index 4e1c725e..fef23b52 100644 --- a/ift/proto/ift_table_test.cc +++ b/ift/proto/ift_table_test.cc @@ -10,7 +10,6 @@ #include "common/compat_id.h" #include "common/font_data.h" #include "common/font_helper.h" -#include "common/hb_set_unique_ptr.h" #include "common/sparse_bit_set.h" #include "gtest/gtest.h" #include "ift/proto/format_2_patch_map.h" @@ -24,10 +23,8 @@ using common::FontData; using common::FontHelper; using common::hb_blob_unique_ptr; using common::hb_face_unique_ptr; -using common::hb_set_unique_ptr; using common::make_hb_blob; using common::make_hb_face; -using common::make_hb_set; using common::SparseBitSet; using ift::proto::GLYPH_KEYED; using ift::proto::TABLE_KEYED_PARTIAL; diff --git a/ift/proto/patch_map.cc b/ift/proto/patch_map.cc index 22ef9c0d..7a0c8832 100644 --- a/ift/proto/patch_map.cc +++ b/ift/proto/patch_map.cc @@ -7,7 +7,7 @@ #include "absl/status/statusor.h" #include "absl/types/span.h" #include "common/font_helper.h" -#include "common/hb_set_unique_ptr.h" +#include "common/int_set.h" #include "common/sparse_bit_set.h" #include "ift/feature_registry/feature_registry.h" #include "ift/proto/patch_encoding.h" @@ -18,8 +18,7 @@ using absl::Span; using absl::Status; using absl::StatusOr; using common::FontHelper; -using common::hb_set_unique_ptr; -using common::make_hb_set; +using common::IntSet; using common::SparseBitSet; using ift::feature_registry::FeatureTagToIndex; using ift::feature_registry::IndexToFeatureTag; @@ -40,9 +39,7 @@ static bool sets_intersect(const S& a, const S& b) { } void PrintTo(const PatchMap::Coverage& coverage, std::ostream* os) { - absl::btree_set sorted_codepoints; - std::copy(coverage.codepoints.begin(), coverage.codepoints.end(), - std::inserter(sorted_codepoints, sorted_codepoints.begin())); + const IntSet& sorted_codepoints = coverage.codepoints; if (!coverage.features.empty() || !coverage.design_space.empty()) { *os << "{"; diff --git a/ift/proto/patch_map.h b/ift/proto/patch_map.h index 100150d0..9176337a 100644 --- a/ift/proto/patch_map.h +++ b/ift/proto/patch_map.h @@ -6,11 +6,9 @@ #include "absl/container/btree_map.h" #include "absl/container/btree_set.h" -#include "absl/container/flat_hash_map.h" -#include "absl/container/flat_hash_set.h" -#include "absl/status/statusor.h" #include "absl/types/span.h" #include "common/axis_range.h" +#include "common/int_set.h" #include "hb.h" #include "ift/proto/patch_encoding.h" @@ -31,7 +29,7 @@ class PatchMap { Coverage() {} Coverage(std::initializer_list codepoints_list) : codepoints(codepoints_list) {} - Coverage(const absl::flat_hash_set& codepoints_list) + Coverage(const common::IntSet& codepoints_list) : codepoints(codepoints_list) {} friend void PrintTo(const Coverage& point, std::ostream* os); @@ -51,8 +49,7 @@ class PatchMap { return min; } - // TODO(garretrieger): use hb sets instead? - absl::flat_hash_set codepoints; + common::IntSet codepoints; absl::btree_set features; absl::btree_map design_space; @@ -61,7 +58,7 @@ class PatchMap { // Set of child entry indices // (https://w3c.github.io/IFT/Overview.html#mapping-entry-childentrymatchmodeandcount) // values are the indices of previous entries. - absl::btree_set child_indices; + common::IntSet child_indices; }; struct Entry { diff --git a/ift/proto/patch_map_test.cc b/ift/proto/patch_map_test.cc index 62f1c706..711ca4ae 100644 --- a/ift/proto/patch_map_test.cc +++ b/ift/proto/patch_map_test.cc @@ -11,7 +11,6 @@ #include "absl/types/span.h" #include "common/font_data.h" #include "common/font_helper.h" -#include "common/hb_set_unique_ptr.h" #include "common/sparse_bit_set.h" #include "gtest/gtest.h" #include "ift/proto/patch_encoding.h" @@ -24,8 +23,6 @@ using absl::Status; using absl::StrCat; using common::FontData; using common::FontHelper; -using common::hb_set_unique_ptr; -using common::make_hb_set; using common::SparseBitSet; namespace ift::proto { diff --git a/ift/table_keyed_diff.cc b/ift/table_keyed_diff.cc index e3cc506f..75df7a40 100644 --- a/ift/table_keyed_diff.cc +++ b/ift/table_keyed_diff.cc @@ -136,7 +136,7 @@ Status TableKeyedDiff::Diff(const FontData& font_base, return absl::OkStatus(); } -void TableKeyedDiff::AddAllMatching(const flat_hash_set& tags, +void TableKeyedDiff::AddAllMatching(const flat_hash_set& tags, btree_set& result) const { for (const uint32_t& t : tags) { std::string tag = FontHelper::ToString(t); @@ -147,8 +147,8 @@ void TableKeyedDiff::AddAllMatching(const flat_hash_set& tags, } btree_set TableKeyedDiff::TagsToDiff( - const absl::flat_hash_set& before, - const absl::flat_hash_set& after) const { + const absl::flat_hash_set& before, + const absl::flat_hash_set& after) const { btree_set result; AddAllMatching(before, result); AddAllMatching(after, result); diff --git a/ift/table_keyed_diff.h b/ift/table_keyed_diff.h index 6c17ae71..32bd4796 100644 --- a/ift/table_keyed_diff.h +++ b/ift/table_keyed_diff.h @@ -45,11 +45,11 @@ class TableKeyedDiff : public common::BinaryDiff { common::FontData* patch /* OUT */) const override; private: - void AddAllMatching(const absl::flat_hash_set& tags, + void AddAllMatching(const absl::flat_hash_set& tags, absl::btree_set& result) const; absl::btree_set TagsToDiff( - const absl::flat_hash_set& before, - const absl::flat_hash_set& after) const; + const absl::flat_hash_set& before, + const absl::flat_hash_set& after) const; common::BrotliBinaryDiff binary_diff_; common::CompatId base_compat_id_; diff --git a/ift/testdata/test_segments.h b/ift/testdata/test_segments.h index 3ba79c78..72f4dbbe 100644 --- a/ift/testdata/test_segments.h +++ b/ift/testdata/test_segments.h @@ -3,8 +3,7 @@ #include -#include "absl/container/btree_set.h" -#include "absl/container/flat_hash_set.h" +#include "common/int_set.h" namespace ift::testdata { @@ -28,8 +27,8 @@ static uint32_t TEST_SEGMENT_1[] = { 987, 992, 993, 994, 1001, 1002, 1003, 1004, 1006, 1008, 1010, 1012, 1013, 1014, 1015, 1016, 1022, 1030, 1033, 1034}; -static absl::btree_set TestSegment1() { - absl::btree_set result; +static common::GlyphSet TestSegment1() { + common::GlyphSet result; for (uint32_t v : TEST_SEGMENT_1) { result.insert(v); } @@ -58,8 +57,8 @@ static uint32_t TEST_SEGMENT_2[] = { 1037, 1038, 1039, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055}; -static absl::btree_set TestSegment2() { - absl::btree_set result; +static common::GlyphSet TestSegment2() { + common::GlyphSet result; for (uint32_t v : TEST_SEGMENT_2) { result.insert(v); } @@ -68,8 +67,8 @@ static absl::btree_set TestSegment2() { static uint32_t TEST_SEGMENT_3[] = {169}; -static absl::btree_set TestSegment3() { - absl::btree_set result; +static common::GlyphSet TestSegment3() { + common::GlyphSet result; for (uint32_t v : TEST_SEGMENT_3) { result.insert(v); } @@ -114,8 +113,8 @@ static uint32_t TEST_SEGMENT_4[] = { 847, 925, 928, 931, 932, 933, 934, 936, 938, 939, 1017, 1019, 1020, 1026, 1027, 1028, 1029, 1032}; -static absl::btree_set TestSegment4() { - absl::btree_set result; +static common::GlyphSet TestSegment4() { + common::GlyphSet result; for (uint32_t v : TEST_SEGMENT_4) { result.insert(v); } @@ -142,8 +141,8 @@ static uint32_t TEST_VF_SEGMENT_1[] = { 994, 1001, 1002, 1003, 1004, 1006, 1008, 1010, 1012, 1013, 1014, 1015, 1016, 1022, 1030}; -static absl::btree_set TestVfSegment1() { - absl::btree_set result; +static common::GlyphSet TestVfSegment1() { + common::GlyphSet result; for (uint32_t v : TEST_VF_SEGMENT_1) { result.insert(v); } @@ -173,8 +172,8 @@ static uint32_t TEST_VF_SEGMENT_2[] = { 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, }; -static absl::btree_set TestVfSegment2() { - absl::btree_set result; +static common::GlyphSet TestVfSegment2() { + common::GlyphSet result; for (uint32_t v : TEST_VF_SEGMENT_2) { result.insert(v); } @@ -183,8 +182,8 @@ static absl::btree_set TestVfSegment2() { static uint32_t TEST_VF_SEGMENT_3[] = {169}; -static absl::btree_set TestVfSegment3() { - absl::btree_set result; +static common::GlyphSet TestVfSegment3() { + common::GlyphSet result; for (uint32_t v : TEST_VF_SEGMENT_3) { result.insert(v); } @@ -217,8 +216,8 @@ static uint32_t TEST_VF_SEGMENT_4[] = { 843, 844, 846, 847, 925, 928, 931, 932, 933, 934, 936, 938, 939, 1017, 1019, 1020, 1026, 1027, 1028, 1029, 1032}; -static absl::btree_set TestVfSegment4() { - absl::btree_set result; +static common::GlyphSet TestVfSegment4() { + common::GlyphSet result; for (uint32_t v : TEST_VF_SEGMENT_4) { result.insert(v); } @@ -236,8 +235,8 @@ static uint32_t TEST_FEATURE_SEGMENT_1[] = { 461, 462, 463, 469, 477, 478, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 817, 822, 826, 827}; -static absl::btree_set TestFeatureSegment1() { - absl::btree_set result; +static common::GlyphSet TestFeatureSegment1() { + common::GlyphSet result; for (uint32_t v : TEST_FEATURE_SEGMENT_1) { result.insert(v); } @@ -255,8 +254,8 @@ static uint32_t TEST_FEATURE_SEGMENT_2[] = { 465, 468, 470, 471, 472, 479, 816, 818, 819, 820, 821, 823, }; -static absl::btree_set TestFeatureSegment2() { - absl::btree_set result; +static common::GlyphSet TestFeatureSegment2() { + common::GlyphSet result; for (uint32_t v : TEST_FEATURE_SEGMENT_2) { result.insert(v); } @@ -265,8 +264,8 @@ static absl::btree_set TestFeatureSegment2() { static uint32_t TEST_FEATURE_SEGMENT_3[] = {169}; -static absl::btree_set TestFeatureSegment3() { - absl::btree_set result; +static common::GlyphSet TestFeatureSegment3() { + common::GlyphSet result; for (uint32_t v : TEST_FEATURE_SEGMENT_3) { result.insert(v); } @@ -307,8 +306,8 @@ static uint32_t TEST_FEATURE_SEGMENT_4[] = { 762, 763, 764, 765, 766, 767, 768, 769, 791, 792, 793, 794, 795, 796, 797, 798, 799, 800}; -static absl::btree_set TestFeatureSegment4() { - absl::btree_set result; +static common::GlyphSet TestFeatureSegment4() { + common::GlyphSet result; for (uint32_t v : TEST_FEATURE_SEGMENT_4) { result.insert(v); } @@ -335,8 +334,8 @@ static uint32_t TEST_FEATURE_SEGMENT_5[] = { 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055}; -static absl::btree_set TestFeatureSegment5() { - absl::btree_set result; +static common::IntSet TestFeatureSegment5() { + common::IntSet result; for (uint32_t v : TEST_FEATURE_SEGMENT_5) { result.insert(v); } @@ -347,8 +346,8 @@ static uint32_t TEST_FEATURE_SEGMENT_6[] = { 777, 778, 838, 839, 841, 842, 843, 844, 846, 847, 925, 928, 931, 932, 933, 934, 936, 938, 939, 1017, 1019, 1020, 1026, 1027, 1028, 1029, 1032}; -static absl::btree_set TestFeatureSegment6() { - absl::btree_set result; +static common::GlyphSet TestFeatureSegment6() { + common::GlyphSet result; for (uint32_t v : TEST_FEATURE_SEGMENT_6) { result.insert(v); } diff --git a/util/closure_glyph_keyed_segmenter_util.cc b/util/closure_glyph_keyed_segmenter_util.cc index e4fc9422..65f4f0e9 100644 --- a/util/closure_glyph_keyed_segmenter_util.cc +++ b/util/closure_glyph_keyed_segmenter_util.cc @@ -2,14 +2,10 @@ #include #include -#include #include -#include #include "absl/container/btree_map.h" -#include "absl/container/btree_set.h" #include "absl/container/flat_hash_map.h" -#include "absl/container/flat_hash_set.h" #include "absl/flags/flag.h" #include "absl/flags/parse.h" #include "absl/log/globals.h" @@ -17,7 +13,8 @@ #include "absl/status/statusor.h" #include "absl/strings/str_cat.h" #include "common/font_data.h" -#include "common/hb_set_unique_ptr.h" +#include "common/font_helper.h" +#include "common/int_set.h" #include "common/try.h" #include "hb.h" #include "ift/encoder/closure_glyph_segmenter.h" @@ -66,19 +63,18 @@ ABSL_FLAG(uint32_t, max_patch_size_bytes, UINT32_MAX, "this amount."); using absl::btree_map; -using absl::btree_set; using absl::flat_hash_map; -using absl::flat_hash_set; using absl::Status; using absl::StatusOr; using absl::StrCat; +using common::CodepointSet; using common::FontData; using common::FontHelper; +using common::GlyphSet; using common::hb_blob_unique_ptr; using common::hb_face_unique_ptr; -using common::hb_set_unique_ptr; +using common::IntSet; using common::make_hb_blob; -using common::make_hb_set; using google::protobuf::TextFormat; using ift::URLTemplate; using ift::encoder::ClosureGlyphSegmenter; @@ -89,21 +85,20 @@ using ift::encoder::SubsetDefinition; StatusOr> TargetCodepoints( hb_face_t* font, const std::string& codepoints_file) { - hb_set_unique_ptr font_unicodes = make_hb_set(); - hb_face_collect_unicodes(font, font_unicodes.get()); + IntSet font_unicodes = FontHelper::ToCodepointsSet(font); + std::vector codepoints_filtered; if (!codepoints_file.empty()) { auto codepoints = TRY(util::LoadCodepointsOrdered(codepoints_file.c_str())); for (auto cp : codepoints) { - if (hb_set_has(font_unicodes.get(), cp)) { + if (font_unicodes.contains(cp)) { codepoints_filtered.push_back(cp); } } } else { // No codepoints file, just use the full set of codepoints supported by the // font. - hb_codepoint_t cp = HB_SET_VALUE_INVALID; - while (hb_set_next(font_unicodes.get(), &cp)) { + for (uint32_t cp : font_unicodes) { codepoints_filtered.push_back(cp); } } @@ -207,9 +202,9 @@ StatusOr IdealSegmentationSize(hb_face_t* font, const GlyphSegmentation& segmentation, uint32_t number_input_segments) { fprintf(stderr, "IdealSegmentationSize():\n"); - btree_set glyphs; + IntSet glyphs; for (const auto& [id, glyph_set] : segmentation.GidSegments()) { - glyphs.insert(glyph_set.begin(), glyph_set.end()); + glyphs.union_set(glyph_set); } uint32_t glyphs_per_patch = glyphs.size() / number_input_segments; @@ -218,9 +213,9 @@ StatusOr IdealSegmentationSize(hb_face_t* font, Encoder encoder; encoder.SetFace(font); - flat_hash_set all_unicodes; + IntSet all_unicodes; - TRYV(encoder.SetBaseSubset(flat_hash_set{})); + TRYV(encoder.SetBaseSubset(IntSet{})); auto glyphs_it = glyphs.begin(); for (uint32_t i = 0; i < number_input_segments; i++) { @@ -231,7 +226,7 @@ StatusOr IdealSegmentationSize(hb_face_t* font, remainder_glyphs--; } - btree_set gids; + GlyphSet gids; gids.insert(begin, glyphs_it); auto unicodes = FontHelper::GidsToUnicodes(font, gids); @@ -264,18 +259,18 @@ StatusOr SegmentationSize(hb_face_t* font, Encoder encoder; encoder.SetFace(font); - flat_hash_set all_segments; + IntSet all_segments; - TRYV(encoder.SetBaseSubset(flat_hash_set{})); + TRYV(encoder.SetBaseSubset(IntSet{})); for (const auto& [id, glyph_set] : segmentation.GidSegments()) { - btree_set s; + IntSet s; s.insert(glyph_set.begin(), glyph_set.end()); TRYV(encoder.AddGlyphDataPatch(id, s)); all_segments.insert(id); } - btree_set all_codepoints; + IntSet all_codepoints; for (const auto& s : segmentation.Segments()) { all_codepoints.insert(s.begin(), s.end()); } @@ -286,7 +281,7 @@ StatusOr SegmentationSize(hb_face_t* font, conditions.push_back(c); } - flat_hash_map> segments; + flat_hash_map segments; uint32_t i = 0; for (const auto& s : segmentation.Segments()) { segments[i++].insert(s.begin(), s.end()); @@ -303,12 +298,12 @@ StatusOr SegmentationSize(hb_face_t* font, return EncodingSize(&segmentation, encoding); } -std::vector> GroupCodepoints( - std::vector codepoints, uint32_t number_of_segments) { +std::vector GroupCodepoints(std::vector codepoints, + uint32_t number_of_segments) { uint32_t per_group = codepoints.size() / number_of_segments; uint32_t remainder = codepoints.size() % number_of_segments; - std::vector> out; + std::vector out; auto end = codepoints.begin(); for (uint32_t i = 0; i < number_of_segments; i++) { auto start = end; @@ -318,8 +313,7 @@ std::vector> GroupCodepoints( remainder--; } - flat_hash_set group; - btree_set sorted_group; + CodepointSet group; group.insert(start, end); out.push_back(group); } diff --git a/util/convert_iftb.cc b/util/convert_iftb.cc index ce2b2479..2fac8415 100644 --- a/util/convert_iftb.cc +++ b/util/convert_iftb.cc @@ -5,18 +5,18 @@ #include #include "absl/container/btree_map.h" -#include "absl/container/btree_set.h" #include "absl/status/statusor.h" #include "absl/strings/string_view.h" #include "common/font_helper.h" +#include "common/int_set.h" #include "hb.h" #include "util/encoder_config.pb.h" using absl::btree_map; -using absl::btree_set; using absl::StatusOr; using absl::string_view; using common::FontHelper; +using common::IntSet; namespace util { @@ -36,8 +36,8 @@ string_view next_token(string_view line, string_view delim, std::string& out) { return line.substr(index + delim.size()); } -btree_set load_chunk_set(string_view line) { - btree_set result; +IntSet load_chunk_set(string_view line) { + IntSet result; std::string next; while (!line.empty()) { @@ -68,7 +68,7 @@ btree_map load_gid_map(string_view line) { StatusOr create_config( const btree_map& gid_map, - const btree_set& loaded_chunks, hb_face_t* face) { + const IntSet& loaded_chunks, hb_face_t* face) { auto gid_to_unicode = FontHelper::GidToUnicodeMap(face); EncoderConfig config; // Populate segments in the config. chunks are directly analagous to segments. @@ -95,7 +95,7 @@ StatusOr create_config( // Add all non-initial segments to a single non-glyph segment // TODO(garretrieger): flag to configure having more than one table keyed // segment. - btree_set non_initial_segments; + IntSet non_initial_segments; for (const auto [gid, chunk] : gid_map) { if (loaded_chunks.contains(chunk)) { continue; @@ -117,7 +117,7 @@ StatusOr create_config( StatusOr convert_iftb(string_view iftb_dump, hb_face_t* face) { btree_map gid_map; - btree_set loaded_chunks; + IntSet loaded_chunks; while (!iftb_dump.empty()) { std::string line; diff --git a/util/font2ift.cc b/util/font2ift.cc index ff52a2e5..ccbc15ac 100644 --- a/util/font2ift.cc +++ b/util/font2ift.cc @@ -7,13 +7,13 @@ #include "absl/container/btree_set.h" #include "absl/container/flat_hash_map.h" -#include "absl/container/flat_hash_set.h" #include "absl/flags/flag.h" #include "absl/flags/parse.h" #include "absl/status/statusor.h" #include "absl/strings/str_cat.h" #include "common/axis_range.h" #include "common/font_data.h" +#include "common/int_set.h" #include "common/try.h" #include "hb.h" #include "ift/encoder/condition.h" @@ -45,15 +45,17 @@ ABSL_FLAG(std::string, output_font, "out.ttf", using absl::btree_set; using absl::flat_hash_map; -using absl::flat_hash_set; using absl::Status; using absl::StatusOr; using absl::StrCat; +using common::CodepointSet; using common::FontData; using common::FontHelper; using common::hb_blob_unique_ptr; using common::hb_face_unique_ptr; +using common::IntSet; using common::make_hb_blob; +using common::SegmentSet; using ift::encoder::Condition; using ift::encoder::design_space_t; using ift::encoder::Encoder; @@ -128,8 +130,8 @@ int write_output(const Encoder::Encoding& encoding) { } template -btree_set values(const T& proto_set) { - btree_set result; +IntSet values(const T& proto_set) { + IntSet result; for (uint32_t v : proto_set.values()) { result.insert(v); } @@ -159,9 +161,9 @@ GlyphSegmentation::ActivationCondition FromProto( const ActivationConditionProto& condition) { // TODO(garretrieger): once glyph segmentation activation conditions can // support features copy those here. - std::vector> groups; + std::vector groups; for (const auto& group : condition.required_codepoint_sets()) { - btree_set set; + SegmentSet set; set.insert(group.values().begin(), group.values().end()); groups.push_back(set); } @@ -186,7 +188,7 @@ Status ConfigureEncoder(EncoderConfig config, Encoder& encoder) { activation_conditions.push_back(FromProto(c)); } - flat_hash_map> codepoint_sets; + flat_hash_map codepoint_sets; for (const auto& [id, set] : config.codepoint_sets()) { codepoint_sets[id].insert(set.values().begin(), set.values().end()); } @@ -235,14 +237,14 @@ Status ConfigureEncoder(EncoderConfig config, Encoder& encoder) { } for (const auto& sets : config.non_glyph_codepoint_set_groups()) { - flat_hash_set codepoints; + IntSet codepoints; for (const auto& set_id : sets.values()) { auto set = codepoint_sets.find(set_id); if (set == codepoint_sets.end()) { return absl::InvalidArgumentError( StrCat("Codepoint set id, ", set_id, ", not found.")); } - codepoints.insert(set->second.begin(), set->second.end()); + codepoints.union_set(set->second); } encoder.AddNonGlyphDataSegment(codepoints); } diff --git a/util/generate_table_keyed_config.cc b/util/generate_table_keyed_config.cc index a4b406af..1a341318 100644 --- a/util/generate_table_keyed_config.cc +++ b/util/generate_table_keyed_config.cc @@ -4,17 +4,17 @@ #include #include -#include "absl/container/btree_set.h" #include "absl/flags/flag.h" #include "absl/flags/parse.h" #include "common/font_data.h" #include "common/font_helper.h" +#include "common/int_set.h" #include "util/encoder_config.pb.h" #include "util/load_codepoints.h" -using absl::btree_set; using common::FontData; using common::FontHelper; +using common::IntSet; using google::protobuf::TextFormat; ABSL_FLAG( @@ -24,7 +24,7 @@ ABSL_FLAG( "which are not covered by the input subset files."); template -ProtoType ToSetProto(const btree_set& set) { +ProtoType ToSetProto(const IntSet& set) { ProtoType values; for (uint32_t v : set) { values.add_values(v); @@ -70,14 +70,14 @@ int main(int argc, char** argv) { return -1; } - std::vector> sets; + std::vector sets; bool first = true; for (const char* arg : args) { if (first) { first = false; continue; } - btree_set set; + IntSet set; auto result = util::LoadCodepointsOrdered(arg); if (!result.ok()) { std::cerr << "Failed to load codepoints from " << arg << ": "