From 903db81efd45e3550f290424d059d49643e30692 Mon Sep 17 00:00:00 2001
From: Keita Iwabuchi
Date: Tue, 12 Sep 2023 09:38:55 -0700
Subject: [PATCH 01/25] Remove race warnings

---
 include/metall/basic_manager.hpp            |   1 +
 include/metall/kernel/chunk_directory.hpp   | 117 +++++++++++---------
 include/metall/kernel/segment_allocator.hpp |  48 ++++----
 test/kernel/chunk_directory_test.cpp        |  16 +--
 4 files changed, 99 insertions(+), 83 deletions(-)

diff --git a/include/metall/basic_manager.hpp b/include/metall/basic_manager.hpp
index 32def8d7..8d7319af 100644
--- a/include/metall/basic_manager.hpp
+++ b/include/metall/basic_manager.hpp
@@ -832,6 +832,7 @@ class basic_manager {
   // void deallocate_many(multiallocation_chain &chain);

   /// \brief Check if all allocated memory has been deallocated.
+  /// This function is not cheap if many objects have not been deallocated.
   /// \return Returns true if all allocated memory has been deallocated;
   /// otherwise, false.
   bool all_memory_deallocated() const noexcept {
diff --git a/include/metall/kernel/chunk_directory.hpp b/include/metall/kernel/chunk_directory.hpp
index 906520b2..d5174232 100644
--- a/include/metall/kernel/chunk_directory.hpp
+++ b/include/metall/kernel/chunk_directory.hpp
@@ -25,6 +25,9 @@ namespace {
 namespace mdtl = metall::mtlldetail;
 }

+/// \brief Chunk directory class.
+/// Chunk directory is a table that stores information about chunks.
+/// This class assumes that race conditions are handled by the caller.
 template
 class chunk_directory {
@@ -54,7 +57,7 @@ class chunk_directory {
   // Private types and static values
   // -------------------- //
   enum chunk_type : uint8_t {
-    empty = 0,
+    unused = 0,
     small_chunk = 1,
     large_chunk_head = 2,
     large_chunk_body = 3
@@ -63,7 +66,7 @@
   // Chunk directory is just an array of this structure
   struct entry_type {
     void init() {
-      type = chunk_type::empty;
+      type = chunk_type::unused;
       num_occupied_slots = 0;
       slot_occupancy.reset();
     }
@@ -78,9 +81,14 @@
   // -------------------- //
   // Constructor & assign operator
   // -------------------- //
+  /// \brief Constructor. This constructor allocates memory for the chunk
+  /// directory.
+  /// \param max_num_chunks Maximum number of chunks that can be managed.
   explicit chunk_directory(const std::size_t max_num_chunks)
-      : m_table(nullptr), m_max_num_chunks(0), m_last_used_chunk_no(-1) {
-    priv_allocate(max_num_chunks);
+      : m_table(nullptr),
+        m_max_num_chunks(max_num_chunks),
+        m_last_used_chunk_no(-1) {
+    priv_allocate();
   }

   ~chunk_directory() noexcept { priv_destroy(); }
@@ -94,9 +102,10 @@
   // -------------------- //
   // Public methods
   // -------------------- //
-  /// \brief
-  /// \param bin_no
-  /// \return
+  /// \brief Registers a new chunk for a bin whose bin number is 'bin_no'.
+  /// Requires a global lock to avoid race conditions.
+  /// \param bin_no Bin number.
+  /// \return Returns the chunk number of the new chunk.
   chunk_no_type insert(const bin_no_type bin_no) {
     chunk_no_type inserted_chunk_no;

     } else {
       inserted_chunk_no = priv_insert_large_chunk(bin_no);
     }
-    assert(inserted_chunk_no < size());

     return inserted_chunk_no;
   }

-  /// \brief
-  /// \param chunk_no
+  /// \brief Erases a chunk whose chunk number is 'chunk_no'.
+  /// Requires a global lock to avoid race conditions.
+  /// \param chunk_no Chunk number to erase.
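  // --- Editor's note --------------------------------------------------- //
  // A minimal sketch of the locking contract described above. The names
  // (g_directory_mutex, dir) are hypothetical caller-side code, not part of
  // this class; chunk_directory itself performs no locking.
  //
  //   std::mutex g_directory_mutex;  // one table-wide lock owned by caller
  //
  //   template <typename directory_t, typename bin_no_t>
  //   auto locked_insert(directory_t &dir, bin_no_t bin_no) {
  //     std::lock_guard<std::mutex> guard(g_directory_mutex);
  //     return dir.insert(bin_no);  // insert()/erase() mutate the table
  //   }
  // ---------------------------------------------------------------------- //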
  void erase(const chunk_no_type chunk_no) {
    assert(chunk_no < size());

-    if (empty_chunk(chunk_no)) return;
+    if (unused_chunk(chunk_no)) return;

    if (m_table[chunk_no].type == chunk_type::small_chunk) {
      const slot_count_type num_slots = slots(chunk_no);
@@ -142,11 +151,11 @@
    }
  }

-  /// \brief
-  /// \param chunk_no
-  /// \return
+  /// \brief Finds an available slot in the chunk whose chunk number is
+  /// 'chunk_no' and marks it as occupied. This function modifies only the
+  /// specified chunk; thus, the global lock is not required.
+  /// \param chunk_no Chunk number. \return Returns the marked slot number.
  slot_no_type find_and_mark_slot(const chunk_no_type chunk_no) {
-    assert(chunk_no < size());
    assert(m_table[chunk_no].type == chunk_type::small_chunk);

    const slot_count_type num_slots = slots(chunk_no);
@@ -161,7 +170,8 @@
    return empty_slot_no;
  }

-  /// \brief Finds and marks multiple slots up to 'num_slots'.
+  /// \brief Finds and marks multiple slots up to 'num_slots'. This function
+  /// modifies only the specified chunk; thus, the global lock is not required.
  /// \param chunk_no Chunk number.
  /// \param num_slots Number of slots to find and mark.
  /// \param slots_buf Buffer to store found slots.
@@ -171,7 +181,6 @@
  std::size_t find_and_mark_many_slots(const chunk_no_type chunk_no,
                                       const std::size_t num_slots,
                                       slot_no_type *const slots_buf) {
-    assert(chunk_no < size());
    assert(m_table[chunk_no].type == chunk_type::small_chunk);

    const slot_count_type num_holding_slots = slots(chunk_no);
@@ -194,7 +203,6 @@
  /// \param chunk_no
  /// \param slot_no
  void unmark_slot(const chunk_no_type chunk_no, const slot_no_type slot_no) {
-    assert(chunk_no < size());
    assert(m_table[chunk_no].type == chunk_type::small_chunk);

    const slot_count_type num_slots = slots(chunk_no);
@@ -205,13 +213,11 @@
    --m_table[chunk_no].num_occupied_slots;
  }

-  /// \brief
-  /// \param chunk_no
-  /// \return
+  /// \brief Returns whether all slots in the chunk are marked.
+  /// This function is valid only for a small chunk.
+  /// \param chunk_no Chunk number. Must be less than size().
+  /// \return Returns true if all slots in the chunk are marked.
  bool all_slots_marked(const chunk_no_type chunk_no) const {
-    if (chunk_no >= size()) {
-      return false;
-    }
    assert(m_table[chunk_no].type == chunk_type::small_chunk);

    const slot_count_type num_slots = slots(chunk_no);
@@ -220,11 +226,13 @@
    return (m_table[chunk_no].num_occupied_slots == num_slots);
  }

-  /// \brief
-  /// \param chunk_no
-  /// \return
+  /// \brief Returns whether all slots in the chunk are unmarked.
+  /// This function is valid only for a small chunk.
+  /// Even if all slots are unmarked, the chunk still holds a slot table;
+  /// thus, unused_chunk() returns false.
+  /// \param chunk_no Chunk number of a small chunk.
+  /// \return Returns true if all slots in the chunk are unmarked.
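  // --- Editor's note --------------------------------------------------- //
  // The small-chunk slot lifecycle implied by the functions above, as a
  // hedged caller-side sketch (`dir` and `bin_no` are assumptions):
  //
  //   const auto chunk_no = dir.insert(bin_no);   // global lock held
  //   const auto slot_no = dir.find_and_mark_slot(chunk_no);  // per-chunk
  //   dir.unmark_slot(chunk_no, slot_no);
  //   assert(dir.all_slots_unmarked(chunk_no));   // slot table still exists
  //   dir.erase(chunk_no);                        // global lock held again
  //   assert(chunk_no >= dir.size() || dir.unused_chunk(chunk_no));
  // ---------------------------------------------------------------------- //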
bool all_slots_unmarked(const chunk_no_type chunk_no) const { - assert(chunk_no < size()); assert(m_table[chunk_no].type == chunk_type::small_chunk); return (m_table[chunk_no].num_occupied_slots == 0); } @@ -233,9 +241,8 @@ class chunk_directory { /// \param chunk_no /// \param slot_no /// \return - bool slot_marked(const chunk_no_type chunk_no, + bool marked_slot(const chunk_no_type chunk_no, const slot_no_type slot_no) const { - assert(chunk_no < size()); assert(m_table[chunk_no].type == chunk_type::small_chunk); const slot_count_type num_slots = @@ -244,15 +251,20 @@ class chunk_directory { return m_table[chunk_no].slot_occupancy.get(num_slots, slot_no); } - /// \brief - /// \return + /// \brief Returns the chunk directory size, which is the max chunk number + + /// 1, not the number of used chunks. + /// \return Returns the chunk directory size. + /// \warning The returned value can be incorrect depending on the timing of + /// another thread. std::size_t size() const { return m_last_used_chunk_no + 1; } - /// \brief - /// \param chunk_no - /// \return - bool empty_chunk(const chunk_no_type chunk_no) const { - return (m_table[chunk_no].type == chunk_type::empty); + /// \brief Returns true if a chunk is unused. + /// 'unused' chunk means that the chunk is not used and does not hold any + /// data or slot table. + /// \param chunk_no Chunk number. Must be less than size(). + /// \return Returns true if the chunk is not used. + bool unused_chunk(const chunk_no_type chunk_no) const { + return (m_table[chunk_no].type == chunk_type::unused); } /// \brief @@ -266,7 +278,6 @@ class chunk_directory { /// \param chunk_no /// \return const slot_count_type slots(const chunk_no_type chunk_no) const { - assert(chunk_no < size()); assert(m_table[chunk_no].type == chunk_type::small_chunk); const auto bin_no = m_table[chunk_no].bin_no; @@ -277,7 +288,6 @@ class chunk_directory { /// \param chunk_no /// \return slot_count_type occupied_slots(const chunk_no_type chunk_no) const { - assert(chunk_no < size()); assert(m_table[chunk_no].type == chunk_type::small_chunk); return m_table[chunk_no].num_occupied_slots; } @@ -294,7 +304,7 @@ class chunk_directory { } for (chunk_no_type chunk_no = 0; chunk_no < size(); ++chunk_no) { - if (empty_chunk(chunk_no)) { + if (unused_chunk(chunk_no)) { continue; } @@ -488,23 +498,22 @@ class chunk_directory { return k_chunk_size / object_size; } - /// \brief Reserves chunk directory. + /// \brief Allocates memory for 'm_max_num_chunks' chunks. + /// This function assumes that 'm_max_num_chunks' is set. /// Allocates 'uncommitted pages' so that not to waste physical memory until /// the pages are touched. Accordingly, this function does not initialize an - /// allocate data. \param max_num_chunks - bool priv_allocate(const std::size_t max_num_chunks) { + /// allocate data. + bool priv_allocate() { assert(!m_table); - m_max_num_chunks = max_num_chunks; // Assume that mmap + MAP_ANONYMOUS returns 'uncommitted pages'. // An uncommitted page will be zero-initialized when it is touched first - // time; however, this class does not relies on that. The table entries will + // time; however, this class does not rely on that. The table entries will // be initialized just before they are used. 
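    // Editor's note: the 'uncommitted pages' behavior assumed here can be
    // illustrated with plain POSIX mmap (stand-alone sketch, not this
    // codebase's API):
    //
    //   void *p = ::mmap(nullptr, nbytes, PROT_READ | PROT_WRITE,
    //                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    //   // 'p' is only reserved address space; a physical page is committed
    //   // lazily the first time the corresponding virtual page is touched.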
m_table = static_cast(mdtl::map_anonymous_write_mode( nullptr, m_max_num_chunks * sizeof(entry_type))); if (!m_table) { - m_max_num_chunks = 0; logger::perror(logger::level::error, __FILE__, __LINE__, "Cannot allocate chunk table"); return false; @@ -525,7 +534,6 @@ class chunk_directory { } mdtl::os_munmap(m_table, m_max_num_chunks * sizeof(entry_type)); m_table = nullptr; - m_max_num_chunks = 0; m_last_used_chunk_no = -1; } @@ -544,12 +552,9 @@ class chunk_directory { } for (chunk_no_type chunk_no = 0; chunk_no < m_max_num_chunks; ++chunk_no) { - if (chunk_no > m_last_used_chunk_no) { - // Initialize (empty) it before just in case - m_table[chunk_no].init(); - } + if (chunk_no > m_last_used_chunk_no || unused_chunk(chunk_no)) { + m_table[chunk_no].init(); // init just in case - if (empty_chunk(chunk_no)) { m_table[chunk_no].bin_no = bin_no; m_table[chunk_no].type = chunk_type::small_chunk; m_table[chunk_no].num_occupied_slots = 0; @@ -585,7 +590,7 @@ class chunk_directory { m_table[chunk_no].init(); } - if (!empty_chunk(chunk_no)) { + if (chunk_no <= m_last_used_chunk_no && !unused_chunk(chunk_no)) { count_continuous_empty_chunks = 0; continue; } @@ -617,8 +622,9 @@ class chunk_directory { ssize_t find_next_used_chunk_backward( const chunk_no_type start_chunk_no) const { + assert(start_chunk_no < size()); for (ssize_t chunk_no = start_chunk_no; chunk_no >= 0; --chunk_no) { - if (!empty_chunk(chunk_no)) { + if (!unused_chunk(chunk_no)) { return chunk_no; } } @@ -629,7 +635,8 @@ class chunk_directory { // Private fields // -------------------- // entry_type *m_table; - std::size_t m_max_num_chunks; + // Use const here to avoid race condition risks + const std::size_t m_max_num_chunks; ssize_t m_last_used_chunk_no; }; diff --git a/include/metall/kernel/segment_allocator.hpp b/include/metall/kernel/segment_allocator.hpp index d95441e0..08afc38d 100644 --- a/include/metall/kernel/segment_allocator.hpp +++ b/include/metall/kernel/segment_allocator.hpp @@ -150,7 +150,6 @@ class segment_allocator { ? priv_allocate_small_object(bin_no) : priv_allocate_large_object(bin_no); assert(offset >= 0 || offset == k_null_offset); - assert(offset < (difference_type)size() || offset == k_null_offset); return offset; } @@ -170,7 +169,7 @@ class segment_allocator { // from the minimum allocation size (i.e., 8 bytes) to maximum allocation // size exist in the object size table - // alignment must be equal to or large than the min allocation size + // alignment must be equal to or larger than the min allocation size if (alignment < bin_no_mngr::to_object_size(0)) { return k_null_offset; } @@ -202,9 +201,7 @@ class segment_allocator { /// \param offset void deallocate(const difference_type offset) { if (offset == k_null_offset) return; - assert(offset >= 0); - assert(offset < (difference_type)size()); const chunk_no_type chunk_no = offset / k_chunk_size; const bin_no_type bin_no = m_chunk_directory.bin_no(chunk_no); @@ -217,13 +214,19 @@ class segment_allocator { } /// \brief Checks if all memory is deallocated. + /// This function is not cheap if many objects are allocated. + /// \return Returns true if all memory is deallocated. 
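  // --- Editor's note --------------------------------------------------- //
  // Typical use through the public API, as a hedged sketch (the datastore
  // path is hypothetical):
  //
  //   metall::manager mgr(metall::create_only, "/tmp/dir");
  //   void *p = mgr.allocate(16);
  //   mgr.deallocate(p);
  //   assert(mgr.all_memory_deallocated());  // nothing left allocated
  // ---------------------------------------------------------------------- //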
bool all_memory_deallocated() const { +#if ENABLE_MUTEX_IN_METALL_SEGMENT_ALLOCATOR + lock_guard_type chunk_guard(*m_chunk_mutex); +#endif + if (m_chunk_directory.size() == 0) { return true; } #ifndef METALL_DISABLE_OBJECT_CACHE - if (priv_check_all_small_allocations_are_cached() && + if (priv_check_all_small_allocations_are_cached_without_lock() && m_chunk_directory.num_used_large_chunks() == 0) { return true; } @@ -232,8 +235,10 @@ class segment_allocator { return false; } - /// \brief - /// \return Returns the size of the segment range being used + /// \brief Returns the size of the segment being used. + /// \return The size of the segment being used. + /// \warning Be careful: the returned value can be incorrect because another + /// thread can increase or decrease chunk directory size at the same time. size_type size() const { return m_chunk_directory.size() * k_chunk_size; } /// \brief @@ -302,7 +307,7 @@ class segment_allocator { << "\n"; for (chunk_no_type chunk_no = 0; chunk_no < m_chunk_directory.size(); ++chunk_no) { - if (m_chunk_directory.empty_chunk(chunk_no)) { + if (m_chunk_directory.unused_chunk(chunk_no)) { (*log_out) << chunk_no << "\t0\t0\n"; } else { const bin_no_type bin_no = m_chunk_directory.bin_no(chunk_no); @@ -469,7 +474,7 @@ class segment_allocator { lock_guard_type chunk_guard(*m_chunk_mutex); #endif new_chunk_no = m_chunk_directory.insert(bin_no); - if (!priv_extend_segment(new_chunk_no, 1)) { + if (!priv_extend_segment_without_lock(new_chunk_no, 1)) { return false; } m_non_full_chunk_bin.insert(bin_no, new_chunk_no); @@ -483,7 +488,9 @@ class segment_allocator { const chunk_no_type new_chunk_no = m_chunk_directory.insert(bin_no); const size_type num_chunks = (bin_no_mngr::to_object_size(bin_no) + k_chunk_size - 1) / k_chunk_size; - if (!priv_extend_segment(new_chunk_no, num_chunks)) { + if (!priv_extend_segment_without_lock(new_chunk_no, num_chunks)) { + // Failed to extend the segment (fatal error) + // Do clean up just in case and return k_null_offset m_chunk_directory.erase(new_chunk_no); return k_null_offset; } @@ -491,8 +498,8 @@ class segment_allocator { return offset; } - bool priv_extend_segment(const chunk_no_type head_chunk_no, - const size_type num_chunks) { + bool priv_extend_segment_without_lock(const chunk_no_type head_chunk_no, + const size_type num_chunks) { const size_type required_segment_size = (head_chunk_no + num_chunks) * k_chunk_size; if (required_segment_size <= m_segment_storage->size()) { @@ -564,14 +571,15 @@ class segment_allocator { } #ifdef METALL_FREE_SMALL_OBJECT_SIZE_HINT - priv_free_slot(object_size, chunk_no, slot_no, - METALL_FREE_SMALL_OBJECT_SIZE_HINT); + priv_free_slot_without_bin_lock(object_size, chunk_no, slot_no, + METALL_FREE_SMALL_OBJECT_SIZE_HINT); #endif } - void priv_free_slot(const size_type object_size, const chunk_no_type chunk_no, - const chunk_slot_no_type slot_no, - const size_type min_free_size_hint) { + void priv_free_slot_without_bin_lock(const size_type object_size, + const chunk_no_type chunk_no, + const chunk_slot_no_type slot_no, + const size_type min_free_size_hint) { // To simplify the implementation, free slots only when object_size is at // least double of the page size const size_type min_free_size = std::max( @@ -589,7 +597,7 @@ class segment_allocator { if (range_begin % m_segment_storage->page_size() != 0) { assert(slot_no > 0); // Assume that chunk is page aligned - if (m_chunk_directory.slot_marked(chunk_no, slot_no - 1)) { + if (m_chunk_directory.marked_slot(chunk_no, slot_no - 1)) { // 
Round up to the next multiple of page size // The left region will be freed when the previous slot is freed range_begin = @@ -613,7 +621,7 @@ class segment_allocator { // aligned assert(object_size * (slot_no + 1) < k_chunk_size); - if (m_chunk_directory.slot_marked(chunk_no, slot_no + 1)) { + if (m_chunk_directory.marked_slot(chunk_no, slot_no + 1)) { range_end = mdtl::round_down(range_end, m_segment_storage->page_size()); } else { range_end = mdtl::round_up(range_end, m_segment_storage->page_size()); @@ -668,7 +676,7 @@ class segment_allocator { #ifndef METALL_DISABLE_OBJECT_CACHE /// \brief Checks if all marked (used) slots in the chunk directory exist in /// the object cache. - bool priv_check_all_small_allocations_are_cached() const { + bool priv_check_all_small_allocations_are_cached_without_lock() const { const auto marked_slots = m_chunk_directory.get_all_marked_slots(); std::set small_allocs; for (const auto &item : marked_slots) { diff --git a/test/kernel/chunk_directory_test.cpp b/test/kernel/chunk_directory_test.cpp index ae67a814..5e26bc39 100644 --- a/test/kernel/chunk_directory_test.cpp +++ b/test/kernel/chunk_directory_test.cpp @@ -56,13 +56,13 @@ TEST(ChunkDirectoryTest, EraseChunk) { ASSERT_GT(directory.size(), 0); directory.erase(chno0); - ASSERT_TRUE(directory.empty_chunk(chno0)); + ASSERT_TRUE(chno0 >= directory.size() || directory.unused_chunk(chno0)); directory.erase(chno1); - ASSERT_TRUE(directory.empty_chunk(chno1)); + ASSERT_TRUE(chno1 >= directory.size() || directory.unused_chunk(chno1)); directory.erase(chno2); - ASSERT_TRUE(directory.empty_chunk(chno2)); + ASSERT_TRUE(chno2 >= directory.size() || directory.unused_chunk(chno2)); directory.erase(chno3); - ASSERT_TRUE(directory.empty_chunk(chno3)); + ASSERT_TRUE(chno3 >= directory.size() || directory.unused_chunk(chno3)); ASSERT_EQ(directory.size(), 0); } @@ -79,10 +79,10 @@ TEST(ChunkDirectoryTest, MarkSlot) { auto chunk_no = static_cast(i); const std::size_t object_size = bin_no_mngr::to_object_size(bin_no); for (uint64_t k = 0; k < k_chunk_size / object_size; ++k) { - ASSERT_FALSE(directory.slot_marked(chunk_no, k)); + ASSERT_FALSE(directory.marked_slot(chunk_no, k)); ASSERT_FALSE(directory.all_slots_marked(chunk_no)); ASSERT_EQ(directory.find_and_mark_slot(chunk_no), k); - ASSERT_TRUE(directory.slot_marked(chunk_no, k)); + ASSERT_TRUE(directory.marked_slot(chunk_no, k)); } ASSERT_TRUE(directory.all_slots_marked(chunk_no)); } @@ -104,9 +104,9 @@ TEST(ChunkDirectoryTest, UnmarkSlot) { directory.find_and_mark_slot(chunk_no); } for (uint64_t k = 0; k < k_chunk_size / object_size; ++k) { - ASSERT_TRUE(directory.slot_marked(chunk_no, k)); + ASSERT_TRUE(directory.marked_slot(chunk_no, k)); directory.unmark_slot(chunk_no, k); - ASSERT_FALSE(directory.slot_marked(chunk_no, k)); + ASSERT_FALSE(directory.marked_slot(chunk_no, k)); ASSERT_EQ(directory.find_and_mark_slot(chunk_no), k); } } From e903e349aabeefabfa4cb55c897874937bddce64 Mon Sep 17 00:00:00 2001 From: Keita Iwabuchi Date: Wed, 13 Sep 2023 16:33:01 -0700 Subject: [PATCH 02/25] Update API document --- include/metall/basic_manager.hpp | 245 +++++++++++++++++++++++-------- 1 file changed, 186 insertions(+), 59 deletions(-) diff --git a/include/metall/basic_manager.hpp b/include/metall/basic_manager.hpp index 8d7319af..fb18ca9f 100644 --- a/include/metall/basic_manager.hpp +++ b/include/metall/basic_manager.hpp @@ -193,14 +193,10 @@ class basic_manager { // Public methods // -------------------- // - // Attributed object construction function family - // Each 
function also works with '[ ]' operator to generate an array, - // leveraging the proxy class (construct_proxy) - /// \private - /// \class common_doc_const_find - /// \brief - /// Object construction API developed by Boost.Interprocess + /// \class doc_object_attrb_obj_family + /// \details + /// Attributed object construction family API developed by Boost.Interprocess /// /// (see details). @@ -208,9 +204,44 @@ class basic_manager { /// /// A named object must be associated with non-empty name. /// The name of an unique object is typeid(T).name(). + /// An anonymous object has no name. + /// \warning + /// Constructing or destroying attributed objects breaks attributed object + /// iterators. + + /// \private + /// \class doc_thread_safe + /// \details This function is thread-safe. + + /// \private + /// \class doc_single_thread + /// \warning This function is not thread-safe and must be called by a single + /// thread at a time. + + /// \private + /// \class doc_thread_safe_alloc + /// \details This function is thread-safe. Other threads can also call the + /// attributed object construction functions and allocate functions + /// simultaneously. + + /// \private + /// \class doc_object_attrb_obj_const_thread_safe + /// \note This function is thread-safe as long as no other threads call + /// non-const attributed object construction functions simultaneously. + + /// \private + /// \class doc_no_alloc_thread_safe + /// \note This function is thread-safe as long as no other threads allocate + /// or deallocates memory at the same time. + + /// \private + /// \class doc_const_datastore_thread_safe + /// \note This function is thread-safe as long as no other threads modify + /// the same datastore simultaneously. /// \brief Allocates an object of type T. - /// \copydoc common_doc_const_find + /// \copydoc doc_object_attrb_obj_family + /// \copydoc doc_thread_safe_alloc /// /// \details /// If T's constructor throws, the function throws that exception. @@ -237,7 +268,9 @@ class basic_manager { } /// \brief Tries to find an already constructed object. If not exist, - /// constructs an object of type T. \copydoc common_doc_const_find + /// constructs an object of type T. + /// \copydoc doc_object_attrb_obj_family + /// \copydoc doc_thread_safe_alloc /// /// \details /// If T's constructor throws, the function throws that exception. @@ -264,7 +297,9 @@ class basic_manager { } /// \brief Allocates an array of objects of type T, receiving arguments from - /// iterators. \copydoc common_doc_const_find + /// iterators. + /// \copydoc doc_object_attrb_obj_family + /// \copydoc doc_thread_safe_alloc /// /// \details /// If T's constructor throws, the function throws that exception. @@ -289,7 +324,9 @@ class basic_manager { /// \brief Tries to find an already constructed object. /// If not exist, constructs an array of objects of type T, receiving - /// arguments from iterators. \copydoc common_doc_const_find + /// arguments from iterators. + /// \copydoc doc_object_attrb_obj_family + /// \copydoc doc_thread_safe_alloc /// /// \details /// If T's constructor throws, the function throws that exception. @@ -313,10 +350,8 @@ class basic_manager { } /// \brief Tries to find a previously created object. - /// \copydoc common_doc_const_find - /// \warning There is no mutex inside. - /// Calling this function with other construct/destroy methods that updates an - /// object directory simultaneously will cause a concurrent issue. 
+ /// \copydoc doc_object_attrb_obj_family + /// \copydoc doc_object_attrb_obj_const_thread_safe /// /// \details /// Example: @@ -346,7 +381,8 @@ class basic_manager { /// \brief Destroys a previously created object. /// Calls the destructor and frees the memory. - /// \copydoc common_doc_const_find + /// \copydoc doc_object_attrb_obj_family + /// \copydoc doc_thread_safe_alloc /// /// \details /// @@ -376,7 +412,8 @@ class basic_manager { /// \brief Destroys a unique object of type T. /// Calls the destructor and frees the memory. - /// \copydoc common_doc_const_find + /// \copydoc doc_object_attrb_obj_family + /// \copydoc doc_thread_safe_alloc /// /// \details /// @@ -406,7 +443,9 @@ class basic_manager { /// \brief Destroys a object (named, unique, or anonymous) by its address. /// Calls the destructor and frees the memory. /// Cannot destroy an object not allocated by construct/find_or_construct - /// functions. \copydoc common_doc_const_find + /// functions. + /// \copydoc doc_object_attrb_obj_family + /// \copydoc doc_thread_safe_alloc /// /// \details /// @@ -438,6 +477,8 @@ class basic_manager { /// \brief Returns the name of an object created with /// construct/find_or_construct functions. + /// \copydoc doc_object_attrb_obj_family + /// \copydoc doc_object_attrb_obj_const_thread_safe /// /// \details /// Example: @@ -467,6 +508,8 @@ class basic_manager { /// \brief Returns the kind of an object created with /// construct/find_or_construct functions. + /// \copydoc doc_object_attrb_obj_family + /// \copydoc doc_object_attrb_obj_const_thread_safe /// /// \details /// Example: @@ -494,6 +537,8 @@ class basic_manager { /// \brief Returns the length of an object created with /// construct/find_or_construct functions (1 if is a single element, >=1 if /// it's an array). + /// \copydoc doc_object_attrb_obj_family + /// \copydoc doc_object_attrb_obj_const_thread_safe /// /// \details /// Example: @@ -520,6 +565,7 @@ class basic_manager { /// \brief Checks if the type of an object, which was created with /// construct/find_or_construct functions, is T. + /// \copydoc doc_object_attrb_obj_const_thread_safe /// /// \details /// Example: @@ -546,6 +592,7 @@ class basic_manager { /// \brief Gets the description of an object created with /// construct/find_or_construct + /// \copydoc doc_object_attrb_obj_const_thread_safe /// /// \details /// Example: @@ -575,6 +622,7 @@ class basic_manager { /// \brief Sets a description to an object created with /// construct/find_or_construct + /// \copydoc doc_object_attrb_obj_const_thread_safe /// /// \details /// Example: @@ -605,9 +653,11 @@ class basic_manager { /// \brief Returns Returns the number of named objects stored in the managed /// segment. + /// \copydoc doc_object_attrb_obj_family + /// \copydoc doc_object_attrb_obj_const_thread_safe /// /// \details - /// \copydoc common_doc_const_find + /// \copydoc doc_object_attrb_obj_family /// /// \return The number of named objects stored in the managed segment. size_type get_num_named_objects() const noexcept { @@ -625,9 +675,8 @@ class basic_manager { /// \brief Returns Returns the number of unique objects stored in the managed /// segment. - /// - /// \details - /// \copydoc common_doc_const_find + /// \copydoc doc_object_attrb_obj_family + /// \copydoc doc_object_attrb_obj_const_thread_safe /// /// \return The number of unique objects stored in the managed segment. 
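  // --- Editor's note --------------------------------------------------- //
  // The \copydoc tags used above pull shared text out of the \private
  // pseudo-classes defined earlier in this file. The mechanism in miniature
  // (names here are hypothetical):
  //
  //   /// \private
  //   /// \class doc_shared_note
  //   /// \details This sentence is written once.
  //
  //   /// \brief Does X.
  //   /// \copydoc doc_shared_note
  //   void f();  // Doxygen renders f() with the shared sentence inlined
  // ---------------------------------------------------------------------- //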
size_type get_num_unique_objects() const noexcept { @@ -645,8 +694,10 @@ class basic_manager { /// \brief Returns Returns the number of anonymous objects (objects /// constructed with metall::anonymous_instance) stored in the managed - /// segment. \return The number of anonymous objects stored in the managed /// segment. + /// \copydoc doc_object_attrb_obj_const_thread_safe + /// + /// \return The number of anonymous objects stored in the managed segment. size_type get_num_anonymous_objects() const noexcept { if (!check_sanity()) { return 0; @@ -661,9 +712,8 @@ class basic_manager { } /// \brief Returns a constant iterator to the index storing the named objects. - /// - /// \details - /// \copydoc common_doc_const_find + /// \copydoc doc_object_attrb_obj_family + /// \copydoc doc_object_attrb_obj_const_thread_safe /// /// \return A constant iterator to the index storing the named objects. const_named_iterator named_begin() const noexcept { @@ -681,9 +731,8 @@ class basic_manager { /// \brief Returns a constant iterator to the end of the index storing the /// named allocations. - /// - /// \details - /// \copydoc common_doc_const_find + /// \copydoc doc_object_attrb_obj_family + /// \copydoc doc_object_attrb_obj_const_thread_safe /// /// \return A constant iterator. const_named_iterator named_end() const noexcept { @@ -701,9 +750,8 @@ class basic_manager { /// \brief Returns a constant iterator to the index storing the unique /// objects. - /// - /// \details - /// \copydoc common_doc_const_find + /// \copydoc doc_object_attrb_obj_family + /// \copydoc doc_object_attrb_obj_const_thread_safe /// /// \return A constant iterator to the index storing the unique objects. const_unique_iterator unique_begin() const noexcept { @@ -720,10 +768,9 @@ class basic_manager { } /// \brief Returns a constant iterator to the end of the index - /// storing the unique allocations. NOT thread-safe. Never throws. - /// - /// \details - /// \copydoc common_doc_const_find + /// storing the unique allocations. + /// \copydoc doc_object_attrb_obj_family + /// \copydoc doc_object_attrb_obj_const_thread_safe /// /// \return A constant iterator. const_unique_iterator unique_end() const noexcept { @@ -740,7 +787,10 @@ class basic_manager { } /// \brief Returns a constant iterator to the index storing the anonymous - /// objects. \return A constant iterator to the index storing the anonymous + /// objects. + /// \copydoc doc_object_attrb_obj_const_thread_safe + /// + /// \return A constant iterator to the index storing the anonymous /// objects. const_anonymous_iterator anonymous_begin() const noexcept { if (!check_sanity()) { @@ -756,7 +806,9 @@ class basic_manager { } /// \brief Returns a constant iterator to the end of the index - /// storing the anonymous allocations. NOT thread-safe. Never throws. + /// storing the anonymous allocations. + /// \copydoc doc_object_attrb_obj_const_thread_safe + /// /// \return A constant iterator. const_anonymous_iterator anonymous_end() const noexcept { if (!check_sanity()) { @@ -775,7 +827,10 @@ class basic_manager { // bool belongs_to_segment (const void *ptr) const; // ---------- Allocate memory by size ---------- // + /// \brief Allocates nbytes bytes. + /// \copydoc doc_thread_safe_alloc + /// /// \param nbytes Number of bytes to allocate. /// \return Returns a pointer to the allocated memory. void *allocate(size_type nbytes) noexcept { @@ -793,7 +848,10 @@ class basic_manager { } /// \brief Allocates nbytes bytes. 
The address of the allocated memory will be - /// a multiple of alignment. \param nbytes Number of bytes to allocate. Must + /// a multiple of alignment. + /// \copydoc doc_thread_safe_alloc + /// + /// \param nbytes Number of bytes to allocate. Must /// be a multiple alignment. \param alignment Alignment size. Alignment must /// be a power of two and satisfy [min allocation size, chunk size]. \return /// Returns a pointer to the allocated memory. @@ -815,6 +873,8 @@ class basic_manager { // size_type n_elements, multiallocation_chain &chain); /// \brief Deallocates the allocated memory. + /// \copydoc doc_thread_safe_alloc + /// /// \param addr A pointer to the allocated memory to be deallocated. void deallocate(void *addr) noexcept { if (!check_sanity()) { @@ -832,9 +892,12 @@ class basic_manager { // void deallocate_many(multiallocation_chain &chain); /// \brief Check if all allocated memory has been deallocated. + /// \copydoc doc_no_alloc_thread_safe + /// + /// \details /// This function is not cheap if many objects has not been deallocated. - /// \return Returns true if all allocated memory has been deallocated; - /// otherwise, false. + /// \return Returns + /// true if all allocated memory has been deallocated; otherwise, false. bool all_memory_deallocated() const noexcept { if (!check_sanity()) { return false; @@ -851,6 +914,8 @@ class basic_manager { // ---------- Flush ---------- // /// \brief Flush data to persistent memory. + /// \copydoc doc_single_thread + /// /// \param synchronous If true, performs synchronous operation; /// otherwise, performs asynchronous operation. void flush(const bool synchronous = true) noexcept { @@ -868,6 +933,8 @@ class basic_manager { // -------- Snapshot, copy, data store management -------- // /// \brief Takes a snapshot of the current data. The snapshot has a new UUID. + /// \copydoc doc_single_thread + /// /// \param destination_dir_path Path to store a snapshot. /// \param clone Use the file clone mechanism (reflink) instead of normal copy /// if it is available. \param num_max_copy_threads The maximum number of copy @@ -891,7 +958,11 @@ class basic_manager { /// \brief Copies data store synchronously. /// The behavior of copying a data store that is open without the read-only - /// mode is undefined. \param source_dir_path Source data store path. \param + /// mode is undefined. + /// \copydoc doc_thread_safe + /// \details Copying to the same path simultaneously is prohibited. + /// + /// \param source_dir_path Source data store path.\param /// destination_dir_path Destination data store path. \param clone Use the /// file clone mechanism (reflink) instead of normal copy if it is available. /// \param num_max_copy_threads The maximum number of copy threads to use. @@ -913,7 +984,11 @@ class basic_manager { /// \brief Copies data store asynchronously. /// The behavior of copying a data store that is open without the read-only - /// mode is undefined. \param source_dir_path Source data store path. \param + /// mode is undefined. + /// \copydoc doc_thread_safe + /// \details Copying to the same path simultaneously is prohibited. + /// + /// \param source_dir_path Source data store path. \param /// destination_dir_path Destination data store path. \param clone Use the /// file clone mechanism (reflink) instead of normal copy if it is available. /// \param num_max_copy_threads The maximum number of copy threads to use. @@ -935,8 +1010,11 @@ class basic_manager { } /// \brief Removes data store synchronously. 
- /// \param dir_path Path to a data store to remove. - /// \return If succeeded, returns true; other false. + /// \copydoc doc_thread_safe + /// \details Must not remove the same data store simultaneously. + /// + /// \param dir_path Path to a data store to remove. \return If + /// succeeded, returns true; other false. static bool remove(const char_type *dir_path) noexcept { try { return manager_kernel_type::remove(dir_path); @@ -948,6 +1026,9 @@ class basic_manager { } /// \brief Remove data store asynchronously. + /// \copydoc doc_thread_safe + /// \details Must not remove the same data store simultaneously. + /// /// \param dir_path Path to a data store to remove. /// \return Returns an object of std::future. /// If succeeded, its get() returns true; other false @@ -962,7 +1043,15 @@ class basic_manager { } /// \brief Check if a data store exists and is consistent (i.e., it was closed - /// properly in the previous run). \param dir_path Path to a data store. + /// properly in the previous run). + /// \copydoc doc_thread_safe + /// + /// \details + /// Calling this function against a data store that is open without the + /// read-only mode is undefined. + /// If the data store is not consistent, it is recommended to remove + /// the data store and create a new one. + /// \param dir_path Path to a data store. /// \return Returns true if it exists and is consistent; otherwise, returns /// false. static bool consistent(const char_type *dir_path) noexcept { @@ -976,6 +1065,8 @@ class basic_manager { } /// \brief Returns a UUID of the data store. + /// \copydoc doc_thread_safe + /// /// \return UUID in the std::string format; returns an empty string on error. std::string get_uuid() const noexcept { if (!check_sanity()) { @@ -991,6 +1082,8 @@ class basic_manager { } /// \brief Returns a UUID of the data store. + /// \copydoc doc_thread_safe + /// /// \param dir_path Path to a data store. /// \return UUID in the std::string format; returns an empty string on error. static std::string get_uuid(const char_type *dir_path) noexcept { @@ -1004,6 +1097,8 @@ class basic_manager { } /// \brief Gets the version of the Metall that created the backing data store. + /// \copydoc doc_thread_safe + /// /// \return Returns a version number; returns 0 on error. version_type get_version() const noexcept { if (!check_sanity()) { @@ -1019,6 +1114,8 @@ class basic_manager { } /// \brief Gets the version of the Metall that created the backing data store. + /// \copydoc doc_thread_safe + /// /// \param dir_path Path to a data store. /// \return Returns a version number; returns 0 on error. static version_type get_version(const char_type *dir_path) noexcept { @@ -1035,7 +1132,11 @@ class basic_manager { /// \brief Sets a description to a Metall data store. /// An existing description is overwritten (only one description per data - /// store). \warning This method is not thread-safe. \param description An + /// store). + /// \copydoc doc_single_thread + /// + /// \copydoc doc_single_thread + /// \param description An /// std::string object that holds a description. \return Returns true on /// success; otherwise, false. bool set_description(const std::string &description) noexcept { @@ -1054,11 +1155,12 @@ class basic_manager { /// \brief Sets a description to a Metall data store. /// An existing description is overwritten (only one description per data - /// store). \warning This function is not thread-safe. Updating the same data - /// store with multiple threads simultaneously could cause an issue. 
\param - /// dir_path Path to a data store. \param description An std::string object - /// that holds a description. \return Returns true on success; otherwise, - /// false. + /// store). + /// \copydoc doc_const_datastore_thread_safe + /// + /// \param dir_path Path to a data store. \param description An std::string + /// object that holds a description. \return Returns true on success; + /// otherwise, false. static bool set_description(const char *dir_path, const std::string &description) noexcept { try { @@ -1072,7 +1174,10 @@ class basic_manager { /// \brief Gets a description. /// If there is no description, nothing to happen to the given description - /// object. \param description A pointer to an std::string object to store a + /// object. + /// \copydoc doc_const_datastore_thread_safe + /// + /// \param description A pointer to an std::string object to store a /// description if it exists. \return Returns true on success; returns false /// on error. Trying to get a non-existent description is not considered as an /// error. @@ -1091,7 +1196,10 @@ class basic_manager { /// \brief Gets a description. /// If there is no description, nothing to happen to the given description - /// object. \param dir_path Path to a data store. \param description A pointer + /// object. + /// \copydoc doc_const_datastore_thread_safe + /// + /// \param dir_path Path to a data store. \param description A pointer /// to an std::string object to store a description if it exists. \return /// Returns true on success; returns false on error. Trying to get a /// non-existent description is not considered as an error. @@ -1108,7 +1216,10 @@ class basic_manager { // ---------- Object attribute ---------- // /// \brief Returns an instance that provides access to the attribute of named - /// objects. \param dir_path Path to a data store. \return Returns an instance + /// objects. + /// \copydoc doc_object_attrb_obj_const_thread_safe + /// + /// \param dir_path Path to a data store. \return Returns an instance /// of named_object_attribute_accessor_type. static named_object_attribute_accessor_type access_named_object_attribute( const char *dir_path) noexcept { @@ -1122,7 +1233,10 @@ class basic_manager { } /// \brief Returns an instance that provides access to the attribute of unique - /// object. \param dir_path Path to a data store. \return Returns an instance + /// object. + /// \copydoc doc_object_attrb_obj_const_thread_safe + /// + /// \param dir_path Path to a data store. \return Returns an instance /// of unique_object_attribute_accessor_type. static unique_object_attribute_accessor_type access_unique_object_attribute( const char *dir_path) noexcept { @@ -1136,7 +1250,10 @@ class basic_manager { } /// \brief Returns an instance that provides access to the attribute of - /// anonymous object. \param dir_path Path to a data store. \return Returns an + /// anonymous object. + /// \copydoc doc_object_attrb_obj_const_thread_safe + /// + /// \param dir_path Path to a data store. \return Returns an /// instance of anonymous_object_attribute_accessor_type. static anonymous_object_attribute_accessor_type access_anonymous_object_attribute(const char *dir_path) noexcept { @@ -1151,6 +1268,8 @@ class basic_manager { // ---------- etc ---------- // /// \brief Returns a STL compatible allocator object. + /// \copydoc doc_thread_safe + /// /// \tparam T Type of the object. /// \return Returns a STL compatible allocator object. 
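  // --- Editor's note --------------------------------------------------- //
  // The usual get_allocator() pattern with an STL container, as a hedged
  // sketch (the path and names are illustrative):
  //
  //   using alloc_t = metall::manager::allocator_type<int>;
  //   using vec_t = std::vector<int, alloc_t>;
  //   metall::manager mgr(metall::create_only, "/tmp/dir");
  //   auto *v = mgr.construct<vec_t>("vec")(mgr.get_allocator<int>());
  //   v->push_back(42);  // the element lives in the managed segment
  // ---------------------------------------------------------------------- //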
template @@ -1169,10 +1288,14 @@ class basic_manager { } /// \brief Returns the internal chunk size. + /// \copydoc doc_thread_safe + /// /// \return The size of internal chunk size. static constexpr size_type chunk_size() noexcept { return k_chunk_size; } /// \brief Returns the address of the application data segment. + /// \copydoc doc_thread_safe + /// /// \return The address of the application data segment. const void *get_address() const noexcept { if (!check_sanity()) { @@ -1189,8 +1312,10 @@ class basic_manager { /// \brief Returns the size (i.e., the maximum total allocation size) of the /// application data segment. This is a theoretical value. The actual total - /// allocation size Metall can handle will be less than that. \return The size - /// of the application data segment. + /// allocation size Metall can handle will be less than that. + /// \copydoc doc_thread_safe + /// + /// \return The size of the application data segment. size_type get_size() const noexcept { if (!check_sanity()) { return 0; @@ -1207,6 +1332,8 @@ class basic_manager { // bool belongs_to_segment (const void *ptr) const /// \brief Checks the sanity. + /// \copydoc doc_thread_safe + /// /// \return Returns true if there is no issue; otherwise, returns false. bool check_sanity() const noexcept { return !!m_kernel && m_kernel->good(); } From 9411aaf4a1bbb64a09bbfb133e393bbea948623a Mon Sep 17 00:00:00 2001 From: Keita Iwabuchi Date: Thu, 14 Sep 2023 18:33:32 -0700 Subject: [PATCH 03/25] Remove data race warnings. --- include/metall/kernel/manager_kernel.hpp | 3 +- include/metall/kernel/manager_kernel_impl.ipp | 37 +++++++++++-------- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/include/metall/kernel/manager_kernel.hpp b/include/metall/kernel/manager_kernel.hpp index 649c76c6..f5ca062b 100644 --- a/include/metall/kernel/manager_kernel.hpp +++ b/include/metall/kernel/manager_kernel.hpp @@ -486,7 +486,7 @@ class manager_kernel { // Private methods // -------------------- // - bool priv_initialized() const; + void priv_sanity_check() const; bool priv_validate_runtime_configuration() const; difference_type priv_to_offset(const void *const ptr) const; void *priv_to_address(difference_type offset) const; @@ -585,7 +585,6 @@ class manager_kernel { // Private fields // -------------------- // bool m_good{false}; - bool m_open{false}; std::string m_base_dir_path{}; size_type m_vm_region_size{0}; void *m_vm_region{nullptr}; diff --git a/include/metall/kernel/manager_kernel_impl.ipp b/include/metall/kernel/manager_kernel_impl.ipp index efdf48e6..123ef31a 100644 --- a/include/metall/kernel/manager_kernel_impl.ipp +++ b/include/metall/kernel/manager_kernel_impl.ipp @@ -58,8 +58,9 @@ bool manager_kernel::open( template void manager_kernel::close() { - // Update m_good - if (priv_initialized()) { + if (m_vm_region) { + priv_sanity_check(); + m_good = false; if (!m_segment_storage.read_only()) { priv_serialize_management_data(); m_segment_storage.sync(true); @@ -78,14 +79,14 @@ void manager_kernel::close() { template void manager_kernel::flush(const bool synchronous) { - assert(priv_initialized()); + priv_sanity_check(); m_segment_storage.sync(synchronous); } template void *manager_kernel::allocate( const manager_kernel::size_type nbytes) { - assert(priv_initialized()); + priv_sanity_check(); if (m_segment_storage.read_only()) return nullptr; const auto offset = m_segment_memory_allocator.allocate(nbytes); @@ -101,7 +102,7 @@ template void *manager_kernel::allocate_aligned( const manager_kernel::size_type 
nbytes, const manager_kernel::size_type alignment) { - assert(priv_initialized()); + priv_sanity_check(); if (m_segment_storage.read_only()) return nullptr; // This requirement could be removed, but it would need some work to do @@ -122,7 +123,7 @@ void *manager_kernel::allocate_aligned( template void manager_kernel::deallocate(void *addr) { - assert(priv_initialized()); + priv_sanity_check(); if (m_segment_storage.read_only()) return; if (!addr) return; m_segment_memory_allocator.deallocate(priv_to_offset(addr)); @@ -130,7 +131,7 @@ void manager_kernel::deallocate(void *addr) { template bool manager_kernel::all_memory_deallocated() const { - assert(priv_initialized()); + priv_sanity_check(); return m_segment_memory_allocator.all_memory_deallocated(); } @@ -138,7 +139,7 @@ template template std::pair::size_type> manager_kernel::find(char_ptr_holder_type name) const { - assert(priv_initialized()); + priv_sanity_check(); if (name.is_anonymous()) { return std::make_pair(nullptr, 0); @@ -166,7 +167,7 @@ manager_kernel::find(char_ptr_holder_type name) const { template template bool manager_kernel::destroy(char_ptr_holder_type name) { - assert(priv_initialized()); + priv_sanity_check(); if (m_segment_storage.read_only()) return false; if (name.is_anonymous()) { @@ -199,7 +200,7 @@ bool manager_kernel::destroy(char_ptr_holder_type name) { template template bool manager_kernel::destroy_ptr(const T *ptr) { - assert(priv_initialized()); + priv_sanity_check(); if (m_segment_storage.read_only()) return false; size_type length = 0; @@ -425,7 +426,7 @@ template T *manager_kernel::generic_construct( char_ptr_holder_type name, const size_type num, const bool try2find, [[maybe_unused]] const bool do_throw, mdtl::in_place_interface &table) { - assert(priv_initialized()); + priv_sanity_check(); return priv_generic_construct(name, num, try2find, table); } @@ -658,11 +659,15 @@ bool manager_kernel::priv_init_datastore_directory( } template -bool manager_kernel::priv_initialized() const { +void manager_kernel::priv_sanity_check() const { + assert(m_good); assert(!m_base_dir_path.empty()); + assert(m_vm_region_size > 0); + assert(m_vm_region); + assert(m_segment_header); + // TODO: add sanity check functions in other classes assert(m_segment_storage.get_segment()); - return (m_vm_region && m_vm_region_size > 0 && m_segment_header && - m_segment_storage.size() > 0); + assert(m_manager_metadata); } template @@ -1076,7 +1081,7 @@ bool manager_kernel::priv_create( // ---------- For serializing/deserializing ---------- // template bool manager_kernel::priv_serialize_management_data() { - assert(priv_initialized()); + priv_sanity_check(); if (m_segment_storage.read_only()) return true; @@ -1157,7 +1162,7 @@ template bool manager_kernel::priv_snapshot( const char *destination_base_dir_path, const bool clone, const int num_max_copy_threads) { - assert(priv_initialized()); + priv_sanity_check(); m_segment_storage.sync(true); priv_serialize_management_data(); From decd58b58ce18f0af02c1f5df21003ed28256069 Mon Sep 17 00:00:00 2001 From: Keita Iwabuchi Date: Thu, 14 Sep 2023 18:45:50 -0700 Subject: [PATCH 04/25] Bugfix in closing manager kernel --- include/metall/kernel/manager_kernel_impl.ipp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/metall/kernel/manager_kernel_impl.ipp b/include/metall/kernel/manager_kernel_impl.ipp index 123ef31a..f75b3d8c 100644 --- a/include/metall/kernel/manager_kernel_impl.ipp +++ b/include/metall/kernel/manager_kernel_impl.ipp @@ -60,12 +60,12 @@ template void 
manager_kernel::close() { if (m_vm_region) { priv_sanity_check(); - m_good = false; if (!m_segment_storage.read_only()) { priv_serialize_management_data(); m_segment_storage.sync(true); } + m_good = false; m_segment_storage.destroy(); priv_deallocate_segment_header(); priv_release_vm_region(); From e6178cb82c84fcccc9a41a8c77feb798521cb204 Mon Sep 17 00:00:00 2001 From: iwabuchi Date: Fri, 22 Sep 2023 15:33:31 -0700 Subject: [PATCH 05/25] Disable asserts that cause false positive race warnings --- include/metall/kernel/manager_kernel_impl.ipp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/metall/kernel/manager_kernel_impl.ipp b/include/metall/kernel/manager_kernel_impl.ipp index f75b3d8c..29b01498 100644 --- a/include/metall/kernel/manager_kernel_impl.ipp +++ b/include/metall/kernel/manager_kernel_impl.ipp @@ -94,7 +94,10 @@ void *manager_kernel::allocate( return nullptr; } assert(offset >= 0); +#if !(defined(__has_feature) && __has_feature(thread_sanitizer)) assert(offset + nbytes <= m_segment_storage.size()); +#endif + return priv_to_address(offset); } @@ -114,7 +117,9 @@ void *manager_kernel::allocate_aligned( return nullptr; } assert(offset >= 0); +#if !(defined(__has_feature) && __has_feature(thread_sanitizer)) assert(offset + nbytes <= m_segment_storage.size()); +#endif auto *addr = priv_to_address(offset); assert((uint64_t)addr % alignment == 0); From 2a7f29cba86d7d77b4ab751c7ea2964d756b7c74 Mon Sep 17 00:00:00 2001 From: iwabuchi Date: Fri, 22 Sep 2023 20:21:49 -0700 Subject: [PATCH 06/25] Remove asserts that cause false positive race warnings --- include/metall/kernel/manager_kernel_impl.ipp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/include/metall/kernel/manager_kernel_impl.ipp b/include/metall/kernel/manager_kernel_impl.ipp index 29b01498..6f4eaa20 100644 --- a/include/metall/kernel/manager_kernel_impl.ipp +++ b/include/metall/kernel/manager_kernel_impl.ipp @@ -94,9 +94,6 @@ void *manager_kernel::allocate( return nullptr; } assert(offset >= 0); -#if !(defined(__has_feature) && __has_feature(thread_sanitizer)) - assert(offset + nbytes <= m_segment_storage.size()); -#endif return priv_to_address(offset); } @@ -117,9 +114,6 @@ void *manager_kernel::allocate_aligned( return nullptr; } assert(offset >= 0); -#if !(defined(__has_feature) && __has_feature(thread_sanitizer)) - assert(offset + nbytes <= m_segment_storage.size()); -#endif auto *addr = priv_to_address(offset); assert((uint64_t)addr % alignment == 0); From 47f50156dc92fe9c5396ebf0a90674e4d3f3533c Mon Sep 17 00:00:00 2001 From: iwabuchi Date: Sat, 30 Sep 2023 13:07:28 -0700 Subject: [PATCH 07/25] (JSON) Bugfixes in key_value_pair - Fixed duplicated memory free in key_value_pair - Change the way to access string data in general_key_value_pair_equal --- include/metall/json/key_value_pair.hpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/include/metall/json/key_value_pair.hpp b/include/metall/json/key_value_pair.hpp index 2d92ddf3..5fb0ee56 100644 --- a/include/metall/json/key_value_pair.hpp +++ b/include/metall/json/key_value_pair.hpp @@ -26,7 +26,9 @@ template &key_value, const other_key_value_pair_type &other_key_value) noexcept { - if (std::strcmp(key_value.c_str(), other_key_value.c_str()) != 0) + if (key_value.key().length() != other_key_value.key().length()) + return false; + if (std::strcmp(key_value.key_c_str(), other_key_value.key_c_str()) != 0) return false; return key_value.value() == other_key_value.value(); } @@ -100,8 +102,10 @@ class key_value_pair { 
      m_short_key_buf = other.m_short_key_buf;
    } else {
      m_long_key = std::move(other.m_long_key);
+      other.m_long_key = nullptr;
    }
    m_key_length = other.m_key_length;
+    other.m_key_length = 0;
  }

  /// \brief Allocator-extended move constructor
@@ -112,6 +116,7 @@
      m_short_key_buf = other.m_short_key_buf;
    } else {
      m_long_key = std::move(other.m_long_key);
+      other.m_long_key = nullptr;
    }
    m_key_length = other.m_key_length;
    other.m_key_length = 0;
@@ -151,6 +156,7 @@
      m_short_key_buf = other.m_short_key_buf;
    } else {
      m_long_key = std::move(other.m_long_key);
+      other.m_long_key = nullptr;
    }
    m_key_length = other.m_key_length;
    other.m_key_length = 0;

From 401b0db0ea3889b4eaeee8f2b2914fb1e38ba363 Mon Sep 17 00:00:00 2001
From: iwabuchi
Date: Tue, 3 Oct 2023 13:25:45 -0700
Subject: [PATCH 08/25] (JSON) Optimize value_from().

---
 include/metall/json/value_from.hpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/metall/json/value_from.hpp b/include/metall/json/value_from.hpp
index d1074d9c..6652830f 100644
--- a/include/metall/json/value_from.hpp
+++ b/include/metall/json/value_from.hpp
@@ -31,7 +31,8 @@ inline value value_from_impl(
   } else if (input_bj_value.is_double()) {
     out_value = input_bj_value.as_double();
   } else if (input_bj_value.is_string()) {
-    out_value = input_bj_value.as_string().c_str();
+    const auto& str = input_bj_value.as_string();
+    out_value.emplace_string().assign(str.c_str(), str.size());
   } else if (input_bj_value.is_array()) {
     auto &out_array = out_value.emplace_array();
     for (const auto &item : input_bj_value.as_array()) {

From e0db6e4c870399f309c3fc59eb12af0da30a8387 Mon Sep 17 00:00:00 2001
From: iwabuchi
Date: Wed, 4 Oct 2023 21:07:30 -0700
Subject: [PATCH 09/25] Add overwrite option in mpi-adaptor and file remove
 utility function

---
 example/mpi_create.cpp                        |  17 ++-
 .../segment_storage/mmap_segment_storage.hpp  |   7 +-
 include/metall/utility/filesystem.hpp         |  22 ++++
 include/metall/utility/metall_mpi_adaptor.hpp | 122 ++++++++++++------
 4 files changed, 118 insertions(+), 50 deletions(-)
 create mode 100644 include/metall/utility/filesystem.hpp

diff --git a/example/mpi_create.cpp b/example/mpi_create.cpp
index 5457ce20..fc28c583 100644
--- a/example/mpi_create.cpp
+++ b/example/mpi_create.cpp
@@ -4,17 +4,22 @@
 // SPDX-License-Identifier: (Apache-2.0 OR MIT)

 #include <metall/utility/metall_mpi_adaptor.hpp>
+#include <metall/utility/filesystem.hpp>

 int main(int argc, char **argv) {
   ::MPI_Init(&argc, &argv);
   {
-    // mpi_adaptor with the create mode fails if the directory exists.
-    // This remove function fails if directory exists and created by a different
-    // number of MPI ranks.
-    metall::utility::metall_mpi_adaptor::remove("/tmp/metall_mpi");
+    // This overwrite mode fails if the existing file/directory is not a
+    // Metall datastore or if the existing datastore was created by a
+    // different number of MPI ranks.
+    // To forcibly remove the existing datastore, one can use the following
+    // code.
+    // metall::utility::filesystem::remove("/tmp/metall_mpi");
+    // ::MPI_Barrier(MPI_COMM_WORLD);
+    bool overwrite = true;

-    metall::utility::metall_mpi_adaptor mpi_adaptor(metall::create_only,
-                                                    "/tmp/metall_mpi");
+    metall::utility::metall_mpi_adaptor mpi_adaptor(
+        metall::create_only, "/tmp/metall_mpi", MPI_COMM_WORLD, overwrite);

     auto &metall_manager = mpi_adaptor.get_local_manager();
     auto rank = metall_manager.construct<int>("my-rank")();
diff --git a/include/metall/kernel/segment_storage/mmap_segment_storage.hpp b/include/metall/kernel/segment_storage/mmap_segment_storage.hpp
index 013b5067..3017476f 100644
--- a/include/metall/kernel/segment_storage/mmap_segment_storage.hpp
+++ b/include/metall/kernel/segment_storage/mmap_segment_storage.hpp
@@ -330,12 +330,13 @@ class mmap_segment_storage {
     if (is_open())
       return false;  // Cannot open multiple segments simultaneously.

-    std::string s("Open a segment under: " + base_path);
-    logger::out(logger::level::info, __FILE__, __LINE__, s.c_str());
+    {
+      std::string s("Open a segment under: " + base_path);
+      logger::out(logger::level::info, __FILE__, __LINE__, s.c_str());
+    }

     m_base_path = base_path;
     m_vm_region_size = mdtl::round_down(vm_region_size, page_size());
-    ;
     m_segment = reinterpret_cast<void *>(
         mdtl::round_up(reinterpret_cast<uint64_t>(vm_region), page_size()));
     m_read_only = read_only;
diff --git a/include/metall/utility/filesystem.hpp b/include/metall/utility/filesystem.hpp
new file mode 100644
index 00000000..0b9bc45f
--- /dev/null
+++ b/include/metall/utility/filesystem.hpp
@@ -0,0 +1,22 @@
+// Copyright 2023 Lawrence Livermore National Security, LLC and other Metall
+// Project Developers. See the top-level COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+#ifndef METALL_UTILITY_FILESYSTEM_HPP
+#define METALL_UTILITY_FILESYSTEM_HPP
+
+#include <string_view>
+
+#include <metall/detail/file.hpp>
+
+namespace metall::utility::filesystem {
+
+/// \brief Remove a file or directory
+/// \return Upon successful completion, returns true; otherwise, false is
+/// returned. If the file or directory does not exist, true is returned.
+inline bool remove(std::string_view path) {
+  return metall::mtlldetail::remove_file(path.data());
+}
+
+} // namespace metall::utility::filesystem
+
+#endif // METALL_UTILITY_FILESYSTEM_HPP
diff --git a/include/metall/utility/metall_mpi_adaptor.hpp b/include/metall/utility/metall_mpi_adaptor.hpp
index 783b0bd6..9af343a8 100644
--- a/include/metall/utility/metall_mpi_adaptor.hpp
+++ b/include/metall/utility/metall_mpi_adaptor.hpp
@@ -40,7 +40,9 @@ class metall_mpi_adaptor {
       : m_mpi_comm(comm),
         m_root_dir_prefix(root_dir_prefix),
         m_local_metall_manager(nullptr) {
-    priv_verify_num_partitions(root_dir_prefix, comm);
+    if (!priv_verify_num_partitions(root_dir_prefix, comm)) {
+      ::MPI_Abort(comm, -1);
+    }
     m_local_metall_manager = std::make_unique<manager_type>(
         metall::open_only,
         ds::make_local_dir_path(m_root_dir_prefix, priv_mpi_comm_rank(comm))
@@ -56,7 +58,9 @@
       : m_mpi_comm(comm),
         m_root_dir_prefix(root_dir_prefix),
         m_local_metall_manager(nullptr) {
-    priv_verify_num_partitions(root_dir_prefix, comm);
+    if (!priv_verify_num_partitions(root_dir_prefix, comm)) {
+      ::MPI_Abort(comm, -1);
+    }
     m_local_metall_manager = std::make_unique<manager_type>(
         metall::open_read_only,
         ds::make_local_dir_path(m_root_dir_prefix, priv_mpi_comm_rank(comm))
@@ -67,12 +71,16 @@
   /// \param root_dir_prefix A root directory path of a Metall datastore.
   /// The same name of file or directory must not exist.
   /// \param comm A MPI communicator.
+ /// \param overwrite If true, overwrite an existing datastore. + /// This mode does not overwrite an existing datastore if it is not Metall + /// datastore created by the same number of MPI processes. metall_mpi_adaptor(metall::create_only_t, const std::string &root_dir_prefix, - const MPI_Comm &comm = MPI_COMM_WORLD) + const MPI_Comm &comm = MPI_COMM_WORLD, + bool overwrite = false) : m_mpi_comm(comm), m_root_dir_prefix(root_dir_prefix), m_local_metall_manager(nullptr) { - priv_setup_root_dir(root_dir_prefix, comm); + priv_setup_root_dir(root_dir_prefix, overwrite, comm); m_local_metall_manager = std::make_unique( metall::create_only, ds::make_local_dir_path(m_root_dir_prefix, priv_mpi_comm_rank(comm)) @@ -84,13 +92,17 @@ class metall_mpi_adaptor { /// The same name of file or directory must not exist. /// \param capacity The max capacity of the datastore. /// \param comm A MPI communicator. + /// \param overwrite If true, overwrite an existing datastore. + /// This mode does not overwrite an existing datastore if it is not Metall + /// datastore created by the same number of MPI processes. metall_mpi_adaptor(metall::create_only_t, const std::string &root_dir_prefix, const std::size_t capacity, - const MPI_Comm &comm = MPI_COMM_WORLD) + const MPI_Comm &comm = MPI_COMM_WORLD, + bool overwrite = false) : m_mpi_comm(comm), m_root_dir_prefix(root_dir_prefix), m_local_metall_manager(nullptr) { - priv_setup_root_dir(root_dir_prefix, comm); + priv_setup_root_dir(root_dir_prefix, overwrite, comm); m_local_metall_manager = std::make_unique( metall::create_only, ds::make_local_dir_path(m_root_dir_prefix, priv_mpi_comm_rank(comm)) @@ -146,11 +158,15 @@ class metall_mpi_adaptor { /// mode is undefined. \param source_dir_path A path to a source datastore. /// \param destination_dir_path A path to a destination datastore. /// \param comm A MPI communicator. + /// \param overwrite If true, overwrite an existing datastore. + /// This mode does not overwrite an existing datastore if it is not Metall + /// datastore created by the same number of MPI processes. /// \return Returns true if all processes success; /// otherwise, returns false. - static bool copy(const char *source_dir_path, - const char *destination_dir_path, - const MPI_Comm &comm = MPI_COMM_WORLD) { + static bool copy(const std::string &source_dir_path, + const std::string &destination_dir_path, + const MPI_Comm &comm = MPI_COMM_WORLD, + bool overwrite = false) { if (!consistent(source_dir_path, comm)) { if (priv_mpi_comm_rank(comm) == 0) { std::stringstream ss; @@ -161,7 +177,7 @@ class metall_mpi_adaptor { } return false; } - priv_setup_root_dir(destination_dir_path, comm); + priv_setup_root_dir(destination_dir_path, overwrite, comm); const int rank = priv_mpi_comm_rank(comm); return priv_global_and( manager_type::copy( @@ -172,10 +188,14 @@ class metall_mpi_adaptor { /// \brief Take a snapshot of the current Metall datastore to another /// location. \param destination_dir_path A path to a destination datastore. + /// \param overwrite If true, overwrite an existing datastore. + /// This mode does not overwrite an existing datastore if it is not Metall + /// datastore created by the same number of MPI processes. /// \return Returns true if all processes success; /// otherwise, returns false. 
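To make the new overwrite flow concrete before the snapshot() signature below, here is a minimal usage sketch built only on the adaptor API introduced in this patch; the datastore paths and the stored object are illustrative, not part of the patch:

#include <mpi.h>
#include <metall/utility/metall_mpi_adaptor.hpp>

int main(int argc, char **argv) {
  ::MPI_Init(&argc, &argv);
  {
    // Recreate the datastore even if a previous run left one behind; this
    // still fails if the existing directory is not a Metall datastore or
    // was made by a different number of ranks.
    const bool overwrite = true;
    metall::utility::metall_mpi_adaptor adaptor(
        metall::create_only, "/tmp/metall_mpi_demo", MPI_COMM_WORLD,
        overwrite);
    auto &manager = adaptor.get_local_manager();
    *manager.construct<int>("my-rank")() = 0;

    // snapshot() accepts the same overwrite flag for its destination.
    adaptor.snapshot("/tmp/metall_mpi_demo_snap", overwrite);
  }
  ::MPI_Finalize();
  return 0;
}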
- bool snapshot(const char *destination_dir_path) {
- priv_setup_root_dir(destination_dir_path, m_mpi_comm);
+ bool snapshot(const std::string &destination_dir_path,
+ bool overwrite = false) {
+ priv_setup_root_dir(destination_dir_path, overwrite, m_mpi_comm);
 const int rank = priv_mpi_comm_rank(m_mpi_comm);
 return priv_global_and(
 m_local_metall_manager->snapshot(
@@ -188,35 +208,35 @@
 /// \param comm A MPI communicator.
 /// \return Returns true if all processes success;
 /// otherwise, returns false.
- static bool remove(const char *root_dir_prefix,
+ /// If there is no directory with the given name, returns true.
+ static bool remove(const std::string &root_dir_prefix,
 const MPI_Comm &comm = MPI_COMM_WORLD) {
 const int rank = priv_mpi_comm_rank(comm);
 const int size = priv_mpi_comm_size(comm);

+ if (!metall::mtlldetail::file_exist(
+ ds::make_root_dir_path(root_dir_prefix))) {
+ // As long as the root directory does not exist, we consider it a
+ // success.
+ return true;
+ }
+
 // ----- Check if this is a Metall datastore ----- //
- bool corrent_dir = true;
+ bool metall_dir = true;
 if (!metall::mtlldetail::file_exist(
 ds::make_root_dir_path(root_dir_prefix) + "/" +
 k_datastore_mark_file_name)) {
- corrent_dir = false;
- }
- if (!priv_global_and(corrent_dir, comm)) {
- return false;
+ metall_dir = false;
 }
-
- // ----- Check if #of MPI processes matches ----- //
- bool correct_mpi_size = true;
- if (rank == 0) {
- const int read_size = priv_read_num_partitions(root_dir_prefix, comm);
- if (read_size != size) {
- correct_mpi_size = false;
- std::stringstream ss;
- ss << " Invalid number of MPI processes (provided " << size << ", "
- << "expected " << correct_mpi_size << ")";
- logger::out(logger::level::error, __FILE__, __LINE__, ss.str().c_str());
+ if (!priv_global_and(metall_dir, comm)) {
+ if (rank == 0) {
+ std::string s("This is not a Metall datastore: " +
+ ds::make_root_dir_path(root_dir_prefix));
+ logger::out(logger::level::error, __FILE__, __LINE__, s.c_str());
 }
+ return false;
 }
- if (!priv_global_and(correct_mpi_size, comm)) {
+ if (!priv_verify_num_partitions(root_dir_prefix, comm)) {
 return false;
 }

@@ -244,9 +264,9 @@ class metall_mpi_adaptor {
 /// \param root_dir_prefix A root directory path of datastore.
 /// \param comm A MPI communicator.
 /// \return The number of partitions of a Metall datastore.
- static int partitions(const char *root_dir_prefix,
+ static int partitions(const std::string &root_dir_prefix,
 const MPI_Comm &comm = MPI_COMM_WORLD) {
- return priv_read_num_partitions(root_dir_prefix, comm);
+ return priv_read_partition_size(root_dir_prefix, comm);
 }

 /// \brief Checks if all local datastores are consistent.
 /// \param root_dir_prefix A root directory path of datastore.
 /// \param comm A MPI communicator.
 /// \return Returns true if all datastores are consistent;
 /// otherwise, returns false.
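Since remove() now detects a missing or foreign directory and a rank-count mismatch itself, a caller can run the same kind of pre-flight check with consistent() and partitions(), whose declarations follow; a sketch under the API shown in this patch (the helper name is hypothetical):

#include <mpi.h>
#include <string>
#include <metall/utility/metall_mpi_adaptor.hpp>

// Returns true when 'path' holds a consistent Metall datastore whose
// partition count matches the size of 'comm'.
bool reopenable(const std::string &path, MPI_Comm comm) {
  using adaptor = metall::utility::metall_mpi_adaptor;
  int size = 0;
  ::MPI_Comm_size(comm, &size);
  return adaptor::consistent(path, comm) &&
         adaptor::partitions(path, comm) == size;
}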
- static bool consistent(const char *root_dir_prefix, + static bool consistent(const std::string &root_dir_prefix, const MPI_Comm &comm = MPI_COMM_WORLD) { const int rank = priv_mpi_comm_rank(comm); const auto local_path = ds::make_local_dir_path(root_dir_prefix, rank); @@ -271,12 +291,28 @@ class metall_mpi_adaptor { // -------------------- // // Private methods // -------------------- // + static void priv_remove_for_overwrite(const std::string &root_dir_prefix, + const MPI_Comm &comm) { + if (!remove(root_dir_prefix, comm)) { + if (priv_mpi_comm_rank(comm) == 0) { + std::stringstream ss; + ss << "Failed to overwrite " << root_dir_prefix; + logger::out(logger::level::error, __FILE__, __LINE__, ss.str().c_str()); + ::MPI_Abort(comm, -1); + } + } + } + static void priv_setup_root_dir(const std::string &root_dir_prefix, - const MPI_Comm &comm) { + bool overwrite, const MPI_Comm &comm) { const int rank = priv_mpi_comm_rank(comm); const int size = priv_mpi_comm_size(comm); const std::string root_dir_path = ds::make_root_dir_path(root_dir_prefix); + if (overwrite) { + priv_remove_for_overwrite(root_dir_prefix, comm); + } + // Make sure the root directory and a file with the same name do not exist const auto local_ret = metall::mtlldetail::file_exist(root_dir_path); if (priv_global_or(local_ret, comm)) { @@ -333,7 +369,7 @@ class metall_mpi_adaptor { ofs.close(); } - static int priv_read_num_partitions(const std::string &root_dir_prefix, + static int priv_read_partition_size(const std::string &root_dir_prefix, const MPI_Comm &comm) { const std::string path = ds::make_root_dir_path(root_dir_prefix) + "/" + k_partition_size_file_name; @@ -353,19 +389,23 @@ class metall_mpi_adaptor { return read_size; } - static void priv_verify_num_partitions(const std::string &root_dir_prefix, + static bool priv_verify_num_partitions(const std::string &root_dir_prefix, const MPI_Comm &comm) { const int rank = priv_mpi_comm_rank(comm); const int size = priv_mpi_comm_size(comm); + bool correct_mpi_size = true; if (rank == 0) { - if (priv_read_num_partitions(root_dir_prefix, comm) != size) { - logger::out(logger::level::error, __FILE__, __LINE__, - "Invalid number of MPI processes"); - ::MPI_Abort(comm, -1); + const int read_size = priv_read_partition_size(root_dir_prefix, comm); + if (read_size != size) { + correct_mpi_size = false; + std::stringstream ss; + ss << "Invalid number of MPI processes (provided " << size << ", " + << "expected " << read_size << ")"; + logger::out(logger::level::error, __FILE__, __LINE__, ss.str().c_str()); } } - priv_mpi_barrier(comm); + return priv_global_and(correct_mpi_size, comm); } static int priv_mpi_comm_rank(const MPI_Comm &comm) { From 3d6f0071a878bb44d2419c99e965933f726c0372 Mon Sep 17 00:00:00 2001 From: iwabuchi Date: Thu, 5 Oct 2023 12:36:51 -0700 Subject: [PATCH 10/25] Make hash functions more generic --- .../edge_generator/rmat_edge_generator.hpp | 4 +-- .../test/compare_key_value_lists.cpp | 4 +-- .../multithread_adjacency_list.hpp | 13 ++++---- .../container/experimental/jgraph/jgraph.hpp | 13 ++++---- include/metall/detail/hash.hpp | 30 +++++++++++-------- .../kernel/attributed_object_directory.hpp | 4 +-- include/metall/kernel/object_cache.hpp | 4 +-- include/metall/utility/hash.hpp | 20 ++++++------- 8 files changed, 49 insertions(+), 43 deletions(-) diff --git a/bench/adjacency_list/edge_generator/rmat_edge_generator.hpp b/bench/adjacency_list/edge_generator/rmat_edge_generator.hpp index 983c929a..056c7185 100644 --- 
a/bench/adjacency_list/edge_generator/rmat_edge_generator.hpp +++ b/bench/adjacency_list/edge_generator/rmat_edge_generator.hpp @@ -94,9 +94,9 @@ class rmat_edge_generator_iterator { const uint64_t mask = (1ULL << m_ptr_parent->m_vertex_scale) - 1; // Assume utility::hash is a good hash function m_current_edge.first = - metall::utility::hash()(m_current_edge.first) & mask; + metall::utility::hash<>()(m_current_edge.first) & mask; m_current_edge.second = - metall::utility::hash()(m_current_edge.second) & mask; + metall::utility::hash<>()(m_current_edge.second) & mask; } ++m_num_generated_edges; } diff --git a/bench/adjacency_list/test/compare_key_value_lists.cpp b/bench/adjacency_list/test/compare_key_value_lists.cpp index 5d134004..dff64556 100644 --- a/bench/adjacency_list/test/compare_key_value_lists.cpp +++ b/bench/adjacency_list/test/compare_key_value_lists.cpp @@ -17,8 +17,8 @@ using key_type = uint64_t; using value_type = uint64_t; using item_type = std::pair; -using table_type = std::unordered_map>; +using table_type = + std::unordered_map>; void ingest_item(const std::string& file_name, table_type* table) { std::ifstream ifs(file_name); diff --git a/bench/data_structure/multithread_adjacency_list.hpp b/bench/data_structure/multithread_adjacency_list.hpp index adb5b39a..c14b0243 100644 --- a/bench/data_structure/multithread_adjacency_list.hpp +++ b/bench/data_structure/multithread_adjacency_list.hpp @@ -48,9 +48,10 @@ class multithread_adjacency_list { using key_table_allocator_type = container::scoped_allocator_adaptor< other_allocator_type>>; - using key_table_type = container::unordered_map< - key_type, list_type, metall::utility::hash, - std::equal_to, key_table_allocator_type>; + using key_table_type = + container::unordered_map, + std::equal_to, + key_table_allocator_type>; using bank_table_allocator_type = container::scoped_allocator_adaptor>; @@ -81,9 +82,9 @@ class multithread_adjacency_list { m_bank_table[bank_index(key)][key].emplace_back(std::move(value)); #else m_bank_table[bank_index(key)][key].emplace_back(std::move(value)); -// m_bank_table[bank_index(key)].try_emplace(key, -// list_allocator_type(m_bank_table.get_allocator())); -// m_bank_table[bank_index(key)].at(key).emplace_back(std::move(value)); + // m_bank_table[bank_index(key)].try_emplace(key, + // list_allocator_type(m_bank_table.get_allocator())); + // m_bank_table[bank_index(key)].at(key).emplace_back(std::move(value)); #endif #else // MEMO: GCC does not work with STL Containers (tested with GCC 10.2.0 on diff --git a/include/metall/container/experimental/jgraph/jgraph.hpp b/include/metall/container/experimental/jgraph/jgraph.hpp index a8fccd7e..14b02b10 100644 --- a/include/metall/container/experimental/jgraph/jgraph.hpp +++ b/include/metall/container/experimental/jgraph/jgraph.hpp @@ -116,11 +116,11 @@ class jgraph { value_type m_value; }; - using vertex_storage_type = mc::unordered_map< - internal_id_type, vertex_data_type, - metall::utility::hash, std::equal_to<>, - other_scoped_allocator< - std::pair>>; + using vertex_storage_type = + mc::unordered_map, std::equal_to<>, + other_scoped_allocator>>; class edge_data_type { public: @@ -184,8 +184,7 @@ class jgraph { using edge_storage_type = mc::unordered_map, - std::equal_to<>, + metall::utility::hash<>, std::equal_to<>, other_scoped_allocator< std::pair>>; diff --git a/include/metall/detail/hash.hpp b/include/metall/detail/hash.hpp index 241bc219..ee0c0dc4 100644 --- a/include/metall/detail/hash.hpp +++ b/include/metall/detail/hash.hpp @@ -46,7 +46,6 
@@ static inline uint64_t murmurhash_getblock(const uint64_t *p) { METALL_PRAGMA_IGNORE_GCC_UNINIT_WARNING_BEGIN inline uint64_t murmur_hash_64a(const void *key, int len, uint64_t seed) noexcept { - const uint64_t m = 0xc6a4a7935bd1e995LLU; const int r = 47; @@ -71,7 +70,7 @@ inline uint64_t murmur_hash_64a(const void *key, int len, switch (len & 7) { case 7: h ^= uint64_t(data2[6]) << 48; - [[fallthrough]]; + [[fallthrough]]; case 6: h ^= uint64_t(data2[5]) << 40; [[fallthrough]]; @@ -108,24 +107,31 @@ inline uint64_t MurmurHash64A(const void *key, int len, } /// \brief Hash a value of type T. Provides the same interface as std::hash. -/// \tparam T The type of a value to hash. /// \tparam seed A seed value used for hashing. -template +template struct hash { - inline uint64_t operator()(const T &key) const noexcept { + template + inline std::size_t operator()(const T &key) const noexcept { return murmur_hash_64a(&key, sizeof(T), seed); } }; /// \brief Hash string data. -/// \tparam string_type A string class. /// \tparam seed A seed value used for hashing. -template -struct string_hash { - inline uint64_t operator()(const string_type &key) const noexcept { - return murmur_hash_64a( - key.c_str(), key.length() * sizeof(typename string_type::value_type), - seed); +template +struct str_hash { + using is_transparent = void; + + template + inline std::size_t operator()(const string_type &str) const noexcept { + if constexpr (std::is_same_v || + std::is_same_v) { + return murmur_hash_64a(str, std::char_traits::length(str), seed); + } else { + return murmur_hash_64a( + str.c_str(), str.length() * sizeof(typename string_type::value_type), + seed); + } } }; diff --git a/include/metall/kernel/attributed_object_directory.hpp b/include/metall/kernel/attributed_object_directory.hpp index 11da2168..3d85d5b4 100644 --- a/include/metall/kernel/attributed_object_directory.hpp +++ b/include/metall/kernel/attributed_object_directory.hpp @@ -112,10 +112,10 @@ class attributed_object_directory { // Following tables hold the index of the corresponding entry of each key using offset_index_table_type = boost::unordered_map>; + mdtl::hash<>>; using name_index_table_type = boost::unordered_map>; + mdtl::str_hash<>>; public: // -------------------- // diff --git a/include/metall/kernel/object_cache.hpp b/include/metall/kernel/object_cache.hpp index 319958af..0ec4fa54 100644 --- a/include/metall/kernel/object_cache.hpp +++ b/include/metall/kernel/object_cache.hpp @@ -200,11 +200,11 @@ class object_cache { std::hash{}(std::this_thread::get_id()) % k_num_cache_per_core; const std::size_t core_num = priv_get_core_no(); - return mdtl::hash{}(core_num * k_num_cache_per_core + + return mdtl::hash<>{}(core_num * k_num_cache_per_core + sub_cache_no) % m_cache_table.size(); #else - thread_local static const auto hashed_thread_id = mdtl::hash{}( + thread_local static const auto hashed_thread_id = mdtl::hash<>{}( std::hash{}(std::this_thread::get_id())); return hashed_thread_id % m_cache_table.size(); #endif diff --git a/include/metall/utility/hash.hpp b/include/metall/utility/hash.hpp index 98231950..b1d63119 100644 --- a/include/metall/utility/hash.hpp +++ b/include/metall/utility/hash.hpp @@ -10,17 +10,17 @@ namespace metall::utility { -/// \brief Hash a value of type T -/// \tparam T The type of a value to hash -/// \tparam seed A seed value used for hashing -template -using hash = metall::mtlldetail::hash; +/// \brief Hash a value of type T. +/// \tparam T Data type to hash. 
+/// If void is specified, the type to hash is deduced by operator().
+/// \tparam seed A seed value used for hashing.
+template
+using hash = metall::mtlldetail::hash;

-/// \brief Hash string data
-/// \tparam string_type A string class
-/// \tparam seed A seed value used for hashing
-template
-using string_hash = metall::mtlldetail::string_hash;
+/// \brief Hash function for std::string-compatible string containers.
+/// \tparam seed A seed value used for hashing.
+template
+using str_hash = metall::mtlldetail::str_hash;

 } // namespace metall::utility

From 822585aa54c5e4c5cf6ae0cc21f274e009c3f823 Mon Sep 17 00:00:00 2001
From: iwabuchi
Date: Mon, 9 Oct 2023 11:08:40 -0700
Subject: [PATCH 11/25] Add utility functions in the object cache container

---
 include/metall/kernel/object_cache_container.hpp | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/include/metall/kernel/object_cache_container.hpp b/include/metall/kernel/object_cache_container.hpp
index cab9470f..92c193f2 100644
--- a/include/metall/kernel/object_cache_container.hpp
+++ b/include/metall/kernel/object_cache_container.hpp
@@ -85,18 +85,31 @@ class object_cache_container {
 using bin_no_type = typename bin_no_manager::bin_no_type;

 private:
+ // Hold all cached objects' offsets in a 1D array rather than splitting them
+ // into bins for better locality.
 using cache_table_type = std::vector;
+ // Cache capacity per bin.
 static constexpr auto capacity_table = objccdetail::capacity_table;
+ // Offset to the beginning of each bin in the cache table.
 static constexpr auto offset_table = objccdetail::offset_table;
+ // Maximum number of offsets the cache table can hold.
 static constexpr auto k_cache_capacity = objccdetail::compute_capacity();

 public:
 using const_iterator = typename cache_table_type::const_iterator;

+ static constexpr std::size_t bin_size() noexcept { return k_bin_size; }
+
+ static std::size_t num_bins() noexcept { return k_num_bins; }
+
+ static std::size_t bin_capacity(const bin_no_type bin_no) noexcept {
+ return capacity_table[bin_no];
+ }
+
 object_cache_container()
 : m_count_table(k_num_bins, 0), m_cache(k_cache_capacity) {}

From 4f74811b38cf9085a544c7bc58402da72c4c1c82 Mon Sep 17 00:00:00 2001
From: iwabuchi
Date: Mon, 9 Oct 2023 11:55:33 -0700
Subject: [PATCH 12/25] Log object cache status

---
 include/metall/kernel/object_cache.hpp | 32 ++++++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

diff --git a/include/metall/kernel/object_cache.hpp b/include/metall/kernel/object_cache.hpp
index 0ec4fa54..8218a088 100644
--- a/include/metall/kernel/object_cache.hpp
+++ b/include/metall/kernel/object_cache.hpp
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include

 #include

@@ -89,6 +90,7 @@ class object_cache {
 m_mutex(m_cache_table.size())
#endif
 {
+ priv_const_helper();
 }

 ~object_cache() noexcept = default;

@@ -194,14 +196,40 @@ class object_cache {
 static_cast(8));
 }

+ void priv_const_helper() {
+ if (get_num_cores() == 0) {
+ logger::out(logger::level::critical, __FILE__, __LINE__,
+ "The detected number of cores is zero");
+ return;
+ }
+ {
+ std::stringstream ss;
+ ss << "The number of cores: " << get_num_cores();
+ logger::out(logger::level::info, __FILE__, __LINE__, ss.str().c_str());
+ }
+ {
+ std::stringstream ss;
+ ss << "#of caches: " << m_cache_table.size();
+ logger::out(logger::level::info, __FILE__, __LINE__, ss.str().c_str());
+ }
+ {
+ std::stringstream ss;
+ ss << "Cache capacity per bin: ";
+ for (std::size_t b = 0; b < single_cache_type::num_bins(); ++b) {
+ ss <<
single_cache_type::bin_capacity(b); + if (b < single_cache_type::num_bins() - 1) ss << " "; + } + logger::out(logger::level::info, __FILE__, __LINE__, ss.str().c_str()); + } + } + std::size_t priv_comp_cache_no() const { #if SUPPORT_GET_CPU_CORE_NO thread_local static const auto sub_cache_no = std::hash{}(std::this_thread::get_id()) % k_num_cache_per_core; const std::size_t core_num = priv_get_core_no(); - return mdtl::hash<>{}(core_num * k_num_cache_per_core + - sub_cache_no) % + return mdtl::hash<>{}(core_num * k_num_cache_per_core + sub_cache_no) % m_cache_table.size(); #else thread_local static const auto hashed_thread_id = mdtl::hash<>{}( From 47dea96f8650955cdef9f2b29ec8bbf27a157493 Mon Sep 17 00:00:00 2001 From: iwabuchi Date: Mon, 9 Oct 2023 16:02:14 -0700 Subject: [PATCH 13/25] Add single thread allocation mode (experimental). --- CMakeLists.txt | 9 +++++++ include/metall/kernel/object_cache.hpp | 36 +++++++++++++++++++------- test/kernel/CMakeLists.txt | 3 +++ 3 files changed, 39 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 16c14325..7b268253 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -101,6 +101,10 @@ set(INITIAL_SEGMENT_SIZE "0" CACHE STRING "Set the initial segment size (use the internally defined value if 0 is specified)") # ---------- Experimental options ---------- # +# This mode still uses multiple threads inside Metall. +# However, applications must not use metall with multiple threads. +option(SINGLE_THREAD_ALLOC "Optimize Metall kernel for single thread usage" OFF) + option(USE_ANONYMOUS_NEW_MAP "Use the anonymous map when creating a new map region" OFF) set(UMAP_ROOT "" CACHE PATH "UMap installed root directory") @@ -211,6 +215,11 @@ if (USE_ANONYMOUS_NEW_MAP) message(STATUS "Use the anonymous map for new map region") endif () +if (SINGLE_THREAD_ALLOC) + list(APPEND METALL_DEFS "METALL_SINGLE_THREAD_ALLOC") + message(STATUS "Optimize Metall kernel for single thread usage") +endif () + # Requirements for GCC if (NOT RUN_BUILD_AND_TEST_WITH_CI) if (("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU") OR ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")) diff --git a/include/metall/kernel/object_cache.hpp b/include/metall/kernel/object_cache.hpp index 8218a088..4c2890a0 100644 --- a/include/metall/kernel/object_cache.hpp +++ b/include/metall/kernel/object_cache.hpp @@ -1,4 +1,4 @@ -// Copyright 2019 Lawrence Livermore National Security, LLC and other Metall +// Copyright 2023 Lawrence Livermore National Security, LLC and other Metall // Project Developers. See the top-level COPYRIGHT file for details. 
// // SPDX-License-Identifier: (Apache-2.0 OR MIT) @@ -19,10 +19,13 @@ #include #include #include -#define ENABLE_MUTEX_IN_METALL_OBJECT_CACHE 1 -#if ENABLE_MUTEX_IN_METALL_OBJECT_CACHE + +#ifndef METALL_SINGLE_THREAD_ALLOC +#define METALL_ENABLE_MUTEX_IN_OBJECT_CACHE +#ifdef METALL_ENABLE_MUTEX_IN_OBJECT_CACHE #include #endif +#endif namespace metall::kernel { @@ -54,7 +57,12 @@ class object_cache { // Private types and static values // -------------------- // - static constexpr std::size_t k_num_cache_per_core = 4; + static constexpr std::size_t k_num_cache_per_core = +#ifdef METALL_SINGLE_THREAD_ALLOC + 1; +#else + 4; +#endif static constexpr std::size_t k_cache_bin_size = 1ULL << 20ULL; static constexpr std::size_t k_max_cache_block_size = 64; // Add and remove caches by up to this size @@ -69,7 +77,7 @@ class object_cache { difference_type, bin_no_manager>; using cache_table_type = std::vector; -#if ENABLE_MUTEX_IN_METALL_OBJECT_CACHE +#ifdef METALL_ENABLE_MUTEX_IN_OBJECT_CACHE using mutex_type = mdtl::mutex; using lock_guard_type = mdtl::mutex_lock_guard; #endif @@ -85,7 +93,7 @@ class object_cache { // -------------------- // object_cache() : m_cache_table(get_num_cores() * k_num_cache_per_core) -#if ENABLE_MUTEX_IN_METALL_OBJECT_CACHE +#ifdef METALL_ENABLE_MUTEX_IN_OBJECT_CACHE , m_mutex(m_cache_table.size()) #endif @@ -112,7 +120,7 @@ class object_cache { if (bin_no > max_bin_no()) return -1; const auto cache_no = priv_comp_cache_no(); -#if ENABLE_MUTEX_IN_METALL_OBJECT_CACHE +#ifdef METALL_ENABLE_MUTEX_IN_OBJECT_CACHE lock_guard_type guard(m_mutex[cache_no]); #endif if (m_cache_table[cache_no].empty(bin_no)) { @@ -140,7 +148,7 @@ class object_cache { if (bin_no > max_bin_no()) return false; // Error const auto cache_no = priv_comp_cache_no(); -#if ENABLE_MUTEX_IN_METALL_OBJECT_CACHE +#ifdef METALL_ENABLE_MUTEX_IN_OBJECT_CACHE lock_guard_type guard(m_mutex[cache_no]); #endif m_cache_table[cache_no].push(bin_no, object_offset); @@ -224,6 +232,9 @@ class object_cache { } std::size_t priv_comp_cache_no() const { +#ifdef METALL_SINGLE_THREAD_ALLOC + return 0; +#endif #if SUPPORT_GET_CPU_CORE_NO thread_local static const auto sub_cache_no = std::hash{}(std::this_thread::get_id()) % @@ -241,6 +252,9 @@ class object_cache { /// \brief Get CPU core number. /// This function does not call the system call every time as it is slow. 
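The caching idea used by priv_get_core_no() just below can be shown in isolation; a standalone sketch of the same pattern (sched_getcpu() is Linux-specific and needs _GNU_SOURCE on glibc, and the refresh interval here is an arbitrary illustration, not Metall's actual value):

#include <sched.h>  // ::sched_getcpu()

// Each thread caches its CPU number and re-queries the OS only every
// k_refresh_interval calls, since the query is comparatively slow.
inline int cached_cpu_no() {
  constexpr int k_refresh_interval = 1 << 16;
  thread_local static int cached = -1;
  thread_local static int num_calls = 0;
  if (cached < 0 || ++num_calls % k_refresh_interval == 0) {
    cached = ::sched_getcpu();  // Returns -1 on failure
  }
  return cached;
}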
static std::size_t priv_get_core_no() { +#ifdef METALL_SINGLE_THREAD_ALLOC + return 0; +#endif thread_local static int cached_core_no = 0; thread_local static int cached_count = 0; if (cached_core_no == 0) { @@ -251,14 +265,18 @@ class object_cache { } static std::size_t get_num_cores() { +#ifdef METALL_SINGLE_THREAD_ALLOC + return 1; +#else return std::thread::hardware_concurrency(); +#endif } // -------------------- // // Private fields // -------------------- // cache_table_type m_cache_table; -#if ENABLE_MUTEX_IN_METALL_OBJECT_CACHE +#ifdef METALL_ENABLE_MUTEX_IN_OBJECT_CACHE std::vector m_mutex; #endif }; diff --git a/test/kernel/CMakeLists.txt b/test/kernel/CMakeLists.txt index 46507536..ee042dd8 100644 --- a/test/kernel/CMakeLists.txt +++ b/test/kernel/CMakeLists.txt @@ -10,6 +10,9 @@ add_metall_test_executable(chunk_directory_test chunk_directory_test.cpp) add_metall_test_executable(manager_test manager_test.cpp) +add_metall_test_executable(manager_test_single_thread manager_test.cpp) +target_compile_definitions(manager_test_single_thread PRIVATE METALL_SINGLE_THREAD_ALLOC) + add_metall_test_executable(snapshot_test snapshot_test.cpp) add_metall_test_executable(copy_file_test copy_file_test.cpp) From 7026a806c211798ad1e87ae63015a70207042453 Mon Sep 17 00:00:00 2001 From: iwabuchi Date: Mon, 9 Oct 2023 19:37:58 -0700 Subject: [PATCH 14/25] Change the single thread alloc macro name --- CMakeLists.txt | 9 ------ include/metall/basic_manager.hpp | 11 +++++++ include/metall/kernel/manager_kernel.hpp | 10 ++++--- include/metall/kernel/manager_kernel_impl.ipp | 10 +++---- include/metall/kernel/object_cache.hpp | 12 ++++---- include/metall/kernel/segment_allocator.hpp | 29 ++++++++++--------- test/kernel/CMakeLists.txt | 2 +- 7 files changed, 45 insertions(+), 38 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7b268253..16c14325 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -101,10 +101,6 @@ set(INITIAL_SEGMENT_SIZE "0" CACHE STRING "Set the initial segment size (use the internally defined value if 0 is specified)") # ---------- Experimental options ---------- # -# This mode still uses multiple threads inside Metall. -# However, applications must not use metall with multiple threads. -option(SINGLE_THREAD_ALLOC "Optimize Metall kernel for single thread usage" OFF) - option(USE_ANONYMOUS_NEW_MAP "Use the anonymous map when creating a new map region" OFF) set(UMAP_ROOT "" CACHE PATH "UMap installed root directory") @@ -215,11 +211,6 @@ if (USE_ANONYMOUS_NEW_MAP) message(STATUS "Use the anonymous map for new map region") endif () -if (SINGLE_THREAD_ALLOC) - list(APPEND METALL_DEFS "METALL_SINGLE_THREAD_ALLOC") - message(STATUS "Optimize Metall kernel for single thread usage") -endif () - # Requirements for GCC if (NOT RUN_BUILD_AND_TEST_WITH_CI) if (("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU") OR ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")) diff --git a/include/metall/basic_manager.hpp b/include/metall/basic_manager.hpp index fb18ca9f..8b9c3066 100644 --- a/include/metall/basic_manager.hpp +++ b/include/metall/basic_manager.hpp @@ -6,6 +6,17 @@ #ifndef METALL_BASIC_MANAGER_HPP #define METALL_BASIC_MANAGER_HPP +#ifdef DOXYGEN_SKIP +/// \brief A macro to disable concurrency support. +/// \details +/// If this macro is defined, Metall disables concurrency support and optimizes +/// the internal behavior for single-thread usage. Applications must not call +/// any Metall functions concurrently if this macro is defined. 
On the other +/// hand, Metall still may use multi-threading for internal operations, such +/// as synchronizing data with files. +#define METALL_DISABLE_CONCURRENCY +#endif + #include #include diff --git a/include/metall/kernel/manager_kernel.hpp b/include/metall/kernel/manager_kernel.hpp index f5ca062b..04ab6391 100644 --- a/include/metall/kernel/manager_kernel.hpp +++ b/include/metall/kernel/manager_kernel.hpp @@ -43,8 +43,10 @@ #include #endif -#define ENABLE_MUTEX_IN_METALL_MANAGER_KERNEL 1 -#if ENABLE_MUTEX_IN_METALL_MANAGER_KERNEL +#ifndef METALL_DISABLE_CONCURRENCY +#define METALL_ENABLE_MUTEX_IN_MANAGER_KERNEL +#endif +#ifdef METALL_ENABLE_MUTEX_IN_MANAGER_KERNEL #include #endif @@ -140,7 +142,7 @@ class manager_kernel { using json_store = mdtl::ptree::node_type; -#if ENABLE_MUTEX_IN_METALL_MANAGER_KERNEL +#ifdef METALL_ENABLE_MUTEX_IN_MANAGER_KERNEL using mutex_type = mdtl::mutex; using lock_guard_type = mdtl::mutex_lock_guard; #endif @@ -596,7 +598,7 @@ class manager_kernel { segment_memory_allocator m_segment_memory_allocator{nullptr}; std::unique_ptr m_manager_metadata{nullptr}; -#if ENABLE_MUTEX_IN_METALL_MANAGER_KERNEL +#ifdef METALL_ENABLE_MUTEX_IN_MANAGER_KERNEL std::unique_ptr m_object_directories_mutex{nullptr}; #endif }; diff --git a/include/metall/kernel/manager_kernel_impl.ipp b/include/metall/kernel/manager_kernel_impl.ipp index 6f4eaa20..2d9f7641 100644 --- a/include/metall/kernel/manager_kernel_impl.ipp +++ b/include/metall/kernel/manager_kernel_impl.ipp @@ -21,7 +21,7 @@ manager_kernel::manager_kernel() if (!m_manager_metadata) { return; } -#if ENABLE_MUTEX_IN_METALL_MANAGER_KERNEL +#ifdef METALL_ENABLE_MUTEX_IN_MANAGER_KERNEL m_object_directories_mutex = std::make_unique(); if (!m_object_directories_mutex) { return; @@ -177,7 +177,7 @@ bool manager_kernel::destroy(char_ptr_holder_type name) { size_type length = 0; { -#if ENABLE_MUTEX_IN_METALL_MANAGER_KERNEL +#ifdef METALL_ENABLE_MUTEX_IN_MANAGER_KERNEL lock_guard_type guard(*m_object_directories_mutex); #endif @@ -204,7 +204,7 @@ bool manager_kernel::destroy_ptr(const T *ptr) { size_type length = 0; { -#if ENABLE_MUTEX_IN_METALL_MANAGER_KERNEL +#ifdef METALL_ENABLE_MUTEX_IN_MANAGER_KERNEL lock_guard_type guard(*m_object_directories_mutex); #endif @@ -820,7 +820,7 @@ T *manager_kernel::priv_generic_construct( void *ptr = nullptr; try { -#if ENABLE_MUTEX_IN_METALL_MANAGER_KERNEL +#ifdef METALL_ENABLE_MUTEX_IN_MANAGER_KERNEL lock_guard_type guard(*m_object_directories_mutex); #endif @@ -856,7 +856,7 @@ T *manager_kernel::priv_generic_construct( ptr, [this](void *const ptr) { try { { -#if ENABLE_MUTEX_IN_METALL_MANAGER_KERNEL +#ifdef METALL_ENABLE_MUTEX_IN_MANAGER_KERNEL lock_guard_type guard(*m_object_directories_mutex); #endif priv_remove_attr_object_no_mutex(priv_to_offset(ptr)); diff --git a/include/metall/kernel/object_cache.hpp b/include/metall/kernel/object_cache.hpp index 4c2890a0..0d84f706 100644 --- a/include/metall/kernel/object_cache.hpp +++ b/include/metall/kernel/object_cache.hpp @@ -20,12 +20,12 @@ #include #include -#ifndef METALL_SINGLE_THREAD_ALLOC +#ifndef METALL_DISABLE_CONCURRENCY #define METALL_ENABLE_MUTEX_IN_OBJECT_CACHE +#endif #ifdef METALL_ENABLE_MUTEX_IN_OBJECT_CACHE #include #endif -#endif namespace metall::kernel { @@ -58,7 +58,7 @@ class object_cache { // -------------------- // static constexpr std::size_t k_num_cache_per_core = -#ifdef METALL_SINGLE_THREAD_ALLOC +#ifdef METALL_DISABLE_CONCURRENCY 1; #else 4; @@ -232,7 +232,7 @@ class object_cache { } std::size_t priv_comp_cache_no() 
const { -#ifdef METALL_SINGLE_THREAD_ALLOC +#ifdef METALL_DISABLE_CONCURRENCY return 0; #endif #if SUPPORT_GET_CPU_CORE_NO @@ -252,7 +252,7 @@ class object_cache { /// \brief Get CPU core number. /// This function does not call the system call every time as it is slow. static std::size_t priv_get_core_no() { -#ifdef METALL_SINGLE_THREAD_ALLOC +#ifdef METALL_DISABLE_CONCURRENCY return 0; #endif thread_local static int cached_core_no = 0; @@ -265,7 +265,7 @@ class object_cache { } static std::size_t get_num_cores() { -#ifdef METALL_SINGLE_THREAD_ALLOC +#ifdef METALL_DISABLE_CONCURRENCY return 1; #else return std::thread::hardware_concurrency(); diff --git a/include/metall/kernel/segment_allocator.hpp b/include/metall/kernel/segment_allocator.hpp index 08afc38d..a92895cd 100644 --- a/include/metall/kernel/segment_allocator.hpp +++ b/include/metall/kernel/segment_allocator.hpp @@ -24,8 +24,11 @@ #include #include -#define ENABLE_MUTEX_IN_METALL_SEGMENT_ALLOCATOR 1 -#if ENABLE_MUTEX_IN_METALL_SEGMENT_ALLOCATOR +#ifndef METALL_DISABLE_CONCURRENCY +#define METALL_ENABLE_MUTEX_IN_SEGMENT_ALLOCATOR +#endif + +#ifdef METALL_ENABLE_MUTEX_IN_SEGMENT_ALLOCATOR #include #endif @@ -94,7 +97,7 @@ class segment_allocator { myself>; #endif -#if ENABLE_MUTEX_IN_METALL_SEGMENT_ALLOCATOR +#ifdef METALL_ENABLE_MUTEX_IN_SEGMENT_ALLOCATOR using mutex_type = mdtl::mutex; using lock_guard_type = mdtl::mutex_lock_guard; #endif @@ -114,13 +117,13 @@ class segment_allocator { , m_object_cache() #endif -#if ENABLE_MUTEX_IN_METALL_SEGMENT_ALLOCATOR +#ifdef METALL_ENABLE_MUTEX_IN_SEGMENT_ALLOCATOR , m_chunk_mutex(nullptr), m_bin_mutex(nullptr) #endif { -#if ENABLE_MUTEX_IN_METALL_SEGMENT_ALLOCATOR +#ifdef METALL_ENABLE_MUTEX_IN_SEGMENT_ALLOCATOR m_chunk_mutex = std::make_unique(); m_bin_mutex = std::make_unique>(); #endif @@ -217,7 +220,7 @@ class segment_allocator { /// This function is not cheap if many objects are allocated. /// \return Returns true if all memory is deallocated. 
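The guard pattern that recurs throughout this file (an internal METALL_ENABLE_MUTEX_IN_* macro derived from METALL_DISABLE_CONCURRENCY, wrapped around every lock site) can be distilled into a sketch. Metall itself uses #ifdef blocks at each call site; the variant below gets the same effect with no-op types, and both the type names and the function are stand-ins, not the real mdtl::mutex implementation:

#include <mutex>

#ifndef METALL_DISABLE_CONCURRENCY
using mutex_type = std::mutex;
using lock_guard_type = std::lock_guard<std::mutex>;
#else
// Single-thread builds: locking compiles down to nothing.
struct mutex_type {};
struct lock_guard_type {
  explicit lock_guard_type(mutex_type &) {}
};
#endif

inline mutex_type g_mutex;
inline int g_counter = 0;

void thread_safe_increment() {
  lock_guard_type guard(g_mutex);  // No-op when concurrency is disabled
  ++g_counter;
}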
bool all_memory_deallocated() const { -#if ENABLE_MUTEX_IN_METALL_SEGMENT_ALLOCATOR +#ifdef METALL_ENABLE_MUTEX_IN_SEGMENT_ALLOCATOR lock_guard_type chunk_guard(*m_chunk_mutex); #endif @@ -384,7 +387,7 @@ class segment_allocator { void priv_allocate_small_objects_from_global( const bin_no_type bin_no, const size_type num_allocates, difference_type *const allocated_offsets) { -#if ENABLE_MUTEX_IN_METALL_SEGMENT_ALLOCATOR +#ifdef METALL_ENABLE_MUTEX_IN_SEGMENT_ALLOCATOR lock_guard_type bin_guard(m_bin_mutex->at(bin_no)); #endif @@ -470,7 +473,7 @@ class segment_allocator { bool priv_insert_new_small_object_chunk(const bin_no_type bin_no) { chunk_no_type new_chunk_no; -#if ENABLE_MUTEX_IN_METALL_SEGMENT_ALLOCATOR +#ifdef METALL_ENABLE_MUTEX_IN_SEGMENT_ALLOCATOR lock_guard_type chunk_guard(*m_chunk_mutex); #endif new_chunk_no = m_chunk_directory.insert(bin_no); @@ -482,7 +485,7 @@ class segment_allocator { } difference_type priv_allocate_large_object(const bin_no_type bin_no) { -#if ENABLE_MUTEX_IN_METALL_SEGMENT_ALLOCATOR +#ifdef METALL_ENABLE_MUTEX_IN_SEGMENT_ALLOCATOR lock_guard_type chunk_guard(*m_chunk_mutex); #endif const chunk_no_type new_chunk_no = m_chunk_directory.insert(bin_no); @@ -535,7 +538,7 @@ class segment_allocator { void priv_deallocate_small_objects_from_global( const bin_no_type bin_no, const size_type num_deallocates, const difference_type offsets[]) { -#if ENABLE_MUTEX_IN_METALL_SEGMENT_ALLOCATOR +#ifdef METALL_ENABLE_MUTEX_IN_SEGMENT_ALLOCATOR lock_guard_type bin_guard(m_bin_mutex->at(bin_no)); #endif for (size_type i = 0; i < num_deallocates; ++i) { @@ -559,7 +562,7 @@ class segment_allocator { } else if (m_chunk_directory.all_slots_unmarked(chunk_no)) { // All slots in the chunk are not used, deallocate it { -#if ENABLE_MUTEX_IN_METALL_SEGMENT_ALLOCATOR +#ifdef METALL_ENABLE_MUTEX_IN_SEGMENT_ALLOCATOR lock_guard_type chunk_guard(*m_chunk_mutex); #endif m_chunk_directory.erase(chunk_no); @@ -639,7 +642,7 @@ class segment_allocator { void priv_deallocate_large_object(const chunk_no_type chunk_no, const bin_no_type bin_no) { -#if ENABLE_MUTEX_IN_METALL_SEGMENT_ALLOCATOR +#ifdef METALL_ENABLE_MUTEX_IN_SEGMENT_ALLOCATOR lock_guard_type chunk_guard(*m_chunk_mutex); #endif m_chunk_directory.erase(chunk_no); @@ -717,7 +720,7 @@ class segment_allocator { small_object_cache_type m_object_cache; #endif -#if ENABLE_MUTEX_IN_METALL_SEGMENT_ALLOCATOR +#ifdef METALL_ENABLE_MUTEX_IN_SEGMENT_ALLOCATOR std::unique_ptr m_chunk_mutex{nullptr}; std::unique_ptr> m_bin_mutex{ nullptr}; diff --git a/test/kernel/CMakeLists.txt b/test/kernel/CMakeLists.txt index ee042dd8..6f2c75a2 100644 --- a/test/kernel/CMakeLists.txt +++ b/test/kernel/CMakeLists.txt @@ -11,7 +11,7 @@ add_metall_test_executable(chunk_directory_test chunk_directory_test.cpp) add_metall_test_executable(manager_test manager_test.cpp) add_metall_test_executable(manager_test_single_thread manager_test.cpp) -target_compile_definitions(manager_test_single_thread PRIVATE METALL_SINGLE_THREAD_ALLOC) +target_compile_definitions(manager_test_single_thread PRIVATE METALL_DISABLE_CONCURRENCY) add_metall_test_executable(snapshot_test snapshot_test.cpp) From b87cb0e284077774d06a15ff7bbc786f481f0ae8 Mon Sep 17 00:00:00 2001 From: iwabuchi Date: Tue, 10 Oct 2023 20:15:50 -0700 Subject: [PATCH 15/25] Code refactoring wrt Metall macros --- CMakeLists.txt | 86 ++++--------------- docs/Doxyfile.in | 4 +- docs/readthedocs/advanced_build/cmake.md | 2 +- .../basics/compile_time_options.md | 42 +++++---- include/metall/basic_manager.hpp | 11 --- 
include/metall/defs.hpp | 66 ++++++++++++++ include/metall/json/json_fwd.hpp | 4 +- include/metall/kernel/manager_kernel.hpp | 16 +++- include/metall/kernel/manager_kernel_defs.hpp | 28 ------ include/metall/metall.hpp | 1 + scripts/CI/build_and_test.sh | 32 +++---- scripts/release_test/full_build_and_test.sh | 31 +++++-- test/CMakeLists.txt | 2 +- test/kernel/CMakeLists.txt | 2 +- 14 files changed, 161 insertions(+), 166 deletions(-) create mode 100644 include/metall/defs.hpp delete mode 100644 include/metall/kernel/manager_kernel_defs.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 16c14325..b8bf92ad 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.10) +cmake_minimum_required(VERSION 3.12) list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") include(FetchContent) @@ -82,33 +82,20 @@ option(JUST_INSTALL_METALL_HEADER "Just install Metall header files (do not buil option(BUILD_UTILITY "Build utility programs" OFF) option(BUILD_DOC "Build API documentation" OFF) option(BUILD_C "Build C examples and libraries" OFF) -option(VERBOSE_SYSTEM_SUPPORT_WARNING "Show compile time warning regarding system support" OFF) -option(DISABLE_FREE_FILE_SPACE "Disable freeing file space" OFF) -option(DISABLE_SMALL_OBJECT_CACHE "Disable small object cache" OFF) option(BUILD_EXAMPLE "Build the examples" OFF) -option(BUILD_BENCH "Build the benchmark" OFF) option(BUILD_TEST "Build the test" OFF) option(RUN_LARGE_SCALE_TEST "Run large scale tests" OFF) option(RUN_BUILD_AND_TEST_WITH_CI "Perform build and basic test with CI" OFF) -option(BUILD_VERIFICATION "Build verification directory" OFF) -option(USE_SORTED_BIN "Use VM space aware algorithm in the bin directory" OFF) - -set(DEFAULT_VM_RESERVE_SIZE "0" CACHE STRING - "Set the default VM reserve size (use the internally defined value if 0 is specified)") -set(MAX_SEGMENT_SIZE "0" CACHE STRING - "Set the max segment size (use the internally defined value if 0 is specified)") -set(INITIAL_SEGMENT_SIZE "0" CACHE STRING - "Set the initial segment size (use the internally defined value if 0 is specified)") +option(BUILD_BENCH "Build the benchmark" OFF) +option(BUILD_VERIFICATION "Build verification programs" OFF) +set(COMPILER_DEFS "" CACHE STRING "A list of Metall compile definitions to be added to all targets") # ---------- Experimental options ---------- # -option(USE_ANONYMOUS_NEW_MAP "Use the anonymous map when creating a new map region" OFF) set(UMAP_ROOT "" CACHE PATH "UMap installed root directory") option(ONLY_DOWNLOAD_GTEST "Only downloading Google Test" OFF) option(SKIP_DOWNLOAD_GTEST "Skip downloading Google Test" OFF) option(BUILD_NUMA "Build programs that require the NUMA policy library (numa.h)" OFF) -set(FREE_SMALL_OBJECT_SIZE_HINT "0" CACHE STRING - "Try to free the associated pages and file space when objects equal to or larger than that is deallocated") # -------------------------------------------------------------------------------- # @@ -153,64 +140,23 @@ endif () # -------------------------------------------------------------------------------- # # Executables # -------------------------------------------------------------------------------- # + +# ---------- Metall Macros ---------- # +foreach(X ${COMPILER_DEFS}) + message(STATUS "Metall compile definition: ${X}") +endforeach() + + +# ---------- CMAKE_BUILD_TYPE ---------- # if (NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) message(STATUS "CMAKE_BUILD_TYPE is set as Release") endif () + # ---------- MPI ---------- # 
find_package(MPI) -# ---------- Configure Metall ---------- # -if (FREE_SMALL_OBJECT_SIZE_HINT GREATER 0) - list(APPEND METALL_DEFS "METALL_FREE_SMALL_OBJECT_SIZE_HINT=${FREE_SMALL_OBJECT_SIZE_HINT}") - message(STATUS "Try to free space for objects >= ${FREE_SMALL_OBJECT_SIZE_HINT} bytes") -endif () - -if (VERBOSE_SYSTEM_SUPPORT_WARNING) - list(APPEND METALL_DEFS "METALL_VERBOSE_SYSTEM_SUPPORT_WARNING") - message(STATUS "Show compile time warning regarding system support") -endif () - -if (DISABLE_FREE_FILE_SPACE) - list(APPEND METALL_DEFS "METALL_DISABLE_FREE_FILE_SPACE") - message(STATUS "Disable freeing file space in Metall") -endif () - -if (DISABLE_SMALL_OBJECT_CACHE) - list(APPEND METALL_DEFS "METALL_DISABLE_OBJECT_CACHE") - message(STATUS "Disable small object cache") -endif () - -if (DEFAULT_VM_RESERVE_SIZE GREATER 0) - list(APPEND METALL_DEFS "METALL_DEFAULT_VM_RESERVE_SIZE=${DEFAULT_VM_RESERVE_SIZE}") - message(STATUS "METALL_DEFAULT_VM_RESERVE_SIZE=${DEFAULT_VM_RESERVE_SIZE}") -endif () - -if (MAX_SEGMENT_SIZE GREATER 0) - list(APPEND METALL_DEFS "METALL_MAX_SEGMENT_SIZE=${MAX_SEGMENT_SIZE}") - message(STATUS "METALL_MAX_SEGMENT_SIZE=${MAX_SEGMENT_SIZE}") -endif () - -if (INITIAL_SEGMENT_SIZE GREATER 0) - list(APPEND METALL_DEFS "METALL_INITIAL_SEGMENT_SIZE=${INITIAL_SEGMENT_SIZE}") - message(STATUS "METALL_INITIAL_SEGMENT_SIZE=${INITIAL_SEGMENT_SIZE}") -endif () - -if (USE_SORTED_BIN) - list(APPEND METALL_DEFS "METALL_USE_SORTED_BIN") - message(STATUS "Use VM space aware algorithm in the bin directory") -endif () - -if (USE_ANONYMOUS_NEW_MAP) - if (USE_ANONYMOUS_NEW_MAP AND UMAP_ROOT) - message(FATAL_ERROR "USE_ANONYMOUS_NEW_MAP and UMAP_ROOT options cannot coexist") - endif () - - list(APPEND METALL_DEFS "METALL_USE_ANONYMOUS_NEW_MAP") - message(STATUS "Use the anonymous map for new map region") -endif () - # Requirements for GCC if (NOT RUN_BUILD_AND_TEST_WITH_CI) if (("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU") OR ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")) @@ -304,8 +250,8 @@ function(common_setup_for_metall_executable name) # -------------------- # ----- Compile Definitions ----- # - foreach(X IN LISTS METALL_DEFS) - target_compile_definitions(${name} PRIVATE ${X}) + foreach(X ${COMPILER_DEFS}) + target_compile_definitions(${name} PRIVATE ${X}) endforeach() # -------------------- @@ -325,7 +271,7 @@ function(common_setup_for_metall_executable name) target_include_directories(${name} PRIVATE ${UMAP_ROOT}/include) if (LIBUMAP) target_link_libraries(${name} PRIVATE ${LIBUMAP}) - target_compile_definitions(${name} PRIVATE METALL_USE_UMAP) + target_compile_definitions(${name} PRIVATE "METALL_USE_UMAP") endif () endif () # -------------------- diff --git a/docs/Doxyfile.in b/docs/Doxyfile.in index 8c7af008..b7ae43f3 100644 --- a/docs/Doxyfile.in +++ b/docs/Doxyfile.in @@ -835,8 +835,6 @@ INPUT = ../include/metall \ ../include/metall/json \ ../include/metall/container/experimental/jgraph \ ../include/metall/utility \ - ../include/metall/detail/base_stl_allocator.hpp \ - ../include/metall/detail/utility/named_proxy.hpp \ ../example/graph_data_structure \ ../example/json @@ -2139,7 +2137,7 @@ ENABLE_PREPROCESSING = YES # The default value is: NO. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. 
-MACRO_EXPANSION = NO +MACRO_EXPANSION = YES # If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then # the macro expansion is limited to the macros specified with the PREDEFINED and diff --git a/docs/readthedocs/advanced_build/cmake.md b/docs/readthedocs/advanced_build/cmake.md index f2230892..40fdfd96 100644 --- a/docs/readthedocs/advanced_build/cmake.md +++ b/docs/readthedocs/advanced_build/cmake.md @@ -17,7 +17,7 @@ cmake build_doc # option; BUILD_DOC must be ON when running cmake ## Required - - CMake 3.10 or more. + - CMake 3.12 or more. - GCC 8.1 or more. - Boost C++ Libraries 1.64 or more (build is not required; needs only their header files). diff --git a/docs/readthedocs/basics/compile_time_options.md b/docs/readthedocs/basics/compile_time_options.md index f30fdc98..b26e8d71 100644 --- a/docs/readthedocs/basics/compile_time_options.md +++ b/docs/readthedocs/basics/compile_time_options.md @@ -1,26 +1,36 @@ -# Compile-time Options +# Compile-Time Options There are some compile-time options (C/C++ macro) as follows to configure the behavior of Metall: +- METALL_DEFAULT_CAPACITY=*bytes* + - The default capacity of a segment/datastore. + - This value is used when a user does not specify the capacity of a datastore when creating it. -- METALL_DISABLE_FREE_FILE_SPACE - - If defined, Metall does not free file space +- METALL_VERBOSE_SYSTEM_SUPPORT_WARNING + - If defined, Metall shows warning messages at compile time if the system does not support important features. +- METALL_DISABLE_CONCURRENCY + - Disable concurrency support in Metall. This option is useful when Metall is used in a single-threaded application. + - If this macro is defined, applications must not call Metall concurrently from multiple threads. + - Even if this option is enabled, Metall still uses multiple threads for background tasks, such as synchronizing segment files. -- METALL_DEFAULT_VM_RESERVE_SIZE=*bytes* - - The default virtual memory reserve size - - An internally defined value is used if 0 is specified - - Wll be rounded up to a multiple of the system page size (e.g., 4 KB) internally +- METALL_USE_SORTED_BIN + - If defined, Metall stores addresses in sorted order in the bin directory. + - This option enables Metall to use memory space more efficiently, but it increases the cost of the bin directory operations. +- METALL_FREE_SMALL_OBJECT_SIZE_HINT=*bytes* + - If defined, Metall tries to free space when an object equal to or larger than the specified bytes is deallocated. + - Will be rounded up to a multiple of the page size internally. -- METALL_INITIAL_SEGMENT_SIZE=*bytes* - - The initial segment size - - Use the internally defined value if 0 is specified - - Wll be rounded up to a multiple of the system page size internally +**Macros for the segment storage manager:** -- METALL_FREE_SMALL_OBJECT_SIZE_HINT=*bytes* - - Experimental option - - If defined, Metall tries to free space when an object equal or larger than specified bytes is deallocated - - Wll be rounded up to a multiple of the page size internally - \ No newline at end of file +- METALL_SEGMENT_BLOCK_SIZE=*bytes* + - The segment block size. + - Metall allocates a backing file with this size. + +- METALL_DISABLE_FREE_FILE_SPACE + - If defined, Metall does not free file space. + +- METALL_USE_ANONYMOUS_NEW_MAP + - If defined, Metall uses anonymous memory mapping instead of file mapping when creating a new map region. 
\ No newline at end of file diff --git a/include/metall/basic_manager.hpp b/include/metall/basic_manager.hpp index 8b9c3066..fb18ca9f 100644 --- a/include/metall/basic_manager.hpp +++ b/include/metall/basic_manager.hpp @@ -6,17 +6,6 @@ #ifndef METALL_BASIC_MANAGER_HPP #define METALL_BASIC_MANAGER_HPP -#ifdef DOXYGEN_SKIP -/// \brief A macro to disable concurrency support. -/// \details -/// If this macro is defined, Metall disables concurrency support and optimizes -/// the internal behavior for single-thread usage. Applications must not call -/// any Metall functions concurrently if this macro is defined. On the other -/// hand, Metall still may use multi-threading for internal operations, such -/// as synchronizing data with files. -#define METALL_DISABLE_CONCURRENCY -#endif - #include #include diff --git a/include/metall/defs.hpp b/include/metall/defs.hpp new file mode 100644 index 00000000..72021665 --- /dev/null +++ b/include/metall/defs.hpp @@ -0,0 +1,66 @@ +// Copyright 2023 Lawrence Livermore National Security, LLC and other Metall +// Project Developers. See the top-level COPYRIGHT file for details. +// +// SPDX-License-Identifier: (Apache-2.0 OR MIT) + +/// \file defs.hpp +/// \brief Common definitions for Metall + +#ifndef METALL_DEFS_HPP +#define METALL_DEFS_HPP + +/// \def METALL_MAX_CAPACITY +/// The max capacity, i.e., the maximum total memory size a single Metall +/// datastore can allocate. This value is a theoretical limit, and the actual +/// limit is smaller than this value. +#ifndef METALL_MAX_CAPACITY +#define METALL_MAX_CAPACITY (1ULL << 48ULL) +#endif + +#ifdef METALL_MAX_SEGMENT_SIZE +#warning \ + "METALL_MAX_SEGMENT_SIZE is deprecated. Use METALL_MAX_CAPACITY instead." +#endif + +/// \def METALL_DEFAULT_CAPACITY +/// The default Metall store capacity, i.e., the default maximum total memory +/// size a single Metall datastore can allocate. This value is used when a new +/// Metall datastore is created without the capacity parameter. This value is a +/// theoretical limit, and the actual limit is smaller than this value. This +/// value must be less than or equal to METALL_MAX_CAPACITY. +#ifndef METALL_DEFAULT_CAPACITY +#if defined(__linux__) +#define METALL_DEFAULT_CAPACITY (1ULL << 43ULL) +#else +#define METALL_DEFAULT_CAPACITY (1ULL << 42ULL) +#endif +#endif + +#ifdef METALL_DEFAULT_VM_RESERVE_SIZE +#warning \ + "METALL_DEFAULT_VM_RESERVE_SIZE is deprecated. Use METALL_DEFAULT_CAPACITY instead." +#endif + +/// \def METALL_SEGMENT_BLOCK_SIZE +/// The segment block size the default segment storage use. +#ifndef METALL_SEGMENT_BLOCK_SIZE +#define METALL_SEGMENT_BLOCK_SIZE (1ULL << 28ULL) +#endif + +#ifdef METALL_INITIAL_SEGMENT_SIZE +#warning \ + "METALL_INITIAL_SEGMENT_SIZE is deprecated. Use METALL_SEGMENT_BLOCK_SIZE instead." +#endif + +#ifdef DOXYGEN_SKIP +/// \brief A macro to disable concurrency support. +/// \details +/// If this macro is defined, Metall disables concurrency support and optimizes +/// the internal behavior for single-thread usage. Applications must not call +/// any Metall functions concurrently if this macro is defined. On the other +/// hand, Metall still may use multi-threading for internal operations, such +/// as synchronizing data with files. 
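The capacity macros introduced earlier in this new defs.hpp are meant to be overridden on the compiler command line; a sketch of how a build might consume them (the datastore path is illustrative, and the static_assert merely restates the relationship the header documents):

// Build with, e.g.: -DMETALL_DEFAULT_CAPACITY='(1ULL << 40ULL)'
#include <metall/metall.hpp>

int main() {
  static_assert(METALL_DEFAULT_CAPACITY <= METALL_MAX_CAPACITY,
                "the default capacity must fit within the max capacity");
  // Created with the compile-time default capacity; passing an explicit
  // capacity argument would take precedence.
  metall::manager manager(metall::create_only, "/tmp/capacity_demo");
  return 0;
}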
+#define METALL_DISABLE_CONCURRENCY +#endif + +#endif // METALL_DEFS_HPP diff --git a/include/metall/json/json_fwd.hpp b/include/metall/json/json_fwd.hpp index 99000f82..5e740f90 100644 --- a/include/metall/json/json_fwd.hpp +++ b/include/metall/json/json_fwd.hpp @@ -12,7 +12,7 @@ #include #include -#if defined(DOXYGEN_SKIP) +#ifdef DOXYGEN_SKIP /// \brief If defined, link with a buit Boost.JSON. #define METALL_LINK_WITH_BOOST_JSON @@ -20,7 +20,7 @@ #define METALL_BOOST_JSON_SRC_INCLUDED #endif -#if METALL_LINK_WITH_BOOST_JSON +#ifdef METALL_LINK_WITH_BOOST_JSON #include #else #ifndef METALL_BOOST_JSON_SRC_INCLUDED diff --git a/include/metall/kernel/manager_kernel.hpp b/include/metall/kernel/manager_kernel.hpp index 04ab6391..c7eb3614 100644 --- a/include/metall/kernel/manager_kernel.hpp +++ b/include/metall/kernel/manager_kernel.hpp @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -85,17 +84,26 @@ class manager_kernel { static constexpr const char *k_datastore_segment_dir_name = "segment"; // For segment +#ifndef METALL_DEFAULT_CAPACITY +#error "METALL_DEFAULT_CAPACITY is not defined." +#endif static constexpr size_type k_default_vm_reserve_size = - METALL_DEFAULT_VM_RESERVE_SIZE; + METALL_DEFAULT_CAPACITY; static_assert(k_chunk_size <= k_default_vm_reserve_size, "Chunk size must be <= k_default_vm_reserve_size"); - static constexpr size_type k_max_segment_size = METALL_MAX_SEGMENT_SIZE; +#ifndef METALL_MAX_CAPACITY +#error "METALL_MAX_CAPACITY is not defined." +#endif + static constexpr size_type k_max_segment_size = METALL_MAX_CAPACITY; static_assert(k_default_vm_reserve_size <= k_max_segment_size, "k_default_vm_reserve_size must be <= k_max_segment_size"); +#ifndef METALL_SEGMENT_BLOCK_SIZE +#error "METALL_SEGMENT_BLOCK_SIZE is not defined." +#endif static constexpr size_type k_initial_segment_size = - METALL_INITIAL_SEGMENT_SIZE; + METALL_SEGMENT_BLOCK_SIZE; static_assert(k_initial_segment_size <= k_default_vm_reserve_size, "k_initial_segment_size must be <= k_default_vm_reserve_size"); static_assert(k_chunk_size <= k_initial_segment_size, diff --git a/include/metall/kernel/manager_kernel_defs.hpp b/include/metall/kernel/manager_kernel_defs.hpp deleted file mode 100644 index c92a56f8..00000000 --- a/include/metall/kernel/manager_kernel_defs.hpp +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2019 Lawrence Livermore National Security, LLC and other Metall -// Project Developers. See the top-level COPYRIGHT file for details. -// -// SPDX-License-Identifier: (Apache-2.0 OR MIT) - -#ifndef METALL_KERNEL_MANAGER_KERNEL_DEFS_HPP -#define METALL_KERNEL_MANAGER_KERNEL_DEFS_HPP - -/// \brief The default virtual memory reservation size. -#ifndef METALL_DEFAULT_VM_RESERVE_SIZE -#if defined(__linux__) -#define METALL_DEFAULT_VM_RESERVE_SIZE (1ULL << 43ULL) -#elif defined(__APPLE__) -#define METALL_DEFAULT_VM_RESERVE_SIZE (1ULL << 42ULL) -#endif -#endif - -/// \brief The max segment size, i.e., max total allocation size. -#ifndef METALL_MAX_SEGMENT_SIZE -#define METALL_MAX_SEGMENT_SIZE (1ULL << 48ULL) -#endif - -/// \brief The initial segment size. 
-#ifndef METALL_INITIAL_SEGMENT_SIZE -#define METALL_INITIAL_SEGMENT_SIZE (1ULL << 28ULL) -#endif - -#endif // METALL_KERNEL_MANAGER_KERNEL_DEFS_HPP diff --git a/include/metall/metall.hpp b/include/metall/metall.hpp index 7f1d86a7..af47f294 100644 --- a/include/metall/metall.hpp +++ b/include/metall/metall.hpp @@ -6,6 +6,7 @@ #ifndef METALL_METALL_HPP #define METALL_METALL_HPP +#include #include #include #include diff --git a/scripts/CI/build_and_test.sh b/scripts/CI/build_and_test.sh index af843c62..560a4cb2 100755 --- a/scripts/CI/build_and_test.sh +++ b/scripts/CI/build_and_test.sh @@ -55,27 +55,17 @@ main() { fi for BUILD_TYPE in "${BUILD_TYPES[@]}"; do - for DISABLE_FREE_FILE_SPACE in OFF; do - for DISABLE_SMALL_OBJECT_CACHE in OFF; do - for FREE_SMALL_OBJECT_SIZE_HINT in 0; do - run_build_and_test_kernel \ - -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ - -DDISABLE_FREE_FILE_SPACE=${DISABLE_FREE_FILE_SPACE} \ - -DDISABLE_SMALL_OBJECT_CACHE=${DISABLE_SMALL_OBJECT_CACHE} \ - -DFREE_SMALL_OBJECT_SIZE_HINT=${FREE_SMALL_OBJECT_SIZE_HINT} \ - -DBUILD_BENCH=ON \ - -DBUILD_TEST=ON \ - -DRUN_LARGE_SCALE_TEST=ON \ - -DBUILD_DOC=OFF \ - -DBUILD_C=ON \ - -DBUILD_UTILITY=ON \ - -DBUILD_EXAMPLE=ON \ - -DRUN_BUILD_AND_TEST_WITH_CI=ON \ - -DBUILD_VERIFICATION=OFF \ - -DVERBOSE_SYSTEM_SUPPORT_WARNING=OFF - done - done - done + run_build_and_test_kernel \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DBUILD_BENCH=ON \ + -DBUILD_TEST=ON \ + -DRUN_LARGE_SCALE_TEST=ON \ + -DBUILD_DOC=OFF \ + -DBUILD_C=ON \ + -DBUILD_UTILITY=ON \ + -DBUILD_EXAMPLE=ON \ + -DRUN_BUILD_AND_TEST_WITH_CI=ON \ + -DBUILD_VERIFICATION=OFF done } diff --git a/scripts/release_test/full_build_and_test.sh b/scripts/release_test/full_build_and_test.sh index 340b2c6e..6bf5e12c 100644 --- a/scripts/release_test/full_build_and_test.sh +++ b/scripts/release_test/full_build_and_test.sh @@ -53,16 +53,31 @@ main() { build_docs for BUILD_TYPE in Debug RelWithDebInfo Release; do - for DISABLE_FREE_FILE_SPACE in ON OFF; do - for DISABLE_SMALL_OBJECT_CACHE in ON OFF; do + for DISABLE_FREE_FILE_SPACE in true false; do + for DISABLE_SMALL_OBJECT_CACHE in true false; do for FREE_SMALL_OBJECT_SIZE_HINT in 0 8 4096 65536; do - for USE_ANONYMOUS_NEW_MAP in ON OFF; do + for USE_ANONYMOUS_NEW_MAP in true false; do + + DEFS="METALL_VERBOSE_SYSTEM_SUPPORT_WARNING;" + + if [[ "${DISABLE_FREE_FILE_SPACE}" == "true" ]]; then + DEFS="${DEFS}METALL_DISABLE_FREE_FILE_SPACE;" + fi + + if [[ "${DISABLE_SMALL_OBJECT_CACHE}" == "true" ]]; then + DEFS="${DEFS}METALL_DISABLE_SMALL_OBJECT_CACHE;" + fi + + if [ "${FREE_SMALL_OBJECT_SIZE_HINT}" -gt 0 ]; then + DEFS="${DEFS}METALL_FREE_SMALL_OBJECT_SIZE_HINT=${FREE_SMALL_OBJECT_SIZE_HINT};" + fi + + if [[ "${USE_ANONYMOUS_NEW_MAP}" == "true" ]]; then + DEFS="${DEFS}METALL_USE_ANONYMOUS_NEW_MAP;" + fi + run_build_and_test_kernel \ -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ - -DDISABLE_FREE_FILE_SPACE=${DISABLE_FREE_FILE_SPACE} \ - -DDISABLE_SMALL_OBJECT_CACHE=${DISABLE_SMALL_OBJECT_CACHE} \ - -DFREE_SMALL_OBJECT_SIZE_HINT=${FREE_SMALL_OBJECT_SIZE_HINT} \ - -DUSE_ANONYMOUS_NEW_MAP=${USE_ANONYMOUS_NEW_MAP} \ -DBUILD_BENCH=ON \ -DBUILD_TEST=ON \ -DRUN_LARGE_SCALE_TEST=ON \ @@ -72,7 +87,7 @@ main() { -DBUILD_EXAMPLE=ON \ -DRUN_BUILD_AND_TEST_WITH_CI=OFF \ -DBUILD_VERIFICATION=OFF \ - -DVERBOSE_SYSTEM_SUPPORT_WARNING=ON + -DCOMPILER_DEFS="${DEFS}" done done done diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 110d3eb7..eb33327d 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -91,7 +91,7 @@ function(add_metall_test_executable 
name source) link_gtest_lib(${name}) gtest_discover_tests(${name}) if (RUN_LARGE_SCALE_TEST) - target_compile_definitions(${name} PRIVATE METALL_RUN_LARGE_SCALE_TEST) + target_compile_definitions(${name} PRIVATE "METALL_RUN_LARGE_SCALE_TEST") endif() endif() endfunction() diff --git a/test/kernel/CMakeLists.txt b/test/kernel/CMakeLists.txt index 6f2c75a2..4c458f03 100644 --- a/test/kernel/CMakeLists.txt +++ b/test/kernel/CMakeLists.txt @@ -11,7 +11,7 @@ add_metall_test_executable(chunk_directory_test chunk_directory_test.cpp) add_metall_test_executable(manager_test manager_test.cpp) add_metall_test_executable(manager_test_single_thread manager_test.cpp) -target_compile_definitions(manager_test_single_thread PRIVATE METALL_DISABLE_CONCURRENCY) +target_compile_definitions(manager_test_single_thread PRIVATE "METALL_DISABLE_CONCURRENCY") add_metall_test_executable(snapshot_test snapshot_test.cpp) From 28bdb368285bb3ba19ab8960649da838852409b7 Mon Sep 17 00:00:00 2001 From: iwabuchi Date: Wed, 11 Oct 2023 17:40:57 -0700 Subject: [PATCH 16/25] (CI) Spack loads the first match package --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 1087c231..47c65f4f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -37,7 +37,7 @@ install_boost: script: - echo "=== build section ===" - module load gcc/${GCC_VERSION} - - spack load boost@${BOOST_VERSION} arch=$(spack arch) + - spack load --first boost@${BOOST_VERSION} arch=$(spack arch) - export METALL_TEST_DIR="/dev/shm/metall_test-${CI_CONCURRENT_ID}-${CI_PIPELINE_IID}" - srun -N1 -ppdebug bash ./scripts/CI/build_and_test.sh From ada530b7aa5f82e83c009024ab6549921e6d0c43 Mon Sep 17 00:00:00 2001 From: iwabuchi Date: Mon, 16 Oct 2023 17:33:40 -0700 Subject: [PATCH 17/25] (CMake) Change the way to set up OMP and OpenMPI targets --- CMakeLists.txt | 22 ++++++-------- bench/adjacency_list/CMakeLists.txt | 29 +++++-------------- .../edge_generator/CMakeLists.txt | 4 +-- bench/bfs/CMakeLists.txt | 17 +++-------- cmake/setup_mpi.cmake | 14 +++++++++ cmake/setup_omp.cmake | 11 +++++++ example/CMakeLists.txt | 12 ++------ test/kernel/CMakeLists.txt | 4 +-- 8 files changed, 51 insertions(+), 62 deletions(-) create mode 100644 cmake/setup_mpi.cmake create mode 100644 cmake/setup_omp.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index b8bf92ad..cc61d4c6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -141,6 +141,15 @@ endif () # Executables # -------------------------------------------------------------------------------- # +# Requirements for GCC +if (NOT RUN_BUILD_AND_TEST_WITH_CI) + if (("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU") OR ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")) + if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8.1) + message(FATAL_ERROR "GCC version must be at least 8.1") + endif () + endif () +endif () + # ---------- Metall Macros ---------- # foreach(X ${COMPILER_DEFS}) message(STATUS "Metall compile definition: ${X}") @@ -154,19 +163,6 @@ if (NOT CMAKE_BUILD_TYPE) endif () -# ---------- MPI ---------- # -find_package(MPI) - -# Requirements for GCC -if (NOT RUN_BUILD_AND_TEST_WITH_CI) - if (("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU") OR ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")) - if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8.1) - message(FATAL_ERROR "GCC version must be at least 8.1") - endif () - endif () -endif () - - # ---------- Threads ---------- # find_package(Threads REQUIRED) diff --git a/bench/adjacency_list/CMakeLists.txt b/bench/adjacency_list/CMakeLists.txt index 
a88822a8..e1d62bd2 100644 --- a/bench/adjacency_list/CMakeLists.txt +++ b/bench/adjacency_list/CMakeLists.txt @@ -1,32 +1,19 @@ -find_package(OpenMP) -if (NOT OpenMP_CXX_FOUND) - MESSAGE(STATUS "OpenMP is not found. Use single thread in adjacency_list.") -endif () +include(setup_omp) add_metall_executable(run_adj_list_bench_stl run_adj_list_bench_stl.cpp) -if (OpenMP_CXX_FOUND) - target_link_libraries(run_adj_list_bench_stl PRIVATE OpenMP::OpenMP_CXX) -endif () +setup_omp_target(run_adj_list_bench_stl) add_metall_executable(run_adj_list_bench_bip run_adj_list_bench_bip.cpp) -if (OpenMP_CXX_FOUND) - target_link_libraries(run_adj_list_bench_bip PRIVATE OpenMP::OpenMP_CXX) -endif () +setup_omp_target(run_adj_list_bench_bip) add_metall_executable(run_adj_list_bench_bip_extend run_adj_list_bench_bip_extend.cpp) -if (OpenMP_CXX_FOUND) - target_link_libraries(run_adj_list_bench_bip_extend PRIVATE OpenMP::OpenMP_CXX) -endif () +setup_omp_target(run_adj_list_bench_bip_extend) add_metall_executable(run_adj_list_bench_metall run_adj_list_bench_metall.cpp) -if (OpenMP_CXX_FOUND) - target_link_libraries(run_adj_list_bench_metall PRIVATE OpenMP::OpenMP_CXX) -endif () +setup_omp_target(run_adj_list_bench_metall) add_metall_executable(run_adj_list_bench_reflink_snapshot run_adj_list_bench_reflink_snapshot.cpp) -if (OpenMP_CXX_FOUND) - target_link_libraries(run_adj_list_bench_reflink_snapshot PRIVATE OpenMP::OpenMP_CXX) -endif () +setup_omp_target(run_adj_list_bench_reflink_snapshot) if (MEMKIND_ROOT) add_metall_executable(run_adj_list_bench_pmem run_adj_list_bench_pmem.cpp) @@ -39,9 +26,7 @@ if (MEMKIND_ROOT) message(FATAL_ERROR "Cannot find memkind library") endif () endif () - if (OpenMP_CXX_FOUND) - target_link_libraries(run_adj_list_bench_pmem PRIVATE OpenMP::OpenMP_CXX) - endif () + setup_omp_target(run_adj_list_bench_pmem) endif () configure_file(run_bench.sh run_bench.sh COPYONLY) diff --git a/bench/adjacency_list/edge_generator/CMakeLists.txt b/bench/adjacency_list/edge_generator/CMakeLists.txt index 67e03cc7..70d2eb61 100644 --- a/bench/adjacency_list/edge_generator/CMakeLists.txt +++ b/bench/adjacency_list/edge_generator/CMakeLists.txt @@ -1,4 +1,2 @@ add_metall_executable(generate_rmat_edge_list generate_rmat_edge_list.cpp) -if (OpenMP_CXX_FOUND) - target_link_libraries(generate_rmat_edge_list PRIVATE OpenMP::OpenMP_CXX) -endif () \ No newline at end of file +setup_omp_target(generate_rmat_edge_list) \ No newline at end of file diff --git a/bench/bfs/CMakeLists.txt b/bench/bfs/CMakeLists.txt index 0b84a14a..bd2b73d2 100644 --- a/bench/bfs/CMakeLists.txt +++ b/bench/bfs/CMakeLists.txt @@ -1,21 +1,12 @@ -find_package(OpenMP) -if (NOT OpenMP_CXX_FOUND) - MESSAGE(STATUS "OpenMP is not found. 
Use single thread in BFS") -endif() +include(setup_omp) add_metall_executable(run_bfs_bench_metall run_bfs_bench_metall.cpp) -if (OpenMP_CXX_FOUND) - target_link_libraries(run_bfs_bench_metall PRIVATE OpenMP::OpenMP_CXX) -endif () +setup_omp_target(run_bfs_bench_metall) add_metall_executable(run_bfs_bench_metall_multiple run_bfs_bench_metall_multiple.cpp) -if (OpenMP_CXX_FOUND) - target_link_libraries(run_bfs_bench_metall_multiple PRIVATE OpenMP::OpenMP_CXX) -endif () +setup_omp_target(run_bfs_bench_metall_multiple) add_metall_executable(run_bfs_bench_bip run_bfs_bench_bip.cpp) -if (OpenMP_CXX_FOUND) - target_link_libraries(run_bfs_bench_bip PRIVATE OpenMP::OpenMP_CXX) -endif () +setup_omp_target(run_bfs_bench_bip) configure_file(run_bench.sh run_bench.sh COPYONLY) \ No newline at end of file diff --git a/cmake/setup_mpi.cmake b/cmake/setup_mpi.cmake new file mode 100644 index 00000000..171c5b20 --- /dev/null +++ b/cmake/setup_mpi.cmake @@ -0,0 +1,14 @@ +if (MPI_CXX_FOUND) + find_package(MPI) +endif () + +function(setup_mpi_target target) + if (MPI_CXX_FOUND) + target_link_libraries(${target} PRIVATE MPI::MPI_CXX) + if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux") + target_link_libraries(${target} PRIVATE rt) + endif () + else () + message(SEND_ERROR "MPI not found, ${target} will not be built") + endif () +endfunction() \ No newline at end of file diff --git a/cmake/setup_omp.cmake b/cmake/setup_omp.cmake new file mode 100644 index 00000000..76b37526 --- /dev/null +++ b/cmake/setup_omp.cmake @@ -0,0 +1,11 @@ +if (NOT OpenMP_CXX_FOUND) + find_package(OpenMP) +endif () + +function(setup_omp_target target) + if (OpenMP_CXX_FOUND) + target_link_libraries(${target} PUBLIC OpenMP::OpenMP_CXX) + else () + message(WARNING "OpenMP not found, ${target} will not be built with OpenMP support") + endif () +endfunction() \ No newline at end of file diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index 6d413745..fff4edf5 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -41,19 +41,13 @@ if (BUILD_C) target_link_libraries(c_api PRIVATE metall_c) endif() +include(setup_mpi) if (MPI_CXX_FOUND) add_metall_executable(mpi_create mpi_create.cpp) - target_link_libraries(mpi_create PRIVATE MPI::MPI_CXX) - if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux") - target_link_libraries(mpi_create PRIVATE rt) - endif() + setup_mpi_target(mpi_create) add_metall_executable(mpi_open mpi_open.cpp) - target_link_libraries(mpi_open PRIVATE MPI::MPI_CXX) - if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux") - target_link_libraries(mpi_open PRIVATE rt) - endif() - + setup_mpi_target(mpi_open) else() message(STATUS "Will skip building the MPI examples") endif() diff --git a/test/kernel/CMakeLists.txt b/test/kernel/CMakeLists.txt index 4c458f03..041110d7 100644 --- a/test/kernel/CMakeLists.txt +++ b/test/kernel/CMakeLists.txt @@ -17,10 +17,10 @@ add_metall_test_executable(snapshot_test snapshot_test.cpp) add_metall_test_executable(copy_file_test copy_file_test.cpp) -find_package(OpenMP) +include(setup_omp) if (OpenMP_CXX_FOUND) add_metall_test_executable(manager_multithread_test manager_multithread_test.cpp) - target_link_libraries(manager_multithread_test PRIVATE OpenMP::OpenMP_CXX) + setup_omp_target(manager_multithread_test) else() MESSAGE(STATUS "OpenMP is not found. Will not run multi-thread test.") endif() From 5f23150107b925c6f46ab037854ee66cbe23087f Mon Sep 17 00:00:00 2001 From: iwabuchi Date: Tue, 17 Oct 2023 10:18:55 -0700 Subject: [PATCH 18/25] Brush up on object cache. 
--- include/metall/detail/proc.hpp | 15 ++++++++-- include/metall/kernel/object_cache.hpp | 38 +++++++++++++------------- 2 files changed, 32 insertions(+), 21 deletions(-) diff --git a/include/metall/detail/proc.hpp b/include/metall/detail/proc.hpp index 7c131757..45439ca6 100644 --- a/include/metall/detail/proc.hpp +++ b/include/metall/detail/proc.hpp @@ -8,6 +8,8 @@ #include +#include + namespace metall::mtlldetail { #ifdef _GNU_SOURCE #define SUPPORT_GET_CPU_CORE_NO true @@ -15,8 +17,8 @@ namespace metall::mtlldetail { #define SUPPORT_GET_CPU_CORE_NO false #endif -/// \brief Returns the number of the CPU core on which the calling thread is -/// currently executing \return Returns a non-negative CPU core number +/// \brief Returns the number of the logical CPU core on which the calling +/// thread is currently executing. inline int get_cpu_core_no() { #if SUPPORT_GET_CPU_CORE_NO @@ -36,5 +38,14 @@ inline int get_cpu_core_no() { #endif } +/// \brief Returns the number of the logical CPU cores on the system. +inline int get_num_cpu_cores() { +#if SUPPORT_GET_CPU_CORE_NO + return get_nprocs_conf(); +#else + return int(std::thread::hardware_concurrency()); +#endif +} + } // namespace metall::mtlldetail #endif // METALL_DETAIL_UTILITY_PROC_HPP diff --git a/include/metall/kernel/object_cache.hpp b/include/metall/kernel/object_cache.hpp index 0d84f706..096dae66 100644 --- a/include/metall/kernel/object_cache.hpp +++ b/include/metall/kernel/object_cache.hpp @@ -63,11 +63,13 @@ class object_cache { #else 4; #endif + // The size of each cache bin in bytes. static constexpr std::size_t k_cache_bin_size = 1ULL << 20ULL; - static constexpr std::size_t k_max_cache_block_size = - 64; // Add and remove caches by up to this size + // Add and remove cache objects with this number of objects at a time. + static constexpr std::size_t k_max_num_objects_in_block = 64; + // The maximum object size to cache in byte. static constexpr std::size_t k_max_cache_object_size = - k_cache_bin_size / k_max_cache_block_size / 2; + k_cache_bin_size / k_max_num_objects_in_block / 2; static constexpr bin_no_type k_max_bin_no = bin_no_manager::to_bin_no(k_max_cache_object_size); static constexpr std::size_t k_cpu_core_no_cache_duration = 4; @@ -107,13 +109,7 @@ class object_cache { object_cache &operator=(const object_cache &) = default; object_cache &operator=(object_cache &&) noexcept = default; - // -------------------- // - // Public methods - // -------------------- // - - /// \brief - /// \param bin_no - /// \return + /// \brief Pop an object offset from the cache. difference_type pop(const bin_no_type bin_no, object_allocator_type *const allocator_instance, object_allocate_func_type allocator_function) { @@ -123,9 +119,11 @@ class object_cache { #ifdef METALL_ENABLE_MUTEX_IN_OBJECT_CACHE lock_guard_type guard(m_mutex[cache_no]); #endif + // If the cache is empty, allocate objects if (m_cache_table[cache_no].empty(bin_no)) { - difference_type allocated_offsets[k_max_cache_block_size]; + difference_type allocated_offsets[k_max_num_objects_in_block]; const auto block_size = priv_get_cache_block_size(bin_no); + assert(block_size <= k_max_num_objects_in_block); (allocator_instance->*allocator_function)(bin_no, block_size, allocated_offsets); for (std::size_t i = 0; i < block_size; ++i) { @@ -138,9 +136,9 @@ class object_cache { return offset; } - /// \brief - /// \param bin_no - /// \param object_offset + /// \brief Cache an object. + /// If the cache is full, multiple objects are deallocated at a time. 
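+ /// Specifically, one cache block's worth of objects, that is,
+ /// priv_get_cache_block_size(bin_no) of them, is returned to the global
+ /// allocator in a single call.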
+ /// Return false if an error occurs. bool push(const bin_no_type bin_no, const difference_type object_offset, object_allocator_type *const allocator_instance, object_deallocate_func_type deallocator_function) { @@ -153,10 +151,12 @@ class object_cache { #endif m_cache_table[cache_no].push(bin_no, object_offset); + // If the cache is full, deallocate objects if (m_cache_table[cache_no].full(bin_no)) { const auto block_size = priv_get_cache_block_size(bin_no); assert(m_cache_table[cache_no].size(bin_no) >= block_size); - difference_type offsets[k_max_cache_object_size]; + difference_type offsets[k_max_num_objects_in_block]; + assert(block_size <= k_max_num_objects_in_block); for (std::size_t i = 0; i < block_size; ++i) { offsets[i] = m_cache_table[cache_no].front(bin_no); m_cache_table[cache_no].pop(bin_no); @@ -199,8 +199,8 @@ class object_cache { static constexpr std::size_t priv_get_cache_block_size( const bin_no_type bin_no) noexcept { const auto object_size = bin_no_manager::to_object_size(bin_no); - // Returns a value on the interval [8, k_max_cache_block_size]. - return std::max(std::min(4096 / object_size, k_max_cache_block_size), + // Returns a value on the interval [8, k_max_num_objects_in_block]. + return std::max(std::min(4096 / object_size, k_max_num_objects_in_block), static_cast(8)); } @@ -253,7 +253,7 @@ class object_cache { /// This function does not call the system call every time as it is slow. static std::size_t priv_get_core_no() { #ifdef METALL_DISABLE_CONCURRENCY - return 0; + return 0; #endif thread_local static int cached_core_no = 0; thread_local static int cached_count = 0; @@ -268,7 +268,7 @@ class object_cache { #ifdef METALL_DISABLE_CONCURRENCY return 1; #else - return std::thread::hardware_concurrency(); + return mdtl::get_num_cpu_cores(); #endif } From 1f17c4836a3e6df187c11cb44357617413c4a676 Mon Sep 17 00:00:00 2001 From: iwabuchi Date: Tue, 17 Oct 2023 10:41:32 -0700 Subject: [PATCH 19/25] Bugfix in proc.hpp --- include/metall/detail/proc.hpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/include/metall/detail/proc.hpp b/include/metall/detail/proc.hpp index 45439ca6..7320520c 100644 --- a/include/metall/detail/proc.hpp +++ b/include/metall/detail/proc.hpp @@ -8,15 +8,17 @@ #include -#include - -namespace metall::mtlldetail { #ifdef _GNU_SOURCE +#include #define SUPPORT_GET_CPU_CORE_NO true #else #define SUPPORT_GET_CPU_CORE_NO false #endif +#include + +namespace metall::mtlldetail { + /// \brief Returns the number of the logical CPU core on which the calling /// thread is currently executing. inline int get_cpu_core_no() { From 41363c38af3a041eeadd347262c5d3e0f2a1134b Mon Sep 17 00:00:00 2001 From: iwabuchi Date: Tue, 17 Oct 2023 19:25:29 -0700 Subject: [PATCH 20/25] Brush up on cache --- include/metall/defs.hpp | 12 +++++ include/metall/detail/proc.hpp | 14 +++--- include/metall/kernel/object_cache.hpp | 50 +++++++++------------ include/metall/kernel/segment_allocator.hpp | 3 +- 4 files changed, 41 insertions(+), 38 deletions(-) diff --git a/include/metall/defs.hpp b/include/metall/defs.hpp index 72021665..9e54ed04 100644 --- a/include/metall/defs.hpp +++ b/include/metall/defs.hpp @@ -63,4 +63,16 @@ #define METALL_DISABLE_CONCURRENCY #endif +/// \def METALL_ENABLE_MUTEX_IN_OBJECT_CACHE +/// Cache size per bin in bytes. +#ifndef METALL_CACHE_BIN_SIZE +#define METALL_CACHE_BIN_SIZE (1ULL << 20ULL) +#endif + +/// \def METALL_NUM_CACHES_PER_CPU +/// The number of caches per CPU (logical CPU core). 
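+/// Thanks to the #ifndef guard below, the value can be overridden at build
+/// time, for example by passing -DMETALL_NUM_CACHES_PER_CPU=8 to the
+/// compiler (the value 8 is only an illustration).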
+#ifndef METALL_NUM_CACHES_PER_CPU +#define METALL_NUM_CACHES_PER_CPU 4 +#endif + #endif // METALL_DEFS_HPP diff --git a/include/metall/detail/proc.hpp b/include/metall/detail/proc.hpp index 7320520c..e5143423 100644 --- a/include/metall/detail/proc.hpp +++ b/include/metall/detail/proc.hpp @@ -10,9 +10,9 @@ #ifdef _GNU_SOURCE #include -#define SUPPORT_GET_CPU_CORE_NO true +#define SUPPORT_GET_CPU_NO true #else -#define SUPPORT_GET_CPU_CORE_NO false +#define SUPPORT_GET_CPU_NO false #endif #include @@ -21,8 +21,8 @@ namespace metall::mtlldetail { /// \brief Returns the number of the logical CPU core on which the calling /// thread is currently executing. -inline int get_cpu_core_no() { -#if SUPPORT_GET_CPU_CORE_NO +inline int get_cpu_no() { +#if SUPPORT_GET_CPU_NO const int cpu = ::sched_getcpu(); if (cpu == -1) { @@ -33,7 +33,7 @@ inline int get_cpu_core_no() { #else #ifdef METALL_VERBOSE_SYSTEM_SUPPORT_WARNING -#warning "CPU core number is always 0" +#warning "CPU number is always 0" #endif return 0; @@ -41,8 +41,8 @@ inline int get_cpu_core_no() { } /// \brief Returns the number of the logical CPU cores on the system. -inline int get_num_cpu_cores() { -#if SUPPORT_GET_CPU_CORE_NO +inline int get_num_cpus() { +#if SUPPORT_GET_CPU_NO return get_nprocs_conf(); #else return int(std::thread::hardware_concurrency()); diff --git a/include/metall/kernel/object_cache.hpp b/include/metall/kernel/object_cache.hpp index 096dae66..cc658bb5 100644 --- a/include/metall/kernel/object_cache.hpp +++ b/include/metall/kernel/object_cache.hpp @@ -33,7 +33,7 @@ namespace { namespace mdtl = metall::mtlldetail; } -template class object_cache { @@ -41,7 +41,6 @@ class object_cache { // -------------------- // // Public types and static values // -------------------- // - static constexpr unsigned int k_num_bins = _k_num_bins; using size_type = _size_type; using difference_type = _difference_type; using bin_no_manager = _bin_no_manager; @@ -57,14 +56,15 @@ class object_cache { // Private types and static values // -------------------- // - static constexpr std::size_t k_num_cache_per_core = + static constexpr std::size_t k_num_cache_per_cpu = #ifdef METALL_DISABLE_CONCURRENCY 1; #else - 4; + METALL_NUM_CACHES_PER_CPU; #endif + // The size of each cache bin in bytes. - static constexpr std::size_t k_cache_bin_size = 1ULL << 20ULL; + static constexpr std::size_t k_cache_bin_size = METALL_CACHE_BIN_SIZE; // Add and remove cache objects with this number of objects at a time. static constexpr std::size_t k_max_num_objects_in_block = 64; // The maximum object size to cache in byte. @@ -72,7 +72,7 @@ class object_cache { k_cache_bin_size / k_max_num_objects_in_block / 2; static constexpr bin_no_type k_max_bin_no = bin_no_manager::to_bin_no(k_max_cache_object_size); - static constexpr std::size_t k_cpu_core_no_cache_duration = 4; + static constexpr std::size_t k_cpu_no_cache_duration = 4; using single_cache_type = object_cache_container{}(std::this_thread::get_id()) % - k_num_cache_per_core; - const std::size_t core_num = priv_get_core_no(); - return mdtl::hash<>{}(core_num * k_num_cache_per_core + sub_cache_no) % + k_num_cache_per_cpu; + const std::size_t cpu_no = priv_get_cpu_no(); + return mdtl::hash<>{}(cpu_no * k_num_cache_per_cpu + sub_cache_no) % m_cache_table.size(); #else thread_local static const auto hashed_thread_id = mdtl::hash<>{}( @@ -249,27 +249,19 @@ class object_cache { #endif } - /// \brief Get CPU core number. + /// \brief Get CPU number. 
/// This function does not call the system call every time as it is slow. - static std::size_t priv_get_core_no() { + static std::size_t priv_get_cpu_no() { #ifdef METALL_DISABLE_CONCURRENCY return 0; #endif - thread_local static int cached_core_no = 0; + thread_local static int cached_cpu_no = 0; thread_local static int cached_count = 0; - if (cached_core_no == 0) { - cached_core_no = mdtl::get_cpu_core_no(); + if (cached_cpu_no == 0) { + cached_cpu_no = mdtl::get_cpu_no(); } - cached_count = (cached_count + 1) % k_cpu_core_no_cache_duration; - return cached_core_no; - } - - static std::size_t get_num_cores() { -#ifdef METALL_DISABLE_CONCURRENCY - return 1; -#else - return mdtl::get_num_cpu_cores(); -#endif + cached_count = (cached_count + 1) % k_cpu_no_cache_duration; + return cached_cpu_no; } // -------------------- // diff --git a/include/metall/kernel/segment_allocator.hpp b/include/metall/kernel/segment_allocator.hpp index a92895cd..04ff3da4 100644 --- a/include/metall/kernel/segment_allocator.hpp +++ b/include/metall/kernel/segment_allocator.hpp @@ -93,8 +93,7 @@ class segment_allocator { // For object cache #ifndef METALL_DISABLE_OBJECT_CACHE using small_object_cache_type = - object_cache; + object_cache; #endif #ifdef METALL_ENABLE_MUTEX_IN_SEGMENT_ALLOCATOR From 4ee1726862505f9ae9fa2ae69d2382439baf27f1 Mon Sep 17 00:00:00 2001 From: iwabuchi Date: Tue, 24 Oct 2023 21:21:27 -0700 Subject: [PATCH 21/25] New object cache --- include/metall/defs.hpp | 10 +- include/metall/detail/proc.hpp | 8 +- include/metall/kernel/object_cache.hpp | 771 +++++++++++++++--- .../metall/kernel/object_cache_container.hpp | 180 ---- include/metall/kernel/segment_allocator.hpp | 20 +- test/kernel/CMakeLists.txt | 2 + test/kernel/object_cache_test.cpp | 156 ++++ 7 files changed, 809 insertions(+), 338 deletions(-) delete mode 100644 include/metall/kernel/object_cache_container.hpp create mode 100644 test/kernel/object_cache_test.cpp diff --git a/include/metall/defs.hpp b/include/metall/defs.hpp index 9e54ed04..bd2e51d5 100644 --- a/include/metall/defs.hpp +++ b/include/metall/defs.hpp @@ -63,16 +63,16 @@ #define METALL_DISABLE_CONCURRENCY #endif -/// \def METALL_ENABLE_MUTEX_IN_OBJECT_CACHE -/// Cache size per bin in bytes. -#ifndef METALL_CACHE_BIN_SIZE -#define METALL_CACHE_BIN_SIZE (1ULL << 20ULL) +/// \def METALL_MAX_PER_CPU_CACHE_SIZE +/// The maximum size of the per CPU (logical CPU core) cache in bytes. +#ifndef METALL_MAX_PER_CPU_CACHE_SIZE +#define METALL_MAX_PER_CPU_CACHE_SIZE (1ULL << 20ULL) #endif /// \def METALL_NUM_CACHES_PER_CPU /// The number of caches per CPU (logical CPU core). #ifndef METALL_NUM_CACHES_PER_CPU -#define METALL_NUM_CACHES_PER_CPU 4 +#define METALL_NUM_CACHES_PER_CPU 2 #endif #endif // METALL_DEFS_HPP diff --git a/include/metall/detail/proc.hpp b/include/metall/detail/proc.hpp index e5143423..3d46dd01 100644 --- a/include/metall/detail/proc.hpp +++ b/include/metall/detail/proc.hpp @@ -21,7 +21,7 @@ namespace metall::mtlldetail { /// \brief Returns the number of the logical CPU core on which the calling /// thread is currently executing. -inline int get_cpu_no() { +inline unsigned int get_cpu_no() { #if SUPPORT_GET_CPU_NO const int cpu = ::sched_getcpu(); @@ -41,11 +41,11 @@ inline int get_cpu_no() { } /// \brief Returns the number of the logical CPU cores on the system. 
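/// On Linux with _GNU_SOURCE, this uses ::get_nprocs_conf(), which counts
/// configured processors and may therefore exceed the number currently
/// online; elsewhere it falls back to std::thread::hardware_concurrency().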
-inline int get_num_cpus() { +inline unsigned int get_num_cpus() { #if SUPPORT_GET_CPU_NO - return get_nprocs_conf(); + return ::get_nprocs_conf(); #else - return int(std::thread::hardware_concurrency()); + return std::thread::hardware_concurrency(); #endif } diff --git a/include/metall/kernel/object_cache.hpp b/include/metall/kernel/object_cache.hpp index cc658bb5..3f03e289 100644 --- a/include/metall/kernel/object_cache.hpp +++ b/include/metall/kernel/object_cache.hpp @@ -13,10 +13,8 @@ #include #include #include +#include -#include - -#include #include #include @@ -27,20 +25,334 @@ #include #endif +// #define METALL_OBJECT_CACHE_HEAVY_DEBUG +#ifdef METALL_OBJECT_CACHE_HEAVY_DEBUG +#warning "METALL_OBJECT_CACHE_HEAVY_DEBUG is defined" +#endif + namespace metall::kernel { namespace { namespace mdtl = metall::mtlldetail; } -template +namespace obcdetail { + +template +struct cache_block { + static constexpr unsigned int k_capacity = 64; + + cache_block() = delete; + + inline void clear() { + bin_no = std::numeric_limits::max(); + older_block = nullptr; + newer_block = nullptr; + bin_older_block = nullptr; + bin_newer_block = nullptr; + } + + // Remove myself from the linked-lists + inline void disconnect() { + if (newer_block) { + newer_block->older_block = older_block; + } + if (older_block) { + older_block->newer_block = newer_block; + } + if (bin_newer_block) { + bin_newer_block->bin_older_block = bin_older_block; + } + if (bin_older_block) { + bin_older_block->bin_newer_block = bin_newer_block; + } + } + + inline void link_to_older(cache_block *const block, + cache_block *const bin_block) { + older_block = block; + if (block) { + block->newer_block = this; + } + bin_older_block = bin_block; + if (bin_block) { + bin_block->bin_newer_block = this; + } + } + + // TODO: take bin_no from outside + unsigned int bin_no; + cache_block *older_block; + cache_block *newer_block; + cache_block *bin_older_block; + cache_block *bin_newer_block; + difference_type cache[k_capacity]; +}; + +template +class bin_header { + public: + using cache_block_type = cache_block; + + bin_header() { clear(); } + + inline void clear() { + m_active_block_size = 0; + m_active_block = nullptr; + } + + // Move the active block to the next block + inline void move_to_next_active_block() { + if (!m_active_block) return; + m_active_block = m_active_block->bin_older_block; + m_active_block_size = + (m_active_block) ? 
cache_block::k_capacity : 0; + } + + inline void update_active_block(const cache_block_type *const block, + const std::size_t num_objects) { + m_active_block = block; + m_active_block_size = num_objects; + } + + inline std::size_t &active_block_size() noexcept { + return m_active_block_size; + } + + inline std::size_t active_block_size() const noexcept { + return m_active_block_size; + } + + inline cache_block_type *active_block() noexcept { + return const_cast(m_active_block); + } + + inline const cache_block_type *active_block() const noexcept { + return m_active_block; + } + + private: + std::size_t m_active_block_size{0}; + const cache_block_type *m_active_block{nullptr}; +}; + +template +class free_blocks_list { + public: + using cache_block_type = cache_block; + + free_blocks_list(const cache_block_type *uninit_top, std::size_t num_blocks) + : m_blocks(nullptr), + m_uninit_top(uninit_top), + m_last_block(uninit_top + num_blocks - 1) { + assert(uninit_top); + assert(num_blocks > 0); + } + + inline bool empty() const noexcept { return !m_blocks && !m_uninit_top; } + + // Returns an available free block + cache_block_type *pop() { + cache_block_type *block = nullptr; + if (m_blocks) { + block = const_cast(m_blocks); + m_blocks = m_blocks->older_block; + } else { + if (m_uninit_top) { + block = const_cast(m_uninit_top); + if (m_uninit_top == m_last_block) { + m_uninit_top = nullptr; + } else { + ++m_uninit_top; + } + } + } + assert(block); + return block; + } + + // Push a block to internal block pool + void push(cache_block_type *block) { + assert(block); + block->older_block = const_cast(m_blocks); + m_blocks = block; + } + + void clear() { + m_blocks = nullptr; + m_uninit_top = nullptr; + m_last_block = nullptr; + } + + private: + const cache_block_type *m_blocks; + const cache_block_type *m_uninit_top; // uninitialized block top + const cache_block_type *m_last_block; +}; + +template +struct cache_header { + public: + using cache_block_type = cache_block; + + cache_header(const cache_block *const blocks, + std::size_t num_blocks) + : m_total_size_byte(0), m_free_blocks(blocks, num_blocks) { + assert(blocks); + assert(num_blocks > 0); + } + + void clear() { + m_total_size_byte = 0; + m_oldest_active_block = nullptr; + m_newest_active_block = nullptr; + m_free_blocks.clear(); + } + + inline void unregister(const cache_block_type *const block) { + if (block == m_newest_active_block) { + m_newest_active_block = block->older_block; + } + if (block == m_oldest_active_block) { + m_oldest_active_block = block->newer_block; + } + } + + inline void register_new_block(const cache_block_type *const block) { + m_newest_active_block = block; + if (!m_oldest_active_block) { + m_oldest_active_block = block; + } + } + + inline std::size_t &total_size_byte() noexcept { return m_total_size_byte; } + + inline std::size_t total_size_byte() const noexcept { + return m_total_size_byte; + } + + inline cache_block_type *newest_active_block() noexcept { + return const_cast(m_newest_active_block); + } + + inline const cache_block_type *newest_active_block() const noexcept { + return m_newest_active_block; + } + + inline cache_block_type *oldest_active_block() noexcept { + return const_cast(m_oldest_active_block); + } + + inline const cache_block_type *oldest_active_block() const noexcept { + return m_oldest_active_block; + } + + inline free_blocks_list &free_blocks() noexcept { + return m_free_blocks; + } + + inline const free_blocks_list &free_blocks() const noexcept { + return m_free_blocks; + } + + private: 
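+  // Sum of the sizes, in bytes, of all objects currently cached here.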
+ std::size_t m_total_size_byte; + const cache_block_type *m_oldest_active_block{nullptr}; + const cache_block_type *m_newest_active_block{nullptr}; + free_blocks_list m_free_blocks; +}; + +template +struct cache_container { + using cache_heaer_type = cache_header; + using bin_header_type = bin_header; + using cacbe_block_type = cache_block; + + void init() { + new (&header) cache_heaer_type(blocks, num_blocks_per_cache); + // Memo: The in-place an array construction may not be supported by some + // compilers. + for (std::size_t i = 0; i <= max_bin_no; ++i) { + new (&bin_headers[i]) bin_header_type(); + } + } + + void clear_headers() { + header.clear(); + for (std::size_t i = 0; i <= max_bin_no; ++i) { + bin_headers[i].clear(); + } + } + + cache_heaer_type header; + bin_header_type bin_headers[max_bin_no + 1]; + cacbe_block_type blocks[num_blocks_per_cache]; +}; + +// The maximum number of objects in a cache block for the bin number. +template +inline constexpr unsigned int comp_chunk_size( + const typename bin_no_manager::bin_no_type bin_no) noexcept { + const auto object_size = bin_no_manager::to_object_size(bin_no); + // 4096 is meant for page size so that we do not move memory larger than a + // page. + // 8 is meant for the minimum number of objects to cache within a block. + return std::max(std::min((unsigned int)(4096 / object_size), + cache_block::k_capacity), + (unsigned int)(8)); +} + +// Calculate the max bin number can be cached +// considering the internal implementation. +template +inline constexpr typename bin_no_manager::bin_no_type comp_max_bin_no( + const std::size_t max_per_cpu_cache_size, + const std::size_t max_object_size_request) noexcept { + constexpr unsigned int k_num_min_chunks_per_bin = 2; + + typename bin_no_manager::bin_no_type b = 0; + // Support only small bins + for (; b < bin_no_manager::num_small_bins(); ++b) { + const auto min_required_cache_size = + comp_chunk_size(b) * + k_num_min_chunks_per_bin * bin_no_manager::to_object_size(b); + + if (max_per_cpu_cache_size < min_required_cache_size) { + if (b == 0) { + logger::out(logger::level::error, __FILE__, __LINE__, + "The request max per-CPU cache size is too small"); + return 0; + } + --b; + break; + } + } + + return std::min(bin_no_manager::to_bin_no(max_object_size_request), b); +} + +template +inline constexpr std::size_t comp_max_num_objects_per_cache( + const std::size_t max_per_cpu_cache_size) noexcept { + return max_per_cpu_cache_size / bin_no_manager::to_object_size(0); +} + +template +inline constexpr std::size_t comp_num_blocks_per_cache( + const std::size_t max_per_cpu_cache_size) noexcept { + return comp_max_num_objects_per_cache( + max_per_cpu_cache_size) / + cache_block::k_capacity; +} + +} // namespace obcdetail + +/// \brief A cache for small objects. +template class object_cache { public: - // -------------------- // - // Public types and static values - // -------------------- // using size_type = _size_type; using difference_type = _difference_type; using bin_no_manager = _bin_no_manager; @@ -52,55 +364,70 @@ class object_cache { const bin_no_type, const size_type, const difference_type *const); private: - // -------------------- // - // Private types and static values - // -------------------- // - - static constexpr std::size_t k_num_cache_per_cpu = + static constexpr unsigned int k_num_caches_per_cpu = #ifdef METALL_DISABLE_CONCURRENCY 1; #else METALL_NUM_CACHES_PER_CPU; #endif - // The size of each cache bin in bytes. 
- static constexpr std::size_t k_cache_bin_size = METALL_CACHE_BIN_SIZE; - // Add and remove cache objects with this number of objects at a time. - static constexpr std::size_t k_max_num_objects_in_block = 64; +#ifndef METALL_MAX_PER_CPU_CACHE_SIZE +#error "METALL_MAX_PER_CPU_CACHE_SIZE=byte must be defined" +#endif + // The actual value is determined, considering other restrictions + // such as the max object size to cache. + static constexpr size_type k_max_per_cpu_cache_size = + METALL_MAX_PER_CPU_CACHE_SIZE; + // The maximum object size to cache in byte. - static constexpr std::size_t k_max_cache_object_size = - k_cache_bin_size / k_max_num_objects_in_block / 2; + // The actual max object size is determined, considering other restrictions + // such as the max per-CPU cache size. + static constexpr size_type k_max_object_size = k_max_per_cpu_cache_size / 16; + + // How long the CPU number is cached. + static constexpr unsigned int k_cpu_no_cache_duration = 4; + static constexpr bin_no_type k_max_bin_no = - bin_no_manager::to_bin_no(k_max_cache_object_size); - static constexpr std::size_t k_cpu_no_cache_duration = 4; + obcdetail::comp_max_bin_no( + k_max_per_cpu_cache_size, k_max_object_size); - using single_cache_type = - object_cache_container; - using cache_table_type = std::vector; + static constexpr size_type k_num_blocks_per_cache = + obcdetail::comp_num_blocks_per_cache( + k_max_per_cpu_cache_size); #ifdef METALL_ENABLE_MUTEX_IN_OBJECT_CACHE using mutex_type = mdtl::mutex; using lock_guard_type = mdtl::mutex_lock_guard; #endif + using cache_storage_type = + obcdetail::cache_container; + using cache_block_type = typename cache_storage_type::cacbe_block_type; + public: - // -------------------- // - // Public types and static values - // -------------------- // - using const_bin_iterator = typename single_cache_type::const_iterator; - - // -------------------- // - // Constructor & assign operator - // -------------------- // - object_cache() - : m_cache_table(mdtl::get_num_cpus() * k_num_cache_per_cpu) + class const_bin_iterator; + + inline static constexpr size_type max_per_cpu_cache_size() noexcept { + return k_max_per_cpu_cache_size; + } + + inline static constexpr size_type num_caches_per_cpu() noexcept { + return k_num_caches_per_cpu; + } + + inline static constexpr bin_no_type max_bin_no() noexcept { + return k_max_bin_no; + } + + explicit object_cache() + : m_num_caches(priv_get_num_cpus() * k_num_caches_per_cpu) #ifdef METALL_ENABLE_MUTEX_IN_OBJECT_CACHE , - m_mutex(m_cache_table.size()) + m_mutex(m_num_caches) #endif { - priv_const_helper(); + priv_allocate_cache(); } ~object_cache() noexcept = default; @@ -110,153 +437,212 @@ class object_cache { object_cache &operator=(object_cache &&) noexcept = default; /// \brief Pop an object offset from the cache. + /// If the cache is empty, allocate objects and cache them first. 
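+ ///
+ /// The segment allocator in this patch invokes it along these lines,
+ /// passing member-function-pointer callbacks into the global allocator:
+ /// \code
+ /// const auto offset = m_object_cache.pop(
+ ///     bin_no, this, &myself::priv_allocate_small_objects_from_global,
+ ///     &myself::priv_deallocate_small_objects_from_global);
+ /// \endcode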
difference_type pop(const bin_no_type bin_no, object_allocator_type *const allocator_instance, - object_allocate_func_type allocator_function) { - if (bin_no > max_bin_no()) return -1; + object_allocate_func_type allocator_function, + object_deallocate_func_type deallocator_function) { + assert(bin_no <= max_bin_no()); - const auto cache_no = priv_comp_cache_no(); + const auto cache_no = priv_cache_no(); #ifdef METALL_ENABLE_MUTEX_IN_OBJECT_CACHE lock_guard_type guard(m_mutex[cache_no]); #endif - // If the cache is empty, allocate objects - if (m_cache_table[cache_no].empty(bin_no)) { - difference_type allocated_offsets[k_max_num_objects_in_block]; - const auto block_size = priv_get_cache_block_size(bin_no); - assert(block_size <= k_max_num_objects_in_block); - (allocator_instance->*allocator_function)(bin_no, block_size, - allocated_offsets); - for (std::size_t i = 0; i < block_size; ++i) { - m_cache_table[cache_no].push(bin_no, allocated_offsets[i]); + + auto &cache = m_cache[cache_no]; + auto &cache_header = cache.header; + auto &bin_header = cache.bin_headers[bin_no]; + const auto object_size = bin_no_manager::to_object_size(bin_no); + + if (bin_header.active_block_size() == 0) { // Active block is empty + + if (bin_header.active_block()) { + // Move to next active block if that is available + auto *const empty_block = bin_header.active_block(); + bin_header.move_to_next_active_block(); + cache_header.unregister(empty_block); + empty_block->disconnect(); + cache_header.free_blocks().push(empty_block); } - } - const auto offset = m_cache_table[cache_no].front(bin_no); - m_cache_table[cache_no].pop(bin_no); - return offset; + if (bin_header.active_block_size() == 0) { + assert(!bin_header.active_block()); + + // There is no cached objects for the bin. + // Allocate some objects and cache them to a free block. + const auto num_new_objects = + obcdetail::comp_chunk_size(bin_no); + const auto new_objects_size = num_new_objects * object_size; + + // Make sure that the cache has enough space to allocate objects. + priv_make_room_for_new_blocks(cache_no, new_objects_size, + allocator_instance, deallocator_function); + assert(!cache_header.free_blocks().empty()); + + // allocate objects to the new block + auto *new_block = cache_header.free_blocks().pop(); + assert(new_block); + new_block->clear(); + new_block->bin_no = bin_no; + (allocator_instance->*allocator_function)(bin_no, num_new_objects, + new_block->cache); + + // Link the new block to the existing blocks + new_block->link_to_older(cache_header.newest_active_block(), + bin_header.active_block()); + + // Update headers + cache_header.register_new_block(new_block); + cache_header.total_size_byte() += new_objects_size; + assert(cache_header.total_size_byte() <= k_max_per_cpu_cache_size); + bin_header.update_active_block(new_block, num_new_objects); + } + } + assert(bin_header.active_block_size() > 0); + + // Pop an object from the active block + --bin_header.active_block_size(); + const auto object_offset = + bin_header.active_block()->cache[bin_header.active_block_size()]; + assert(cache_header.total_size_byte() >= object_size); + cache_header.total_size_byte() -= object_size; + return object_offset; } /// \brief Cache an object. - /// If the cache is full, multiple objects are deallocated at a time. + /// If the cache is full, deallocate some cached objects first. /// Return false if an error occurs. 
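+ ///
+ /// Mirroring pop(), the segment allocator invokes it along these lines:
+ /// \code
+ /// m_object_cache.push(bin_no, offset, this,
+ ///                     &myself::priv_deallocate_small_objects_from_global);
+ /// \endcode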
bool push(const bin_no_type bin_no, const difference_type object_offset, object_allocator_type *const allocator_instance, object_deallocate_func_type deallocator_function) { assert(object_offset >= 0); - if (bin_no > max_bin_no()) return false; // Error + assert(bin_no <= max_bin_no()); - const auto cache_no = priv_comp_cache_no(); + const auto cache_no = priv_cache_no(); #ifdef METALL_ENABLE_MUTEX_IN_OBJECT_CACHE lock_guard_type guard(m_mutex[cache_no]); #endif - m_cache_table[cache_no].push(bin_no, object_offset); - - // If the cache is full, deallocate objects - if (m_cache_table[cache_no].full(bin_no)) { - const auto block_size = priv_get_cache_block_size(bin_no); - assert(m_cache_table[cache_no].size(bin_no) >= block_size); - difference_type offsets[k_max_num_objects_in_block]; - assert(block_size <= k_max_num_objects_in_block); - for (std::size_t i = 0; i < block_size; ++i) { - offsets[i] = m_cache_table[cache_no].front(bin_no); - m_cache_table[cache_no].pop(bin_no); - } - (allocator_instance->*deallocator_function)(bin_no, block_size, offsets); + + auto &cache = m_cache[cache_no]; + auto &cache_header = cache.header; + auto &bin_header = cache.bin_headers[bin_no]; + const auto object_size = bin_no_manager::to_object_size(bin_no); + + // Make sure that the cache has enough space to allocate objects. + // TODO: This is not efficient. We do not have to make a free block always. + priv_make_room_for_new_blocks(cache_no, object_size, allocator_instance, + deallocator_function); + + if (!bin_header.active_block() || + bin_header.active_block_size() == cache_block_type::k_capacity) { + // There is no cached objects for the bin or + // the active block is full. + assert(!cache_header.free_blocks().empty()); + + auto *free_block = cache_header.free_blocks().pop(); + assert(free_block); + free_block->clear(); + free_block->bin_no = bin_no; + free_block->link_to_older(cache_header.newest_active_block(), + bin_header.active_block()); + cache_header.register_new_block(free_block); + bin_header.update_active_block(free_block, 0); } + // push an object to the active block + bin_header.active_block()->cache[bin_header.active_block_size()] = + object_offset; + ++bin_header.active_block_size(); + cache_header.total_size_byte() += object_size; + assert(cache_header.total_size_byte() <= k_max_per_cpu_cache_size); + return true; } - void clear() { - for (auto &table : m_cache_table) { - table.clear(); + /// \brief Clear all cached objects. + /// Cached objects are going to be deallocated. + void clear(object_allocator_type *const allocator_instance, + object_deallocate_func_type deallocator_function) { + for (size_type c = 0; c < m_num_caches; ++c) { + auto &cache = m_cache[c]; + for (bin_no_type b = 0; b <= k_max_bin_no; ++b) { + auto &bin_header = cache.bin_headers[b]; + cache_block_type *block = bin_header.active_block(); + size_type num_objects = bin_header.active_block_size(); + while (block) { + if (num_objects > 0) { + (allocator_instance->*deallocator_function)(b, num_objects, + block->cache); + } + block = block->bin_older_block; + num_objects = cache_block_type::k_capacity; + } + } + cache.clear_headers(); } } - std::size_t num_caches() const { return m_cache_table.size(); } - - /// \brief The max bin number this cache manages. 
- static constexpr bin_no_type max_bin_no() { return k_max_bin_no; } + inline size_type num_caches() const noexcept { return m_num_caches; } - const_bin_iterator begin(const std::size_t cache_no, + const_bin_iterator begin(const size_type cache_no, const bin_no_type bin_no) const { - return m_cache_table[cache_no].begin(bin_no); - } + assert(cache_no < m_num_caches); + assert(bin_no <= k_max_bin_no); + const auto &cache = m_cache[cache_no]; + const auto &bin_header = cache.bin_headers[bin_no]; + + if (bin_header.active_block_size() == 0) { + if (!bin_header.active_block() || + !bin_header.active_block()->bin_older_block) { + // There is no cached objects for the bin. + return const_bin_iterator(); + } + // Start from the older block as the active block is empty + return const_bin_iterator(bin_header.active_block()->bin_older_block, + cache_block_type::k_capacity - 1); + } - const_bin_iterator end(const std::size_t cache_no, - const bin_no_type bin_no) const { - return m_cache_table[cache_no].end(bin_no); + return const_bin_iterator(bin_header.active_block(), + bin_header.active_block_size() - 1); } - private: - // -------------------- // - // Private types and static values - // -------------------- // - - // -------------------- // - // Private methods - // -------------------- // - static constexpr std::size_t priv_get_cache_block_size( - const bin_no_type bin_no) noexcept { - const auto object_size = bin_no_manager::to_object_size(bin_no); - // Returns a value on the interval [8, k_max_num_objects_in_block]. - return std::max(std::min(4096 / object_size, k_max_num_objects_in_block), - static_cast(8)); + const_bin_iterator end([[maybe_unused]] const size_type cache_no, + [[maybe_unused]] const bin_no_type bin_no) const { + assert(cache_no < m_num_caches); + assert(bin_no <= k_max_bin_no); + return const_bin_iterator(); } - void priv_const_helper() { - if (mdtl::get_num_cpus() == 0) { - logger::out(logger::level::critical, __FILE__, __LINE__, - "The achieved number of cpus is zero"); - return; - } - { - std::stringstream ss; - ss << "The number of cpus: " << mdtl::get_num_cpus(); - logger::out(logger::level::info, __FILE__, __LINE__, ss.str().c_str()); - } - { - std::stringstream ss; - ss << "#of caches: " << m_cache_table.size(); - logger::out(logger::level::info, __FILE__, __LINE__, ss.str().c_str()); - } - { - std::stringstream ss; - ss << "Cache capacity per bin: "; - for (std::size_t b = 0; b < single_cache_type::num_bins(); ++b) { - ss << single_cache_type::bin_capacity(b); - if (b < single_cache_type::num_bins() - 1) ss << " "; - } - logger::out(logger::level::info, __FILE__, __LINE__, ss.str().c_str()); - } + private: + inline static unsigned int priv_get_num_cpus() { + return mdtl::get_num_cpus(); } - std::size_t priv_comp_cache_no() const { + inline size_type priv_cache_no() const { #ifdef METALL_DISABLE_CONCURRENCY return 0; #endif #if SUPPORT_GET_CPU_NO thread_local static const auto sub_cache_no = std::hash{}(std::this_thread::get_id()) % - k_num_cache_per_cpu; - const std::size_t cpu_no = priv_get_cpu_no(); - return mdtl::hash<>{}(cpu_no * k_num_cache_per_cpu + sub_cache_no) % - m_cache_table.size(); + k_num_caches_per_cpu; + return (priv_get_cpu_no() * k_num_caches_per_cpu + sub_cache_no) % + m_num_caches; #else thread_local static const auto hashed_thread_id = mdtl::hash<>{}( std::hash{}(std::this_thread::get_id())); - return hashed_thread_id % m_cache_table.size(); + return hashed_thread_id % m_num_caches; #endif } /// \brief Get CPU number. 
/// This function does not call the system call every time as it is slow. - static std::size_t priv_get_cpu_no() { + inline static size_type priv_get_cpu_no() { #ifdef METALL_DISABLE_CONCURRENCY return 0; #endif - thread_local static int cached_cpu_no = 0; - thread_local static int cached_count = 0; + thread_local static unsigned int cached_cpu_no = 0; + thread_local static unsigned int cached_count = 0; if (cached_cpu_no == 0) { cached_cpu_no = mdtl::get_cpu_no(); } @@ -264,14 +650,129 @@ class object_cache { return cached_cpu_no; } - // -------------------- // - // Private fields - // -------------------- // - cache_table_type m_cache_table; + bool priv_allocate_cache() { + auto *const mem = std::malloc(sizeof(cache_storage_type) * m_num_caches); + m_cache.reset(reinterpret_cast(mem)); + if (!m_cache) { + logger::out(logger::level::error, __FILE__, __LINE__, + "Failed to allocate memory for the cache"); + return false; + } + + for (size_type i = 0; i < m_num_caches; ++i) { + m_cache[i].init(); + } + + return true; + } + + void priv_make_room_for_new_blocks( + const size_type cache_no, const size_type new_objects_size, + object_allocator_type *const allocator_instance, + object_deallocate_func_type deallocator_function) { + auto &cache = m_cache[cache_no]; + auto &cache_header = cache.header; + auto &free_blocks = cache_header.free_blocks(); + auto &bin_headers = cache.bin_headers; + + // Make sure that the cache has enough space to allocate objects. + while (cache_header.total_size_byte() + new_objects_size > + k_max_per_cpu_cache_size || + free_blocks.empty()) { + auto *oldest_block = cache_header.oldest_active_block(); + assert(oldest_block); + + // Deallocate objects from the oldest block + const auto bin_no = oldest_block->bin_no; + assert(bin_no <= k_max_bin_no); + auto &bin_header = bin_headers[bin_no]; + const auto object_size = bin_no_manager::to_object_size(bin_no); + const auto num_objects = (bin_header.active_block() == oldest_block) + ? 
bin_header.active_block_size() + : cache_block_type::k_capacity; + (allocator_instance->*deallocator_function)(bin_no, num_objects, + oldest_block->cache); + assert(cache_header.total_size_byte() >= num_objects * object_size); + cache_header.total_size_byte() -= num_objects * object_size; + + cache_header.unregister(oldest_block); + if (bin_header.active_block() == oldest_block) { + bin_header.update_active_block(nullptr, 0); + } + oldest_block->disconnect(); + free_blocks.push(oldest_block); + } + } + + const unsigned int m_num_caches; #ifdef METALL_ENABLE_MUTEX_IN_OBJECT_CACHE std::vector m_mutex; #endif + std::unique_ptr m_cache{nullptr}; }; +// const_bin_iterator +template +class object_cache<_size_type, _difference_type, _bin_no_manager, + _object_allocator_type>::const_bin_iterator { + public: + using value_type = difference_type; + using pointer = const value_type *; + using reference = const value_type &; + + const_bin_iterator() noexcept = default; // set to the end + + const_bin_iterator(const cache_block_type *const block, + const unsigned int in_block_pos) noexcept + : m_block(block), m_in_block_pos(in_block_pos) { + assert(m_block || m_in_block_pos == 0); + } + + const_bin_iterator(const const_bin_iterator &) = default; + const_bin_iterator(const_bin_iterator &&) noexcept = default; + const_bin_iterator &operator=(const const_bin_iterator &) = default; + const_bin_iterator &operator=(const_bin_iterator &&) noexcept = default; + + reference operator*() const noexcept { + return m_block->cache[m_in_block_pos]; + } + + pointer operator->() const noexcept { + return &m_block->cache[m_in_block_pos]; + } + + const_bin_iterator &operator++() noexcept { + if (m_in_block_pos == 0) { + m_block = m_block->bin_older_block; + if (!m_block) { + return *this; + } + m_in_block_pos = cache_block_type::k_capacity - 1; + } else { + --m_in_block_pos; + } + + return *this; + } + + const_bin_iterator operator++(int) noexcept { + const auto tmp = *this; + ++(*this); + return tmp; + } + + bool operator==(const const_bin_iterator &other) const noexcept { + return m_block == other.m_block && m_in_block_pos == other.m_in_block_pos; + } + + bool operator!=(const const_bin_iterator &other) const noexcept { + return !(*this == other); + } + + private: + const cache_block_type *m_block{nullptr}; + unsigned int m_in_block_pos{0}; +}; } // namespace metall::kernel #endif // METALL_DETAIL_OBJECT_CACHE_HPP diff --git a/include/metall/kernel/object_cache_container.hpp b/include/metall/kernel/object_cache_container.hpp deleted file mode 100644 index 92c193f2..00000000 --- a/include/metall/kernel/object_cache_container.hpp +++ /dev/null @@ -1,180 +0,0 @@ -// Copyright 2019 Lawrence Livermore National Security, LLC and other Metall -// Project Developers. See the top-level COPYRIGHT file for details. -// -// SPDX-License-Identifier: (Apache-2.0 OR MIT) - -#ifndef METALL_DETAIL_OBJECT_CACHE_CONTAINER_HPP -#define METALL_DETAIL_OBJECT_CACHE_CONTAINER_HPP - -#include -#include -#include - -namespace metall::kernel { - -/// \brief Namespace for object cache details. -namespace objccdetail { - -/// \brief Computes the capacity table at compile time. -template -inline constexpr auto init_capacity_table() noexcept { - // MEMO: {} is needed to prevent the uninitialized error in constexpr - // contexts. This technique is not needed from C++20. 
- std::array capacity_table{}; - for (std::size_t b = 0; b < capacity_table.size(); ++b) { - capacity_table[b] = k_bin_size / bin_no_manager::to_object_size(b); - } - return capacity_table; -} - -/// \brief Holds the capacity of each bin of an object cache. -template -constexpr std::array capacity_table = - init_capacity_table(); - -/// \brief Computes the offset table at compile time. -template -inline constexpr auto init_offset_table() noexcept { - std::array offset_table{}; - offset_table[0] = 0; - for (std::size_t b = 0; b < offset_table.size() - 1; ++b) { - offset_table[b + 1] = - offset_table[b] + - capacity_table[b]; - } - return offset_table; -} - -/// \brief Holds the offset to the beginning of each bin in the object cache -/// (bins are packed as an 1D array). -template -constexpr std::array offset_table = - init_offset_table(); - -/// \brief Computes the maximum number of items an object cache can holds. -template -inline constexpr auto compute_capacity() noexcept { - std::size_t capacity = 0; - for (const auto n : capacity_table) { - capacity += n; - } - return capacity; -} - -} // namespace objccdetail - -/// \brief Container for object cache. -/// This container is designed to hold the offset addresses of cached objects. -/// \tparam _k_bin_size The total cached object size per bin. -/// \tparam _k_num_bins The number of bins. -/// \tparam _difference_type The address offset type. -/// \tparam _bin_no_manager The bin number manager. -template -class object_cache_container { - public: - static constexpr std::size_t k_bin_size = _k_bin_size; - static constexpr std::size_t k_num_bins = _k_num_bins; - using difference_type = _difference_type; - using bin_no_manager = _bin_no_manager; - using bin_no_type = typename bin_no_manager::bin_no_type; - - private: - // Hold all cached objects' offsets in a 1D array rather than splitting them - // into bins for better locality. - using cache_table_type = std::vector; - - // Cache capacity per bin. - static constexpr auto capacity_table = - objccdetail::capacity_table; - // Offset to the beginning of each bin in the cache table. - static constexpr auto offset_table = - objccdetail::offset_table; - // Maximum number of the cache table can hold. 
- static constexpr auto k_cache_capacity = - objccdetail::compute_capacity(); - - public: - using const_iterator = typename cache_table_type::const_iterator; - - static constexpr std::size_t bin_size() noexcept { return k_bin_size; } - - static std::size_t num_bins() noexcept { return k_num_bins; } - - static std::size_t bin_capacity(const bin_no_type bin_no) noexcept { - return capacity_table[bin_no]; - } - - object_cache_container() - : m_count_table(k_num_bins, 0), m_cache(k_cache_capacity) {} - - bool push(const bin_no_type bin_no, const difference_type object_offset) { - if (full(bin_no)) { - return false; // Error - } - const std::size_t pos = offset_table[bin_no] + m_count_table[bin_no]; - assert(pos < m_cache.size()); - m_cache[pos] = object_offset; - ++m_count_table[bin_no]; - - return true; - } - - difference_type front(const bin_no_type bin_no) const { - if (empty(bin_no)) return -1; - const std::size_t pos = offset_table[bin_no] + m_count_table[bin_no] - 1; - assert(pos < m_cache.size()); - return m_cache[pos]; - } - - bool pop(const bin_no_type bin_no) { - if (empty(bin_no) || m_count_table[bin_no] == 0) return false; // Error - --m_count_table[bin_no]; - return true; - } - - const_iterator begin(const bin_no_type bin_no) const { - assert(bin_no < m_cache.size()); - const std::size_t pos = offset_table[bin_no]; - return m_cache.begin() + pos; - } - - const_iterator end(const bin_no_type bin_no) const { - assert(bin_no < m_cache.size()); - const std::size_t pos = offset_table[bin_no] + m_count_table[bin_no]; - return m_cache.begin() + pos; - } - - std::size_t size(const bin_no_type bin_no) const { - return m_count_table[bin_no]; - } - - bool empty(const bin_no_type bin_no) const { - return m_count_table[bin_no] == 0; - } - - bool full(const bin_no_type bin_no) const { - return m_count_table[bin_no] == capacity_table[bin_no]; - } - - void clear() { - for (auto& n : m_count_table) { - n = 0; - } - } - - private: - /// \brief Holds the number of cached objects for each bin. - std::vector m_count_table; - /// \brief The container that actually holds the offsets of cached objects. 
- cache_table_type m_cache; -}; - -} // namespace metall::kernel - -#endif // METALL_DETAIL_OBJECT_CACHE_CONTAINER_HPP diff --git a/include/metall/kernel/segment_allocator.hpp b/include/metall/kernel/segment_allocator.hpp index 04ff3da4..7e601cea 100644 --- a/include/metall/kernel/segment_allocator.hpp +++ b/include/metall/kernel/segment_allocator.hpp @@ -371,9 +371,10 @@ class segment_allocator { // ---------- For allocation ---------- // difference_type priv_allocate_small_object(const bin_no_type bin_no) { #ifndef METALL_DISABLE_OBJECT_CACHE - if (bin_no <= small_object_cache_type::max_bin_no()) { + if (bin_no <= m_object_cache.max_bin_no()) { const auto offset = m_object_cache.pop( - bin_no, this, &myself::priv_allocate_small_objects_from_global); + bin_no, this, &myself::priv_allocate_small_objects_from_global, + &myself::priv_deallocate_small_objects_from_global); assert(offset >= 0 || offset == k_null_offset); return offset; } @@ -523,7 +524,7 @@ class segment_allocator { void priv_deallocate_small_object(const difference_type offset, const bin_no_type bin_no) { #ifndef METALL_DISABLE_OBJECT_CACHE - if (bin_no <= small_object_cache_type::max_bin_no()) { + if (bin_no <= m_object_cache.max_bin_no()) { [[maybe_unused]] const bool ret = m_object_cache.push( bin_no, offset, this, &myself::priv_deallocate_small_objects_from_global); @@ -661,17 +662,8 @@ class segment_allocator { // ---------- For object cache ---------- // #ifndef METALL_DISABLE_OBJECT_CACHE void priv_clear_object_cache() { - for (unsigned int c = 0; c < m_object_cache.num_caches(); ++c) { - for (bin_no_type b = 0; b <= m_object_cache.max_bin_no(); ++b) { - for (auto itr = m_object_cache.begin(c, b), - end = m_object_cache.end(c, b); - itr != end; ++itr) { - const auto offset = *itr; - priv_deallocate_small_objects_from_global(b, 1, &offset); - } - } - } - m_object_cache.clear(); + m_object_cache.clear(this, + &myself::priv_deallocate_small_objects_from_global); } #endif diff --git a/test/kernel/CMakeLists.txt b/test/kernel/CMakeLists.txt index 041110d7..20604e2b 100644 --- a/test/kernel/CMakeLists.txt +++ b/test/kernel/CMakeLists.txt @@ -8,6 +8,8 @@ add_metall_test_executable(multilayer_bitset_test multilayer_bitset_test.cpp) add_metall_test_executable(chunk_directory_test chunk_directory_test.cpp) +add_metall_test_executable(object_cache_test object_cache_test.cpp) + add_metall_test_executable(manager_test manager_test.cpp) add_metall_test_executable(manager_test_single_thread manager_test.cpp) diff --git a/test/kernel/object_cache_test.cpp b/test/kernel/object_cache_test.cpp new file mode 100644 index 00000000..6cd199fe --- /dev/null +++ b/test/kernel/object_cache_test.cpp @@ -0,0 +1,156 @@ +// Copyright 2023 Lawrence Livermore National Security, LLC and other Metall +// Project Developers. See the top-level COPYRIGHT file for details. 
+//
+// SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+#include "gtest/gtest.h"
+
+#include <cstddef>
+#include <cstdlib>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include <metall/kernel/bin_number_manager.hpp>
+#include <metall/kernel/object_cache.hpp>
+#include "../test_utility.hpp"
+
+using bin_no_manager =
+    metall::kernel::bin_number_manager<1ULL << 21, 1ULL << 40>;
+
+// Dummy allocator for testing.
+// It keeps track of allocated objects.
+class dummy_allocator {
+ public:
+  explicit dummy_allocator(const std::size_t max_bin_no)
+      : records(max_bin_no + 1), num_allocs(max_bin_no + 1, 0) {}
+
+  void allocate(const bin_no_manager::bin_no_type bin_no, const std::size_t n,
+                std::ptrdiff_t *const offsets) {
+    for (std::size_t i = 0; i < n; ++i) {
+      offsets[i] = (std::ptrdiff_t)(num_allocs[bin_no]++);
+      records[bin_no].insert(offsets[i]);
+    }
+  }
+
+  void deallocate(const bin_no_manager::bin_no_type bin_no, const std::size_t n,
+                  const std::ptrdiff_t *const offsets) {
+    for (std::size_t i = 0; i < n; ++i) {
+      ASSERT_EQ(records[bin_no].count(offsets[i]), 1);
+      records[bin_no].erase(offsets[i]);
+    }
+  }
+
+  std::vector<std::unordered_set<std::ptrdiff_t>> records;
+  std::vector<std::size_t> num_allocs;
+};
+
+using cache_type =
+    metall::kernel::object_cache<std::size_t, std::ptrdiff_t, bin_no_manager,
+                                 dummy_allocator>;
+namespace {
+
+TEST(ObjectCacheTest, Construct) {
+  cache_type cache;
+  ASSERT_GT(cache.max_per_cpu_cache_size(), 0);
+  ASSERT_GT(cache.num_caches_per_cpu(), 0);
+  ASSERT_GT(cache.max_bin_no(), 0);
+}
+
+TEST(ObjectCacheTest, Sequential) {
+  cache_type cache;
+  dummy_allocator alloc(cache.max_bin_no());
+
+  for (int k = 0; k < 2; ++k) {
+    std::vector<std::vector<std::ptrdiff_t>> offsets(cache.max_bin_no() + 1);
+    for (std::size_t b = 0; b <= cache.max_bin_no(); ++b) {
+      for (std::size_t i = 0; i < 256; ++i) {
+        const auto off = cache.pop(b, &alloc, &dummy_allocator::allocate,
+                                   &dummy_allocator::deallocate);
+        offsets[b].push_back(off);
+      }
+    }
+
+    for (std::size_t b = 0; b <= cache.max_bin_no(); ++b) {
+      for (std::size_t i = 0; i < 256; ++i) {
+        cache.push(b, offsets[b][i], &alloc, &dummy_allocator::deallocate);
+      }
+    }
+  }
+
+  // Iterate over the cache
+  for (std::size_t c = 0; c < cache.num_caches(); ++c) {
+    for (std::size_t b = 0; b <= cache.max_bin_no(); ++b) {
+      for (auto it = cache.begin(c, b); it != cache.end(c, b); ++it) {
+        const auto off = *it;
+        ASSERT_EQ(alloc.records[b].count(off), 1);
+      }
+    }
+  }
+
+  // Deallocate all objects in the cache
+  cache.clear(&alloc, &dummy_allocator::deallocate);
+
+  // alloc.records must be empty
+  for (const auto &per_bin : alloc.records) {
+    ASSERT_EQ(per_bin.size(), 0);
+  }
+}
+
+TEST(ObjectCacheTest, SequentialSingleBinManyObjects) {
+  cache_type cache;
+  dummy_allocator alloc(cache.max_bin_no());
+
+  std::vector<std::ptrdiff_t> offsets;
+  for (std::size_t i = 0; i < 1 << 20; ++i) {
+    const auto off = cache.pop(0, &alloc, &dummy_allocator::allocate,
+                               &dummy_allocator::deallocate);
+    offsets.push_back(off);
+  }
+
+  for (std::size_t i = 0; i < 1 << 20; ++i) {
+    cache.push(0, offsets[i], &alloc, &dummy_allocator::deallocate);
+  }
+
+  // Deallocate all objects in the cache
+  cache.clear(&alloc, &dummy_allocator::deallocate);
+
+  // alloc.records must be empty
+  for (const auto &per_bin : alloc.records) {
+    ASSERT_EQ(per_bin.size(), 0);
+  }
+}
+
+TEST(ObjectCacheTest, Random) {
+  cache_type cache;
+  dummy_allocator alloc(cache.max_bin_no());
+
+  std::vector<std::pair<bin_no_manager::bin_no_type, std::ptrdiff_t>> offsets;
+
+  for (std::size_t i = 0; i < 1 << 15; ++i) {
+    const auto mode = std::rand() % 5;
+    if (mode < 3) {
+      const auto b = std::rand() % (cache.max_bin_no() + 1);
+      offsets.emplace_back(b, cache.pop(b, &alloc, &dummy_allocator::allocate,
+                                        &dummy_allocator::deallocate));
+    } else {
+      if (offsets.empty()) continue;
+      const auto idx = std::rand() % offsets.size();
+      cache.push(offsets[idx].first, offsets[idx].second, &alloc,
+                 &dummy_allocator::deallocate);
+      offsets.erase(offsets.begin() + idx);
+    }
+  }
+  for (const auto &item : offsets) {
+    cache.push(item.first, item.second, &alloc, &dummy_allocator::deallocate);
+  }
+
+  // Deallocate all objects in the cache
+  cache.clear(&alloc, &dummy_allocator::deallocate);
+
+  // alloc.records must be empty
+  for (std::size_t b = 0; b < alloc.records.size(); ++b) {
+    ASSERT_EQ(alloc.records[b].size(), 0);
+  }
+}
+}  // namespace
\ No newline at end of file
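
[Annotation] The new test drives the cache through its three entry points. Stripped
of the GoogleTest scaffolding, the core protocol is the following sketch (it reuses
the `cache_type` and `dummy_allocator` definitions from the test above):

    cache_type cache;
    dummy_allocator alloc(cache.max_bin_no());

    // pop() hands out a cached offset, transparently refilling the cache
    // from the allocate callback when the bin is empty.
    const auto off = cache.pop(0, &alloc, &dummy_allocator::allocate,
                               &dummy_allocator::deallocate);

    // push() returns the offset to the cache; the deallocate callback is
    // invoked only when the cache must evict blocks to make room.
    cache.push(0, off, &alloc, &dummy_allocator::deallocate);

    // clear() drains every cached offset back through the deallocate
    // callback, which is why alloc.records must end up empty.
    cache.clear(&alloc, &dummy_allocator::deallocate);
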
From 8d08f304c878e85c16ba18178d24624a4a9f0234 Mon Sep 17 00:00:00 2001
From: iwabuchi
Date: Fri, 27 Oct 2023 16:43:59 -0700
Subject: [PATCH 22/25] Minor bugfix in adj-list test

---
 bench/adjacency_list/test/test_large.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/bench/adjacency_list/test/test_large.sh b/bench/adjacency_list/test/test_large.sh
index a87083fa..9002c677 100644
--- a/bench/adjacency_list/test/test_large.sh
+++ b/bench/adjacency_list/test/test_large.sh
@@ -11,7 +11,6 @@ a=0.57
 b=0.19
 c=0.19
 seed=123
-e=$((2**$((${v}+4)))) # The number of edges to generate
 
 # The default path to store data.
 # This value is overwritten if '-d' option is specified
@@ -78,6 +77,8 @@ main() {
   ref_edge_dump_file1="${out_dir_path}/ref_edge_list1"
   ref_edge_dump_file2="${out_dir_path}/ref_edge_list2"
 
+  local e=$((2**$((${v}+4)))) # The number of edges to generate
+
   ./run_adj_list_bench_metall -o ${data_store_path} -d ${adj_list_dump_file} -s ${seed} -v ${v} -e ${e} -a ${a} -b ${b} -c ${c} -r 1 -u 1 -D ${ref_edge_dump_file1}
   check_program_exit_status
   echo ""
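
[Annotation] The fix above is subtle: `v` can be reassigned inside the script after
the top-level assignments run, so computing `e` eagerly froze the edge count against
the default vertex count. A minimal sketch of the failure mode (assuming an option
that reassigns `v` before main() runs):

    v=17                    # default scale
    e=$((2**$((${v}+4))))   # e is fixed to 2^21 here
    v=20                    # later overridden, e.g., by a '-v 20' option
    echo ${e}               # still 2^21, not 2^24

Recomputing `e` inside main(), after any override, keeps it in sync with `v`.
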
From c263e00b406568b42e0ac4d461b06a5d2de62b51 Mon Sep 17 00:00:00 2001
From: iwabuchi
Date: Sun, 29 Oct 2023 13:07:57 -0700
Subject: [PATCH 23/25] Minor code brush up on object cache

---
 include/metall/kernel/object_cache.hpp | 331 ++++++++++++++-----------
 1 file changed, 189 insertions(+), 142 deletions(-)

diff --git a/include/metall/kernel/object_cache.hpp b/include/metall/kernel/object_cache.hpp
index 3f03e289..23ac3aa8 100644
--- a/include/metall/kernel/object_cache.hpp
+++ b/include/metall/kernel/object_cache.hpp
@@ -38,14 +38,22 @@ namespace mdtl = metall::mtlldetail;
 
 namespace obcdetail {
 
-template <typename difference_type>
+/// A cache block is a unit of memory that contains cached objects
+/// (specifically, a cached object is a difference_type value).
+/// Cache blocks are members of two linked lists.
+/// One is a linked list of all cache blocks in the cache.
+/// The other is a linked list of cache blocks in the same bin.
+/// The linked lists are used to manage the order of cache blocks.
+/// The order of cache blocks is used to determine which cache block is
+/// evicted when the cache is full.
+template <typename difference_type, typename bin_no_type>
 struct cache_block {
   static constexpr unsigned int k_capacity = 64;
 
   cache_block() = delete;
 
   inline void clear() {
-    bin_no = std::numeric_limits<unsigned int>::max();
+    bin_no = std::numeric_limits<bin_no_type>::max();
     older_block = nullptr;
     newer_block = nullptr;
     bin_older_block = nullptr;
@@ -80,8 +88,7 @@ struct cache_block {
     }
   }
 
-  // TODO: take bin_no from outside
-  unsigned int bin_no;
+  bin_no_type bin_no;
   cache_block *older_block;
   cache_block *newer_block;
   cache_block *bin_older_block;
@@ -89,10 +96,14 @@ struct cache_block {
   difference_type cache[k_capacity];
 };
 
-template <typename difference_type>
+/// A bin header is a unit of memory that contains information about a bin
+/// within a cache. Specifically, it contains the active block and the number
+/// of objects in the active block. The active block is the block that is
+/// currently used to cache objects. Non-active blocks are always full.
+template <typename difference_type, typename bin_no_type>
 class bin_header {
  public:
-  using cache_block_type = cache_block<difference_type>;
+  using cache_block_type = cache_block<difference_type, bin_no_type>;
 
   bin_header() { clear(); }
 
@@ -105,8 +116,7 @@ class bin_header {
   inline void move_to_next_active_block() {
     if (!m_active_block) return;
     m_active_block = m_active_block->bin_older_block;
-    m_active_block_size =
-        (m_active_block) ? cache_block<difference_type>::k_capacity : 0;
+    m_active_block_size = (m_active_block) ? cache_block_type::k_capacity : 0;
   }
 
   inline void update_active_block(const cache_block_type *const block,
@@ -132,14 +142,23 @@ class bin_header {
   }
 
  private:
+  // The number of objects in the active block
   std::size_t m_active_block_size{0};
   const cache_block_type *m_active_block{nullptr};
 };
 
-template <typename difference_type>
+/// A free blocks list is a linked list of free blocks.
+/// It is used to manage free blocks.
+/// Cache blocks are located in a contiguous memory region.
+/// All cache blocks are uninitialized at the beginning ---
+/// thus, they do not consume physical memory.
+/// This free list is designed such that it does not touch uninitialized blocks
+/// until they are used. This design is crucial to reduce Metall manager
+/// construction time.
+template <typename difference_type, typename bin_no_type>
class free_blocks_list {
 public:
-  using cache_block_type = cache_block<difference_type>;
+  using cache_block_type = cache_block<difference_type, bin_no_type>;
 
   free_blocks_list(const cache_block_type *uninit_top, std::size_t num_blocks)
       : m_blocks(nullptr),
@@ -185,18 +204,26 @@ class free_blocks_list {
   }
 
 private:
+  // Blocks that were used and became empty
   const cache_block_type *m_blocks;
-  const cache_block_type *m_uninit_top;  // uninitialized block top
+  // The top block of the uninitialized blocks.
+  // Uninitialized blocks are located in a contiguous memory region.
+  const cache_block_type *m_uninit_top;
   const cache_block_type *m_last_block;
 };
 
-template <typename difference_type>
+/// A cache header is a unit of memory that contains information about a cache.
+/// Specifically, it contains the total size of objects in the cache,
+/// the oldest and newest active blocks, and a free blocks list.
+template <typename difference_type, typename bin_no_type>
 struct cache_header {
+ private:
+  using free_blocks_list_type = free_blocks_list<difference_type, bin_no_type>;
+
  public:
-  using cache_block_type = cache_block<difference_type>;
+  using cache_block_type = cache_block<difference_type, bin_no_type>;
 
-  cache_header(const cache_block<difference_type> *const blocks,
-               std::size_t num_blocks)
+  cache_header(const cache_block_type *const blocks, std::size_t num_blocks)
       : m_total_size_byte(0), m_free_blocks(blocks, num_blocks) {
     assert(blocks);
     assert(num_blocks > 0);
@@ -247,11 +274,9 @@ struct cache_header {
     return m_oldest_active_block;
   }
 
-  inline free_blocks_list<difference_type> &free_blocks() noexcept {
-    return m_free_blocks;
-  }
+  inline free_blocks_list_type &free_blocks() noexcept { return m_free_blocks; }
 
-  inline const free_blocks_list<difference_type> &free_blocks() const noexcept {
+  inline const free_blocks_list_type &free_blocks() const noexcept {
     return m_free_blocks;
   }
 
@@ -259,15 +284,17 @@ struct cache_header {
   std::size_t m_total_size_byte;
   const cache_block_type *m_oldest_active_block{nullptr};
   const cache_block_type *m_newest_active_block{nullptr};
-  free_blocks_list<difference_type> m_free_blocks;
+  free_blocks_list_type m_free_blocks;
 };
 
-template <typename difference_type, std::size_t num_blocks_per_cache, std::size_t max_bin_no>
+/// A cache container is a unit of memory that contains all data structures
+/// that constitute a cache.
+template <typename difference_type, typename bin_no_type, std::size_t num_blocks_per_cache, std::size_t max_bin_no>
 struct cache_container {
-  using cache_heaer_type = cache_header<difference_type>;
-  using bin_header_type = bin_header<difference_type>;
-  using cacbe_block_type = cache_block<difference_type>;
+  using cache_heaer_type = cache_header<difference_type, bin_no_type>;
+  using bin_header_type = bin_header<difference_type, bin_no_type>;
+  using cacbe_block_type = cache_block<difference_type, bin_no_type>;
 
   void init() {
     new (&header) cache_heaer_type(blocks, num_blocks_per_cache);
@@ -276,6 +303,7 @@ struct cache_container {
     for (std::size_t i = 0; i <= max_bin_no; ++i) {
       new (&bin_headers[i]) bin_header_type();
     }
+    // Blocks are intentionally left uninitialized to reduce the construction time
   }
 
   void clear_headers() {
@@ -290,20 +318,22 @@ struct cache_container {
   cacbe_block_type blocks[num_blocks_per_cache];
 };
 
-// The maximum number of objects in a cache block for the bin number.
+// Allocate new objects by this size
 template <typename difference_type, typename bin_no_manager>
 inline constexpr unsigned int comp_chunk_size(
     const typename bin_no_manager::bin_no_type bin_no) noexcept {
+  using cache_block_type =
+      cache_block<difference_type, typename bin_no_manager::bin_no_type>;
   const auto object_size = bin_no_manager::to_object_size(bin_no);
   // 4096 is meant for page size so that we do not move memory larger than a
   // page.
   // 8 is meant for the minimum number of objects to cache within a block.
   return std::max(std::min((unsigned int)(4096 / object_size),
-                           cache_block<difference_type>::k_capacity),
+                           cache_block_type::k_capacity),
                   (unsigned int)(8));
 }
 
-// Calculate the max bin number can be cached
+// Calculate the max bin number that can be cached,
 // considering the internal implementation.
 template <typename difference_type, typename bin_no_manager>
 inline constexpr typename bin_no_manager::bin_no_type comp_max_bin_no(
@@ -341,9 +371,12 @@ inline constexpr std::size_t comp_max_num_objects_per_cache(
 template <typename difference_type, typename bin_no_manager>
 inline constexpr std::size_t comp_num_blocks_per_cache(
     const std::size_t max_per_cpu_cache_size) noexcept {
+  using cache_block_type =
+      cache_block<difference_type, typename bin_no_manager::bin_no_type>;
+
   return comp_max_num_objects_per_cache<difference_type, bin_no_manager>(
              max_per_cpu_cache_size) /
-         cache_block<difference_type>::k_capacity;
+         cache_block_type::k_capacity;
 }
 
 }  // namespace obcdetail
@@ -401,7 +434,7 @@ class object_cache {
 #endif
 
   using cache_storage_type =
-      obcdetail::cache_container<difference_type, k_num_blocks_per_cache, k_max_bin_no>;
+      obcdetail::cache_container<difference_type, bin_no_type, k_num_blocks_per_cache, k_max_bin_no>;
 
   using cache_block_type = typename cache_storage_type::cacbe_block_type;
@@ -442,71 +475,8 @@ class object_cache {
                       object_allocator_type *const allocator_instance,
                       object_allocate_func_type allocator_function,
                       object_deallocate_func_type deallocator_function) {
-    assert(bin_no <= max_bin_no());
-
-    const auto cache_no = priv_cache_no();
-#ifdef METALL_ENABLE_MUTEX_IN_OBJECT_CACHE
-    lock_guard_type guard(m_mutex[cache_no]);
-#endif
-
-    auto &cache = m_cache[cache_no];
-    auto &cache_header = cache.header;
-    auto &bin_header = cache.bin_headers[bin_no];
-    const auto object_size = bin_no_manager::to_object_size(bin_no);
-
-    if (bin_header.active_block_size() == 0) {  // Active block is empty
-
-      if (bin_header.active_block()) {
-        // Move to next active block if that is available
-        auto *const empty_block = bin_header.active_block();
-        bin_header.move_to_next_active_block();
-        cache_header.unregister(empty_block);
-        empty_block->disconnect();
-        cache_header.free_blocks().push(empty_block);
-      }
-
-      if (bin_header.active_block_size() == 0) {
-        assert(!bin_header.active_block());
-
-        // There is no cached objects for the bin.
-        // Allocate some objects and cache them to a free block.
-        const auto num_new_objects =
-            obcdetail::comp_chunk_size<difference_type, bin_no_manager>(bin_no);
-        const auto new_objects_size = num_new_objects * object_size;
-
-        // Make sure that the cache has enough space to allocate objects.
-        priv_make_room_for_new_blocks(cache_no, new_objects_size,
-                                      allocator_instance, deallocator_function);
-        assert(!cache_header.free_blocks().empty());
-
-        // allocate objects to the new block
-        auto *new_block = cache_header.free_blocks().pop();
-        assert(new_block);
-        new_block->clear();
-        new_block->bin_no = bin_no;
-        (allocator_instance->*allocator_function)(bin_no, num_new_objects,
-                                                  new_block->cache);
-
-        // Link the new block to the existing blocks
-        new_block->link_to_older(cache_header.newest_active_block(),
-                                 bin_header.active_block());
-
-        // Update headers
-        cache_header.register_new_block(new_block);
-        cache_header.total_size_byte() += new_objects_size;
-        assert(cache_header.total_size_byte() <= k_max_per_cpu_cache_size);
-        bin_header.update_active_block(new_block, num_new_objects);
-      }
-    }
-    assert(bin_header.active_block_size() > 0);
-
-    // Pop an object from the active block
-    --bin_header.active_block_size();
-    const auto object_offset =
-        bin_header.active_block()->cache[bin_header.active_block_size()];
-    assert(cache_header.total_size_byte() >= object_size);
-    cache_header.total_size_byte() -= object_size;
-    return object_offset;
+    return priv_pop(bin_no, allocator_instance, allocator_function,
+                    deallocator_function);
   }
 
   /// \brief Cache an object.
@@ -515,48 +485,8 @@ class object_cache {
   bool push(const bin_no_type bin_no, const difference_type object_offset,
             object_allocator_type *const allocator_instance,
             object_deallocate_func_type deallocator_function) {
-    assert(object_offset >= 0);
-    assert(bin_no <= max_bin_no());
-
-    const auto cache_no = priv_cache_no();
-#ifdef METALL_ENABLE_MUTEX_IN_OBJECT_CACHE
-    lock_guard_type guard(m_mutex[cache_no]);
-#endif
-
-    auto &cache = m_cache[cache_no];
-    auto &cache_header = cache.header;
-    auto &bin_header = cache.bin_headers[bin_no];
-    const auto object_size = bin_no_manager::to_object_size(bin_no);
-
-    // Make sure that the cache has enough space to allocate objects.
-    // TODO: This is not efficient. We do not have to make a free block always.
-    priv_make_room_for_new_blocks(cache_no, object_size, allocator_instance,
-                                  deallocator_function);
-
-    if (!bin_header.active_block() ||
-        bin_header.active_block_size() == cache_block_type::k_capacity) {
-      // There is no cached objects for the bin or
-      // the active block is full.
-      assert(!cache_header.free_blocks().empty());
-
-      auto *free_block = cache_header.free_blocks().pop();
-      assert(free_block);
-      free_block->clear();
-      free_block->bin_no = bin_no;
-      free_block->link_to_older(cache_header.newest_active_block(),
-                                bin_header.active_block());
-      cache_header.register_new_block(free_block);
-      bin_header.update_active_block(free_block, 0);
-    }
-
-    // push an object to the active block
-    bin_header.active_block()->cache[bin_header.active_block_size()] =
-        object_offset;
-    ++bin_header.active_block_size();
-    cache_header.total_size_byte() += object_size;
-    assert(cache_header.total_size_byte() <= k_max_per_cpu_cache_size);
-
-    return true;
+    return priv_push(bin_no, object_offset, allocator_instance,
+                     deallocator_function);
   }
 
   /// \brief Clear all cached objects.
@@ -666,6 +596,123 @@ class object_cache {
     return true;
   }
 
+  difference_type priv_pop(const bin_no_type bin_no,
+                           object_allocator_type *const allocator_instance,
+                           object_allocate_func_type allocator_function,
+                           object_deallocate_func_type deallocator_function) {
+    assert(bin_no <= max_bin_no());
+
+    const auto cache_no = priv_cache_no();
+#ifdef METALL_ENABLE_MUTEX_IN_OBJECT_CACHE
+    lock_guard_type guard(m_mutex[cache_no]);
+#endif
+
+    auto &cache = m_cache[cache_no];
+    auto &cache_header = cache.header;
+    auto &bin_header = cache.bin_headers[bin_no];
+    const auto object_size = bin_no_manager::to_object_size(bin_no);
+
+    if (bin_header.active_block_size() == 0) {  // Active block is empty
+
+      if (bin_header.active_block()) {
+        // Move to next active block if that is available
+        auto *const empty_block = bin_header.active_block();
+        bin_header.move_to_next_active_block();
+        cache_header.unregister(empty_block);
+        empty_block->disconnect();
+        cache_header.free_blocks().push(empty_block);
+      }
+
+      if (bin_header.active_block_size() == 0) {
+        assert(!bin_header.active_block());
+
+        // There are no cached objects for the bin.
+        // Allocate some objects and cache them to a free block.
+        const auto num_new_objects =
+            obcdetail::comp_chunk_size<difference_type, bin_no_manager>(bin_no);
+        const auto new_objects_size = num_new_objects * object_size;
+
+        // Make sure that the cache has enough space to allocate objects.
+        priv_make_room_for_new_blocks(cache_no, new_objects_size,
+                                      allocator_instance, deallocator_function);
+        assert(!cache_header.free_blocks().empty());
+
+        // Allocate objects to the new block
+        auto *new_block = cache_header.free_blocks().pop();
+        assert(new_block);
+        new_block->clear();
+        new_block->bin_no = bin_no;
+        (allocator_instance->*allocator_function)(bin_no, num_new_objects,
+                                                  new_block->cache);
+
+        // Link the new block to the existing blocks
+        new_block->link_to_older(cache_header.newest_active_block(),
+                                 bin_header.active_block());
+
+        // Update headers
+        cache_header.register_new_block(new_block);
+        cache_header.total_size_byte() += new_objects_size;
+        assert(cache_header.total_size_byte() <= k_max_per_cpu_cache_size);
+        bin_header.update_active_block(new_block, num_new_objects);
+      }
+    }
+    assert(bin_header.active_block_size() > 0);
+
+    // Pop an object from the active block
+    --bin_header.active_block_size();
+    const auto object_offset =
+        bin_header.active_block()->cache[bin_header.active_block_size()];
+    assert(cache_header.total_size_byte() >= object_size);
+    cache_header.total_size_byte() -= object_size;
+    return object_offset;
+  }
+
+  bool priv_push(const bin_no_type bin_no, const difference_type object_offset,
+                 object_allocator_type *const allocator_instance,
+                 object_deallocate_func_type deallocator_function) {
+    assert(object_offset >= 0);
+    assert(bin_no <= max_bin_no());
+
+    const auto cache_no = priv_cache_no();
+#ifdef METALL_ENABLE_MUTEX_IN_OBJECT_CACHE
+    lock_guard_type guard(m_mutex[cache_no]);
+#endif
+
+    auto &cache = m_cache[cache_no];
+    auto &cache_header = cache.header;
+    auto &bin_header = cache.bin_headers[bin_no];
+    const auto object_size = bin_no_manager::to_object_size(bin_no);
+
+    // Make sure that the cache has enough space to allocate objects.
+    priv_make_room_for_new_blocks(cache_no, object_size, allocator_instance,
+                                  deallocator_function);
+
+    if (!bin_header.active_block() ||
+        bin_header.active_block_size() == cache_block_type::k_capacity) {
+      // There are no cached objects for the bin or
+      // the active block is full.
+      assert(!cache_header.free_blocks().empty());
+
+      auto *free_block = cache_header.free_blocks().pop();
+      assert(free_block);
+      free_block->clear();
+      free_block->bin_no = bin_no;
+      free_block->link_to_older(cache_header.newest_active_block(),
+                                bin_header.active_block());
+      cache_header.register_new_block(free_block);
+      bin_header.update_active_block(free_block, 0);
+    }
+
+    // Push an object to the active block
+    bin_header.active_block()->cache[bin_header.active_block_size()] =
+        object_offset;
+    ++bin_header.active_block_size();
+    cache_header.total_size_byte() += object_size;
+    assert(cache_header.total_size_byte() <= k_max_per_cpu_cache_size);
+
+    return true;
+  }
+
   void priv_make_room_for_new_blocks(
       const size_type cache_no, const size_type new_objects_size,
       object_allocator_type *const allocator_instance,
@@ -674,12 +721,12 @@ class object_cache {
     auto &cache = m_cache[cache_no];
     auto &cache_header = cache.header;
     auto &free_blocks = cache_header.free_blocks();
     auto &bin_headers = cache.bin_headers;
+    auto &total_size = cache_header.total_size_byte();
 
     // Make sure that the cache has enough space to allocate objects.
-    while (cache_header.total_size_byte() + new_objects_size >
-               k_max_per_cpu_cache_size ||
+    while (total_size + new_objects_size > k_max_per_cpu_cache_size ||
            free_blocks.empty()) {
-      auto *oldest_block = cache_header.oldest_active_block();
+      auto *const oldest_block = cache_header.oldest_active_block();
       assert(oldest_block);
 
       // Deallocate objects from the oldest block
@@ -692,8 +739,8 @@ class object_cache {
                  : cache_block_type::k_capacity;
       (allocator_instance->*deallocator_function)(bin_no, num_objects,
                                                   oldest_block->cache);
-      assert(cache_header.total_size_byte() >= num_objects * object_size);
-      cache_header.total_size_byte() -= num_objects * object_size;
+      assert(total_size >= num_objects * object_size);
+      total_size -= num_objects * object_size;
 
       cache_header.unregister(oldest_block);
 
       if (bin_header.active_block() == oldest_block) {
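
[Annotation] A quick worked example of the comp_chunk_size() refill policy above:
each refill fetches max(min(4096 / object_size, 64), 8) objects, i.e., at most one
page worth of objects, at most one full cache block (k_capacity = 64), and at least
8 objects. A standalone sketch (illustrative only; the real function is templated
on the bin number manager):

    #include <algorithm>
    #include <cstdio>

    // Re-statement of the chunk-size policy; not Metall's actual API.
    constexpr unsigned int chunk_size(const unsigned int object_size) {
      constexpr unsigned int k_capacity = 64;  // cache_block::k_capacity
      return std::max(std::min(4096U / object_size, k_capacity), 8U);
    }

    int main() {
      //    8-byte objects: min(512, 64) = 64 -> 64 objects per refill
      //  128-byte objects: min( 32, 64) = 32 -> 32 objects per refill
      // 1024-byte objects: min(  4, 64) =  4 -> max(4, 8) = 8 objects
      std::printf("%u %u %u\n", chunk_size(8), chunk_size(128),
                  chunk_size(1024));
      return 0;
    }
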
From e65f6b8edc91588f803f0621402adbab5fa52d4f Mon Sep 17 00:00:00 2001
From: iwabuchi
Date: Mon, 30 Oct 2023 12:13:19 -0700
Subject: [PATCH 24/25] Update documents regarding macros

---
 .../basics/compile_time_options.md            | 36 +--------
 include/metall/defs.hpp                       | 76 ++++++++++++-----
 2 files changed, 57 insertions(+), 55 deletions(-)

diff --git a/docs/readthedocs/basics/compile_time_options.md b/docs/readthedocs/basics/compile_time_options.md
index b26e8d71..6d85ebce 100644
--- a/docs/readthedocs/basics/compile_time_options.md
+++ b/docs/readthedocs/basics/compile_time_options.md
@@ -1,36 +1,4 @@
 # Compile-Time Options
-There are some compile-time options (C/C++ macro) as follows to configure the behavior of Metall:
-
-- METALL_DEFAULT_CAPACITY=*bytes*
-  - The default capacity of a segment/datastore.
-  - This value is used when a user does not specify the capacity of a datastore when creating it.
-
-- METALL_VERBOSE_SYSTEM_SUPPORT_WARNING
-  - If defined, Metall shows warning messages at compile time if the system does not support important features.
-
-- METALL_DISABLE_CONCURRENCY
-  - Disable concurrency support in Metall. This option is useful when Metall is used in a single-threaded application.
-  - If this macro is defined, applications must not call Metall concurrently from multiple threads.
-  - Even if this option is enabled, Metall still uses multiple threads for background tasks, such as synchronizing segment files.
-
-- METALL_USE_SORTED_BIN
-  - If defined, Metall stores addresses in sorted order in the bin directory.
-  - This option enables Metall to use memory space more efficiently, but it increases the cost of the bin directory operations.
-
-- METALL_FREE_SMALL_OBJECT_SIZE_HINT=*bytes*
-  - If defined, Metall tries to free space when an object equal to or larger than the specified bytes is deallocated.
-  - Will be rounded up to a multiple of the page size internally.
-
-
-**Macros for the segment storage manager:**
-
-- METALL_SEGMENT_BLOCK_SIZE=*bytes*
-  - The segment block size.
-  - Metall allocates a backing file with this size.
-
-- METALL_DISABLE_FREE_FILE_SPACE
-  - If defined, Metall does not free file space.
-
-- METALL_USE_ANONYMOUS_NEW_MAP
-  - If defined, Metall uses anonymous memory mapping instead of file mapping when creating a new map region.
\ No newline at end of file
+There are some compile-time options (C/C++ macros) to configure the behavior of Metall.
+Those macros are defined in `metall/include/metall/defs.hpp`.
diff --git a/include/metall/defs.hpp b/include/metall/defs.hpp
index bd2e51d5..0d5604ed 100644
--- a/include/metall/defs.hpp
+++ b/include/metall/defs.hpp
@@ -12,14 +12,10 @@
 /// \def METALL_MAX_CAPACITY
 /// The max capacity, i.e., the maximum total memory size a single Metall
 /// datastore can allocate. This value is a theoretical limit, and the actual
-/// limit is smaller than this value.
+/// limit is smaller than this value. This value is used to determine the types
+/// of some internal variables.
 #ifndef METALL_MAX_CAPACITY
-#define METALL_MAX_CAPACITY (1ULL << 48ULL)
-#endif
-
-#ifdef METALL_MAX_SEGMENT_SIZE
-#warning \
-    "METALL_MAX_SEGMENT_SIZE is deprecated. Use METALL_MAX_CAPACITY instead."
+#define METALL_MAX_CAPACITY (1ULL << 47ULL)
 #endif
 
 /// \def METALL_DEFAULT_CAPACITY
@@ -36,20 +32,52 @@
 #endif
 #endif
 
-#ifdef METALL_DEFAULT_VM_RESERVE_SIZE
-#warning \
-    "METALL_DEFAULT_VM_RESERVE_SIZE is deprecated. Use METALL_DEFAULT_CAPACITY instead."
+#ifdef DOXYGEN_SKIP
+/// \brief If defined, Metall shows warning messages at compile time if the
+/// system does not support important features.
+#define METALL_VERBOSE_SYSTEM_SUPPORT_WARNING
+
+/// \brief If defined, Metall stores addresses in sorted order in the bin
+/// directory. This option enables Metall to use memory space more efficiently,
+/// but it increases the cost of the bin directory operations.
+#define METALL_USE_SORTED_BIN
+
+/// \brief If defined, Metall tries to free space when an object equal to or
+/// larger than the specified bytes is deallocated. Will be rounded up to a
+/// multiple of the page size internally.
+#define METALL_FREE_SMALL_OBJECT_SIZE_HINT
 #endif
 
+// --------------------
+// Macros for the default segment storage manager
+// --------------------
+
 /// \def METALL_SEGMENT_BLOCK_SIZE
 /// The segment block size the default segment storage use.
 #ifndef METALL_SEGMENT_BLOCK_SIZE
 #define METALL_SEGMENT_BLOCK_SIZE (1ULL << 28ULL)
 #endif
 
-#ifdef METALL_INITIAL_SEGMENT_SIZE
-#warning \
-    "METALL_INITIAL_SEGMENT_SIZE is deprecated. Use METALL_SEGMENT_BLOCK_SIZE instead."
+#ifdef DOXYGEN_SKIP
+/// \brief If defined, the default segment storage does not free file space even
+/// though the corresponding segment becomes free.
+#define METALL_DISABLE_FREE_FILE_SPACE
+#endif
+
+// --------------------
+// Macros for the object cache
+// --------------------
+
+/// \def METALL_MAX_PER_CPU_CACHE_SIZE
+/// The maximum size of the per CPU (logical CPU core) cache in bytes.
+#ifndef METALL_MAX_PER_CPU_CACHE_SIZE
+#define METALL_MAX_PER_CPU_CACHE_SIZE (1ULL << 20ULL)
+#endif
+
+/// \def METALL_NUM_CACHES_PER_CPU
+/// The number of caches per CPU (logical CPU core).
+#ifndef METALL_NUM_CACHES_PER_CPU
+#define METALL_NUM_CACHES_PER_CPU 2
 #endif
 
 #ifdef DOXYGEN_SKIP
@@ -63,16 +91,22 @@
 #define METALL_DISABLE_CONCURRENCY
 #endif
 
-/// \def METALL_MAX_PER_CPU_CACHE_SIZE
-/// The maximum size of the per CPU (logical CPU core) cache in bytes.
-#ifndef METALL_MAX_PER_CPU_CACHE_SIZE
-#define METALL_MAX_PER_CPU_CACHE_SIZE (1ULL << 20ULL)
-#endif
+// --------------------
+// Deprecated macros
 
-/// \def METALL_NUM_CACHES_PER_CPU
-/// The number of caches per CPU (logical CPU core).
-#ifndef METALL_NUM_CACHES_PER_CPU
-#define METALL_NUM_CACHES_PER_CPU 2
+#ifdef METALL_MAX_SEGMENT_SIZE
+#warning \
+    "METALL_MAX_SEGMENT_SIZE is deprecated. Use METALL_MAX_CAPACITY instead."
 #endif
+
+#ifdef METALL_DEFAULT_VM_RESERVE_SIZE
+#warning \
+    "METALL_DEFAULT_VM_RESERVE_SIZE is deprecated. Use METALL_DEFAULT_CAPACITY instead."
+#endif
+
+#ifdef METALL_INITIAL_SEGMENT_SIZE
+#warning \
+    "METALL_INITIAL_SEGMENT_SIZE is deprecated. Use METALL_SEGMENT_BLOCK_SIZE instead."
+#endif
 
 #endif  // METALL_DEFS_HPP
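
[Annotation] The knobs documented in defs.hpp above are plain preprocessor macros,
so they are set on the compiler command line rather than at run time. An
illustrative build invocation (paths and values are examples only, not Metall's
documented defaults):

    # 64 MiB segment blocks, 512 KiB per-CPU object cache, one cache per CPU.
    g++ -std=c++17 -I metall/include \
        -DMETALL_SEGMENT_BLOCK_SIZE=$((1 << 26)) \
        -DMETALL_MAX_PER_CPU_CACHE_SIZE=$((1 << 19)) \
        -DMETALL_NUM_CACHES_PER_CPU=1 \
        app.cpp -o app
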
From 2b4e76666718be3c3f41428f2315f2684e20e42f Mon Sep 17 00:00:00 2001
From: iwabuchi
Date: Wed, 22 Nov 2023 17:36:36 -0800
Subject: [PATCH 25/25] Release v0.27

---
 CMakeLists.txt             | 2 +-
 docs/Doxyfile.in           | 2 +-
 include/metall/version.hpp | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index cc61d4c6..b818fa77 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -17,7 +17,7 @@ endif()
 # Metall general configuration
 # --------------------------------------------------------------------------------
 #
 project(Metall
-        VERSION 0.26
+        VERSION 0.27
         DESCRIPTION "A persistent memory allocator for data-centric analytics"
         HOMEPAGE_URL "https://github.com/LLNL/metall")
 
diff --git a/docs/Doxyfile.in b/docs/Doxyfile.in
index b7ae43f3..fd720cd9 100644
--- a/docs/Doxyfile.in
+++ b/docs/Doxyfile.in
@@ -38,7 +38,7 @@ PROJECT_NAME = "Metall"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER = v0.26
+PROJECT_NUMBER = v0.27
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/include/metall/version.hpp b/include/metall/version.hpp
index 0aac9c1e..2db668ba 100644
--- a/include/metall/version.hpp
+++ b/include/metall/version.hpp
@@ -14,7 +14,7 @@
 /// METALL_VERSION / 100 % 1000  // the minor version.
 /// METALL_VERSION % 100         // the patch level.
 /// \endcode
-#define METALL_VERSION 2600
+#define METALL_VERSION 2700
 
 namespace metall {
 /// \brief Variable type to handle a version data.
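
[Annotation] As a closing sanity check, the version bump is self-consistent: with
METALL_VERSION = 2700, the decoding scheme documented in version.hpp yields
2700 / 100 % 1000 = 27 (minor) and 2700 % 100 = 0 (patch), i.e., v0.27, matching
both the CMake project VERSION and the Doxygen PROJECT_NUMBER above. A small
sketch (assuming the major version occupies the digits above the minor field,
i.e., METALL_VERSION / 100000):

    #include <iostream>
    #include <metall/version.hpp>

    int main() {
      std::cout << METALL_VERSION / 100000 << '.'      // major: 0
                << METALL_VERSION / 100 % 1000 << '.'  // minor: 27
                << METALL_VERSION % 100 << std::endl;  // patch: 0
      return 0;
    }
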