Skip to content

Commit 7508b1d

Browse files
committed
feat: use new minimal LRU cache implementation
1 parent b919012 commit 7508b1d

File tree

6 files changed

+326
-19
lines changed

6 files changed

+326
-19
lines changed

CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,7 @@ if(WITH_TESTS)
506506
test/global_metadata_test.cpp
507507
test/integral_value_parser_test.cpp
508508
test/lazy_value_test.cpp
509+
test/lru_cache_test.cpp
509510
test/metadata_requirements_test.cpp
510511
test/nilsimsa_test.cpp
511512
test/options_test.cpp
@@ -603,6 +604,10 @@ if(WITH_TESTS)
603604
target_link_libraries(tool_main_test PRIVATE mkdwarfs_main dwarfsck_main dwarfsextract_main PkgConfig::LIBARCHIVE)
604605
endif()
605606

607+
if(TARGET dwarfs_unit_tests)
608+
target_link_libraries(dwarfs_unit_tests PRIVATE phmap)
609+
endif()
610+
606611
if(TARGET manpage_test)
607612
if(WITH_TOOLS)
608613
target_compile_definitions(manpage_test PRIVATE DWARFS_WITH_TOOLS)
include/dwarfs/reader/internal/lru_cache.h

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
/* vim:set ts=2 sw=2 sts=2 et: */
2+
/**
3+
* \author Marcus Holland-Moritz ([email protected])
4+
* \copyright Copyright (c) Marcus Holland-Moritz
5+
*
6+
* This file is part of dwarfs.
7+
*
8+
* Permission is hereby granted, free of charge, to any person obtaining a copy
9+
* of this software and associated documentation files (the “Software”), to deal
10+
* in the Software without restriction, including without limitation the rights
11+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12+
* copies of the Software, and to permit persons to whom the Software is
13+
* furnished to do so, subject to the following conditions:
14+
*
15+
* The above copyright notice and this permission notice shall be included in
16+
* all copies or substantial portions of the Software.
17+
*
18+
* THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24+
* SOFTWARE.
25+
*
26+
* SPDX-License-Identifier: MIT
27+
*/
28+
29+
#pragma once
30+
31+
#include <functional>
32+
#include <iterator>
33+
#include <list>
34+
35+
#include <parallel_hashmap/phmap.h>
36+
37+
namespace dwarfs::reader::internal {
38+
39+
template <typename KeyT, typename T>
40+
class lru_cache {
41+
public:
42+
using key_type = KeyT;
43+
using mapped_type = T;
44+
using value_type = std::pair<key_type const, mapped_type>;
45+
46+
using iterator = typename std::list<value_type>::iterator;
47+
using const_iterator = typename std::list<value_type>::const_iterator;
48+
49+
using prune_hook_type = std::function<void(key_type, mapped_type&&)>;
50+
51+
lru_cache() = default;
52+
53+
explicit lru_cache(size_t max_size)
54+
: max_size_{max_size} {
55+
index_.reserve(max_size_);
56+
}
57+
58+
// Set the maximum cache size
59+
void set_max_size(size_t max_size) {
60+
max_size_ = max_size;
61+
while (cache_.size() > max_size_) {
62+
evict_lru();
63+
}
64+
index_.reserve(max_size_);
65+
}
66+
67+
// Set a custom prune hook
68+
void set_prune_hook(prune_hook_type hook) { prune_hook_ = std::move(hook); }
69+
70+
// Insert or update an item in the cache, promoting it
71+
void set(key_type const& key, mapped_type value,
72+
prune_hook_type custom_prune_hook = {}) {
73+
auto it = index_.find(key);
74+
if (it != index_.end()) {
75+
it->second->second = std::move(value);
76+
move_to_front(it->second);
77+
} else {
78+
if (index_.size() >= max_size_) {
79+
evict_lru(std::move(custom_prune_hook));
80+
}
81+
cache_.push_front(value_type(key, std::move(value)));
82+
index_[key] = cache_.begin();
83+
}
84+
}
85+
86+
// Find an item, optionally promoting it
87+
iterator find(key_type const& key, bool promote = true) {
88+
auto it = index_.find(key);
89+
if (it == index_.end()) {
90+
return end();
91+
}
92+
if (promote) {
93+
move_to_front(it->second);
94+
}
95+
return it->second;
96+
}
97+
98+
iterator erase(iterator pos, prune_hook_type custom_prune_hook = {}) {
99+
auto& key = pos->first;
100+
auto& value = pos->second;
101+
if (custom_prune_hook) {
102+
custom_prune_hook(key, std::move(value));
103+
} else if (prune_hook_) {
104+
prune_hook_(key, std::move(value));
105+
}
106+
index_.erase(key);
107+
return cache_.erase(pos);
108+
}
109+
110+
void clear() {
111+
index_.clear();
112+
cache_.clear();
113+
}
114+
115+
bool empty() const { return cache_.empty(); }
116+
117+
size_t size() const { return cache_.size(); }
118+
119+
iterator begin() { return cache_.begin(); }
120+
iterator end() { return cache_.end(); }
121+
122+
const_iterator begin() const { return cache_.begin(); }
123+
const_iterator end() const { return cache_.end(); }
124+
125+
private:
126+
// Move the accessed item to the front of the cache (most recently used)
127+
void move_to_front(iterator it) { cache_.splice(cache_.begin(), cache_, it); }
128+
129+
// Evict the least recently used item
130+
void evict_lru(prune_hook_type custom_prune_hook = {}) {
131+
if (auto it = cache_.end(); it != cache_.begin()) {
132+
erase(--it, std::move(custom_prune_hook));
133+
}
134+
}
135+
136+
size_t max_size_;
137+
phmap::flat_hash_map<key_type, iterator> index_;
138+
std::list<value_type> cache_;
139+
prune_hook_type prune_hook_;
140+
};
141+
142+
} // namespace dwarfs::reader::internal

include/dwarfs/reader/internal/offset_cache.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,10 @@
3636
#include <span>
3737
#include <vector>
3838

39-
#include <folly/container/EvictingCacheMap.h>
40-
4139
#include <dwarfs/small_vector.h>
4240

41+
#include <dwarfs/reader/internal/lru_cache.h>
42+
4343
namespace dwarfs::reader::internal {
4444

4545
template <typename InodeT, typename FileOffsetT, typename ChunkIndexT,
@@ -205,7 +205,7 @@ class basic_offset_cache {
205205
}
206206

207207
private:
208-
using cache_type = folly::EvictingCacheMap<inode_type, value_type>;
208+
using cache_type = lru_cache<inode_type, value_type>;
209209

210210
cache_type mutable cache_;
211211
std::mutex mutable mx_;

src/reader/internal/block_cache.cpp

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@
4141

4242
#include <fmt/format.h>
4343

44-
#include <folly/container/EvictingCacheMap.h>
4544
#include <folly/stats/Histogram.h>
4645
#include <folly/system/ThreadName.h>
4746

@@ -60,6 +59,7 @@
6059
#include <dwarfs/reader/internal/block_cache.h>
6160
#include <dwarfs/reader/internal/block_cache_byte_buffer_factory.h>
6261
#include <dwarfs/reader/internal/cached_block.h>
62+
#include <dwarfs/reader/internal/lru_cache.h>
6363
#include <dwarfs/reader/internal/periodic_executor.h>
6464

6565
namespace dwarfs::reader::internal {
@@ -100,7 +100,7 @@ class lru_sequential_access_detector : public sequential_access_detector {
100100
void touch(size_t block_no) override {
101101
std::lock_guard lock(mx_);
102102
lru_.set(
103-
block_no, block_no, true,
103+
block_no, block_no, /* true, */
104104
// NOLINTNEXTLINE(cppcoreguidelines-rvalue-reference-param-not-moved)
105105
[this](size_t, size_t&&) { is_sequential_.reset(); });
106106
}
@@ -132,7 +132,7 @@ class lru_sequential_access_detector : public sequential_access_detector {
132132
}
133133

134134
private:
135-
using lru_type = folly::EvictingCacheMap<size_t, size_t>;
135+
using lru_type = lru_cache<size_t, size_t>;
136136

137137
std::mutex mutable mx_;
138138
lru_type lru_;
@@ -251,6 +251,11 @@ class block_cache_ final : public block_cache::impl {
251251
: hardware_concurrency(),
252252
static_cast<size_t>(1)));
253253
}
254+
cache_.set_prune_hook(
255+
[this](size_t block_no, std::shared_ptr<cached_block>&& block) {
256+
on_block_removed("evicted", block_no, std::move(block));
257+
blocks_evicted_.fetch_add(1, std::memory_order_relaxed);
258+
});
254259
}
255260

256261
~block_cache_() noexcept override {
@@ -328,7 +333,6 @@ class block_cache_ final : public block_cache::impl {
328333
}
329334

330335
void set_block_size(size_t size) override {
331-
// XXX: This currently inevitably clears the cache
332336
if (size == 0) {
333337
DWARFS_THROW(runtime_error, "block size is zero");
334338
}
@@ -339,13 +343,7 @@ class block_cache_ final : public block_cache::impl {
339343
}
340344

341345
std::lock_guard lock(mx_);
342-
cache_.~lru_type();
343-
new (&cache_) lru_type(max_blocks);
344-
cache_.setPruneHook(
345-
[this](size_t block_no, std::shared_ptr<cached_block>&& block) {
346-
on_block_removed("evicted", block_no, std::move(block));
347-
blocks_evicted_.fetch_add(1, std::memory_order_relaxed);
348-
});
346+
cache_.set_max_size(max_blocks);
349347
}
350348

351349
void set_num_workers(size_t num) override {
@@ -390,7 +388,7 @@ class block_cache_ final : public block_cache::impl {
390388
if (auto next = seq_access_detector_->prefetch()) {
391389
std::lock_guard lock(mx_);
392390

393-
if (cache_.findWithoutPromotion(*next) == cache_.end() &&
391+
if (cache_.find(*next, false) == cache_.end() &&
394392
active_.find(*next) == active_.end()) {
395393
sequential_prefetches_.fetch_add(1, std::memory_order_relaxed);
396394
LOG_TRACE << "prefetching block " << *next;
@@ -759,8 +757,7 @@ class block_cache_ final : public block_cache::impl {
759757
}
760758
}
761759

762-
using lru_type =
763-
folly::EvictingCacheMap<size_t, std::shared_ptr<cached_block>>;
760+
using lru_type = lru_cache<size_t, std::shared_ptr<cached_block>>;
764761
template <typename Key, typename Value>
765762
using fast_map_type = phmap::flat_hash_map<Key, Value>;
766763

src/reader/internal/inode_reader_v2.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@
3636
#include <utility>
3737
#include <vector>
3838

39-
#include <folly/container/EvictingCacheMap.h>
4039
#include <folly/stats/Histogram.h>
4140

4241
#include <range/v3/view/enumerate.hpp>
@@ -50,6 +49,7 @@
5049

5150
#include <dwarfs/reader/internal/block_cache.h>
5251
#include <dwarfs/reader/internal/inode_reader_v2.h>
52+
#include <dwarfs/reader/internal/lru_cache.h>
5353
#include <dwarfs/reader/internal/offset_cache.h>
5454

5555
namespace dwarfs::reader::internal {
@@ -164,7 +164,7 @@ class inode_reader_ final : public inode_reader_v2::impl {
164164
offset_cache_chunk_index_interval,
165165
offset_cache_updater_max_inline_offsets>;
166166

167-
using readahead_cache_type = folly::EvictingCacheMap<uint32_t, file_off_t>;
167+
using readahead_cache_type = lru_cache<uint32_t, file_off_t>;
168168

169169
std::vector<std::future<block_range>>
170170
read_internal(uint32_t inode, size_t size, file_off_t offset, size_t maxiov,

0 commit comments

Comments
 (0)