From 264c66a9bd7d45f4206492ea8aae1fcb9ad5c585 Mon Sep 17 00:00:00 2001 From: arvidn Date: Sun, 17 Jul 2022 18:15:49 -0700 Subject: [PATCH] add new multi-threaded disk I/O subsystem using preadv and pwritev --- CMakeLists.txt | 6 + ChangeLog | 1 + Jamfile | 3 + Makefile | 10 + bindings/python/src/session.cpp | 3 + examples/client_test.cpp | 7 +- include/libtorrent/aux_/debug_disk_thread.hpp | 9 +- include/libtorrent/aux_/disk_buffer_pool.hpp | 9 +- include/libtorrent/aux_/disk_cache.hpp | 395 ++++ .../libtorrent/aux_/disk_completed_queue.hpp | 1 + include/libtorrent/aux_/disk_job.hpp | 8 + include/libtorrent/aux_/disk_job_pool.hpp | 2 + include/libtorrent/aux_/pread_disk_job.hpp | 27 + include/libtorrent/aux_/pread_storage.hpp | 187 ++ include/libtorrent/aux_/store_buffer.hpp | 7 + include/libtorrent/aux_/unique_ptr.hpp | 7 + .../libtorrent/aux_/visit_block_iovecs.hpp | 64 + include/libtorrent/config.hpp | 18 + include/libtorrent/libtorrent.hpp | 1 + include/libtorrent/pread_disk_io.hpp | 28 + src/disk_buffer_pool.cpp | 19 +- src/disk_cache.cpp | 742 ++++++++ src/disk_completed_queue.cpp | 29 +- src/disk_job.cpp | 2 + src/disk_job_pool.cpp | 2 + src/mmap_disk_io.cpp | 1 + src/pread_disk_io.cpp | 1686 +++++++++++++++++ src/pread_storage.cpp | 797 ++++++++ src/session.cpp | 5 +- src/settings_pack.cpp | 2 +- src/torrent.cpp | 27 +- test/Jamfile | 3 + test/test_add_torrent.cpp | 3 +- test/test_copy_file.cpp | 1 + test/test_disk_cache.cpp | 171 ++ test/test_disk_io.cpp | 136 ++ test/test_file.cpp | 2 + test/test_storage.cpp | 80 +- test/test_torrent_info.cpp | 3 +- test/web_seed_suite.cpp | 6 +- tools/disk_io_stress_test.cpp | 5 +- tools/parse_session_stats.py | 3 +- tools/run_benchmark.py | 9 +- 43 files changed, 4492 insertions(+), 35 deletions(-) create mode 100644 include/libtorrent/aux_/disk_cache.hpp create mode 100644 include/libtorrent/aux_/pread_disk_job.hpp create mode 100644 include/libtorrent/aux_/pread_storage.hpp create mode 100644 include/libtorrent/aux_/visit_block_iovecs.hpp create mode 100644 include/libtorrent/pread_disk_io.hpp create mode 100644 src/disk_cache.cpp create mode 100644 src/pread_disk_io.cpp create mode 100644 src/pread_storage.cpp create mode 100644 test/test_disk_cache.cpp create mode 100644 test/test_disk_io.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index b06d3c4b725..7a8e26e0c40 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -165,6 +165,8 @@ set(libtorrent_aux_include_files disable_warnings_pop.hpp disable_warnings_push.hpp disk_buffer_pool.hpp + disk_cache.hpp + visit_block_iovecs.hpp disk_completed_queue.hpp mmap_disk_job.hpp disk_job.hpp @@ -225,6 +227,7 @@ set(libtorrent_aux_include_files portmap.hpp posix_part_file.hpp posix_storage.hpp + pread_disk_job.hpp proxy_base.hpp proxy_settings.hpp puff.hpp @@ -326,6 +329,7 @@ set(sources disabled_disk_io.cpp disk_buffer_holder.cpp disk_buffer_pool.cpp + disk_cache.cpp disk_completed_queue.cpp disk_io_thread_pool.cpp disk_job_fence.cpp @@ -383,6 +387,8 @@ set(sources posix_disk_io.cpp posix_part_file.cpp posix_storage.cpp + pread_disk_io.cpp + pread_storage.cpp proxy_base.cpp proxy_settings.cpp puff.cpp diff --git a/ChangeLog b/ChangeLog index 6d7db0652c7..8076ce8708d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,6 @@ 2.1.0 not released + * add a multi-threaded, pread()-based, disk I/O backend (pread_disk_io) * try harder to bind TCP and UDP sockets to the same port * made disk_interface's status_t type a flags type * optimize resume data format to use less space diff --git a/Jamfile b/Jamfile 
index f2e680bd61b..fb987c5b46e 100644 --- a/Jamfile +++ b/Jamfile @@ -806,6 +806,7 @@ SOURCES = directory disk_buffer_holder disk_buffer_pool + disk_cache disk_completed_queue disk_io_thread_pool disabled_disk_io @@ -910,6 +911,8 @@ SOURCES = mmap mmap_disk_io mmap_storage + pread_disk_io + pread_storage posix_disk_io posix_part_file posix_storage diff --git a/Makefile b/Makefile index dfa6e3cede5..fff4ba87c7a 100644 --- a/Makefile +++ b/Makefile @@ -323,6 +323,7 @@ SOURCES = \ disabled_disk_io.cpp \ disk_buffer_holder.cpp \ disk_buffer_pool.cpp \ + disk_cache.cpp \ disk_completed_queue.cpp \ disk_io_thread_pool.cpp \ disk_job_fence.cpp \ @@ -381,6 +382,8 @@ SOURCES = \ posix_disk_io.cpp \ posix_part_file.cpp \ posix_storage.cpp \ + pread_disk_io.cpp \ + pread_storage.cpp \ proxy_base.cpp \ proxy_settings.cpp \ puff.cpp \ @@ -497,6 +500,7 @@ HEADERS = \ piece_block.hpp \ portmap.hpp \ posix_disk_io.hpp \ + pread_disk_io.hpp \ read_resume_data.hpp \ session.hpp \ session_handle.hpp \ @@ -561,6 +565,8 @@ HEADERS = \ aux_/disable_warnings_pop.hpp \ aux_/disable_warnings_push.hpp \ aux_/disk_buffer_pool.hpp \ + aux_/disk_cache.hpp \ + aux_/visit_block_iovecs.hpp \ aux_/disk_completed_queue.hpp \ aux_/disk_io_thread_pool.hpp \ aux_/disk_job_fence.hpp \ @@ -627,6 +633,8 @@ HEADERS = \ aux_/portmap.hpp \ aux_/posix_part_file.hpp \ aux_/posix_storage.hpp \ + aux_/pread_disk_job.hpp \ + aux_/pread_storage.hpp \ aux_/proxy_base.hpp \ aux_/proxy_settings.hpp \ aux_/puff.hpp \ @@ -892,6 +900,8 @@ TEST_SOURCES = \ test_dht.cpp \ test_dht_storage.cpp \ test_direct_dht.cpp \ + test_disk_cache.cpp \ + test_disk_io.cpp \ test_dos_blocker.cpp \ test_ed25519.cpp \ test_enum_net.cpp \ diff --git a/bindings/python/src/session.cpp b/bindings/python/src/session.cpp index 46048393e2f..e979398a58c 100644 --- a/bindings/python/src/session.cpp +++ b/bindings/python/src/session.cpp @@ -30,6 +30,7 @@ #include #include +#include namespace boost { @@ -882,6 +883,8 @@ namespace #endif if (disk_io == "posix_disk_io_constructor") s.disk_io_constructor = <::posix_disk_io_constructor; + else if (disk_io == "pread_disk_io_constructor") + s.disk_io_constructor = <::pread_disk_io_constructor; else s.disk_io_constructor = <::default_disk_io_constructor; } diff --git a/examples/client_test.cpp b/examples/client_test.cpp index 59469c67b17..f39e6abf0a8 100644 --- a/examples/client_test.cpp +++ b/examples/client_test.cpp @@ -56,6 +56,7 @@ see LICENSE file. #include "libtorrent/mmap_disk_io.hpp" #include "libtorrent/posix_disk_io.hpp" +#include "libtorrent/pread_disk_io.hpp" #include "libtorrent/disabled_disk_io.hpp" #include "torrent_view.hpp" @@ -1347,7 +1348,7 @@ CLIENT OPTIONS -O print session stats counters to the log -1 exit on first torrent completing (useful for benchmarks) -i specify which disk I/O back-end to use. 
One of: - mmap, posix, disabled + mmap, posix, pread, disabled )" #ifdef TORRENT_UTP_LOG_ENABLE R"( @@ -1561,6 +1562,10 @@ int main(int argc, char* argv[]) #endif if (arg == "posix"_sv) params.disk_io_constructor = lt::posix_disk_io_constructor; +#if TORRENT_HAVE_PREAD || defined TORRENT_WINDOWS + else if (arg == "pread"_sv) + params.disk_io_constructor = lt::pread_disk_io_constructor; +#endif else if (arg == "disabled"_sv) params.disk_io_constructor = lt::disabled_disk_io_constructor; else diff --git a/include/libtorrent/aux_/debug_disk_thread.hpp b/include/libtorrent/aux_/debug_disk_thread.hpp index b2f3ff66ac0..74c9072520f 100644 --- a/include/libtorrent/aux_/debug_disk_thread.hpp +++ b/include/libtorrent/aux_/debug_disk_thread.hpp @@ -24,6 +24,7 @@ see LICENSE file. #include #include #include +#include #include "libtorrent/aux_/disk_job.hpp" #include "libtorrent/disk_interface.hpp" @@ -81,11 +82,11 @@ inline std::string print_job(aux::disk_job const& j) } void operator()(job::file_priority const& j) const { - m_ss << "file-priority( num-files:" << j.prio.size() << " )"; + m_ss << "file-priority( num-files: " << j.prio.size() << " )"; } void operator()(job::clear_piece const& j) const { - m_ss << "clear-piece( piece:" << j.piece << " )"; + m_ss << "clear-piece( piece: " << j.piece << " )"; } void operator()(job::partial_read const& j) const { @@ -93,6 +94,10 @@ inline std::string print_job(aux::disk_job const& j) << " buf-offset: " << j.buffer_offset << " size: " << j.buffer_size << " )"; } + void operator()(job::kick_hasher const& j) const { + m_ss << "kick-hasher( piece: " << j.piece << " )"; + } + private: std::stringstream& m_ss; }; diff --git a/include/libtorrent/aux_/disk_buffer_pool.hpp b/include/libtorrent/aux_/disk_buffer_pool.hpp index 99d39c44506..7b82d2a8d7b 100644 --- a/include/libtorrent/aux_/disk_buffer_pool.hpp +++ b/include/libtorrent/aux_/disk_buffer_pool.hpp @@ -21,6 +21,7 @@ see LICENSE file. #include #include #include +#include #include "libtorrent/io_context.hpp" #include "libtorrent/span.hpp" @@ -54,6 +55,8 @@ namespace aux { return m_in_use; } + std::optional flush_request() const; + void set_settings(settings_interface const& sett); private: @@ -67,10 +70,10 @@ namespace aux { // cache size limit int m_max_use; - // if we have exceeded the limit, we won't start - // allowing allocations again until we drop below - // this low watermark + // if we have exceeded the high watermark we start flushing blocks to + // disk until we're below the low watermark. int m_low_watermark; + int m_high_watermark; // if we exceed the max number of buffers, we start // adding up callbacks to this queue. Once the number diff --git a/include/libtorrent/aux_/disk_cache.hpp b/include/libtorrent/aux_/disk_cache.hpp new file mode 100644 index 00000000000..d51ca52a8c9 --- /dev/null +++ b/include/libtorrent/aux_/disk_cache.hpp @@ -0,0 +1,395 @@ +/* + +Copyright (c) 2023, Arvid Norberg +All rights reserved. + +You may use, distribute and modify this code under the terms of the BSD license, +see LICENSE file. 
+*/ + +#ifndef TORRENT_DISK_CACHE +#define TORRENT_DISK_CACHE + +#include +#include + +#include "libtorrent/storage_defs.hpp" +#include "libtorrent/aux_/scope_end.hpp" +#include "libtorrent/aux_/alloca.hpp" +#include "libtorrent/aux_/invariant_check.hpp" +#include "libtorrent/aux_/pread_disk_job.hpp" +#include "libtorrent/aux_/pread_storage.hpp" +#include "libtorrent/aux_/disk_io_thread_pool.hpp" // for jobqueue_t +#include "libtorrent/aux_/unique_ptr.hpp" +#include "libtorrent/disk_buffer_holder.hpp" +#include "libtorrent/hasher.hpp" + +#include "libtorrent/aux_/disable_warnings_push.hpp" +#include + +#define BOOST_BIND_NO_PLACEHOLDERS + +#include +#include +#include +#include +#include +#include + +#include "libtorrent/aux_/disable_warnings_pop.hpp" + + +namespace libtorrent::aux { + +namespace mi = boost::multi_index; + +// uniquely identifies a torrent and piece +struct piece_location +{ + piece_location(storage_index_t const t, piece_index_t const p) + : torrent(t), piece(p) {} + storage_index_t torrent; + piece_index_t piece; + bool operator==(piece_location const& rhs) const + { + return std::tie(torrent, piece) + == std::tie(rhs.torrent, rhs.piece); + } + + bool operator<(piece_location const& rhs) const + { + return std::tie(torrent, piece) + < std::tie(rhs.torrent, rhs.piece); + } +}; + +inline size_t hash_value(piece_location const& l) +{ + std::size_t ret = 0; + boost::hash_combine(ret, std::hash{}(l.torrent)); + boost::hash_combine(ret, std::hash{}(l.piece)); + return ret; +} + +struct piece_hasher +{ + piece_hasher() : ph(hasher{}) {} + + sha1_hash final_hash(); + void update(span const buf); + lt::hasher& ctx(); + +private: + std::variant ph; +}; + +struct cached_block_entry +{ + // returns the buffer associated with this block. It either picks it from + // the write job that's hung on this block, or from the buffer in the block + // object, if it has been flushed to disk already. + // If there is no buffer, it returns an empty span. + span buf() const; + + // returns the buffer associated with the write job hanging on this block. + // If there is no write job, it returns an empty span. + span write_buf() const; + + // once the write job has been executed, and we've flushed the buffer, we + // move it into buf_holder, to keep the buffer alive until any hash job has + // completed as well. The underlying data can be accessed through buf, but + // the owner moves from the pread_disk_job object to this buf_holder. + // TODO: save space by just storing the buffer pointer here. The + // cached_piece_entry could hold the pointer to the buffer pool to be able + // to free these on destruction + // we would still need to save the *size* of the block, to support the + // shorter last block of a torrent + disk_buffer_holder buf_holder; + pread_disk_job* write_job = nullptr; + + bool flushed_to_disk = false; + + // TODO: only allocate this field for v2 torrents + sha256_hash block_hash; +}; + +struct cached_piece_entry +{ + cached_piece_entry(piece_location const& loc + , int const num_blocks + , int const piece_size_v2 + , bool v1 + , bool v2); + + span get_blocks() const; + + piece_location piece; + + // this is set to true when the piece has been populated with all blocks + // it will make it prioritized for flushing to disk + // it will be cleared once all blocks have been flushed + bool ready_to_flush = false; + + // when this is true, there is a thread currently hashing blocks and + // updating the hash context in "ph". 
Other threads may not touch "ph", + // "hasing_cursor", and may only read "hasing". + bool hashing = false; + + // when a thread is writing this piece to disk, this is true. Only one + // thread at a time should be flushing a piece to disk. + bool flushing = false; + + // this is set to true if the piece hash has been computed and returned + // to the bittorrent engine. + bool piece_hash_returned = false; + + // this indicates that this piece belongs to a v2 torrent, and it has the + // block_hash member of cached_block_entry and we need to compute the block + // hashes as well + bool v1_hashes = false; + bool v2_hashes = false; + + // if this is a v2 torrent, this is the exact size of this piece. The + // end-piece of each file may be truncated for v2 torrents + int piece_size2; + + int blocks_in_piece = 0; + + // the number of blocks that have been hashed so far. Specifically for the + // v1 SHA1 hash of the piece, so all blocks are contiguous starting at block + // 0. + int hasher_cursor = 0; + + // the number of contiguous blocks, starting at 0, that have been flushed to + // disk so far. This is used to determine how many blocks are left to flush + // from this piece without requiring read-back to hash them, by substracting + // flushed_cursor from hasher_cursor. + int flushed_cursor = 0; + + // the number of blocks that have a write job associated with them + int num_jobs = 0; + + // returns the number of blocks in this piece that have been hashed and + // ready to be flushed without requiring reading them back in the future. + int cheap_to_flush() const + { + return int(hasher_cursor) - int(flushed_cursor); + } + + unique_ptr blocks; + + piece_hasher ph; + + // if there is a hash_job set on this piece, whenever we complete hashing + // the last block, we should post this + pread_disk_job* hash_job = nullptr; + + // if the piece has been requested to be cleared, but it was locked + // (flushing) at the time. We hang this job here to complete it once the + // thread currently flushing is done with it + pread_disk_job* clear_piece = nullptr; +}; + +struct disk_cache +{ + using piece_container = mi::multi_index_container< + cached_piece_entry, + mi::indexed_by< + // look up ranges of pieces by (torrent, piece-index) + mi::ordered_unique>, + // ordered by the number of contiguous blocks we can flush without + // read-back. large numbers are ordered first + mi::ordered_non_unique, std::greater>, + // ordered by whether the piece is ready to be flushed or not + // true is ordered before false + mi::ordered_non_unique, std::greater>, + // hash-table lookup of individual pieces. faster than index 0 + mi::hashed_unique> + > + >; + + template + bool get(piece_location const loc, int const block_idx, Fun f) const + { + std::unique_lock l(m_mutex); + + INVARIANT_CHECK; + + auto& view = m_pieces.template get<0>(); + auto i = view.find(loc); + if (i == view.end()) return false; + + if (i->blocks[block_idx].buf().data()) + { + // TODO: it would be nice if this could be called without holding + // the mutex. It would require being able to lock the piece + f(i->blocks[block_idx].buf()); + return true; + } + return false; + } + + template + sha256_hash hash2(piece_location const loc, int const block_idx, Fun f) const + { + std::unique_lock l(m_mutex); + + INVARIANT_CHECK; + + auto& view = m_pieces.template get<0>(); + auto i = view.find(loc); + if (i != view.end()) + { + if (i->hashing) + { + // TODO: it would probably be more efficient to wait here. 
+ // #error we should hang the hash job onto the piece. If there is a + // job already, form a queue + l.unlock(); + return f(); + } + auto const& cbe = i->blocks[block_idx]; + // There's nothing stopping the hash threads from hashing the blocks in + // parallel. This should not depend on the hasher_cursor. That's a v1 + // concept + if (i->hasher_cursor > block_idx) + return cbe.block_hash; + if (cbe.buf().data()) + { + hasher256 h; + h.update(cbe.buf()); + return h.final(); + } + } + l.unlock(); + return f(); + } + + // returns false if the piece is not in the cache + template + bool hash_piece(piece_location const loc, Fun f) + { + std::unique_lock l(m_mutex); + + INVARIANT_CHECK; + + auto& view = m_pieces.template get<0>(); + auto piece_iter = view.find(loc); + if (piece_iter == view.end()) return false; + + TORRENT_ALLOCA(blocks, char const*, piece_iter->blocks_in_piece); + TORRENT_ALLOCA(v2_hashes, sha256_hash, piece_iter->blocks_in_piece); + + for (int i = 0; i < piece_iter->blocks_in_piece; ++i) + { + blocks[i] = piece_iter->blocks[i].buf().data(); + v2_hashes[i] = piece_iter->blocks[i].block_hash; + } + + view.modify(piece_iter, [](cached_piece_entry& e) { e.hashing = true; }); + int const hasher_cursor = piece_iter->hasher_cursor; + l.unlock(); + + auto se = scope_end([&] { + l.lock(); + view.modify(piece_iter, [&](cached_piece_entry& e) { + e.hashing = false; + }); + }); + f(const_cast(piece_iter->ph), hasher_cursor, blocks, v2_hashes); + return true; + } + + // If the specified piece exists in the cache, and it's unlocked, clear all + // write jobs (return them in "aborted"). Returns true if the clear_piece + // job should be posted as complete. Returns false if the piece is locked by + // another thread, and the clear_piece job has been queued to be issued once + // the piece is unlocked. 
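The piece_container defined above is the heart of the cache: one set of cached_piece_entry objects with four simultaneously maintained views. Since the template arguments are hard to read in the diff, here is a reduced, self-contained sketch of the same Boost.Multi-index pattern (entry and its fields are stand-ins for illustration, not names from the patch):

#include <boost/multi_index_container.hpp>
#include <boost/multi_index/ordered_index.hpp>
#include <boost/multi_index/hashed_index.hpp>
#include <boost/multi_index/member.hpp>
#include <functional>

namespace mi = boost::multi_index;

struct entry
{
    int key;            // stand-in for piece_location
    int cheap_to_flush; // contiguous hashed-but-not-flushed blocks
    bool ready_to_flush;
};

using table = mi::multi_index_container<entry, mi::indexed_by<
    // index 0: ordered, supports range queries over keys
    mi::ordered_unique<mi::member<entry, int, &entry::key>>,
    // index 1: pieces with the most cheap-to-flush blocks first
    mi::ordered_non_unique<mi::member<entry, int, &entry::cheap_to_flush>, std::greater<int>>,
    // index 2: ready-to-flush pieces (true) ordered before the rest
    mi::ordered_non_unique<mi::member<entry, bool, &entry::ready_to_flush>, std::greater<bool>>,
    // index 3: O(1) point lookup of a single key
    mi::hashed_unique<mi::member<entry, int, &entry::key>>
>>;

Mutating an element through a view's modify() call, as the patch does throughout, is what keeps all four indices consistent after a field changes.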
+ bool try_clear_piece(piece_location const loc, pread_disk_job* j, jobqueue_t& aborted); + + template + int get2(piece_location const loc, int const block_idx, Fun f) const + { + std::unique_lock l(m_mutex); + + INVARIANT_CHECK; + + auto& view = m_pieces.template get<0>(); + auto i = view.find(loc); + if (i == view.end()) return 0; + + char const* buf1 = i->blocks[block_idx].buf().data(); + char const* buf2 = i->blocks[block_idx + 1].buf().data(); + + if (buf1 == nullptr && buf2 == nullptr) + return 0; + + return f(buf1, buf2); + } + + // returns true if this piece needs to have its hasher kicked + bool insert(piece_location const loc + , int const block_idx + , pread_disk_job* write_job); + + enum hash_result: std::uint8_t + { + job_completed, + job_queued, + post_job, + }; + + hash_result try_hash_piece(piece_location const loc, pread_disk_job* hash_job); + + // this should be called from a hasher thread + void kick_hasher(piece_location const& loc, jobqueue_t& completed_jobs); + + // this should be called by a disk thread + // the callback should return the number of blocks it successfully flushed + // to disk + void flush_to_disk(std::function, int)> f + , int const target_blocks + , std::function clear_piece_fun); + + void flush_storage(std::function, int)> f + , storage_index_t const storage + , std::function clear_piece_fun); + + std::size_t size() const; + std::size_t num_flushing() const; + +#if TORRENT_USE_INVARIANT_CHECKS + void check_invariant() const; +#endif + +private: + + // this requires the mutex to be locked + void clear_piece_impl(cached_piece_entry& cpe, jobqueue_t& aborted); + + template + Iter flush_piece_impl(View& view + , Iter piece_iter + , std::function, int)> const& f + , std::unique_lock& l + , int const num_blocks + , span const blocks + , std::function clear_piece_fun); + + mutable std::mutex m_mutex; + piece_container m_pieces; + + // the number of *dirty* blocks in the cache. i.e. blocks that need to be + // flushed to disk. The cache may (briefly) hold more buffers than this + // while finishing hashing blocks. + int m_blocks = 0; + + // the number of blocks currently being flushed by a disk thread + // we use this to avoid over-shooting flushing blocks + int m_flushing_blocks = 0; +}; + +} + +#endif + diff --git a/include/libtorrent/aux_/disk_completed_queue.hpp b/include/libtorrent/aux_/disk_completed_queue.hpp index 2a307fa6014..cf13c2138f6 100644 --- a/include/libtorrent/aux_/disk_completed_queue.hpp +++ b/include/libtorrent/aux_/disk_completed_queue.hpp @@ -26,6 +26,7 @@ struct disk_completed_queue {} void abort_job(io_context& ioc, aux::disk_job* j); + void abort_jobs(io_context& ioc, jobqueue_t jobs); void append(io_context& ioc, jobqueue_t jobs); private: diff --git a/include/libtorrent/aux_/disk_job.hpp b/include/libtorrent/aux_/disk_job.hpp index 78197185556..dc8d793dd33 100644 --- a/include/libtorrent/aux_/disk_job.hpp +++ b/include/libtorrent/aux_/disk_job.hpp @@ -44,6 +44,7 @@ namespace libtorrent::aux { , file_priority , clear_piece , partial_read + , kick_hasher , num_job_ids }; @@ -234,6 +235,12 @@ namespace job { // the piece to clear piece_index_t piece; }; + + struct kick_hasher + { + // the piece whose hasher to kick + piece_index_t piece; + }; } // disk_job is a generic base class to disk io subsystem-specifit jobs (e.g. 
@@ -285,6 +292,7 @@ namespace job { , job::file_priority , job::clear_piece , job::partial_read + , job::kick_hasher > action; // the type of job this is diff --git a/include/libtorrent/aux_/disk_job_pool.hpp b/include/libtorrent/aux_/disk_job_pool.hpp index a9d108a60ef..b515b7cadd8 100644 --- a/include/libtorrent/aux_/disk_job_pool.hpp +++ b/include/libtorrent/aux_/disk_job_pool.hpp @@ -79,7 +79,9 @@ namespace aux { }; struct mmap_disk_job; + struct pread_disk_job; extern template struct disk_job_pool; + extern template struct disk_job_pool; } } diff --git a/include/libtorrent/aux_/pread_disk_job.hpp b/include/libtorrent/aux_/pread_disk_job.hpp new file mode 100644 index 00000000000..fe9896b730b --- /dev/null +++ b/include/libtorrent/aux_/pread_disk_job.hpp @@ -0,0 +1,27 @@ +/* + +Copyright (c) 2022, Arvid Norberg +All rights reserved. + +You may use, distribute and modify this code under the terms of the BSD license, +see LICENSE file. +*/ + +#ifndef TORRENT_PREAD_DISK_JOB_HPP +#define TORRENT_PREAD_DISK_JOB_HPP + +#include "libtorrent/aux_/disk_job.hpp" + +namespace libtorrent::aux { + + struct pread_storage; + + struct TORRENT_EXTRA_EXPORT pread_disk_job : disk_job + { + // the disk storage this job applies to (if applicable) + std::shared_ptr storage; + }; + +} + +#endif // TORRENT_PREAD_DISK_JOB_HPP diff --git a/include/libtorrent/aux_/pread_storage.hpp b/include/libtorrent/aux_/pread_storage.hpp new file mode 100644 index 00000000000..18c187d5cd2 --- /dev/null +++ b/include/libtorrent/aux_/pread_storage.hpp @@ -0,0 +1,187 @@ +/* + +Copyright (c) 2022, Arvid Norberg +All rights reserved. + +You may use, distribute and modify this code under the terms of the BSD license, +see LICENSE file. +*/ + +#ifndef TORRENT_PREAD_STORAGE_HPP +#define TORRENT_PREAD_STORAGE_HPP + +#include "libtorrent/config.hpp" + +#include +#include + +#include "libtorrent/fwd.hpp" +#include "libtorrent/aux_/disk_job_fence.hpp" +#include "libtorrent/storage_defs.hpp" +#include "libtorrent/aux_/part_file.hpp" +#include "libtorrent/aux_/stat_cache.hpp" +#include "libtorrent/aux_/file_pool.hpp" +#include "libtorrent/bitfield.hpp" +#include "libtorrent/span.hpp" +#include "libtorrent/aux_/vector.hpp" +#include "libtorrent/aux_/open_mode.hpp" // for aux::open_mode_t +#include "libtorrent/disk_interface.hpp" // for disk_job_flags_t + +namespace libtorrent::aux { + + struct session_settings; + struct file_view; + + struct TORRENT_EXTRA_EXPORT pread_storage + : std::enable_shared_from_this + , aux::disk_job_fence + { + // constructs the pread_storage based on the given storage_params. + // ``file_pool`` is the cache of file handles that the storage will use. + // All files it opens will ask the file_pool to open them. 
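pread_storage's read() and write() members (declared just below) are where this backend finally issues positional I/O. A positional read may legally return fewer bytes than requested, so any pread()-based storage needs a retry loop along these lines. A minimal POSIX sketch, not code from the patch:

#include <unistd.h> // pread

// read exactly `len` bytes at `offset`, tolerating short reads.
// returns bytes read (< len only at end-of-file), or -1 on error.
ssize_t read_all(int fd, char* buf, size_t len, off_t offset)
{
    size_t done = 0;
    while (done < len)
    {
        ssize_t const r = ::pread(fd, buf + done, len - done, offset + off_t(done));
        if (r < 0) return -1; // caller inspects errno
        if (r == 0) break;    // EOF: file shorter than requested range
        done += size_t(r);
    }
    return ssize_t(done);
}

Because pread() never moves the descriptor's file position, many disk threads can issue reads against the same shared handle from the file_pool without coordinating a seek, which is the property the whole backend is built on.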
+ pread_storage(storage_params const& params, aux::file_pool&); + + // hidden + ~pread_storage(); + pread_storage(pread_storage const&) = delete; + pread_storage& operator=(pread_storage const&) = delete; + + void abort_jobs(); + + bool has_any_file(storage_error&); + void set_file_priority(settings_interface const& + , aux::vector& prio + , storage_error&); + void rename_file(file_index_t index, std::string const& new_filename + , storage_error&); + void release_files(storage_error&); + void delete_files(remove_flags_t options, storage_error&); + status_t initialize(settings_interface const&, storage_error&); + std::pair move_storage(std::string save_path + , move_flags_t, storage_error&); + bool verify_resume_data(add_torrent_params const& rd + , aux::vector const& links + , storage_error&); + bool tick(); + + int read(settings_interface const&, span buffer + , piece_index_t piece, int offset, aux::open_mode_t mode + , disk_job_flags_t flags + , storage_error&); + int write(settings_interface const&, span buffer + , piece_index_t piece, int offset, aux::open_mode_t mode + , disk_job_flags_t flags + , storage_error&); + int write(settings_interface const& sett + , span const> buffers + , piece_index_t const piece, int offset + , open_mode_t const mode + , disk_job_flags_t const flags + , storage_error& error); + int hash(settings_interface const&, hasher& ph, std::ptrdiff_t len + , piece_index_t piece, int offset, aux::open_mode_t mode + , disk_job_flags_t flags, storage_error&); + int hash2(settings_interface const&, hasher256& ph, std::ptrdiff_t len + , piece_index_t piece, int offset, aux::open_mode_t mode + , disk_job_flags_t flags, storage_error&); + + // if the files in this storage are mapped, returns the mapped + // file_storage, otherwise returns the original file_storage object. + file_storage const& files() const { return m_mapped_files ? *m_mapped_files : m_files; } + + bool set_need_tick() + { + bool const prev = m_need_tick; + m_need_tick = true; + return prev; + } + + void do_tick() + { + m_need_tick = false; + tick(); + } + + void set_owner(std::shared_ptr const& tor) { m_torrent = tor; } + + storage_index_t storage_index() const { return m_storage_index; } + void set_storage_index(storage_index_t st) { m_storage_index = st; } + + bool v1() const { return m_v1; } + bool v2() const { return m_v2; } + + private: + + bool m_need_tick = false; + file_storage const& m_files; + + // the reason for this to be a void pointer + // is to avoid creating a dependency on the + // torrent. This shared_ptr is here only + // to keep the torrent object alive until + // the storage destructs. This is because + // the file_storage object is owned by the torrent. 
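The m_torrent member this comment documents uses the classic type-erased ownership-pin idiom. A standalone illustration under stand-in names (owner/borrower are not from the patch):

#include <memory>

struct owner { int payload = 42; };

struct borrower
{
    int const* payload;          // points into the owner's data
    std::shared_ptr<void> keep;  // pins the owner alive without naming its type
};

borrower make_borrower(std::shared_ptr<owner> const& o)
{
    // shared_ptr<owner> converts to shared_ptr<void> sharing the same
    // control block, so *o outlives the borrower with no type dependency
    return borrower{&o->payload, o};
}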
+ std::shared_ptr m_torrent; + + storage_index_t m_storage_index{0}; + + void need_partfile(); + + std::unique_ptr m_mapped_files; + + // in order to avoid calling stat() on each file multiple times + // during startup, cache the results in here, and clear it all + // out once the torrent starts (to avoid getting stale results) + // each entry represents the size and timestamp of the file + mutable aux::stat_cache m_stat_cache; + + // helper function to open a file in the file pool with the right mode + std::shared_ptr open_file(settings_interface const&, file_index_t + , aux::open_mode_t, storage_error&) const; + std::shared_ptr open_file_impl(settings_interface const& + , file_index_t, aux::open_mode_t, storage_error&) const; + + bool use_partfile(file_index_t index) const; + void use_partfile(file_index_t index, bool b); + + aux::vector m_file_priority; + std::string m_save_path; + std::string m_part_file_name; + + // this this is an array indexed by file-index. Each slot represents + // whether this file has the part-file enabled for it. This is used for + // backwards compatibility with pre-partfile versions of libtorrent. If + // this vector is empty, the default is that files *do* use the partfile. + // on startup, any 0-priority file that's found in it's original location + // is expected to be an old-style (pre-partfile) torrent storage, and + // those files have their slot set to false in this vector. + // note that the vector is *sparse*, it's only allocated if a file has its + // entry set to false, and only indices up to that entry. + aux::vector m_use_partfile; + + // the file pool is a member of the disk_io_thread + // to make all storage instances share the pool + aux::file_pool& m_pool; + + // used for skipped files + std::unique_ptr m_part_file; + + // this is a bitfield with one bit per file. A bit being set means + // we've written to that file previously. If we do write to a file + // whose bit is 0, we set the file size, to make the file allocated + // on disk (in full allocation mode) and just sparsely allocated in + // case of sparse allocation mode + mutable std::mutex m_file_created_mutex; + mutable typed_bitfield m_file_created; + + bool m_allocate_files; + // this is a v1 torrent + bool m_v1; + // this is a v2 torrent. If both v1 and v2 are set, it's a hybrid + // torrent + bool m_v2; + }; + +} + +#endif // TORRENT_PREAD_STORAGE_HPP diff --git a/include/libtorrent/aux_/store_buffer.hpp b/include/libtorrent/aux_/store_buffer.hpp index 48a51448186..3f1cf0aa9dc 100644 --- a/include/libtorrent/aux_/store_buffer.hpp +++ b/include/libtorrent/aux_/store_buffer.hpp @@ -74,6 +74,13 @@ struct store_buffer auto const it = m_store_buffer.find(loc); if (it != m_store_buffer.end()) { + // TODO: it would be nice if this could be called without holding + // the mutex. It would require a reference counter on the store + // buffer entries and that we potentially erases it after this call. + // it would also require the store buffer being able to take over + // ownership of the buffer when the owner erases it. 
Perhase erase() + // could be made to take a buffer_holder, which is held onto if the + // refcount > 0 f(it->second); return true; } diff --git a/include/libtorrent/aux_/unique_ptr.hpp b/include/libtorrent/aux_/unique_ptr.hpp index cd490554e39..f892043bcba 100644 --- a/include/libtorrent/aux_/unique_ptr.hpp +++ b/include/libtorrent/aux_/unique_ptr.hpp @@ -31,6 +31,8 @@ namespace libtorrent { namespace aux { unique_ptr() = default; explicit unique_ptr(T* arr) : base(arr) {} + unique_ptr(base b): base(std::move(b)) {} + decltype(auto) operator[](IndexType idx) const { TORRENT_ASSERT(idx >= IndexType(0)); @@ -38,6 +40,11 @@ namespace libtorrent { namespace aux { } }; + template + unique_ptr make_unique(IndexType const num) { + static_assert(std::is_array_v); + return unique_ptr(new std::remove_extent_t[std::size_t(num)]); + } }} #endif diff --git a/include/libtorrent/aux_/visit_block_iovecs.hpp b/include/libtorrent/aux_/visit_block_iovecs.hpp new file mode 100644 index 00000000000..fa6da043ead --- /dev/null +++ b/include/libtorrent/aux_/visit_block_iovecs.hpp @@ -0,0 +1,64 @@ +/* + +Copyright (c) 2023, Arvid Norberg +All rights reserved. + +You may use, distribute and modify this code under the terms of the BSD license, +see LICENSE file. +*/ + +#ifndef TORRENT_VISIT_BLOCK_IOVECS +#define TORRENT_VISIT_BLOCK_IOVECS + +#include "libtorrent/span.hpp" +#include "libtorrent/aux_/alloca.hpp" + +namespace libtorrent::aux { + +// Fun is a function object that's called with f(span>, int) +// and is expected to return a bool. true=interrupt, false=continue +template +void visit_block_iovecs(span blocks + , Fun const& f) +{ + TORRENT_ASSERT(blocks.size() > 0); + TORRENT_ALLOCA(iovec, span, blocks.size()); + + int count = 0; + + int start_idx = 0; + int idx = 0; + + for (auto& be : blocks) + { + auto const buf = be.write_buf(); + if (count > 0 && buf.empty()) + { + bool const interrupt = f(iovec.first(count), start_idx); + if (interrupt) return; + + start_idx = idx; + count = 0; + } + + if (buf.empty()) + { + ++idx; + start_idx = idx; + continue; + } + + iovec[count] = buf; + ++count; + ++idx; + } + + if (count > 0) + { + f(iovec.first(count), start_idx); + } +} + +} + +#endif diff --git a/include/libtorrent/config.hpp b/include/libtorrent/config.hpp index f9f28de506a..dfdd7ab0acc 100644 --- a/include/libtorrent/config.hpp +++ b/include/libtorrent/config.hpp @@ -91,6 +91,10 @@ see LICENSE file. || defined __FreeBSD_kernel__ #define TORRENT_BSD +#ifdef __NetBSD__ +#define TORRENT_HAS_FSYNC_RANGE 1 +#endif + #if defined __APPLE__ #include @@ -356,6 +360,11 @@ see LICENSE file. #define TORRENT_USE_IFCONF 1 #define TORRENT_USE_GRTTABLE 1 +#ifndef TORRENT_HAVE_PREAD +#define TORRENT_HAVE_PREAD 0 +#endif + + // ==== GNU/Hurd === #elif defined __GNU__ #define TORRENT_HURD @@ -470,6 +479,11 @@ see LICENSE file. #define TORRENT_HAVE_MMAP 0 #endif +#ifndef TORRENT_HAVE_PREAD +#define TORRENT_HAVE_PREAD 1 +#endif + + #ifndef TORRENT_HAVE_MAP_VIEW_OF_FILE #define TORRENT_HAVE_MAP_VIEW_OF_FILE 0 #endif @@ -550,6 +564,10 @@ see LICENSE file. #define TORRENT_HAS_COPYFILE 0 #endif +#ifndef TORRENT_HAS_FSYNC_RANGE +#define TORRENT_HAS_FSYNC_RANGE 0 +#endif + // debug builds have asserts enabled by default, release // builds have asserts if they are explicitly enabled by // the release_asserts macro. 
diff --git a/include/libtorrent/libtorrent.hpp b/include/libtorrent/libtorrent.hpp index 1d6a027b67a..f1b4f66dfa3 100644 --- a/include/libtorrent/libtorrent.hpp +++ b/include/libtorrent/libtorrent.hpp @@ -83,6 +83,7 @@ #include "libtorrent/piece_block.hpp" #include "libtorrent/portmap.hpp" #include "libtorrent/posix_disk_io.hpp" +#include "libtorrent/pread_disk_io.hpp" #include "libtorrent/random.hpp" #include "libtorrent/read_resume_data.hpp" #include "libtorrent/session.hpp" diff --git a/include/libtorrent/pread_disk_io.hpp b/include/libtorrent/pread_disk_io.hpp new file mode 100644 index 00000000000..b6ef36772c5 --- /dev/null +++ b/include/libtorrent/pread_disk_io.hpp @@ -0,0 +1,28 @@ +/* + +Copyright (c) 2022, Arvid Norberg +All rights reserved. + +You may use, distribute and modify this code under the terms of the BSD license, +see LICENSE file. +*/ + +#ifndef TORRENT_PREAD_DISK_IO_HPP +#define TORRENT_PREAD_DISK_IO_HPP + +#include "libtorrent/config.hpp" +#include "libtorrent/disk_interface.hpp" +#include "libtorrent/io_context.hpp" + +namespace libtorrent { + + struct counters; + struct settings_interface; + + // constructs a multi-threaded file disk I/O using pread()/pwrite() + TORRENT_EXPORT std::unique_ptr pread_disk_io_constructor( + io_context& ios, settings_interface const&, counters& cnt); + +} + +#endif // TORRENT_PREAD_DISK_IO_HPP diff --git a/src/disk_buffer_pool.cpp b/src/disk_buffer_pool.cpp index ea03a576626..9168b272a1a 100644 --- a/src/disk_buffer_pool.cpp +++ b/src/disk_buffer_pool.cpp @@ -16,6 +16,7 @@ see LICENSE file. #include "libtorrent/io_context.hpp" #include "libtorrent/disk_observer.hpp" #include "libtorrent/disk_interface.hpp" // for default_block_size +#include "libtorrent/aux_/debug_disk_thread.hpp" #include "libtorrent/aux_/disable_warnings_push.hpp" @@ -52,8 +53,9 @@ namespace { disk_buffer_pool::disk_buffer_pool(io_context& ios) : m_in_use(0) - , m_max_use(64) - , m_low_watermark(std::max(m_max_use - 32, 0)) + , m_max_use(256) + , m_low_watermark(m_max_use / 2) + , m_high_watermark(m_max_use * 3 / 4) , m_exceeded_max_size(false) , m_ios(ios) {} @@ -140,8 +142,8 @@ namespace { } #endif - if (m_in_use >= m_low_watermark + (m_max_use - m_low_watermark) - / 2 && !m_exceeded_max_size) + if (m_in_use >= std::max(m_high_watermark, m_max_use - 32) + && !m_exceeded_max_size) { m_exceeded_max_size = true; } @@ -179,6 +181,7 @@ namespace { int const pool_size = std::max(1, sett.get_int(settings_pack::max_queued_disk_bytes) / default_block_size); m_max_use = pool_size; m_low_watermark = m_max_use / 2; + m_high_watermark = m_max_use * 3 / 4; if (m_in_use >= m_max_use && !m_exceeded_max_size) { m_exceeded_max_size = true; @@ -189,6 +192,14 @@ namespace { #endif } + std::optional disk_buffer_pool::flush_request() const + { + std::unique_lock l(m_pool_mutex); + if (m_in_use >= m_high_watermark) + return m_in_use - m_low_watermark; + return std::nullopt; + } + void disk_buffer_pool::remove_buffer_in_use(char* buf) { TORRENT_UNUSED(buf); diff --git a/src/disk_cache.cpp b/src/disk_cache.cpp new file mode 100644 index 00000000000..7200e7e8dd4 --- /dev/null +++ b/src/disk_cache.cpp @@ -0,0 +1,742 @@ +/* + +Copyright (c) 2023, Arvid Norberg +All rights reserved. + +You may use, distribute and modify this code under the terms of the BSD license, +see LICENSE file. 
+*/ + +#include "libtorrent/aux_/disk_cache.hpp" +#include "libtorrent/aux_/debug_disk_thread.hpp" + +namespace libtorrent::aux { + +namespace mi = boost::multi_index; + +namespace { +struct compare_storage +{ + bool operator()(piece_location const& lhs, storage_index_t const rhs) const + { + return lhs.torrent < rhs; + } + + bool operator()(storage_index_t const lhs, piece_location const& rhs) const + { + return lhs < rhs.torrent; + } +}; + +bool have_buffers(span blocks) +{ + for (auto const& b : blocks) + if (b.buf().data() == nullptr) return false; + return true; +} + +bool compute_ready_to_flush(span blocks) +{ + bool has_job = false; + for (auto const& b : blocks) + { + has_job |= bool(b.write_job); + if (!b.write_job && !b.flushed_to_disk) return false; + } + return has_job; +} + +int compute_flushed_cursor(span blocks) +{ + int ret = 0; + for (auto const& b : blocks) + { + if (!b.flushed_to_disk) return ret; + ++ret; + } + return ret; +} + +#if TORRENT_USE_ASSERTS +int count_jobs(span blocks) +{ + return static_cast(std::count_if(blocks.begin(), blocks.end() + , [](cached_block_entry const& b) { return b.write_job; })); +} +#endif + +} + +span cached_block_entry::buf() const +{ + if (buf_holder) + return {buf_holder.data(), buf_holder.size()}; + + if (write_job != nullptr) + { + TORRENT_ASSERT(write_job->get_type() == aux::job_action_t::write); + auto const& job = std::get(write_job->action); + return {job.buf.data(), job.buffer_size}; + } + return {nullptr, 0}; +} + +span cached_block_entry::write_buf() const +{ + if (write_job != nullptr) + { + TORRENT_ASSERT(write_job->get_type() == aux::job_action_t::write); + auto const& job = std::get(write_job->action); + return {job.buf.data(), job.buffer_size}; + } + return {nullptr, 0}; +} + +template +struct overload : Type... { + using Type::operator()...; +}; +template overload(Type...) -> overload; + +sha1_hash piece_hasher::final_hash() +{ + sha1_hash ret; + std::visit(overload{ + [&] (hasher& h) { ret = h.final(); ph = ret; }, + [&] (sha1_hash const& h) { ret = h; }, + }, ph); + TORRENT_ASSERT(!ret.is_all_zeros()); + return ret; +} + +void piece_hasher::update(span const buf) +{ + hasher* ctx = std::get_if(&ph); + TORRENT_ASSERT(ctx != nullptr); + ctx->update(buf); +} + +lt::hasher& piece_hasher::ctx() +{ + hasher* ctx = std::get_if(&ph); + TORRENT_ASSERT(ctx != nullptr); + return *ctx; +} + +cached_piece_entry::cached_piece_entry(piece_location const& loc, int const num_blocks, int const piece_size_v2, bool const v1, bool const v2) + : piece(loc) + , v1_hashes(v1) + , v2_hashes(v2) + , piece_size2(piece_size_v2) + , blocks_in_piece(num_blocks) + , blocks(aux::make_unique(num_blocks)) +{} + +span cached_piece_entry::get_blocks() const +{ + return {blocks.get(), blocks_in_piece}; +} + +// If the specified piece exists in the cache, and it's unlocked, clear all +// write jobs (return them in "aborted"). Returns true if the clear_piece +// job should be posted as complete. Returns false if the piece is locked by +// another thread, and the clear_piece job has been queued to be issued once +// the piece is unlocked. 
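piece_hasher, implemented earlier in this file with the overload{} visitor idiom, holds a variant: a live incremental hasher while blocks are still arriving, replaced by the finished digest once final_hash() runs, so repeated calls return the cached value. The same state machine reduced to standalone stand-in types (running/digest/lazy_hash are not names from the patch):

#include <variant>
#include <cassert>

struct running { int state = 0; }; // stand-in for an incremental SHA-1 context
using digest = int;                // stand-in for sha1_hash

struct lazy_hash
{
    void update(int v)
    {
        running* r = std::get_if<running>(&m_state);
        assert(r != nullptr); // updating after finalizing is a bug
        r->state += v;
    }

    digest final_hash()
    {
        if (auto* r = std::get_if<running>(&m_state))
        {
            digest const d = r->state; // "finalize" the context
            m_state = d;               // cache the digest, drop the context
            return d;
        }
        return std::get<digest>(m_state); // already finalized: cached value
    }

private:
    std::variant<running, digest> m_state{running{}};
};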
+bool disk_cache::try_clear_piece(piece_location const loc, pread_disk_job* j, jobqueue_t& aborted) +{ + std::unique_lock l(m_mutex); + + INVARIANT_CHECK; + + auto& view = m_pieces.template get<0>(); + auto i = view.find(loc); + if (i == view.end()) return true; + if (i->flushing) + { + // postpone the clearing until we're done flushing + view.modify(i, [&](cached_piece_entry& e) { e.clear_piece = j; }); + return false; + } + + // we clear a piece after it fails the hash check. It doesn't make sense + // to be hashing still + TORRENT_ASSERT(!i->hashing); + if (i->hashing) + { + // postpone the clearing until we're done flushing + view.modify(i, [&](cached_piece_entry& e) { e.clear_piece = j; }); + return false; + } + + view.modify(i, [&](cached_piece_entry& e) { + clear_piece_impl(e, aborted); + }); + return true; +} + +// returns true if this piece needs to have its hasher kicked +bool disk_cache::insert(piece_location const loc + , int const block_idx + , pread_disk_job* write_job) +{ + TORRENT_ASSERT(write_job != nullptr); + std::unique_lock l(m_mutex); + + INVARIANT_CHECK; + + auto& view = m_pieces.template get<0>(); + auto i = view.find(loc); + if (i == view.end()) + { + pread_storage* storage = write_job->storage.get(); + file_storage const& fs = storage->files(); + int const blocks_in_piece = (storage->files().piece_size(loc.piece) + default_block_size - 1) / default_block_size; + int const piece_size2 = fs.piece_size2(loc.piece); + i = m_pieces.emplace(loc, blocks_in_piece, piece_size2, storage->v1(), storage->v2()).first; + } + + cached_block_entry& blk = i->blocks[block_idx]; + DLOG("disk_cache.insert: piece: %d blk: %d flushed: %d write_job: %p flushed_cursor: %d hashed_cursor: %d\n" + , static_cast(i->piece.piece) + , block_idx + , blk.flushed_to_disk + , blk.write_job + , i->flushed_cursor + , i->hasher_cursor); + TORRENT_ASSERT(!blk.buf_holder); + TORRENT_ASSERT(blk.write_job == nullptr); + TORRENT_ASSERT(blk.flushed_to_disk == false); + TORRENT_ASSERT(block_idx >= i->flushed_cursor); + TORRENT_ASSERT(block_idx >= i->hasher_cursor); + + TORRENT_ASSERT(write_job->get_type() == aux::job_action_t::write); + blk.write_job = write_job; + ++m_blocks; + + bool const ready_to_flush = compute_ready_to_flush(i->get_blocks()); + view.modify(i, [&](cached_piece_entry& e) { + e.ready_to_flush = ready_to_flush; + ++e.num_jobs; + }); + + return block_idx == 0 || ready_to_flush; +} + +// this call can have 3 outcomes: +// 1. the job is immediately satisfied and should be posted to the +// completion queue +// 2. The piece is in the cache and currently hashing, but it's not done +// yet. We hang the hash job on the piece itself so the hashing thread +// can complete it when hashing finishes +// 3. The piece is not in the cache and should be posted to the disk thread +// to read back the bytes. 
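From the caller's side, those three outcomes map onto three code paths. A sketch of the dispatch as a disk thread might perform it; m_cache, m_completed_jobs, and add_job() are assumed surrounding members for illustration, not verified against this patch:

void async_hash_sketch(aux::pread_disk_job* j, aux::piece_location loc)
{
    switch (m_cache.try_hash_piece(loc, j))
    {
    case aux::disk_cache::job_completed:
        m_completed_jobs.push_back(j); // hash was ready; post completion now
        break;
    case aux::disk_cache::job_queued:
        break; // hung on the piece; the hashing thread posts it when done
    case aux::disk_cache::post_job:
        add_job(j); // not cached; read blocks back and hash in a disk thread
        break;
    }
}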
+disk_cache::hash_result disk_cache::try_hash_piece(piece_location const loc, pread_disk_job* hash_job) +{ + std::unique_lock l(m_mutex); + + INVARIANT_CHECK; + + auto& view = m_pieces.template get<0>(); + auto i = view.find(loc); + if (i == view.end()) return hash_result::post_job; + + // we should only ask for the hash once + TORRENT_ASSERT(!i->piece_hash_returned); + + if (!i->hashing && i->hasher_cursor == i->blocks_in_piece) + { + view.modify(i, [&](cached_piece_entry& e) { + e.piece_hash_returned = true; + + auto& job = std::get(hash_job->action); + job.piece_hash = e.ph.final_hash(); + if (!job.block_hashes.empty()) + { + TORRENT_ASSERT(i->v2_hashes); + for (int idx = 0; idx < e.blocks_in_piece; ++idx) + job.block_hashes[idx] = e.blocks[idx].block_hash; + } + }); + return hash_result::job_completed; + } + + if (i->hashing + && i->hasher_cursor < i->blocks_in_piece + && have_buffers(i->get_blocks().subspan(i->hasher_cursor)) + ) + { + // We're not done hashing yet, let the hashing thread post the + // completion once it's done + + // We don't expect to ever have simultaneous async_hash() requests + // for the same piece + TORRENT_ASSERT(i->hash_job == nullptr); + view.modify(i, [&](cached_piece_entry& e) { e.hash_job = hash_job; }); + return hash_result::job_queued; + } + + return hash_result::post_job; +} + +// this should be called from a hasher thread +void disk_cache::kick_hasher(piece_location const& loc, jobqueue_t& completed_jobs) +{ + std::unique_lock l(m_mutex); + + INVARIANT_CHECK; + + auto& view = m_pieces.template get<0>(); + auto piece_iter = view.find(loc); + if (piece_iter == view.end()) + return; + + // some other thread beat us to it + if (piece_iter->hashing) + return; + + // this piece is done hasing + if (piece_iter->piece_hash_returned) + return; + + TORRENT_ALLOCA(blocks_storage, span, piece_iter->blocks_in_piece); + int cursor = piece_iter->hasher_cursor; +keep_going: + int block_idx = 0; + int end = cursor; + while (end < piece_iter->blocks_in_piece && piece_iter->blocks[end].buf().data()) + { + blocks_storage[block_idx] = piece_iter->blocks[end].buf(); + ++block_idx; + ++end; + } + auto const blocks = blocks_storage.first(block_idx); + + view.modify(piece_iter, [](cached_piece_entry& e) { e.hashing = true; }); + + bool const need_v1 = piece_iter->v1_hashes; + bool const need_v2 = piece_iter->v2_hashes; + + DLOG("kick_hasher: piece: %d hashed_cursor: [%d, %d] v1: %d v2: %d ctx: %p\n" + , static_cast(piece_iter->piece.piece) + , cursor, end + , need_v1, need_v2 + , &piece_iter->ph); + l.unlock(); + + int bytes_left = piece_iter->piece_size2 - (cursor * default_block_size); + for (auto& buf: blocks) + { + cached_block_entry& cbe = piece_iter->blocks[cursor]; + + if (need_v1) + { + auto& ctx = const_cast(piece_iter->ph); + ctx.update(buf); + } + + if (need_v2 && bytes_left > 0) + { + int const this_block_size = std::min(bytes_left, default_block_size); + cbe.block_hash = hasher256(buf.first(this_block_size)).final(); + bytes_left -= default_block_size; + } + + ++cursor; + } + + l.lock(); + + for (auto& cbe : piece_iter->get_blocks().subspan(piece_iter->hasher_cursor, block_idx)) + { + // TODO: free these in bulk, acquiring the mutex just once + // free them after releasing the mutex, l + if (cbe.buf_holder) + cbe.buf_holder.reset(); + } + + view.modify(piece_iter, [&](cached_piece_entry& e) { + e.hasher_cursor = cursor; + e.hashing = false; + }); + + if (cursor != piece_iter->blocks_in_piece) + { + // if some other thread added the next block, keep going + if 
(piece_iter->blocks[cursor].buf().data()) + goto keep_going; + DLOG("kick_hasher: no attached hash job\n"); + return; + } + + if (!piece_iter->hash_job) return; + + // there's a hash job hung on this piece, post it now + pread_disk_job* j = nullptr; + span const cached_blocks = piece_iter->get_blocks(); + + sha1_hash piece_hash; + TORRENT_ASSERT(!piece_iter->piece_hash_returned); + view.modify(piece_iter, [&cached_blocks, &j, &piece_hash](cached_piece_entry& e) { + j = std::exchange(e.hash_job, nullptr); + e.ready_to_flush = compute_ready_to_flush(cached_blocks); + e.piece_hash_returned = true; + // we've hashed all blocks, and there's a hash job associated with + // this piece, post it. + piece_hash = e.ph.final_hash(); + }); + + auto& job = std::get(j->action); + job.piece_hash = piece_hash; + if (!job.block_hashes.empty()) + { + TORRENT_ASSERT(need_v2); + int const to_copy = std::min( + piece_iter->blocks_in_piece, + int(job.block_hashes.size())); + for (int i = 0; i < to_copy; ++i) + job.block_hashes[i] = piece_iter->blocks[i].block_hash; + } + DLOG("kick_hasher: posting attached job piece: %d\n" + , static_cast(piece_iter->piece.piece)); + completed_jobs.push_back(j); +} + +template +Iter disk_cache::flush_piece_impl(View& view + , Iter piece_iter + , std::function, int)> const& f + , std::unique_lock& l + , int const num_blocks + , span const blocks + , std::function clear_piece_fun) +{ + view.modify(piece_iter, [](cached_piece_entry& e) { TORRENT_ASSERT(!e.flushing); e.flushing = true; }); + m_flushing_blocks += num_blocks; + TORRENT_ASSERT(num_blocks > 0); + + int const hash_cursor = piece_iter->hasher_cursor; + + // we have to release the lock while flushing, but since we set the + // "flushing" member to true, this piece is pinned to the cache + l.unlock(); + + int count = 0; + bitfield flushed_blocks; + { + auto se = scope_end([&] { + l.lock(); + view.modify(piece_iter, [](cached_piece_entry& e) { + TORRENT_ASSERT(e.flushing); + e.flushing = false; + }); + TORRENT_ASSERT(m_flushing_blocks >= num_blocks); + m_flushing_blocks -= num_blocks; + }); + flushed_blocks.resize(int(blocks.size())); + flushed_blocks.clear_all(); + count = f(flushed_blocks, blocks, hash_cursor); + } + TORRENT_ASSERT(l.owns_lock()); + + // now that we hold the mutex again, we can update the entries for + // all the blocks that were flushed + int jobs = 0; + for (int i = 0; i < blocks.size(); ++i) + { + if (!flushed_blocks.get_bit(i)) continue; + cached_block_entry& blk = blocks[i]; + + auto* j = blk.write_job; + TORRENT_ASSERT(j); + TORRENT_ASSERT(j->get_type() == aux::job_action_t::write); + blk.buf_holder = std::move(std::get(j->action).buf); + blk.flushed_to_disk = true; + TORRENT_ASSERT(blk.buf_holder); + // TODO: free these in bulk at the end, or something + if (i < hash_cursor) + blk.buf_holder.reset(); + + blk.write_job = nullptr; + ++jobs; + } + auto next_iter = std::next(piece_iter); + view.modify(piece_iter, [&blocks, jobs](cached_piece_entry& e) { + span const all_blocks = e.get_blocks(); + e.flushed_cursor = compute_flushed_cursor(all_blocks); + e.ready_to_flush = compute_ready_to_flush(all_blocks); + TORRENT_ASSERT(e.num_jobs >= jobs); + e.num_jobs -= jobs; + }); + DLOG("flush_piece_impl: piece: %d flushed_cursor: %d ready_to_flush: %d\n" + , static_cast(piece_iter->piece.piece), piece_iter->flushed_cursor, piece_iter->ready_to_flush); + TORRENT_ASSERT(count <= blocks.size()); + TORRENT_ASSERT(m_blocks >= count); + m_blocks -= count; + if (piece_iter->clear_piece) + { + jobqueue_t aborted; + 
pread_disk_job* clear_piece = nullptr; + view.modify(piece_iter, [&](cached_piece_entry& e) { + clear_piece_impl(e, aborted); + clear_piece = std::exchange(e.clear_piece, nullptr); + }); + clear_piece_fun(std::move(aborted), clear_piece); + } + + return next_iter; +} + +// this should be called by a disk thread +// the callback should return the number of blocks it successfully flushed +// to disk +void disk_cache::flush_to_disk( + std::function, int)> f + , int const target_blocks + , std::function clear_piece_fun) +{ + std::unique_lock l(m_mutex); + + INVARIANT_CHECK; + + // first we look for pieces that are ready to be flushed and should be + // updating + auto& view = m_pieces.template get<2>(); + for (auto piece_iter = view.begin(); piece_iter != view.end();) + { + // We want to flush all pieces that are ready to flush regardless of + // the flush target. There's not much value in keeping them in RAM + // when we've completely downloaded the piece and hashed it + // so, we don't check flush target in this loop + + if (piece_iter->flushing) + { + ++piece_iter; + continue; + } + + if (!piece_iter->ready_to_flush) + break; + + int const num_blocks = piece_iter->blocks_in_piece; + TORRENT_ASSERT(num_blocks >= 0); + if (num_blocks == 0) + { + ++piece_iter; + continue; + } + span const blocks = piece_iter->get_blocks(); + + auto const next_iter = flush_piece_impl(view, piece_iter, f, l + , num_blocks, blocks, clear_piece_fun); + + if (piece_iter->piece_hash_returned) + { + TORRENT_ASSERT(!piece_iter->flushing); + TORRENT_ASSERT(!piece_iter->hashing); + view.erase(piece_iter); + } + piece_iter = next_iter; + } + + // if we get here, we have to "force flush" some blocks even though we + // don't have all the blocks yet. Start by flushing pieces that have the + // most contiguous blocks to flush: + auto& view2 = m_pieces.template get<1>(); + for (auto piece_iter = view2.begin(); piece_iter != view2.end();) + { + // We avoid flushing if other threads have already initiated sufficient + // amount of flushing + if (m_blocks - m_flushing_blocks <= target_blocks) + return; + + if (piece_iter->flushing) + { + ++piece_iter; + continue; + } + + int const num_blocks = piece_iter->hasher_cursor - piece_iter->flushed_cursor; + TORRENT_ASSERT(num_blocks >= 0); + + // the pieces are ordered by the number of blocks that are cheap to + // flush (i.e. 
won't require read-back later) + // if we encounter a 0, all the remaining ones will also be zero + if (num_blocks <= 0) break; + span const blocks = piece_iter->get_blocks().subspan(piece_iter->flushed_cursor); + + piece_iter = flush_piece_impl(view2, piece_iter, f, l + , num_blocks, blocks, clear_piece_fun); + } + + // we may still need to flush blocks at this point, even though we + // would require read-back later to compute the piece hash + auto& view3 = m_pieces.template get<0>(); + for (auto piece_iter = view3.begin(); piece_iter != view3.end();) + { + // We avoid flushing if other threads have already initiated sufficient + // amount of flushing + if (m_blocks - m_flushing_blocks <= target_blocks) + return; + + if (piece_iter->flushing) + { + ++piece_iter; + continue; + } + + int const num_blocks = piece_iter->num_jobs; + TORRENT_ASSERT(count_jobs(piece_iter->get_blocks()) == num_blocks); + if (num_blocks == 0) + { + ++piece_iter; + continue; + } + + span const blocks = piece_iter->get_blocks(); + + piece_iter = flush_piece_impl(view3, piece_iter, f, l + , num_blocks, blocks, clear_piece_fun); + } +} + +void disk_cache::flush_storage(std::function, int)> f + , storage_index_t const storage + , std::function clear_piece_fun) +{ + std::unique_lock l(m_mutex); + + INVARIANT_CHECK; + + auto& range_view = m_pieces.template get<0>(); + auto& view = m_pieces.template get<3>(); + auto const [begin, end] = range_view.equal_range(storage, compare_storage()); + + std::vector pieces; + for (auto i = begin; i != end; ++i) + pieces.push_back(i->piece.piece); + + bitfield flushed_blocks; + + for (auto piece : pieces) + { + auto piece_iter = view.find(piece_location{storage, piece}); + if (piece_iter == view.end()) + continue; + + // There's a risk that some other thread is flushing this piece, but + // won't force-flush it completely. In that case parts of the piece + // may not be flushed + // TODO: maybe we should track these pieces and synchronize with + // them later. 
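Tying the pieces together: disk_buffer_pool::flush_request() (added earlier in this patch) reports how many blocks above the low watermark the pool currently holds, and a disk thread can translate that into the target_blocks argument of flush_to_disk(). A sketch of that glue, with heavy assumptions: flush_blocks() and clear_piece_jobs() are hypothetical helpers, and the callback parameter types are abbreviated via generic lambdas rather than spelled out:

void maybe_flush_sketch()
{
    std::optional<int> const req = m_buffer_pool.flush_request();
    if (!req) return; // below the high watermark; nothing to do

    // flush until only `target` dirty blocks remain in the cache
    int const target = std::max(0, int(m_cache.size()) - *req);
    m_cache.flush_to_disk(
        [&](auto& flushed_bitmask, auto blocks, int const hash_cursor)
        { return flush_blocks(flushed_bitmask, blocks, hash_cursor); }
        , target
        , [&](auto aborted_jobs, auto* clear_job)
        { clear_piece_jobs(std::move(aborted_jobs), clear_job); });
}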
maybe wait for them to be flushed or hang our job on + // them, but that would really only work if there's only one piece + // left + if (piece_iter->flushing) + continue; + + int const num_blocks = piece_iter->num_jobs; + TORRENT_ASSERT(count_jobs(piece_iter->get_blocks()) == num_blocks); + if (num_blocks == 0) continue; + span const blocks = piece_iter->get_blocks(); + + flush_piece_impl(view, piece_iter, f, l + , num_blocks, blocks, clear_piece_fun); + + TORRENT_ASSERT(!piece_iter->flushing); + TORRENT_ASSERT(!piece_iter->hashing); + piece_iter = view.erase(piece_iter); + } +} + +std::size_t disk_cache::size() const +{ + std::unique_lock l(m_mutex); + INVARIANT_CHECK; + return static_cast(m_blocks); +} + +std::size_t disk_cache::num_flushing() const +{ + std::unique_lock l(m_mutex); + INVARIANT_CHECK; + return static_cast(m_flushing_blocks); +} + +#if TORRENT_USE_INVARIANT_CHECKS +void disk_cache::check_invariant() const +{ + // mutex must be held by caller + int dirty_blocks = 0; + int flushing_blocks = 0; + + auto& view = m_pieces.template get<2>(); + for (auto const& piece_entry : view) + { + int const num_blocks = piece_entry.blocks_in_piece; + + if (piece_entry.flushing) + flushing_blocks += num_blocks; + + span const blocks = piece_entry.get_blocks(); + + TORRENT_ASSERT(piece_entry.flushed_cursor <= num_blocks); + TORRENT_ASSERT(piece_entry.hasher_cursor <= num_blocks); + + int idx = 0; + for (auto& be : blocks) + { + if (be.write_job) ++dirty_blocks; + // a block holds either a write job or buffer, never both + TORRENT_ASSERT(!(bool(be.write_job) && bool(be.buf_holder))); + if (be.write_job) + TORRENT_ASSERT(be.write_job->get_type() == aux::job_action_t::write); + + if (idx < piece_entry.flushed_cursor) + TORRENT_ASSERT(be.write_job == nullptr); + else if (idx == piece_entry.flushed_cursor) + TORRENT_ASSERT(!be.buf_holder); + +// if (idx < piece_entry.hasher_cursor) +// TORRENT_ASSERT(!be.buf_holder); + + if (piece_entry.ready_to_flush) + TORRENT_ASSERT(be.write_job != nullptr || be.flushed_to_disk); + ++idx; + } + } + // if one or more blocks are being flushed, we cannot know how many blocks + // are in flight. 
We just know the limit + TORRENT_ASSERT(dirty_blocks == m_blocks); + TORRENT_ASSERT(m_flushing_blocks <= flushing_blocks); +} +#endif + +// this requires the mutex to be locked +void disk_cache::clear_piece_impl(cached_piece_entry& cpe, jobqueue_t& aborted) +{ + TORRENT_ASSERT(!cpe.flushing); + TORRENT_ASSERT(!cpe.hashing); + int jobs = 0; + for (int idx = 0; idx < cpe.blocks_in_piece; ++idx) + { + auto& cbe = cpe.blocks[idx]; + if (cbe.write_job) + { + aborted.push_back(cbe.write_job); + cbe.write_job = nullptr; + ++jobs; + --m_blocks; + } + cbe.flushed_to_disk = false; + cbe.buf_holder.reset(); + } + cpe.ready_to_flush = false; + cpe.piece_hash_returned = false; + cpe.hasher_cursor = 0; + cpe.flushed_cursor = 0; + TORRENT_ASSERT(cpe.num_jobs >= jobs); + cpe.num_jobs -= jobs; + cpe.ph = piece_hasher{}; + DLOG("clear_piece: piece: %d\n", static_cast(cpe.piece.piece)); +} + +} diff --git a/src/disk_completed_queue.cpp b/src/disk_completed_queue.cpp index 18429b1cb16..74c8d53e56a 100644 --- a/src/disk_completed_queue.cpp +++ b/src/disk_completed_queue.cpp @@ -42,12 +42,39 @@ void disk_completed_queue::abort_job(io_context& ioc, aux::disk_job* j) } } +void disk_completed_queue::abort_jobs(io_context& ioc, jobqueue_t jobs) +{ + if (jobs.empty()) return; + + for (auto i = jobs.iterate(); i.get(); i.next()) + { + auto* j = i.get(); + j->ret = disk_status::fatal_disk_error; + j->error = storage_error(boost::asio::error::operation_aborted); + j->flags |= aux::disk_job::aborted; +#if TORRENT_USE_ASSERTS + TORRENT_ASSERT(j->job_posted == false); + j->job_posted = true; +#endif + } + std::lock_guard l(m_completed_jobs_mutex); + m_completed_jobs.append(std::move(jobs)); + + if (!m_job_completions_in_flight && !m_completed_jobs.empty()) + { + DLOG("posting job handlers (%d)\n", m_completed_jobs.size()); + + post(ioc, [this] { this->call_job_handlers(); }); + m_job_completions_in_flight = true; + } +} + void disk_completed_queue::append(io_context& ioc, jobqueue_t jobs) { std::lock_guard l(m_completed_jobs_mutex); m_completed_jobs.append(std::move(jobs)); - if (!m_job_completions_in_flight) + if (!m_job_completions_in_flight && !m_completed_jobs.empty()) { DLOG("posting job handlers (%d)\n", m_completed_jobs.size()); diff --git a/src/disk_job.cpp b/src/disk_job.cpp index 6d2a01d25ac..7cfedf1596f 100644 --- a/src/disk_job.cpp +++ b/src/disk_job.cpp @@ -97,6 +97,8 @@ namespace { j.handler(std::move(j.buf), m_job.error); } + void operator()(job::kick_hasher&) const {} + private: disk_job& m_job; }; diff --git a/src/disk_job_pool.cpp b/src/disk_job_pool.cpp index 11f0571dbae..e08abb0cb73 100644 --- a/src/disk_job_pool.cpp +++ b/src/disk_job_pool.cpp @@ -10,6 +10,7 @@ see LICENSE file. 
#include "libtorrent/aux_/disk_job_pool.hpp" #include "libtorrent/aux_/mmap_disk_job.hpp" +#include "libtorrent/aux_/pread_disk_job.hpp" namespace libtorrent { namespace aux { @@ -69,5 +70,6 @@ namespace aux { } template struct disk_job_pool; + template struct disk_job_pool; } } diff --git a/src/mmap_disk_io.cpp b/src/mmap_disk_io.cpp index 3fa5b0245f1..235d13a35c0 100644 --- a/src/mmap_disk_io.cpp +++ b/src/mmap_disk_io.cpp @@ -136,6 +136,7 @@ struct TORRENT_EXTRA_EXPORT mmap_disk_io final // this submits all queued up jobs to the thread void submit_jobs() override; + status_t do_job(aux::job::kick_hasher&, aux::mmap_disk_job*) { return status_t{}; } status_t do_job(aux::job::partial_read& a, aux::mmap_disk_job* j); status_t do_job(aux::job::read& a, aux::mmap_disk_job* j); status_t do_job(aux::job::write& a, aux::mmap_disk_job* j); diff --git a/src/pread_disk_io.cpp b/src/pread_disk_io.cpp new file mode 100644 index 00000000000..8fb880cc4e2 --- /dev/null +++ b/src/pread_disk_io.cpp @@ -0,0 +1,1686 @@ +/* + +Copyright (c) 2022, Arvid Norberg +All rights reserved. + +You may use, distribute and modify this code under the terms of the BSD license, +see LICENSE file. +*/ + +#include "libtorrent/config.hpp" + +#include "libtorrent/aux_/pread_storage.hpp" +#include "libtorrent/pread_disk_io.hpp" +#include "libtorrent/disk_buffer_holder.hpp" +#include "libtorrent/aux_/throw.hpp" +#include "libtorrent/error_code.hpp" +#include "libtorrent/error.hpp" +#include "libtorrent/aux_/disk_buffer_pool.hpp" +#include "libtorrent/aux_/pread_disk_job.hpp" +#include "libtorrent/performance_counters.hpp" +#include "libtorrent/aux_/debug.hpp" +#include "libtorrent/units.hpp" +#include "libtorrent/hasher.hpp" +#include "libtorrent/aux_/platform_util.hpp" // for set_thread_name +#include "libtorrent/aux_/disk_job_pool.hpp" +#include "libtorrent/aux_/disk_io_thread_pool.hpp" +#include "libtorrent/aux_/disk_cache.hpp" +#include "libtorrent/aux_/visit_block_iovecs.hpp" +#include "libtorrent/aux_/time.hpp" +#include "libtorrent/add_torrent_params.hpp" +#include "libtorrent/aux_/numeric_cast.hpp" +#include "libtorrent/settings_pack.hpp" +#include "libtorrent/aux_/storage_array.hpp" +#include "libtorrent/aux_/disk_completed_queue.hpp" +#include "libtorrent/aux_/debug_disk_thread.hpp" + +#include + +namespace libtorrent { +namespace { + +aux::open_mode_t file_mode_for_job(aux::pread_disk_job* j) +{ + aux::open_mode_t ret = aux::open_mode::read_only; + if (j->flags & disk_interface::sequential_access) ret |= aux::open_mode::sequential_access; + return ret; +} + +#if TORRENT_USE_ASSERTS +bool valid_flags(disk_job_flags_t const flags) +{ + return (flags & ~(disk_interface::force_copy + | disk_interface::sequential_access + | disk_interface::volatile_read + | disk_interface::v1_hash + | disk_interface::flush_piece)) + == disk_job_flags_t{}; +} +#endif + +template +void translate_error(aux::disk_job* j, Fun f) +{ + try + { + j->ret = f(); + } + catch (boost::system::system_error const& err) + { + j->ret = disk_status::fatal_disk_error; + j->error.ec = err.code(); + j->error.operation = operation_t::exception; + } + catch (std::bad_alloc const&) + { + j->ret = disk_status::fatal_disk_error; + j->error.ec = errors::no_memory; + j->error.operation = operation_t::exception; + } + catch (std::exception const&) + { + j->ret = disk_status::fatal_disk_error; + j->error.ec = boost::asio::error::fault; + j->error.operation = operation_t::exception; + } +} + +} // anonymous namespace + +// this is a singleton consisting of the thread 
and a queue +// of disk io jobs +struct TORRENT_EXTRA_EXPORT pread_disk_io final + : disk_interface +{ + pread_disk_io(io_context& ios, settings_interface const&, counters& cnt); +#if TORRENT_USE_ASSERTS + ~pread_disk_io() override; +#endif + + void settings_updated() override; + storage_holder new_torrent(storage_params const& params + , std::shared_ptr const& owner) override; + void remove_torrent(storage_index_t) override; + + void abort(bool wait) override; + + void async_read(storage_index_t storage, peer_request const& r + , std::function handler + , disk_job_flags_t flags = {}) override; + bool async_write(storage_index_t storage, peer_request const& r + , char const* buf, std::shared_ptr o + , std::function handler + , disk_job_flags_t flags = {}) override; + void async_hash(storage_index_t storage, piece_index_t piece, span v2 + , disk_job_flags_t flags + , std::function handler) override; + void async_hash2(storage_index_t storage, piece_index_t piece, int offset, disk_job_flags_t flags + , std::function handler) override; + void async_move_storage(storage_index_t storage, std::string p, move_flags_t flags + , std::function handler) override; + void async_release_files(storage_index_t storage + , std::function handler = std::function()) override; + void async_delete_files(storage_index_t storage, remove_flags_t options + , std::function handler) override; + void async_check_files(storage_index_t storage + , add_torrent_params const* resume_data + , aux::vector links + , std::function handler) override; + void async_rename_file(storage_index_t storage, file_index_t index, std::string name + , std::function handler) override; + void async_stop_torrent(storage_index_t storage + , std::function handler) override; + void async_set_file_priority(storage_index_t storage + , aux::vector prio + , std::function)> handler) override; + + void async_clear_piece(storage_index_t storage, piece_index_t index + , std::function handler) override; + + void update_stats_counters(counters& c) const override; + + std::vector get_status(storage_index_t) const override; + + // this submits all queued up jobs to the thread + void submit_jobs() override; + + status_t do_job(aux::job::partial_read& a, aux::pread_disk_job* j); + status_t do_job(aux::job::read& a, aux::pread_disk_job* j); + status_t do_job(aux::job::write& a, aux::pread_disk_job* j); + status_t do_job(aux::job::hash& a, aux::pread_disk_job* j); + status_t do_job(aux::job::hash2& a, aux::pread_disk_job* j); + + status_t do_job(aux::job::move_storage& a, aux::pread_disk_job* j); + status_t do_job(aux::job::release_files& a, aux::pread_disk_job* j); + status_t do_job(aux::job::delete_files& a, aux::pread_disk_job* j); + status_t do_job(aux::job::check_fastresume& a, aux::pread_disk_job* j); + status_t do_job(aux::job::rename_file& a, aux::pread_disk_job* j); + status_t do_job(aux::job::stop_torrent& a, aux::pread_disk_job* j); + status_t do_job(aux::job::file_priority& a, aux::pread_disk_job* j); + status_t do_job(aux::job::clear_piece& a, aux::pread_disk_job* j); + status_t do_job(aux::job::kick_hasher& a, aux::pread_disk_job* j); + +private: + + void thread_fun(aux::disk_io_thread_pool& pool + , executor_work_guard work); + + void add_completed_jobs(jobqueue_t jobs); + void add_completed_jobs_impl(jobqueue_t jobs, jobqueue_t& completed); + + void perform_job(aux::pread_disk_job* j, jobqueue_t& completed_jobs); + + // this queues up another job to be submitted + void add_job(aux::pread_disk_job* j, bool user_add = true); + void 
add_fence_job(aux::pread_disk_job* j, bool user_add = true);
+
+	void execute_job(aux::pread_disk_job* j);
+	void immediate_execute();
+	void abort_jobs();
+	void abort_hash_jobs(storage_index_t storage);
+
+	void try_flush_cache(int target_cache_size
+		, std::unique_lock<std::mutex>& l);
+	void flush_storage(std::shared_ptr<aux::pread_storage> const& storage);
+
+	int flush_cache_blocks(bitfield& flushed, span<aux::cached_block_entry> blocks
+		, int hash_cursor
+		, jobqueue_t& completed_jobs);
+	void clear_piece_jobs(jobqueue_t aborted, aux::pread_disk_job* clear);
+
+	// returns the maximum number of threads;
+	// the actual number of running threads may be less
+	int num_threads() const;
+	aux::disk_io_thread_pool& pool_for_job(aux::pread_disk_job* j);
+
+	// set to true once we start shutting down
+	std::atomic<bool> m_abort{false};
+
+	// this is a counter of how many threads are currently running.
+	// it's used to identify the last thread still running while
+	// shutting down. This last thread is responsible for cleanup.
+	// Must hold the job mutex to access
+	int m_num_running_threads = 0;
+
+	aux::disk_job_pool<aux::pread_disk_job> m_job_pool;
+
+	// std::mutex to protect the m_generic_threads and m_hash_threads lists
+	mutable std::mutex m_job_mutex;
+
+	// when set, it means we're trying to flush the disk cache down to this size
+	// it's a signal to generic disk threads to start flushing. Once flushing
+	// starts, m_flush_target is cleared.
+	std::optional<int> m_flush_target = std::nullopt;
+
+	settings_interface const& m_settings;
+
+	// LRU cache of open files
+	aux::file_pool m_file_pool;
+
+	// pool of disk buffers, backing both reads and cached writes
+	aux::disk_buffer_pool m_buffer_pool;
+
+	counters& m_stats_counters;
+
+	// this is the main thread io_context. Callbacks are
+	// posted on this in order to have them execute in
+	// the main thread.
+	io_context& m_ios;
+
+	aux::disk_completed_queue m_completed_jobs;
+
+	// storages that have had write activity recently and will get ticked
+	// soon, for deferred actions (say, flushing partfile metadata)
+	std::vector<std::pair<time_point, std::weak_ptr<aux::pread_storage>>> m_need_tick;
+	std::mutex m_need_tick_mutex;
+
+	aux::storage_array<aux::pread_storage> m_torrents;
+
+	std::atomic_flag m_jobs_aborted = ATOMIC_FLAG_INIT;
+
+	// every write job is inserted into this cache while it is in the job queue.
+	// It is removed after the write completes.
This will let subsequent reads + // pull the buffers straight out of the queue instead of having to + // synchronize with the writing thread(s) + aux::disk_cache m_cache; + + // most jobs are posted to m_generic_io_jobs + // but hash jobs are posted to m_hash_io_jobs if m_hash_threads + // has a non-zero maximum thread count + aux::disk_io_thread_pool m_generic_threads; + aux::disk_io_thread_pool m_hash_threads; +}; + +TORRENT_EXPORT std::unique_ptr pread_disk_io_constructor( + io_context& ios, settings_interface const& sett, counters& cnt) +{ + return std::make_unique(ios, sett, cnt); +} + +// ------- pread_disk_io ------ + +// for _1 and _2 +using namespace std::placeholders; + +pread_disk_io::pread_disk_io(io_context& ios, settings_interface const& sett, counters& cnt) + : m_settings(sett) + , m_file_pool(sett.get_int(settings_pack::file_pool_size)) + , m_buffer_pool(ios) + , m_stats_counters(cnt) + , m_ios(ios) + , m_completed_jobs([&](aux::disk_job** j, int const n) { + m_job_pool.free_jobs(reinterpret_cast(j), n); + }, cnt) + , m_generic_threads(std::bind(&pread_disk_io::thread_fun, this, _1, _2), ios) + , m_hash_threads(std::bind(&pread_disk_io::thread_fun, this, _1, _2), ios) +{ + settings_updated(); +} + +std::vector pread_disk_io::get_status(storage_index_t const st) const +{ + return m_file_pool.get_status(st); +} + +storage_holder pread_disk_io::new_torrent(storage_params const& params + , std::shared_ptr const& owner) +{ + TORRENT_ASSERT(params.files.is_valid()); + + auto storage = std::make_shared(params, m_file_pool); + storage->set_owner(owner); + storage_index_t const idx = m_torrents.add(std::move(storage)); + return storage_holder(idx, *this); +} + +void pread_disk_io::remove_torrent(storage_index_t const idx) +{ + m_torrents.remove(idx); +} + +#if TORRENT_USE_ASSERTS +pread_disk_io::~pread_disk_io() +{ + DLOG("destructing pread_disk_io\n"); + + // abort should have been triggered + TORRENT_ASSERT(m_abort); + + // there are not supposed to be any writes in-flight by now + TORRENT_ASSERT(m_cache.size() == 0); + + // all torrents are supposed to have been removed by now + TORRENT_ASSERT(m_torrents.empty()); +} +#endif + +void pread_disk_io::abort(bool const wait) +{ + DLOG("pread_disk_io::abort: (wait: %d)\n", int(wait)); + + // first make sure queued jobs have been submitted + // otherwise the queue may not get processed + submit_jobs(); + + // abuse the job mutex to make setting m_abort and checking the thread count atomic + // see also the comment in thread_fun + std::unique_lock l(m_job_mutex); + if (m_abort.exchange(true)) return; + bool const no_threads = m_generic_threads.num_threads() == 0 + && m_hash_threads.num_threads() == 0; + // abort outstanding jobs belonging to this torrent + + DLOG("aborting hash jobs\n"); + m_hash_threads.visit_jobs([](aux::disk_job* j) + { + j->flags |= aux::disk_job::aborted; + }); + l.unlock(); + + // if there are no disk threads, we can't wait for the jobs here, because + // we'd stall indefinitely + if (no_threads) + { + abort_jobs(); + } + + DLOG("aborting thread pools\n"); + // even if there are no threads it doesn't hurt to abort the pools + // it prevents threads from being started after an abort which is a good + // defensive programming measure + m_generic_threads.abort(wait); + m_hash_threads.abort(wait); +} + +void pread_disk_io::settings_updated() +{ + m_buffer_pool.set_settings(m_settings); + m_file_pool.resize(m_settings.get_int(settings_pack::file_pool_size)); + + int const num_threads = 
m_settings.get_int(settings_pack::aio_threads); + int const num_hash_threads = m_settings.get_int(settings_pack::hashing_threads); + DLOG("set max threads(%d, %d)\n", num_threads, num_hash_threads); + + m_generic_threads.set_max_threads(num_threads); + m_hash_threads.set_max_threads(num_hash_threads); +} + +void pread_disk_io::perform_job(aux::pread_disk_job* j, jobqueue_t& completed_jobs) +{ + TORRENT_ASSERT(j->next == nullptr); + +#if DEBUG_DISK_THREAD + { + std::unique_lock l(m_job_mutex); + + DLOG("perform_job job: %s outstanding: %d\n" + , print_job(*j).c_str() + , j->storage ? j->storage->num_outstanding_jobs() : -1); + } +#endif + + std::shared_ptr storage = j->storage; + + m_stats_counters.inc_stats_counter(counters::num_running_disk_jobs, 1); + + // call disk function + // TODO: in the future, propagate exceptions back to the handlers + translate_error(j, [&] { + return std::visit([this, j](auto& a) { return this->do_job(a, j); }, j->action); + }); + + // note that -2 errors are OK + TORRENT_ASSERT(j->ret != disk_status::fatal_disk_error + || (j->error.ec && j->error.operation != operation_t::unknown)); + + m_stats_counters.inc_stats_counter(counters::num_running_disk_jobs, -1); + + completed_jobs.push_back(j); +} + +status_t pread_disk_io::do_job(aux::job::partial_read& a, aux::pread_disk_job* j) +{ + TORRENT_ASSERT(a.buf); + time_point const start_time = clock_type::now(); + + span const b = {a.buf.data() + a.buffer_offset, a.buffer_size}; + + int const ret = j->storage->read(m_settings, b + , a.piece, a.offset, file_mode_for_job(j), j->flags, j->error); + + TORRENT_ASSERT(ret >= 0 || j->error.ec); + TORRENT_UNUSED(ret); + + if (!j->error.ec) + { + std::int64_t const read_time = total_microseconds(clock_type::now() - start_time); + + m_stats_counters.inc_stats_counter(counters::num_read_back); + m_stats_counters.inc_stats_counter(counters::num_blocks_read); + m_stats_counters.inc_stats_counter(counters::num_read_ops); + m_stats_counters.inc_stats_counter(counters::disk_read_time, read_time); + m_stats_counters.inc_stats_counter(counters::disk_job_time, read_time); + } + + TORRENT_ASSERT((j->flags & aux::disk_job::in_progress) || !j->storage); + return status_t{}; +} + +status_t pread_disk_io::do_job(aux::job::read& a, aux::pread_disk_job* j) +{ + a.buf = disk_buffer_holder(m_buffer_pool, m_buffer_pool.allocate_buffer("send buffer"), default_block_size); + if (!a.buf) + { + j->error.ec = error::no_memory; + j->error.operation = operation_t::alloc_cache_piece; + return disk_status::fatal_disk_error; + } + + time_point const start_time = clock_type::now(); + + aux::open_mode_t const file_mode = file_mode_for_job(j); + span const b = {a.buf.data(), a.buffer_size}; + + int const ret = j->storage->read(m_settings, b + , a.piece, a.offset, file_mode, j->flags, j->error); + + TORRENT_ASSERT(ret >= 0 || j->error.ec); + TORRENT_UNUSED(ret); + + if (!j->error.ec) + { + std::int64_t const read_time = total_microseconds(clock_type::now() - start_time); + + m_stats_counters.inc_stats_counter(counters::num_read_back); + m_stats_counters.inc_stats_counter(counters::num_blocks_read); + m_stats_counters.inc_stats_counter(counters::num_read_ops); + m_stats_counters.inc_stats_counter(counters::disk_read_time, read_time); + m_stats_counters.inc_stats_counter(counters::disk_job_time, read_time); + } + TORRENT_ASSERT((j->flags & aux::disk_job::in_progress) || !j->storage); + return status_t{}; +} + +status_t pread_disk_io::do_job(aux::job::write&, aux::pread_disk_job*) +{ + TORRENT_ASSERT_FAIL(); + 
return status_t{}; +} + +void pread_disk_io::async_read(storage_index_t storage, peer_request const& r + , std::function handler + , disk_job_flags_t const flags) +{ + TORRENT_ASSERT(valid_flags(flags)); + TORRENT_ASSERT(r.length <= default_block_size); + TORRENT_ASSERT(r.length > 0); + TORRENT_ASSERT(r.start >= 0); + + storage_error ec; + if (r.length <= 0 || r.start < 0) + { + // this is an invalid read request. + ec.ec = errors::invalid_request; + ec.operation = operation_t::file_read; + handler(disk_buffer_holder{}, ec); + return; + } + + // in case r.start is not aligned to a block, calculate that offset, + // since that's how the disk_cache is indexed. block_offset is the + // aligned offset to the first block this read touches. In the case the + // request is aligned, it's the same as r.start + int const block_offset = r.start - (r.start % default_block_size); + int const block_idx = r.start / default_block_size; + // this is the offset into the block that we're reading from + int const read_offset = r.start - block_offset; + + DLOG("async_read piece: %d block: %d (read-offset: %d)\n", static_cast(r.piece) + , block_offset / default_block_size, read_offset); + + disk_buffer_holder buffer; + + if (read_offset + r.length > default_block_size) + { + // This is an unaligned request spanning two blocks. One of the two + // blocks may be in the cache, or neither. + // If neither is in the cache, we can just issue a normal + // read job for the unaligned request. + + aux::piece_location const loc{storage, r.piece}; + std::ptrdiff_t const len1 = default_block_size - read_offset; + + TORRENT_ASSERT(r.length > len1); + + int const ret = m_cache.get2(loc, block_idx, [&](char const* buf1, char const* buf2) + { + buffer = disk_buffer_holder(m_buffer_pool + , m_buffer_pool.allocate_buffer("send buffer") + , r.length); + if (!buffer) + { + ec.ec = error::no_memory; + ec.operation = operation_t::alloc_cache_piece; + return 3; + } + + if (buf1) + std::memcpy(buffer.data(), buf1 + read_offset, std::size_t(len1)); + if (buf2) + std::memcpy(buffer.data() + len1, buf2, std::size_t(r.length - len1)); + return (buf1 ? 2 : 0) | (buf2 ? 1 : 0); + }); + + if (ret == 3) + { + // both sides were found in the store buffer and the read request + // was satisfied immediately + handler(std::move(buffer), ec); + return; + } + + if (ret != 0) + { + TORRENT_ASSERT(ret == 1 || ret == 2); + // only one side of the read request was found in the store + // buffer, and we need to issue a partial read for the remaining + // bytes + aux::pread_disk_job* j = m_job_pool.allocate_job( + flags, + m_torrents[storage]->shared_from_this(), + std::move(handler), + std::move(buffer), + std::uint16_t((ret == 1) ? 0 : len1), // buffer_offset + std::uint16_t((ret == 1) ? len1 : r.length - len1), // buffer_size + r.piece, + (ret == 1) ? 
r.start : block_offset + default_block_size // offset + ); + + add_job(j); + return; + } + + // if we couldn't find any block in the cache, fall through and post it + // as a normal read job + } + else + { + // this is an aligned read request for one block + if (m_cache.get({ storage, r.piece }, block_idx, [&](span buf) + { + TORRENT_ASSERT_VAL(read_offset <= buf.size(), read_offset); + TORRENT_ASSERT_VAL(read_offset + r.length <= buf.size(), r.length); + buffer = disk_buffer_holder(m_buffer_pool, m_buffer_pool.allocate_buffer("send buffer"), r.length); + if (!buffer) + { + ec.ec = error::no_memory; + ec.operation = operation_t::alloc_cache_piece; + return; + } + + std::memcpy(buffer.data(), buf.data() + read_offset, std::size_t(r.length)); + })) + { + handler(std::move(buffer), ec); + return; + } + } + + aux::pread_disk_job* j = m_job_pool.allocate_job( + flags, + m_torrents[storage]->shared_from_this(), + std::move(handler), + disk_buffer_holder{}, + std::uint16_t(r.length), // buffer_size + r.piece, + r.start // offset + ); + + add_job(j); +} + +bool pread_disk_io::async_write(storage_index_t const storage, peer_request const& r + , char const* buf, std::shared_ptr o + , std::function handler + , disk_job_flags_t const flags) +{ + TORRENT_ASSERT(valid_flags(flags)); + bool exceeded = false; + disk_buffer_holder buffer(m_buffer_pool, m_buffer_pool.allocate_buffer( + exceeded, o, "receive buffer"), r.length); + if (!buffer) aux::throw_ex(); + std::memcpy(buffer.data(), buf, aux::numeric_cast(r.length)); + + TORRENT_ASSERT(r.start % default_block_size == 0); + TORRENT_ASSERT(r.length <= default_block_size); + + aux::pread_disk_job* j = m_job_pool.allocate_job( + flags, + m_torrents[storage]->shared_from_this(), + std::move(handler), + std::move(buffer), + r.piece, + r.start, + std::uint16_t(r.length) + ); + + DLOG("async_write: piece: %d offset: %d\n", int(r.piece), int(r.start)); + bool const need_kick = m_cache.insert({j->storage->storage_index(), r.piece}, r.start / default_block_size, j); + + if (need_kick) + { + // TODO: if the most recently added job to the hash thread pool is a + // kick-hasher job for the same piece, skip this + aux::pread_disk_job* khj = m_job_pool.allocate_job( + flags, + m_torrents[storage]->shared_from_this(), + r.piece + ); + add_job(khj); + } + + std::unique_lock l(m_job_mutex); + if (!m_flush_target) + { + // if the disk buffer wants to free up blocks, notify the thread + // pool that we may need to flush blocks + auto req = m_buffer_pool.flush_request(); + if (req) + { + m_flush_target = std::max(0, int(m_cache.size()) - *req); + DLOG("async_write: set flush_target: %d\n", *m_flush_target); + // wake up a thread + m_generic_threads.interrupt(); + } + } + + return exceeded; +} + +void pread_disk_io::async_hash(storage_index_t const storage + , piece_index_t const piece, span const v2, disk_job_flags_t const flags + , std::function handler) +{ + TORRENT_ASSERT(valid_flags(flags)); + aux::pread_disk_job* j = m_job_pool.allocate_job( + flags, + m_torrents[storage]->shared_from_this(), + std::move(handler), + piece, + v2, + sha1_hash{} + ); + + aux::disk_cache::hash_result const ret = m_cache.try_hash_piece({j->storage->storage_index(), piece}, j); + + // if we have already computed the piece hash, just post the completion + // immediately + if (ret == aux::disk_cache::job_completed) + { + jobqueue_t jobs; + jobs.push_back(j); + add_completed_jobs(std::move(jobs)); + return; + } + + // In this case the job has been queued on the piece, and will be posted + // 
once the hashing completes + if (ret == aux::disk_cache::job_queued) + return; + + add_job(j); +} + +void pread_disk_io::async_hash2(storage_index_t const storage + , piece_index_t const piece, int const offset, disk_job_flags_t const flags + , std::function handler) +{ + TORRENT_ASSERT(valid_flags(flags)); + aux::pread_disk_job* j = m_job_pool.allocate_job( + flags, + m_torrents[storage]->shared_from_this(), + std::move(handler), + piece, + offset, + sha256_hash{} + ); + + // In theory, we could check the cache for this block hash, but we + // only retain cached_piece_entries until the main piece hash has been + // returned, asking for individual blocks may not be available + add_job(j); +} + +void pread_disk_io::async_move_storage(storage_index_t const storage + , std::string p, move_flags_t const flags + , std::function handler) +{ + aux::pread_disk_job* j = m_job_pool.allocate_job( + {}, + m_torrents[storage]->shared_from_this(), + std::move(handler), + std::move(p), // path + flags + ); + + add_fence_job(j); +} + +void pread_disk_io::async_release_files(storage_index_t const storage + , std::function handler) +{ + aux::pread_disk_job* j = m_job_pool.allocate_job( + {}, + m_torrents[storage]->shared_from_this(), + std::move(handler) + ); + + add_fence_job(j); +} + +void pread_disk_io::abort_hash_jobs(storage_index_t const storage) +{ + // abort outstanding hash jobs belonging to this torrent + std::unique_lock l(m_job_mutex); + + auto st = m_torrents[storage]->shared_from_this(); + // hash jobs + m_hash_threads.visit_jobs([&](aux::disk_job* gj) + { + auto* j = static_cast(gj); + if (j->storage != st) return; + // only cancel volatile-read jobs. This means only full checking + // jobs. These jobs are likely to have a pretty deep queue and + // really gain from being cancelled. They can also be restarted + // easily. 
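+		// (e.g. when a torrent is stopped in the middle of a full check,
+		// this lets a deep backlog of queued hash jobs be dropped here,
+		// instead of each one being read from disk before shutdown.)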
+ if (j->flags & disk_interface::volatile_read) + j->flags |= aux::disk_job::aborted; + }); +} + +void pread_disk_io::async_delete_files(storage_index_t const storage + , remove_flags_t const options + , std::function handler) +{ + abort_hash_jobs(storage); + aux::pread_disk_job* j = m_job_pool.allocate_job( + {}, + m_torrents[storage]->shared_from_this(), + std::move(handler), + options + ); + add_fence_job(j); +} + +void pread_disk_io::async_check_files(storage_index_t const storage + , add_torrent_params const* resume_data + , aux::vector links + , std::function handler) +{ + aux::vector* links_vector = nullptr; + if (!links.empty()) links_vector = new aux::vector(std::move(links)); + + aux::pread_disk_job* j = m_job_pool.allocate_job( + {}, + m_torrents[storage]->shared_from_this(), + std::move(handler), + links_vector, + resume_data + ); + + add_fence_job(j); +} + +void pread_disk_io::async_rename_file(storage_index_t const storage + , file_index_t const index, std::string name + , std::function handler) +{ + aux::pread_disk_job* j = m_job_pool.allocate_job( + {}, + m_torrents[storage]->shared_from_this(), + std::move(handler), + index, + std::move(name) + ); + add_fence_job(j); +} + +void pread_disk_io::async_stop_torrent(storage_index_t const storage + , std::function handler) +{ + auto st = m_torrents[storage]->shared_from_this(); + abort_hash_jobs(storage); + + aux::pread_disk_job* j = m_job_pool.allocate_job( + {}, + m_torrents[storage]->shared_from_this(), + std::move(handler) + ); + add_fence_job(j); +} + +void pread_disk_io::async_set_file_priority(storage_index_t const storage + , aux::vector prios + , std::function)> handler) +{ + aux::pread_disk_job* j = m_job_pool.allocate_job( + {}, + m_torrents[storage]->shared_from_this(), + std::move(handler), + std::move(prios) + ); + + add_fence_job(j); +} + +void pread_disk_io::async_clear_piece(storage_index_t const storage + , piece_index_t const index, std::function handler) +{ + aux::pread_disk_job* j = m_job_pool.allocate_job( + {}, + m_torrents[storage]->shared_from_this(), + std::move(handler), + index + ); + + DLOG("async_clear_piece: piece: %d\n", int(index)); + // regular jobs are not executed in-order. + // clear piece must wait for all write jobs issued to the piece finish + // before it completes. + jobqueue_t aborted_jobs; + bool const immediate_completion = m_cache.try_clear_piece( + {j->storage->storage_index(), index}, j, aborted_jobs); + + m_completed_jobs.abort_jobs(m_ios, std::move(aborted_jobs)); + if (immediate_completion) + { + DLOG("immediate clear\n"); + jobqueue_t jobs; + jobs.push_back(j); + add_completed_jobs(std::move(jobs)); + } + else + { + DLOG("deferred clear\n"); + } +} + +status_t pread_disk_io::do_job(aux::job::hash& a, aux::pread_disk_job* j) +{ + // we're not using a cache. This is the simple path + // just read straight from the file + bool const v1 = bool(j->flags & disk_interface::v1_hash); + bool const v2 = !a.block_hashes.empty(); + + int const piece_size = v1 ? j->storage->files().piece_size(a.piece) : 0; + int const piece_size2 = v2 ? j->storage->files().piece_size2(a.piece) : 0; + int const blocks_in_piece = v1 ? (piece_size + default_block_size - 1) / default_block_size : 0; + int const blocks_in_piece2 = v2 ? 
j->storage->files().blocks_in_piece2(a.piece) : 0; + aux::open_mode_t const file_mode = file_mode_for_job(j); + + TORRENT_ASSERT(!v2 || int(a.block_hashes.size()) >= blocks_in_piece2); + TORRENT_ASSERT(v1 || v2); + + int const blocks_to_read = std::max(blocks_in_piece, blocks_in_piece2); + + // this creates a function object, ready to be passed to + // m_cache.hash_piece() + auto hash_partial_piece = [&] (lt::aux::piece_hasher& ph + , int const hasher_cursor + , span const blocks + , span const v2_hashes) + { + time_point const start_time = clock_type::now(); + + if (v2 && hasher_cursor > 0) + { + for (int i = 0; i < hasher_cursor; ++i) + { + TORRENT_ASSERT(!v2_hashes[i].is_all_zeros()); + a.block_hashes[i] = v2_hashes[i]; + } + } + + int offset = hasher_cursor * default_block_size; + int blocks_read_from_disk = 0; + for (int i = hasher_cursor; i < blocks_to_read; ++i) + { + bool const v2_block = i < blocks_in_piece2; + + std::ptrdiff_t const len = v1 ? std::min(default_block_size, piece_size - offset) : 0; + std::ptrdiff_t const len2 = v2_block ? std::min(default_block_size, piece_size2 - offset) : 0; + + hasher256 ph2; + char const* buf = blocks[i]; + if (buf == nullptr) + { + DLOG("do_hash: reading (piece: %d block: %d)\n", int(a.piece), i); + + j->error.ec.clear(); + + if (v1) + { + auto const flags = v2_block + ? (j->flags & ~disk_interface::flush_piece) + : j->flags; + + j->storage->hash(m_settings, ph.ctx(), len, a.piece + , offset, file_mode, flags, j->error); + } + if (v2_block) + { + j->storage->hash2(m_settings, ph2, len2, a.piece, offset + , file_mode, j->flags, j->error); + } + if (j->error) break; + ++blocks_read_from_disk; + } + else + { + if (v1) + ph.update({ buf, len }); + if (v2_block) + ph2.update({buf, len2}); + } + offset += default_block_size; + + if (v2_block) + a.block_hashes[i] = ph2.final(); + } + + if (v1) + a.piece_hash = ph.final_hash(); + + if (!j->error.ec) + { + std::int64_t const read_time = total_microseconds(clock_type::now() - start_time); + + m_stats_counters.inc_stats_counter(counters::num_blocks_read, blocks_read_from_disk); + m_stats_counters.inc_stats_counter(counters::num_read_ops, blocks_read_from_disk); + m_stats_counters.inc_stats_counter(counters::disk_hash_time, read_time); + m_stats_counters.inc_stats_counter(counters::disk_job_time, read_time); + } + }; + + if (!m_cache.hash_piece({ j->storage->storage_index(), a.piece} + , hash_partial_piece)) + { + // fall back to reading everything from disk + + TORRENT_ALLOCA(blocks, char const*, blocks_to_read); + TORRENT_ALLOCA(v2_hashes, sha256_hash, blocks_in_piece2); + for (char const*& b : blocks) b = nullptr; + lt::aux::piece_hasher ph; + hash_partial_piece(ph, 0, blocks, v2_hashes); + } + return j->error ? 
disk_status::fatal_disk_error : status_t{};
+}
+
+status_t pread_disk_io::do_job(aux::job::hash2& a, aux::pread_disk_job* j)
+{
+	int const piece_size = j->storage->files().piece_size2(a.piece);
+	aux::open_mode_t const file_mode = file_mode_for_job(j);
+
+	DLOG("do_hash2: reading (piece: %d offset: %d)\n", int(a.piece), int(a.offset));
+
+	time_point const start_time = clock_type::now();
+
+	TORRENT_ASSERT(piece_size > a.offset);
+	std::ptrdiff_t const len = std::min(default_block_size, piece_size - a.offset);
+
+	int ret = 0;
+	a.piece_hash2 = m_cache.hash2({ j->storage->storage_index(), a.piece }
+		, a.offset / default_block_size
+		, [&] {
+			hasher256 h;
+			ret = j->storage->hash2(m_settings, h, len, a.piece, a.offset
+				, file_mode, j->flags, j->error);
+			return h.final();
+		});
+
+	if (!j->error.ec)
+	{
+		std::int64_t const read_time = total_microseconds(clock_type::now() - start_time);
+
+		m_stats_counters.inc_stats_counter(counters::num_blocks_read);
+		m_stats_counters.inc_stats_counter(counters::num_read_ops);
+		m_stats_counters.inc_stats_counter(counters::disk_hash_time, read_time);
+		m_stats_counters.inc_stats_counter(counters::disk_job_time, read_time);
+	}
+
+	return ret >= 0 ? status_t{} : disk_status::fatal_disk_error;
+}
+
+status_t pread_disk_io::do_job(aux::job::move_storage& a, aux::pread_disk_job* j)
+{
+	// if this assert fails, something's wrong with the fence logic
+	TORRENT_ASSERT(j->storage->num_outstanding_jobs() == 1);
+	flush_storage(j->storage);
+
+	// if files have to be closed, that's the storage's responsibility
+	auto const [ret, p] = j->storage->move_storage(std::move(a.path), a.move_flags, j->error);
+
+	a.path = std::move(p);
+	return ret;
+}
+
+status_t pread_disk_io::do_job(aux::job::release_files&, aux::pread_disk_job* j)
+{
+	// if this assert fails, something's wrong with the fence logic
+	TORRENT_ASSERT(j->storage->num_outstanding_jobs() == 1);
+	flush_storage(j->storage);
+	j->storage->release_files(j->error);
+	return j->error ? disk_status::fatal_disk_error : status_t{};
+}
+
+status_t pread_disk_io::do_job(aux::job::delete_files& a, aux::pread_disk_job* j)
+{
+	TORRENT_ASSERT(a.flags);
+
+	// if this assert fails, something's wrong with the fence logic
+	TORRENT_ASSERT(j->storage->num_outstanding_jobs() == 1);
+
+	// TODO: maybe we don't need to write to files we're about to delete
+	flush_storage(j->storage);
+
+	j->storage->delete_files(a.flags, j->error);
+	return j->error ? disk_status::fatal_disk_error : status_t{};
+}
+
+status_t pread_disk_io::do_job(aux::job::check_fastresume& a, aux::pread_disk_job* j)
+{
+	// if this assert fails, something's wrong with the fence logic
+	TORRENT_ASSERT(j->storage->num_outstanding_jobs() == 1);
+	flush_storage(j->storage);
+	add_torrent_params const* rd = a.resume_data;
+	add_torrent_params tmp;
+	if (rd == nullptr) rd = &tmp;
+
+	std::unique_ptr<aux::vector<std::string, file_index_t>> links(a.links);
+	// check if the fastresume data is up to date. If it is, use it and
+	// return true. If it isn't, return false and the full check will be
+	// run. If the links vector is non-empty, it has the same number of
+	// elements as there are files. Each element is either empty or contains
+	// the absolute path to a file identical to the corresponding file in this
+	// torrent. The storage must create hard links (or copy) those files. If
+	// any file does not exist or is inaccessible, the disk job must fail.
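+	// (illustration, not from this patch: links[2] == "/pool/seed/f2" would
+	// ask the storage to hard-link that existing, identical file into place
+	// as file index 2; this is how mutable-torrent support reuses files.)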
+ + TORRENT_ASSERT(j->storage->files().piece_length() > 0); + + // always initialize the storage + auto const ret_flag = j->storage->initialize(m_settings, j->error); + if (j->error) return disk_status::fatal_disk_error | ret_flag; + + // we must call verify_resume() unconditionally of the setting below, in + // order to set up the links (if present) + bool const verify_success = j->storage->verify_resume_data(*rd + , links ? *links : aux::vector(), j->error); + + // j->error may have been set at this point, by verify_resume_data() + // it's important to not have it cleared out subsequent calls, as long + // as they succeed. + + if (m_settings.get_bool(settings_pack::no_recheck_incomplete_resume)) + return status_t{} | ret_flag; + + if (!aux::contains_resume_data(*rd)) + { + // if we don't have any resume data, we still may need to trigger a + // full re-check, if there are *any* files. + storage_error ignore; + return ((j->storage->has_any_file(ignore)) + ? disk_status::need_full_check + : status_t{}) + | ret_flag; + } + + return (verify_success + ? status_t{} + : disk_status::need_full_check) + | ret_flag; +} + +status_t pread_disk_io::do_job(aux::job::rename_file& a, aux::pread_disk_job* j) +{ + // if this assert fails, something's wrong with the fence logic + TORRENT_ASSERT(j->storage->num_outstanding_jobs() == 1); + + // if files need to be closed, that's the storage's responsibility + j->storage->rename_file(a.file_index, a.name, j->error); + return j->error ? disk_status::fatal_disk_error : status_t{}; +} + +status_t pread_disk_io::do_job(aux::job::stop_torrent&, aux::pread_disk_job* j) +{ + // if this assert fails, something's wrong with the fence logic + TORRENT_ASSERT(j->storage->num_outstanding_jobs() == 1); + flush_storage(j->storage); + j->storage->release_files(j->error); + return j->error ? disk_status::fatal_disk_error : status_t{}; +} + +void pread_disk_io::update_stats_counters(counters& c) const +{ + // These are atomic_counts, so it's safe to access them from + // a different thread + std::unique_lock jl(m_job_mutex); + + c.set_value(counters::num_read_jobs, m_job_pool.read_jobs_in_use()); + c.set_value(counters::num_write_jobs, m_job_pool.write_jobs_in_use()); + c.set_value(counters::num_jobs, m_job_pool.jobs_in_use()); + c.set_value(counters::queued_disk_jobs, m_generic_threads.queue_size() + + m_hash_threads.queue_size()); + + jl.unlock(); + + // gauges + c.set_value(counters::disk_blocks_in_use, m_buffer_pool.in_use()); +} + +status_t pread_disk_io::do_job(aux::job::file_priority& a, aux::pread_disk_job* j) +{ + j->storage->set_file_priority(m_settings + , a.prio + , j->error); + return status_t{}; +} + +status_t pread_disk_io::do_job(aux::job::clear_piece&, aux::pread_disk_job*) +{ + TORRENT_ASSERT_FAIL(); + return {}; +} + +status_t pread_disk_io::do_job(aux::job::kick_hasher& a, aux::pread_disk_job* j) +{ + jobqueue_t jobs; + m_cache.kick_hasher({j->storage->storage_index(), a.piece}, jobs); + add_completed_jobs(std::move(jobs)); + return {}; +} + +void pread_disk_io::add_fence_job(aux::pread_disk_job* j, bool const user_add) +{ + // if this happens, it means we started to shut down + // the disk threads too early. 
We have to post all jobs + // before the disk threads are shut down + if (m_abort) + { + m_completed_jobs.abort_job(m_ios, j); + return; + } + + DLOG("add_fence:job: %s (outstanding: %d)\n" + , print_job(*j).c_str() + , j->storage->num_outstanding_jobs()); + + TORRENT_ASSERT(j->storage); + m_stats_counters.inc_stats_counter(counters::num_fenced_read + static_cast(j->get_type())); + + int const ret = j->storage->raise_fence(j, m_stats_counters); + if (ret == aux::disk_job_fence::fence_post_fence) + { + std::unique_lock l(m_job_mutex); + TORRENT_ASSERT((j->flags & aux::disk_job::in_progress) || !j->storage); + m_generic_threads.push_back(j); + l.unlock(); + } + + if (num_threads() == 0 && user_add) + immediate_execute(); +} + +void pread_disk_io::add_job(aux::pread_disk_job* j, bool const user_add) +{ + TORRENT_ASSERT(!j->storage || j->storage->files().is_valid()); + TORRENT_ASSERT(j->next == nullptr); + // if this happens, it means we started to shut down + // the disk threads too early. We have to post all jobs + // before the disk threads are shut down + if (m_abort) + { + m_completed_jobs.abort_job(m_ios, j); + return; + } + + TORRENT_ASSERT(!(j->flags & aux::disk_job::in_progress)); + + DLOG("add_job: %s (outstanding: %d)\n" + , print_job(*j).c_str() + , j->storage ? j->storage->num_outstanding_jobs() : 0); + + // is the fence up for this storage? + // jobs that are instantaneous are not affected by the fence, is_blocked() + // will take ownership of the job and queue it up, in case the fence is up + // if the fence flag is set, this job just raised the fence on the storage + // and should be scheduled + if (j->storage && j->storage->is_blocked(j)) + { + m_stats_counters.inc_stats_counter(counters::blocked_disk_jobs); + DLOG("blocked job: %s (torrent: %d total: %d)\n" + , print_job(*j).c_str(), j->storage ? j->storage->num_blocked() : 0 + , int(m_stats_counters[counters::blocked_disk_jobs])); + return; + } + + std::unique_lock l(m_job_mutex); + + TORRENT_ASSERT((j->flags & aux::disk_job::in_progress) || !j->storage); + + auto& q = pool_for_job(j); + q.push_back(j); + l.unlock(); + // if we literally have 0 disk threads, we have to execute the jobs + // immediately. If add job is called internally by the pread_disk_io, + // we need to defer executing it. 
We only want the top level to loop + // over the job queue (as is done below) + if (pool_for_job(j).max_threads() == 0 && user_add) + immediate_execute(); +} + +void pread_disk_io::immediate_execute() +{ + while (!m_generic_threads.empty()) + { + auto* j = static_cast(m_generic_threads.pop_front()); + execute_job(j); + } +} + +void pread_disk_io::submit_jobs() +{ + std::unique_lock l(m_job_mutex); + m_generic_threads.submit_jobs(); + m_hash_threads.submit_jobs(); +} + +void pread_disk_io::execute_job(aux::pread_disk_job* j) +{ + jobqueue_t completed_jobs; + if (j->flags & aux::disk_job::aborted) + { + j->ret = disk_status::fatal_disk_error; + j->error = storage_error(boost::asio::error::operation_aborted); + completed_jobs.push_back(j); + add_completed_jobs(std::move(completed_jobs)); + return; + } + + perform_job(j, completed_jobs); + if (!completed_jobs.empty()) + add_completed_jobs(std::move(completed_jobs)); +} + +int pread_disk_io::flush_cache_blocks(bitfield& flushed + , span blocks + , int const hash_cursor, jobqueue_t& completed_jobs) +{ + if (blocks.empty()) return 0; + +#if DEBUG_DISK_THREAD + { + auto piece = piece_index_t(-1); + std::string blocks_str; + blocks_str.reserve(blocks.size()); + for (auto const& blk : blocks) + { + blocks_str += blk.write_job ? '*' : ' '; + if (blk.write_job) + piece = std::get(blk.write_job->action).piece; + } + // If this assert fires, it means we were asked to flush a piece + // that doesn't have any jobs to flush + TORRENT_ASSERT(piece != piece_index_t(-1)); + DLOG("flush_cache_blocks: piece: %d hash_cursor: %d blocks: [%s]\n", int(piece), hash_cursor, blocks_str.c_str()); + } +#else + TORRENT_UNUSED(hash_cursor); +#endif + + // blocks may be sparse. We need to skip any block entry where write_job is null + m_stats_counters.inc_stats_counter(counters::num_running_disk_jobs, 1); + m_stats_counters.inc_stats_counter(counters::num_writing_threads, 1); + time_point const start_time = clock_type::now(); + + bool failed = false; + + // the total number of blocks we ended up flushing to disk + int ret = 0; + + visit_block_iovecs(blocks, [&] (span> iovec, int const start_idx) { + auto* j = blocks[start_idx].write_job; + TORRENT_ASSERT(j->get_type() == aux::job_action_t::write); + auto& a = std::get(j->action); + aux::open_mode_t const file_mode = file_mode_for_job(j); + aux::pread_storage* storage = j->storage.get(); + + TORRENT_ASSERT(a.piece != piece_index_t(-1)); + int const count = static_cast(iovec.size()); + DLOG("write: blocks: %d (piece: %d)\n", count, int(a.piece)); + + storage_error error; + storage->write(m_settings, iovec + , a.piece, a.offset, file_mode, j->flags, error); + + int i = start_idx; + for (aux::cached_block_entry const& blk : blocks.subspan(start_idx, count)) + { + auto* j2 = blk.write_job; + TORRENT_ASSERT(j2); + TORRENT_ASSERT(j2->get_type() == aux::job_action_t::write); + j2->error = error; + flushed.set_bit(i); + completed_jobs.push_back(j2); + ++i; + } + + ret += count; + + if (error) { + // if there was a failure, fail the remaining jobs as well + for (aux::cached_block_entry const& blk : blocks.subspan(start_idx + count)) + { + auto* j2 = blk.write_job; + if (j2 == nullptr) continue; + j2->error = error; + // TODO: should we free the job's buffer here? 
+ completed_jobs.push_back(j2); + } + failed = true; + } + return failed; + }); + + if (!failed) + { + std::int64_t const write_time = total_microseconds(clock_type::now() - start_time); + + m_stats_counters.inc_stats_counter(counters::num_blocks_written, blocks.size()); + m_stats_counters.inc_stats_counter(counters::num_write_ops); + m_stats_counters.inc_stats_counter(counters::disk_write_time, write_time); + m_stats_counters.inc_stats_counter(counters::disk_job_time, write_time); + } + + // TODO: put this in an RAII object + m_stats_counters.inc_stats_counter(counters::num_writing_threads, -1); + m_stats_counters.inc_stats_counter(counters::num_running_disk_jobs, -1); + + return ret; +} + +void pread_disk_io::clear_piece_jobs(jobqueue_t aborted, aux::pread_disk_job* clear) +{ + m_completed_jobs.abort_jobs(m_ios, std::move(aborted)); + jobqueue_t jobs; + jobs.push_back(clear); + add_completed_jobs(std::move(jobs)); +} + +void pread_disk_io::try_flush_cache(int const target_cache_size + , std::unique_lock& l) +{ + DLOG("flushing, cache target: %d (current size: %d currently flushing: %d)\n" + , target_cache_size, m_cache.size(), m_cache.num_flushing()); + l.unlock(); + jobqueue_t completed_jobs; + m_cache.flush_to_disk( + [&](bitfield& flushed, span blocks, int const hash_cursor) { + return flush_cache_blocks(flushed, blocks, hash_cursor, completed_jobs); + } + , target_cache_size + , [&](jobqueue_t aborted, aux::pread_disk_job* clear) { + clear_piece_jobs(std::move(aborted), clear); + }); + l.lock(); + DLOG("flushed blocks (%d blocks left), return to disk loop\n", m_cache.size()); + if (!completed_jobs.empty()) + add_completed_jobs(std::move(completed_jobs)); +} + +void pread_disk_io::flush_storage(std::shared_ptr const& storage) +{ + storage_index_t const torrent = storage->storage_index(); + DLOG("flush_storage (%d)\n", torrent); + jobqueue_t completed_jobs; + m_cache.flush_storage( + [&](bitfield& flushed, span blocks, int const hash_cursor) { + return flush_cache_blocks(flushed, blocks, hash_cursor, completed_jobs); + } + , torrent + , [&](jobqueue_t aborted, aux::pread_disk_job* clear) { + clear_piece_jobs(std::move(aborted), clear); + }); + DLOG("flush_storage - done (%d left)\n", m_cache.size()); + if (!completed_jobs.empty()) + add_completed_jobs(std::move(completed_jobs)); +} + +void pread_disk_io::thread_fun(aux::disk_io_thread_pool& pool + , executor_work_guard work) +{ + // work is used to keep the io_context alive + TORRENT_UNUSED(work); + + ADD_OUTSTANDING_ASYNC("pread_disk_io::work"); + std::thread::id const thread_id = std::this_thread::get_id(); + + aux::set_thread_name("libtorrent-disk-thread"); + + DLOG("started disk thread\n"); + + std::unique_lock l(m_job_mutex); + + ++m_num_running_threads; + m_stats_counters.inc_stats_counter(counters::num_running_threads, 1); + + // we call close_oldest_file on the file_pool regularly. 
This is the next + // time we should call it + time_point next_close_oldest_file = min_time(); + + for (;;) + { + auto const res = pool.wait_for_job(l); + + // if we need to flush the cache, let one of the generic threads do + // that + if (m_flush_target/* && &pool == &m_generic_threads*/) + { + int const target_cache_size = *std::exchange(m_flush_target, std::nullopt); + DLOG("try_flush_cache(%d)\n", target_cache_size); + try_flush_cache(target_cache_size, l); + continue; + } + + if (res == aux::wait_result::exit_thread) + { + DLOG("exit disk loop\n"); + break; + } + + if (res != aux::wait_result::new_job) + { + DLOG("continue disk loop\n"); + continue; + } + + auto* j = static_cast(pool.pop_front()); + + if (&pool == &m_generic_threads || (j->flags & disk_interface::flush_piece)) + { + DLOG("optimistic flush\n"); + // This will attempt to flush any pieces that have been completely + // downloaded + try_flush_cache(int(m_cache.size()), l); + } + + l.unlock(); + + TORRENT_ASSERT((j->flags & aux::disk_job::in_progress) || !j->storage); + + if (&pool == &m_generic_threads && thread_id == pool.first_thread_id()) + { + time_point const now = aux::time_now(); + { + std::unique_lock l2(m_need_tick_mutex); + while (!m_need_tick.empty() && m_need_tick.front().first < now) + { + std::shared_ptr st = m_need_tick.front().second.lock(); + m_need_tick.erase(m_need_tick.begin()); + if (st) + { + l2.unlock(); + st->tick(); + l2.lock(); + } + } + } + + if (now > next_close_oldest_file) + { + seconds const interval(m_settings.get_int(settings_pack::close_file_interval)); + if (interval <= seconds(0)) + { + // check again in one minute, in case the setting changed + next_close_oldest_file = now + minutes(1); + } + else + { + next_close_oldest_file = now + interval; + m_file_pool.close_oldest(); + } + } + } + + execute_job(j); + + l.lock(); + } + + // do cleanup in the last running thread + // if we're not aborting, that means we just configured the thread pool to + // not have any threads (i.e. perform all disk operations in the network + // thread). In this case, the cleanup will happen in abort(). + + int const threads_left = --m_num_running_threads; + if (threads_left > 0 || !m_abort) + { + DLOG("exiting disk thread. num_threads: %d aborting: %d\n" + , threads_left, int(m_abort)); + m_stats_counters.inc_stats_counter(counters::num_running_threads, -1); + COMPLETE_ASYNC("pread_disk_io::work"); + return; + } + + DLOG("last thread alive. (left: %d) cleaning up. (generic-jobs: %d hash-jobs: %d)\n" + , threads_left + , m_generic_threads.queue_size() + , m_hash_threads.queue_size()); + + // flush everything before exiting this thread + try_flush_cache(0, l); + + // it is important to hold the job mutex while calling try_thread_exit() + // and continue to hold it until checking m_abort above so that abort() + // doesn't inadvertently trigger the code below when it thinks there are no + // more disk I/O threads running + l.unlock(); + + // at this point, there are no queued jobs left. However, main + // thread is still running and may still have peer_connections + // that haven't fully destructed yet, reclaiming their references + // to read blocks in the disk cache. We need to wait until all + // references are removed from other threads before we can go + // ahead with the cleanup. + // This is not supposed to happen because the disk thread is now scheduled + // for shut down after all peers have shut down (see + // session_impl::abort_stage2()). + + DLOG("the last disk thread alive. 
cleaning up\n");
+
+	abort_jobs();
+
+	m_stats_counters.inc_stats_counter(counters::num_running_threads, -1);
+	COMPLETE_ASYNC("pread_disk_io::work");
+}
+
+void pread_disk_io::abort_jobs()
+{
+	DLOG("pread_disk_io::abort_jobs\n");
+
+	if (m_jobs_aborted.test_and_set()) return;
+
+	// close all files. This may take a long
+	// time on certain OSes (e.g. macOS);
+	// that's why it's important to do this in
+	// the disk thread in parallel with stopping
+	// trackers.
+	m_file_pool.release();
+}
+
+int pread_disk_io::num_threads() const
+{
+	return m_generic_threads.max_threads() + m_hash_threads.max_threads();
+}
+
+aux::disk_io_thread_pool& pread_disk_io::pool_for_job(aux::pread_disk_job* j)
+{
+	if (m_hash_threads.max_threads() > 0
+		&& (j->get_type() == aux::job_action_t::hash
+			|| j->get_type() == aux::job_action_t::hash2
+			|| j->get_type() == aux::job_action_t::kick_hasher))
+		return m_hash_threads;
+	else
+		return m_generic_threads;
+}
+
+void pread_disk_io::add_completed_jobs(jobqueue_t jobs)
+{
+	jobqueue_t completed = std::move(jobs);
+	do
+	{
+		// when a job completes, it's possible for it to cause
+		// a fence to be lowered, issuing the jobs queued up
+		// behind the fence
+		jobqueue_t new_jobs;
+		add_completed_jobs_impl(std::move(completed), new_jobs);
+		TORRENT_ASSERT(completed.empty());
+		completed = std::move(new_jobs);
+	} while (!completed.empty());
+}
+
+void pread_disk_io::add_completed_jobs_impl(jobqueue_t jobs, jobqueue_t& completed)
+{
+	jobqueue_t new_jobs;
+	int ret = 0;
+	for (auto i = jobs.iterate(); i.get(); i.next())
+	{
+		auto* j = static_cast<aux::pread_disk_job*>(i.get());
+
+		if (j->flags & aux::disk_job::fence)
+		{
+			m_stats_counters.inc_stats_counter(
+				counters::num_fenced_read + static_cast<int>(j->get_type()), -1);
+		}
+
+		if (j->flags & aux::disk_job::in_progress)
+		{
+			TORRENT_ASSERT(j->storage);
+			if (j->storage)
+				ret += j->storage->job_complete(j, new_jobs);
+		}
+
+		TORRENT_ASSERT(ret == new_jobs.size());
+		TORRENT_ASSERT(!(j->flags & aux::disk_job::in_progress));
+#if TORRENT_USE_ASSERTS
+		TORRENT_ASSERT(j->job_posted == false);
+		j->job_posted = true;
+#endif
+	}
+
+	if (ret)
+	{
+		DLOG("unblocked %d jobs (%d left)\n", ret
+			, int(m_stats_counters[counters::blocked_disk_jobs]) - ret);
+	}
+
+	m_stats_counters.inc_stats_counter(counters::blocked_disk_jobs, -ret);
+	TORRENT_ASSERT(int(m_stats_counters[counters::blocked_disk_jobs]) >= 0);
+
+	if (m_abort.load())
+	{
+		while (!new_jobs.empty())
+		{
+			auto* j = static_cast<aux::pread_disk_job*>(new_jobs.pop_front());
+			TORRENT_ASSERT((j->flags & aux::disk_job::in_progress) || !j->storage);
+			j->ret = disk_status::fatal_disk_error;
+			j->error = storage_error(boost::asio::error::operation_aborted);
+			completed.push_back(j);
+		}
+	}
+	else
+	{
+		if (!new_jobs.empty())
+		{
+			std::lock_guard<std::mutex> l(m_job_mutex);
+			m_generic_threads.append(std::move(new_jobs));
+			m_generic_threads.submit_jobs();
+		}
+	}
+
+	m_completed_jobs.append(m_ios, std::move(jobs));
+}
+
+}
diff --git a/src/pread_storage.cpp b/src/pread_storage.cpp
new file mode 100644
index 00000000000..a0d92735abd
--- /dev/null
+++ b/src/pread_storage.cpp
@@ -0,0 +1,797 @@
+/*
+
+Copyright (c) 2022, Arvid Norberg
+All rights reserved.
+
+You may use, distribute and modify this code under the terms of the BSD license,
+see LICENSE file.
+*/
+
+#include "libtorrent/config.hpp"
+#include "libtorrent/error_code.hpp"
+#include "libtorrent/aux_/storage_utils.hpp"
+#include "libtorrent/hasher.hpp"
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "libtorrent/aux_/pread_storage.hpp"
+#include "libtorrent/aux_/torrent.hpp"
+#include "libtorrent/aux_/path.hpp"
+#include "libtorrent/aux_/invariant_check.hpp"
+#include "libtorrent/aux_/session_impl.hpp"
+#include "libtorrent/aux_/file_pool.hpp"
+#include "libtorrent/aux_/file.hpp" // for file_handle, pread_all, pwrite_all
+#include "libtorrent/disk_buffer_holder.hpp"
+#include "libtorrent/aux_/stat_cache.hpp"
+#include "libtorrent/aux_/readwrite.hpp"
+#include "libtorrent/hex.hpp" // to_hex
+
+#include
+
+#if (TORRENT_HAS_FADVISE && defined POSIX_FADV_DONTNEED)
+#include <fcntl.h> // for posix_fadvise
+#endif
+
+#if defined TORRENT_LINUX && defined SYNC_FILE_RANGE_WRITE
+#include <fcntl.h> // for sync_file_range
+#elif defined TORRENT_WINDOWS
+#include "libtorrent/aux_/windows.hpp" // for FlushFileBuffers
+#elif TORRENT_HAS_FSYNC_RANGE
+#include <unistd.h> // for fsync_range
+#else
+#include <unistd.h> // for fsync
+#endif
+
+namespace libtorrent::aux {
+
+namespace {
+
+	// TODO: move this to aux_/file.hpp
+	void advise_dont_need(handle_type handle, std::int64_t offset, std::int64_t len)
+	{
+#if (TORRENT_HAS_FADVISE && defined POSIX_FADV_DONTNEED)
+		::posix_fadvise(handle, offset, len, POSIX_FADV_DONTNEED);
+#else
+		TORRENT_UNUSED(handle);
+		TORRENT_UNUSED(offset);
+		TORRENT_UNUSED(len);
+#endif
+	}
+
+	// TODO: move this to aux_/file.hpp
+	void sync_file(handle_type handle, std::int64_t offset, std::int64_t len)
+	{
+#if defined TORRENT_LINUX && defined SYNC_FILE_RANGE_WRITE
+		::sync_file_range(handle, offset, len, SYNC_FILE_RANGE_WRITE);
+#elif defined TORRENT_WINDOWS
+		::FlushFileBuffers(handle);
+		TORRENT_UNUSED(offset);
+		TORRENT_UNUSED(len);
+#elif TORRENT_HAS_FSYNC_RANGE
+		::fsync_range(handle, FDATASYNC, offset, len);
+#else
+		::fsync(handle);
+		TORRENT_UNUSED(offset);
+		TORRENT_UNUSED(len);
+#endif
+	}
+}
+
+	pread_storage::pread_storage(storage_params const& params
+		, file_pool& pool)
+		: m_files(params.files)
+		, m_file_priority(params.priorities)
+		, m_save_path(complete(params.path))
+		, m_part_file_name("." + to_hex(params.info_hash) + ".parts")
+		, m_pool(pool)
+		, m_allocate_files(params.mode == storage_mode_allocate)
+		, m_v1(params.v1)
+		, m_v2(params.v2)
+	{
+		// a torrent must be either v1 or v2 (or both)
+		TORRENT_ASSERT(m_v1 || m_v2);
+		if (params.mapped_files) m_mapped_files = std::make_unique<file_storage>(*params.mapped_files);
+
+		TORRENT_ASSERT(files().num_files() > 0);
+	}
+
+	pread_storage::~pread_storage()
+	{
+		error_code ec;
+		if (m_part_file) m_part_file->flush_metadata(ec);
+
+		// this may be called from a different
+		// thread than the disk thread
+		m_pool.release(storage_index());
+	}
+
+	void pread_storage::need_partfile()
+	{
+		if (m_part_file) return;
+
+		m_part_file = std::make_unique<part_file>(
+			m_save_path, m_part_file_name
+			, files().num_pieces(), files().piece_length());
+	}
+
+	void pread_storage::set_file_priority(settings_interface const& sett
+		, vector<download_priority_t, file_index_t>& prio
+		, storage_error& ec)
+	{
+		// extend our file priorities in case it's truncated;
+		// any unspecified entries default to priority 4
+		if (prio.size() > m_file_priority.size())
+			m_file_priority.resize(prio.size(), default_priority);
+
+		file_storage const& fs = files();
+		for (file_index_t i(0); i < prio.end_index(); ++i)
+		{
+			// pad files always have priority 0.
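+			// (they only exist to align the next file to a piece boundary
+			// and are never stored on disk, so there is nothing to
+			// prioritize)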
+ if (fs.pad_file_at(i)) continue; + + download_priority_t const old_prio = m_file_priority[i]; + download_priority_t new_prio = prio[i]; + if (old_prio == dont_download && new_prio != dont_download) + { + // move stuff out of the part file + auto f = open_file(sett, i, open_mode::write, ec); + if (ec) + { + prio = m_file_priority; + return; + } + TORRENT_ASSERT(f); + + if (m_part_file && use_partfile(i)) + { + try + { + m_part_file->export_file([&f](std::int64_t file_offset, span buf) + { + do { + error_code err; + int const r = pwrite_all(f->fd(), buf, file_offset, err); + if (err) + throw_ex(err); + buf = buf.subspan(r); + file_offset += r; + } while (buf.size() > 0); + }, fs.file_offset(i), fs.file_size(i), ec.ec); + if (ec) + { + ec.file(i); + ec.operation = operation_t::partfile_write; + prio = m_file_priority; + return; + } + } + catch (lt::system_error const& err) + { + ec.file(i); + ec.operation = operation_t::partfile_write; + ec.ec = err.code(); + return; + } + } + } + else if (old_prio != dont_download && new_prio == dont_download) + { + // move stuff into the part file + // this is not implemented yet. + // so we just don't use a partfile for this file + + std::string const fp = fs.file_path(i, m_save_path); + bool const file_exists = exists(fp, ec.ec); + if (ec.ec) + { + ec.file(i); + ec.operation = operation_t::file_stat; + prio = m_file_priority; + return; + } + use_partfile(i, !file_exists); + } + ec.ec.clear(); + m_file_priority[i] = new_prio; + + if (m_file_priority[i] == dont_download && use_partfile(i)) + { + need_partfile(); + } + } + if (m_part_file) m_part_file->flush_metadata(ec.ec); + if (ec) + { + ec.file(torrent_status::error_file_partfile); + ec.operation = operation_t::partfile_write; + } + } + + bool pread_storage::use_partfile(file_index_t const index) const + { + TORRENT_ASSERT_VAL(index >= file_index_t{}, index); + if (index >= m_use_partfile.end_index()) return true; + return m_use_partfile[index]; + } + + void pread_storage::use_partfile(file_index_t const index, bool const b) + { + if (index >= m_use_partfile.end_index()) + { + // no need to extend this array if we're just setting it to "true", + // that's default already + if (b) return; + m_use_partfile.resize(static_cast(index) + 1, true); + } + m_use_partfile[index] = b; + } + + status_t pread_storage::initialize(settings_interface const& sett, storage_error& ec) + { + m_stat_cache.reserve(files().num_files()); + +#ifdef TORRENT_WINDOWS + // don't do full file allocations on network drives + auto const file_name = convert_to_native_path_string(m_save_path); + int const drive_type = GetDriveTypeW(file_name.c_str()); + + if (drive_type == DRIVE_REMOTE) + m_allocate_files = false; +#endif + { + std::unique_lock l(m_file_created_mutex); + m_file_created.resize(files().num_files(), false); + } + + file_storage const& fs = files(); + status_t ret{}; + // if some files have priority 0, we need to check if they exist on the + // filesystem, in which case we won't use a partfile for them. + // this is to be backwards compatible with previous versions of + // libtorrent, when part files were not supported. 
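+		// e.g. a file that was deselected in an older client may still be
+		// fully allocated on disk; if so, we keep using the file itself
+		// rather than routing its pieces through the partfile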
+ for (file_index_t i(0); i < m_file_priority.end_index(); ++i) + { + if (m_file_priority[i] != dont_download || fs.pad_file_at(i)) + continue; + + error_code err; + auto const size = m_stat_cache.get_filesize(i, fs, m_save_path, err); + if (!err && size > 0) + { + use_partfile(i, false); + if (size > fs.file_size(i)) + ret = ret | disk_status::oversized_file; + } + else + { + // we may have earlier determined we *can't* use a partfile for + // this file, we need to be able to change our mind in case the + // file disappeared + use_partfile(i, true); + need_partfile(); + } + } + + initialize_storage(fs, m_save_path, m_stat_cache, m_file_priority + , [&sett, this](file_index_t const file_index, storage_error& e) + { open_file(sett, file_index, open_mode::write, e); } + , create_symlink + , [&ret](file_index_t, std::int64_t) { ret = ret | disk_status::oversized_file; } + , ec); + + // close files that were opened in write mode + m_pool.release(storage_index()); + return ret; + } + + bool pread_storage::has_any_file(storage_error& ec) + { + m_stat_cache.reserve(files().num_files()); + + if (aux::has_any_file(files(), m_save_path, m_stat_cache, ec)) + return true; + + if (ec) return false; + + file_status s; + stat_file(combine_path(m_save_path, m_part_file_name), &s, ec.ec); + if (!ec) return true; + + // the part file not existing is expected + if (ec.ec == boost::system::errc::no_such_file_or_directory) + ec.ec.clear(); + + if (ec) + { + ec.file(torrent_status::error_file_partfile); + ec.operation = operation_t::file_stat; + } + return false; + } + + void pread_storage::rename_file(file_index_t const index, std::string const& new_filename + , storage_error& ec) + { + if (index < file_index_t(0) || index >= files().end_file()) return; + std::string const old_name = files().file_path(index, m_save_path); + m_pool.release(storage_index(), index); + + // if the old file doesn't exist, just succeed and change the filename + // that will be created. This shortcut is important because the + // destination directory may not exist yet, which would cause a failure + // even though we're not moving a file (yet). It's better for it to + // fail later when we try to write to the file the first time, because + // the user then will have had a chance to make the destination directory + // valid. + if (exists(old_name, ec.ec)) + { + std::string new_path; + if (is_complete(new_filename)) new_path = new_filename; + else new_path = combine_path(m_save_path, new_filename); + std::string new_dir = parent_path(new_path); + + // create any missing directories that the new filename + // lands in + create_directories(new_dir, ec.ec); + if (ec.ec) + { + ec.file(index); + ec.operation = operation_t::file_rename; + return; + } + + rename(old_name, new_path, ec.ec); + + // if old_name doesn't exist, that's not an error + // here. 
Once we start writing to the file, it will + // be written to the new filename + if (ec.ec == boost::system::errc::no_such_file_or_directory) + ec.ec.clear(); + + if (ec) + { + ec.ec.clear(); + copy_file(old_name, new_path, ec); + + if (ec) + { + ec.file(index); + return; + } + + error_code ignore; + remove(old_name, ignore); + } + } + else if (ec.ec) + { + // if exists fails, report that error + ec.file(index); + ec.operation = operation_t::file_rename; + return; + } + + // if old path doesn't exist, just rename the file + // in our file_storage, so that when it is created + // it will get the new name + if (!m_mapped_files) + { m_mapped_files = std::make_unique(files()); } + m_mapped_files->rename_file(index, new_filename); + } + + void pread_storage::release_files(storage_error&) + { + if (m_part_file) + { + error_code ignore; + m_part_file->flush_metadata(ignore); + } + + // make sure we don't have the files open + m_pool.release(storage_index()); + + // make sure we can pick up new files added to the download directory when + // we start the torrent again + m_stat_cache.clear(); + } + + void pread_storage::delete_files(remove_flags_t const options, storage_error& ec) + { + // make sure we don't have the files open + m_pool.release(storage_index()); + + // if there's a part file open, make sure to destruct it to have it + // release the underlying part file. Otherwise we may not be able to + // delete it + if (m_part_file) m_part_file.reset(); + + aux::delete_files(files(), m_save_path, m_part_file_name, options, ec); + } + + bool pread_storage::verify_resume_data(add_torrent_params const& rd + , aux::vector const& links + , storage_error& ec) + { + return aux::verify_resume_data(rd, links, files() + , m_file_priority, m_stat_cache, m_save_path, ec); + } + + std::pair pread_storage::move_storage(std::string save_path + , move_flags_t const flags, storage_error& ec) + { + m_pool.release(storage_index()); + + status_t ret; + auto move_partfile = [&](std::string const& new_save_path, error_code& e) + { + if (!m_part_file) return; + m_part_file->move_partfile(new_save_path, e); + }; + std::tie(ret, m_save_path) = aux::move_storage(files(), m_save_path, std::move(save_path) + , std::move(move_partfile), flags, ec); + + // clear the stat cache in case the new location has new files + m_stat_cache.clear(); + + return { ret, m_save_path }; + } + + int pread_storage::read(settings_interface const& sett + , span buffer + , piece_index_t const piece, int const offset + , open_mode_t const mode + , disk_job_flags_t const flags + , storage_error& error) + { +#ifdef TORRENT_SIMULATE_SLOW_READ + std::this_thread::sleep_for(seconds(1)); +#endif + return readwrite(files(), buffer, piece, offset, error + , [this, mode, flags, &sett](file_index_t const file_index + , std::int64_t const file_offset + , span buf, storage_error& ec) + { + // reading from a pad file yields zeroes + if (files().pad_file_at(file_index)) return read_zeroes(buf); + + if (file_index < m_file_priority.end_index() + && m_file_priority[file_index] == dont_download + && use_partfile(file_index)) + { + TORRENT_ASSERT(m_part_file); + + error_code e; + peer_request map = files().map_file(file_index, file_offset, 0); + int const ret = m_part_file->read(buf, map.piece, map.start, e); + + if (e) + { + ec.ec = e; + ec.file(file_index); + ec.operation = operation_t::partfile_read; + return -1; + } + return ret; + } + + auto handle = open_file(sett, file_index, mode, ec); + if (ec) return -1; + + // set this unconditionally in case the upper 
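+
+	// Note on the volatile_read flag handled above: it propagates from the
+	// public disk_interface API, where a caller that knows a block will not
+	// be requested again can hint that it should not stay cached. This
+	// backend turns the hint into posix_fadvise(POSIX_FADV_DONTNEED) via
+	// advise_dont_need(). An illustrative call site, using the public
+	// async_read() API ("disk_thread", "storage" and "req" are assumed to
+	// be in scope):
+	//
+	//   disk_thread->async_read(storage, req
+	//     , [](lt::disk_buffer_holder, lt::storage_error const&) {}
+	//     , lt::disk_interface::volatile_read);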
+
+	int pread_storage::write(settings_interface const& sett
+		, span<span<char> const> buffers
+		, piece_index_t const piece, int offset
+		, open_mode_t const mode
+		, disk_job_flags_t const flags
+		, storage_error& error)
+	{
+		for (auto const& buf : buffers)
+		{
+			write(sett, buf, piece, offset, mode, flags, error);
+			offset += int(buf.size());
+			if (error) return offset;
+		}
+		return offset;
+	}
+
+	int pread_storage::write(settings_interface const& sett
+		, span<char> buffer
+		, piece_index_t const piece, int const offset
+		, open_mode_t const mode
+		, disk_job_flags_t
+		, storage_error& error)
+	{
+		auto const write_mode = sett.get_int(settings_pack::disk_io_write_mode);
+		return readwrite(files(), buffer, piece, offset, error
+			, [this, mode, &sett, write_mode](file_index_t const file_index
+			, std::int64_t const file_offset
+			, span<char> buf, storage_error& ec)
+		{
+			// writing to a pad-file is a no-op
+			if (files().pad_file_at(file_index))
+				return int(buf.size());
+
+			if (file_index < m_file_priority.end_index()
+				&& m_file_priority[file_index] == dont_download
+				&& use_partfile(file_index))
+			{
+				TORRENT_ASSERT(m_part_file);
+
+				error_code e;
+				peer_request map = files().map_file(file_index
+					, file_offset, 0);
+				int const ret = m_part_file->write(buf, map.piece, map.start, e);
+
+				if (e)
+				{
+					ec.ec = e;
+					ec.file(file_index);
+					ec.operation = operation_t::partfile_write;
+					return -1;
+				}
+				return ret;
+			}
+
+			// invalidate our stat cache for this file, since
+			// we're writing to it
+			m_stat_cache.set_dirty(file_index);
+
+			auto handle = open_file(sett, file_index, open_mode::write | mode, ec);
+			if (ec) return -1;
+			TORRENT_ASSERT(handle);
+
+			// set this unconditionally in case the upper layer would like to
+			// treat short writes as errors
+			ec.operation = operation_t::file_write;
+
+			int const ret = pwrite_all(handle->fd(), buf, file_offset, ec.ec);
+			if (ec.ec)
+			{
+				ec.file(file_index);
+				ec.operation = operation_t::file_write;
+				return ret;
+			}
+			if (write_mode == settings_pack::write_through)
+				sync_file(handle->fd(), file_offset, buf.size());
+			return ret;
+		});
+	}
+
+	int pread_storage::hash(settings_interface const& sett
+		, hasher& ph, std::ptrdiff_t const len
+		, piece_index_t const piece, int const offset
+		, open_mode_t const mode
+		, disk_job_flags_t const flags
+		, storage_error& error)
+	{
+#ifdef TORRENT_SIMULATE_SLOW_READ
+		std::this_thread::sleep_for(seconds(1));
+#endif
+		char dummy = 0;
+
+		std::vector<char> scratch_buffer;
+
+		return readwrite(files(), span<char>{&dummy, len}, piece, offset, error
+			, [this, mode, flags, &ph, &sett, &scratch_buffer](
+			file_index_t const file_index
+			, std::int64_t const file_offset
+			, span<char> buf, storage_error& ec)
+		{
+			if (files().pad_file_at(file_index))
+				return hash_zeroes(ph, buf.size());
+
+			if (file_index < m_file_priority.end_index()
+				&& m_file_priority[file_index] == dont_download
+				&& use_partfile(file_index))
+			{
+				error_code e;
+				peer_request map = files().map_file(file_index, file_offset, 0);
+				int const ret = m_part_file->hash(ph, buf.size()
+					, map.piece, map.start, e);
+
+				if (e)
+				{
+					ec.ec = e;
+					ec.file(file_index);
+					ec.operation = operation_t::partfile_read;
+				}
+				return ret;
+			}
+
+			auto handle = open_file(sett, file_index, mode, ec);
+			if (ec) return -1;
+
+			scratch_buffer.resize(std::size_t(buf.size()));
+			int ret = pread_all(handle->fd(), scratch_buffer, file_offset, ec.ec);
+			if (ec.ec)
+			{
+				ec.file(file_index);
+				ec.operation = operation_t::file_read;
+				return ret;
+			}
+			if (ret >= 0)
+			{
+				ph.update(scratch_buffer);
+				if (flags & disk_interface::volatile_read)
+					advise_dont_need(handle->fd(), file_offset, buf.size());
+			}
+
+			return ret;
+		});
+	}
+
+	int pread_storage::hash2(settings_interface const& sett
+		, hasher256& ph, std::ptrdiff_t const len
+		, piece_index_t const piece, int const offset
+		, open_mode_t const mode
+		, disk_job_flags_t const flags
+		, storage_error& error)
+	{
+		std::int64_t const start_offset = static_cast<int>(piece) * std::int64_t(files().piece_length()) + offset;
+		file_index_t const file_index = files().file_index_at_offset(start_offset);
+		std::int64_t const file_offset = start_offset - files().file_offset(file_index);
+		TORRENT_ASSERT(file_offset >= 0);
+		TORRENT_ASSERT(!files().pad_file_at(file_index));
+
+		if (file_index < m_file_priority.end_index()
+			&& m_file_priority[file_index] == dont_download
+			&& use_partfile(file_index))
+		{
+			error_code e;
+			peer_request map = files().map_file(file_index, file_offset, 0);
+			int const ret = m_part_file->hash2(ph, len, map.piece, map.start, e);
+
+			if (e)
+			{
+				error.ec = e;
+				error.file(file_index);
+				error.operation = operation_t::partfile_read;
+				return -1;
+			}
+			return ret;
+		}
+
+		auto handle = open_file(sett, file_index, mode, error);
+		if (error) return -1;
+
+		std::unique_ptr<char[]> scratch_buffer(new char[std::size_t(len)]);
+		span<char> b = {scratch_buffer.get(), len};
+		int const ret = pread_all(handle->fd(), b, file_offset, error.ec);
+		if (error.ec)
+		{
+			error.operation = operation_t::file_read;
+			error.file(file_index);
+			return ret;
+		}
+		ph.update(b);
+		if (flags & disk_interface::volatile_read)
+			advise_dont_need(handle->fd(), file_offset, len);
+
+		return static_cast<int>(len);
+	}
+
+	// a wrapper around open_file_impl that, if it fails, makes sure the
+	// directories have been created and retries
+	std::shared_ptr<file_handle> pread_storage::open_file(settings_interface const& sett
+		, file_index_t const file
+		, open_mode_t mode, storage_error& ec) const
+	{
+		if (mode & open_mode::write
+			&& !(mode & open_mode::truncate))
+		{
+			std::unique_lock l(m_file_created_mutex);
+			if (m_file_created.size() != files().num_files())
+				m_file_created.resize(files().num_files(), false);
+
+			// if we haven't created this file already, make sure to truncate it to
+			// its final size
+			mode |= (m_file_created[file] == false)
+				? open_mode::truncate : open_mode::read_only;
+		}
+
+		if (files().file_flags(file) & file_storage::flag_executable)
+			mode |= open_mode::executable;
+
+		if (files().file_flags(file) & file_storage::flag_hidden)
+			mode |= open_mode::hidden;
+
+#ifdef _WIN32
+		if (sett.get_bool(settings_pack::enable_set_file_valid_data))
+		{
+			mode |= open_mode::allow_set_file_valid_data;
+		}
+#endif
+
+		auto h = open_file_impl(sett, file, mode, ec);
+		if (ec.ec)
+		{
+			ec.file(file);
+			return {};
+		}
+		TORRENT_ASSERT(h);
+
+		if (mode & open_mode::truncate)
+		{
+			// remember that we've truncated this file, so we don't have to do it
+			// again
+			std::unique_lock l(m_file_created_mutex);
+			m_file_created.set_bit(file);
+		}
+
+		// the handle should be set here
+		TORRENT_ASSERT(static_cast<bool>(h));
+		return h;
+	}
+
+	std::shared_ptr<file_handle> pread_storage::open_file_impl(settings_interface const& sett
+		, file_index_t file
+		, open_mode_t mode
+		, storage_error& ec) const
+	{
+		TORRENT_ASSERT(!files().pad_file_at(file));
+		if (!m_allocate_files) mode |= open_mode::sparse;
+
+		// files with priority 0 should always be sparse
+		if (m_file_priority.end_index() > file && m_file_priority[file] == dont_download)
+			mode |= open_mode::sparse;
+
+		if (sett.get_bool(settings_pack::no_atime_storage))
+		{
+			mode |= open_mode::no_atime;
+		}
+
+		// if we have a cache already, don't store the data twice by leaving it in the OS cache as well
+		auto const write_mode = sett.get_int(settings_pack::disk_io_write_mode);
+		if (write_mode == settings_pack::disable_os_cache
+			|| write_mode == settings_pack::write_through)
+		{
+			mode |= open_mode::no_cache;
+		}
+
+		try {
+#if TORRENT_HAVE_MAP_VIEW_OF_FILE
+			int dummy = 0;
+#endif
+			return m_pool.open_file(storage_index(), m_save_path, file
+				, files(), mode
+#if TORRENT_HAVE_MAP_VIEW_OF_FILE
+				, &dummy
+#endif
+				);
+		}
+		catch (storage_error const& se)
+		{
+			ec = se;
+			ec.file(file);
+			TORRENT_ASSERT(ec);
+			return {};
+		}
+	}
+
+	bool pread_storage::tick()
+	{
+		error_code ec;
+		if (m_part_file) m_part_file->flush_metadata(ec);
+
+		return false;
+	}
+} // namespace libtorrent::aux
diff --git a/src/session.cpp b/src/session.cpp
index 9714d039ea6..ed44ba97b2c 100644
--- a/src/session.cpp
+++ b/src/session.cpp
@@ -19,6 +19,7 @@ see LICENSE file.
 #include "libtorrent/disk_interface.hpp"
 #include "libtorrent/mmap_disk_io.hpp"
 #include "libtorrent/posix_disk_io.hpp"
+#include "libtorrent/pread_disk_io.hpp"
 #include "libtorrent/aux_/platform_util.hpp"
 
 namespace libtorrent {
 
@@ -491,7 +492,9 @@ namespace {
 	TORRENT_EXPORT std::unique_ptr<disk_interface> default_disk_io_constructor(
 		io_context& ios, settings_interface const& sett, counters& cnt)
 	{
-#if TORRENT_HAVE_MMAP || TORRENT_HAVE_MAP_VIEW_OF_FILE
+#if TORRENT_HAVE_PREAD || defined TORRENT_WINDOWS
+		return pread_disk_io_constructor(ios, sett, cnt);
+#elif TORRENT_HAVE_MMAP || TORRENT_HAVE_MAP_VIEW_OF_FILE
 		// TODO: in C++17, use if constexpr instead
 #include "libtorrent/aux_/disable_deprecation_warnings_push.hpp"
 		if (sizeof(void*) == 8)
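
With this hunk, default_disk_io_constructor() prefers the new backend wherever preadv()/pwritev() (or Windows) is available. A client can also opt in explicitly through session_params. A minimal sketch, using the public libtorrent 2.x API (this is not code from the patch):

    #include "libtorrent/session.hpp"
    #include "libtorrent/session_params.hpp"
    #include "libtorrent/pread_disk_io.hpp"

    int main()
    {
        lt::session_params params;
        // select the multi-threaded pread()-based backend explicitly,
        // instead of relying on the platform-dependent default above
        params.disk_io_constructor = &lt::pread_disk_io_constructor;
        lt::session ses(std::move(params));
    }
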
diff --git a/src/settings_pack.cpp b/src/settings_pack.cpp
index dc622e5a5ca..459d8b4c843 100644
--- a/src/settings_pack.cpp
+++ b/src/settings_pack.cpp
@@ -244,7 +244,7 @@ namespace {
 		SET(initial_picker_threshold, 4, nullptr),
 		SET(allowed_fast_set_size, 5, nullptr),
 		SET(suggest_mode, settings_pack::no_piece_suggestions, nullptr),
-		SET(max_queued_disk_bytes, 1024 * 1024, nullptr),
+		SET(max_queued_disk_bytes, 50 * 1024 * 1024, nullptr),
 		SET(handshake_timeout, 10, nullptr),
 		SET(send_buffer_low_watermark, 10 * 1024, nullptr),
 		SET(send_buffer_watermark, 500 * 1024, nullptr),
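
Raising the default max_queued_disk_bytes from 1 MiB to 50 MiB gives the peer layer room to keep far more write jobs in flight for the multi-threaded backend to drain in parallel (that rationale is inferred here; the patch does not state it). The limit remains an ordinary tunable. A minimal sketch of overriding it:

    #include "libtorrent/session.hpp"
    #include "libtorrent/session_params.hpp"
    #include "libtorrent/settings_pack.hpp"

    int main()
    {
        lt::session_params params;
        // restore the previous 1 MiB cap, e.g. on memory-constrained systems
        params.settings.set_int(lt::settings_pack::max_queued_disk_bytes, 1024 * 1024);
        lt::session ses(std::move(params));
    }
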
diff --git a/src/torrent.cpp b/src/torrent.cpp
index 612f1aa0572..a4d1d67cbc6 100644
--- a/src/torrent.cpp
+++ b/src/torrent.cpp
@@ -1398,6 +1398,10 @@ bool is_downloading_state(int const st)
 		if (write_mode == settings_pack::disable_os_cache)
 			dflags |= disk_interface::flush_piece | disk_interface::volatile_read;
 
+#ifndef TORRENT_DISABLE_LOGGING
+		debug_log("*** add_piece [ piece: %d | block: %d ]"
+			, static_cast<int>(piece), i);
+#endif
 		m_ses.disk_thread().async_write(m_storage, p, data + p.start, nullptr
 			, [self, p](storage_error const& error) { self->on_disk_write_complete(error, p); }
 			, dflags);
@@ -2381,6 +2385,9 @@ bool is_downloading_state(int const st)
 			handle_disk_error("force_recheck", error);
 			return;
 		}
+#ifndef TORRENT_DISABLE_LOGGING
+		debug_log("on_force_recheck: status: %x", static_cast<std::uint32_t>(status));
+#endif
 
 		bool const has_error_status = (status & disk_status::fatal_disk_error)
 			|| (status & disk_status::need_full_check)
@@ -2396,6 +2403,9 @@ bool is_downloading_state(int const st)
 		m_checking_piece = piece_index_t(0);
 		m_num_checked_pieces = piece_index_t(0);
 
+#ifndef TORRENT_DISABLE_LOGGING
+		debug_log("on_force_recheck: starting from 0");
+#endif
 		set_state(torrent_status::checking_files);
 		if (m_auto_managed) pause(torrent_handle::graceful_pause);
 		if (should_check_files()) start_checking();
@@ -2550,15 +2560,19 @@ bool is_downloading_state(int const st)
 			// if the v1 hash failed the check, don't add the v2 hashes to the
 			// merkle tree. They are most likely invalid.
 			if (torrent_file().info_hashes().has_v2() && !bool(hash_passed[0] == false))
-			{
-				hash_passed[1] = on_blocks_hashed(piece, block_hashes);
-			}
+			{ hash_passed[1] = on_blocks_hashed(piece, block_hashes); }
 		}
 		else
 		{
 			hash_passed[0] = hash_passed[1] = true;
 		}
 
+#ifndef TORRENT_DISABLE_LOGGING
+		std::stringstream hash;
+		hash << piece_hash;
+		debug_log("on_piece_hashed, piece: %d piece_hash: %s"
+			, static_cast<int>(piece), hash.str().c_str());
+#endif
 		if ((hash_passed[0] && !hash_passed[1]) || (!hash_passed[0] && hash_passed[1]))
 		{
 			handle_inconsistent_hashes(piece);
@@ -4131,6 +4145,9 @@ namespace {
 		if (m_abort) return;
 		if (m_deleted) return;
 
+		TORRENT_ASSERT(m_picker);
+		if (!m_picker) return;
+
 		m_picker->completed_hash_job(piece);
 
 		boost::tribool passed = boost::indeterminate;
@@ -6954,7 +6971,9 @@ namespace {
 			return result.valid;
 		}
 
-		if (m_picker && m_picker->is_downloading(p) && m_picker->is_piece_finished(p)
+		if (m_picker
+			&& m_picker->is_downloading(p)
+			&& !m_picker->has_piece_passed(p)
 			&& !m_picker->is_hashing(p))
 		{
 			piece_passed(p);
diff --git a/test/Jamfile b/test/Jamfile
index 4f3b892dd76..3ae28b64aed 100644
--- a/test/Jamfile
+++ b/test/Jamfile
@@ -200,6 +200,7 @@ run test_remap_files.cpp ;
 run test_similar_torrent.cpp ;
 run test_truncate.cpp ;
 run test_copy_file.cpp ;
+run test_disk_cache.cpp ;
 
 # turn these tests into simulations
 run test_resume.cpp ;
@@ -223,6 +224,7 @@ run test_web_seed_http_pw.cpp ;
 run test_web_seed_chunked.cpp ;
 run test_web_seed_ban.cpp ;
 run test_pe_crypto.cpp ;
+run test_disk_io.cpp ;
 
 run test_rtc.cpp ;
 run test_utp.cpp ;
@@ -322,4 +324,5 @@ alias deterministic-tests :
 	test_similar_torrent
 	test_truncate
 	test_vector_utils
+	test_disk_io
 	;
diff --git a/test/test_add_torrent.cpp b/test/test_add_torrent.cpp
index cdf64bd5c41..5b19994ae0a 100644
--- a/test/test_add_torrent.cpp
+++ b/test/test_add_torrent.cpp
@@ -9,6 +9,7 @@ see LICENSE file.
 
 #include "test.hpp"
 #include "setup_transfer.hpp" // for load_file
+#include "settings.hpp" // for settings()
 #include "libtorrent/flags.hpp"
 #include "libtorrent/alert_types.hpp"
 
@@ -77,7 +78,7 @@ lt::error_code test_add_torrent(std::string file, add_torrent_test_flag_t const
 		atp.ti.reset();
 	}
 
-	lt::session_params p;
+	lt::session_params p = settings();
 	p.settings.set_int(lt::settings_pack::alert_mask, lt::alert_category::error | lt::alert_category::status);
 	p.settings.set_str(lt::settings_pack::listen_interfaces, "127.0.0.1:6881");
 	lt::session ses(p);
diff --git a/test/test_copy_file.cpp b/test/test_copy_file.cpp
index 668352ab3bc..33977c07199 100644
--- a/test/test_copy_file.cpp
+++ b/test/test_copy_file.cpp
@@ -11,6 +11,7 @@ see LICENSE file.
 #include "libtorrent/error_code.hpp"
 #include "libtorrent/aux_/mmap.hpp"
 #include "libtorrent/aux_/open_mode.hpp"
+#include "libtorrent/aux_/storage_utils.hpp"
 #include "test.hpp"
 #include "test_utils.hpp"
 
diff --git a/test/test_disk_cache.cpp b/test/test_disk_cache.cpp
new file mode 100644
index 00000000000..3a1d22c52e8
--- /dev/null
+++ b/test/test_disk_cache.cpp
@@ -0,0 +1,171 @@
+/*
+
+Copyright (c) 2024, Arvid Norberg
+All rights reserved.
+
+You may use, distribute and modify this code under the terms of the BSD license,
+see LICENSE file.
+*/
+
+#include "libtorrent/aux_/visit_block_iovecs.hpp"
+#include <array>
+#include "test.hpp"
+
+using lt::span;
+
+namespace {
+
+struct tbe
+{
+	span<char const> write_buf() const
+	{
+		return _buf;
+	}
+	span<char const> _buf;
+};
+
+template <std::size_t N>
+tbe b(char const (&literal)[N])
+{
+	auto buf = span<char const>{&literal[0], N - 1};
+	return tbe{buf};
+}
+
+std::string join(span<span<char const>> iovec)
+{
+	std::string ret;
+	for (span<char const> const& b : iovec)
+	{
+		ret.append(b.begin(), b.end());
+	}
+	return ret;
+}
+
+}
+
+TORRENT_TEST(visit_block_iovecs_full)
+{
+	std::array<tbe, 5> const blocks{b("a"), b("b"), b("c"), b("d"), b("e")};
+
+	int cnt = 0;
+	lt::aux::visit_block_iovecs(span<tbe const>(blocks)
+		, [&cnt] (span<span<char const>> iovec, int start_idx) {
+		TEST_EQUAL(cnt, 0);
+		TEST_EQUAL(start_idx, 0);
+		TEST_EQUAL(iovec.size(), 5);
+		TEST_EQUAL(join(iovec), "abcde");
+		++cnt;
+		return false;
+	});
+}
+
+TORRENT_TEST(visit_block_iovecs_one_hole)
+{
+	std::array<tbe, 5> const blocks{b("a"), b("b"), b(""), b("d"), b("e")};
+
+	int cnt = 0;
+	lt::aux::visit_block_iovecs(span<tbe const>(blocks)
+		, [&cnt] (span<span<char const>> iovec, int start_idx) {
+		switch (cnt) {
+			case 0:
+				TEST_EQUAL(start_idx, 0);
+				TEST_EQUAL(iovec.size(), 2);
+				TEST_EQUAL(join(iovec), "ab");
+				break;
+			case 1:
+				TEST_EQUAL(start_idx, 3);
+				TEST_EQUAL(iovec.size(), 2);
+				TEST_EQUAL(join(iovec), "de");
+				break;
+			default:
+				TORRENT_ASSERT_FAIL();
+		}
+		++cnt;
+		return false;
+	});
+}
+
+TORRENT_TEST(visit_block_iovecs_two_holes)
+{
+	std::array<tbe, 5> const blocks{b("a"), b(""), b("c"), b(""), b("e")};
+
+	int cnt = 0;
+	lt::aux::visit_block_iovecs(span<tbe const>(blocks)
+		, [&cnt] (span<span<char const>> iovec, int start_idx) {
+		switch (cnt) {
+			case 0:
+				TEST_EQUAL(start_idx, 0);
+				TEST_EQUAL(iovec.size(), 1);
+				TEST_EQUAL(join(iovec), "a");
+				break;
+			case 1:
+				TEST_EQUAL(start_idx, 2);
+				TEST_EQUAL(iovec.size(), 1);
+				TEST_EQUAL(join(iovec), "c");
+				break;
+			case 2:
+				TEST_EQUAL(start_idx, 4);
+				TEST_EQUAL(iovec.size(), 1);
+				TEST_EQUAL(join(iovec), "e");
+				break;
+			default:
+				TORRENT_ASSERT_FAIL();
+		}
+		++cnt;
+		return false;
+	});
+}
+
+
+TORRENT_TEST(visit_block_iovecs_interrupt)
+{
+	std::array<tbe, 3> const blocks{b("a"), b(""), b("c")};
+
+	int cnt = 0;
+	lt::aux::visit_block_iovecs(span<tbe const>(blocks)
+		, [&cnt] (span<span<char const>> iovec, int start_idx) {
+		switch (cnt) {
+			case 0:
+				TEST_EQUAL(start_idx, 0);
+				TEST_EQUAL(iovec.size(), 1);
+				TEST_EQUAL(join(iovec), "a");
+				break;
+			default:
+				TORRENT_ASSERT_FAIL();
+		}
+		++cnt;
+		return true;
+	});
+}
+
+TORRENT_TEST(visit_block_iovecs_leading_hole)
+{
+	std::array<tbe, 5> const blocks{b(""), b("a"), b("b"), b("c"), b("d")};
+
+	int cnt = 0;
+	lt::aux::visit_block_iovecs(span<tbe const>(blocks)
+		, [&cnt] (span<span<char const>> iovec, int start_idx) {
+		TEST_EQUAL(cnt, 0);
+		TEST_EQUAL(start_idx, 1);
+		TEST_EQUAL(iovec.size(), 4);
+		TEST_EQUAL(join(iovec), "abcd");
+		++cnt;
+		return false;
+	});
+}
+
+TORRENT_TEST(visit_block_iovecs_trailing_hole)
+{
+	std::array<tbe, 5> const blocks{b("a"), b("b"), b("c"), b("d"), b("")};
+
+	int cnt = 0;
+	lt::aux::visit_block_iovecs(span<tbe const>(blocks)
+		, [&cnt] (span<span<char const>> iovec, int start_idx) {
+		TEST_EQUAL(cnt, 0);
+		TEST_EQUAL(start_idx, 0);
+		TEST_EQUAL(iovec.size(), 4);
+		TEST_EQUAL(join(iovec), "abcd");
+		++cnt;
+		return false;
+	});
+}
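
Taken together, these cases pin down the visit_block_iovecs() contract: the callback is invoked once per contiguous run of non-empty write_buf() buffers, receives that run plus the index of its first block, and can return true to stop the iteration early. The header itself is not part of this hunk, so the following is only a behavioral sketch consistent with the tests above (the name is suffixed _sketch and the signature is assumed, not copied from aux_/visit_block_iovecs.hpp):

    #include <vector>
    #include "libtorrent/span.hpp"

    namespace lt = libtorrent;

    // behavioral sketch of the contract exercised by the tests above
    template <typename BlockEntry, typename Fun>
    void visit_block_iovecs_sketch(lt::span<BlockEntry const> blocks, Fun const& f)
    {
        int idx = 0;
        int const n = int(blocks.size());
        while (idx < n)
        {
            // skip "holes", i.e. blocks whose buffer is empty
            while (idx < n && blocks[idx].write_buf().size() == 0) ++idx;
            if (idx == n) return;

            // collect the contiguous run of non-empty buffers
            int const start_idx = idx;
            std::vector<lt::span<char const>> iovec;
            while (idx < n && blocks[idx].write_buf().size() > 0)
                iovec.push_back(blocks[idx++].write_buf());

            // a true return value from the callback stops the visit
            if (f(lt::span<lt::span<char const>>(iovec), start_idx)) return;
        }
    }
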
diff --git a/test/test_disk_io.cpp b/test/test_disk_io.cpp
new file mode 100644
index 00000000000..189c389f126
--- /dev/null
+++ b/test/test_disk_io.cpp
@@ -0,0 +1,136 @@
+/*
+
+Copyright (c) 2024, Arvid Norberg
+All rights reserved.
+
+You may use, distribute and modify this code under the terms of the BSD license,
+see LICENSE file.
+*/
+
+#include
+#include "test.hpp"
+#include "setup_transfer.hpp"
+#include "libtorrent/disk_interface.hpp"
+#include "libtorrent/mmap_disk_io.hpp"
+#include "libtorrent/posix_disk_io.hpp"
+#include "libtorrent/pread_disk_io.hpp"
+#include "libtorrent/session_params.hpp" // for disk_io_constructor_type
+#include "libtorrent/settings_pack.hpp" // for default_settings
+#include "libtorrent/flags.hpp"
+#include "libtorrent/storage_defs.hpp"
+#include "libtorrent/io_context.hpp"
+#include "libtorrent/performance_counters.hpp"
+#include "libtorrent/file_storage.hpp"
+#include "libtorrent/aux_/vector.hpp"
+#include "libtorrent/sha1_hash.hpp"
+
+using disk_test_mode_t = lt::flags::bitfield_flag<std::uint8_t, struct disk_test_mode_tag>;
+
+namespace test_mode {
+	using lt::operator ""_bit;
+	constexpr disk_test_mode_t v1 = 0_bit;
+	constexpr disk_test_mode_t v2 = 1_bit;
+}
+
+namespace {
+void disk_io_test_suite(lt::disk_io_constructor_type disk_io
+	, disk_test_mode_t const flags
+	, int const piece_size
+	, int const num_files)
+{
+	lt::io_context ios;
+	lt::counters cnt;
+	lt::settings_pack sett = lt::default_settings();
+	std::unique_ptr<lt::disk_interface> disk_thread = disk_io(ios, sett, cnt);
+
+	lt::file_storage fs;
+	fs.set_piece_length(piece_size);
+	std::int64_t total_size = 0;
+	for (int i = 0; i < num_files; ++i)
+	{
+		int const file_size = piece_size * 2 + i * 11;
+		total_size += file_size;
+		fs.add_file("test-torrent/file-" + std::to_string(i), file_size, {});
+	}
+	fs.set_num_pieces(int((total_size + piece_size - 1) / piece_size));
+
+	lt::aux::vector<lt::download_priority_t, lt::file_index_t> priorities;
+	std::string const name = "test_torrent_store";
+	lt::storage_params params{
+		fs,
+		nullptr,
+		name,
+		lt::storage_mode_t::storage_mode_sparse,
+		priorities,
+		lt::sha1_hash{},
+		bool(flags & test_mode::v1),
+		bool(flags & test_mode::v2),
+	};
+
+	lt::storage_holder storage = disk_thread->new_torrent(params
+		, std::shared_ptr<void>());
+
+	int blocks_written = 0;
+	int expect_written = 0;
+	int const block_size = std::min(lt::default_block_size, piece_size);
+	for (lt::piece_index_t p : fs.piece_range())
+	{
+		int const len = fs.piece_size(p);
+		std::vector<char> const buffer = generate_piece(p, len);
+		for (int block = 0; block < len; block += block_size)
+		{
+			int const write_size = std::min(block_size, len - block);
+			lt::disk_job_flags_t const disk_flags = (block + block_size >= len)
+				? lt::disk_interface::flush_piece
+				: lt::disk_job_flags_t{};
+			std::cout << "flags: " << disk_flags << std::endl;
+			disk_thread->async_write(storage
+				, lt::peer_request{p, block, write_size}
+				, buffer.data() + block
+				, std::shared_ptr<lt::disk_observer>()
+				, [&](lt::storage_error const& e) {
+					TORRENT_ASSERT(!e.ec);
+					++blocks_written;
+				}
+				, disk_flags);
+			++expect_written;
+			disk_thread->submit_jobs();
+		}
+	}
+
+	std::cout << "blocks_written: " << blocks_written << std::endl;
+	while (blocks_written < expect_written)
+	{
+		ios.run_for(std::chrono::milliseconds(500));
+		std::cout << "blocks_written: " << blocks_written << std::endl;
+	}
+
+	TEST_EQUAL(blocks_written, expect_written);
+
+	disk_thread->abort(true);
+}
+
+}
+
+#if TORRENT_HAVE_MMAP || TORRENT_HAVE_MAP_VIEW_OF_FILE
+TORRENT_TEST(test_mmap_disk_io_small_pieces)
+{
+	disk_io_test_suite(&lt::mmap_disk_io_constructor, test_mode::v1 | test_mode::v2, 300, 3);
+}
+
+TORRENT_TEST(test_mmap_disk_io)
+{
+	disk_io_test_suite(&lt::mmap_disk_io_constructor, test_mode::v1 | test_mode::v2, 0x8000, 3);
+}
+#endif
+
+TORRENT_TEST(test_posix_disk_io)
+{
+	disk_io_test_suite(&lt::posix_disk_io_constructor, test_mode::v1 | test_mode::v2, 0x8000, 3);
+}
+
+TORRENT_TEST(test_pread_disk_io)
+{
+	disk_io_test_suite(&lt::pread_disk_io_constructor, test_mode::v1 | test_mode::v2, 0x8000, 3);
+}
+
diff --git a/test/test_file.cpp b/test/test_file.cpp
index ce6c32b451a..bd06b52b011 100644
--- a/test/test_file.cpp
+++ b/test/test_file.cpp
@@ -18,6 +18,8 @@ see LICENSE file.
 #include "libtorrent/string_view.hpp"
 #include "libtorrent/aux_/file_view_pool.hpp"
 #include "libtorrent/aux_/numeric_cast.hpp"
+#include "libtorrent/aux_/storage_utils.hpp"
+#include "libtorrent/aux_/file_pool_impl.hpp"
 #include "test.hpp"
 #include "test_utils.hpp"
 #include
diff --git a/test/test_storage.cpp b/test/test_storage.cpp
index 54693d7932d..c8ecd6f6b74 100644
--- a/test/test_storage.cpp
+++ b/test/test_storage.cpp
@@ -19,7 +19,9 @@ see LICENSE file.
 
 #include "libtorrent/aux_/mmap_storage.hpp"
 #include "libtorrent/aux_/posix_storage.hpp"
+#include "libtorrent/aux_/pread_storage.hpp"
 #include "libtorrent/aux_/file_view_pool.hpp"
+#include "libtorrent/aux_/file_pool.hpp"
 #include "libtorrent/hasher.hpp"
 #include "libtorrent/session.hpp"
 #include "libtorrent/session_params.hpp"
@@ -45,20 +47,13 @@ see LICENSE file.
 using namespace std::placeholders;
 using namespace lt;
 
-#if ! TORRENT_HAVE_MMAP && ! TORRENT_HAVE_MAP_VIEW_OF_FILE
-namespace libtorrent {
-namespace aux {
-	struct file_view_pool {};
-}
-}
-#endif
-
 namespace {
 
 #if TORRENT_HAVE_MMAP || TORRENT_HAVE_MAP_VIEW_OF_FILE
 using lt::aux::mmap_storage;
 #endif
 using lt::aux::posix_storage;
+using lt::aux::pread_storage;
 
 constexpr int piece_size = 16 * 1024 * 16;
 constexpr int half = piece_size / 2;
@@ -186,6 +181,12 @@ struct file_pool_type<posix_storage>
 	using type = int;
 };
 
+template <>
+struct file_pool_type<pread_storage>
+{
+	using type = aux::file_pool;
+};
+
 template <typename StorageType>
 std::shared_ptr<StorageType> make_storage(storage_params const& p
 	, typename file_pool_type<StorageType>::type& fp);
@@ -206,6 +207,13 @@ std::shared_ptr<posix_storage> make_storage(storage_params const& p
 	return std::make_shared<posix_storage>(p);
 }
 
+template <>
+std::shared_ptr<pread_storage> make_storage(storage_params const& p
+	, aux::file_pool& fp)
+{
+	return std::make_shared<pread_storage>(p, fp);
+}
+
 template <typename StorageType>
 std::pair<std::shared_ptr<StorageType>, std::shared_ptr<torrent_info>> setup_torrent(
@@ -295,6 +303,33 @@ int read(std::shared_ptr<posix_storage> s
 
 void release_files(std::shared_ptr<posix_storage>, storage_error&) {}
 
+int write(std::shared_ptr<pread_storage> s
+	, aux::session_settings const& sett
+	, span<char> buf
+	, piece_index_t const piece
+	, int const offset
+	, aux::open_mode_t mode
+	, storage_error& error)
+{
+	return s->write(sett, buf, piece, offset, mode, disk_job_flags_t{}, error);
+}
+
+int read(std::shared_ptr<pread_storage> s
+	, aux::session_settings const& sett
+	, span<char> buf
+	, piece_index_t piece
+	, int offset
+	, aux::open_mode_t mode
+	, storage_error& ec)
+{
+	return s->read(sett, buf, piece, offset, mode, disk_job_flags_t{}, ec);
+}
+
+void release_files(std::shared_ptr<pread_storage> s, storage_error& ec)
+{
+	s->release_files(ec);
+}
+
 std::vector<char> new_piece(std::size_t const size)
 {
 	std::vector<char> ret(size);
@@ -875,6 +910,17 @@ TORRENT_TEST(remove_posix_disk_io)
 	test_remove<posix_storage>(current_working_directory());
 }
 
+TORRENT_TEST(rename_pread_disk_io)
+{
+	test_rename<pread_storage>(current_working_directory());
+}
+
+TORRENT_TEST(remove_pread_disk_io)
+{
+	test_remove<pread_storage>(current_working_directory());
+}
+
+
 void test_fastresume(bool const test_deprecated)
 {
 	std::string test_path = current_working_directory();
@@ -912,6 +958,7 @@ void test_fastresume(bool const test_deprecated)
 	{
 		print_alerts(ses, "ses");
 		s = h.status();
+		std::cout << "progress: " << s.progress << std::endl;
 		if (s.progress == 1.0f)
 		{
 			std::cout << "progress: 1.0f" << std::endl;
@@ -1222,6 +1269,7 @@ bool check_pattern(std::vector<char> const& buf, int counter)
 TORRENT_TEST(mmap_disk_io) { run_test<mmap_storage>(); }
 #endif
 TORRENT_TEST(posix_disk_io) { run_test<posix_storage>(); }
+TORRENT_TEST(pread_disk_io) { run_test<pread_storage>(); }
 
 namespace {
 
@@ -1576,6 +1624,22 @@ TORRENT_TEST(move_posix_storage_reset)
 	test_move_storage_reset<posix_storage>(move_flags_t::reset_save_path_unchecked);
 }
 
+TORRENT_TEST(move_pread_storage_to_self)
+{
+	test_move_storage_to_self<pread_storage>();
+}
+
+TORRENT_TEST(move_pread_storage_into_self)
+{
+	test_move_storage_into_self<pread_storage>();
+}
+
+TORRENT_TEST(move_pread_storage_reset)
+{
+	test_move_storage_reset<pread_storage>(move_flags_t::reset_save_path);
+	test_move_storage_reset<pread_storage>(move_flags_t::reset_save_path_unchecked);
+}
+
 TORRENT_TEST(storage_paths_string_pooling)
 {
 	file_storage file_storage;
diff --git a/test/test_torrent_info.cpp b/test/test_torrent_info.cpp
index 94496bcc36c..3ec0545a66d 100644
--- a/test/test_torrent_info.cpp
+++ b/test/test_torrent_info.cpp
@@ -14,7 +14,8 @@ see LICENSE file.
#include "test.hpp" #include "setup_transfer.hpp" // for load_file #include "test_utils.hpp" -#include "settings.hpp" +#include "settings.hpp" // for settings() + #include "libtorrent/file_storage.hpp" #include "libtorrent/load_torrent.hpp" #include "libtorrent/aux_/path.hpp" diff --git a/test/web_seed_suite.cpp b/test/web_seed_suite.cpp index de5ac01c9dc..e7a28325534 100644 --- a/test/web_seed_suite.cpp +++ b/test/web_seed_suite.cpp @@ -220,7 +220,10 @@ void test_transfer(lt::session& ses, lt::add_torrent_params p { bool const expect = !fs.pad_file_at(i); std::string file_path = combine_path(save_path, fs.file_path(i)); - std::printf("checking file: %s\n", file_path.c_str()); + std::printf("checking file: %s (pad-file: %d size: %" PRId64 ")\n" + , file_path.c_str() + , !expect + , fs.file_size(i)); TEST_EQUAL(exists(file_path), expect); } } @@ -338,7 +341,6 @@ int EXPORT run_http_suite(int proxy, char const* protocol { settings_pack pack = settings(); - pack.set_int(settings_pack::max_queued_disk_bytes, 256 * 1024); pack.set_str(settings_pack::listen_interfaces, test_listen_interface()); pack.set_int(settings_pack::max_retry_port_bind, 1000); pack.set_bool(settings_pack::enable_lsd, false); diff --git a/tools/disk_io_stress_test.cpp b/tools/disk_io_stress_test.cpp index 3a2ba2dae79..2b8027f5d5e 100644 --- a/tools/disk_io_stress_test.cpp +++ b/tools/disk_io_stress_test.cpp @@ -10,6 +10,7 @@ see LICENSE file. #include "libtorrent/session.hpp" // for default_disk_io_constructor #include "libtorrent/disabled_disk_io.hpp" #include "libtorrent/mmap_disk_io.hpp" +#include "libtorrent/pread_disk_io.hpp" #include "libtorrent/posix_disk_io.hpp" #include "libtorrent/disk_interface.hpp" @@ -167,6 +168,8 @@ int run_test(test_case const& t) { if (t.disk_backend == "posix"_sv) disk_io = lt::posix_disk_io_constructor(ioc, pack, cnt); + else if (t.disk_backend == "pread"_sv) + disk_io = lt::pread_disk_io_constructor(ioc, pack, cnt); else if (t.disk_backend == "disabled"_sv) disk_io = lt::disabled_disk_io_constructor(ioc, pack, cnt); else @@ -262,7 +265,7 @@ int run_test(test_case const& t) { if ((job_counter & 0x1fff) == 0) { - printf("o: %d w: %d r: %d\r" + printf("o: %d w: %d r: %d \r" , outstanding , int(blocks_to_write.size()) , int(blocks_to_read.size())); diff --git a/tools/parse_session_stats.py b/tools/parse_session_stats.py index 4ef185348f2..bf27d4133c3 100755 --- a/tools/parse_session_stats.py +++ b/tools/parse_session_stats.py @@ -130,7 +130,8 @@ def process_color(c, op): def plot_fun(script): try: - ret = os.system('gnuplot "%s" 2>/dev/null' % script) + print('gnuplot "%s"' % script) + ret = os.system('gnuplot "%s"' % script) except Exception as e: print('please install gnuplot: sudo apt install gnuplot') raise e diff --git a/tools/run_benchmark.py b/tools/run_benchmark.py index 0e2dcb42048..a05e50dfc1b 100755 --- a/tools/run_benchmark.py +++ b/tools/run_benchmark.py @@ -49,10 +49,11 @@ def main(): rm_file_or_dir('t') - run_test('download-write-through', 'upload', ['-1', '--disk_io_write_mode=write_through', '-s', args.save_path], args.download_peers) - reset_download(args.save_path) - run_test('download-full-cache', 'upload', ['-1', '--disk_io_write_mode=enable_os_cache', '-s', args.save_path], args.download_peers) - run_test('upload', 'download', ['-G', '-e', '240', '-s', args.save_path], args.upload_peers) + for io_backend in ["mmap", "pread", "posix"]: + run_test(f'download-write-through-{io_backend}', 'upload', ['-i', io_backend, '-1', '--disk_io_write_mode=write_through', '-s', 
args.save_path], args.download_peers) + reset_download(args.save_path) + run_test(f'download-full-cache-{io_backend}', 'upload', ['-i', io_backend, '-1', '--disk_io_write_mode=enable_os_cache', '-s', args.save_path], args.download_peers) + run_test(f'upload-{io_backend}', 'download', ['-i', io_backend, '-G', '-e', '240', '-s', args.save_path], args.upload_peers) def run_test(name, test_cmd, client_arg, num_peers):