From a2f4dd8a8fcc6e67331cb70259ce9394a101cd92 Mon Sep 17 00:00:00 2001 From: Patrick Stotko Date: Tue, 19 Nov 2024 14:45:59 +0100 Subject: [PATCH] deque: Extend support for custom execution policies --- src/stdgpu/deque.cuh | 37 +++++++++++- src/stdgpu/impl/deque_detail.cuh | 96 +++++++++++++++++++++++++------- tests/stdgpu/deque.inc | 8 +-- 3 files changed, 116 insertions(+), 25 deletions(-) diff --git a/src/stdgpu/deque.cuh b/src/stdgpu/deque.cuh index fd1cf2266..137f13806 100644 --- a/src/stdgpu/deque.cuh +++ b/src/stdgpu/deque.cuh @@ -245,6 +245,17 @@ public: [[nodiscard]] STDGPU_HOST_DEVICE bool empty() const; + /** + * \brief Checks if the object is empty + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return True if the object is empty, false otherwise + */ + template >)> + [[nodiscard]] bool + empty(ExecutionPolicy&& policy) const; + /** * \brief Checks if the object is full * \return True if the object is full, false otherwise @@ -252,6 +263,17 @@ public: STDGPU_HOST_DEVICE bool full() const; + /** + * \brief Checks if the object is full + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return True if the object is full, false otherwise + */ + template >)> + bool + full(ExecutionPolicy&& policy) const; + /** * \brief Returns the current size * \return The size @@ -259,6 +281,17 @@ public: STDGPU_HOST_DEVICE index_t size() const; + /** + * \brief Returns the current size + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return The size + */ + template >)> + index_t + size(ExecutionPolicy&& policy) const; + /** * \brief Returns the maximal size * \return The maximal size @@ -373,8 +406,10 @@ private: bool occupied_count_valid(ExecutionPolicy&& policy) const; + template >)> bool - size_valid() const; + size_valid(ExecutionPolicy&& policy) const; using mutex_array_allocator_type = typename stdgpu::allocator_traits::template rebind_alloc; diff --git a/src/stdgpu/impl/deque_detail.cuh b/src/stdgpu/impl/deque_detail.cuh index da31fd7dc..04f365b34 100644 --- a/src/stdgpu/impl/deque_detail.cuh +++ b/src/stdgpu/impl/deque_detail.cuh @@ -417,6 +417,15 @@ deque::empty() const return (size() == 0); } +template +template >)> +inline bool +deque::empty(ExecutionPolicy&& policy) const +{ + return (size(std::forward(policy)) == 0); +} + template inline STDGPU_HOST_DEVICE bool deque::full() const @@ -424,6 +433,15 @@ deque::full() const return (size() == max_size()); } +template +template >)> +inline bool +deque::full(ExecutionPolicy&& policy) const +{ + return (size(std::forward(policy)) == max_size()); +} + template inline STDGPU_HOST_DEVICE index_t deque::size() const @@ -453,6 +471,37 @@ deque::size() const return current_size; } +template +template >)> +inline index_t +deque::size(ExecutionPolicy&& policy) const +{ + index_t current_size = _size.load(std::forward(policy)); + + // Check boundary cases where the push/pop caused the pointers to be overful/underful + if (current_size < 0) + { + printf("stdgpu::deque::size : Size out of bounds: %" STDGPU_PRIINDEX " not in [0, %" STDGPU_PRIINDEX + "]. Clamping to 0\n", + current_size, + capacity()); + return 0; + } + if (current_size > capacity()) + { + printf("stdgpu::deque::size : Size out of bounds: %" STDGPU_PRIINDEX " not in [0, %" STDGPU_PRIINDEX + "]. Clamping to %" STDGPU_PRIINDEX "\n", + current_size, + capacity(), + capacity()); + return capacity(); + } + + STDGPU_ENSURES(current_size <= capacity()); + return current_size; +} + template inline STDGPU_HOST_DEVICE index_t deque::max_size() const noexcept @@ -501,18 +550,18 @@ template ::clear(ExecutionPolicy&& policy) { - if (empty()) + if (empty(std::forward(policy))) { return; } if (!detail::is_destroy_optimizable()) { - const index_t begin = static_cast(_begin.load()); - const index_t end = static_cast(_end.load()); + const index_t begin = static_cast(_begin.load(std::forward(policy))); + const index_t end = static_cast(_end.load(std::forward(policy))); // Full, i.e. one large block and begin == end - if (full()) + if (full(std::forward(policy))) { detail::unoptimized_destroy(std::forward(policy), device_begin(_data), device_end(_data)); } @@ -537,12 +586,12 @@ deque::clear(ExecutionPolicy&& policy) _occupied.reset(std::forward(policy)); - _size.store(0); + _size.store(std::forward(policy), 0); - _begin.store(0); - _end.store(0); + _begin.store(std::forward(policy), 0); + _end.store(std::forward(policy), 0); - STDGPU_ENSURES(empty()); + STDGPU_ENSURES(empty(std::forward(policy))); STDGPU_ENSURES(valid(std::forward(policy))); } @@ -565,7 +614,8 @@ deque::valid(ExecutionPolicy&& policy) const return true; } - return (size_valid() && occupied_count_valid(std::forward(policy)) && + return (size_valid(std::forward(policy)) && + occupied_count_valid(std::forward(policy)) && _locks.valid(std::forward(policy))); } @@ -582,11 +632,11 @@ template deque::device_range(ExecutionPolicy&& policy) { - const index_t begin = static_cast(_begin.load()); - const index_t end = static_cast(_end.load()); + const index_t begin = static_cast(_begin.load(std::forward(policy))); + const index_t end = static_cast(_end.load(std::forward(policy))); // Full, i.e. one large block and begin == end - if (full()) + if (full(std::forward(policy))) { iota(std::forward(policy), device_begin(_range_indices), device_end(_range_indices), 0); } @@ -611,7 +661,9 @@ deque::device_range(ExecutionPolicy&& policy) begin); } - return device_indexed_range(stdgpu::device_range(_range_indices, size()), data()); + return device_indexed_range( + stdgpu::device_range(_range_indices, size(std::forward(policy))), + data()); } template @@ -627,11 +679,11 @@ template deque::device_range(ExecutionPolicy&& policy) const { - const index_t begin = static_cast(_begin.load()); - const index_t end = static_cast(_end.load()); + const index_t begin = static_cast(_begin.load(std::forward(policy))); + const index_t end = static_cast(_end.load(std::forward(policy))); // Full, i.e. one large block and begin == end - if (full()) + if (full(std::forward(policy))) { iota(std::forward(policy), device_begin(_range_indices), device_end(_range_indices), 0); } @@ -656,7 +708,9 @@ deque::device_range(ExecutionPolicy&& policy) const begin); } - return device_indexed_range(stdgpu::device_range(_range_indices, size()), data()); + return device_indexed_range( + stdgpu::device_range(_range_indices, size(std::forward(policy))), + data()); } template @@ -672,17 +726,19 @@ template ::occupied_count_valid(ExecutionPolicy&& policy) const { - index_t size_count = size(); + index_t size_count = size(std::forward(policy)); index_t size_sum = _occupied.count(std::forward(policy)); return (size_count == size_sum); } template +template >)> bool -deque::size_valid() const +deque::size_valid(ExecutionPolicy&& policy) const { - int current_size = _size.load(); + int current_size = _size.load(std::forward(policy)); return (0 <= current_size && current_size <= static_cast(capacity())); } diff --git a/tests/stdgpu/deque.inc b/tests/stdgpu/deque.inc index e265f67f1..13018e697 100644 --- a/tests/stdgpu/deque.inc +++ b/tests/stdgpu/deque.inc @@ -1365,10 +1365,10 @@ TEST_F(stdgpu_deque, clear_custom_execution_policy) pool.clear(policy); - ASSERT_EQ(pool.size(), 0); - ASSERT_TRUE(pool.empty()); - ASSERT_FALSE(pool.full()); - ASSERT_TRUE(pool.valid()); + ASSERT_EQ(pool.size(policy), 0); + ASSERT_TRUE(pool.empty(policy)); + ASSERT_FALSE(pool.full(policy)); + ASSERT_TRUE(pool.valid(policy)); stdgpu::deque::destroyDeviceObject(pool); }