From 99fe4a0af145bf545845b1674a9a7da64d5efb0e Mon Sep 17 00:00:00 2001 From: Patrick Stotko Date: Tue, 19 Nov 2024 15:17:27 +0100 Subject: [PATCH] vector: Extend support for custom execution policies --- src/stdgpu/impl/vector_detail.cuh | 148 +++++++++++++++++++++++++++--- src/stdgpu/vector.cuh | 125 ++++++++++++++++++++++++- tests/stdgpu/vector.inc | 32 +++---- 3 files changed, 276 insertions(+), 29 deletions(-) diff --git a/src/stdgpu/impl/vector_detail.cuh b/src/stdgpu/impl/vector_detail.cuh index d89a0da94..e20c30914 100644 --- a/src/stdgpu/impl/vector_detail.cuh +++ b/src/stdgpu/impl/vector_detail.cuh @@ -436,6 +436,15 @@ vector::empty() const return (size() == 0); } +template +template >)> +inline bool +vector::empty(ExecutionPolicy&& policy) const +{ + return (size(std::forward(policy)) == 0); +} + template inline STDGPU_HOST_DEVICE bool vector::full() const @@ -443,6 +452,15 @@ vector::full() const return (size() == max_size()); } +template +template >)> +inline bool +vector::full(ExecutionPolicy&& policy) const +{ + return (size(std::forward(policy)) == max_size()); +} + template inline STDGPU_HOST_DEVICE index_t vector::size() const @@ -472,6 +490,37 @@ vector::size() const return current_size; } +template +template >)> +inline index_t +vector::size(ExecutionPolicy&& policy) const +{ + index_t current_size = static_cast(_size.load(std::forward(policy))); + + // Check boundary cases where the push/pop caused the pointers to be overful/underful + if (current_size < 0) + { + printf("stdgpu::vector::size : Size out of bounds: %" STDGPU_PRIINDEX " not in [0, %" STDGPU_PRIINDEX + "]. Clamping to 0\n", + current_size, + capacity()); + return 0; + } + if (current_size > capacity()) + { + printf("stdgpu::vector::size : Size out of bounds: %" STDGPU_PRIINDEX " not in [0, %" STDGPU_PRIINDEX + "]. Clamping to %" STDGPU_PRIINDEX "\n", + current_size, + capacity(), + capacity()); + return capacity(); + } + + STDGPU_ENSURES(current_size <= capacity()); + return current_size; +} + template inline STDGPU_HOST_DEVICE index_t vector::max_size() const noexcept @@ -520,14 +569,14 @@ template ::clear(ExecutionPolicy&& policy) { - if (empty()) + if (empty(std::forward(policy))) { return; } if (!detail::is_destroy_optimizable()) { - const index_t current_size = size(); + const index_t current_size = size(std::forward(policy)); detail::unoptimized_destroy(std::forward(policy), stdgpu::device_begin(_data), @@ -536,9 +585,9 @@ vector::clear(ExecutionPolicy&& policy) _occupied.reset(std::forward(policy)); - _size.store(static_cast(0)); + _size.store(std::forward(policy), static_cast(0)); - STDGPU_ENSURES(empty()); + STDGPU_ENSURES(empty(std::forward(policy))); STDGPU_ENSURES(valid(std::forward(policy))); } @@ -561,13 +610,23 @@ vector::valid(ExecutionPolicy&& policy) const return true; } - return (size_valid() && occupied_count_valid(std::forward(policy)) && + return (size_valid(std::forward(policy)) && + occupied_count_valid(std::forward(policy)) && _locks.valid(std::forward(policy))); } template device_ptr vector::device_begin() +{ + return device_begin(execution::device); +} + +template +template >)> +device_ptr +vector::device_begin([[maybe_unused]] ExecutionPolicy&& policy) { return stdgpu::device_begin(_data); } @@ -576,12 +635,30 @@ template device_ptr vector::device_end() { - return device_begin() + size(); + return device_end(execution::device); +} + +template +template >)> +device_ptr +vector::device_end(ExecutionPolicy&& policy) +{ + return stdgpu::device_begin(_data) + size(std::forward(policy)); } template device_ptr vector::device_begin() const +{ + return device_begin(execution::device); +} + +template +template >)> +device_ptr +vector::device_begin([[maybe_unused]] ExecutionPolicy&& policy) const { return stdgpu::device_begin(_data); } @@ -590,12 +667,30 @@ template device_ptr vector::device_end() const { - return device_begin() + size(); + return device_end(execution::device); +} + +template +template >)> +device_ptr +vector::device_end(ExecutionPolicy&& policy) const +{ + return stdgpu::device_begin(_data) + size(std::forward(policy)); } template device_ptr vector::device_cbegin() const +{ + return device_cbegin(execution::device); +} + +template +template >)> +device_ptr +vector::device_cbegin([[maybe_unused]] ExecutionPolicy&& policy) const { return stdgpu::device_cbegin(_data); } @@ -604,21 +699,48 @@ template device_ptr vector::device_cend() const { - return device_cbegin() + size(); + return device_cend(execution::device); +} + +template +template >)> +device_ptr +vector::device_cend(ExecutionPolicy&& policy) const +{ + return stdgpu::device_cbegin(_data) + size(std::forward(policy)); } template stdgpu::device_range vector::device_range() { - return stdgpu::device_range(_data, size()); + return device_range(execution::device); +} + +template +template >)> +stdgpu::device_range +vector::device_range(ExecutionPolicy&& policy) +{ + return stdgpu::device_range(_data, size(std::forward(policy))); } template stdgpu::device_range vector::device_range() const { - return stdgpu::device_range(_data, size()); + return device_range(execution::device); +} + +template +template >)> +stdgpu::device_range +vector::device_range(ExecutionPolicy&& policy) const +{ + return stdgpu::device_range(_data, size(std::forward(policy))); } template @@ -643,10 +765,12 @@ vector::occupied_count_valid(ExecutionPolicy&& policy) const } template +template >)> bool -vector::size_valid() const +vector::size_valid(ExecutionPolicy&& policy) const { - index_t current_size = static_cast(_size.load()); + index_t current_size = static_cast(_size.load(std::forward(policy))); return (0 <= current_size && current_size <= capacity()); } diff --git a/src/stdgpu/vector.cuh b/src/stdgpu/vector.cuh index 71a74c8c8..21262c396 100644 --- a/src/stdgpu/vector.cuh +++ b/src/stdgpu/vector.cuh @@ -290,6 +290,17 @@ public: [[nodiscard]] STDGPU_HOST_DEVICE bool empty() const; + /** + * \brief Checks if the object is empty + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return True if the object is empty, false otherwise + */ + template >)> + [[nodiscard]] bool + empty(ExecutionPolicy&& policy) const; + /** * \brief Checks if the object is full * \return True if the object is full, false otherwise @@ -297,6 +308,17 @@ public: STDGPU_HOST_DEVICE bool full() const; + /** + * \brief Checks if the object is full + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return True if the object is full, false otherwise + */ + template >)> + bool + full(ExecutionPolicy&& policy) const; + /** * \brief Returns the current size * \return The size @@ -304,6 +326,17 @@ public: STDGPU_HOST_DEVICE index_t size() const; + /** + * \brief Returns the current size + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return The size + */ + template >)> + index_t + size(ExecutionPolicy&& policy) const; + /** * \brief Returns the maximal size * \return The maximal size @@ -380,6 +413,17 @@ public: device_ptr device_begin(); + /** + * \brief Creates a pointer to the begin of the device container + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return A pointer to the begin of the object + */ + template >)> + device_ptr + device_begin(ExecutionPolicy&& policy); + /** * \brief Creates a pointer to the end of the device container * \return A pointer to the end of the object @@ -387,6 +431,17 @@ public: device_ptr device_end(); + /** + * \brief Creates a pointer to the end of the device container + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return A pointer to the end of the object + */ + template >)> + device_ptr + device_end(ExecutionPolicy&& policy); + /** * \brief Creates a pointer to the begin of the device container * \return A const pointer to the begin of the object @@ -394,6 +449,17 @@ public: device_ptr device_begin() const; + /** + * \brief Creates a pointer to the begin of the device container + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return A const pointer to the begin of the object + */ + template >)> + device_ptr + device_begin(ExecutionPolicy&& policy) const; + /** * \brief Creates a pointer to the end of the device container * \return A const pointer to the end of the object @@ -401,6 +467,17 @@ public: device_ptr device_end() const; + /** + * \brief Creates a pointer to the end of the device container + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return A const pointer to the end of the object + */ + template >)> + device_ptr + device_end(ExecutionPolicy&& policy) const; + /** * \brief Creates a pointer to the begin of the device container * \return A const pointer to the begin of the object @@ -408,6 +485,17 @@ public: device_ptr device_cbegin() const; + /** + * \brief Creates a pointer to the begin of the device container + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return A const pointer to the begin of the object + */ + template >)> + device_ptr + device_cbegin(ExecutionPolicy&& policy) const; + /** * \brief Creates a pointer to the end of the device container * \return A const pointer to the end of the object @@ -415,6 +503,17 @@ public: device_ptr device_cend() const; + /** + * \brief Creates a pointer to the end of the device container + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return A const pointer to the end of the object + */ + template >)> + device_ptr + device_cend(ExecutionPolicy&& policy) const; + /** * \brief Creates a range of the device container * \return A range of the object @@ -422,6 +521,17 @@ public: stdgpu::device_range device_range(); + /** + * \brief Creates a range of the device container + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return A range of the object + */ + template >)> + stdgpu::device_range + device_range(ExecutionPolicy&& policy); + /** * \brief Creates a range of the device container * \return A const range of the object @@ -429,6 +539,17 @@ public: stdgpu::device_range device_range() const; + /** + * \brief Creates a range of the device container + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return A const range of the object + */ + template >)> + stdgpu::device_range + device_range(ExecutionPolicy&& policy) const; + private: template friend class detail::vector_insert; @@ -447,8 +568,10 @@ private: bool occupied_count_valid(ExecutionPolicy&& policy) const; + template >)> bool - size_valid() const; + size_valid(ExecutionPolicy&& policy) const; using mutex_array_allocator_type = typename stdgpu::allocator_traits::template rebind_alloc; diff --git a/tests/stdgpu/vector.inc b/tests/stdgpu/vector.inc index ac5f81856..df4b9c3db 100644 --- a/tests/stdgpu/vector.inc +++ b/tests/stdgpu/vector.inc @@ -754,14 +754,14 @@ TEST_F(stdgpu_vector, insert_custom_execution_policy) int* values = createDeviceArray(N_insert); stdgpu::iota(policy, stdgpu::device_begin(values), stdgpu::device_end(values), static_cast(N_init) + 1); - pool.insert(policy, pool.device_end(), stdgpu::device_begin(values), stdgpu::device_end(values)); + pool.insert(policy, pool.device_end(policy), stdgpu::device_begin(values), stdgpu::device_end(values)); - ASSERT_EQ(pool.size(), N_init + N_insert); - ASSERT_FALSE(pool.empty()); - ASSERT_FALSE(pool.full()); - ASSERT_TRUE(pool.valid()); + ASSERT_EQ(pool.size(policy), N_init + N_insert); + ASSERT_FALSE(pool.empty(policy)); + ASSERT_FALSE(pool.full(policy)); + ASSERT_TRUE(pool.valid(policy)); - int* host_numbers = copyCreateDevice2HostArray(pool.data(), pool.size()); + int* host_numbers = copyCreateDevice2HostArray(pool.data(), pool.size(policy)); for (stdgpu::index_t i = 0; i < pool.size(); ++i) { EXPECT_EQ(host_numbers[i], i + 1); @@ -865,14 +865,14 @@ TEST_F(stdgpu_vector, erase_custom_execution_policy) fill_vector(pool, N_init); - pool.erase(policy, pool.device_end() - N_erase, pool.device_end()); + pool.erase(policy, pool.device_end(policy) - N_erase, pool.device_end(policy)); - ASSERT_EQ(pool.size(), N_init - N_erase); - ASSERT_FALSE(pool.empty()); - ASSERT_FALSE(pool.full()); - ASSERT_TRUE(pool.valid()); + ASSERT_EQ(pool.size(policy), N_init - N_erase); + ASSERT_FALSE(pool.empty(policy)); + ASSERT_FALSE(pool.full(policy)); + ASSERT_TRUE(pool.valid(policy)); - int* host_numbers = copyCreateDevice2HostArray(pool.data(), pool.size()); + int* host_numbers = copyCreateDevice2HostArray(pool.data(), pool.size(policy)); for (stdgpu::index_t i = 0; i < pool.size(); ++i) { EXPECT_EQ(host_numbers[i], i + 1); @@ -952,10 +952,10 @@ TEST_F(stdgpu_vector, clear_custom_execution_policy) pool.clear(policy); - ASSERT_EQ(pool.size(), 0); - ASSERT_TRUE(pool.empty()); - ASSERT_FALSE(pool.full()); - ASSERT_TRUE(pool.valid()); + ASSERT_EQ(pool.size(policy), 0); + ASSERT_TRUE(pool.empty(policy)); + ASSERT_FALSE(pool.full(policy)); + ASSERT_TRUE(pool.valid(policy)); stdgpu::vector::destroyDeviceObject(pool); }