From ae8acbb68647cdc00fe58049a824c3dbb339d582 Mon Sep 17 00:00:00 2001 From: Patrick Stotko Date: Tue, 19 Nov 2024 13:17:16 +0100 Subject: [PATCH] atomic: Extend support for custom execution policies --- benchmarks/stdgpu/main.cpp | 1 + src/stdgpu/atomic.cuh | 48 ++++++++++++++++ src/stdgpu/cuda/CMakeLists.txt | 3 +- src/stdgpu/cuda/impl/memory_detail.h | 79 +++++++++++++++++++++++++ src/stdgpu/cuda/memory.h | 52 +++++++++++++++++ src/stdgpu/hip/CMakeLists.txt | 3 +- src/stdgpu/hip/impl/memory_detail.h | 80 ++++++++++++++++++++++++++ src/stdgpu/hip/memory.h | 52 +++++++++++++++++ src/stdgpu/impl/atomic_detail.cuh | 67 ++++++++++++++++++++- src/stdgpu/impl/memory_detail.h | 46 +++++++++++++++ src/stdgpu/openmp/CMakeLists.txt | 3 +- src/stdgpu/openmp/impl/memory_detail.h | 61 ++++++++++++++++++++ src/stdgpu/openmp/memory.h | 52 +++++++++++++++++ tests/stdgpu/atomic.inc | 26 +++++++++ tests/stdgpu/main.cpp | 1 + 15 files changed, 569 insertions(+), 5 deletions(-) create mode 100644 src/stdgpu/cuda/impl/memory_detail.h create mode 100644 src/stdgpu/hip/impl/memory_detail.h create mode 100644 src/stdgpu/openmp/impl/memory_detail.h diff --git a/benchmarks/stdgpu/main.cpp b/benchmarks/stdgpu/main.cpp index 7d913072f..9d81e50a2 100644 --- a/benchmarks/stdgpu/main.cpp +++ b/benchmarks/stdgpu/main.cpp @@ -65,6 +65,7 @@ main(int argc, char* argv[]) stdgpu::get_deallocation_count(stdgpu::dynamic_memory_type::host), stdgpu::get_allocation_count(stdgpu::dynamic_memory_type::host) - stdgpu::get_deallocation_count(stdgpu::dynamic_memory_type::host)); + printf("+---------------------------------------------------------+\n"); return EXIT_SUCCESS; } diff --git a/src/stdgpu/atomic.cuh b/src/stdgpu/atomic.cuh index 36c5f4131..ed594dd50 100644 --- a/src/stdgpu/atomic.cuh +++ b/src/stdgpu/atomic.cuh @@ -210,6 +210,18 @@ public: STDGPU_HOST_DEVICE T load(const memory_order order = memory_order_seq_cst) const; + /** + * \brief Atomically loads and returns the current value of the atomic object + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy + * \param[in] order The memory order + * \return The current value of this object + */ + template >)> + T + load(ExecutionPolicy&& policy, const memory_order order = memory_order_seq_cst) const; + /** * \brief Atomically loads and returns the current value of the atomic object * \return The current value of this object @@ -225,6 +237,18 @@ public: STDGPU_HOST_DEVICE void store(const T desired, const memory_order order = memory_order_seq_cst); + /** + * \brief Atomically replaces the current value with desired one + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy + * \param[in] desired The value to store to the atomic object + * \param[in] order The memory order + */ + template >)> + void + store(ExecutionPolicy&& policy, const T desired, const memory_order order = memory_order_seq_cst); + /** * \brief Atomically replaces the current value with desired one * \param[in] desired The value to store to the atomic object @@ -496,6 +520,18 @@ public: STDGPU_HOST_DEVICE T load(const memory_order order = memory_order_seq_cst) const; + /** + * \brief Atomically loads and returns the current value of the atomic object + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy + * \param[in] order The memory order + * \return The current value of this object + */ + template >)> + T + load(ExecutionPolicy&& policy, const memory_order order = memory_order_seq_cst) const; + /** * \brief Loads and returns the current value of the atomic object * \return The current value of this object @@ -512,6 +548,18 @@ public: STDGPU_HOST_DEVICE void store(const T desired, const memory_order order = memory_order_seq_cst); + /** + * \brief Atomically replaces the current value with desired one + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy + * \param[in] desired The value to store to the atomic object + * \param[in] order The memory order + */ + template >)> + void + store(ExecutionPolicy&& policy, const T desired, const memory_order order = memory_order_seq_cst); + /** * \brief Replaces the current value with desired * \param[in] desired The value to store to the atomic object diff --git a/src/stdgpu/cuda/CMakeLists.txt b/src/stdgpu/cuda/CMakeLists.txt index 781d420b0..27889cf58 100644 --- a/src/stdgpu/cuda/CMakeLists.txt +++ b/src/stdgpu/cuda/CMakeLists.txt @@ -22,7 +22,8 @@ if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.23) TYPE HEADERS BASE_DIRS ${STDGPU_INCLUDE_LOCAL_DIR} FILES impl/atomic_detail.cuh - impl/error.h) + impl/error.h + impl/memory_detail.h) endif() target_compile_features(stdgpu PUBLIC cuda_std_17) diff --git a/src/stdgpu/cuda/impl/memory_detail.h b/src/stdgpu/cuda/impl/memory_detail.h new file mode 100644 index 000000000..8b9b6035f --- /dev/null +++ b/src/stdgpu/cuda/impl/memory_detail.h @@ -0,0 +1,79 @@ +/* + * Copyright 2024 Patrick Stotko + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef STDGPU_CUDA_MEMORY_DETAIL_H +#define STDGPU_CUDA_MEMORY_DETAIL_H + +#include +#include + +#include + +namespace stdgpu::cuda +{ + +template >)> +void +memcpy_impl(ExecutionPolicy&& policy, + void* destination, + const void* source, + index64_t bytes, + cudaMemcpyKind kind, + bool needs_sychronization) +{ + cudaStream_t stream = thrust::cuda_cub::stream(thrust::detail::derived_cast(thrust::detail::strip_const(policy))); + + STDGPU_CUDA_SAFE_CALL(cudaMemcpyAsync(destination, source, static_cast(bytes), kind, stream)); + if (needs_sychronization) + { + STDGPU_CUDA_SAFE_CALL(cudaStreamSynchronize(stream)); + } +} + +template >)> +void +memcpy_device_to_device(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes) +{ + memcpy_impl(std::forward(policy), destination, source, bytes, cudaMemcpyDeviceToDevice, false); +} + +template >)> +void +memcpy_device_to_host(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes) +{ + memcpy_impl(std::forward(policy), destination, source, bytes, cudaMemcpyDeviceToHost, true); +} + +template >)> +void +memcpy_host_to_device(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes) +{ + memcpy_impl(std::forward(policy), destination, source, bytes, cudaMemcpyHostToDevice, false); +} + +template >)> +void +memcpy_host_to_host(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes) +{ + memcpy_impl(std::forward(policy), destination, source, bytes, cudaMemcpyHostToHost, true); +} + +} // namespace stdgpu::cuda + +#endif // STDGPU_CUDA_MEMORY_DETAIL_H diff --git a/src/stdgpu/cuda/memory.h b/src/stdgpu/cuda/memory.h index 7c6af7a17..71805cccb 100644 --- a/src/stdgpu/cuda/memory.h +++ b/src/stdgpu/cuda/memory.h @@ -17,6 +17,8 @@ #define STDGPU_CUDA_MEMORY_H #include +#include +#include namespace stdgpu::cuda { @@ -90,6 +92,56 @@ memcpy_host_to_device(void* destination, const void* source, index64_t bytes); void memcpy_host_to_host(void* destination, const void* source, index64_t bytes); +/** + * \brief Performs platform-specific memory copy from device to device + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy + * \param[in] destination The destination array + * \param[in] source The source array + * \param[in] bytes The size of the allocated array + */ +template >)> +void +memcpy_device_to_device(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes); + +/** + * \brief Performs platform-specific memory copy from device to host + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy + * \param[in] destination The destination array + * \param[in] source The source array + * \param[in] bytes The size of the allocated array + */ +template >)> +void +memcpy_device_to_host(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes); + +/** + * \brief Performs platform-specific memory copy from host to device + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy + * \param[in] destination The destination array + * \param[in] source The source array + * \param[in] bytes The size of the allocated array + */ +template >)> +void +memcpy_host_to_device(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes); + +/** + * \brief Performs platform-specific memory copy from host to host + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy + * \param[in] destination The destination array + * \param[in] source The source array + * \param[in] bytes The size of the allocated array + */ +template >)> +void +memcpy_host_to_host(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes); + } // namespace stdgpu::cuda +#include + #endif // STDGPU_CUDA_MEMORY_H diff --git a/src/stdgpu/hip/CMakeLists.txt b/src/stdgpu/hip/CMakeLists.txt index 4d8f9c76c..c84bc883c 100644 --- a/src/stdgpu/hip/CMakeLists.txt +++ b/src/stdgpu/hip/CMakeLists.txt @@ -21,7 +21,8 @@ if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.23) TYPE HEADERS BASE_DIRS ${STDGPU_INCLUDE_LOCAL_DIR} FILES impl/atomic_detail.h - impl/error.h) + impl/error.h + impl/memory_detail.h) endif() target_compile_features(stdgpu PUBLIC hip_std_17) diff --git a/src/stdgpu/hip/impl/memory_detail.h b/src/stdgpu/hip/impl/memory_detail.h new file mode 100644 index 000000000..fb945bc6d --- /dev/null +++ b/src/stdgpu/hip/impl/memory_detail.h @@ -0,0 +1,80 @@ +/* + * Copyright 2024 Patrick Stotko + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef STDGPU_HIP_MEMORY_DETAIL_H +#define STDGPU_HIP_MEMORY_DETAIL_H + +#include +#include + +#include + +namespace stdgpu::hip +{ + +template >)> +void +memcpy_impl(ExecutionPolicy&& policy, + void* destination, + const void* source, + index64_t bytes, + hipMemcpyKind kind, + bool needs_sychronization) +{ + cudaStream_t stream = + thrust::hip_rocprim::stream(thrust::detail::derived_cast(thrust::detail::strip_const(policy))); + + STDGPU_HIP_SAFE_CALL(hipMemcpyAsync(destination, source, static_cast(bytes), kind, stream)); + if (needs_sychronization) + { + STDGPU_HIP_SAFE_CALL(hipStreamSynchronize(stream)); + } +} + +template >)> +void +memcpy_device_to_device(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes) +{ + memcpy_impl(std::forward(policy), destination, source, bytes, hipMemcpyDeviceToDevice, false); +} + +template >)> +void +memcpy_device_to_host(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes) +{ + memcpy_impl(std::forward(policy), destination, source, bytes, hipMemcpyDeviceToHost, true); +} + +template >)> +void +memcpy_host_to_device(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes) +{ + memcpy_impl(std::forward(policy), destination, source, bytes, hipMemcpyHostToDevice, false); +} + +template >)> +void +memcpy_host_to_host(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes) +{ + memcpy_impl(std::forward(policy), destination, source, bytes, hipMemcpyHostToHost, true); +} + +} // namespace stdgpu::hip + +#endif // STDGPU_HIP_MEMORY_DETAIL_H diff --git a/src/stdgpu/hip/memory.h b/src/stdgpu/hip/memory.h index 9c215c49e..840a5c5a9 100644 --- a/src/stdgpu/hip/memory.h +++ b/src/stdgpu/hip/memory.h @@ -17,6 +17,8 @@ #define STDGPU_HIP_MEMORY_H #include +#include +#include namespace stdgpu::hip { @@ -90,6 +92,56 @@ memcpy_host_to_device(void* destination, const void* source, index64_t bytes); void memcpy_host_to_host(void* destination, const void* source, index64_t bytes); +/** + * \brief Performs platform-specific memory copy from device to device + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy + * \param[in] destination The destination array + * \param[in] source The source array + * \param[in] bytes The size of the allocated array + */ +template >)> +void +memcpy_device_to_device(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes); + +/** + * \brief Performs platform-specific memory copy from device to host + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy + * \param[in] destination The destination array + * \param[in] source The source array + * \param[in] bytes The size of the allocated array + */ +template >)> +void +memcpy_device_to_host(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes); + +/** + * \brief Performs platform-specific memory copy from host to device + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy + * \param[in] destination The destination array + * \param[in] source The source array + * \param[in] bytes The size of the allocated array + */ +template >)> +void +memcpy_host_to_device(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes); + +/** + * \brief Performs platform-specific memory copy from host to host + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy + * \param[in] destination The destination array + * \param[in] source The source array + * \param[in] bytes The size of the allocated array + */ +template >)> +void +memcpy_host_to_host(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes); + } // namespace stdgpu::hip +#include + #endif // STDGPU_HIP_MEMORY_H diff --git a/src/stdgpu/impl/atomic_detail.cuh b/src/stdgpu/impl/atomic_detail.cuh index 09f7f8d8c..d64a8cbb6 100644 --- a/src/stdgpu/impl/atomic_detail.cuh +++ b/src/stdgpu/impl/atomic_detail.cuh @@ -205,12 +205,30 @@ atomic::load(const memory_order order) const return _value_ref.load(order); } +template +template >)> +inline T +atomic::load(ExecutionPolicy&& policy, const memory_order order) const +{ + return _value_ref.load(std::forward(policy), order); +} + template inline STDGPU_HOST_DEVICE atomic::operator T() const { return _value_ref.operator T(); } +template +template >)> +inline void +atomic::store(ExecutionPolicy&& policy, const T desired, const memory_order order) +{ + _value_ref.store(std::forward(policy), desired, order); +} + template inline STDGPU_HOST_DEVICE void atomic::store(const T desired, const memory_order order) @@ -430,12 +448,36 @@ atomic_ref::load([[maybe_unused]] const memory_order order) const detail::atomic_consistency_thread_fence(order); #else - copyDevice2HostArray(_value, 1, &local_value, MemoryCopy::NO_CHECK); + local_value = load(execution::device, order); #endif return local_value; } +template +template >)> +inline T +atomic_ref::load(ExecutionPolicy&& policy, [[maybe_unused]] const memory_order order) const +{ + if (_value == nullptr) + { + return 0; + } + + T local_value; + stdgpu::detail::memcpy(std::forward(policy), + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + static_cast(const_cast*>(&local_value)), + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + static_cast(const_cast*>(_value)), + 1 * static_cast(sizeof(T)), // NOLINT(bugprone-sizeof-expression) + stdgpu::dynamic_memory_type::host, + stdgpu::dynamic_memory_type::device); + + return local_value; +} + template inline STDGPU_HOST_DEVICE atomic_ref::operator T() const { @@ -458,10 +500,31 @@ atomic_ref::store(const T desired, [[maybe_unused]] const memory_order order) detail::atomic_store_thread_fence(order); #else - copyHost2DeviceArray(&desired, 1, _value, MemoryCopy::NO_CHECK); + store(execution::device, desired, order); #endif } +template +template >)> +inline void +atomic_ref::store(ExecutionPolicy&& policy, const T desired, [[maybe_unused]] const memory_order order) +{ + if (_value == nullptr) + { + return; + } + + stdgpu::detail::memcpy(std::forward(policy), + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + static_cast(const_cast*>(_value)), + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + static_cast(const_cast*>(&desired)), + 1 * static_cast(sizeof(T)), // NOLINT(bugprone-sizeof-expression) + stdgpu::dynamic_memory_type::device, + stdgpu::dynamic_memory_type::host); +} + // NOLINTNEXTLINE(misc-unconventional-assign-operator,cppcoreguidelines-c-copy-assignment-signature) template // NOLINTNEXTLINE(misc-unconventional-assign-operator,cppcoreguidelines-c-copy-assignment-signature) diff --git a/src/stdgpu/impl/memory_detail.h b/src/stdgpu/impl/memory_detail.h index f98f4b7db..91fc1e47b 100644 --- a/src/stdgpu/impl/memory_detail.h +++ b/src/stdgpu/impl/memory_detail.h @@ -28,6 +28,8 @@ #include #include +#include STDGPU_DETAIL_BACKEND_HEADER(memory.h) + namespace stdgpu::detail { @@ -52,6 +54,50 @@ memcpy(void* destination, dynamic_memory_type source_type, const bool external_memory); +template >)> +void +memcpy(ExecutionPolicy&& policy, + void* destination, + const void* source, + index64_t bytes, + dynamic_memory_type destination_type, + dynamic_memory_type source_type) +{ + if (source_type == dynamic_memory_type::device && destination_type == dynamic_memory_type::device) + { + stdgpu::STDGPU_BACKEND_NAMESPACE::memcpy_device_to_device(std::forward(policy), + destination, + source, + bytes); + } + else if (source_type == dynamic_memory_type::device && destination_type == dynamic_memory_type::host) + { + stdgpu::STDGPU_BACKEND_NAMESPACE::memcpy_device_to_host(std::forward(policy), + destination, + source, + bytes); + } + else if (source_type == dynamic_memory_type::host && destination_type == dynamic_memory_type::device) + { + stdgpu::STDGPU_BACKEND_NAMESPACE::memcpy_host_to_device(std::forward(policy), + destination, + source, + bytes); + } + else if (source_type == dynamic_memory_type::host && destination_type == dynamic_memory_type::host) + { + stdgpu::STDGPU_BACKEND_NAMESPACE::memcpy_host_to_host(std::forward(policy), + destination, + source, + bytes); + } + else + { + printf("stdgpu::detail::memcpy : Unsupported dynamic source or destination memory type\n"); + return; + } +} + template class uninitialized_fill_functor { diff --git a/src/stdgpu/openmp/CMakeLists.txt b/src/stdgpu/openmp/CMakeLists.txt index 8d18d1a4e..7002eede7 100644 --- a/src/stdgpu/openmp/CMakeLists.txt +++ b/src/stdgpu/openmp/CMakeLists.txt @@ -21,7 +21,8 @@ if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.23) target_sources(stdgpu PUBLIC FILE_SET stdgpu_backend_header_implementations TYPE HEADERS BASE_DIRS ${STDGPU_INCLUDE_LOCAL_DIR} - FILES impl/atomic_detail.h) + FILES impl/atomic_detail.h + impl/memory_detail.h) endif() target_compile_definitions(stdgpu PUBLIC THRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_OMP) diff --git a/src/stdgpu/openmp/impl/memory_detail.h b/src/stdgpu/openmp/impl/memory_detail.h new file mode 100644 index 000000000..bceb1b735 --- /dev/null +++ b/src/stdgpu/openmp/impl/memory_detail.h @@ -0,0 +1,61 @@ +/* + * Copyright 2024 Patrick Stotko + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef STDGPU_OPENMP_MEMORY_DETAIL_H +#define STDGPU_OPENMP_MEMORY_DETAIL_H + +#include + +namespace stdgpu::openmp +{ + +template >)> +void +memcpy_device_to_device([[maybe_unused]] ExecutionPolicy&& policy, + void* destination, + const void* source, + index64_t bytes) +{ + std::memcpy(destination, source, static_cast(bytes)); +} + +template >)> +void +memcpy_device_to_host([[maybe_unused]] ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes) +{ + std::memcpy(destination, source, static_cast(bytes)); +} + +template >)> +void +memcpy_host_to_device([[maybe_unused]] ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes) +{ + std::memcpy(destination, source, static_cast(bytes)); +} + +template >)> +void +memcpy_host_to_host([[maybe_unused]] ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes) +{ + std::memcpy(destination, source, static_cast(bytes)); +} + +} // namespace stdgpu::openmp + +#endif // STDGPU_OPENMP_MEMORY_DETAIL_H diff --git a/src/stdgpu/openmp/memory.h b/src/stdgpu/openmp/memory.h index 130ec9d7b..11a2149fd 100644 --- a/src/stdgpu/openmp/memory.h +++ b/src/stdgpu/openmp/memory.h @@ -17,6 +17,8 @@ #define STDGPU_OPENMP_MEMORY_H #include +#include +#include namespace stdgpu::openmp { @@ -90,6 +92,56 @@ memcpy_host_to_device(void* destination, const void* source, index64_t bytes); void memcpy_host_to_host(void* destination, const void* source, index64_t bytes); +/** + * \brief Performs platform-specific memory copy from device to device + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy + * \param[in] destination The destination array + * \param[in] source The source array + * \param[in] bytes The size of the allocated array + */ +template >)> +void +memcpy_device_to_device(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes); + +/** + * \brief Performs platform-specific memory copy from device to host + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy + * \param[in] destination The destination array + * \param[in] source The source array + * \param[in] bytes The size of the allocated array + */ +template >)> +void +memcpy_device_to_host(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes); + +/** + * \brief Performs platform-specific memory copy from host to device + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy + * \param[in] destination The destination array + * \param[in] source The source array + * \param[in] bytes The size of the allocated array + */ +template >)> +void +memcpy_host_to_device(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes); + +/** + * \brief Performs platform-specific memory copy from host to host + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy + * \param[in] destination The destination array + * \param[in] source The source array + * \param[in] bytes The size of the allocated array + */ +template >)> +void +memcpy_host_to_host(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes); + } // namespace stdgpu::openmp +#include + #endif // STDGPU_OPENMP_MEMORY_H diff --git a/tests/stdgpu/atomic.inc b/tests/stdgpu/atomic.inc index 7faddf724..9eccc6676 100644 --- a/tests/stdgpu/atomic.inc +++ b/tests/stdgpu/atomic.inc @@ -174,7 +174,12 @@ empty_container() const T new_value = static_cast(42); empty_container.store(new_value); + + EXPECT_EQ(empty_container.load(), T()); + empty_container = new_value; + + EXPECT_EQ(empty_container.load(), T()); } TEST_F(stdgpu_atomic, empty_container_int) @@ -2449,5 +2454,26 @@ TEST_F(stdgpu_atomic, custom_execution_policy) stdgpu::atomic value = stdgpu::atomic::createDeviceObject(policy); + EXPECT_EQ(value.load(policy), int()); + + const int new_value = 42; + value.store(policy, new_value); + + EXPECT_EQ(value.load(policy), new_value); + stdgpu::atomic::destroyDeviceObject(policy, value); } + +TEST_F(stdgpu_atomic, custom_execution_policy_empty_container) +{ + test_utils::custom_device_policy policy; + + stdgpu::atomic empty_container; + + EXPECT_EQ(empty_container.load(policy), int()); + + const int new_value = 42; + empty_container.store(policy, new_value); + + EXPECT_EQ(empty_container.load(policy), int()); +} diff --git a/tests/stdgpu/main.cpp b/tests/stdgpu/main.cpp index 04e363ec0..b5fa0c1c1 100644 --- a/tests/stdgpu/main.cpp +++ b/tests/stdgpu/main.cpp @@ -66,6 +66,7 @@ main(int argc, char* argv[]) stdgpu::get_deallocation_count(stdgpu::dynamic_memory_type::host), stdgpu::get_allocation_count(stdgpu::dynamic_memory_type::host) - stdgpu::get_deallocation_count(stdgpu::dynamic_memory_type::host)); + printf("+---------------------------------------------------------+\n"); return result; }