Skip to content

Commit 5601dfe

Browse files
committed
atomic: Extend support for custom execution policies
1 parent f51a985 commit 5601dfe

File tree

12 files changed

+551
-2
lines changed

12 files changed

+551
-2
lines changed

benchmarks/stdgpu/main.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ main(int argc, char* argv[])
6565
stdgpu::get_deallocation_count(stdgpu::dynamic_memory_type::host),
6666
stdgpu::get_allocation_count(stdgpu::dynamic_memory_type::host) -
6767
stdgpu::get_deallocation_count(stdgpu::dynamic_memory_type::host));
68+
printf("+---------------------------------------------------------+\n");
6869

6970
return EXIT_SUCCESS;
7071
}

src/stdgpu/atomic.cuh

+48
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,18 @@ public:
210210
STDGPU_HOST_DEVICE T
211211
load(const memory_order order = memory_order_seq_cst) const;
212212

213+
/**
214+
* \brief Atomically loads and returns the current value of the atomic object
215+
* \tparam ExecutionPolicy The type of the execution policy
216+
* \param[in] policy The execution policy
217+
* \param[in] order The memory order
218+
* \return The current value of this object
219+
*/
220+
template <typename ExecutionPolicy,
221+
STDGPU_DETAIL_OVERLOAD_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
222+
T
223+
load(ExecutionPolicy&& policy, const memory_order order = memory_order_seq_cst) const;
224+
213225
/**
214226
* \brief Atomically loads and returns the current value of the atomic object
215227
* \return The current value of this object
@@ -225,6 +237,18 @@ public:
225237
STDGPU_HOST_DEVICE void
226238
store(const T desired, const memory_order order = memory_order_seq_cst);
227239

240+
/**
241+
* \brief Atomically replaces the current value with desired one
242+
* \tparam ExecutionPolicy The type of the execution policy
243+
* \param[in] policy The execution policy
244+
* \param[in] desired The value to store to the atomic object
245+
* \param[in] order The memory order
246+
*/
247+
template <typename ExecutionPolicy,
248+
STDGPU_DETAIL_OVERLOAD_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
249+
void
250+
store(ExecutionPolicy&& policy, const T desired, const memory_order order = memory_order_seq_cst);
251+
228252
/**
229253
* \brief Atomically replaces the current value with desired one
230254
* \param[in] desired The value to store to the atomic object
@@ -496,6 +520,18 @@ public:
496520
STDGPU_HOST_DEVICE T
497521
load(const memory_order order = memory_order_seq_cst) const;
498522

523+
/**
524+
* \brief Atomically loads and returns the current value of the atomic object
525+
* \tparam ExecutionPolicy The type of the execution policy
526+
* \param[in] policy The execution policy
527+
* \param[in] order The memory order
528+
* \return The current value of this object
529+
*/
530+
template <typename ExecutionPolicy,
531+
STDGPU_DETAIL_OVERLOAD_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
532+
T
533+
load(ExecutionPolicy&& policy, const memory_order order = memory_order_seq_cst) const;
534+
499535
/**
500536
* \brief Loads and returns the current value of the atomic object
501537
* \return The current value of this object
@@ -512,6 +548,18 @@ public:
512548
STDGPU_HOST_DEVICE void
513549
store(const T desired, const memory_order order = memory_order_seq_cst);
514550

551+
/**
552+
* \brief Atomically replaces the current value with desired one
553+
* \tparam ExecutionPolicy The type of the execution policy
554+
* \param[in] policy The execution policy
555+
* \param[in] desired The value to store to the atomic object
556+
* \param[in] order The memory order
557+
*/
558+
template <typename ExecutionPolicy,
559+
STDGPU_DETAIL_OVERLOAD_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
560+
void
561+
store(ExecutionPolicy&& policy, const T desired, const memory_order order = memory_order_seq_cst);
562+
515563
/**
516564
* \brief Replaces the current value with desired
517565
* \param[in] desired The value to store to the atomic object

src/stdgpu/cuda/impl/memory_detail.h

+81
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
/*
2+
* Copyright 2024 Patrick Stotko
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS,
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*/
15+
16+
#ifndef STDGPU_CUDA_MEMORY_DETAIL_H
17+
#define STDGPU_CUDA_MEMORY_DETAIL_H
18+
19+
#include <stdgpu/cuda/memory.h>
20+
21+
#include <thrust/detail/execution_policy.h>
22+
#include <thrust/system/cuda/detail/util.h>
23+
24+
#include <stdgpu/cuda/impl/error.h>
25+
26+
namespace stdgpu::cuda
27+
{
28+
29+
template <typename ExecutionPolicy, STDGPU_DETAIL_OVERLOAD_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
30+
void
31+
memcpy_impl(ExecutionPolicy&& policy,
32+
void* destination,
33+
const void* source,
34+
index64_t bytes,
35+
cudaMemcpyKind kind,
36+
bool needs_sychronization)
37+
{
38+
cudaStream_t stream = thrust::cuda_cub::stream(thrust::detail::derived_cast(thrust::detail::strip_const(policy)));
39+
40+
STDGPU_CUDA_SAFE_CALL(cudaMemcpyAsync(destination, source, static_cast<std::size_t>(bytes), kind, stream));
41+
if (needs_sychronization)
42+
{
43+
STDGPU_CUDA_SAFE_CALL(cudaStreamSynchronize(stream));
44+
}
45+
}
46+
47+
template <typename ExecutionPolicy,
48+
STDGPU_DETAIL_OVERLOAD_DEFINITION_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
49+
void
50+
memcpy_device_to_device(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes)
51+
{
52+
memcpy_impl(std::forward<ExecutionPolicy>(policy), destination, source, bytes, cudaMemcpyDeviceToDevice, false);
53+
}
54+
55+
template <typename ExecutionPolicy,
56+
STDGPU_DETAIL_OVERLOAD_DEFINITION_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
57+
void
58+
memcpy_device_to_host(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes)
59+
{
60+
memcpy_impl(std::forward<ExecutionPolicy>(policy), destination, source, bytes, cudaMemcpyDeviceToHost, true);
61+
}
62+
63+
template <typename ExecutionPolicy,
64+
STDGPU_DETAIL_OVERLOAD_DEFINITION_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
65+
void
66+
memcpy_host_to_device(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes)
67+
{
68+
memcpy_impl(std::forward<ExecutionPolicy>(policy), destination, source, bytes, cudaMemcpyHostToDevice, false);
69+
}
70+
71+
template <typename ExecutionPolicy,
72+
STDGPU_DETAIL_OVERLOAD_DEFINITION_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
73+
void
74+
memcpy_host_to_host(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes)
75+
{
76+
memcpy_impl(std::forward<ExecutionPolicy>(policy), destination, source, bytes, cudaMemcpyHostToHost, true);
77+
}
78+
79+
} // namespace stdgpu::cuda
80+
81+
#endif // STDGPU_CUDA_MEMORY_DETAIL_H

src/stdgpu/cuda/memory.h

+52
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
#define STDGPU_CUDA_MEMORY_H
1818

1919
#include <stdgpu/cstddef.h>
20+
#include <stdgpu/execution.h>
21+
#include <stdgpu/type_traits.h>
2022

2123
namespace stdgpu::cuda
2224
{
@@ -90,6 +92,56 @@ memcpy_host_to_device(void* destination, const void* source, index64_t bytes);
9092
void
9193
memcpy_host_to_host(void* destination, const void* source, index64_t bytes);
9294

95+
/**
96+
* \brief Performs platform-specific memory copy from device to device
97+
* \tparam ExecutionPolicy The type of the execution policy
98+
* \param[in] policy The execution policy
99+
* \param[in] destination The destination array
100+
* \param[in] source The source array
101+
* \param[in] bytes The size of the allocated array
102+
*/
103+
template <typename ExecutionPolicy, STDGPU_DETAIL_OVERLOAD_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
104+
void
105+
memcpy_device_to_device(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes);
106+
107+
/**
108+
* \brief Performs platform-specific memory copy from device to host
109+
* \tparam ExecutionPolicy The type of the execution policy
110+
* \param[in] policy The execution policy
111+
* \param[in] destination The destination array
112+
* \param[in] source The source array
113+
* \param[in] bytes The size of the allocated array
114+
*/
115+
template <typename ExecutionPolicy, STDGPU_DETAIL_OVERLOAD_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
116+
void
117+
memcpy_device_to_host(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes);
118+
119+
/**
120+
* \brief Performs platform-specific memory copy from host to device
121+
* \tparam ExecutionPolicy The type of the execution policy
122+
* \param[in] policy The execution policy
123+
* \param[in] destination The destination array
124+
* \param[in] source The source array
125+
* \param[in] bytes The size of the allocated array
126+
*/
127+
template <typename ExecutionPolicy, STDGPU_DETAIL_OVERLOAD_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
128+
void
129+
memcpy_host_to_device(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes);
130+
131+
/**
132+
* \brief Performs platform-specific memory copy from host to host
133+
* \tparam ExecutionPolicy The type of the execution policy
134+
* \param[in] policy The execution policy
135+
* \param[in] destination The destination array
136+
* \param[in] source The source array
137+
* \param[in] bytes The size of the allocated array
138+
*/
139+
template <typename ExecutionPolicy, STDGPU_DETAIL_OVERLOAD_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
140+
void
141+
memcpy_host_to_host(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes);
142+
93143
} // namespace stdgpu::cuda
94144

145+
#include <stdgpu/cuda/impl/memory_detail.h>
146+
95147
#endif // STDGPU_CUDA_MEMORY_H

src/stdgpu/hip/impl/memory_detail.h

+82
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
/*
2+
* Copyright 2024 Patrick Stotko
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS,
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*/
15+
16+
#ifndef STDGPU_HIP_MEMORY_DETAIL_H
17+
#define STDGPU_HIP_MEMORY_DETAIL_H
18+
19+
#include <stdgpu/hip/memory.h>
20+
21+
#include <thrust/detail/execution_policy.h>
22+
#include <thrust/system/hip/detail/util.h>
23+
24+
#include <stdgpu/hip/impl/error.h>
25+
26+
namespace stdgpu::hip
27+
{
28+
29+
template <typename ExecutionPolicy, STDGPU_DETAIL_OVERLOAD_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
30+
void
31+
memcpy_impl(ExecutionPolicy&& policy,
32+
void* destination,
33+
const void* source,
34+
index64_t bytes,
35+
hipMemcpyKind kind,
36+
bool needs_sychronization)
37+
{
38+
cudaStream_t stream =
39+
thrust::hip_rocprim::stream(thrust::detail::derived_cast(thrust::detail::strip_const(policy)));
40+
41+
STDGPU_HIP_SAFE_CALL(hipMemcpyAsync(destination, source, static_cast<std::size_t>(bytes), kind, stream));
42+
if (needs_sychronization)
43+
{
44+
STDGPU_HIP_SAFE_CALL(hipStreamSynchronize(stream));
45+
}
46+
}
47+
48+
template <typename ExecutionPolicy,
49+
STDGPU_DETAIL_OVERLOAD_DEFINITION_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
50+
void
51+
memcpy_device_to_device(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes)
52+
{
53+
memcpy_impl(std::forward<ExecutionPolicy>(policy), destination, source, bytes, hipMemcpyDeviceToDevice, false);
54+
}
55+
56+
template <typename ExecutionPolicy,
57+
STDGPU_DETAIL_OVERLOAD_DEFINITION_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
58+
void
59+
memcpy_device_to_host(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes)
60+
{
61+
memcpy_impl(std::forward<ExecutionPolicy>(policy), destination, source, bytes, hipMemcpyDeviceToHost, true);
62+
}
63+
64+
template <typename ExecutionPolicy,
65+
STDGPU_DETAIL_OVERLOAD_DEFINITION_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
66+
void
67+
memcpy_host_to_device(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes)
68+
{
69+
memcpy_impl(std::forward<ExecutionPolicy>(policy), destination, source, bytes, hipMemcpyHostToDevice, false);
70+
}
71+
72+
template <typename ExecutionPolicy,
73+
STDGPU_DETAIL_OVERLOAD_DEFINITION_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
74+
void
75+
memcpy_host_to_host(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes)
76+
{
77+
memcpy_impl(std::forward<ExecutionPolicy>(policy), destination, source, bytes, hipMemcpyHostToHost, true);
78+
}
79+
80+
} // namespace stdgpu::hip
81+
82+
#endif // STDGPU_HIP_MEMORY_DETAIL_H

src/stdgpu/hip/memory.h

+52
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
#define STDGPU_HIP_MEMORY_H
1818

1919
#include <stdgpu/cstddef.h>
20+
#include <stdgpu/execution.h>
21+
#include <stdgpu/type_traits.h>
2022

2123
namespace stdgpu::hip
2224
{
@@ -90,6 +92,56 @@ memcpy_host_to_device(void* destination, const void* source, index64_t bytes);
9092
void
9193
memcpy_host_to_host(void* destination, const void* source, index64_t bytes);
9294

95+
/**
96+
* \brief Performs platform-specific memory copy from device to device
97+
* \tparam ExecutionPolicy The type of the execution policy
98+
* \param[in] policy The execution policy
99+
* \param[in] destination The destination array
100+
* \param[in] source The source array
101+
* \param[in] bytes The size of the allocated array
102+
*/
103+
template <typename ExecutionPolicy, STDGPU_DETAIL_OVERLOAD_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
104+
void
105+
memcpy_device_to_device(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes);
106+
107+
/**
108+
* \brief Performs platform-specific memory copy from device to host
109+
* \tparam ExecutionPolicy The type of the execution policy
110+
* \param[in] policy The execution policy
111+
* \param[in] destination The destination array
112+
* \param[in] source The source array
113+
* \param[in] bytes The size of the allocated array
114+
*/
115+
template <typename ExecutionPolicy, STDGPU_DETAIL_OVERLOAD_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
116+
void
117+
memcpy_device_to_host(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes);
118+
119+
/**
120+
* \brief Performs platform-specific memory copy from host to device
121+
* \tparam ExecutionPolicy The type of the execution policy
122+
* \param[in] policy The execution policy
123+
* \param[in] destination The destination array
124+
* \param[in] source The source array
125+
* \param[in] bytes The size of the allocated array
126+
*/
127+
template <typename ExecutionPolicy, STDGPU_DETAIL_OVERLOAD_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
128+
void
129+
memcpy_host_to_device(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes);
130+
131+
/**
132+
* \brief Performs platform-specific memory copy from host to host
133+
* \tparam ExecutionPolicy The type of the execution policy
134+
* \param[in] policy The execution policy
135+
* \param[in] destination The destination array
136+
* \param[in] source The source array
137+
* \param[in] bytes The size of the allocated array
138+
*/
139+
template <typename ExecutionPolicy, STDGPU_DETAIL_OVERLOAD_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
140+
void
141+
memcpy_host_to_host(ExecutionPolicy&& policy, void* destination, const void* source, index64_t bytes);
142+
93143
} // namespace stdgpu::hip
94144

145+
#include <stdgpu/hip/impl/memory_detail.h>
146+
95147
#endif // STDGPU_HIP_MEMORY_H

0 commit comments

Comments
 (0)