-
Notifications
You must be signed in to change notification settings - Fork 4
/
p3a_allocator.hpp
158 lines (141 loc) · 3.99 KB
/
p3a_allocator.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
#pragma once
#include <cstdint> //int64_t
#include <cstdlib> //malloc
#include <Kokkos_Macros.hpp>
#include "p3a_macros.hpp"
#include "p3a_execution.hpp"
namespace p3a {
class allocation_failure : public std::bad_alloc {
char message[100];
public:
allocation_failure(char const* memory_space_arg, std::int64_t n_arg)
{
using long_long = long long;
auto const long_long_n = long_long(n_arg);
std::snprintf(message, sizeof(message), "failed to allocate %lld bytes in %s memory",
long_long_n, memory_space_arg);
}
virtual const char* what() const noexcept override
{
return message;
}
};
template <class T>
class host_allocator {
public:
using size_type = std::int64_t;
template <class U> struct rebind { using other = p3a::host_allocator<U>; };
static T* allocate(size_type n)
{
auto const result = std::malloc(std::size_t(n) * sizeof(T));
if ((result == nullptr) && (n != 0)) {
throw allocation_failure("CPU", n);
}
return static_cast<T*>(result);
}
static void deallocate(T* p, size_type)
{
std::free(p);
}
};
#ifdef KOKKOS_ENABLE_CUDA
template <class T>
class cuda_host_pinned_allocator {
public:
using size_type = std::int64_t;
template <class U> struct rebind { using other = p3a::cuda_host_pinned_allocator<U>; };
P3A_NEVER_INLINE static T* allocate(size_type n)
{
void* ptr = nullptr;
auto const result = cudaMallocHost(&ptr, std::size_t(n) * sizeof(T));
if (result != cudaSuccess) {
throw allocation_failure("CUDA host pinned", n);
}
return static_cast<T*>(ptr);
}
P3A_NEVER_INLINE static void deallocate(T* p, size_type)
{
details::handle_cuda_error(cudaFreeHost(p));
}
};
template <class T>
class cuda_device_allocator {
public:
using size_type = std::int64_t;
template <class U> struct rebind { using other = p3a::cuda_device_allocator<U>; };
P3A_NEVER_INLINE static T* allocate(size_type n)
{
void* ptr = nullptr;
auto const result = cudaMalloc(&ptr, std::size_t(n) * sizeof(T));
if (result != cudaSuccess) {
throw allocation_failure("CUDA device", n);
}
return static_cast<T*>(ptr);
}
P3A_NEVER_INLINE static void deallocate(T* p, size_type)
{
details::handle_cuda_error(cudaFree(p));
}
};
#endif
#ifdef KOKKOS_ENABLE_HIP
template <class T>
class hip_host_pinned_allocator {
public:
using size_type = std::int64_t;
template <class U> struct rebind { using other = p3a::hip_host_pinned_allocator<U>; };
P3A_NEVER_INLINE static T* allocate(size_type n)
{
void* ptr = nullptr;
auto const result = hipHostMalloc(&ptr, std::size_t(n) * sizeof(T), hipHostMallocDefault);
if (result != hipSuccess) {
throw allocation_failure("HIP host pinned", n);
}
return static_cast<T*>(ptr);
}
P3A_NEVER_INLINE static void deallocate(T* p, size_type)
{
details::handle_hip_error(hipHostFree(p));
}
};
template <class T>
class hip_device_allocator {
public:
using size_type = std::int64_t;
template <class U> struct rebind { using other = p3a::hip_device_allocator<U>; };
P3A_NEVER_INLINE static T* allocate(size_type n)
{
void* ptr = nullptr;
auto const result = hipMalloc(&ptr, std::size_t(n) * sizeof(T));
if (result != hipSuccess) {
throw allocation_failure("HIP device", n);
}
return static_cast<T*>(ptr);
}
P3A_NEVER_INLINE static void deallocate(T* p, size_type)
{
details::handle_hip_error(hipFree(p));
}
};
#endif
#if defined(KOKKOS_ENABLE_CUDA)
template <class T>
using device_allocator = cuda_device_allocator<T>;
#elif defined(KOKKOS_ENABLE_HIP)
template <class T>
using device_allocator = hip_device_allocator<T>;
#else
template <class T>
using device_allocator = host_allocator<T>;
#endif
#if defined(KOKKOS_ENABLE_CUDA)
template <class T>
using host_pinned_allocator = cuda_host_pinned_allocator<T>;
#elif defined(KOKKOS_ENABLE_HIP)
template <class T>
using host_pinned_allocator = hip_host_pinned_allocator<T>;
#else
template <class T>
using host_pinned_allocator = host_allocator<T>;
#endif
}