forked from SvenDeSmet/ESTL
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathuOpenCL.h
350 lines (305 loc) · 13.8 KB
/
uOpenCL.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
/* The information in this file is
* Copyright (C) 2011, Sven De Smet <[email protected]>
* and is subject to the terms and conditions of the
* GNU Lesser General Public License Version 2.1
* The license text is available from
* http://www.gnu.org/licenses/lgpl.html
*/
#ifndef UOPENCL_H
#define UOPENCL_H
#include <climits>
#include <cstdio>
#include <exception>
#include <sstream>
#include <string>
#include <vector>
#define __CL_ENABLE_EXCEPTIONS
#define CL_LOG_ERRORS stdout
#include "cl.hpp"
#include "Complex.h"
#include "tests/Timer.h"
/* Throws a CLException when an OpenCL status code is not CL_SUCCESS.
 *
 * The argument is evaluated exactly once and its value is cached. The previous
 * expansion used `result` both in the comparison and in the throw expression,
 * so a failing call passed directly as the argument (for example
 * xCLErr(clEnqueueReadBuffer(...))) was executed a second time and the status
 * of that second call was the one reported.
 *
 * The expansion stays a plain brace block so existing call sites keep working.
 * Before throwing it prints "Exception" to stdout and flushes it.
 */
#define xCLErr(result) { \
    const cl_int xCLErrStatus_ = (result); \
    if (xCLErrStatus_ != CL_SUCCESS) { \
        printf("Exception"); \
        fflush(stdout); \
        throw CLException(xCLErrStatus_); \
    } \
}
class CLException : public std::exception {
private:
cl::Error e;
public:
CLException(cl::Error iE) : e(iE) { }
virtual const char* what() const throw() { return handle().c_str(); }
std::string handle() const {
std::string msg = "[";
switch (e.err()) {
case CL_INVALID_COMMAND_QUEUE: msg += "CL_INVALID_COMMAND_QUEUE"; break;
case CL_INVALID_CONTEXT: msg += "CL_INVALID_CONTEXT"; break;
case CL_INVALID_MEM_OBJECT: msg += "CL_INVALID_MEM_OBJECT"; break;
case CL_INVALID_VALUE: msg += "CL_INVALID_VALUE"; break;
case CL_INVALID_PROGRAM_EXECUTABLE: msg += "CL_INVALID_PROGRAM_EXECUTABLE"; break;
case CL_INVALID_KERNEL: msg += "CL_INVALID_KERNEL"; break;
case CL_INVALID_KERNEL_ARGS: msg += "CL_INVALID_KERNEL_ARGS"; break;
case CL_INVALID_WORK_DIMENSION: msg += "CL_INVALID_WORK_DIMENSION"; break;
case CL_INVALID_WORK_GROUP_SIZE: msg += "CL_INVALID_WORK_GROUP_SIZE"; break;
case CL_INVALID_WORK_ITEM_SIZE: msg += "CL_INVALID_WORK_ITEM_SIZE"; break;
case CL_INVALID_GLOBAL_OFFSET: msg += "CL_INVALID_GLOBAL_OFFSET"; break;
case CL_OUT_OF_RESOURCES: msg += "CL_OUT_OF_RESOURCES"; break;
case CL_MEM_OBJECT_ALLOCATION_FAILURE: msg += "CL_MEM_OBJECT_ALLOCATION_FAILURE"; break;
case CL_INVALID_EVENT_WAIT_LIST: msg += "CL_INVALID_EVENT_WAIT_LIST"; break;
case CL_OUT_OF_HOST_MEMORY: msg += "CL_OUT_OF_HOST_MEMORY"; break;
}
msg += " (CL Exception)]";
printf("%s", msg.c_str());
fflush(stdout);
return msg;
}
};
class CLProgram {
private:
cl::Program* program;
public:
CLProgram(cl::Context& context, std::vector<std::string>& src, std::vector<cl::Device>& devicesToUse) {
cl::Program::Sources sources;
for (int s = 0; s < (int) src.size(); ++s) sources.push_back(std::make_pair(src[s].c_str(), src[s].length()));
program = new cl::Program(context, sources);
try { program->build(devicesToUse, "-cl-mad-enable"); }
catch (cl::Error cle) { printf("Error: %s", cle.what());
if (cle.err() == CL_BUILD_PROGRAM_FAILURE) {
cl_build_status status;
program->getBuildInfo<cl_build_status>(devicesToUse[0], CL_PROGRAM_BUILD_STATUS, &status);
if (status != CL_SUCCESS) { try {
size_t ret_val_size;
clGetProgramBuildInfo((*program)(), devicesToUse[0](), CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
printf("size: %i", (int) ret_val_size);
char* build_log = new char[ret_val_size + 1];
clGetProgramBuildInfo((*program)(), devicesToUse[0](), CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
build_log[ret_val_size] = '\0';
for (int s = 0; s < (int) src.size(); ++s) printf("%s", src[s].c_str());
printf("Kernel build error:\n%s", build_log);
delete [] build_log;
} catch (cl::Error err) { printf("Kernel build error: unkown (Failed to retrieve build log)."); throw err; }
} else printf("Kernel build successful.");
}
} catch (std::exception e) { printf("%s", e.what()); }
}
// cl::Program& getProgram() { return *program; }
cl::Kernel* getKernel(std::string kernelName) {
cl_int err;
cl::Kernel* result = new cl::Kernel(*program, kernelName.c_str(), &err); xCLErr(err);
return result;
}
~CLProgram() { delete program; }
};
class CLPlatform {
private:
cl::Platform platform;
public:
CLPlatform(cl::Platform iPlatform) : platform(iPlatform) { }
std::string name() {
std::string result;
xCLErr(platform.getInfo(CL_PLATFORM_NAME, &result));
return result;
}
std::string vendor() {
std::string result;
xCLErr(platform.getInfo(CL_PLATFORM_VENDOR, &result));
return result;
}
std::string profile() {
std::string result;
xCLErr(platform.getInfo(CL_PLATFORM_PROFILE, &result));
return result;
}
std::string version() {
std::string result;
xCLErr(platform.getInfo(CL_PLATFORM_VERSION, &result));
return result;
}
std::string extensions() {
std::string result;
xCLErr(platform.getInfo(CL_PLATFORM_EXTENSIONS, &result));
return result;
}
};
class CLDevice {
private:
cl::Device device;
public:
CLDevice(cl::Device iDevice) : device(iDevice) { }
std::string name() {
std::string result;
xCLErr(device.getInfo<std::string>(CL_DEVICE_NAME, &result));
return result;
}
std::string vendor() {
std::string result;
xCLErr(device.getInfo<std::string>(CL_DEVICE_VENDOR, &result));
return result;
}
bool available() {
cl_bool result;
xCLErr(device.getInfo<cl_bool>(CL_DEVICE_AVAILABLE, &result));
return result == CL_TRUE;
}
cl_ulong globalMemorySize() {
cl_ulong result;
xCLErr(device.getInfo<cl_ulong>(CL_DEVICE_GLOBAL_MEM_SIZE, &result));
return result;
}
cl_ulong localMemorySize() {
cl_ulong result;
xCLErr(device.getInfo<cl_ulong>(CL_DEVICE_LOCAL_MEM_SIZE, &result));
return result;
}
cl_uint maxComputeUnits() {
cl_uint result;
xCLErr(device.getInfo<cl_uint>(CL_DEVICE_MAX_COMPUTE_UNITS, &result));
return result;
}
size_t maxWorkGroupSize(int dim) {
size_t result[3];
xCLErr(device.getInfo<size_t>(CL_DEVICE_MAX_WORK_GROUP_SIZE, &result[0]));
return result[dim];
}
cl_uint maxClockFrequency() {
cl_uint result;
xCLErr(device.getInfo<cl_uint>(CL_DEVICE_MAX_CLOCK_FREQUENCY, &result));
return result;
}
cl_uint preferredVectorWidthFloat() {
cl_uint result;
xCLErr(device.getInfo<cl_uint>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, &result));
return result;
}
};
template <class D> class ComplexArrayCL {
private:
cl_mem data, reals, imaginaries;
int size;
bool planar;
public:
ComplexArrayCL(cl::Context& context, ComplexArray<D>* array) : size(array->getSize()), planar(array->getPlanar()) {
cl_int err;
if (planar) {
reals = clCreateBuffer(context(), CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, size*sizeof(D), array->getReals(), &err);
xCLErr(err);
imaginaries = clCreateBuffer(context(), CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, size*sizeof(D), array->getImaginaries(), &err);
xCLErr(err);
} else {
data = clCreateBuffer(context(), CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, 2*size*sizeof(D), array->getData(), &err); xCLErr(err);
}
}
ComplexArrayCL(cl::Context& context, int iSize, bool iPlanar) : size(iSize), planar(iPlanar) {
cl_int err;
if (planar) {
reals = clCreateBuffer(context(), CL_MEM_READ_WRITE, size*sizeof(D), NULL, &err); xCLErr(err);
imaginaries = clCreateBuffer(context(), CL_MEM_READ_WRITE, size*sizeof(D), NULL, &err); xCLErr(err);
} else {
data = clCreateBuffer(context(), CL_MEM_READ_WRITE, 2*size*sizeof(D), NULL, &err); xCLErr(err);
}
}
ComplexArrayCL(cl::Context& context, PlannarizedComplexArray<GlobalPlannarLevel, D>* array) : size(array->getElements()/2), planar(false) {
cl_int err;
data = clCreateBuffer(context(), CL_MEM_READ_WRITE, array->getElements()*sizeof(D), NULL, &err);
xCLErr(err);
}
cl_mem getReals() { return reals; }
cl_mem getImaginaries() { return imaginaries; }
cl_mem getData() { return data; }
void enqueueReadArray(cl::CommandQueue& queue, ComplexArray<D>& a, bool blocking = true) {
if (planar) {
xCLErr(clEnqueueReadBuffer(queue(), reals, blocking ? CL_TRUE : CL_FALSE, 0, size*sizeof(D), a.getReals(), 0, NULL, NULL));
xCLErr(clEnqueueReadBuffer(queue(), imaginaries, blocking ? CL_TRUE : CL_FALSE, 0, size*sizeof(D), a.getImaginaries(), 0, NULL, NULL));
} else {
xCLErr(clEnqueueReadBuffer(queue(), data, blocking ? CL_TRUE : CL_FALSE, 0, 2*size*sizeof(D), a.getData(), 0, NULL, NULL));
}
}
void enqueueWriteArray(cl::CommandQueue& queue, ComplexArray<D>& a, bool blocking = false) {
if (planar) {
xCLErr(clEnqueueWriteBuffer(queue(), reals, blocking ? CL_TRUE : CL_FALSE, 0, size*sizeof(D), a.getReals(), 0, NULL, NULL));
xCLErr(clEnqueueWriteBuffer(queue(), imaginaries, blocking ? CL_TRUE : CL_FALSE, 0, size*sizeof(D), a.getImaginaries(), 0, NULL, NULL));
} else {
xCLErr(clEnqueueWriteBuffer(queue(), data, blocking ? CL_TRUE : CL_FALSE, 0, 2*size*sizeof(D), a.getData(), 0, NULL, NULL));
}
}
void enqueueReadArray(cl::CommandQueue& queue, PlannarizedComplexArray<GlobalPlannarLevel, D>& a, bool blocking = true) {
xCLErr(clEnqueueReadBuffer(queue(), data, blocking ? CL_TRUE : CL_FALSE, 0, a.getElements()*sizeof(D), a.getData(), 0, NULL, NULL));
}
void enqueueWriteArray(cl::CommandQueue& queue, PlannarizedComplexArray<GlobalPlannarLevel, D>& a, bool blocking = false) {
xCLErr(clEnqueueWriteBuffer(queue(), data, blocking ? CL_TRUE : CL_FALSE, 0, a.getElements()*sizeof(D), a.getData(), 0, NULL, NULL));
}
~ComplexArrayCL() {
if (planar) {
clReleaseMemObject(reals);
clReleaseMemObject(imaginaries);
} else clReleaseMemObject(data);
}
};
/*class Access {
private:
streng arrayName, indexExpression;
public:
Access(streng iArrayName, streng iIndexExpression) : arrayName(iArrayName), indexExpression(iIndexExpression) { }
operator streng () { return arrayName + streng("[") + indexExpression + streng("]"); }
};*/
class OpenCLAlgorithm {
protected:
std::vector<cl::Device> devicesToUse;
cl::Context* context;
cl::CommandQueue* commandQueue;
public:
Timer *timerComputation, *timerTotal;
std::vector<Timer> kernelTimers;
OpenCLAlgorithm() : context(NULL), commandQueue(NULL), timerComputation(NULL), timerTotal(NULL) {
std::vector<cl::Platform> platforms;
xCLErr(cl::Platform::get(&platforms));
// printf("Platforms: %i", (int) platforms.size()); fflush(stdout);
// devicesToUse = new std::vector<cl::Device>();
for (int p = 0; p < (int) platforms.size(); ++p) { CLPlatform platform = CLPlatform(platforms[p]);
// printf("== Platform %i: %s ==", p, platform.name().c_str());
printf("[%s %s", platform.vendor().c_str(), platform.version().c_str());
/* printf("Profile: %s", platform.profile().c_str());
printf("Version: %s", platform.version().c_str());
printf("Extensions: %s", platform.extensions().c_str());
*/
std::vector<cl::Device> devices;
// xCLErr(platforms[p].getDevices(CL_DEVICE_TYPE_CPU, &devices));
xCLErr(platforms[p].getDevices(CL_DEVICE_TYPE_GPU, &devices));
// qDe bug("%i devices", (int) devices.size());
for (int d = 0; d < (int) devices.size(); ++d) { CLDevice device = CLDevice(devices[d]);
printf(": %s %s]", device.vendor().c_str(), device.name().c_str());
/* printf("Global Memory Size: %ld", device.globalMemorySize());
printf("Local Memory Size: %ld", device.localMemorySize());
printf("Max Compute Units: %i", device.maxComputeUnits());
printf("Max Clock Frequency: %i", device.maxClockFrequency());
printf("Max Work Group Size: %i", device.maxWorkGroupSize(0));*/
/* printf("Preferred Vector Width Float: %i", device.preferredVectorWidthFloat());
*/ if (device.available()) {
// printf("Available");
devicesToUse.push_back(devices[d]);
} //else { printf("Not available"); }
}
}
cl_int err;
// printf("Creating context..."); fflush(stdout);
context = new cl::Context(devicesToUse, NULL, NULL, NULL, &err); xCLErr(err);
//printf("Creating command queue..."); fflush(stdout);
commandQueue = new cl::CommandQueue(*context, devicesToUse[0], CL_QUEUE_PROFILING_ENABLE, &err); xCLErr(err);
}
static int getGlobalMemory() {
std::vector<cl::Platform> platforms;
xCLErr(cl::Platform::get(&platforms));
for (int p = 0; p < (int) platforms.size(); ++p) { CLPlatform platform = CLPlatform(platforms[p]);
std::vector<cl::Device> devices;
xCLErr(platforms[p].getDevices(CL_DEVICE_TYPE_GPU, &devices));
for (int d = 0; d < (int) devices.size(); ++d) { CLDevice device = CLDevice(devices[d]);
if (device.available()) {
return device.globalMemorySize(); // printf("Available");
} //else { printf("Not available"); }
}
}
return 0;
}
virtual double getTotalComputationFlops(int kernel) { return 0; }
virtual std::string getKernelInfo(int kernel) { return ""; }
~OpenCLAlgorithm() {
devicesToUse.clear();
kernelTimers.clear();
delete context;
delete commandQueue;
}
};
#endif // UOPENCL_H