Skip to content

Commit c5406ef

Browse files
committed
Fixed several issues with macOS (compile with -std=c++17; removed "#pragma unroll" from the OpenCL kernel loops)
1 parent 0f7e621 commit c5406ef

File tree

2 files changed

+3
-14
lines changed

2 files changed

+3
-14
lines changed

make.sh

+3-3
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
mkdir -p bin # create directory for executable
44
rm -f ./bin/OpenCL-Benchmark # prevent execution of old version if compiling fails
55

6-
g++ ./src/*.cpp -o ./bin/OpenCL-Benchmark -pthread -I./src/OpenCL/include -L./src/OpenCL/lib -lOpenCL # compile on Linux
7-
#g++ ./src/*.cpp -o ./bin/OpenCL-Benchmark -pthread -I./src/OpenCL/include -framework OpenCL # compile on macOS
8-
#g++ ./src/*.cpp -o ./bin/OpenCL-Benchmark -pthread -I./src/OpenCL/include -L/system/vendor/lib64 -lOpenCL # compile on Android
6+
g++ ./src/*.cpp -o ./bin/OpenCL-Benchmark -std=c++17 -pthread -I./src/OpenCL/include -L./src/OpenCL/lib -lOpenCL # compile on Linux
7+
#g++ ./src/*.cpp -o ./bin/OpenCL-Benchmark -std=c++17 -pthread -I./src/OpenCL/include -framework OpenCL # compile on macOS
8+
#g++ ./src/*.cpp -o ./bin/OpenCL-Benchmark -std=c++17 -pthread -I./src/OpenCL/include -L/system/vendor/lib64 -lOpenCL # compile on Android
99

1010
./bin/OpenCL-Benchmark "$@" # run OpenCL-Benchmark

src/kernel.cpp

-11
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ string opencl_c_container() { return R( // ########################## begin of O
77
kernel void kernel_double(global float* data) {
88
double x = (double)get_global_id(0);
99
double y = (double)get_local_id(0);
10-
#pragma unroll
1110
for(uint i=0u; i<128u; i++) {
1211
x = fma(y, x, y);
1312
y = fma(x, y, x);
@@ -19,7 +18,6 @@ kernel void kernel_double(global float* data) {
1918
kernel void kernel_float(global float* data) {
2019
float x = (float)get_global_id(0);
2120
float y = (float)get_local_id(0);
22-
#pragma unroll
2321
for(uint i=0u; i<512u; i++) {
2422
x = fma(y, x, y);
2523
y = fma(x, y, x);
@@ -31,7 +29,6 @@ kernel void kernel_float(global float* data) {
3129
kernel void kernel_half(global float* data) {
3230
half2 x = (half2)((float)get_global_id(0), (float)get_local_id(0));
3331
half2 y = (half2)((float)get_local_id(0), (float)get_global_id(0));
34-
#pragma unroll
3532
for(uint i=0u; i<512u; i++) {
3633
x = fma(y, x, y);
3734
y = fma(x, y, x);
@@ -43,7 +40,6 @@ kernel void kernel_half(global float* data) {
4340
kernel void kernel_long(global float* data) {
4441
long x = (long)get_global_id(0);
4542
long y = (long)get_local_id(0);
46-
#pragma unroll
4743
for(uint i=0u; i<8u; i++) {
4844
x = (y*x)+y;
4945
y = (x*y)+x;
@@ -54,7 +50,6 @@ kernel void kernel_long(global float* data) {
5450
kernel void kernel_int(global float* data) {
5551
int x = get_global_id(0);
5652
int y = get_local_id(0);
57-
#pragma unroll
5853
for(uint i=0u; i<512u; i++) {
5954
x = (y*x)+y;
6055
y = (x*y)+x;
@@ -65,7 +60,6 @@ kernel void kernel_int(global float* data) {
6560
kernel void kernel_short(global float* data) {
6661
short2 x = as_short2((int)get_global_id(0));
6762
short2 y = as_short2((int)get_local_id(0));
68-
#pragma unroll
6963
for(uint i=0u; i<128u; i++) {
7064
x = (y*x)+y;
7165
y = (x*y)+x;
@@ -76,7 +70,6 @@ kernel void kernel_short(global float* data) {
7670
kernel void kernel_char(global float* data) {
7771
char4 x = as_char4((int)get_global_id(0));
7872
char4 y = as_char4((int)get_local_id(0));
79-
#pragma unroll
8073
for(uint i=0u; i<64u; i++) {
8174
x = (y*x)+y;
8275
y = (x*y)+x;
@@ -88,25 +81,21 @@ kernel void kernel_char(global float* data) {
8881

8982
kernel void kernel_coalesced_write(global float* data) {
9083
const uint n = get_global_id(0);
91-
#pragma unroll
9284
for(uint i=0u; i<def_M; i++) data[i*def_N+n] = 0.0f; // coalesced write
9385
}
9486
kernel void kernel_coalesced_read(global float* data) {
9587
const uint n = get_global_id(0);
9688
float x = 0.0f;
97-
#pragma unroll
9889
for(uint i=0u; i<def_M; i++) x += data[i*def_N+n]; // coalesced read
9990
data[n] = x;
10091
}
10192
kernel void kernel_misaligned_write(global float* data) {
10293
const uint n = get_global_id(0);
103-
#pragma unroll
10494
for(uint i=0u; i<def_M; i++) data[n*def_M+i] = 0.0f; // misaligned write
10595
}
10696
kernel void kernel_misaligned_read(global float* data) {
10797
const uint n = get_global_id(0);
10898
float x = 0.0f;
109-
#pragma unroll
11099
for(uint i=0u; i<def_M; i++) x += data[n*def_M+i]; // misaligned read
111100
data[n] = x;
112101
}

0 commit comments

Comments
 (0)