@@ -7,7 +7,6 @@ string opencl_c_container() { return R( // ########################## begin of O
7
7
kernel void kernel_double (global float * data) {
8
8
double x = (double )get_global_id (0 );
9
9
double y = (double )get_local_id (0 );
10
- #pragma unroll
11
10
for (uint i=0u ; i<128u ; i++) {
12
11
x = fma (y, x, y);
13
12
y = fma (x, y, x);
@@ -19,7 +18,6 @@ kernel void kernel_double(global float* data) {
19
18
kernel void kernel_float (global float * data) {
20
19
float x = (float )get_global_id (0 );
21
20
float y = (float )get_local_id (0 );
22
- #pragma unroll
23
21
for (uint i=0u ; i<512u ; i++) {
24
22
x = fma (y, x, y);
25
23
y = fma (x, y, x);
@@ -31,7 +29,6 @@ kernel void kernel_float(global float* data) {
31
29
kernel void kernel_half (global float * data) {
32
30
half2 x = (half2)((float )get_global_id (0 ), (float )get_local_id (0 ));
33
31
half2 y = (half2)((float )get_local_id (0 ), (float )get_global_id (0 ));
34
- #pragma unroll
35
32
for (uint i=0u ; i<512u ; i++) {
36
33
x = fma (y, x, y);
37
34
y = fma (x, y, x);
@@ -43,7 +40,6 @@ kernel void kernel_half(global float* data) {
43
40
kernel void kernel_long (global float * data) {
44
41
long x = (long )get_global_id (0 );
45
42
long y = (long )get_local_id (0 );
46
- #pragma unroll
47
43
for (uint i=0u ; i<8u ; i++) {
48
44
x = (y*x)+y;
49
45
y = (x*y)+x;
@@ -54,7 +50,6 @@ kernel void kernel_long(global float* data) {
54
50
kernel void kernel_int (global float * data) {
55
51
int x = get_global_id (0 );
56
52
int y = get_local_id (0 );
57
- #pragma unroll
58
53
for (uint i=0u ; i<512u ; i++) {
59
54
x = (y*x)+y;
60
55
y = (x*y)+x;
@@ -65,7 +60,6 @@ kernel void kernel_int(global float* data) {
65
60
kernel void kernel_short (global float * data) {
66
61
short2 x = as_short2 ((int )get_global_id (0 ));
67
62
short2 y = as_short2 ((int )get_local_id (0 ));
68
- #pragma unroll
69
63
for (uint i=0u ; i<128u ; i++) {
70
64
x = (y*x)+y;
71
65
y = (x*y)+x;
@@ -76,7 +70,6 @@ kernel void kernel_short(global float* data) {
76
70
kernel void kernel_char (global float * data) {
77
71
char4 x = as_char4 ((int )get_global_id (0 ));
78
72
char4 y = as_char4 ((int )get_local_id (0 ));
79
- #pragma unroll
80
73
for (uint i=0u ; i<64u ; i++) {
81
74
x = (y*x)+y;
82
75
y = (x*y)+x;
@@ -88,25 +81,21 @@ kernel void kernel_char(global float* data) {
88
81
89
82
// Write benchmark, coalesced pattern: each work-item stores 0.0f into def_M elements of data.
// For a fixed row, consecutive work-items address consecutive elements (row*def_N+gid).
kernel void kernel_coalesced_write(global float* data) {
	const uint gid = get_global_id(0); // index of this work-item in dimension 0
	for(uint row=0u; row<def_M; row++) {
		data[row*def_N+gid] = 0.0f; // coalesced write
	}
}
94
86
// Read benchmark, coalesced pattern: each work-item sums def_M elements of data,
// addressed as row*def_N+gid, then stores the sum to data[gid].
kernel void kernel_coalesced_read(global float* data) {
	const uint gid = get_global_id(0); // index of this work-item in dimension 0
	float sum = 0.0f;
	for(uint row=0u; row<def_M; row++) {
		sum += data[row*def_N+gid]; // coalesced read
	}
	data[gid] = sum; // store the accumulated value at this work-item's own index
}
101
92
// Write benchmark, misaligned pattern: each work-item stores 0.0f into def_M consecutive
// elements of data starting at gid*def_M, so neighbouring work-items are def_M elements apart.
kernel void kernel_misaligned_write(global float* data) {
	const uint gid = get_global_id(0); // index of this work-item in dimension 0
	for(uint col=0u; col<def_M; col++) {
		data[gid*def_M+col] = 0.0f; // misaligned write
	}
}
106
96
// Read benchmark, misaligned pattern: each work-item sums def_M consecutive elements of data
// starting at gid*def_M, then stores the sum to data[gid].
kernel void kernel_misaligned_read(global float* data) {
	const uint gid = get_global_id(0); // index of this work-item in dimension 0
	float sum = 0.0f;
	for(uint col=0u; col<def_M; col++) {
		sum += data[gid*def_M+col]; // misaligned read
	}
	data[gid] = sum; // store the accumulated value at this work-item's own index
}
0 commit comments