Skip to content

Commit

Permalink
[SYCL][libclc] Add generic addrspace overloads of vload/vstore builti…
Browse files Browse the repository at this point in the history
…ns (intel#13092)

These definitions and declarations were missing, which would be
problematic for a target which supports the generic address space where
that address space is uniquely mangled. As with the maths builtins, the
NVIDIA target wouldn't see this problem as the 'generic' address space
is mangled identically to the 'private' one, so we were serendipitously
providing the builtins through these.

I couldn't seem to make SYCL actually generate these builtins, but there
is a path from SPIR-V to them (through the OpenCL SPIR-V environment),
so providing them is prudent regardless. In the absence of tests I
manually verified that there are now as many 'AS0' overloads of these
builtins as there are 'AS5'/'AS1'/etc. overloads.
  • Loading branch information
frasercrmck authored Mar 27, 2024
1 parent 93a1abb commit 51ffc04
Show file tree
Hide file tree
Showing 9 changed files with 198 additions and 51 deletions.
47 changes: 33 additions & 14 deletions libclc/generic/include/clc/shared/vload.h
Original file line number Diff line number Diff line change
@@ -1,20 +1,31 @@
#define _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, VEC_TYPE, WIDTH, ADDR_SPACE) \
_CLC_OVERLOAD _CLC_DECL VEC_TYPE vload##SUFFIX##WIDTH(size_t offset, const ADDR_SPACE MEM_TYPE *x);

#define _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, ADDR_SPACE) \
_CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##2, 2, ADDR_SPACE) \
_CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##3, 3, ADDR_SPACE) \
_CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##4, 4, ADDR_SPACE) \
_CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##8, 8, ADDR_SPACE) \
#define _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, VEC_TYPE, WIDTH, ADDR_SPACE) \
_CLC_OVERLOAD _CLC_DECL VEC_TYPE vload##SUFFIX##WIDTH( \
size_t offset, const ADDR_SPACE MEM_TYPE *x);

#define _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, ADDR_SPACE) \
_CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##2, 2, ADDR_SPACE) \
_CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##3, 3, ADDR_SPACE) \
_CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##4, 4, ADDR_SPACE) \
_CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##8, 8, ADDR_SPACE) \
_CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##16, 16, ADDR_SPACE)

#define _CLC_VECTOR_VLOAD_PRIM3(SUFFIX, MEM_TYPE, PRIM_TYPE) \
_CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __private) \
_CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __local) \
_CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __constant) \
_CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global)
// Select how __generic address-space vload declarations are emitted.
// The generic variants are enabled when compiling as OpenCL C 2.0, or as
// OpenCL C 3.0 or later with the __opencl_c_generic_address_space feature
// macro defined. When enabled, _CLC_VECTOR_VLOAD_GENERIC_DECL is an alias
// for _CLC_VECTOR_VLOAD_DECL.
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
defined(__opencl_c_generic_address_space))
#define _CLC_VECTOR_VLOAD_GENERIC_DECL _CLC_VECTOR_VLOAD_DECL
#else
// The generic address space isn't available, so make the macro do nothing:
// it accepts four arguments and expands to an empty token sequence.
#define _CLC_VECTOR_VLOAD_GENERIC_DECL(X, Y, Z, W)
#endif

#define _CLC_VECTOR_VLOAD_PRIM3(SUFFIX, MEM_TYPE, PRIM_TYPE) \
_CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __private) \
_CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __local) \
_CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __constant) \
_CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global) \
_CLC_VECTOR_VLOAD_GENERIC_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __generic)

#define _CLC_VECTOR_VLOAD_PRIM1(PRIM_TYPE) \
#define _CLC_VECTOR_VLOAD_PRIM1(PRIM_TYPE) \
_CLC_VECTOR_VLOAD_PRIM3(, PRIM_TYPE, PRIM_TYPE)

// Declare vector load prototypes
Expand Down Expand Up @@ -52,7 +63,15 @@ _CLC_VLOAD_DECL(a_half, half, float, , __global)
_CLC_VLOAD_DECL(a_half, half, float, , __local)
_CLC_VLOAD_DECL(a_half, half, float, , __private)

#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
defined(__opencl_c_generic_address_space))
_CLC_VLOAD_DECL(_half, half, float, , __generic)
_CLC_VLOAD_DECL(a_half, half, float, , __generic)
#endif

#undef _CLC_VLOAD_DECL
#undef _CLC_VECTOR_VLOAD_DECL
#undef _CLC_VECTOR_VLOAD_PRIM3
#undef _CLC_VECTOR_VLOAD_PRIM1
#undef _CLC_VECTOR_VLOAD_GENERIC_DECL
63 changes: 40 additions & 23 deletions libclc/generic/include/clc/shared/vstore.h
Original file line number Diff line number Diff line change
@@ -1,30 +1,45 @@
#define _CLC_VSTORE_DECL(SUFFIX, PRIM_TYPE, VEC_TYPE, WIDTH, ADDR_SPACE, RND) \
_CLC_OVERLOAD _CLC_DECL void vstore##SUFFIX##WIDTH##RND(VEC_TYPE vec, size_t offset, ADDR_SPACE PRIM_TYPE *out);

#define _CLC_VECTOR_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, ADDR_SPACE, RND) \
_CLC_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##2, 2, ADDR_SPACE, RND) \
_CLC_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##3, 3, ADDR_SPACE, RND) \
_CLC_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##4, 4, ADDR_SPACE, RND) \
_CLC_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##8, 8, ADDR_SPACE, RND) \
#define _CLC_VSTORE_DECL(SUFFIX, PRIM_TYPE, VEC_TYPE, WIDTH, ADDR_SPACE, RND) \
_CLC_OVERLOAD _CLC_DECL void vstore##SUFFIX##WIDTH##RND( \
VEC_TYPE vec, size_t offset, ADDR_SPACE PRIM_TYPE *out);

#define _CLC_VECTOR_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, ADDR_SPACE, RND) \
_CLC_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##2, 2, ADDR_SPACE, RND) \
_CLC_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##3, 3, ADDR_SPACE, RND) \
_CLC_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##4, 4, ADDR_SPACE, RND) \
_CLC_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##8, 8, ADDR_SPACE, RND) \
_CLC_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##16, 16, ADDR_SPACE, RND)

#define _CLC_VECTOR_VSTORE_PRIM3(SUFFIX, MEM_TYPE, PRIM_TYPE, RND) \
_CLC_VECTOR_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __private, RND) \
_CLC_VECTOR_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __local, RND) \
_CLC_VECTOR_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global, RND)
// Select how __generic address-space vstore declarations are emitted.
// The generic variants are enabled when compiling as OpenCL C 2.0, or as
// OpenCL C 3.0 or later with the __opencl_c_generic_address_space feature
// macro defined. When enabled, the two *_GENERIC_DECL macros are aliases for
// _CLC_VSTORE_DECL and _CLC_VECTOR_VSTORE_DECL respectively.
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
defined(__opencl_c_generic_address_space))
#define _CLC_VSTORE_GENERIC_DECL _CLC_VSTORE_DECL
#define _CLC_VECTOR_VSTORE_GENERIC_DECL _CLC_VECTOR_VSTORE_DECL
#else
// The generic address space isn't available, so make the macros do nothing:
// they accept six and five arguments respectively and expand to an empty
// token sequence.
#define _CLC_VSTORE_GENERIC_DECL(X, Y, Z, W, V, U)
#define _CLC_VECTOR_VSTORE_GENERIC_DECL(X, Y, Z, W, V)
#endif

#define _CLC_VECTOR_VSTORE_PRIM3(SUFFIX, MEM_TYPE, PRIM_TYPE, RND) \
_CLC_VECTOR_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __private, RND) \
_CLC_VECTOR_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __local, RND) \
_CLC_VECTOR_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global, RND) \
_CLC_VECTOR_VSTORE_GENERIC_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __generic, RND)

#define _CLC_VECTOR_VSTORE_PRIM1(PRIM_TYPE) \
_CLC_VECTOR_VSTORE_PRIM3(,PRIM_TYPE, PRIM_TYPE, )
#define _CLC_VECTOR_VSTORE_PRIM1(PRIM_TYPE) \
_CLC_VECTOR_VSTORE_PRIM3(, PRIM_TYPE, PRIM_TYPE, )

#define _CLC_VECTOR_VSTORE_HALF_PRIM1(PRIM_TYPE, RND) \
_CLC_VSTORE_DECL(_half, half, PRIM_TYPE, , __private, RND) \
_CLC_VSTORE_DECL(_half, half, PRIM_TYPE, , __local, RND) \
_CLC_VSTORE_DECL(_half, half, PRIM_TYPE, , __global, RND) \
_CLC_VECTOR_VSTORE_PRIM3(_half, half, PRIM_TYPE, RND) \
_CLC_VSTORE_DECL(a_half, half, PRIM_TYPE, , __private, RND) \
_CLC_VSTORE_DECL(a_half, half, PRIM_TYPE, , __local, RND) \
_CLC_VSTORE_DECL(a_half, half, PRIM_TYPE, , __global, RND) \
_CLC_VECTOR_VSTORE_PRIM3(a_half, half, PRIM_TYPE, RND)
#define _CLC_VECTOR_VSTORE_HALF_PRIM1(PRIM_TYPE, RND) \
_CLC_VSTORE_DECL(_half, half, PRIM_TYPE, , __private, RND) \
_CLC_VSTORE_DECL(_half, half, PRIM_TYPE, , __local, RND) \
_CLC_VSTORE_DECL(_half, half, PRIM_TYPE, , __global, RND) \
_CLC_VSTORE_GENERIC_DECL(_half, half, PRIM_TYPE, , __generic, RND) \
_CLC_VECTOR_VSTORE_PRIM3(_half, half, PRIM_TYPE, RND) \
_CLC_VSTORE_DECL(a_half, half, PRIM_TYPE, , __private, RND) \
_CLC_VSTORE_DECL(a_half, half, PRIM_TYPE, , __local, RND) \
_CLC_VSTORE_DECL(a_half, half, PRIM_TYPE, , __global, RND) \
_CLC_VSTORE_GENERIC_DECL(a_half, half, PRIM_TYPE, , __generic, RND) \
_CLC_VECTOR_VSTORE_PRIM3(a_half, half, PRIM_TYPE, RND)

_CLC_VECTOR_VSTORE_PRIM1(char)
_CLC_VECTOR_VSTORE_PRIM1(uchar)
Expand Down Expand Up @@ -57,6 +72,8 @@ _CLC_VECTOR_VSTORE_HALF_PRIM1(float, _rte)


#undef _CLC_VSTORE_DECL
#undef _CLC_VSTORE_GENERIC_DECL
#undef _CLC_VECTOR_VSTORE_DECL
#undef _CLC_VECTOR_VSTORE_PRIM3
#undef _CLC_VECTOR_VSTORE_PRIM1
#undef _CLC_VECTOR_VSTORE_GENERIC_DECL
28 changes: 22 additions & 6 deletions libclc/generic/lib/shared/vload.cl
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,23 @@
typedef PRIM_TYPE##16 less_aligned_##ADDR_SPACE##PRIM_TYPE##16 __attribute__ ((aligned (sizeof(PRIM_TYPE))));\
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE##16 vload16(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
return *((const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##16*) (&x[16*offset])); \
} \
}

// Select how __generic address-space vload definitions are emitted.
// The generic variants are enabled when compiling as OpenCL C 2.0, or as
// OpenCL C 3.0 or later with the __opencl_c_generic_address_space feature
// macro defined. When enabled, VLOAD_VECTORIZE_GENERIC is an alias for
// VLOAD_VECTORIZE.
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
defined(__opencl_c_generic_address_space))
#define VLOAD_VECTORIZE_GENERIC VLOAD_VECTORIZE
#else
// The generic address space isn't available, so make the macro do nothing:
// it accepts two arguments and expands to an empty token sequence.
#define VLOAD_VECTORIZE_GENERIC(X,Y)
#endif

#define VLOAD_ADDR_SPACES(__CLC_SCALAR_GENTYPE) \
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __private) \
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __local) \
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __constant) \
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __global) \
#define VLOAD_ADDR_SPACES(__CLC_SCALAR_GENTYPE) \
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __private) \
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __local) \
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __constant) \
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __global) \
VLOAD_VECTORIZE_GENERIC(__CLC_SCALAR_GENTYPE, __generic)

#define VLOAD_TYPES() \
VLOAD_ADDR_SPACES(char) \
Expand Down Expand Up @@ -62,6 +72,11 @@ float __clc_vload_half_float_helper__constant(const __constant half *);
float __clc_vload_half_float_helper__global(const __global half *);
float __clc_vload_half_float_helper__local(const __local half *);
float __clc_vload_half_float_helper__private(const __private half *);
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
defined(__opencl_c_generic_address_space))
float __clc_vload_half_float_helper__generic(const __generic half *);
#endif

#define VEC_LOAD1(val, AS) val = __clc_vload_half_float_helper##AS (&mem[offset++]);
#else
Expand Down Expand Up @@ -115,3 +130,4 @@ float __clc_vload_half_float_helper__private(const __private half *);
#undef VLOAD_TYPES
#undef VLOAD_ADDR_SPACES
#undef VLOAD_VECTORIZE
#undef VLOAD_VECTORIZE_GENERIC
10 changes: 10 additions & 0 deletions libclc/generic/lib/shared/vload_half.inc
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,22 @@
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __local);
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __global);
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __constant);
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
defined(__opencl_c_generic_address_space))
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __generic);
#endif

#undef __CLC_OFFSET
#else
FUNC(, 1, 1, __CLC_GENTYPE, __private);
FUNC(, 1, 1, __CLC_GENTYPE, __local);
FUNC(, 1, 1, __CLC_GENTYPE, __global);
FUNC(, 1, 1, __CLC_GENTYPE, __constant);
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
defined(__opencl_c_generic_address_space))
FUNC(, 1, 1, __CLC_GENTYPE, __generic);
#endif
#endif
#endif
32 changes: 26 additions & 6 deletions libclc/generic/lib/shared/vstore.cl
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,22 @@
typedef PRIM_TYPE##16 less_aligned_##ADDR_SPACE##PRIM_TYPE##16 __attribute__ ((aligned (sizeof(PRIM_TYPE))));\
_CLC_OVERLOAD _CLC_DEF void vstore16(PRIM_TYPE##16 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
*((ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##16*) (&mem[16*offset])) = vec; \
} \
}

// Select how __generic address-space vstore definitions are emitted.
// The generic variants are enabled when compiling as OpenCL C 2.0, or as
// OpenCL C 3.0 or later with the __opencl_c_generic_address_space feature
// macro defined. When enabled, VSTORE_VECTORIZE_GENERIC is an alias for
// VSTORE_VECTORIZE.
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
defined(__opencl_c_generic_address_space))
#define VSTORE_VECTORIZE_GENERIC VSTORE_VECTORIZE
#else
// The generic address space isn't available, so make the macro do nothing:
// it accepts two arguments and expands to an empty token sequence.
#define VSTORE_VECTORIZE_GENERIC(X,Y)
#endif

#define VSTORE_ADDR_SPACES(__CLC_SCALAR___CLC_GENTYPE) \
VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __private) \
VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __local) \
VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __global) \
#define VSTORE_ADDR_SPACES(__CLC_SCALAR___CLC_GENTYPE) \
VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __private) \
VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __local) \
VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __global) \
VSTORE_VECTORIZE_GENERIC(__CLC_SCALAR___CLC_GENTYPE, __generic)

VSTORE_ADDR_SPACES(char)
VSTORE_ADDR_SPACES(uchar)
Expand All @@ -43,7 +53,6 @@ VSTORE_ADDR_SPACES(long)
VSTORE_ADDR_SPACES(ulong)
VSTORE_ADDR_SPACES(float)


#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
VSTORE_ADDR_SPACES(double)
Expand All @@ -68,11 +77,21 @@ _CLC_DEF void __clc_vstore_half_##STYPE##_helper##AS(STYPE s, AS half *d) \
DECLARE_HELPER(float, __private, __builtin_store_halff);
DECLARE_HELPER(float, __global, __builtin_store_halff);
DECLARE_HELPER(float, __local, __builtin_store_halff);
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
defined(__opencl_c_generic_address_space))
DECLARE_HELPER(float, __generic, __builtin_store_halff);
#endif

#ifdef cl_khr_fp64
DECLARE_HELPER(double, __private, __builtin_store_half);
DECLARE_HELPER(double, __global, __builtin_store_half);
DECLARE_HELPER(double, __local, __builtin_store_half);
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
defined(__opencl_c_generic_address_space))
DECLARE_HELPER(double, __generic, __builtin_store_half);
#endif
#endif

#define VEC_STORE1(STYPE, AS, val, ROUNDF) __clc_vstore_half_##STYPE##_helper##AS (ROUNDF(val), &mem[offset++]);
Expand Down Expand Up @@ -261,3 +280,4 @@ _CLC_DEF _CLC_OVERLOAD double __clc_rte(double x)
#undef DECLARE_HELPER
#undef VSTORE_ADDR_SPACES
#undef VSTORE_VECTORIZE
#undef VSTORE_VECTORIZE_GENERIC
10 changes: 10 additions & 0 deletions libclc/generic/lib/shared/vstore_half.inc
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,21 @@
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __private);
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local);
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global);
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
defined(__opencl_c_generic_address_space))
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __generic);
#endif

#undef __CLC_OFFSET
#else
FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __private);
FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local);
FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global);
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
defined(__opencl_c_generic_address_space))
FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __generic);
#endif
#endif
#endif
26 changes: 25 additions & 1 deletion libclc/generic/libspirv/shared/vload.cl
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,21 @@
*)(&x[16 * offset])); \
}

// Select how __generic address-space vload definitions are emitted.
// The generic variants are enabled when compiling as OpenCL C 2.0, or as
// OpenCL C 3.0 or later with the __opencl_c_generic_address_space feature
// macro defined. When enabled, VLOAD_VECTORIZE_GENERIC is an alias for
// VLOAD_VECTORIZE.
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
defined(__opencl_c_generic_address_space))
#define VLOAD_VECTORIZE_GENERIC VLOAD_VECTORIZE
#else
// The generic address space isn't available, so make the macro do nothing:
// it accepts three arguments and expands to an empty token sequence.
#define VLOAD_VECTORIZE_GENERIC(X,Y,Z)
#endif

#define VLOAD_ADDR_SPACES_IMPL(__CLC_RET_GENTYPE, __CLC_SCALAR_GENTYPE) \
VLOAD_VECTORIZE(__CLC_RET_GENTYPE, __CLC_SCALAR_GENTYPE, __private) \
VLOAD_VECTORIZE(__CLC_RET_GENTYPE, __CLC_SCALAR_GENTYPE, __local) \
VLOAD_VECTORIZE(__CLC_RET_GENTYPE, __CLC_SCALAR_GENTYPE, __constant) \
VLOAD_VECTORIZE(__CLC_RET_GENTYPE, __CLC_SCALAR_GENTYPE, __global)
VLOAD_VECTORIZE(__CLC_RET_GENTYPE, __CLC_SCALAR_GENTYPE, __global) \
VLOAD_VECTORIZE_GENERIC(__CLC_RET_GENTYPE, __CLC_SCALAR_GENTYPE, __generic)

#define VLOAD_ADDR_SPACES(__CLC_SCALAR_GENTYPE) \
VLOAD_ADDR_SPACES_IMPL(__CLC_SCALAR_GENTYPE, __CLC_SCALAR_GENTYPE)
Expand Down Expand Up @@ -99,6 +109,12 @@ float __clc_vload_half_float_helper__global(const __global half *);
float __clc_vload_half_float_helper__local(const __local half *);
float __clc_vload_half_float_helper__private(const __private half *);

#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
defined(__opencl_c_generic_address_space))
float __clc_vload_half_float_helper__generic(const __generic half *);
#endif

#define VEC_LOAD1(val, AS) \
val = __clc_vload_half_float_helper##AS(&mem[offset++]);
#else
Expand Down Expand Up @@ -158,6 +174,12 @@ GEN_VLOAD_HALF(__global)
GEN_VLOAD_HALF(__local)
GEN_VLOAD_HALF(__constant)

#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
defined(__opencl_c_generic_address_space))
GEN_VLOAD_HALF(__generic)
#endif

#undef VLOAD_HALF_IMPL
#undef VLOAD_HALF_VEC_IMPL
#undef GEN_VLOAD_HALF
Expand All @@ -170,3 +192,5 @@ GEN_VLOAD_HALF(__constant)
// Remove the helper macros used above; each name is undefined exactly once.
#undef VLOAD_TYPES
#undef VLOAD_ADDR_SPACES
#undef VLOAD_VECTORIZE_GENERIC
#undef VLOAD_VECTORIZE
22 changes: 21 additions & 1 deletion libclc/generic/libspirv/shared/vstore.cl
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,20 @@
*)(&mem[16 * offset])) = vec; \
}

// Select how __generic address-space vstore definitions are emitted.
// The generic variants are enabled when compiling as OpenCL C 2.0, or as
// OpenCL C 3.0 or later with the __opencl_c_generic_address_space feature
// macro defined. When enabled, VSTORE_VECTORIZE_GENERIC is an alias for
// VSTORE_VECTORIZE.
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
defined(__opencl_c_generic_address_space))
#define VSTORE_VECTORIZE_GENERIC VSTORE_VECTORIZE
#else
// The generic address space isn't available, so make the macro do nothing:
// it accepts two arguments and expands to an empty token sequence.
#define VSTORE_VECTORIZE_GENERIC(X,Y)
#endif

#define VSTORE_ADDR_SPACES(__CLC_SCALAR___CLC_GENTYPE) \
VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __private) \
VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __local) \
VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __global)
VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __global) \
VSTORE_VECTORIZE_GENERIC(__CLC_SCALAR___CLC_GENTYPE, __generic)

VSTORE_ADDR_SPACES(schar)
VSTORE_ADDR_SPACES(uchar)
Expand Down Expand Up @@ -97,11 +107,21 @@ VSTORE_ADDR_SPACES(half)
DECLARE_HELPER(float, __private, __builtin_store_halff);
DECLARE_HELPER(float, __global, __builtin_store_halff);
DECLARE_HELPER(float, __local, __builtin_store_halff);
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
defined(__opencl_c_generic_address_space))
DECLARE_HELPER(float, __generic, __builtin_store_halff);
#endif

#ifdef cl_khr_fp64
DECLARE_HELPER(double, __private, __builtin_store_half);
DECLARE_HELPER(double, __global, __builtin_store_half);
DECLARE_HELPER(double, __local, __builtin_store_half);
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
defined(__opencl_c_generic_address_space))
DECLARE_HELPER(double, __generic, __builtin_store_half);
#endif
#endif

#define VEC_STORE1(STYPE, AS, val, ROUNDF) \
Expand Down
Loading

0 comments on commit 51ffc04

Please sign in to comment.