[SYCL][E2E] Rewrite Tests Containing Deprecated Overloads #3 #16775

Merged
merged 2 commits on Feb 4, 2025
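All of the changes below follow the same pattern: the deprecated `parallel_for` overloads that take a properties list as an extra argument are replaced by named kernel functors that expose their properties through a `get(properties_tag)` member. A minimal, self-contained sketch of that pattern follows; it is illustrative only, and the names `MyKernelFunctor`, `WorkSize`, and the chosen `sub_group_size<32>` property are placeholders, not code taken from these diffs.

// Minimal sketch of the migration pattern used throughout this PR
// (assumed names: MyKernelFunctor, WorkSize; not part of the diffs below).
#include <sycl/sycl.hpp>

namespace syclex = sycl::ext::oneapi::experimental;

// Old, deprecated form: properties passed as a separate parallel_for argument.
//   cgh.parallel_for<K>(Range, syclex::properties{syclex::sub_group_size<32>},
//                       [=](sycl::nd_item<1> It) { /* ... */ });

// New form: a functor carries the same properties via get(properties_tag).
struct MyKernelFunctor {
  int *mData;
  MyKernelFunctor(int *Data) : mData(Data) {}

  void operator()(sycl::nd_item<1> It) const {
    // Record the sub-group size observed by each work-item.
    mData[It.get_global_linear_id()] =
        It.get_sub_group().get_local_linear_range();
  }
  auto get(syclex::properties_tag) const {
    return syclex::properties{syclex::sub_group_size<32>};
  }
};

int main() {
  constexpr size_t WorkSize = 64;
  sycl::queue Q;
  int *Data = sycl::malloc_shared<int>(WorkSize, Q);
  Q.submit([&](sycl::handler &CGH) {
     CGH.parallel_for(sycl::nd_range<1>{WorkSize, 32}, MyKernelFunctor(Data));
   }).wait();
  sycl::free(Data, Q);
  return 0;
}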
14 changes: 11 additions & 3 deletions sycl/test-e2e/Basic/kernel_max_wg_size.cpp
@@ -29,6 +29,16 @@ __attribute__((noinline)) void f(int *result, nd_item<1> &index) {
result[index.get_global_id()] = index.get_global_id();
}

struct KernelFunctor {
int *mResult;
KernelFunctor(int *result) : mResult(result) {}

void operator()(nd_item<1> index) const { f(mResult, index); }
auto get(syclex::properties_tag) const {
return syclex::properties{intelex::grf_size<256>};
}
};

int main() {
queue myQueue;
auto myContext = myQueue.get_context();
@@ -46,11 +56,9 @@ int main() {
nd_range myRange{range{maxWgSize}, range{maxWgSize}};

int *result = sycl::malloc_shared<int>(maxWgSize, myQueue);
syclex::properties kernelProperties{intelex::grf_size<256>};
myQueue.submit([&](handler &cgh) {
cgh.use_kernel_bundle(myBundle);
cgh.parallel_for<MyKernel>(myRange, kernelProperties,
([=](nd_item<1> index) { f(result, index); }));
cgh.parallel_for<MyKernel>(myRange, KernelFunctor(result));
});

myQueue.wait();
37 changes: 0 additions & 37 deletions sycl/test-e2e/Basic/sub_group_size_prop.cpp
@@ -44,33 +44,12 @@ void test(queue &Queue, const std::vector<size_t> SupportedSGSizes) {
return;
}

auto Props = ext::oneapi::experimental::properties{
ext::oneapi::experimental::sub_group_size<SGSize>};

nd_range<1> NdRange(SGSize * 4, SGSize * 2);

size_t ReadSubGroupSize = 0;
{
buffer ReadSubGroupSizeBuf(&ReadSubGroupSize, range(1));

Queue.submit([&](handler &CGH) {
accessor ReadSubGroupSizeBufAcc{ReadSubGroupSizeBuf, CGH,
sycl::write_only, sycl::no_init};

CGH.parallel_for<SubGroupKernel<Variant::Function, SGSize>>(
NdRange, Props, [=](nd_item<1> NdItem) {
auto SG = NdItem.get_sub_group();
if (NdItem.get_global_linear_id() == 0)
ReadSubGroupSizeBufAcc[0] = SG.get_local_linear_range();
});
});
}
assert(ReadSubGroupSize == SGSize && "Failed check for function.");

ReadSubGroupSize = 0;
{
buffer ReadSubGroupSizeBuf(&ReadSubGroupSize, range(1));

Queue.submit([&](handler &CGH) {
accessor ReadSubGroupSizeBufAcc{ReadSubGroupSizeBuf, CGH,
sycl::write_only, sycl::no_init};
@@ -81,22 +60,6 @@ void test(queue &Queue, const std::vector<size_t> SupportedSGSizes) {
});
}
assert(ReadSubGroupSize == SGSize && "Failed check for functor.");

ReadSubGroupSize = 0;
{
buffer ReadSubGroupSizeBuf(&ReadSubGroupSize, range(1));

Queue.submit([&](handler &CGH) {
accessor ReadSubGroupSizeBufAcc{ReadSubGroupSizeBuf, CGH,
sycl::write_only, sycl::no_init};
KernelFunctorWithSGSizeProp<SGSize> KernelFunctor{ReadSubGroupSizeBufAcc};

CGH.parallel_for<SubGroupKernel<Variant::Functor, SGSize>>(NdRange, Props,
KernelFunctor);
});
}
assert(ReadSubGroupSize == SGSize &&
"Failed check for functor and properties.");
}

int main() {
79 changes: 47 additions & 32 deletions sycl/test-e2e/ClusterLaunch/cluster_launch_parallel_for.cpp
@@ -10,6 +10,49 @@

#include <string>

template <int Dim, typename T> struct KernelFunctor {
int *mCorrectResultFlag;
T mClusterLaunchProperty;
sycl::range<Dim> mClusterRange;
KernelFunctor(int *CorrectResultFlag, T ClusterLaunchProperty,
sycl::range<Dim> ClusterRange)
: mCorrectResultFlag(CorrectResultFlag),
mClusterLaunchProperty(ClusterLaunchProperty),
mClusterRange(ClusterRange) {}

void operator()(sycl::nd_item<Dim> It) const {
uint32_t ClusterDimX, ClusterDimY, ClusterDimZ;
// Temporary solution till cluster group class is implemented
#if defined(__SYCL_DEVICE_ONLY__) && defined(__SYCL_CUDA_ARCH__) && \
(__SYCL_CUDA_ARCH__ >= 900)
asm volatile("\n\t"
"mov.u32 %0, %%cluster_nctaid.x; \n\t"
"mov.u32 %1, %%cluster_nctaid.y; \n\t"
"mov.u32 %2, %%cluster_nctaid.z; \n\t"
: "=r"(ClusterDimZ), "=r"(ClusterDimY), "=r"(ClusterDimX));
#endif
if constexpr (Dim == 1) {
if (ClusterDimZ == mClusterRange[0] && ClusterDimY == 1 &&
ClusterDimX == 1) {
*mCorrectResultFlag = 1;
}
} else if constexpr (Dim == 2) {
if (ClusterDimZ == mClusterRange[1] && ClusterDimY == mClusterRange[0] &&
ClusterDimX == 1) {
*mCorrectResultFlag = 1;
}
} else {
if (ClusterDimZ == mClusterRange[2] && ClusterDimY == mClusterRange[1] &&
ClusterDimX == mClusterRange[0]) {
*mCorrectResultFlag = 1;
}
}
}
auto get(sycl::ext::oneapi::experimental::properties_tag) const {
return mClusterLaunchProperty;
}
};

template <int Dim>
int test_cluster_launch_parallel_for(sycl::queue &Queue,
sycl::range<Dim> GlobalRange,
@@ -25,38 +68,10 @@ int test_cluster_launch_parallel_for(sycl::queue &Queue,

Queue
.submit([&](sycl::handler &CGH) {
CGH.parallel_for(sycl::nd_range<Dim>(GlobalRange, LocalRange),
ClusterLaunchProperty, [=](sycl::nd_item<Dim> It) {
uint32_t ClusterDimX, ClusterDimY, ClusterDimZ;
// Temporary solution till cluster group class is implemented
#if defined(__SYCL_DEVICE_ONLY__) && defined(__SYCL_CUDA_ARCH__) && \
(__SYCL_CUDA_ARCH__ >= 900)
asm volatile("\n\t"
"mov.u32 %0, %%cluster_nctaid.x; \n\t"
"mov.u32 %1, %%cluster_nctaid.y; \n\t"
"mov.u32 %2, %%cluster_nctaid.z; \n\t"
: "=r"(ClusterDimZ), "=r"(ClusterDimY),
"=r"(ClusterDimX));
#endif
if constexpr (Dim == 1) {
if (ClusterDimZ == ClusterRange[0] &&
ClusterDimY == 1 && ClusterDimX == 1) {
*CorrectResultFlag = 1;
}
} else if constexpr (Dim == 2) {
if (ClusterDimZ == ClusterRange[1] &&
ClusterDimY == ClusterRange[0] &&
ClusterDimX == 1) {
*CorrectResultFlag = 1;
}
} else {
if (ClusterDimZ == ClusterRange[2] &&
ClusterDimY == ClusterRange[1] &&
ClusterDimX == ClusterRange[0]) {
*CorrectResultFlag = 1;
}
}
});
CGH.parallel_for(
sycl::nd_range<Dim>(GlobalRange, LocalRange),
KernelFunctor<Dim, decltype(ClusterLaunchProperty)>(
CorrectResultFlag, ClusterLaunchProperty, ClusterRange));
})
.wait_and_throw();

@@ -24,6 +24,22 @@ template <typename T> void dummy_kernel(T *Input, int N, sycl::nd_item<1> It) {
#endif
}

template <typename T1, typename T2> struct KernelFunctor {
T1 mAcc;
T2 mClusterLaunchProperty;
KernelFunctor(T2 ClusterLaunchProperty, T1 Acc)
: mClusterLaunchProperty(ClusterLaunchProperty), mAcc(Acc) {}

void operator()(sycl::nd_item<1> It) const {
dummy_kernel(
mAcc.template get_multi_ptr<sycl::access::decorated::yes>().get(), 4096,
It);
}
auto get(sycl::ext::oneapi::experimental::properties_tag) const {
return mClusterLaunchProperty;
}
};

int main() {

std::vector<int> HostArray(4096, -20);
@@ -46,13 +62,8 @@ int main() {
cuda::cluster_size ClusterDims(sycl::range{2});
properties ClusterLaunchProperty{ClusterDims};
auto Acc = Buff.template get_access<sycl::access::mode::read_write>(CGH);
CGH.parallel_for(
sycl::nd_range({4096}, {32}), ClusterLaunchProperty,
[=](sycl::nd_item<1> It) {
dummy_kernel(
Acc.get_multi_ptr<sycl::access::decorated::yes>().get(), 4096,
It);
});
CGH.parallel_for(sycl::nd_range({4096}, {32}),
KernelFunctor(ClusterLaunchProperty, Acc));
});
Queue.submit([&](sycl::handler &CGH) {
auto Acc = Buff.template get_access<sycl::access::mode::read_write>(CGH);
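In the two ClusterLaunch tests above, the functor is additionally templated on the property-list type, because the cluster-size property is built from a runtime sycl::range and the concrete properties type is only known at the call site. A minimal sketch of that shape follows; the names ClusterKernel, Flag, and Props are placeholders, and the cuda::cluster_size spelling is assumed from the extension rather than copied from these diffs.

// Sketch: forwarding a caller-supplied property list through a templated
// functor, as done in the ClusterLaunch tests (names are placeholders).
#include <sycl/sycl.hpp>

namespace syclex = sycl::ext::oneapi::experimental;

template <int Dim, typename PropertiesT> struct ClusterKernel {
  int *mFlag;
  PropertiesT mProps;
  ClusterKernel(int *Flag, PropertiesT Props) : mFlag(Flag), mProps(Props) {}

  void operator()(sycl::nd_item<Dim>) const { *mFlag = 1; }

  // The property list captured at the call site is returned unchanged.
  auto get(syclex::properties_tag) const { return mProps; }
};

// Call site: the concrete properties type is deduced with decltype, e.g.
//   syclex::cuda::cluster_size ClusterDims(sycl::range<2>{2, 2});
//   syclex::properties Props{ClusterDims};
//   CGH.parallel_for(NdRange,
//                    ClusterKernel<2, decltype(Props)>(Flag, Props));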
13 changes: 11 additions & 2 deletions sycl/test-e2e/DeviceCodeSplit/grf.cpp
@@ -67,6 +67,15 @@ bool checkResult(const std::vector<float> &A, int Inc) {
return true;
}

template <typename T1, typename T2> struct KernelFunctor {
T1 mPA;
T2 mProp;
KernelFunctor(T1 PA, T2 Prop) : mPA(PA), mProp(Prop) {}

void operator()(id<1> i) const { mPA[i] += 2; }
auto get(properties_tag) const { return mProp; }
};

int main(void) {
constexpr unsigned Size = 32;
constexpr unsigned VL = 16;
@@ -122,8 +131,8 @@ int main(void) {

auto e = q.submit([&](handler &cgh) {
auto PA = bufa.get_access<access::mode::read_write>(cgh);
cgh.parallel_for<class SYCLKernelSpecifiedGRF>(
Size, prop, [=](id<1> i) { PA[i] += 2; });
cgh.parallel_for<class SYCLKernelSpecifiedGRF>(Size,
KernelFunctor(PA, prop));
});
e.wait();
} catch (sycl::exception const &e) {
68 changes: 0 additions & 68 deletions sycl/test-e2e/Graph/Inputs/sub_group_prop.cpp
@@ -39,49 +39,13 @@ void test(queue &Queue, const std::vector<size_t> SupportedSGSizes) {
return;
}

auto Props = ext::oneapi::experimental::properties{
ext::oneapi::experimental::sub_group_size<SGSize>};

nd_range<1> NdRange(SGSize * 4, SGSize * 2);

size_t ReadSubGroupSize = 0;
{
buffer ReadSubGroupSizeBuf(&ReadSubGroupSize, range(1));
ReadSubGroupSizeBuf.set_write_back(false);

{
exp_ext::command_graph Graph{
Queue.get_context(),
Queue.get_device(),
{exp_ext::property::graph::assume_buffer_outlives_graph{}}};

add_node(Graph, Queue, [&](handler &CGH) {
accessor ReadSubGroupSizeBufAcc{ReadSubGroupSizeBuf, CGH,
sycl::write_only, sycl::no_init};

CGH.parallel_for<SubGroupKernel<Variant::Function, SGSize>>(
NdRange, Props, [=](nd_item<1> NdItem) {
auto SG = NdItem.get_sub_group();
if (NdItem.get_global_linear_id() == 0)
ReadSubGroupSizeBufAcc[0] = SG.get_local_linear_range();
});
});

auto ExecGraph = Graph.finalize();
Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraph); });
Queue.wait_and_throw();
}

host_accessor HostAcc(ReadSubGroupSizeBuf);
ReadSubGroupSize = HostAcc[0];
}
assert(ReadSubGroupSize == SGSize && "Failed check for function.");

ReadSubGroupSize = 0;
{
buffer ReadSubGroupSizeBuf(&ReadSubGroupSize, range(1));
ReadSubGroupSizeBuf.set_write_back(false);

{
exp_ext::command_graph Graph{
Queue.get_context(),
@@ -107,38 +71,6 @@ void test(queue &Queue, const std::vector<size_t> SupportedSGSizes) {
ReadSubGroupSize = HostAcc[0];
}
assert(ReadSubGroupSize == SGSize && "Failed check for functor.");

ReadSubGroupSize = 0;
{
buffer ReadSubGroupSizeBuf(&ReadSubGroupSize, range(1));
ReadSubGroupSizeBuf.set_write_back(false);

{
exp_ext::command_graph Graph{
Queue.get_context(),
Queue.get_device(),
{exp_ext::property::graph::assume_buffer_outlives_graph{}}};

add_node(Graph, Queue, [&](handler &CGH) {
accessor ReadSubGroupSizeBufAcc{ReadSubGroupSizeBuf, CGH,
sycl::write_only, sycl::no_init};
KernelFunctorWithSGSizeProp<SGSize> KernelFunctor{
ReadSubGroupSizeBufAcc};

CGH.parallel_for<SubGroupKernel<Variant::Functor, SGSize>>(
NdRange, Props, KernelFunctor);
});

auto ExecGraph = Graph.finalize();
Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraph); });
Queue.wait_and_throw();
}

host_accessor HostAcc(ReadSubGroupSizeBuf);
ReadSubGroupSize = HostAcc[0];
}
assert(ReadSubGroupSize == SGSize &&
"Failed check for functor and properties.");
}

int main() {
27 changes: 20 additions & 7 deletions sycl/test-e2e/VirtualFunctions/misc/group-barrier.cpp
@@ -101,6 +101,24 @@ class MultiplyOp : public BaseOp {
}
};

template <typename T1, typename T2, typename T3> struct KernelFunctor {
T1 mDeviceStorage;
T2 mDataAcc;
T3 mLocalAcc;
KernelFunctor(T1 DeviceStorage, T2 DataAcc, T3 LocalAcc)
: mDeviceStorage(DeviceStorage), mDataAcc(DataAcc), mLocalAcc(LocalAcc) {}

void operator()(sycl::nd_item<1> It) const {
auto *Ptr = mDeviceStorage->template getAs<BaseOp>();
mDataAcc[It.get_global_id()] = Ptr->apply(
mLocalAcc.template get_multi_ptr<sycl::access::decorated::no>().get(),
It.get_group());
}
auto get(oneapi::properties_tag) const {
return oneapi::properties{oneapi::assume_indirect_calls};
}
};

int main() try {
using storage_t = obj_storage_t<SumOp, MultiplyOp>;

@@ -113,7 +131,6 @@ int main() try {
sycl::range G{16};
sycl::range L{4};

constexpr oneapi::properties props{oneapi::assume_indirect_calls};
for (unsigned TestCase = 0; TestCase < 2; ++TestCase) {
sycl::buffer<int> DataStorage(G);

@@ -126,12 +143,8 @@
q.submit([&](sycl::handler &CGH) {
sycl::accessor DataAcc(DataStorage, CGH, sycl::read_write);
sycl::local_accessor<int> LocalAcc(L, CGH);
CGH.parallel_for(sycl::nd_range{G, L}, props, [=](auto It) {
auto *Ptr = DeviceStorage->getAs<BaseOp>();
DataAcc[It.get_global_id()] = Ptr->apply(
LocalAcc.get_multi_ptr<sycl::access::decorated::no>().get(),
It.get_group());
});
CGH.parallel_for(sycl::nd_range{G, L},
KernelFunctor(DeviceStorage, DataAcc, LocalAcc));
}).wait_and_throw();

auto *Ptr = HostStorage.construct</* ret type = */ BaseOp>(TestCase);