Skip to content

Commit 3d8edb2

Browse files
committed
[SYCL][Graph] 3D kernel update regression test
Add an E2E regression test for updating kernel nodes with 3 dimensions. The test contains a graph with two nodes: the first node uses an NDRange with a user-specified local size, and the second node uses a Range with an implementation-determined local size.
1 parent 3672997 commit 3d8edb2

File tree

1 file changed

+83
-0
lines changed

1 file changed

+83
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
4+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
5+
// Extra run to check for immediate-command-list in Level Zero
6+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7+
//
8+
9+
// Tests updating a 3D ND-Range graph kernel node using index-based explicit
10+
// update
11+
12+
#include "../graph_common.hpp"
13+
14+
int main() {
15+
queue Queue{};
16+
17+
const range<3> GlobalWorkSize(1, 2, 2);
18+
const range<3> LocalWorkSize(1, 2, 2);
19+
const size_t N = GlobalWorkSize[0] * GlobalWorkSize[1] * GlobalWorkSize[2];
20+
21+
exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()};
22+
23+
int *PtrA = malloc_device<int>(N, Queue);
24+
int *PtrB = malloc_device<int>(N, Queue);
25+
26+
std::vector<int> HostDataA(N);
27+
std::vector<int> HostDataB(N);
28+
29+
Queue.memset(PtrA, 0, N * sizeof(int)).wait();
30+
Queue.memset(PtrB, 0, N * sizeof(int)).wait();
31+
32+
exp_ext::dynamic_parameter DynParam(Graph, PtrA);
33+
34+
nd_range<3> NDRange{GlobalWorkSize, LocalWorkSize};
35+
auto NodeA = Graph.add([&](handler &cgh) {
36+
cgh.set_arg(0, DynParam);
37+
// TODO: Use the free function kernel extension instead of regular kernels
38+
// when available.
39+
cgh.parallel_for(NDRange, [=](nd_item<3> Item) {
40+
size_t GlobalID = Item.get_global_linear_id();
41+
PtrA[GlobalID] = GlobalID;
42+
});
43+
});
44+
45+
range<3> Range{GlobalWorkSize};
46+
auto NodeB = Graph.add(
47+
[&](handler &cgh) {
48+
cgh.set_arg(0, DynParam);
49+
// TODO: Use the free function kernel extension instead of regular
50+
// kernels when available.
51+
cgh.parallel_for(Range, [=](item<3> Item) {
52+
size_t GlobalID = Item.get_linear_id();
53+
PtrA[GlobalID] *= 2;
54+
});
55+
},
56+
exp_ext::property::node::depends_on{NodeA});
57+
58+
auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{});
59+
60+
// PtrA should be filled with values
61+
Queue.ext_oneapi_graph(ExecGraph).wait();
62+
63+
Queue.copy(PtrA, HostDataA.data(), N).wait();
64+
Queue.copy(PtrB, HostDataB.data(), N).wait();
65+
for (size_t i = 0; i < N; i++) {
66+
assert(HostDataA[i] == (i * 2));
67+
assert(HostDataB[i] == 0);
68+
}
69+
70+
// Swap PtrB to be the input/output
71+
DynParam.update(PtrB);
72+
ExecGraph.update({NodeA, NodeB});
73+
Queue.ext_oneapi_graph(ExecGraph).wait();
74+
75+
Queue.copy(PtrA, HostDataA.data(), N).wait();
76+
Queue.copy(PtrB, HostDataB.data(), N).wait();
77+
for (size_t i = 0; i < N; i++) {
78+
const size_t Ref = i * 2;
79+
assert(HostDataA[i] == Ref);
80+
assert(HostDataB[i] == Ref);
81+
}
82+
return 0;
83+
}

0 commit comments

Comments
 (0)