Skip to content

Commit

Permalink
[SYCL][Graph] Fix queue recording barrier to different graphs (intel#…
Browse files Browse the repository at this point in the history
…14212)

Recording barrier submissions to from the same queue to a different
graph current produces the following error with added regression test:

```
Terminate called after throwing an instance of 'sycl::_V1::exception'
  what():  Graph nodes cannot depend on events from another graph.
```

This is because the queue implementation doesn't clear all the state
around what the last queue submission was between graph recordings.

Fixed by clearing all members of the barrier book keeping struct in the
queue.
  • Loading branch information
EwanC authored Jun 20, 2024
1 parent 8e3b8ce commit 82f77d1
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 1 deletion.
8 changes: 7 additions & 1 deletion sycl/source/detail/queue_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -732,7 +732,7 @@ class queue_impl {
std::shared_ptr<ext::oneapi::experimental::detail::graph_impl> Graph) {
std::lock_guard<std::mutex> Lock(MMutex);
MGraph = Graph;
MExtGraphDeps.LastEventPtr = nullptr;
MExtGraphDeps.reset();
}

std::shared_ptr<ext::oneapi::experimental::detail::graph_impl>
Expand Down Expand Up @@ -938,6 +938,12 @@ class queue_impl {
// ordering
std::vector<EventImplPtr> UnenqueuedCmdEvents;
EventImplPtr LastBarrier;

void reset() {
LastEventPtr = nullptr;
UnenqueuedCmdEvents.clear();
LastBarrier = nullptr;
}
} MDefaultGraphDeps, MExtGraphDeps;

const bool MIsInorder;
Expand Down
58 changes: 58 additions & 0 deletions sycl/test-e2e/Graph/RecordReplay/barrier_multi_graph.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
// Extra run to check for immediate-command-list in Level Zero
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
//

#include "../graph_common.hpp"

int main() {
queue Queue{};

int *PtrA = malloc_device<int>(Size, Queue);
int *PtrB = malloc_device<int>(Size, Queue);

exp_ext::command_graph GraphA{Queue};
exp_ext::command_graph GraphB{Queue};

GraphA.begin_recording(Queue);
auto EventA = Queue.submit([&](handler &CGH) {
CGH.parallel_for(range<1>{Size}, [=](id<1> it) { PtrA[it] = it; });
});
Queue.ext_oneapi_submit_barrier({EventA});
Queue.copy(PtrA, PtrB, Size);
GraphA.end_recording();

GraphB.begin_recording(Queue);
auto EventB = Queue.submit([&](handler &CGH) {
CGH.parallel_for(range<1>{Size}, [=](id<1> it) { PtrA[it] = it * 2; });
});
Queue.ext_oneapi_submit_barrier();
Queue.copy(PtrA, PtrB, Size);
GraphB.end_recording();

auto ExecGraphA = GraphA.finalize();
auto ExecGraphB = GraphB.finalize();

Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraphA); }).wait();

std::array<int, Size> Output;
Queue.memcpy(Output.data(), PtrB, sizeof(int) * Size).wait();

for (int i = 0; i < Size; i++) {
assert(Output[i] == i);
}

Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraphB); }).wait();
Queue.memcpy(Output.data(), PtrB, sizeof(int) * Size).wait();

for (int i = 0; i < Size; i++) {
assert(Output[i] == 2 * i);
}

free(PtrA, Queue);
free(PtrB, Queue);
return 0;
}
27 changes: 27 additions & 0 deletions sycl/unittests/Extensions/CommandGraph/Regressions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,30 @@ TEST_F(CommandGraphTest, AccessorModeRegression) {
EXPECT_EQ(NodeC.get_predecessors().size(), 0ul);
EXPECT_EQ(NodeC.get_successors().size(), 0ul);
}

TEST_F(CommandGraphTest, QueueRecordBarrierMultipleGraph) {
// Test that using barriers recorded from the same queue to
// different graphs.

Graph.begin_recording(Queue);
auto NodeKernel = Queue.submit(
[&](sycl::handler &cgh) { cgh.single_task<TestKernel<>>([]() {}); });
Queue.ext_oneapi_submit_barrier({NodeKernel});
Graph.end_recording(Queue);

experimental::command_graph<experimental::graph_state::modifiable> GraphB{
Queue};
GraphB.begin_recording(Queue);
auto NodeKernelB = Queue.submit(
[&](sycl::handler &cgh) { cgh.single_task<TestKernel<>>([]() {}); });
Queue.ext_oneapi_submit_barrier({NodeKernelB});
GraphB.end_recording(Queue);

experimental::command_graph<experimental::graph_state::modifiable> GraphC{
Queue};
GraphC.begin_recording(Queue);
auto NodeKernelC = Queue.submit(
[&](sycl::handler &cgh) { cgh.single_task<TestKernel<>>([]() {}); });
Queue.ext_oneapi_submit_barrier();
GraphC.end_recording(Queue);
}

0 comments on commit 82f77d1

Please sign in to comment.