Skip to content

Commit

Permalink
[SYCL][Graph] Fix queue recording barrier to different graphs
Browse files Browse the repository at this point in the history
Recording barrier submissions to from the same queue to a different
graph current produces the following error with added regression tests:

```
Terminate called after throwing an instance of 'sycl::_V1::exception'
  what():  Graph nodes cannot depend on events from another graph.
```

This is because the queue implementation doesn't clear all the
state around what the last queue submission was between graph
recordings.

Fixed by clearing all members of the barrier book keeping struct
in the queue.
  • Loading branch information
EwanC committed Jun 19, 2024
1 parent 57b8401 commit 9c8002d
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 1 deletion.
8 changes: 7 additions & 1 deletion sycl/source/detail/queue_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -732,7 +732,7 @@ class queue_impl {
std::shared_ptr<ext::oneapi::experimental::detail::graph_impl> Graph) {
std::lock_guard<std::mutex> Lock(MMutex);
MGraph = Graph;
MExtGraphDeps.LastEventPtr = nullptr;
MExtGraphDeps.reset();
}

std::shared_ptr<ext::oneapi::experimental::detail::graph_impl>
Expand Down Expand Up @@ -938,6 +938,12 @@ class queue_impl {
// ordering
std::vector<EventImplPtr> UnenqueuedCmdEvents;
EventImplPtr LastBarrier;

void reset() {
LastEventPtr = nullptr;
UnenqueuedCmdEvents.clear();
LastBarrier = nullptr;
}
} MDefaultGraphDeps, MExtGraphDeps;

const bool MIsInorder;
Expand Down
58 changes: 58 additions & 0 deletions sycl/test-e2e/Graph/RecordReplay/barrier_multi_graph.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
// Extra run to check for immediate-command-list in Level Zero
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
//

#include "../graph_common.hpp"

int main() {
queue Queue{};

int *PtrA = malloc_device<int>(Size, Queue);
int *PtrB = malloc_device<int>(Size, Queue);

exp_ext::command_graph GraphA{Queue};
exp_ext::command_graph GraphB{Queue};

GraphA.begin_recording(Queue);
auto EventA = Queue.submit([&](handler &CGH) {
CGH.parallel_for(range<1>{Size}, [=](id<1> it) { PtrA[it] = it; });
});
Queue.ext_oneapi_submit_barrier({EventA});
Queue.copy(PtrA, PtrB, Size);
GraphA.end_recording();

GraphB.begin_recording(Queue);
auto EventB = Queue.submit([&](handler &CGH) {
CGH.parallel_for(range<1>{Size}, [=](id<1> it) { PtrA[it] = it * 2; });
});
Queue.ext_oneapi_submit_barrier();
Queue.copy(PtrA, PtrB, Size);
GraphB.end_recording();

auto ExecGraphA = GraphA.finalize();
auto ExecGraphB = GraphB.finalize();

Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraphA); }).wait();

std::array<int, Size> Output;
Queue.memcpy(Output.data(), PtrB, sizeof(int) * Size).wait();

for (int i = 0; i < Size; i++) {
assert(Output[i] == i);
}

Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraphB); }).wait();
Queue.memcpy(Output.data(), PtrB, sizeof(int) * Size).wait();

for (int i = 0; i < Size; i++) {
assert(Output[i] == 2 * i);
}

free(PtrA, Queue);
free(PtrB, Queue);
return 0;
}
27 changes: 27 additions & 0 deletions sycl/unittests/Extensions/CommandGraph/Regressions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,30 @@ TEST_F(CommandGraphTest, AccessorModeRegression) {
EXPECT_EQ(NodeC.get_predecessors().size(), 0ul);
EXPECT_EQ(NodeC.get_successors().size(), 0ul);
}

TEST_F(CommandGraphTest, QueueRecordBarrierMultipleGraph) {
// Test that using barriers recorded from the same queue to
// different graphs.

Graph.begin_recording(Queue);
auto NodeKernel = Queue.submit(
[&](sycl::handler &cgh) { cgh.single_task<TestKernel<>>([]() {}); });
Queue.ext_oneapi_submit_barrier({NodeKernel});
Graph.end_recording(Queue);

experimental::command_graph<experimental::graph_state::modifiable> GraphB{
Queue};
GraphB.begin_recording(Queue);
auto NodeKernelB = Queue.submit(
[&](sycl::handler &cgh) { cgh.single_task<TestKernel<>>([]() {}); });
Queue.ext_oneapi_submit_barrier({NodeKernelB});
GraphB.end_recording(Queue);

experimental::command_graph<experimental::graph_state::modifiable> GraphC{
Queue};
GraphC.begin_recording(Queue);
auto NodeKernelC = Queue.submit(
[&](sycl::handler &cgh) { cgh.single_task<TestKernel<>>([]() {}); });
Queue.ext_oneapi_submit_barrier();
GraphC.end_recording(Queue);
}

0 comments on commit 9c8002d

Please sign in to comment.