diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc index 9a7a1e309eb1a..1c8242f0b2514 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc @@ -1703,11 +1703,15 @@ passed an invalid event. The new handler methods, and queue shortcuts, defined by link:../supported/sycl_ext_oneapi_enqueue_barrier.asciidoc[sycl_ext_oneapi_enqueue_barrier] can only be used in graph nodes created using the Record & Replay API, as -barriers rely on events to enforce dependencies. A synchronous exception will be -thrown with error code `invalid` if a user tries to add them to a graph using -the Explicit API. Empty nodes created with the `node::depends_on_all_leaves` -property can be used instead of barriers when a user is building a graph with -the explicit API. +barriers rely on events to enforce dependencies. For barriers with an empty +wait list parameter, the semantics are that the barrier node being added to +will depend on all the existing graph leave nodes, not only the leave nodes +that were added from the queue being recorded. + +A synchronous exception will be thrown with error code `invalid` if a user +tries to add them to a graph using the Explicit API. Empty nodes created with +the `node::depends_on_all_leaves` property can be used instead of barriers when +a user is building a graph with the explicit API. ==== sycl_ext_oneapi_memcpy2d diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index acaecf2696629..4cc1bf865b736 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -207,14 +207,13 @@ void queue::wait_and_throw_proxy(const detail::code_location &CodeLoc) { static event getBarrierEventForInorderQueueHelper(const detail::QueueImplPtr QueueImpl) { - // The last command recorded in the graph is not tracked by the queue but by - // the graph itself. We must therefore search for the last node/event in the + // This function should not be called when a queue is recording to a graph, + // as a graph can record from multiple queues and we cannot guarantee the + // last node added by an in-order queue will be the last node added to the // graph. - if (auto Graph = QueueImpl->getCommandGraph()) { - auto LastEvent = - Graph->getEventForNode(Graph->getLastInorderNode(QueueImpl)); - return sycl::detail::createSyclObjFromImpl(LastEvent); - } + assert(!QueueImpl->getCommandGraph() && + "Should not be called in on graph recording."); + auto LastEvent = QueueImpl->getLastEvent(); if (QueueImpl->MDiscardEvents) { std::cout << "Discard event enabled" << std::endl; @@ -241,7 +240,7 @@ getBarrierEventForInorderQueueHelper(const detail::QueueImplPtr QueueImpl) { /// \return a SYCL event object, which corresponds to the queue the command /// group is being enqueued on. event queue::ext_oneapi_submit_barrier(const detail::code_location &CodeLoc) { - if (is_in_order()) + if (is_in_order() && !impl->getCommandGraph()) return getBarrierEventForInorderQueueHelper(impl); return submit([=](handler &CGH) { CGH.ext_oneapi_barrier(); }, CodeLoc); @@ -260,10 +259,10 @@ event queue::ext_oneapi_submit_barrier(const std::vector &WaitList, const detail::code_location &CodeLoc) { bool AllEventsEmptyOrNop = std::all_of( begin(WaitList), end(WaitList), [&](const event &Event) -> bool { - return !detail::getSyclObjImpl(Event)->isContextInitialized() || - detail::getSyclObjImpl(Event)->isNOP(); + auto EventImpl = detail::getSyclObjImpl(Event); + return !EventImpl->isContextInitialized() || EventImpl->isNOP(); }); - if (is_in_order() && AllEventsEmptyOrNop) + if (is_in_order() && !impl->getCommandGraph() && AllEventsEmptyOrNop) return getBarrierEventForInorderQueueHelper(impl); return submit([=](handler &CGH) { CGH.ext_oneapi_barrier(WaitList); }, diff --git a/sycl/test-e2e/Graph/RecordReplay/barrier_multi_queue.cpp b/sycl/test-e2e/Graph/RecordReplay/barrier_multi_queue.cpp new file mode 100644 index 0000000000000..00db31e9ed212 --- /dev/null +++ b/sycl/test-e2e/Graph/RecordReplay/barrier_multi_queue.cpp @@ -0,0 +1,45 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero && linux %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// + +#include "../graph_common.hpp" + +int main() { + queue Queue1{{sycl::property::queue::in_order()}}; + queue Queue2{Queue1.get_context(), + Queue1.get_device(), + {sycl::property::queue::in_order()}}; + + int *PtrA = malloc_device(Size, Queue1); + int *PtrB = malloc_device(Size, Queue1); + + exp_ext::command_graph Graph{Queue1}; + Graph.begin_recording({Queue1, Queue2}); + + auto EventA = Queue1.submit([&](handler &CGH) { + CGH.parallel_for(range<1>{Size}, [=](id<1> it) { PtrA[it] = it; }); + }); + + Queue2.ext_oneapi_submit_barrier({EventA}); + + auto EventB = Queue2.copy(PtrA, PtrB, Size); + Graph.end_recording(); + + auto ExecGraph = Graph.finalize(); + Queue1.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraph); }); + + std::array Output; + Queue1.memcpy(Output.data(), PtrB, sizeof(int) * Size).wait(); + + for (int i = 0; i < Size; i++) { + assert(Output[i] == i); + } + + free(PtrA, Queue1); + free(PtrB, Queue1); + return 0; +} diff --git a/sycl/unittests/Extensions/CommandGraph/Barrier.cpp b/sycl/unittests/Extensions/CommandGraph/Barrier.cpp index 6135d1dca924f..79f55dc226b62 100644 --- a/sycl/unittests/Extensions/CommandGraph/Barrier.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Barrier.cpp @@ -283,3 +283,302 @@ TEST_F(CommandGraphTest, EnqueueMultipleBarrier) { } } } + +TEST_F(CommandGraphTest, InOrderQueueWithPreviousCommand) { + sycl::property_list Properties{sycl::property::queue::in_order()}; + sycl::queue InOrderQueue{Dev, Properties}; + + auto NonGraphEvent = InOrderQueue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + Graph.begin_recording(InOrderQueue); + + ASSERT_THROW( + { + try { + InOrderQueue.ext_oneapi_submit_barrier({NonGraphEvent}); + } catch (const sycl::exception &e) { + ASSERT_EQ(e.code(), make_error_code(sycl::errc::invalid)); + throw; + } + }, + sycl::exception); + + InOrderQueue.ext_oneapi_submit_barrier(); + Graph.end_recording(InOrderQueue); + + // Check the graph structure + // (B) + auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); + ASSERT_EQ(GraphImpl->MRoots.size(), 1lu); + + for (auto Root : GraphImpl->MRoots) { + auto RootNode = Root.lock(); + ASSERT_EQ(RootNode->MSuccessors.size(), 0lu); + ASSERT_TRUE(RootNode->MCGType == sycl::detail::CG::Barrier); + } +} + +TEST_F(CommandGraphTest, InOrderQueuesWithBarrier) { + sycl::property_list Properties{sycl::property::queue::in_order()}; + sycl::queue InOrderQueue1{Dev, Properties}; + sycl::queue InOrderQueue2{InOrderQueue1.get_context(), Dev, Properties}; + sycl::queue InOrderQueue3{InOrderQueue1.get_context(), Dev, Properties}; + + experimental::command_graph Graph{ + InOrderQueue1}; + + Graph.begin_recording({InOrderQueue1, InOrderQueue2, InOrderQueue3}); + + auto Node1 = InOrderQueue1.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto Node2 = InOrderQueue2.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + InOrderQueue3.ext_oneapi_submit_barrier({Node1}); + + Graph.end_recording(); + + // Check the graph structure + // (1) (2) + // | + // (B) + auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); + ASSERT_EQ(GraphImpl->MRoots.size(), 2lu); + + for (auto Root : GraphImpl->MRoots) { + auto RootNode = Root.lock(); + + if (GraphImpl->getEventForNode(RootNode) == + sycl::detail::getSyclObjImpl(Node1)) { + ASSERT_EQ(RootNode->MSuccessors.size(), 1lu); + + auto SuccNode = RootNode->MSuccessors.front().lock(); + ASSERT_TRUE(SuccNode->MCGType == sycl::detail::CG::Barrier); + + ASSERT_EQ(SuccNode->MPredecessors.size(), 1lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 0lu); + } else if (GraphImpl->getEventForNode(RootNode) == + sycl::detail::getSyclObjImpl(Node2)) { + ASSERT_EQ(RootNode->MSuccessors.size(), 0lu); + } else { + ASSERT_TRUE(false && "Unexpected root node"); + } + } +} + +TEST_F(CommandGraphTest, InOrderQueuesWithBarrierWaitList) { + sycl::property_list Properties{sycl::property::queue::in_order()}; + sycl::queue InOrderQueue1{Dev, Properties}; + sycl::queue InOrderQueue2{InOrderQueue1.get_context(), Dev, Properties}; + + experimental::command_graph Graph{ + InOrderQueue1}; + + Graph.begin_recording({InOrderQueue1, InOrderQueue2}); + + auto Node1 = InOrderQueue1.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto Node2 = InOrderQueue2.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto BarrierNode = InOrderQueue2.ext_oneapi_submit_barrier({Node1}); + + Graph.end_recording(); + + // Check the graph structure + // (1) (2) + // | / + // (B) + auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); + ASSERT_EQ(GraphImpl->MRoots.size(), 2lu); + + for (auto Root : GraphImpl->MRoots) { + auto RootNode = Root.lock(); + + ASSERT_EQ(RootNode->MSuccessors.size(), 1lu); + + auto SuccNode = RootNode->MSuccessors.front().lock(); + ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), + sycl::detail::getSyclObjImpl(BarrierNode)); + + ASSERT_EQ(SuccNode->MPredecessors.size(), 2lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 0lu); + } +} + +TEST_F(CommandGraphTest, InOrderQueuesWithEmptyBarrierWaitList) { + sycl::property_list Properties{sycl::property::queue::in_order()}; + sycl::queue InOrderQueue1{Dev, Properties}; + sycl::queue InOrderQueue2{InOrderQueue1.get_context(), Dev, Properties}; + + experimental::command_graph Graph{ + InOrderQueue1}; + + Graph.begin_recording({InOrderQueue1, InOrderQueue2}); + + auto Node1 = InOrderQueue1.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto Node2 = InOrderQueue2.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto BarrierNode = InOrderQueue1.ext_oneapi_submit_barrier(); + + auto Node3 = InOrderQueue2.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + Graph.end_recording(); + + // Check the graph structure + // (1) (2) + // \ / | + // (B) | + // | / + // (3) + auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); + ASSERT_EQ(GraphImpl->MRoots.size(), 2lu); + + for (auto Root : GraphImpl->MRoots) { + auto RootNode = Root.lock(); + + if (GraphImpl->getEventForNode(RootNode) == + sycl::detail::getSyclObjImpl(Node1)) { + ASSERT_EQ(RootNode->MSuccessors.size(), 1lu); + } else if (GraphImpl->getEventForNode(RootNode) == + sycl::detail::getSyclObjImpl(Node2)) { + ASSERT_EQ(RootNode->MSuccessors.size(), 2lu); + } else { + ASSERT_TRUE(false && "Unexpected root node"); + } + + auto SuccNode = RootNode->MSuccessors.front().lock(); + + ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), + sycl::detail::getSyclObjImpl(BarrierNode)); + ASSERT_EQ(SuccNode->MPredecessors.size(), 2lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 1lu); + + auto SuccSuccNode = SuccNode->MSuccessors.front().lock(); + + ASSERT_EQ(SuccSuccNode->MPredecessors.size(), 2lu); + ASSERT_EQ(SuccSuccNode->MSuccessors.size(), 0lu); + + ASSERT_EQ(GraphImpl->getEventForNode(SuccSuccNode), + sycl::detail::getSyclObjImpl(Node3)); + } +} + +TEST_F(CommandGraphTest, BarrierMixedQueueTypes) { + sycl::property_list Properties{sycl::property::queue::in_order()}; + sycl::queue InOrderQueue{Dev, Properties}; + sycl::queue OutOfOrderQueue{InOrderQueue.get_context(), Dev}; + + experimental::command_graph Graph{ + InOrderQueue}; + + Graph.begin_recording({InOrderQueue, OutOfOrderQueue}); + + auto Node1 = OutOfOrderQueue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto Node2 = OutOfOrderQueue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto BarrierNode = InOrderQueue.ext_oneapi_submit_barrier(); + + auto Node3 = OutOfOrderQueue.submit([&](sycl::handler &cgh) { + cgh.depends_on(Node2); + cgh.single_task>([]() {}); + }); + + Graph.end_recording(); + + // Check the graph structure + // (1) (2) + // \ /| + // (B) | + // | / + // (3) + auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); + ASSERT_EQ(GraphImpl->MRoots.size(), 2lu); + + for (auto Root : GraphImpl->MRoots) { + auto RootNode = Root.lock(); + + if (GraphImpl->getEventForNode(RootNode) == + sycl::detail::getSyclObjImpl(Node1)) { + ASSERT_EQ(RootNode->MSuccessors.size(), 1lu); + } else if (GraphImpl->getEventForNode(RootNode) == + sycl::detail::getSyclObjImpl(Node2)) { + ASSERT_EQ(RootNode->MSuccessors.size(), 2lu); + } else { + ASSERT_TRUE(false && "Unexpected root node"); + } + + auto SuccNode = RootNode->MSuccessors.front().lock(); + + ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), + sycl::detail::getSyclObjImpl(BarrierNode)); + ASSERT_EQ(SuccNode->MPredecessors.size(), 2lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 1lu); + + auto SuccSuccNode = SuccNode->MSuccessors.front().lock(); + + ASSERT_EQ(SuccSuccNode->MPredecessors.size(), 2lu); + ASSERT_EQ(SuccSuccNode->MSuccessors.size(), 0lu); + + ASSERT_EQ(GraphImpl->getEventForNode(SuccSuccNode), + sycl::detail::getSyclObjImpl(Node3)); + } +} + +TEST_F(CommandGraphTest, BarrierBetweenExplicitNodes) { + sycl::property_list Properties{sycl::property::queue::in_order()}; + sycl::queue InOrderQueue{Dev, Properties}; + + experimental::command_graph Graph{ + InOrderQueue}; + + auto Node1 = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + Graph.begin_recording(InOrderQueue); + auto BarrierNode = InOrderQueue.ext_oneapi_submit_barrier(); + Graph.end_recording(); + + auto Node2 = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + // Check the graph structure + // (1) + // | + // (B) + // | + // (2) + auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); + ASSERT_EQ(GraphImpl->MRoots.size(), 1lu); + + for (auto Root : GraphImpl->MRoots) { + auto RootNode = Root.lock(); + auto Node1Impl = sycl::detail::getSyclObjImpl(Node1); + ASSERT_EQ(RootNode, Node1Impl); + + auto SuccNode = RootNode->MSuccessors.front().lock(); + + ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), + sycl::detail::getSyclObjImpl(BarrierNode)); + ASSERT_EQ(SuccNode->MPredecessors.size(), 1lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 1lu); + + auto SuccSuccNode = SuccNode->MSuccessors.front().lock(); + + ASSERT_EQ(SuccSuccNode->MPredecessors.size(), 1lu); + ASSERT_EQ(SuccSuccNode->MSuccessors.size(), 0lu); + + auto Node2Impl = sycl::detail::getSyclObjImpl(Node2); + ASSERT_EQ(SuccSuccNode, Node2Impl); + } +}