From d89a3eaa72fd2e84d71e361eb787dc29619fc984 Mon Sep 17 00:00:00 2001 From: Ben Tracy Date: Thu, 7 Mar 2024 18:26:31 +0000 Subject: [PATCH] [SYCL][Graph] Fix access modes not being respected - Fix access modes not being respected and creating unnecessary edges - Update printing E2E tests since output has changed - Add unit tests for access modes --- sycl/source/detail/graph_impl.cpp | 4 +- sycl/source/detail/graph_impl.hpp | 32 ++++- .../Graph/Explicit/debug_print_graph.cpp | 22 ++-- .../Explicit/debug_print_graph_verbose.cpp | 24 ++-- .../Graph/RecordReplay/debug_print_graph.cpp | 22 ++-- .../debug_print_graph_verbose.cpp | 24 ++-- .../Extensions/CommandGraph/CMakeLists.txt | 1 + .../Extensions/CommandGraph/CommandGraph.cpp | 120 +++++++++++++++++- .../Extensions/CommandGraph/Regressions.cpp | 60 +++++++++ 9 files changed, 255 insertions(+), 54 deletions(-) create mode 100644 sycl/unittests/Extensions/CommandGraph/Regressions.cpp diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index f38f8a518a058..7f56e8fd55bc5 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -439,12 +439,12 @@ graph_impl::add(node_type NodeType, } // Look through the graph for nodes which share this requirement for (auto &Node : MNodeStorage) { - if (Node->hasRequirement(Req)) { + if (Node->hasRequirementDependency(Req)) { bool ShouldAddDep = true; // If any of this node's successors have this requirement then we skip // adding the current node as a dependency. for (auto &Succ : Node->MSuccessors) { - if (Succ.lock()->hasRequirement(Req)) { + if (Succ.lock()->hasRequirementDependency(Req)) { ShouldAddDep = false; break; } diff --git a/sycl/source/detail/graph_impl.hpp b/sycl/source/detail/graph_impl.hpp index 9da782c2fcd21..47a698a94a02c 100644 --- a/sycl/source/detail/graph_impl.hpp +++ b/sycl/source/detail/graph_impl.hpp @@ -154,16 +154,38 @@ class node_impl { MCGType(Other.MCGType), MNodeType(Other.MNodeType), MCommandGroup(Other.getCGCopy()), MSubGraphImpl(Other.MSubGraphImpl) {} - /// Checks if this node has a given requirement. - /// @param Requirement Requirement to lookup. - /// @return True if \p Requirement is present in node, false otherwise. - bool hasRequirement(sycl::detail::AccessorImplHost *IncomingReq) { + /// Checks if this node should be a dependency of another node based on + /// accessor requirements. This is calculated using access modes if a + /// requirement to the same buffer is found inside this node. + /// @param IncomingReq Incoming requirement. + /// @return True if a dependency is needed, false if not. + bool hasRequirementDependency(sycl::detail::AccessorImplHost *IncomingReq) { + access_mode InMode = IncomingReq->MAccessMode; + switch (InMode) { + case access_mode::read: + case access_mode::read_write: + case access_mode::atomic: + break; + // These access modes don't care about existing buffer data, so we don't + // need a dependency. + case access_mode::write: + case access_mode::discard_read_write: + case access_mode::discard_write: + return false; + } + for (sycl::detail::AccessorImplHost *CurrentReq : MCommandGroup->getRequirements()) { if (IncomingReq->MSYCLMemObj == CurrentReq->MSYCLMemObj) { - return true; + access_mode CurrentMode = CurrentReq->MAccessMode; + // Since we have an incoming read requirement, we only care + // about requirements on this node if they are write + if (CurrentMode != access_mode::read) { + return true; + } } } + // No dependency necessary return false; } diff --git a/sycl/test-e2e/Graph/Explicit/debug_print_graph.cpp b/sycl/test-e2e/Graph/Explicit/debug_print_graph.cpp index 92fda2c837dc6..b7a5c689de1ed 100644 --- a/sycl/test-e2e/Graph/Explicit/debug_print_graph.cpp +++ b/sycl/test-e2e/Graph/Explicit/debug_print_graph.cpp @@ -12,22 +12,22 @@ // CHECK-SAME: [style=bold, label="ID = 0x[[#NODE2]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11add_kernelsItEN4sycl3_V13ext6oneapi12experimental4nodeENS4_13command_graphILNS4_11graph_stateE0EEEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constISA_E4typeEEEvEESH_SH_ENKUlRNS1_7handlerEE0_clESJ_EUlNS1_4itemILi1ELb1EEEE_\n"]; // CHECK-NEXT: "0x[[#NODE1]]" -> "0x[[#NODE2]]" // CHECK-NEXT: "0x[[#%x,NODE3:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE3]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11add_kernelsItEN4sycl3_V13ext6oneapi12experimental4nodeENS4_13command_graphILNS4_11graph_stateE0EEEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constISA_E4typeEEEvEESH_SH_ENKUlRNS1_7handlerEE1_clESJ_EUlNS1_4itemILi1ELb1EEEE_\n"]; -// CHECK-NEXT: "0x[[#NODE2]]" -> "0x[[#NODE3]]" +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE3]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11add_kernelsItEN4sycl3_V13ext6oneapi12experimental4nodeENS4_13command_graphILNS4_11graph_stateE0EEEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constISA_E4typeEEEvEESH_SH_ENKUlRNS1_7handlerEE2_clESJ_EUlNS1_4itemILi1ELb1EEEE_\n"]; +// CHECK-DAG: "0x[[#NODE2]]" -> "0x[[#NODE3]]" +// CHECK-DAG: "0x[[#%x,NODE7:]]" -> "0x[[#NODE3]]" // CHECK-NEXT: "0x[[#%x,NODE4:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE4]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11add_kernelsItEN4sycl3_V13ext6oneapi12experimental4nodeENS4_13command_graphILNS4_11graph_stateE0EEEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constISA_E4typeEEEvEESH_SH_ENKUlRNS1_7handlerEE2_clESJ_EUlNS1_4itemILi1ELb1EEEE_\n"]; +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE4]]\nTYPE = CGCopy Device-to-Device \n"]; // CHECK-DAG: "0x[[#NODE3]]" -> "0x[[#NODE4]]" -// CHECK-DAG: "0x[[#NODE2]]" -> "0x[[#NODE4]]" +// CHECK-DAG: "0x[[#NODE1]]" -> "0x[[#NODE4]]" // CHECK-NEXT: "0x[[#%x,NODE5:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE5]]\nTYPE = CGCopy Device-to-Device \n"]; +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE5]]\nTYPE = CGCopy Device-to-Host \n"]; // CHECK-DAG: "0x[[#NODE3]]" -> "0x[[#NODE5]]" -// CHECK-DAG: "0x[[#NODE4]]" -> "0x[[#NODE5]] // CHECK-NEXT: "0x[[#%x,NODE6:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE6]]\nTYPE = CGCopy Device-to-Host \n"]; -// CHECK-DAG: "0x[[#NODE4]]" -> "0x[[#NODE6]]" -// CHECK-NEXT: "0x[[#%x,NODE7:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE7]]\nTYPE = None \n"]; -// CHECK-DAG: "0x[[#NODE6]]" -> "0x[[#NODE7]]" +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE6]]\nTYPE = None \n"]; +// CHECK-DAG: "0x[[#NODE5]]" -> "0x[[#NODE6]]" +// CHECK-NEXT: "0x[[#NODE7]]" +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE7]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11add_kernelsItEN4sycl3_V13ext6oneapi12experimental4nodeENS4_13command_graphILNS4_11graph_stateE0EEEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constISA_E4typeEEEvEESH_SH_ENKUlRNS1_7handlerEE1_clESJ_EUlNS1_4itemILi1ELb1EEEE_\n"]; +// CHECK-DAG: "0x[[#NODE1]]" -> "0x[[#NODE7]]" #define GRAPH_E2E_EXPLICIT diff --git a/sycl/test-e2e/Graph/Explicit/debug_print_graph_verbose.cpp b/sycl/test-e2e/Graph/Explicit/debug_print_graph_verbose.cpp index e06deb61a205c..d3f79ce6b8a01 100644 --- a/sycl/test-e2e/Graph/Explicit/debug_print_graph_verbose.cpp +++ b/sycl/test-e2e/Graph/Explicit/debug_print_graph_verbose.cpp @@ -16,26 +16,26 @@ // CHECK-SAME: 3) Type: STD_Layout Ptr: 0x[[#ADDR4]]\n4) Type: Accessor Ptr: 0x[[#%x,ADDR7:]]\n5) Type: STD_Layout Ptr: 0x[[#%x,ADDR8:]]\n6) Type: STD_Layout Ptr: 0x[[#%x,ADDR9:]]\n7) Type: STD_Layout Ptr: 0x[[#%x,ADDR10:]]\n"]; // CHECK-NEXT: "0x[[#NODE1]]" -> "0x[[#NODE2]]" // CHECK-NEXT: "0x[[#%x,NODE3:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE3]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11add_kernelsItEN4sycl3_V13ext6oneapi12experimental4nodeENS4_13command_graphILNS4_11graph_stateE0EEEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constISA_E4typeEEEvEESH_SH_ENKUlRNS1_7handlerEE1_clESJ_EUlNS1_4itemILi1ELb1EEEE_\n +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE3]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11add_kernelsItEN4sycl3_V13ext6oneapi12experimental4nodeENS4_13command_graphILNS4_11graph_stateE0EEEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constISA_E4typeEEEvEESH_SH_ENKUlRNS1_7handlerEE2_clESJ_EUlNS1_4itemILi1ELb1EEEE_\n // CHECK-SAME: ARGS = \n0) Type: Accessor Ptr: 0x[[#%x,ADDR11:]]\n1) Type: STD_Layout Ptr: 0x[[#%x,ADDR12:]]\n2) Type: STD_Layout Ptr: 0x[[#%x,ADDR13:]]\n // CHECK-SAME: 3) Type: STD_Layout Ptr: 0x[[#ADDR11]]\n4) Type: Accessor Ptr: 0x[[#%x,ADDR14:]]\n5) Type: STD_Layout Ptr: 0x[[#%x,ADDR15:]]\n6) Type: STD_Layout Ptr: 0x[[#%x,ADDR16:]]\n7) Type: STD_Layout Ptr: 0x[[#%x,ADDR17:]]\n"]; // CHECK-NEXT: "0x[[#NODE2]]" -> "0x[[#NODE3]]" +// CHECK-NEXT: "0x[[#%x,NODE7:]]" -> "0x[[#NODE3]]" // CHECK-NEXT: "0x[[#%x,NODE4:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE4]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11add_kernelsItEN4sycl3_V13ext6oneapi12experimental4nodeENS4_13command_graphILNS4_11graph_stateE0EEEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constISA_E4typeEEEvEESH_SH_ENKUlRNS1_7handlerEE2_clESJ_EUlNS1_4itemILi1ELb1EEEE_\n -// CHECK-SAME: ARGS = \n0) Type: Accessor Ptr: 0x[[#%x,ADDR18:]]\n1) Type: STD_Layout Ptr: 0x[[#%x,ADDR19:]]\n2) Type: STD_Layout Ptr: 0x[[#%x,ADDR20:]]\n -// CHECK-SAME: 3) Type: STD_Layout Ptr: 0x[[#ADDR18]]\n4) Type: Accessor Ptr: 0x[[#%x,ADDR21:]]\n5) Type: STD_Layout Ptr: 0x[[#%x,ADDR22:]]\n6) Type: STD_Layout Ptr: 0x[[#%x,ADDR23:]]\n7) Type: STD_Layout Ptr: 0x[[#%x,ADDR24:]]\n"]; +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE4]]\nTYPE = CGCopy Device-to-Device \nSrc: 0x[[#%x,ADDR18:]] Dst: 0x[[#%x,ADDR19:]]\n"]; // CHECK-DAG: "0x[[#NODE3]]" -> "0x[[#NODE4]]" -// CHECK-DAG: "0x[[#NODE2]]" -> "0x[[#NODE4]]" +// CHECK-DAG: "0x[[#NODE1]]" -> "0x[[#NODE4]] // CHECK-NEXT: "0x[[#%x,NODE5:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE5]]\nTYPE = CGCopy Device-to-Device \nSrc: 0x[[#%x,ADDR25:]] Dst: 0x[[#%x,ADDR26:]]\n"]; +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE5]]\nTYPE = CGCopy Device-to-Host \nSrc: 0x[[#%x,ADDR20:]] Dst: 0x[[#%x,ADDR21:]]\n"]; // CHECK-DAG: "0x[[#NODE3]]" -> "0x[[#NODE5]]" -// CHECK-DAG: "0x[[#NODE4]]" -> "0x[[#NODE5]] // CHECK-NEXT: "0x[[#%x,NODE6:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE6]]\nTYPE = CGCopy Device-to-Host \nSrc: 0x[[#%x,ADDR27:]] Dst: 0x[[#%x,ADDR28:]]\n"]; -// CHECK-DAG: "0x[[#NODE4]]" -> "0x[[#NODE6]]" -// CHECK-NEXT: "0x[[#%x,NODE7:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE7]]\nTYPE = None \n"]; -// CHECK-DAG: "0x[[#NODE6]]" -> "0x[[#NODE7]]" +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE6]]\nTYPE = None \n"]; +// CHECK-DAG: "0x[[#NODE5]]" -> "0x[[#NODE6]]" +// CHECK-NEXT: "0x[[#NODE7]]" +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE7]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11add_kernelsItEN4sycl3_V13ext6oneapi12experimental4nodeENS4_13command_graphILNS4_11graph_stateE0EEEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constISA_E4typeEEEvEESH_SH_ENKUlRNS1_7handlerEE1_clESJ_EUlNS1_4itemILi1ELb1EEEE_\n +// CHECK-SAME: ARGS = \n0) Type: Accessor Ptr: 0x[[#%x,ADDR22:]]\n1) Type: STD_Layout Ptr: 0x[[#%x,ADDR23:]]\n2) Type: STD_Layout Ptr: 0x[[#%x,ADDR24:]]\n +// CHECK-SAME: 3) Type: STD_Layout Ptr: 0x[[#ADDR22]]\n4) Type: Accessor Ptr: 0x[[#%x,ADDR25:]]\n5) Type: STD_Layout Ptr: 0x[[#%x,ADDR26:]]\n6) Type: STD_Layout Ptr: 0x[[#%x,ADDR27:]]\n7) Type: STD_Layout Ptr: 0x[[#%x,ADDR28:]]\n"]; +// CHECK-DAG: "0x[[#NODE1]]" -> "0x[[#NODE7]]" #define GRAPH_E2E_EXPLICIT diff --git a/sycl/test-e2e/Graph/RecordReplay/debug_print_graph.cpp b/sycl/test-e2e/Graph/RecordReplay/debug_print_graph.cpp index 6ef999f7d41a7..5729c36160acc 100644 --- a/sycl/test-e2e/Graph/RecordReplay/debug_print_graph.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/debug_print_graph.cpp @@ -12,22 +12,22 @@ // CHECK-SAME: [style=bold, label="ID = 0x[[#NODE2]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11run_kernelsItEN4sycl3_V15eventENS1_5queueEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constIS5_E4typeEEEvEESC_SC_ENKUlRNS1_7handlerEE0_clESE_EUlNS1_4itemILi1ELb1EEEE_\n"]; // CHECK-NEXT: "0x[[#NODE1]]" -> "0x[[#NODE2]]" // CHECK-NEXT: "0x[[#%x,NODE3:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE3]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11run_kernelsItEN4sycl3_V15eventENS1_5queueEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constIS5_E4typeEEEvEESC_SC_ENKUlRNS1_7handlerEE1_clESE_EUlNS1_4itemILi1ELb1EEEE_\n"]; -// CHECK-NEXT: "0x[[#NODE2]]" -> "0x[[#NODE3]]" +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE3]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11run_kernelsItEN4sycl3_V15eventENS1_5queueEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constIS5_E4typeEEEvEESC_SC_ENKUlRNS1_7handlerEE2_clESE_EUlNS1_4itemILi1ELb1EEEE_\n"]; +// CHECK-DAG: "0x[[#NODE2]]" -> "0x[[#NODE3]]" +// CHECK-DAG: "0x[[#%x,NODE7:]]" -> "0x[[#NODE3]]" // CHECK-NEXT: "0x[[#%x,NODE4:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE4]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11run_kernelsItEN4sycl3_V15eventENS1_5queueEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constIS5_E4typeEEEvEESC_SC_ENKUlRNS1_7handlerEE2_clESE_EUlNS1_4itemILi1ELb1EEEE_\n"]; +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE4]]\nTYPE = CGCopy Device-to-Device \n"]; // CHECK-DAG: "0x[[#NODE3]]" -> "0x[[#NODE4]]" -// CHECK-DAG: "0x[[#NODE2]]" -> "0x[[#NODE4]]" +// CHECK-DAG: "0x[[#NODE1]]" -> "0x[[#NODE4]]" // CHECK-NEXT: "0x[[#%x,NODE5:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE5]]\nTYPE = CGCopy Device-to-Device \n"]; +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE5]]\nTYPE = CGCopy Device-to-Host \n"]; // CHECK-DAG: "0x[[#NODE3]]" -> "0x[[#NODE5]]" -// CHECK-DAG: "0x[[#NODE4]]" -> "0x[[#NODE5]] // CHECK-NEXT: "0x[[#%x,NODE6:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE6]]\nTYPE = CGCopy Device-to-Host \n"]; -// CHECK-DAG: "0x[[#NODE4]]" -> "0x[[#NODE6]]" -// CHECK-NEXT: "0x[[#%x,NODE7:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE7]]\nTYPE = None \n"]; -// CHECK-DAG: "0x[[#NODE6]]" -> "0x[[#NODE7]]" +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE6]]\nTYPE = None \n"]; +// CHECK-DAG: "0x[[#NODE5]]" -> "0x[[#NODE6]]" +// CHECK-NEXT: "0x[[#NODE7]]" +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE7]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11run_kernelsItEN4sycl3_V15eventENS1_5queueEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constIS5_E4typeEEEvEESC_SC_ENKUlRNS1_7handlerEE1_clESE_EUlNS1_4itemILi1ELb1EEEE_\n"]; +// CHECK-DAG: "0x[[#NODE1]]" -> "0x[[#NODE7]]" #define GRAPH_E2E_RECORD_REPLAY diff --git a/sycl/test-e2e/Graph/RecordReplay/debug_print_graph_verbose.cpp b/sycl/test-e2e/Graph/RecordReplay/debug_print_graph_verbose.cpp index c1697fc755ef2..f357645cfa25c 100644 --- a/sycl/test-e2e/Graph/RecordReplay/debug_print_graph_verbose.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/debug_print_graph_verbose.cpp @@ -17,26 +17,26 @@ // CHECK-SAME: 3) Type: STD_Layout Ptr: 0x[[#ADDR4]]\n4) Type: Accessor Ptr: 0x[[#%x,ADDR7:]]\n5) Type: STD_Layout Ptr: 0x[[#%x,ADDR8:]]\n6) Type: STD_Layout Ptr: 0x[[#%x,ADDR9:]]\n7) Type: STD_Layout Ptr: 0x[[#%x,ADDR10:]]\n"]; // CHECK-NEXT: "0x[[#NODE1]]" -> "0x[[#NODE2]]" // CHECK-NEXT: "0x[[#%x,NODE3:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE3]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11run_kernelsItEN4sycl3_V15eventENS1_5queueEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constIS5_E4typeEEEvEESC_SC_ENKUlRNS1_7handlerEE1_clESE_EUlNS1_4itemILi1ELb1EEEE_\n +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE3]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11run_kernelsItEN4sycl3_V15eventENS1_5queueEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constIS5_E4typeEEEvEESC_SC_ENKUlRNS1_7handlerEE2_clESE_EUlNS1_4itemILi1ELb1EEEE_\n // CHECK-SAME: ARGS = \n0) Type: Accessor Ptr: 0x[[#%x,ADDR11:]]\n1) Type: STD_Layout Ptr: 0x[[#%x,ADDR12:]]\n2) Type: STD_Layout Ptr: 0x[[#%x,ADDR13:]]\n // CHECK-SAME: 3) Type: STD_Layout Ptr: 0x[[#ADDR11]]\n4) Type: Accessor Ptr: 0x[[#%x,ADDR14:]]\n5) Type: STD_Layout Ptr: 0x[[#%x,ADDR15:]]\n6) Type: STD_Layout Ptr: 0x[[#%x,ADDR16:]]\n7) Type: STD_Layout Ptr: 0x[[#%x,ADDR17:]]\n"]; // CHECK-NEXT: "0x[[#NODE2]]" -> "0x[[#NODE3]]" +// CHECK-NEXT: "0x[[#%x,NODE7:]]" -> "0x[[#NODE3]]" // CHECK-NEXT: "0x[[#%x,NODE4:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE4]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11run_kernelsItEN4sycl3_V15eventENS1_5queueEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constIS5_E4typeEEEvEESC_SC_ENKUlRNS1_7handlerEE2_clESE_EUlNS1_4itemILi1ELb1EEEE_\n -// CHECK-SAME: ARGS = \n0) Type: Accessor Ptr: 0x[[#%x,ADDR18:]]\n1) Type: STD_Layout Ptr: 0x[[#%x,ADDR19:]]\n2) Type: STD_Layout Ptr: 0x[[#%x,ADDR20:]]\n -// CHECK-SAME: 3) Type: STD_Layout Ptr: 0x[[#ADDR18]]\n4) Type: Accessor Ptr: 0x[[#%x,ADDR21:]]\n5) Type: STD_Layout Ptr: 0x[[#%x,ADDR22:]]\n6) Type: STD_Layout Ptr: 0x[[#%x,ADDR23:]]\n7) Type: STD_Layout Ptr: 0x[[#%x,ADDR24:]]\n"]; +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE4]]\nTYPE = CGCopy Device-to-Device \nSrc: 0x[[#%x,ADDR18:]] Dst: 0x[[#%x,ADDR19:]]\n"]; // CHECK-DAG: "0x[[#NODE3]]" -> "0x[[#NODE4]]" -// CHECK-DAG: "0x[[#NODE2]]" -> "0x[[#NODE4]]" +// CHECK-DAG: "0x[[#NODE1]]" -> "0x[[#NODE4]] // CHECK-NEXT: "0x[[#%x,NODE5:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE5]]\nTYPE = CGCopy Device-to-Device \nSrc: 0x[[#%x,ADDR25:]] Dst: 0x[[#%x,ADDR26:]]\n"]; +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE5]]\nTYPE = CGCopy Device-to-Host \nSrc: 0x[[#%x,ADDR20:]] Dst: 0x[[#%x,ADDR21:]]\n"]; // CHECK-DAG: "0x[[#NODE3]]" -> "0x[[#NODE5]]" -// CHECK-DAG: "0x[[#NODE4]]" -> "0x[[#NODE5]] // CHECK-NEXT: "0x[[#%x,NODE6:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE6]]\nTYPE = CGCopy Device-to-Host \nSrc: 0x[[#%x,ADDR27:]] Dst: 0x[[#%x,ADDR28:]]\n"]; -// CHECK-DAG: "0x[[#NODE4]]" -> "0x[[#NODE6]]" -// CHECK-NEXT: "0x[[#%x,NODE7:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE7]]\nTYPE = None \n"]; -// CHECK-DAG: "0x[[#NODE6]]" -> "0x[[#NODE7]]" +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE6]]\nTYPE = None \n"]; +// CHECK-DAG: "0x[[#NODE5]]" -> "0x[[#NODE6]]" +// CHECK-NEXT: "0x[[#NODE7]]" +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE7]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11run_kernelsItEN4sycl3_V15eventENS1_5queueEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constIS5_E4typeEEEvEESC_SC_ENKUlRNS1_7handlerEE1_clESE_EUlNS1_4itemILi1ELb1EEEE_\n +// CHECK-SAME: ARGS = \n0) Type: Accessor Ptr: 0x[[#%x,ADDR22:]]\n1) Type: STD_Layout Ptr: 0x[[#%x,ADDR23:]]\n2) Type: STD_Layout Ptr: 0x[[#%x,ADDR24:]]\n +// CHECK-SAME: 3) Type: STD_Layout Ptr: 0x[[#ADDR22]]\n4) Type: Accessor Ptr: 0x[[#%x,ADDR25:]]\n5) Type: STD_Layout Ptr: 0x[[#%x,ADDR26:]]\n6) Type: STD_Layout Ptr: 0x[[#%x,ADDR27:]]\n7) Type: STD_Layout Ptr: 0x[[#%x,ADDR28:]]\n"]; +// CHECK-DAG: "0x[[#NODE1]]" -> "0x[[#NODE7]]" #define GRAPH_E2E_RECORD_REPLAY diff --git a/sycl/unittests/Extensions/CommandGraph/CMakeLists.txt b/sycl/unittests/Extensions/CommandGraph/CMakeLists.txt index 712d7345fd895..2232ce4abb54f 100644 --- a/sycl/unittests/Extensions/CommandGraph/CMakeLists.txt +++ b/sycl/unittests/Extensions/CommandGraph/CMakeLists.txt @@ -7,5 +7,6 @@ add_sycl_unittest(CommandGraphExtensionTests OBJECT InOrderQueue.cpp MultiThreaded.cpp Queries.cpp + Regressions.cpp Subgraph.cpp ) diff --git a/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp b/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp index 07fa7434cca00..63b5b2a04de05 100644 --- a/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp +++ b/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp @@ -81,7 +81,7 @@ TEST_F(CommandGraphTest, Finalize) { // Add a node that depends on Node1 due to the accessor auto Node3 = Graph.add([&](sycl::handler &cgh) { - sycl::accessor A(Buf, cgh, sycl::write_only, sycl::no_init); + sycl::accessor A(Buf, cgh, sycl::read_write); cgh.single_task>([]() {}); }); @@ -510,3 +510,121 @@ TEST_F(CommandGraphTest, FillMemsetNodes) { sycl::free(USMPtr, Queue); } } + +// Test that the expected dependencies are created when recording a graph node +// containing an accessor with mode FirstMode, followed by one containing an +// accessor with mode SecondMode +template +void testAccessorModeCombo(sycl::queue Queue) { + buffer Buffer{range<1>{16}}; + + ext::oneapi::experimental::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {experimental::property::graph::assume_buffer_outlives_graph{}}}; + + Graph.begin_recording(Queue); + // Create the first node with a write mode + auto EventFirst = Queue.submit([&](handler &CGH) { + auto Acc = Buffer.get_access(CGH); + CGH.single_task>([]() {}); + }); + + auto EventSecond = Queue.submit([&](handler &CGH) { + auto Acc = Buffer.get_access(CGH); + CGH.single_task>([]() {}); + }); + Graph.end_recording(Queue); + + EXPECT_EQ(Graph.get_root_nodes().size(), ShouldCreateDep ? 1ul : 2ul); + + experimental::node NodeFirst = + experimental::node::get_node_from_event(EventFirst); + EXPECT_EQ(NodeFirst.get_predecessors().size(), 0ul); + EXPECT_EQ(NodeFirst.get_successors().size(), ShouldCreateDep ? 1ul : 0ul); + + experimental::node NodeSecond = + experimental::node::get_node_from_event(EventSecond); + EXPECT_EQ(NodeSecond.get_predecessors().size(), ShouldCreateDep ? 1ul : 0ul); + EXPECT_EQ(NodeSecond.get_successors().size(), 0ul); +} + +// Tests that access modes are correctly respected when recording graph nodes +TEST_F(CommandGraphTest, AccessorModeEdges) { + + // Testing access_mode::write and others + testAccessorModeCombo(Queue); + testAccessorModeCombo( + Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo( + Queue); + testAccessorModeCombo(Queue); + + // Testing access_mode::read and others + testAccessorModeCombo(Queue); + testAccessorModeCombo( + Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo( + Queue); + testAccessorModeCombo(Queue); + + // Testing access_mode::read_write and others + testAccessorModeCombo(Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo( + Queue); + testAccessorModeCombo( + Queue); + testAccessorModeCombo( + Queue); + testAccessorModeCombo( + Queue); + + // Testing access_mode::discard_read_write and others + testAccessorModeCombo(Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo(Queue); + + // Testing access_mode::discard_write and others + testAccessorModeCombo(Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo( + Queue); + testAccessorModeCombo( + Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo( + Queue); + + // Testing access_mode::atomic and others + testAccessorModeCombo( + Queue); + testAccessorModeCombo( + Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo( + Queue); + testAccessorModeCombo(Queue); +} diff --git a/sycl/unittests/Extensions/CommandGraph/Regressions.cpp b/sycl/unittests/Extensions/CommandGraph/Regressions.cpp new file mode 100644 index 0000000000000..17b58f542d760 --- /dev/null +++ b/sycl/unittests/Extensions/CommandGraph/Regressions.cpp @@ -0,0 +1,60 @@ +//==------------------------ Regressions.cpp -------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "Common.hpp" + +using namespace sycl; +using namespace sycl::ext::oneapi; + +// Tests in this file are based on specific error reports + +// Regression test example based on a reported issue with accessor modes not +// being respected in graphs. The test records 3 kernel nodes which all have +// read only dependencies on the same two buffers, with a write dependency on a +// buffer which is different per kernel. This should result in no edges being +// created between these nodes because the accessor mode combinations do not +// indicate a need for dependencies. +// Originally reported here: https://github.com/intel/llvm/issues/12473 +TEST_F(CommandGraphTest, AccessorModeRegression) { + buffer BufferA{range<1>{16}}; + buffer BufferB{range<1>{16}}; + buffer BufferC{range<1>{16}}; + buffer BufferD{range<1>{16}}; + buffer BufferE{range<1>{16}}; + Graph.begin_recording(Queue); + + auto EventA = Queue.submit([&](handler &CGH) { + auto AccA = BufferA.get_access(CGH); + auto AccB = BufferB.get_access(CGH); + auto AccC = BufferC.get_access(CGH); + CGH.single_task>([]() {}); + }); + auto EventB = Queue.submit([&](handler &CGH) { + auto AccA = BufferA.get_access(CGH); + auto AccB = BufferB.get_access(CGH); + auto AccD = BufferD.get_access(CGH); + CGH.single_task>([]() {}); + }); + auto EventC = Queue.submit([&](handler &CGH) { + auto AccA = BufferA.get_access(CGH); + auto AccB = BufferB.get_access(CGH); + auto AccE = BufferE.get_access(CGH); + CGH.single_task>([]() {}); + }); + + Graph.end_recording(Queue); + + experimental::node NodeA = experimental::node::get_node_from_event(EventA); + EXPECT_EQ(NodeA.get_predecessors().size(), 0ul); + EXPECT_EQ(NodeA.get_successors().size(), 0ul); + experimental::node NodeB = experimental::node::get_node_from_event(EventB); + EXPECT_EQ(NodeB.get_predecessors().size(), 0ul); + EXPECT_EQ(NodeB.get_successors().size(), 0ul); + experimental::node NodeC = experimental::node::get_node_from_event(EventC); + EXPECT_EQ(NodeC.get_predecessors().size(), 0ul); + EXPECT_EQ(NodeC.get_successors().size(), 0ul); +}