From d410644deca63b7ecc14cbf5193684f798bae694 Mon Sep 17 00:00:00 2001 From: Pablo Reble Date: Thu, 11 Aug 2022 16:33:23 -0500 Subject: [PATCH] Rework formatting and introducing USM shortcuts --- .../proposed/sycl_ext_oneapi_graph.asciidoc | 227 ++++++++++++++++-- 1 file changed, 204 insertions(+), 23 deletions(-) diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_graph.asciidoc b/sycl/doc/extensions/proposed/sycl_ext_oneapi_graph.asciidoc index 4b0a5ea805d35..15dfa73d03b44 100644 --- a/sycl/doc/extensions/proposed/sycl_ext_oneapi_graph.asciidoc +++ b/sycl/doc/extensions/proposed/sycl_ext_oneapi_graph.asciidoc @@ -60,6 +60,7 @@ Table 1. Values of the `SYCL_EXT_ONEAPI_GRAPH` macro. == SYCL Graph Terminology Table 2. Terminology. +[%header,cols="1,3"] |=== |Concept|Description |graph| Class that stores structured work units and their dependencies @@ -81,8 +82,6 @@ namespace sycl::ext::oneapi::experimental { } ---- -NOTE: - == Edge A dependency between two nodes representing a happens-before relationship. `sender` and `receiver` may be associated to different graphs. @@ -100,7 +99,7 @@ namespace sycl::ext::oneapi::experimental { Graph is a class that represents a directed acyclic graph of nodes. A graph can have different states, can be nested, can have multiple root nodes that are scheduled for execution first and multiple leaf nodes that are scheduled for execution last. The execution of a graph has been completed when all leaf node tasks have been completed. -Member functions as listed in Table 2 and 3 can be used to add nodes to a graph. +Member functions as listed in Table 3 to 6 can be used to add nodes to a graph. [source,c++] ---- @@ -123,7 +122,18 @@ namespace sycl::ext::oneapi::experimental { } -sycl::event sycl::queue(const graph Graph); +---- + +The following member functions are added to the queue class. 
+ +[source,c++] +---- + +namespace sycl { + +event submit(const ext::oneapi::experimental::graph& my_graph); + +} // namespace sycl ---- @@ -133,46 +143,209 @@ A `graph` object in `graph_state::executable` represents a user generated device The structure of such a `graph` object in this state is immutable and cannot be changed, so are the tasks assigned with each node. Support of submitting a graph for execution, before a previous execution has been completed is backend specific. The runtime may throw an error. +=== Graph member and helper functions + Table 3. Constructor of the `graph` class. +[cols="2a,a"] |=== |Constructor|Description -|`graph()` -|Creates a `graph` object. It's default state is `graph_state::modifiable`. +| +[source,c++] +---- +/* available only when graph_state == modifiable */ +graph(); +---- +|Creates a `graph` object. |=== Table 4. Member functions of the `graph` class. +[cols="2a,a"] |=== |Member function|Description -|`node add_node(const std::vector& dep = {});` -|This creates an empty node which is associated to no task. It's intended use is either a connection point inside a graph between groups of nodes, and can significantly reduce the number of edges ( O(n) vs. O(n^2) ). Another use-case is building the structure of a graph first and adding tasks later. +| +[source,c++] +---- +node add_node(const std::vector& dep = {}); +---- +|This creates an empty node which is associated to no task. One intended use is as a connection point inside a graph between groups of nodes, which can significantly reduce the number of edges ( O(n) vs. O(n^2) ). Another use-case is building the structure of a graph first and adding tasks later. 
-|`template - node add_node(T cgf, const std::vector& dep = {});` +| +[source,c++] +---- +template + node add_node(T cgf, const std::vector& dep = {}); +---- |This node captures a command group function object containing host task which is scheduled by the SYCL runtime or a SYCL function for invoking kernels with all restrictions that apply as described in the spec. |=== +Memory that is allocated by the following functions is owned by the specific graph. When freed inside the graph, the memory is only accessible before the `free` node is executed and after the `malloc` node is executed. + Table 5. Member functions of the `graph` class (memory operations). +[cols="2a,a"] |=== |Member function|Description -|`node add_memcpy_node(void* dest, const void* src, size_t numBytes, const std::vector& dep = {});` +| +[source,c++] +---- +node memcpy(void* dest, const void* src, size_t numBytes, const std::vector& dep = {}); +---- |Adding a node that encapsulates a `memcpy` operation. -|`node add_memset_node(void* ptr, int value, size_t numBytes, const std::vector& dep = {});` +| +[source,c++] +---- +template node +copy(const T* src, T* dest, size_t count, const std::vector& dep = {}); +---- +|Adding a node that encapsulates a `copy` operation. + +| +[source,c++] +---- +node memset(void* ptr, int value, size_t numBytes, const std::vector& dep = {}); +---- |Adding a node that encapsulates a `memset` operation. -|`node add_malloc_node(void *data, size_t numBytes, usm::alloc kind, const std::vector& dep = {});` +| +[source,c++] +---- +template +node fill(void* ptr, const T& pattern, size_t count, const std::vector& dep = {}); +---- +|Adding a node that encapsulates a `fill` operation. + +| +[source,c++] +---- +node malloc(void *data, size_t numBytes, usm::alloc kind, const std::vector& dep = {}); +---- +|Adding a node that encapsulates a `malloc` operation. 
+ +| +[source,c++] +---- +node malloc_shared(void *data, size_t numBytes, const std::vector& dep = {}); +---- +|Adding a node that encapsulates a `malloc` operation. + +| +[source,c++] +---- +node malloc_host(void *data, size_t numBytes, const std::vector& dep = {}); +---- |Adding a node that encapsulates a `malloc` operation. -|`node add_free_node(void *data, const std::vector& dep = {});` +| +[source,c++] +---- +node malloc_device(void *data, size_t numBytes, const std::vector& dep = {}); +---- +|Adding a node that encapsulates a `malloc` operation. + +| +[source,c++] +---- +node free(void *data, const std::vector& dep = {}); +---- |Adding a node that encapsulates a `free` operation. |=== +Table 6. Member functions of the `graph` class (convenience shortcuts). +[cols="2a,a"] +|=== +|Member function|Description + +| +[source,c++] +---- +template +node single_task(const KernelType &kernelFunc, const std::vector& dep = {}); +---- +|Adding a node that encapsulates a `single_task` operation. + +| +[source,c++] +---- +template +node parallel_for(range numWorkItems, Rest&& rest, const std::vector& dep = {}); +---- +|Adding a node that encapsulates a `parallel_for` operation. + +| +[source,c++] +---- +template +node parallel_for(nd_range executionRange, Rest&& rest, const std::vector& dep = {}); +---- +|Adding a node that encapsulates a `parallel_for` operation. + +|=== + +Table 7. Helper functions of the `graph` class. +[cols="a,a"] +|=== +|Function name|Description + +| +[source,c++] +---- +graph make_graph(); +---- +|Creates a `graph` object. Its state is `graph_state::modifiable`. + +|=== + +=== Node member functions + +Table 8. Constructor of the `node` class. +[cols="a,a"] +|=== +|Constructor|Description + +| +[source,c++] +---- +node(); +---- +|Creates an empty `node` object that encapsulates no tasks and is not assigned to a graph. Prior to execution, it has to be assigned to a graph. + +|=== + +Table 9. Member functions of the `node` class. 
+[cols="2a,a"] +|=== +|Function name|Description + +| +[source,c++] +---- +void set_graph(graph& Graph); +---- +|Assigns a `node` object to a `graph`. + +| +[source,c++] +---- +template +void update(T cgf); +---- +|Update a `node` object. + +| +[source,c++] +---- +template +void update(T cgf, graph& Graph); +---- +|Update a `node` object and assign it to a graph. + +|=== == Examples @@ -196,31 +369,35 @@ int main() { auto g = sycl::ext::oneapi::experimental::make_graph(); - float *x = sycl::malloc_shared(n, q); - float *y = sycl::malloc_shared(n, q); - float *z = sycl::malloc_shared(n, q); + float *x , *y, *z; + + auto n_x = g.malloc_shared(x, n, q); + auto n_y = g.malloc_shared(y, n, q); + auto n_z = g.malloc_shared(z, n, q); float *dotp = sycl::malloc_shared(1, q); - for (int i = 0; i < n; i++) { + /* init data by using usm shortcut */ + auto n_i = g.parallel_for(n, [=](sycl::id<1> it){ + const size_t i = it[0]; x[i] = 1.0f; y[i] = 2.0f; z[i] = 3.0f; - } + }, {n_x, n_y, n_z}); auto node_a = g.add_node([&](sycl::handler &h) { h.parallel_for(sycl::range<1>{n}, [=](sycl::id<1> it) { const size_t i = it[0]; x[i] = alpha * x[i] + beta * y[i]; }); - }); + }, {n_i}); auto node_b = g.add_node([&](sycl::handler &h) { h.parallel_for(sycl::range<1>{n}, [=](sycl::id<1> it) { const size_t i = it[0]; z[i] = gamma * z[i] + beta * y[i]; }); - }); + }, {n_i}); auto node_c = g.add_node( [&](sycl::handler &h) { @@ -232,13 +409,15 @@ int main() { }); }, {node_a, node_b}); + + auto node_f1 = g.free(x, {node_c}); + auto node_f2 = g.free(y, {node_b}); auto exec = compile(q); q.submit(exec).wait(); - sycl::free(x, q); - sycl::free(y, q); + // memory can be freed inside or outside the graph sycl::free(z, q); sycl::free(dotp, q); @@ -271,4 +450,6 @@ Please, note that the following features are not yet implemented: |Rev|Date|Author|Changes |1|2022-02-11|Pablo Reble|Initial public working draft |2|2022-03-11|Pablo Reble|Incorporate feedback from PR +|3|2022-05-25|Pablo Reble|Extend API and 
Example +|4|2022-08-10|Pablo Reble|Adding USM shortcuts |========================================