@@ -366,8 +366,40 @@ int execute_and_wait(perf_function_t &exec_func, const dnnl_engine_t &engine,
 
     execute_unmap_args(args, dnnl_args);
 
-    auto status = exec_func(stream, dnnl_args);
-    DNN_SAFE(dnnl_stream_wait(stream), CRIT);
+    dnnl_status_t status = dnnl_runtime_error;
+    bool run_regular_exec = true;
+#if DNNL_GPU_RUNTIME == DNNL_RUNTIME_DPCPP
+    while (use_sycl_graph && is_gpu(engine)) {
+        void *queue_ptr;
+        DNN_SAFE(dnnl_sycl_interop_stream_get_queue(stream, &queue_ptr), CRIT);
+        sycl::queue queue = *static_cast<sycl::queue *>(queue_ptr);
+        const bool can_run_sycl_graph = queue.get_device().get_backend()
+                == sycl::backend::ext_oneapi_level_zero;
+        if (!can_run_sycl_graph) break;
+
+        BENCHDNN_PRINT(
+                2, "%s\n", "[INFO] Using experimental SYCL graph execution.");
+        sycl::ext::oneapi::experimental::command_graph graph {
+                queue.get_context(), queue.get_device()};
+
+        graph.begin_recording(queue);
+        status = exec_func(stream, dnnl_args);
+        graph.end_recording(queue);
+        DNN_SAFE(dnnl_stream_wait(stream), CRIT);
+
+        auto exec = graph.finalize();
+        queue.ext_oneapi_graph(exec).wait();
+
+        // SYCL graph feature completed submission and execution, no need to
+        // have a regular run.
+        run_regular_exec = false;
+        break;
+    }
+#endif
+    if (run_regular_exec) {
+        status = exec_func(stream, dnnl_args);
+        DNN_SAFE(dnnl_stream_wait(stream), CRIT);
+    }
     if (res) res->state = EXECUTED;
 
     execute_map_args(args);
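
For readers unfamiliar with the extension used above: the added path relies on the experimental sycl_ext_oneapi_graph record-and-replay flow, where work submitted to a queue between begin_recording() and end_recording() is captured into a command_graph, finalized into an executable graph, and then submitted with queue.ext_oneapi_graph(). The following standalone sketch (not part of the patch) shows that flow in isolation; it assumes a DPC++ compiler with the graph extension and a Level Zero GPU device, and the kernels, sizes, and names in it are illustrative only.

// Minimal record-and-replay sketch with sycl_ext_oneapi_graph (illustrative,
// not part of the oneDNN patch). Assumes a DPC++ compiler whose <sycl/sycl.hpp>
// exposes the experimental graph extension and a GPU with graph support.
#include <sycl/sycl.hpp>

#include <cstdio>
#include <vector>

namespace sycl_exp = sycl::ext::oneapi::experimental;

int main() {
    // An in-order queue keeps the two recorded kernels serialized.
    sycl::queue queue {sycl::gpu_selector_v, sycl::property::queue::in_order {}};

    constexpr size_t n = 1024;
    float *data = sycl::malloc_device<float>(n, queue);

    // A modifiable graph is tied to a context and a device, as in the patch.
    sycl_exp::command_graph graph {queue.get_context(), queue.get_device()};

    // While recording, submissions to `queue` are captured into `graph`
    // instead of being executed.
    graph.begin_recording(queue);
    queue.parallel_for(sycl::range<1> {n},
            [=](sycl::id<1> i) { data[i] = static_cast<float>(i[0]); });
    queue.parallel_for(
            sycl::range<1> {n}, [=](sycl::id<1> i) { data[i] *= 2.f; });
    graph.end_recording(queue);

    // Finalizing produces an executable graph that can be replayed cheaply.
    auto exec = graph.finalize();
    queue.ext_oneapi_graph(exec).wait();

    // Verify on the host; the queue is back in normal execution mode here.
    std::vector<float> host(n);
    queue.memcpy(host.data(), data, n * sizeof(float)).wait();
    std::printf("host[10] = %f\n", host[10]);

    sycl::free(data, queue);
    return 0;
}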