Skip to content

Commit

Permalink
[SYCL][Graph] L0 Backend support for SYCL Graphs (2/4) (#9992)
Browse files Browse the repository at this point in the history
# Level Zero Backend Support for SYCL Graphs
This is the second patch of a series that adds support for an
[experimental command graph
extension](intel/llvm#5626)

A snapshot of the complete work can be seen in draft PR #9375 which has
support all the specification defined ways of
adding nodes and edges to the graph, including both Explicit and Record
& Replay graph construction. The two types of nodes currently
implemented are kernel execution and memcpy commands.

See https://github.com/reble/llvm#implementation-status for the status
of our total work.

## Scope
This second patch focuses on the required PI/UR support for the
experimental command-buffer feature in the Level Zero adapter:
* PI stubs for all adapters to enable compilation, no functionality.
* Command-buffer implementation for the Level Zero UR adapter.
* Stubs for the CUDA UR adapter to enable compilation, no functionality.

## Following Split PRs
Future follow-up PRs with the remainder of our work on the extension
will include:
* Hooking up backend to graphs runtime, bugfixes and other feature
additions, will add symbols but not break the ABI. (3/4)
* Add end-to-end tests for SYCL Graph extension. (4/4)
* NFC changes - Design doc and codeowner update.

## Authors
Co-authored-by: Pablo Reble <[email protected]>
Co-authored-by: Julian Miller <[email protected]>
Co-authored-by: Ben Tracy <[email protected]>
Co-authored-by: Ewan Crawford <[email protected]>
Co-authored-by: Maxime France-Pillois
<[email protected]>

---------

Co-authored-by: Ewan Crawford <[email protected]>
Co-authored-by: Maxime France-Pillois <[email protected]>
  • Loading branch information
3 people authored and fabiomestre committed Sep 26, 2023
1 parent d74234e commit 2fb0f02
Show file tree
Hide file tree
Showing 3 changed files with 293 additions and 0 deletions.
250 changes: 250 additions & 0 deletions command_buffer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
//===--------- command_buffer.cpp - CUDA Adapter ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===-----------------------------------------------------------------===//

#include "command_buffer.hpp"
#include "common.hpp"

/// Stub implementations of UR experimental feature command-buffers

UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
ur_context_handle_t hContext, ur_device_handle_t hDevice,
const ur_exp_command_buffer_desc_t *pCommandBufferDesc,
ur_exp_command_buffer_handle_t *phCommandBuffer) {
(void)hContext;
(void)hDevice;
(void)pCommandBufferDesc;
(void)phCommandBuffer;
sycl::detail::ur::die("Experimental Command-buffer feature is not "
"implemented for CUDA adapter.");
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

UR_APIEXPORT ur_result_t UR_APICALL
urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
(void)hCommandBuffer;

sycl::detail::ur::die("Experimental Command-buffer feature is not "
"implemented for CUDA adapter.");
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

UR_APIEXPORT ur_result_t UR_APICALL
urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
(void)hCommandBuffer;

sycl::detail::ur::die("Experimental Command-buffer feature is not "
"implemented for CUDA adapter.");
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

UR_APIEXPORT ur_result_t UR_APICALL
urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
(void)hCommandBuffer;

sycl::detail::ur::die("Experimental Command-buffer feature is not "
"implemented for CUDA adapter.");
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
ur_exp_command_buffer_handle_t hCommandBuffer, ur_kernel_handle_t hKernel,
uint32_t workDim, const size_t *pGlobalWorkOffset,
const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize,
uint32_t numSyncPointsInWaitList,
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
ur_exp_command_buffer_sync_point_t *pSyncPoint) {
(void)hCommandBuffer;
(void)hKernel;
(void)workDim;
(void)pGlobalWorkOffset;
(void)pGlobalWorkSize;
(void)pLocalWorkSize;
(void)numSyncPointsInWaitList;
(void)pSyncPointWaitList;
(void)pSyncPoint;

sycl::detail::ur::die("Experimental Command-buffer feature is not "
"implemented for CUDA adapter.");
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp(
ur_exp_command_buffer_handle_t hCommandBuffer, void *pDst, const void *pSrc,
size_t size, uint32_t numSyncPointsInWaitList,
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
ur_exp_command_buffer_sync_point_t *pSyncPoint) {
(void)hCommandBuffer;
(void)pDst;
(void)pSrc;
(void)size;
(void)numSyncPointsInWaitList;
(void)pSyncPointWaitList;
(void)pSyncPoint;

sycl::detail::ur::die("Experimental Command-buffer feature is not "
"implemented for CUDA adapter.");
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp(
ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem,
ur_mem_handle_t hDstMem, size_t srcOffset, size_t dstOffset, size_t size,
uint32_t numSyncPointsInWaitList,
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
ur_exp_command_buffer_sync_point_t *pSyncPoint) {
(void)hCommandBuffer;
(void)hSrcMem;
(void)hDstMem;
(void)srcOffset;
(void)dstOffset;
(void)size;
(void)numSyncPointsInWaitList;
(void)pSyncPointWaitList;
(void)pSyncPoint;

sycl::detail::ur::die("Experimental Command-buffer feature is not "
"implemented for CUDA adapter.");
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp(
ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem,
ur_mem_handle_t hDstMem, ur_rect_offset_t srcOrigin,
ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch,
size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch,
uint32_t numSyncPointsInWaitList,
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
ur_exp_command_buffer_sync_point_t *pSyncPoint) {
(void)hCommandBuffer;
(void)hSrcMem;
(void)hDstMem;
(void)srcOrigin;
(void)dstOrigin;
(void)region;
(void)srcRowPitch;
(void)numSyncPointsInWaitList;
(void)pSyncPointWaitList;
(void)pSyncPoint;

sycl::detail::ur::die("Experimental Command-buffer feature is not "
"implemented for CUDA adapter.");
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

UR_APIEXPORT
ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp(
ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer,
size_t offset, size_t size, const void *pSrc,
uint32_t numSyncPointsInWaitList,
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
ur_exp_command_buffer_sync_point_t *pSyncPoint) {
(void)hCommandBuffer;
(void)hBuffer;
(void)offset;
(void)size;
(void)pSrc;
(void)numSyncPointsInWaitList;
(void)pSyncPointWaitList;
(void)pSyncPoint;

sycl::detail::ur::die("Experimental Command-buffer feature is not "
"implemented for CUDA adapter.");
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

UR_APIEXPORT
ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp(
ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer,
size_t offset, size_t size, void *pDst, uint32_t numSyncPointsInWaitList,
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
ur_exp_command_buffer_sync_point_t *pSyncPoint) {
(void)hCommandBuffer;
(void)hBuffer;
(void)offset;
(void)size;
(void)pDst;
(void)numSyncPointsInWaitList;
(void)pSyncPointWaitList;
(void)pSyncPoint;

sycl::detail::ur::die("Experimental Command-buffer feature is not "
"implemented for CUDA adapter.");
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

UR_APIEXPORT
ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp(
ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer,
ur_rect_offset_t bufferOffset, ur_rect_offset_t hostOffset,
ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch,
size_t hostRowPitch, size_t hostSlicePitch, void *pSrc,
uint32_t numSyncPointsInWaitList,
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
ur_exp_command_buffer_sync_point_t *pSyncPoint) {
(void)hCommandBuffer;
(void)hBuffer;
(void)bufferOffset;
(void)hostOffset;
(void)region;
(void)bufferRowPitch;
(void)bufferSlicePitch;
(void)hostRowPitch;
(void)hostSlicePitch;
(void)pSrc;
(void)numSyncPointsInWaitList;
(void)pSyncPointWaitList;
(void)pSyncPoint;

sycl::detail::ur::die("Experimental Command-buffer feature is not "
"implemented for CUDA adapter.");
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

UR_APIEXPORT
ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp(
ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer,
ur_rect_offset_t bufferOffset, ur_rect_offset_t hostOffset,
ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch,
size_t hostRowPitch, size_t hostSlicePitch, void *pDst,
uint32_t numSyncPointsInWaitList,
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
ur_exp_command_buffer_sync_point_t *pSyncPoint) {
(void)hCommandBuffer;
(void)hBuffer;
(void)bufferOffset;
(void)hostOffset;
(void)region;
(void)bufferRowPitch;
(void)bufferSlicePitch;
(void)hostRowPitch;
(void)hostSlicePitch;
(void)pDst;

(void)numSyncPointsInWaitList;
(void)pSyncPointWaitList;
(void)pSyncPoint;

sycl::detail::ur::die("Experimental Command-buffer feature is not "
"implemented for CUDA adapter.");
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
ur_exp_command_buffer_handle_t hCommandBuffer, ur_queue_handle_t hQueue,
uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
ur_event_handle_t *phEvent) {
(void)hCommandBuffer;
(void)hQueue;
(void)numEventsInWaitList;
(void)phEventWaitList;
(void)phEvent;

sycl::detail::ur::die("Experimental Command-buffer feature is not "
"implemented for CUDA adapter.");
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
13 changes: 13 additions & 0 deletions command_buffer.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
//===--------- command_buffer.hpp - CUDA Adapter ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===-----------------------------------------------------------------===//

#include <ur/ur.hpp>

/// Stub implementation of command-buffers for CUDA

struct ur_exp_command_buffer_handle_t_ {};
30 changes: 30 additions & 0 deletions ur_interface_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,36 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable(
return UR_RESULT_SUCCESS;
}

UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable(
ur_api_version_t version, ///< [in] API version requested
ur_command_buffer_exp_dditable_t
*pDdiTable ///< [in,out] pointer to table of DDI function pointers
) {
auto retVal = validateProcInputs(version, pDdiTable);
if (UR_RESULT_SUCCESS != retVal) {
return retVal;
}
pDdiTable->pfnCreateExp = urCommandBufferCreateExp;
pDdiTable->pfnRetainExp = urCommandBufferRetainExp;
pDdiTable->pfnReleaseExp = urCommandBufferReleaseExp;
pDdiTable->pfnFinalizeExp = urCommandBufferFinalizeExp;
pDdiTable->pfnAppendKernelLaunchExp = urCommandBufferAppendKernelLaunchExp;
pDdiTable->pfnAppendMemcpyUSMExp = urCommandBufferAppendMemcpyUSMExp;
pDdiTable->pfnAppendMembufferCopyExp = urCommandBufferAppendMembufferCopyExp;
pDdiTable->pfnAppendMembufferCopyRectExp =
urCommandBufferAppendMembufferCopyRectExp;
pDdiTable->pfnAppendMembufferReadExp = urCommandBufferAppendMembufferReadExp;
pDdiTable->pfnAppendMembufferReadRectExp =
urCommandBufferAppendMembufferReadRectExp;
pDdiTable->pfnAppendMembufferWriteExp =
urCommandBufferAppendMembufferWriteExp;
pDdiTable->pfnAppendMembufferWriteRectExp =
urCommandBufferAppendMembufferWriteRectExp;
pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp;

return retVal;
}

#if defined(__cplusplus)
} // extern "C"
#endif

0 comments on commit 2fb0f02

Please sign in to comment.