Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

POC: Enable Proton for XPU #2635

Draft
wants to merge 19 commits into
base: main
Choose a base branch
from
Draft
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
implement 'getDevice'
Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
  • Loading branch information
anmyachev committed Nov 25, 2024
commit 55fd9dab674eb85498e8f009bc0a3e6a87ab964c
29 changes: 29 additions & 0 deletions third_party/proton/csrc/include/Driver/GPU/XpuApi.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#ifndef PROTON_DRIVER_GPU_SYCL_H_
#define PROTON_DRIVER_GPU_SYCL_H_

#include "Driver/Device.h"

namespace proton {

namespace xpu {

/*
template <bool CheckSuccess> CUresult init(int flags);

template <bool CheckSuccess> CUresult ctxSynchronize();

template <bool CheckSuccess> CUresult ctxGetCurrent(CUcontext *pctx);

template <bool CheckSuccess>
CUresult deviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);

template <bool CheckSuccess> CUresult deviceGet(CUdevice *device, int ordinal);
*/

Device getDevice(uint64_t index);

} // namespace xpu

} // namespace proton

#endif // PROTON_DRIVER_GPU_SYCL_H_
4 changes: 4 additions & 0 deletions third_party/proton/csrc/lib/Driver/Device.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "Driver/Device.h"
#include "Driver/GPU/CudaApi.h"
#include "Driver/GPU/HipApi.h"
#include "Driver/GPU/XpuApi.h"

#include "Utility/Errors.h"

@@ -13,6 +14,9 @@ Device getDevice(DeviceType type, uint64_t index) {
if (type == DeviceType::HIP) {
return hip::getDevice(index);
}
if (type == DeviceType::XPU) {
return xpu::getDevice(index);
}
throw std::runtime_error("DeviceType not supported");
}

90 changes: 90 additions & 0 deletions third_party/proton/csrc/lib/Driver/GPU/XpuApi.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#include "Driver/GPU/XpuApi.h"
#include "Driver/Dispatch.h"

#include "sycl_functions.h"
#include <level_zero/ze_api.h>
#include <string>
#include <vector>

namespace proton {

namespace xpu {

/*
struct ExternLibCuda : public ExternLibBase {
using RetType = CUresult;
//
https://forums.developer.nvidia.com/t/wsl2-libcuda-so-and-libcuda-so-1-should-be-symlink/236301
// On WSL, "libcuda.so" and "libcuda.so.1" may not be linked, so we use
// "libcuda.so.1" instead.
static constexpr const char *name = "libcuda.so.1";
static constexpr const char *defaultDir = "";
static constexpr RetType success = CUDA_SUCCESS;
static void *lib;
};

void *ExternLibCuda::lib = nullptr;

DEFINE_DISPATCH(ExternLibCuda, init, cuInit, int)

DEFINE_DISPATCH(ExternLibCuda, ctxSynchronize, cuCtxSynchronize)

DEFINE_DISPATCH(ExternLibCuda, ctxGetCurrent, cuCtxGetCurrent, CUcontext *)

DEFINE_DISPATCH(ExternLibCuda, deviceGet, cuDeviceGet, CUdevice *, int)

DEFINE_DISPATCH(ExternLibCuda, deviceGetAttribute, cuDeviceGetAttribute, int *,
CUdevice_attribute, CUdevice)
*/

// FIXME: for this initialization is needed
// ref: initDevices
static std::vector<std::pair<sycl::device, ze_device_handle_t>>
g_sycl_l0_device_list;

// FIXME: probably `DEFINE_DISPATCH` should be used in this function
Device getDevice(uint64_t index) {
// ref: getDeviceProperties
const auto device = g_sycl_l0_device_list[index];

// Get device handle
ze_device_handle_t phDevice = device.second;

// create a struct to hold device properties
ze_device_properties_t device_properties = {};
device_properties.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES;
// FIXME: should it be: `zeDeviceGetComputeProperties` and
// `ZE_STRUCTURE_TYPE_DEVICE_COMPUTE_PROPERTIES` ref:
// https://spec.oneapi.io/level-zero/1.0.4/core/api.html
zeDeviceGetProperties(phDevice, &device_properties);

uint32_t clockRate = device_properties.coreClockRate;
uint32_t numSms =
device_properties.numSlices * device_properties.numSubslicesPerSlice;

// create a struct to hold device memory properties
uint32_t memoryCount = 0;
zeDeviceGetMemoryProperties(phDevice, &memoryCount, nullptr);
auto pMemoryProperties = new ze_device_memory_properties_t[memoryCount];
for (uint32_t mem = 0; mem < memoryCount; ++mem) {
pMemoryProperties[mem].stype = ZE_STRUCTURE_TYPE_DEVICE_MEMORY_PROPERTIES;
pMemoryProperties[mem].pNext = nullptr;
}
zeDeviceGetMemoryProperties(phDevice, &memoryCount, pMemoryProperties);

int memoryClockRate = pMemoryProperties[0].maxClockRate;
int busWidth = pMemoryProperties[0].maxBusWidth;

delete[] pMemoryProperties;

// FIXME how this can be defined for XPU?
// std::string arch = std::to_string(major * 10 + minor);
std::string arch = "unknown";

return Device(DeviceType::XPU, index, clockRate, memoryClockRate, busWidth,
numSms, arch);
}

} // namespace xpu

} // namespace proton
123 changes: 123 additions & 0 deletions third_party/proton/csrc/lib/Driver/GPU/XpuptiApi.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
#include "Driver/GPU/XpuptiApi.h"
#include "Driver/Device.h"
#include "Driver/Dispatch.h"

namespace proton {

namespace xpupti {

/*
#define STRINGIFY(x) #x
#define TOSTRING(x) STRINGIFY(x)
struct ExternLibCupti : public ExternLibBase {
using RetType = CUptiResult;
static constexpr const char *name = "libcupti.so";
#ifdef CUPTI_LIB_DIR
static constexpr const char *defaultDir = TOSTRING(CUPTI_LIB_DIR);
#else
static constexpr const char *defaultDir = "";
#endif
static constexpr RetType success = CUPTI_SUCCESS;
static void *lib;
};

void *ExternLibCupti::lib = nullptr;

DEFINE_DISPATCH(ExternLibCupti, getVersion, cuptiGetVersion, uint32_t *);

DEFINE_DISPATCH(ExternLibCupti, getContextId, cuptiGetContextId, CUcontext,
uint32_t *);

DEFINE_DISPATCH(ExternLibCupti, activityRegisterCallbacks,
cuptiActivityRegisterCallbacks,
CUpti_BuffersCallbackRequestFunc,
CUpti_BuffersCallbackCompleteFunc)

DEFINE_DISPATCH(ExternLibCupti, subscribe, cuptiSubscribe,
CUpti_SubscriberHandle *, CUpti_CallbackFunc, void *)

DEFINE_DISPATCH(ExternLibCupti, enableDomain, cuptiEnableDomain, uint32_t,
CUpti_SubscriberHandle, CUpti_CallbackDomain)

DEFINE_DISPATCH(ExternLibCupti, enableCallback, cuptiEnableCallback, uint32_t,
CUpti_SubscriberHandle, CUpti_CallbackDomain, CUpti_CallbackId);

DEFINE_DISPATCH(ExternLibCupti, activityEnable, cuptiActivityEnable,
CUpti_ActivityKind)

DEFINE_DISPATCH(ExternLibCupti, activityDisable, cuptiActivityDisable,
CUpti_ActivityKind)

DEFINE_DISPATCH(ExternLibCupti, activityEnableContext,
cuptiActivityEnableContext, CUcontext, CUpti_ActivityKind)

DEFINE_DISPATCH(ExternLibCupti, activityDisableContext,
cuptiActivityDisableContext, CUcontext, CUpti_ActivityKind)

DEFINE_DISPATCH(ExternLibCupti, activityFlushAll, cuptiActivityFlushAll,
uint32_t)

DEFINE_DISPATCH(ExternLibCupti, activityGetNextRecord,
cuptiActivityGetNextRecord, uint8_t *, size_t,
CUpti_Activity **)

DEFINE_DISPATCH(ExternLibCupti, activityPushExternalCorrelationId,
cuptiActivityPushExternalCorrelationId,
CUpti_ExternalCorrelationKind, uint64_t)

DEFINE_DISPATCH(ExternLibCupti, activityPopExternalCorrelationId,
cuptiActivityPopExternalCorrelationId,
CUpti_ExternalCorrelationKind, uint64_t *)

DEFINE_DISPATCH(ExternLibCupti, activitySetAttribute, cuptiActivitySetAttribute,
CUpti_ActivityAttribute, size_t *, void *)

DEFINE_DISPATCH(ExternLibCupti, unsubscribe, cuptiUnsubscribe,
CUpti_SubscriberHandle)

DEFINE_DISPATCH(ExternLibCupti, finalize, cuptiFinalize)

DEFINE_DISPATCH(ExternLibCupti, getGraphExecId, cuptiGetGraphExecId,
CUgraphExec, uint32_t *);

DEFINE_DISPATCH(ExternLibCupti, getGraphId, cuptiGetGraphId, CUgraph,
uint32_t *);

DEFINE_DISPATCH(ExternLibCupti, getCubinCrc, cuptiGetCubinCrc,
CUpti_GetCubinCrcParams *);

DEFINE_DISPATCH(ExternLibCupti, getSassToSourceCorrelation,
cuptiGetSassToSourceCorrelation,
CUpti_GetSassToSourceCorrelationParams *);

DEFINE_DISPATCH(ExternLibCupti, pcSamplingGetNumStallReasons,
cuptiPCSamplingGetNumStallReasons,
CUpti_PCSamplingGetNumStallReasonsParams *);

DEFINE_DISPATCH(ExternLibCupti, pcSamplingGetStallReasons,
cuptiPCSamplingGetStallReasons,
CUpti_PCSamplingGetStallReasonsParams *);

DEFINE_DISPATCH(ExternLibCupti, pcSamplingSetConfigurationAttribute,
cuptiPCSamplingSetConfigurationAttribute,
CUpti_PCSamplingConfigurationInfoParams *);

DEFINE_DISPATCH(ExternLibCupti, pcSamplingEnable, cuptiPCSamplingEnable,
CUpti_PCSamplingEnableParams *);

DEFINE_DISPATCH(ExternLibCupti, pcSamplingDisable, cuptiPCSamplingDisable,
CUpti_PCSamplingDisableParams *);

DEFINE_DISPATCH(ExternLibCupti, pcSamplingGetData, cuptiPCSamplingGetData,
CUpti_PCSamplingGetDataParams *);

DEFINE_DISPATCH(ExternLibCupti, pcSamplingStart, cuptiPCSamplingStart,
CUpti_PCSamplingStartParams *);

DEFINE_DISPATCH(ExternLibCupti, pcSamplingStop, cuptiPCSamplingStop,
CUpti_PCSamplingStopParams *);

*/
} // namespace xpupti

} // namespace proton
Loading