diff --git a/CMakeLists.txt b/CMakeLists.txt index b04de886..0c6806bb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,12 +24,12 @@ if(MSVC AND MSVC_VERSION VERSION_LESS "1900") endif() include_directories(${LLVM_INCLUDE_DIRS}) -link_directories(${LLVM_LIBRARY_DIRS}) add_definitions(${LLVM_DEFINITIONS}) file(GLOB_RECURSE HIPIFY_SOURCES src/*.cpp) file(GLOB_RECURSE HIPIFY_HEADERS src/*.h) add_llvm_executable(hipify-clang ${HIPIFY_SOURCES} ${HIPIFY_HEADERS}) +target_link_directories(hipify-clang PRIVATE ${LLVM_LIBRARY_DIRS}) set(CMAKE_CXX_COMPILER ${LLVM_TOOLS_BINARY_DIR}/clang++) set(CMAKE_C_COMPILER ${LLVM_TOOLS_BINARY_DIR}/clang) @@ -131,7 +131,14 @@ install( PATTERN "openmp_wrappers" EXCLUDE) option(FILE_REORG_BACKWARD_COMPATIBILITY "Enable File Reorg with backward compatibility" ON) + if(UNIX) + + # Clear any INSTALL_RPATH definitions already present on the target + set_target_properties(hipify-clang PROPERTIES INSTALL_RPATH "") + # Append the RPATH link flags; APPEND_STRING keeps LINK_FLAGS that may already be set on the target (presumably by add_llvm_executable) + set_property(TARGET hipify-clang APPEND_STRING PROPERTY LINK_FLAGS " -Wl,--disable-new-dtags -Wl,--rpath,$ORIGIN/../lib" ) + if(FILE_REORG_BACKWARD_COMPATIBILITY) include(hipify-backward-compat.cmake) endif() diff --git a/README.md b/README.md index 7e7f8871..2040e605 100644 --- a/README.md +++ b/README.md @@ -43,9 +43,9 @@ After applying all the matchers, the output HIP source is produced. `hipify-clang` requires: -1. [**LLVM+CLANG**](http://releases.llvm.org) of at least version [4.0.0](http://releases.llvm.org/download.html#4.0.0); the latest stable and recommended release: [**15.0.0**](https://github.com/llvm/llvm-project/releases/tag/llvmorg-15.0.0). +1. [**LLVM+CLANG**](http://releases.llvm.org) of at least version [4.0.0](http://releases.llvm.org/download.html#4.0.0); the latest stable and recommended release: [**15.0.4**](https://github.com/llvm/llvm-project/releases/tag/llvmorg-15.0.4). -2. 
[**CUDA**](https://developer.nvidia.com/cuda-downloads) of at least version [8.0](https://developer.nvidia.com/cuda-80-ga2-download-archive), the latest supported version is [**11.7.1**](https://developer.nvidia.com/cuda-downloads). +2. [**CUDA**](https://developer.nvidia.com/cuda-downloads) of at least version [8.0](https://developer.nvidia.com/cuda-80-ga2-download-archive), the latest supported version is [**11.8.0**](https://developer.nvidia.com/cuda-downloads). @@ -160,7 +160,7 @@ After applying all the matchers, the output HIP source is produced. 14.0.2, 14.0.3, 14.0.4 - + - + 14.0.6,
+ 15.0.0, + 15.0.1,
+ 15.0.2, + 15.0.3,
+ 15.0.4 + @@ -187,7 +191,7 @@ After applying all the matchers, the output HIP source is produced. In most cases, you can get a suitable version of `LLVM+CLANG` with your package manager. Failing that or having multiple versions of `LLVM`, you can [download a release archive](http://releases.llvm.org/), build or install it, and set -[CMAKE_PREFIX_PATH](https://cmake.org/cmake/help/v3.5/variable/CMAKE_PREFIX_PATH.html) so `cmake` can find it; for instance: `-DCMAKE_PREFIX_PATH=d:\LLVM\15.0.0\dist` +[CMAKE_PREFIX_PATH](https://cmake.org/cmake/help/v3.5/variable/CMAKE_PREFIX_PATH.html) so `cmake` can find it; for instance: `-DCMAKE_PREFIX_PATH=d:\LLVM\15.0.4\dist` ### hipify-clang: usage @@ -196,14 +200,14 @@ To process a file, `hipify-clang` needs access to the same headers that would be For example: ```shell -./hipify-clang square.cu --cuda-path=/usr/local/cuda-11.7 -I /usr/local/cuda-11.7/samples/common/inc +./hipify-clang square.cu --cuda-path=/usr/local/cuda-11.8 -I /usr/local/cuda-11.8/samples/common/inc ``` `hipify-clang` arguments are given first, followed by a separator `'--'`, and then the arguments you'd pass to `clang` if you were compiling the input file. For example: ```bash -./hipify-clang cpp17.cu --cuda-path=/usr/local/cuda-11.7 -- -std=c++17 +./hipify-clang cpp17.cu --cuda-path=/usr/local/cuda-11.8 -- -std=c++17 ``` The [Clang manual for compiling CUDA](https://llvm.org/docs/CompileCudaWithLLVM.html#compiling-cuda-code) may be useful. @@ -285,7 +289,7 @@ Run `Visual Studio 16 2019`, open the generated `LLVM.sln`, build all, and build **LLVM >= 10.0.0:** -1. download [`LLVM project`](https://github.com/llvm/llvm-project/releases/tag/llvmorg-15.0.0) sources; +1. download [`LLVM project`](https://github.com/llvm/llvm-project/releases/tag/llvmorg-15.0.4) sources; 2. 
build [`LLVM project`](http://llvm.org/docs/CMake.html): ```bash @@ -324,9 +328,9 @@ Run `Visual Studio 17 2022`, open the generated `LLVM.sln`, build all, build pro - ***Linux***: `-DCUDA_TOOLKIT_ROOT_DIR=/usr/include` - - ***Windows***: `-DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.7"` + - ***Windows***: `-DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8"` - `-DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v11.7"` + `-DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v11.8"` 4. Ensure [`cuDNN`](https://developer.nvidia.com/rdp/cudnn-archive) of the version corresponding to CUDA's version is installed. @@ -334,7 +338,7 @@ Run `Visual Studio 17 2022`, open the generated `LLVM.sln`, build all, build pro - ***Linux***: `-DCUDA_DNN_ROOT_DIR=/usr/include` - - ***Windows***: `-DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-11.7-windows-x64-v8.4.1` + - ***Windows***: `-DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-11.8-windows-x64-v8.6.0` 5. Ensure [`CUB`](https://github.com/NVlabs/cub) of the version corresponding to CUDA's version is installed. 
@@ -350,21 +354,21 @@ Run `Visual Studio 17 2022`, open the generated `LLVM.sln`, build all, build pro * Install `lit` into `python`: - - ***Linux***: `python /usr/llvm/15.0.0/llvm-project/llvm/utils/lit/setup.py install` + - ***Linux***: `python /usr/llvm/15.0.4/llvm-project/llvm/utils/lit/setup.py install` - - ***Windows***: `python d:/LLVM/15.0.0/llvm-project/llvm/utils/lit/setup.py install` + - ***Windows***: `python d:/LLVM/15.0.4/llvm-project/llvm/utils/lit/setup.py install` * Starting with LLVM 6.0.1 path to `llvm-lit` python script should be specified by the `LLVM_EXTERNAL_LIT` option: - - ***Linux***: `-DLLVM_EXTERNAL_LIT=/usr/llvm/15.0.0/build/bin/llvm-lit` + - ***Linux***: `-DLLVM_EXTERNAL_LIT=/usr/llvm/15.0.4/build/bin/llvm-lit` - - ***Windows***: `-DLLVM_EXTERNAL_LIT=d:/LLVM/15.0.0/build/Release/bin/llvm-lit.py` + - ***Windows***: `-DLLVM_EXTERNAL_LIT=d:/LLVM/15.0.4/build/Release/bin/llvm-lit.py` * `FileCheck`: - - ***Linux***: copy from `/usr/llvm/15.0.0/build/bin/` to `CMAKE_INSTALL_PREFIX/dist/bin` + - ***Linux***: copy from `/usr/llvm/15.0.4/build/bin/` to `CMAKE_INSTALL_PREFIX/dist/bin` - - ***Windows***: copy from `d:/LLVM/15.0.0/build/Release/bin` to `CMAKE_INSTALL_PREFIX/dist/bin` + - ***Windows***: copy from `d:/LLVM/15.0.4/build/Release/bin` to `CMAKE_INSTALL_PREFIX/dist/bin` - Or specify the path to `FileCheck` in `CMAKE_INSTALL_PREFIX` option @@ -386,7 +390,7 @@ Ubuntu 14: LLVM 4.0.0 - 7.1.0, CUDA 7.0 - 9.0, cuDNN 5.0.5 - 7.6.5 Ubuntu 16-18: LLVM 8.0.0 - 14.0.6, CUDA 8.0 - 10.2, cuDNN 5.1.10 - 8.0.5 -Ubuntu 20-21: LLVM 9.0.0 - 15.0.0, CUDA 8.0 - 11.7.1, cuDNN 5.1.10 - 8.4.1 +Ubuntu 20-21: LLVM 9.0.0 - 15.0.4, CUDA 8.0 - 11.8.0, cuDNN 5.1.10 - 8.6.0 Minimum build system requirements for the above configurations: @@ -403,11 +407,11 @@ cmake -DHIPIFY_CLANG_TESTS=1 \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=../dist \ - -DCMAKE_PREFIX_PATH=/usr/llvm/15.0.0/dist \ + -DCMAKE_PREFIX_PATH=/usr/llvm/15.0.4/dist \ 
-DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda \ -DCUDA_DNN_ROOT_DIR=/usr/local/cuda \ -DCUDA_CUB_ROOT_DIR=/usr/CUB \ - -DLLVM_EXTERNAL_LIT=/usr/llvm/15.0.0/build/bin/llvm-lit \ + -DLLVM_EXTERNAL_LIT=/usr/llvm/15.0.4/build/bin/llvm-lit \ ../hipify ``` *A corresponding successful output:* @@ -425,20 +429,20 @@ cmake -- Detecting CXX compile features -- Detecting CXX compile features - done -- Found ZLIB: /usr/lib/x86_64-linux-gnu/libz.so (found version "1.2.11") --- Found LLVM 15.0.0: --- - CMake module path: /usr/llvm/15.0.0/dist/lib/cmake/llvm --- - Include path : /usr/llvm/15.0.0/dist/include --- - Binary path : /usr/llvm/15.0.0/dist/bin +-- Found LLVM 15.0.4: +-- - CMake module path: /usr/llvm/15.0.4/dist/lib/cmake/llvm +-- - Include path : /usr/llvm/15.0.4/dist/include +-- - Binary path : /usr/llvm/15.0.4/dist/bin -- Linker detection: GNU ld -- Found PythonInterp: /usr/bin/python (found suitable version "3.9.7", minimum required is "2.7") -- Found lit: /usr/local/bin/lit --- Found FileCheck: /usr/llvm/15.0.0/dist/bin/FileCheck +-- Found FileCheck: /usr/llvm/15.0.4/dist/bin/FileCheck -- Looking for pthread.h -- Looking for pthread.h - found -- Performing Test CMAKE_HAVE_LIBC_PTHREAD -- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Success -- Found Threads: TRUE --- Found CUDA: /usr/local/cuda (found version "11.7") +-- Found CUDA: /usr/local/cuda (found version "11.8") -- Configuring done -- Generating done -- Build files have been written to: /usr/hipify/build @@ -450,8 +454,8 @@ make test-hipify ```shell Running HIPify regression tests ======================================== -CUDA 11.7 - will be used for testing -LLVM 15.0.0 - will be used for testing +CUDA 11.8 - will be used for testing +LLVM 15.0.4 - will be used for testing x86_64 - Platform architecture Linux 5.13.0-21-generic - Platform OS 64 - hipify-clang binary bitness @@ -565,8 +569,8 @@ Testing Time: 6.22s | 11.0.1 - 11.1.0 | 7.0 - 11.2.2 | 7.6.5 - 8.0.5 | 2017.15.9.31, 2019.16.8.4 | 3.19.3 | 3.9.2 | | 12.0.0 
- 13.0.1 | 7.0 - 11.5.1 | 7.6.5 - 8.3.2 | 2017.15.9.43, 2019.16.11.9 | 3.22.2 | 3.10.2 | | 14.0.0 - 14.0.6 | 7.0 - 11.7.1 | 8.0.5 - 8.4.1 | 2017.15.9.49, 2019.16.11.17, 2022.17.2.6 | 3.24.0 | 3.10.6 | -| 15.0.0 | 7.0 - 11.7.1 | 8.0.5 - 8.4.1 | 2017.15.9.50, 2019.16.11.18, 2022.17.3.3 | 3.24.1 | 3.10.6 | -| 16.0.0git | 7.0 - 11.7.1 | 8.0.5 - 8.4.1 | 2017.15.9.50, 2019.16.11.18, 2022.17.3.3 | 3.24.1 | 3.10.6 | +| 15.0.0 - 15.0.4 | 7.0 - 11.8.0 | 8.0.5 - 8.6.0 | 2017.15.9.50, 2019.16.11.20, 2022.17.3.6 | 3.24.2 | 3.11.0 | +| 16.0.0git | 7.0 - 11.8.0 | 8.0.5 - 8.6.0 | 2017.15.9.50, 2019.16.11.20, 2022.17.3.6 | 3.24.2 | 3.11.0 | *Building with testing support by `Visual Studio 17 2022` on `Windows 10`:* @@ -578,24 +582,24 @@ cmake -DHIPIFY_CLANG_TESTS=1 \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=../dist \ - -DCMAKE_PREFIX_PATH=d:/LLVM/15.0.0/dist \ - -DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.7" \ - -DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v11.7" \ - -DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-11.7-windows-x64-v8.4.1 \ + -DCMAKE_PREFIX_PATH=d:/LLVM/15.0.4/dist \ + -DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8" \ + -DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v11.8" \ + -DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-11.8-windows-x64-v8.6.0 \ -DCUDA_CUB_ROOT_DIR=d:/GIT/cub \ - -DLLVM_EXTERNAL_LIT=d:/LLVM/15.0.0/build/Release/bin/llvm-lit.py \ + -DLLVM_EXTERNAL_LIT=d:/LLVM/15.0.4/build/Release/bin/llvm-lit.py \ ../hipify ``` *A corresponding successful output:* ```shell --- Found LLVM 15.0.0: --- - CMake module path: d:/LLVM/15.0.0/dist/lib/cmake/llvm --- - Include path : d:/LLVM/15.0.0/dist/include --- - Binary path : d:/LLVM/15.0.0/dist/bin --- Found PythonInterp: c:/Program Files/Python39/python.exe (found suitable version "3.9.5", minimum required is "3.6") --- Found lit: c:/Program Files/Python39/Scripts/lit.exe --- Found FileCheck: 
d:/LLVM/15.0.0/dist/bin/FileCheck.exe --- Found CUDA: c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.7 (found version "11.7") +-- Found LLVM 15.0.4: +-- - CMake module path: d:/LLVM/15.0.4/dist/lib/cmake/llvm +-- - Include path : d:/LLVM/15.0.4/dist/include +-- - Binary path : d:/LLVM/15.0.4/dist/bin +-- Found PythonInterp: c:/Program Files/Python311/python.exe (found suitable version "3.11.0", minimum required is "3.6") +-- Found lit: c:/Program Files/Python311/Scripts/lit.exe +-- Found FileCheck: d:/LLVM/15.0.4/dist/bin/FileCheck.exe +-- Found CUDA: c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8 (found version "11.8") -- Configuring done -- Generating done -- Build files have been written to: d:/hipify/build diff --git a/bin/hipify-perl b/bin/hipify-perl index 4dfd064d..23da6adc 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -682,6 +682,8 @@ my %removed_funcs = ( "cudaSetupArgument" => "10.1", "cudaLaunch" => "10.1", "cudaConfigureCall" => "10.1", + "CUkernelNodeAttrValue_union" => "11.8", + "CUkernelNodeAttrID_enum" => "11.8", "CU_TARGET_COMPUTE_73" => "10.0", "CU_TARGET_COMPUTE_13" => "9.0", "CU_TARGET_COMPUTE_12" => "9.0", @@ -715,77 +717,8 @@ my %removed_funcs = ( ); my %experimental_funcs = ( - "nvrtcGetCUBINSize" => "5.3.0", - "nvrtcGetCUBIN" => "5.3.0", - "cudaUserObject_t" => "5.3.0", - "cudaUserObjectRetainFlags" => "5.3.0", - "cudaUserObjectRetain" => "5.3.0", - "cudaUserObjectRelease" => "5.3.0", - "cudaUserObjectNoDestructorSync" => "5.3.0", - "cudaUserObjectFlags" => "5.3.0", - "cudaUserObjectCreate" => "5.3.0", - "cudaMemoryTypeManaged" => "5.3.0", - "cudaLimitStackSize" => "5.3.0", - "cudaGraphUserObjectMove" => "5.3.0", - "cudaGraphUpload" => "5.3.0", - "cudaGraphRetainUserObject" => "5.3.0", - "cudaGraphReleaseUserObject" => "5.3.0", - "cudaGraphNodeTypeExtSemaphoreWait" => "5.3.0", - "cudaGraphNodeTypeExtSemaphoreSignal" => "5.3.0", - "cudaGraphMemAttributeType" => "5.3.0", - "cudaGraphMemAttrUsedMemHigh" => "5.3.0", - 
"cudaGraphMemAttrUsedMemCurrent" => "5.3.0", - "cudaGraphMemAttrReservedMemHigh" => "5.3.0", - "cudaGraphMemAttrReservedMemCurrent" => "5.3.0", - "cudaDeviceSetLimit" => "5.3.0", - "cudaDeviceSetGraphMemAttribute" => "5.3.0", - "cudaDeviceGraphMemTrim" => "5.3.0", - "cudaDeviceGetGraphMemAttribute" => "5.3.0", - "cuUserObjectRetain" => "5.3.0", - "cuUserObjectRelease" => "5.3.0", - "cuUserObjectCreate" => "5.3.0", - "cuLinkDestroy" => "5.3.0", - "cuLinkCreate_v2" => "5.3.0", - "cuLinkCreate" => "5.3.0", - "cuLinkComplete" => "5.3.0", - "cuLinkAddFile_v2" => "5.3.0", - "cuLinkAddFile" => "5.3.0", - "cuLinkAddData_v2" => "5.3.0", - "cuLinkAddData" => "5.3.0", - "cuGraphUpload" => "5.3.0", - "cuGraphRetainUserObject" => "5.3.0", - "cuGraphReleaseUserObject" => "5.3.0", - "cuDeviceSetGraphMemAttribute" => "5.3.0", - "cuDeviceGraphMemTrim" => "5.3.0", - "cuDeviceGetGraphMemAttribute" => "5.3.0", - "cuCtxSetLimit" => "5.3.0", - "CUuserObject_st" => "5.3.0", - "CUuserObject_flags_enum" => "5.3.0", - "CUuserObject_flags" => "5.3.0", - "CUuserObjectRetain_flags_enum" => "5.3.0", - "CUuserObjectRetain_flags" => "5.3.0", - "CUuserObject" => "5.3.0", - "CUjitInputType_enum" => "5.3.0", - "CUjitInputType" => "5.3.0", - "CUgraphMem_attribute_enum" => "5.3.0", - "CUgraphMem_attribute" => "5.3.0", - "CU_USER_OBJECT_NO_DESTRUCTOR_SYNC" => "5.3.0", - "CU_LIMIT_STACK_SIZE" => "5.3.0", - "CU_JIT_NUM_INPUT_TYPES" => "5.3.0", - "CU_JIT_INPUT_PTX" => "5.3.0", - "CU_JIT_INPUT_OBJECT" => "5.3.0", - "CU_JIT_INPUT_NVVM" => "5.3.0", - "CU_JIT_INPUT_LIBRARY" => "5.3.0", - "CU_JIT_INPUT_FATBINARY" => "5.3.0", - "CU_JIT_INPUT_CUBIN" => "5.3.0", - "CU_GRAPH_USER_OBJECT_MOVE" => "5.3.0", - "CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT" => "5.3.0", - "CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL" => "5.3.0", - "CU_GRAPH_MEM_ATTR_USED_MEM_HIGH" => "5.3.0", - "CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT" => "5.3.0", - "CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH" => "5.3.0", - "CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT" => "5.3.0", - 
"CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED" => "5.3.0" + "cuGetErrorString" => "5.4.0", + "cuGetErrorName" => "5.4.0" ); $print_stats = 1 if $examine; @@ -853,7 +786,7 @@ push(@exclude_filelist, split(',', $exclude_files)); %exclude_dirhash = map { $_ => 1 } @exclude_dirlist; %exclude_filehash = map { $_ => 1 } @exclude_filelist; -@statNames = ("error", "init", "version", "device", "context", "module", "memory", "virtual_memory", "stream_ordered_memory", "addressing", "stream", "event", "external_resource_interop", "stream_memory", "execution", "graph", "occupancy", "texture", "surface", "peer", "graphics", "interactions", "profiler", "openGL", "D3D9", "D3D10", "D3D11", "VDPAU", "EGL", "thread", "complex", "library", "device_library", "device_function", "include", "include_cuda_main_header", "include_cuda_main_header_v2", "type", "literal", "numeric_literal", "define", "extern_shared", "kernel_launch"); +@statNames = ("error", "init", "version", "device", "context", "module", "memory", "virtual_memory", "stream_ordered_memory", "addressing", "stream", "event", "external_resource_interop", "stream_memory", "execution", "graph", "occupancy", "texture", "surface", "peer", "graphics", "interactions", "profiler", "openGL", "D3D9", "D3D10", "D3D11", "VDPAU", "EGL", "thread", "complex", "library", "device_library", "device_function", "device_type", "include", "include_cuda_main_header", "include_cuda_main_header_v2", "type", "literal", "numeric_literal", "define", "extern_shared", "kernel_launch"); sub totalStats { my %count = %{shift()}; @@ -923,77 +856,8 @@ sub subst { } sub experimentalSubstitutions { - subst("cudaDeviceSetLimit", "hipDeviceSetLimit", "device"); - subst("cuCtxSetLimit", "hipDeviceSetLimit", "context"); - subst("cuLinkAddData", "hiprtcLinkAddData", "module"); - subst("cuLinkAddData_v2", "hiprtcLinkAddData", "module"); - subst("cuLinkAddFile", "hiprtcLinkAddFile", "module"); - subst("cuLinkAddFile_v2", "hiprtcLinkAddFile", "module"); - 
subst("cuLinkComplete", "hiprtcLinkComplete", "module"); - subst("cuLinkCreate", "hiprtcLinkCreate", "module"); - subst("cuLinkCreate_v2", "hiprtcLinkCreate", "module"); - subst("cuLinkDestroy", "hiprtcLinkDestroy", "module"); - subst("cuDeviceGetGraphMemAttribute", "hipDeviceGetGraphMemAttribute", "graph"); - subst("cuDeviceGraphMemTrim", "hipDeviceGraphMemTrim", "graph"); - subst("cuDeviceSetGraphMemAttribute", "hipDeviceSetGraphMemAttribute", "graph"); - subst("cuGraphReleaseUserObject", "hipGraphReleaseUserObject", "graph"); - subst("cuGraphRetainUserObject", "hipGraphRetainUserObject", "graph"); - subst("cuGraphUpload", "hipGraphUpload", "graph"); - subst("cuUserObjectCreate", "hipUserObjectCreate", "graph"); - subst("cuUserObjectRelease", "hipUserObjectRelease", "graph"); - subst("cuUserObjectRetain", "hipUserObjectRetain", "graph"); - subst("cudaDeviceGetGraphMemAttribute", "hipDeviceGetGraphMemAttribute", "graph"); - subst("cudaDeviceGraphMemTrim", "hipDeviceGraphMemTrim", "graph"); - subst("cudaDeviceSetGraphMemAttribute", "hipDeviceSetGraphMemAttribute", "graph"); - subst("cudaGraphReleaseUserObject", "hipGraphReleaseUserObject", "graph"); - subst("cudaGraphRetainUserObject", "hipGraphRetainUserObject", "graph"); - subst("cudaGraphUpload", "hipGraphUpload", "graph"); - subst("cudaUserObjectCreate", "hipUserObjectCreate", "graph"); - subst("cudaUserObjectRelease", "hipUserObjectRelease", "graph"); - subst("cudaUserObjectRetain", "hipUserObjectRetain", "graph"); - subst("nvrtcGetCUBIN", "hiprtcGetBitcode", "library"); - subst("nvrtcGetCUBINSize", "hiprtcGetBitcodeSize", "library"); - subst("CUgraphMem_attribute", "hipGraphMemAttributeType", "type"); - subst("CUgraphMem_attribute_enum", "hipGraphMemAttributeType", "type"); - subst("CUjitInputType", "hiprtcJITInputType", "type"); - subst("CUjitInputType_enum", "hiprtcJITInputType", "type"); - subst("CUuserObject", "hipUserObject_t", "type"); - subst("CUuserObjectRetain_flags", "hipUserObjectRetainFlags", 
"type"); - subst("CUuserObjectRetain_flags_enum", "hipUserObjectRetainFlags", "type"); - subst("CUuserObject_flags", "hipUserObjectFlags", "type"); - subst("CUuserObject_flags_enum", "hipUserObjectFlags", "type"); - subst("CUuserObject_st", "hipUserObject", "type"); - subst("cudaGraphMemAttributeType", "hipGraphMemAttributeType", "type"); - subst("cudaUserObjectFlags", "hipUserObjectFlags", "type"); - subst("cudaUserObjectRetainFlags", "hipUserObjectRetainFlags", "type"); - subst("cudaUserObject_t", "hipUserObject_t", "type"); - subst("CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED", "hipDeviceAttributeVirtualMemoryManagementSupported", "numeric_literal"); - subst("CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT", "hipGraphMemAttrReservedMemCurrent", "numeric_literal"); - subst("CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH", "hipGraphMemAttrReservedMemHigh", "numeric_literal"); - subst("CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT", "hipGraphMemAttrUsedMemCurrent", "numeric_literal"); - subst("CU_GRAPH_MEM_ATTR_USED_MEM_HIGH", "hipGraphMemAttrUsedMemHigh", "numeric_literal"); - subst("CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL", "hipGraphNodeTypeExtSemaphoreSignal", "numeric_literal"); - subst("CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT", "hipGraphNodeTypeExtSemaphoreWait", "numeric_literal"); - subst("CU_GRAPH_USER_OBJECT_MOVE", "hipGraphUserObjectMove", "numeric_literal"); - subst("CU_JIT_INPUT_CUBIN", "HIPRTC_JIT_INPUT_CUBIN", "numeric_literal"); - subst("CU_JIT_INPUT_FATBINARY", "HIPRTC_JIT_INPUT_FATBINARY", "numeric_literal"); - subst("CU_JIT_INPUT_LIBRARY", "HIPRTC_JIT_INPUT_LIBRARY", "numeric_literal"); - subst("CU_JIT_INPUT_NVVM", "HIPRTC_JIT_INPUT_NVVM", "numeric_literal"); - subst("CU_JIT_INPUT_OBJECT", "HIPRTC_JIT_INPUT_OBJECT", "numeric_literal"); - subst("CU_JIT_INPUT_PTX", "HIPRTC_JIT_INPUT_PTX", "numeric_literal"); - subst("CU_JIT_NUM_INPUT_TYPES", "HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES", "numeric_literal"); - subst("CU_LIMIT_STACK_SIZE", "hipLimitStackSize", "numeric_literal"); - 
subst("CU_USER_OBJECT_NO_DESTRUCTOR_SYNC", "hipUserObjectNoDestructorSync", "numeric_literal"); - subst("cudaGraphMemAttrReservedMemCurrent", "hipGraphMemAttrReservedMemCurrent", "numeric_literal"); - subst("cudaGraphMemAttrReservedMemHigh", "hipGraphMemAttrReservedMemHigh", "numeric_literal"); - subst("cudaGraphMemAttrUsedMemCurrent", "hipGraphMemAttrUsedMemCurrent", "numeric_literal"); - subst("cudaGraphMemAttrUsedMemHigh", "hipGraphMemAttrUsedMemHigh", "numeric_literal"); - subst("cudaGraphNodeTypeExtSemaphoreSignal", "hipGraphNodeTypeExtSemaphoreSignal", "numeric_literal"); - subst("cudaGraphNodeTypeExtSemaphoreWait", "hipGraphNodeTypeExtSemaphoreWait", "numeric_literal"); - subst("cudaGraphUserObjectMove", "hipGraphUserObjectMove", "numeric_literal"); - subst("cudaLimitStackSize", "hipLimitStackSize", "numeric_literal"); - subst("cudaMemoryTypeManaged", "hipMemoryTypeManaged", "numeric_literal"); - subst("cudaUserObjectNoDestructorSync", "hipUserObjectNoDestructorSync", "numeric_literal"); + subst("cuGetErrorName", "hipDrvGetErrorName", "error"); + subst("cuGetErrorString", "hipDrvGetErrorString", "error"); } sub rocSubstitutions { @@ -1033,7 +897,7 @@ sub rocSubstitutions { subst("cublasCher2k_v2", "rocblas_cher2k", "library"); subst("cublasCher_v2", "rocblas_cher", "library"); subst("cublasCherk", "rocblas_cherk", "library"); - subst("cublasCherk_v2", "rocblas_cherkx", "library"); + subst("cublasCherk_v2", "rocblas_cherk", "library"); subst("cublasCherkx", "rocblas_cherkx", "library"); subst("cublasChpmv", "rocblas_chpmv", "library"); subst("cublasChpmv_v2", "rocblas_chpmv", "library"); @@ -1076,8 +940,8 @@ sub rocSubstitutions { subst("cublasCtpmv_v2", "rocblas_ctpmv", "library"); subst("cublasCtpsv", "rocblas_ctpsv", "library"); subst("cublasCtpsv_v2", "rocblas_ctpsv", "library"); - subst("cublasCtrmm", "rocblas_ctrmm", "library"); - subst("cublasCtrmm_v2", "rocblas_ctrmm", "library"); + subst("cublasCtrmm", "rocblas_ctrmm_outofplace", "library"); + 
subst("cublasCtrmm_v2", "rocblas_ctrmm_outofplace", "library"); subst("cublasCtrmv", "rocblas_ctrmv", "library"); subst("cublasCtrmv_v2", "rocblas_ctrmv", "library"); subst("cublasCtrsm", "rocblas_ctrsm", "library"); @@ -1152,8 +1016,8 @@ sub rocSubstitutions { subst("cublasDtpmv_v2", "rocblas_dtpmv", "library"); subst("cublasDtpsv", "rocblas_dtpsv", "library"); subst("cublasDtpsv_v2", "rocblas_dtpsv", "library"); - subst("cublasDtrmm", "rocblas_dtrmm", "library"); - subst("cublasDtrmm_v2", "rocblas_dtrmm", "library"); + subst("cublasDtrmm", "rocblas_dtrmm_outofplace", "library"); + subst("cublasDtrmm_v2", "rocblas_dtrmm_outofplace", "library"); subst("cublasDtrmv", "rocblas_dtrmv", "library"); subst("cublasDtrmv_v2", "rocblas_dtrmv", "library"); subst("cublasDtrsm", "rocblas_dtrsm", "library"); @@ -1168,10 +1032,12 @@ sub rocSubstitutions { subst("cublasGemmBatchedEx", "rocblas_gemm_batched_ex", "library"); subst("cublasGemmEx", "rocblas_gemm_ex", "library"); subst("cublasGemmStridedBatchedEx", "rocblas_gemm_strided_batched_ex", "library"); + subst("cublasGetAtomicsMode", "rocblas_get_atomics_mode", "library"); subst("cublasGetMatrix", "rocblas_get_matrix", "library"); subst("cublasGetMatrixAsync", "rocblas_get_matrix_async", "library"); subst("cublasGetPointerMode", "rocblas_get_pointer_mode", "library"); subst("cublasGetPointerMode_v2", "rocblas_set_pointer_mode", "library"); + subst("cublasGetStatusString", "rocblas_status_to_string", "library"); subst("cublasGetStream", "rocblas_get_stream", "library"); subst("cublasGetStream_v2", "rocblas_get_stream", "library"); subst("cublasGetVector", "rocblas_get_vector", "library"); @@ -1187,6 +1053,7 @@ sub rocSubstitutions { subst("cublasIdamax_v2", "rocblas_idamax", "library"); subst("cublasIdamin", "rocblas_idamin", "library"); subst("cublasIdamin_v2", "rocblas_idamin", "library"); + subst("cublasInit", "rocblas_initialize", "library"); subst("cublasIsamax", "rocblas_isamax", "library"); subst("cublasIsamax_v2", 
"rocblas_isamax", "library"); subst("cublasIsamin", "rocblas_isamin", "library"); @@ -1211,6 +1078,7 @@ sub rocSubstitutions { subst("cublasSdgmm", "rocblas_sdgmm", "library"); subst("cublasSdot", "rocblas_sdot", "library"); subst("cublasSdot_v2", "rocblas_sdot", "library"); + subst("cublasSetAtomicsMode", "rocblas_set_atomics_mode", "library"); subst("cublasSetMatrix", "rocblas_set_matrix", "library"); subst("cublasSetMatrixAsync", "rocblas_set_matrix_async", "library"); subst("cublasSetPointerMode", "rocblas_set_pointer_mode", "library"); @@ -1273,8 +1141,8 @@ sub rocSubstitutions { subst("cublasStpmv_v2", "rocblas_stpmv", "library"); subst("cublasStpsv", "rocblas_stpsv", "library"); subst("cublasStpsv_v2", "rocblas_stpsv", "library"); - subst("cublasStrmm", "rocblas_strmm", "library"); - subst("cublasStrmm_v2", "rocblas_strmm", "library"); + subst("cublasStrmm", "rocblas_strmm_outofplace", "library"); + subst("cublasStrmm_v2", "rocblas_strmm_outofplace", "library"); subst("cublasStrmv", "rocblas_strmv", "library"); subst("cublasStrmv_v2", "rocblas_strmv", "library"); subst("cublasStrsm", "rocblas_strsm", "library"); @@ -1358,8 +1226,8 @@ sub rocSubstitutions { subst("cublasZtpmv_v2", "rocblas_ztpmv", "library"); subst("cublasZtpsv", "rocblas_ztpsv", "library"); subst("cublasZtpsv_v2", "rocblas_ztpsv", "library"); - subst("cublasZtrmm", "rocblas_ztrmm", "library"); - subst("cublasZtrmm_v2", "rocblas_ztrmm", "library"); + subst("cublasZtrmm", "rocblas_ztrmm_outofplace", "library"); + subst("cublasZtrmm_v2", "rocblas_ztrmm_outofplace", "library"); subst("cublasZtrmv", "rocblas_ztrmv", "library"); subst("cublasZtrmv_v2", "rocblas_ztrmv", "library"); subst("cublasZtrsm", "rocblas_ztrsm", "library"); @@ -1381,6 +1249,8 @@ sub rocSubstitutions { subst("cublasSideMode_t", "rocblas_side", "type"); subst("cublasStatus", "rocblas_status", "type"); subst("cublasStatus_t", "rocblas_status", "type"); + subst("cudaDataType", "rocblas_datatype", "type"); + 
subst("cudaDataType_t", "rocblas_datatype_", "type"); subst("CUBLAS_ATOMICS_ALLOWED", "rocblas_atomics_allowed", "numeric_literal"); subst("CUBLAS_ATOMICS_NOT_ALLOWED", "rocblas_atomics_not_allowed", "numeric_literal"); subst("CUBLAS_DIAG_NON_UNIT", "rocblas_diagonal_non_unit", "numeric_literal"); @@ -1407,6 +1277,22 @@ sub rocSubstitutions { subst("CUBLAS_STATUS_NOT_INITIALIZED", "rocblas_status_invalid_handle", "numeric_literal"); subst("CUBLAS_STATUS_NOT_SUPPORTED", "rocblas_status_perf_degraded", "numeric_literal"); subst("CUBLAS_STATUS_SUCCESS", "rocblas_status_success", "numeric_literal"); + subst("CUDA_C_16BF", "rocblas_datatype_bf16_c", "numeric_literal"); + subst("CUDA_C_16F", "rocblas_datatype_f16_c", "numeric_literal"); + subst("CUDA_C_32F", "rocblas_datatype_f32_c", "numeric_literal"); + subst("CUDA_C_32I", "rocblas_datatype_i32_c", "numeric_literal"); + subst("CUDA_C_32U", "rocblas_datatype_u32_c", "numeric_literal"); + subst("CUDA_C_64F", "rocblas_datatype_f64_c", "numeric_literal"); + subst("CUDA_C_8I", "rocblas_datatype_i8_c", "numeric_literal"); + subst("CUDA_C_8U", "rocblas_datatype_u8_c", "numeric_literal"); + subst("CUDA_R_16BF", "rocblas_datatype_bf16_r", "numeric_literal"); + subst("CUDA_R_16F", "rocblas_datatype_f16_r", "numeric_literal"); + subst("CUDA_R_32F", "rocblas_datatype_f32_r", "numeric_literal"); + subst("CUDA_R_32I", "rocblas_datatype_i32_r", "numeric_literal"); + subst("CUDA_R_32U", "rocblas_datatype_u32_r", "numeric_literal"); + subst("CUDA_R_64F", "rocblas_datatype_f64_r", "numeric_literal"); + subst("CUDA_R_8I", "rocblas_datatype_i8_r", "numeric_literal"); + subst("CUDA_R_8U", "rocblas_datatype_u8_r", "numeric_literal"); } sub simpleSubstitutions { @@ -1443,6 +1329,7 @@ sub simpleSubstitutions { subst("cudaDeviceGetStreamPriorityRange", "hipDeviceGetStreamPriorityRange", "device"); subst("cudaDeviceReset", "hipDeviceReset", "device"); subst("cudaDeviceSetCacheConfig", "hipDeviceSetCacheConfig", "device"); + 
subst("cudaDeviceSetLimit", "hipDeviceSetLimit", "device"); subst("cudaDeviceSetMemPool", "hipDeviceSetMemPool", "device"); subst("cudaDeviceSetSharedMemConfig", "hipDeviceSetSharedMemConfig", "device"); subst("cudaDeviceSynchronize", "hipDeviceSynchronize", "device"); @@ -1476,6 +1363,7 @@ sub simpleSubstitutions { subst("cuCtxPushCurrent_v2", "hipCtxPushCurrent", "context"); subst("cuCtxSetCacheConfig", "hipCtxSetCacheConfig", "context"); subst("cuCtxSetCurrent", "hipCtxSetCurrent", "context"); + subst("cuCtxSetLimit", "hipDeviceSetLimit", "context"); subst("cuCtxSetSharedMemConfig", "hipCtxSetSharedMemConfig", "context"); subst("cuCtxSynchronize", "hipCtxSynchronize", "context"); subst("cuDevicePrimaryCtxGetState", "hipDevicePrimaryCtxGetState", "context"); @@ -1486,6 +1374,14 @@ sub simpleSubstitutions { subst("cuDevicePrimaryCtxRetain", "hipDevicePrimaryCtxRetain", "context"); subst("cuDevicePrimaryCtxSetFlags", "hipDevicePrimaryCtxSetFlags", "context"); subst("cuDevicePrimaryCtxSetFlags_v2", "hipDevicePrimaryCtxSetFlags", "context"); + subst("cuLinkAddData", "hiprtcLinkAddData", "module"); + subst("cuLinkAddData_v2", "hiprtcLinkAddData", "module"); + subst("cuLinkAddFile", "hiprtcLinkAddFile", "module"); + subst("cuLinkAddFile_v2", "hiprtcLinkAddFile", "module"); + subst("cuLinkComplete", "hiprtcLinkComplete", "module"); + subst("cuLinkCreate", "hiprtcLinkCreate", "module"); + subst("cuLinkCreate_v2", "hiprtcLinkCreate", "module"); + subst("cuLinkDestroy", "hiprtcLinkDestroy", "module"); subst("cuModuleGetFunction", "hipModuleGetFunction", "module"); subst("cuModuleGetGlobal", "hipModuleGetGlobal", "module"); subst("cuModuleGetGlobal_v2", "hipModuleGetGlobal", "module"); @@ -1750,6 +1646,9 @@ sub simpleSubstitutions { subst("cudaLaunchHostFunc", "hipLaunchHostFunc", "execution"); subst("cudaLaunchKernel", "hipLaunchKernel", "execution"); subst("cudaSetupArgument", "hipSetupArgument", "execution"); + subst("cuDeviceGetGraphMemAttribute", 
"hipDeviceGetGraphMemAttribute", "graph"); + subst("cuDeviceGraphMemTrim", "hipDeviceGraphMemTrim", "graph"); + subst("cuDeviceSetGraphMemAttribute", "hipDeviceSetGraphMemAttribute", "graph"); subst("cuGraphAddChildGraphNode", "hipGraphAddChildGraphNode", "graph"); subst("cuGraphAddDependencies", "hipGraphAddDependencies", "graph"); subst("cuGraphAddEmptyNode", "hipGraphAddEmptyNode", "graph"); @@ -1794,7 +1693,16 @@ sub simpleSubstitutions { subst("cuGraphNodeGetDependencies", "hipGraphNodeGetDependencies", "graph"); subst("cuGraphNodeGetDependentNodes", "hipGraphNodeGetDependentNodes", "graph"); subst("cuGraphNodeGetType", "hipGraphNodeGetType", "graph"); + subst("cuGraphReleaseUserObject", "hipGraphReleaseUserObject", "graph"); subst("cuGraphRemoveDependencies", "hipGraphRemoveDependencies", "graph"); + subst("cuGraphRetainUserObject", "hipGraphRetainUserObject", "graph"); + subst("cuGraphUpload", "hipGraphUpload", "graph"); + subst("cuUserObjectCreate", "hipUserObjectCreate", "graph"); + subst("cuUserObjectRelease", "hipUserObjectRelease", "graph"); + subst("cuUserObjectRetain", "hipUserObjectRetain", "graph"); + subst("cudaDeviceGetGraphMemAttribute", "hipDeviceGetGraphMemAttribute", "graph"); + subst("cudaDeviceGraphMemTrim", "hipDeviceGraphMemTrim", "graph"); + subst("cudaDeviceSetGraphMemAttribute", "hipDeviceSetGraphMemAttribute", "graph"); subst("cudaGraphAddChildGraphNode", "hipGraphAddChildGraphNode", "graph"); subst("cudaGraphAddDependencies", "hipGraphAddDependencies", "graph"); subst("cudaGraphAddEmptyNode", "hipGraphAddEmptyNode", "graph"); @@ -1851,7 +1759,13 @@ sub simpleSubstitutions { subst("cudaGraphNodeGetDependencies", "hipGraphNodeGetDependencies", "graph"); subst("cudaGraphNodeGetDependentNodes", "hipGraphNodeGetDependentNodes", "graph"); subst("cudaGraphNodeGetType", "hipGraphNodeGetType", "graph"); + subst("cudaGraphReleaseUserObject", "hipGraphReleaseUserObject", "graph"); subst("cudaGraphRemoveDependencies", 
"hipGraphRemoveDependencies", "graph"); + subst("cudaGraphRetainUserObject", "hipGraphRetainUserObject", "graph"); + subst("cudaGraphUpload", "hipGraphUpload", "graph"); + subst("cudaUserObjectCreate", "hipUserObjectCreate", "graph"); + subst("cudaUserObjectRelease", "hipUserObjectRelease", "graph"); + subst("cudaUserObjectRetain", "hipUserObjectRetain", "graph"); subst("cuOccupancyMaxActiveBlocksPerMultiprocessor", "hipModuleOccupancyMaxActiveBlocksPerMultiprocessor", "occupancy"); subst("cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "occupancy"); subst("cuOccupancyMaxPotentialBlockSize", "hipModuleOccupancyMaxPotentialBlockSize", "occupancy"); @@ -2047,8 +1961,6 @@ sub simpleSubstitutions { subst("cublasCtpmv_v2", "hipblasCtpmv", "library"); subst("cublasCtpsv", "hipblasCtpsv", "library"); subst("cublasCtpsv_v2", "hipblasCtpsv", "library"); - subst("cublasCtrmm", "hipblasCtrmm", "library"); - subst("cublasCtrmm_v2", "hipblasCtrmm", "library"); subst("cublasCtrmv", "hipblasCtrmv", "library"); subst("cublasCtrmv_v2", "hipblasCtrmv", "library"); subst("cublasCtrsm", "hipblasCtrsm", "library"); @@ -2127,8 +2039,6 @@ sub simpleSubstitutions { subst("cublasDtpmv_v2", "hipblasDtpmv", "library"); subst("cublasDtpsv", "hipblasDtpsv", "library"); subst("cublasDtpsv_v2", "hipblasDtpsv", "library"); - subst("cublasDtrmm", "hipblasDtrmm", "library"); - subst("cublasDtrmm_v2", "hipblasDtrmm", "library"); subst("cublasDtrmv", "hipblasDtrmv", "library"); subst("cublasDtrmv_v2", "hipblasDtrmv", "library"); subst("cublasDtrsm", "hipblasDtrsm", "library"); @@ -2254,8 +2164,6 @@ sub simpleSubstitutions { subst("cublasStpmv_v2", "hipblasStpmv", "library"); subst("cublasStpsv", "hipblasStpsv", "library"); subst("cublasStpsv_v2", "hipblasStpsv", "library"); - subst("cublasStrmm", "hipblasStrmm", "library"); - subst("cublasStrmm_v2", "hipblasStrmm", "library"); subst("cublasStrmv", "hipblasStrmv", "library"); 
subst("cublasStrmv_v2", "hipblasStrmv", "library"); subst("cublasStrsm", "hipblasStrsm", "library"); @@ -2343,8 +2251,6 @@ sub simpleSubstitutions { subst("cublasZtpmv_v2", "hipblasZtpmv", "library"); subst("cublasZtpsv", "hipblasZtpsv", "library"); subst("cublasZtpsv_v2", "hipblasZtpsv", "library"); - subst("cublasZtrmm", "hipblasZtrmm", "library"); - subst("cublasZtrmm_v2", "hipblasZtrmm", "library"); subst("cublasZtrmv", "hipblasZtrmv", "library"); subst("cublasZtrmv_v2", "hipblasZtrmv", "library"); subst("cublasZtrsm", "hipblasZtrsm", "library"); @@ -3034,6 +2940,8 @@ sub simpleSubstitutions { subst("nvrtcCompileProgram", "hiprtcCompileProgram", "library"); subst("nvrtcCreateProgram", "hiprtcCreateProgram", "library"); subst("nvrtcDestroyProgram", "hiprtcDestroyProgram", "library"); + subst("nvrtcGetCUBIN", "hiprtcGetBitcode", "library"); + subst("nvrtcGetCUBINSize", "hiprtcGetBitcodeSize", "library"); subst("nvrtcGetErrorString", "hiprtcGetErrorString", "library"); subst("nvrtcGetLoweredName", "hiprtcGetLoweredName", "library"); subst("nvrtcGetPTX", "hiprtcGetCode", "library"); @@ -3064,6 +2972,10 @@ sub simpleSubstitutions { subst("curand_uniform4", "hiprand_uniform4", "device_library"); subst("curand_uniform4_double", "hiprand_uniform4_double", "device_library"); subst("curand_uniform_double", "hiprand_uniform_double", "device_library"); + subst("__half", "__half", "device_type"); + subst("__half2", "__half2", "device_type"); + subst("__half2_raw", "__half2_raw", "device_type"); + subst("__half_raw", "__half_raw", "device_type"); subst("caffe2\/core\/common_cudnn.h", "caffe2\/core\/hip\/common_miopen.h", "include"); subst("caffe2\/operators\/spatial_batch_norm_op.h", "caffe2\/operators\/hip\/spatial_batch_norm_op_miopen.hip", "include"); subst("channel_descriptor.h", "hip\/channel_descriptor.h", "include"); @@ -3211,6 +3123,8 @@ sub simpleSubstitutions { subst("CUgraphExec_st", "hipGraphExec", "type"); subst("CUgraphInstantiate_flags", 
"hipGraphInstantiateFlags", "type"); subst("CUgraphInstantiate_flags_enum", "hipGraphInstantiateFlags", "type"); + subst("CUgraphMem_attribute", "hipGraphMemAttributeType", "type"); + subst("CUgraphMem_attribute_enum", "hipGraphMemAttributeType", "type"); subst("CUgraphNode", "hipGraphNode_t", "type"); subst("CUgraphNodeType", "hipGraphNodeType", "type"); subst("CUgraphNodeType_enum", "hipGraphNodeType", "type"); @@ -3227,6 +3141,8 @@ sub simpleSubstitutions { subst("CUipcMemHandle", "hipIpcMemHandle_t", "type"); subst("CUipcMemHandle_st", "hipIpcMemHandle_st", "type"); subst("CUipcMemHandle_v1", "hipIpcMemHandle_t", "type"); + subst("CUjitInputType", "hiprtcJITInputType", "type"); + subst("CUjitInputType_enum", "hiprtcJITInputType", "type"); subst("CUjit_option", "hipJitOption", "type"); subst("CUjit_option_enum", "hipJitOption", "type"); subst("CUkernelNodeAttrID", "hipKernelNodeAttrID", "type"); @@ -3308,6 +3224,12 @@ sub simpleSubstitutions { subst("CUtexObject_v1", "hipTextureObject_t", "type"); subst("CUtexref", "hipTexRef", "type"); subst("CUtexref_st", "textureReference", "type"); + subst("CUuserObject", "hipUserObject_t", "type"); + subst("CUuserObjectRetain_flags", "hipUserObjectRetainFlags", "type"); + subst("CUuserObjectRetain_flags_enum", "hipUserObjectRetainFlags", "type"); + subst("CUuserObject_flags", "hipUserObjectFlags", "type"); + subst("CUuserObject_flags_enum", "hipUserObjectFlags", "type"); + subst("CUuserObject_st", "hipUserObject", "type"); subst("CUuuid", "hipUUID", "type"); subst("CUuuid_st", "hipUUID_t", "type"); subst("GLenum", "GLenum", "type"); @@ -3327,6 +3249,7 @@ sub simpleSubstitutions { subst("cuDoubleComplex", "hipDoubleComplex", "type"); subst("cuFloatComplex", "hipFloatComplex", "type"); subst("cublasAtomicsMode_t", "hipblasAtomicsMode_t", "type"); + subst("cublasComputeType_t", "hipblasDatatype_t", "type"); subst("cublasDataType_t", "hipblasDatatype_t", "type"); subst("cublasDiagType_t", "hipblasDiagType_t", "type"); 
subst("cublasFillMode_t", "hipblasFillMode_t", "type"); @@ -3374,6 +3297,7 @@ sub simpleSubstitutions { subst("cudaGraphExecUpdateResult", "hipGraphExecUpdateResult", "type"); subst("cudaGraphExec_t", "hipGraphExec_t", "type"); subst("cudaGraphInstantiateFlags", "hipGraphInstantiateFlags", "type"); + subst("cudaGraphMemAttributeType", "hipGraphMemAttributeType", "type"); subst("cudaGraphNodeType", "hipGraphNodeType", "type"); subst("cudaGraphNode_t", "hipGraphNode_t", "type"); subst("cudaGraph_t", "hipGraph_t", "type"); @@ -3431,6 +3355,9 @@ sub simpleSubstitutions { subst("cudaTextureObject_t", "hipTextureObject_t", "type"); subst("cudaTextureReadMode", "hipTextureReadMode", "type"); subst("cudaUUID_t", "hipUUID", "type"); + subst("cudaUserObjectFlags", "hipUserObjectFlags", "type"); + subst("cudaUserObjectRetainFlags", "hipUserObjectRetainFlags", "type"); + subst("cudaUserObject_t", "hipUserObject_t", "type"); subst("cudnnActivationDescriptor_t", "hipdnnActivationDescriptor_t", "type"); subst("cudnnActivationMode_t", "hipdnnActivationMode_t", "type"); subst("cudnnBatchNormMode_t", "hipdnnBatchNormMode_t", "type"); @@ -4028,6 +3955,7 @@ sub simpleSubstitutions { subst("CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT", "hipDeviceAttributeTexturePitchAlignment", "numeric_literal"); subst("CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY", "hipDeviceAttributeTotalConstantMemory", "numeric_literal"); subst("CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING", "hipDeviceAttributeUnifiedAddressing", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED", "hipDeviceAttributeVirtualMemoryManagementSupported", "numeric_literal"); subst("CU_DEVICE_ATTRIBUTE_WARP_SIZE", "hipDeviceAttributeWarpSize", "numeric_literal"); subst("CU_DEVICE_P2P_ATTRIBUTE_ACCESS_ACCESS_SUPPORTED", "hipDevP2PAttrHipArrayAccessSupported", "numeric_literal"); subst("CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED", "hipDevP2PAttrAccessSupported", "numeric_literal"); @@ -4081,38 +4009,53 @@ sub 
simpleSubstitutions { subst("CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED", "hipGraphExecUpdateErrorTopologyChanged", "numeric_literal"); subst("CU_GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE", "hipGraphExecUpdateErrorUnsupportedFunctionChange", "numeric_literal"); subst("CU_GRAPH_EXEC_UPDATE_SUCCESS", "hipGraphExecUpdateSuccess", "numeric_literal"); + subst("CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT", "hipGraphMemAttrReservedMemCurrent", "numeric_literal"); + subst("CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH", "hipGraphMemAttrReservedMemHigh", "numeric_literal"); + subst("CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT", "hipGraphMemAttrUsedMemCurrent", "numeric_literal"); + subst("CU_GRAPH_MEM_ATTR_USED_MEM_HIGH", "hipGraphMemAttrUsedMemHigh", "numeric_literal"); subst("CU_GRAPH_NODE_TYPE_COUNT", "hipGraphNodeTypeCount", "numeric_literal"); subst("CU_GRAPH_NODE_TYPE_EMPTY", "hipGraphNodeTypeEmpty", "numeric_literal"); subst("CU_GRAPH_NODE_TYPE_EVENT_RECORD", "hipGraphNodeTypeEventRecord", "numeric_literal"); + subst("CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL", "hipGraphNodeTypeExtSemaphoreSignal", "numeric_literal"); + subst("CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT", "hipGraphNodeTypeExtSemaphoreWait", "numeric_literal"); subst("CU_GRAPH_NODE_TYPE_GRAPH", "hipGraphNodeTypeGraph", "numeric_literal"); subst("CU_GRAPH_NODE_TYPE_HOST", "hipGraphNodeTypeHost", "numeric_literal"); subst("CU_GRAPH_NODE_TYPE_KERNEL", "hipGraphNodeTypeKernel", "numeric_literal"); subst("CU_GRAPH_NODE_TYPE_MEMCPY", "hipGraphNodeTypeMemcpy", "numeric_literal"); subst("CU_GRAPH_NODE_TYPE_MEMSET", "hipGraphNodeTypeMemset", "numeric_literal"); subst("CU_GRAPH_NODE_TYPE_WAIT_EVENT", "hipGraphNodeTypeWaitEvent", "numeric_literal"); + subst("CU_GRAPH_USER_OBJECT_MOVE", "hipGraphUserObjectMove", "numeric_literal"); subst("CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS", "hipIpcMemLazyEnablePeerAccess", "numeric_literal"); - subst("CU_JIT_CACHE_MODE", "hipJitOptionCacheMode", "numeric_literal"); - subst("CU_JIT_ERROR_LOG_BUFFER", 
"hipJitOptionErrorLogBuffer", "numeric_literal"); - subst("CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES", "hipJitOptionErrorLogBufferSizeBytes", "numeric_literal"); - subst("CU_JIT_FALLBACK_STRATEGY", "hipJitOptionFallbackStrategy", "numeric_literal"); - subst("CU_JIT_FAST_COMPILE", "hipJitOptionFastCompile", "numeric_literal"); - subst("CU_JIT_GENERATE_DEBUG_INFO", "hipJitOptionGenerateDebugInfo", "numeric_literal"); - subst("CU_JIT_GENERATE_LINE_INFO", "hipJitOptionGenerateLineInfo", "numeric_literal"); - subst("CU_JIT_INFO_LOG_BUFFER", "hipJitOptionInfoLogBuffer", "numeric_literal"); - subst("CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES", "hipJitOptionInfoLogBufferSizeBytes", "numeric_literal"); - subst("CU_JIT_LOG_VERBOSE", "hipJitOptionLogVerbose", "numeric_literal"); - subst("CU_JIT_MAX_REGISTERS", "hipJitOptionMaxRegisters", "numeric_literal"); - subst("CU_JIT_NEW_SM3X_OPT", "hipJitOptionSm3xOpt", "numeric_literal"); - subst("CU_JIT_NUM_OPTIONS", "hipJitOptionNumOptions", "numeric_literal"); - subst("CU_JIT_OPTIMIZATION_LEVEL", "hipJitOptionOptimizationLevel", "numeric_literal"); - subst("CU_JIT_TARGET", "hipJitOptionTarget", "numeric_literal"); - subst("CU_JIT_TARGET_FROM_CUCONTEXT", "hipJitOptionTargetFromContext", "numeric_literal"); - subst("CU_JIT_THREADS_PER_BLOCK", "hipJitOptionThreadsPerBlock", "numeric_literal"); - subst("CU_JIT_WALL_TIME", "hipJitOptionWallTime", "numeric_literal"); + subst("CU_JIT_CACHE_MODE", "HIPRTC_JIT_CACHE_MODE", "numeric_literal"); + subst("CU_JIT_ERROR_LOG_BUFFER", "HIPRTC_JIT_ERROR_LOG_BUFFER", "numeric_literal"); + subst("CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES", "HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES", "numeric_literal"); + subst("CU_JIT_FALLBACK_STRATEGY", "HIPRTC_JIT_FALLBACK_STRATEGY", "numeric_literal"); + subst("CU_JIT_FAST_COMPILE", "HIPRTC_JIT_FAST_COMPILE", "numeric_literal"); + subst("CU_JIT_GENERATE_DEBUG_INFO", "HIPRTC_JIT_GENERATE_DEBUG_INFO", "numeric_literal"); + subst("CU_JIT_GENERATE_LINE_INFO", "HIPRTC_JIT_GENERATE_LINE_INFO", 
"numeric_literal"); + subst("CU_JIT_INFO_LOG_BUFFER", "HIPRTC_JIT_INFO_LOG_BUFFER", "numeric_literal"); + subst("CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES", "HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES", "numeric_literal"); + subst("CU_JIT_INPUT_CUBIN", "HIPRTC_JIT_INPUT_CUBIN", "numeric_literal"); + subst("CU_JIT_INPUT_FATBINARY", "HIPRTC_JIT_INPUT_FATBINARY", "numeric_literal"); + subst("CU_JIT_INPUT_LIBRARY", "HIPRTC_JIT_INPUT_LIBRARY", "numeric_literal"); + subst("CU_JIT_INPUT_NVVM", "HIPRTC_JIT_INPUT_NVVM", "numeric_literal"); + subst("CU_JIT_INPUT_OBJECT", "HIPRTC_JIT_INPUT_OBJECT", "numeric_literal"); + subst("CU_JIT_INPUT_PTX", "HIPRTC_JIT_INPUT_PTX", "numeric_literal"); + subst("CU_JIT_LOG_VERBOSE", "HIPRTC_JIT_LOG_VERBOSE", "numeric_literal"); + subst("CU_JIT_MAX_REGISTERS", "HIPRTC_JIT_MAX_REGISTERS", "numeric_literal"); + subst("CU_JIT_NEW_SM3X_OPT", "HIPRTC_JIT_NEW_SM3X_OPT", "numeric_literal"); + subst("CU_JIT_NUM_INPUT_TYPES", "HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES", "numeric_literal"); + subst("CU_JIT_NUM_OPTIONS", "HIPRTC_JIT_NUM_OPTIONS", "numeric_literal"); + subst("CU_JIT_OPTIMIZATION_LEVEL", "HIPRTC_JIT_OPTIMIZATION_LEVEL", "numeric_literal"); + subst("CU_JIT_TARGET", "HIPRTC_JIT_TARGET", "numeric_literal"); + subst("CU_JIT_TARGET_FROM_CUCONTEXT", "HIPRTC_JIT_TARGET_FROM_HIPCONTEXT", "numeric_literal"); + subst("CU_JIT_THREADS_PER_BLOCK", "HIPRTC_JIT_THREADS_PER_BLOCK", "numeric_literal"); + subst("CU_JIT_WALL_TIME", "HIPRTC_JIT_WALL_TIME", "numeric_literal"); subst("CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW", "hipKernelNodeAttributeAccessPolicyWindow", "numeric_literal"); subst("CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE", "hipKernelNodeAttributeCooperative", "numeric_literal"); subst("CU_LIMIT_MALLOC_HEAP_SIZE", "hipLimitMallocHeapSize", "numeric_literal"); subst("CU_LIMIT_PRINTF_FIFO_SIZE", "hipLimitPrintfFifoSize", "numeric_literal"); + subst("CU_LIMIT_STACK_SIZE", "hipLimitStackSize", "numeric_literal"); subst("CU_MEMORYTYPE_ARRAY", "hipMemoryTypeArray", 
"numeric_literal"); subst("CU_MEMORYTYPE_DEVICE", "hipMemoryTypeDevice", "numeric_literal"); subst("CU_MEMORYTYPE_HOST", "hipMemoryTypeHost", "numeric_literal"); @@ -4235,6 +4178,7 @@ sub simpleSubstitutions { subst("CU_TR_ADDRESS_MODE_WRAP", "HIP_TR_ADDRESS_MODE_WRAP", "numeric_literal"); subst("CU_TR_FILTER_MODE_LINEAR", "HIP_TR_FILTER_MODE_LINEAR", "numeric_literal"); subst("CU_TR_FILTER_MODE_POINT", "HIP_TR_FILTER_MODE_POINT", "numeric_literal"); + subst("CU_USER_OBJECT_NO_DESTRUCTOR_SYNC", "hipUserObjectNoDestructorSync", "numeric_literal"); subst("NVRTC_ERROR_BUILTIN_OPERATION_FAILURE", "HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE", "numeric_literal"); subst("NVRTC_ERROR_COMPILATION", "HIPRTC_ERROR_COMPILATION", "numeric_literal"); subst("NVRTC_ERROR_INTERNAL_ERROR", "HIPRTC_ERROR_INTERNAL_ERROR", "numeric_literal"); @@ -4462,15 +4406,22 @@ sub simpleSubstitutions { subst("cudaGraphExecUpdateErrorUnsupportedFunctionChange", "hipGraphExecUpdateErrorUnsupportedFunctionChange", "numeric_literal"); subst("cudaGraphExecUpdateSuccess", "hipGraphExecUpdateSuccess", "numeric_literal"); subst("cudaGraphInstantiateFlagAutoFreeOnLaunch", "hipGraphInstantiateFlagAutoFreeOnLaunch", "numeric_literal"); + subst("cudaGraphMemAttrReservedMemCurrent", "hipGraphMemAttrReservedMemCurrent", "numeric_literal"); + subst("cudaGraphMemAttrReservedMemHigh", "hipGraphMemAttrReservedMemHigh", "numeric_literal"); + subst("cudaGraphMemAttrUsedMemCurrent", "hipGraphMemAttrUsedMemCurrent", "numeric_literal"); + subst("cudaGraphMemAttrUsedMemHigh", "hipGraphMemAttrUsedMemHigh", "numeric_literal"); subst("cudaGraphNodeTypeCount", "hipGraphNodeTypeCount", "numeric_literal"); subst("cudaGraphNodeTypeEmpty", "hipGraphNodeTypeEmpty", "numeric_literal"); subst("cudaGraphNodeTypeEventRecord", "hipGraphNodeTypeEventRecord", "numeric_literal"); + subst("cudaGraphNodeTypeExtSemaphoreSignal", "hipGraphNodeTypeExtSemaphoreSignal", "numeric_literal"); + subst("cudaGraphNodeTypeExtSemaphoreWait", 
"hipGraphNodeTypeExtSemaphoreWait", "numeric_literal"); subst("cudaGraphNodeTypeGraph", "hipGraphNodeTypeGraph", "numeric_literal"); subst("cudaGraphNodeTypeHost", "hipGraphNodeTypeHost", "numeric_literal"); subst("cudaGraphNodeTypeKernel", "hipGraphNodeTypeKernel", "numeric_literal"); subst("cudaGraphNodeTypeMemcpy", "hipGraphNodeTypeMemcpy", "numeric_literal"); subst("cudaGraphNodeTypeMemset", "hipGraphNodeTypeMemset", "numeric_literal"); subst("cudaGraphNodeTypeWaitEvent", "hipGraphNodeTypeWaitEvent", "numeric_literal"); + subst("cudaGraphUserObjectMove", "hipGraphUserObjectMove", "numeric_literal"); subst("cudaGraphicsRegisterFlagsNone", "hipGraphicsRegisterFlagsNone", "numeric_literal"); subst("cudaGraphicsRegisterFlagsReadOnly", "hipGraphicsRegisterFlagsReadOnly", "numeric_literal"); subst("cudaGraphicsRegisterFlagsSurfaceLoadStore", "hipGraphicsRegisterFlagsSurfaceLoadStore", "numeric_literal"); @@ -4480,6 +4431,7 @@ sub simpleSubstitutions { subst("cudaKernelNodeAttributeCooperative", "hipKernelNodeAttributeCooperative", "numeric_literal"); subst("cudaLimitMallocHeapSize", "hipLimitMallocHeapSize", "numeric_literal"); subst("cudaLimitPrintfFifoSize", "hipLimitPrintfFifoSize", "numeric_literal"); + subst("cudaLimitStackSize", "hipLimitStackSize", "numeric_literal"); subst("cudaMemAccessFlagsProtNone", "hipMemAccessFlagsProtNone", "numeric_literal"); subst("cudaMemAccessFlagsProtRead", "hipMemAccessFlagsProtRead", "numeric_literal"); subst("cudaMemAccessFlagsProtReadWrite", "hipMemAccessFlagsProtReadWrite", "numeric_literal"); @@ -4517,6 +4469,7 @@ sub simpleSubstitutions { subst("cudaMemcpyHostToHost", "hipMemcpyHostToHost", "numeric_literal"); subst("cudaMemoryTypeDevice", "hipMemoryTypeDevice", "numeric_literal"); subst("cudaMemoryTypeHost", "hipMemoryTypeHost", "numeric_literal"); + subst("cudaMemoryTypeManaged", "hipMemoryTypeManaged", "numeric_literal"); subst("cudaReadModeElementType", "hipReadModeElementType", "numeric_literal"); 
subst("cudaReadModeNormalizedFloat", "hipReadModeNormalizedFloat", "numeric_literal"); subst("cudaResViewFormatFloat1", "hipResViewFormatFloat1", "numeric_literal"); @@ -4570,6 +4523,7 @@ sub simpleSubstitutions { subst("cudaStreamCaptureStatusNone", "hipStreamCaptureStatusNone", "numeric_literal"); subst("cudaStreamSetCaptureDependencies", "hipStreamSetCaptureDependencies", "numeric_literal"); subst("cudaSuccess", "hipSuccess", "numeric_literal"); + subst("cudaUserObjectNoDestructorSync", "hipUserObjectNoDestructorSync", "numeric_literal"); subst("CUB_MAX", "CUB_MAX", "define"); subst("CUB_MIN", "CUB_MIN", "define"); subst("CUB_NAMESPACE_BEGIN", "BEGIN_HIPCUB_NAMESPACE", "define"); @@ -5380,10 +5334,32 @@ sub warnUnsupportedDeviceFunctions { "__vabsdiffs2", "__vabs4", "__vabs2", + "__ushort_as_bfloat16", + "__ushort2bfloat16_rz", + "__ushort2bfloat16_ru", + "__ushort2bfloat16_rn", + "__ushort2bfloat16_rd", + "__ull2bfloat16_rz", + "__ull2bfloat16_ru", + "__ull2bfloat16_rn", + "__ull2bfloat16_rd", + "__uint2bfloat16_rz", + "__uint2bfloat16_ru", + "__uint2bfloat16_rn", + "__uint2bfloat16_rd", "__trap", + "__stwt", + "__stwb", + "__stcs", + "__stcg", "__signbitl", "__signbitf", "__signbit", + "__short_as_bfloat16", + "__short2bfloat16_rz", + "__short2bfloat16_ru", + "__short2bfloat16_rn", + "__short2bfloat16_rd", "__shfl_xor_sync", "__shfl_up_sync", "__shfl_sync", @@ -5393,12 +5369,56 @@ sub warnUnsupportedDeviceFunctions { "__pm2", "__pm1", "__pm0", + "__nv_cvt_halfraw_to_fp8", + "__nv_cvt_halfraw2_to_fp8x2", + "__nv_cvt_fp8x2_to_halfraw2", + "__nv_cvt_fp8_to_halfraw", + "__nv_cvt_float_to_fp8", + "__nv_cvt_float2_to_fp8x2", + "__nv_cvt_double_to_fp8", + "__nv_cvt_double2_to_fp8x2", + "__nv_cvt_bfloat16raw_to_fp8", + "__nv_cvt_bfloat16raw2_to_fp8x2", + "__lows2bfloat162", + "__low2bfloat162", + "__low2bfloat16", + "__ll2bfloat16_rz", + "__ll2bfloat16_ru", + "__ll2bfloat16_rn", + "__ll2bfloat16_rd", + "__ldlu", + "__ldcv", "__isnanl", "__isnanf", "__isnan", 
"__isinfl", "__isinff", "__isinf", + "__int2bfloat16_rz", + "__int2bfloat16_ru", + "__int2bfloat16_rn", + "__int2bfloat16_rd", + "__hsub_rn", + "__hsub2_rn", + "__hmul_rn", + "__hmul2_rn", + "__hmin_nan", + "__hmin2_nan", + "__hmin2", + "__hmin", + "__hmax_nan", + "__hmax2_nan", + "__hmax2", + "__hmax", + "__highs2bfloat162", + "__high2bfloat162", + "__high2bfloat16", + "__hfma_relu", + "__hfma2_relu", + "__hcmadd", + "__halves2bfloat162", + "__hadd_rn", + "__hadd2_rn", "__fsub_rz", "__fsub_ru", "__fsub_rd", @@ -5417,6 +5437,13 @@ sub warnUnsupportedDeviceFunctions { "__fma_rz", "__fma_ru", "__fma_rd", + "__floats2bfloat162_rn", + "__float2bfloat16_rz", + "__float2bfloat16_ru", + "__float2bfloat16_rn", + "__float2bfloat16_rd", + "__float2bfloat162_rn", + "__float2bfloat16", "__finitel", "__finitef", "__finite", @@ -5435,6 +5462,8 @@ sub warnUnsupportedDeviceFunctions { "__drcp_rz", "__drcp_ru", "__drcp_rd", + "__double2half", + "__double2bfloat16", "__dmul_rz", "__dmul_ru", "__dmul_rd", @@ -5445,6 +5474,35 @@ sub warnUnsupportedDeviceFunctions { "__dadd_ru", "__dadd_rd", "__brkpt", + "__bfloat16_as_ushort", + "__bfloat16_as_short", + "__bfloat162ushort_rz", + "__bfloat162ushort_ru", + "__bfloat162ushort_rn", + "__bfloat162ushort_rd", + "__bfloat162ull_rz", + "__bfloat162ull_ru", + "__bfloat162ull_rn", + "__bfloat162ull_rd", + "__bfloat162uint_rz", + "__bfloat162uint_ru", + "__bfloat162uint_rn", + "__bfloat162uint_rd", + "__bfloat162short_rz", + "__bfloat162short_ru", + "__bfloat162short_rn", + "__bfloat162short_rd", + "__bfloat162ll_rz", + "__bfloat162ll_ru", + "__bfloat162ll_rn", + "__bfloat162ll_rd", + "__bfloat162int_rz", + "__bfloat162int_ru", + "__bfloat162int_rn", + "__bfloat162int_rd", + "__bfloat162float", + "__bfloat162bfloat162", + "__bfloat1622float2", "_Pow_int" ) { @@ -5521,6 +5579,8 @@ sub warnUnsupportedFunctions { "nvrtcGetNumSupportedArchs", "nvrtcGetNVVMSize", "nvrtcGetNVVM", + "nv_bfloat162", + "nv_bfloat16", "memoryBarrier", 
"libraryPropertyType_t", "libraryPropertyType", @@ -5714,6 +5774,7 @@ sub warnUnsupportedFunctions { "cufftXtSetWorkAreaPolicy", "cufftXtSetWorkArea", "cufftXtSetGPUs", + "cufftXtSetDistribution", "cufftXtQueryType_t", "cufftXtQueryType", "cufftXtQueryPlan", @@ -5736,6 +5797,8 @@ sub warnUnsupportedFunctions { "cufftXt1dFactors", "cufftCompatibility_t", "cufftCompatibility", + "cufftBox3d_t", + "cufftBox3d", "cudnnWgradMode_t", "cudnnTransformTensorEx", "cudnnTransformTensor", @@ -5749,6 +5812,7 @@ sub warnUnsupportedFunctions { "cudnnSpatialTfSamplerBackward", "cudnnSpatialTfGridGeneratorForward", "cudnnSpatialTfGridGeneratorBackward", + "cudnnSignalMode_t", "cudnnSeverity_t", "cudnnSetTensorTransformDescriptor", "cudnnSetTensorNdDescriptorEx", @@ -5780,6 +5844,7 @@ sub warnUnsupportedFunctions { "cudnnRuntimeTag_t", "cudnnRestoreDropoutDescriptor", "cudnnRestoreAlgorithm", + "cudnnResampleMode_t", "cudnnReorderType_t", "cudnnReorderFilterAndBias", "cudnnReduceTensorStruct", @@ -5804,6 +5869,7 @@ sub warnUnsupportedFunctions { "cudnnPoolingStruct", "cudnnPointwiseMode_t", "cudnnPersistentRNNPlan", + "cudnnPaddingMode_t", "cudnnOpsTrainVersionCheck", "cudnnOpsInferVersionCheck", "cudnnOpTensorStruct", @@ -5848,6 +5914,7 @@ sub warnUnsupportedFunctions { "cudnnGetNormalizationBackwardWorkspaceSize", "cudnnGetMultiHeadAttnWeights", "cudnnGetMultiHeadAttnBuffers", + "cudnnGetMaxDeviceVersion", "cudnnGetFusedOpsVariantParamPackAttribute", "cudnnGetFusedOpsConstParamPackAttribute", "cudnnGetFoldedConvBackwardDataDescriptors", @@ -5891,6 +5958,8 @@ sub warnUnsupportedFunctions { "cudnnFusedOpsConstParamStruct", "cudnnFusedOpsConstParamPack_t", "cudnnFusedOpsConstParamLabel_t", + "cudnnFraction_t", + "cudnnFractionStruct", "cudnnForwardMode_t", "cudnnFoldingDirection_t", "cudnnFindRNNForwardTrainingAlgorithmEx", @@ -5949,8 +6018,11 @@ sub warnUnsupportedFunctions { "cudnnBatchNormalizationForwardTrainingEx", "cudnnBatchNormalizationBackwardEx", "cudnnBatchNormOps_t", + 
"cudnnBackendTensorReordering_t", "cudnnBackendSetAttribute", "cudnnBackendNumericalNote_t", + "cudnnBackendNormMode_t", + "cudnnBackendNormFwdPhase_t", "cudnnBackendLayoutType_t", "cudnnBackendKnobType_t", "cudnnBackendInitialize", @@ -6006,8 +6078,10 @@ sub warnUnsupportedFunctions { "cudaProfilerInitialize", "cudaOutputMode_t", "cudaOutputMode", + "cudaOccupancyMaxPotentialClusterSize", "cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags", "cudaOccupancyMaxPotentialBlockSizeVariableSMem", + "cudaOccupancyMaxActiveClusters", "cudaOccupancyDisableCachingOverride", "cudaOccupancyAvailableDynamicSMemPerBlock", "cudaNvSciSyncAttrWait", @@ -6025,8 +6099,26 @@ sub warnUnsupportedFunctions { "cudaLimitMaxL2FetchGranularity", "cudaLimitDevRuntimeSyncDepth", "cudaLimitDevRuntimePendingLaunchCount", + "cudaLaunchKernelExC", + "cudaLaunchConfig_t", + "cudaLaunchConfig_st", + "cudaLaunchAttribute_st", + "cudaLaunchAttributeValue", + "cudaLaunchAttributeSynchronizationPolicy", + "cudaLaunchAttributeProgrammaticStreamSerialization", + "cudaLaunchAttributeProgrammaticEvent", + "cudaLaunchAttributePriority", + "cudaLaunchAttributeIgnore", + "cudaLaunchAttributeID", + "cudaLaunchAttributeCooperative", + "cudaLaunchAttributeClusterSchedulingPolicyPreference", + "cudaLaunchAttributeClusterDimension", + "cudaLaunchAttributeAccessPolicyWindow", + "cudaLaunchAttribute", "cudaKeyValuePair", "cudaKernelNodeAttributePriority", + "cudaKernelNodeAttributeClusterSchedulingPolicyPreference", + "cudaKernelNodeAttributeClusterDimension", "cudaHostRegisterReadOnly", "cudaGraphicsVDPAURegisterVideoSurface", "cudaGraphicsVDPAURegisterOutputSurface", @@ -6078,6 +6170,7 @@ sub warnUnsupportedFunctions { "cudaGraphAddMemAllocNode", "cudaGraphAddExternalSemaphoresWaitNode", "cudaGraphAddExternalSemaphoresSignalNode", + "cudaGetTextureObjectTextureDesc_v2", "cudaGetSurfaceReference", "cudaGetSurfaceObjectResourceDesc", "cudaGetParameterBufferV2", @@ -6101,6 +6194,12 @@ sub 
warnUnsupportedFunctions { "cudaGLMapFlags", "cudaGLMapBufferObjectAsync", "cudaGLMapBufferObject", + "cudaFuncAttributeRequiredClusterWidth", + "cudaFuncAttributeRequiredClusterHeight", + "cudaFuncAttributeRequiredClusterDepth", + "cudaFuncAttributeNonPortableClusterSizeAllowed", + "cudaFuncAttributeClusterSchedulingPolicyPreference", + "cudaFuncAttributeClusterDimMustBeSet", "cudaFormatModeForced", "cudaFormatModeAuto", "cudaFlushGPUDirectRDMAWritesToOwner", @@ -6152,6 +6251,7 @@ sub warnUnsupportedFunctions { "cudaErrorMpsMaxConnectionsReached", "cudaErrorMpsMaxClientsReached", "cudaErrorMpsConnectionFailed", + "cudaErrorMpsClientTerminated", "cudaErrorMixedDeviceExecution", "cudaErrorMisalignedAddress", "cudaErrorMemoryValueTooLarge", @@ -6169,6 +6269,7 @@ sub warnUnsupportedFunctions { "cudaErrorInvalidNormSetting", "cudaErrorInvalidHostPointer", "cudaErrorInvalidFilterSetting", + "cudaErrorInvalidClusterSize", "cudaErrorInvalidChannelDescriptor", "cudaErrorInvalidAddressSpace", "cudaErrorIncompatibleDriverContext", @@ -6307,6 +6408,7 @@ sub warnUnsupportedFunctions { "cudaDevAttrGPUDirectRDMASupported", "cudaDevAttrGPUDirectRDMAFlushWritesOptions", "cudaDevAttrDeferredMappingCudaArraySupported", + "cudaDevAttrClusterLaunch", "cudaDevAttrCanFlushRemoteWrites", "cudaD3D9UnregisterResource", "cudaD3D9UnmapResources", @@ -6367,6 +6469,11 @@ sub warnUnsupportedFunctions { "cudaD3D10DeviceListAll", "cudaD3D10DeviceList", "cudaCtxResetPersistingL2Cache", + "cudaCreateTextureObject_v2", + "cudaClusterSchedulingPolicySpread", + "cudaClusterSchedulingPolicyLoadBalancing", + "cudaClusterSchedulingPolicyDefault", + "cudaClusterSchedulingPolicy", "cudaChannelFormatKindUnsignedNormalized8X4", "cudaChannelFormatKindUnsignedNormalized8X2", "cudaChannelFormatKindUnsignedNormalized8X1", @@ -6435,6 +6542,8 @@ sub warnUnsupportedFunctions { "cuParamSetf", "cuParamSetTexRef", "cuParamSetSize", + "cuOccupancyMaxPotentialClusterSize", + "cuOccupancyMaxActiveClusters", 
"cuOccupancyAvailableDynamicSMemPerBlock", "cuModuleLoadFatBinary", "cuModuleGetSurfRef", @@ -6466,6 +6575,7 @@ sub warnUnsupportedFunctions { "cuMemcpy3DPeer", "cuMemcpy", "cuMemGetHandleForAddressRange", + "cuLaunchKernelEx", "cuLaunchGridAsync", "cuLaunchGrid", "cuLaunchCooperativeKernelMultiDevice", @@ -6506,8 +6616,6 @@ sub warnUnsupportedFunctions { "cuGraphAddExternalSemaphoresSignalNode", "cuGraphAddBatchMemOpNode", "cuGetProcAddress", - "cuGetErrorString", - "cuGetErrorName", "cuGLUnregisterBufferObject", "cuGLUnmapBufferObjectAsync", "cuGLUnmapBufferObject", @@ -6592,14 +6700,45 @@ sub warnUnsupportedFunctions { "csrsm2Info", "csrilu02Info", "csrgemm2Info", + "cl_event_flags_enum", + "cl_event_flags", + "cl_context_flags_enum", + "cl_context_flags", "bsrsv2Info", "bsrilu02Info", "bsric02Info", + "__nv_saturation_t", + "__nv_fp8x4_storage_t", + "__nv_fp8x4_e5m2", + "__nv_fp8x4_e4m3", + "__nv_fp8x2_storage_t", + "__nv_fp8x2_e5m2", + "__nv_fp8x2_e4m3", + "__nv_fp8_storage_t", + "__nv_fp8_interpretation_t", + "__nv_fp8_e5m2", + "__nv_fp8_e4m3", + "__nv_bfloat16_raw", + "__nv_bfloat162_raw", + "__nv_bfloat162", + "__nv_bfloat16", "__curand_umul", + "__NV_SATFINITE", + "__NV_NOSAT", + "__NV_E5M2", + "__NV_E4M3", "__CUB_LP64__", "_CUB_ASM_PTR_SIZE_", "_CUB_ASM_PTR_", "PATCH_LEVEL", + "NVCL_EVENT_SCHED_YIELD", + "NVCL_EVENT_SCHED_SPIN", + "NVCL_EVENT_SCHED_BLOCKING_SYNC", + "NVCL_EVENT_SCHED_AUTO", + "NVCL_CTX_SCHED_YIELD", + "NVCL_CTX_SCHED_SPIN", + "NVCL_CTX_SCHED_BLOCKING_SYNC", + "NVCL_CTX_SCHED_AUTO", "MINOR_VERSION", "MAX_CUFFT_ERROR", "MAJOR_VERSION", @@ -6638,6 +6777,14 @@ sub warnUnsupportedFunctions { "CUmemRangeHandleType", "CUmemAttach_flags_enum", "CUmemAttach_flags", + "CUlaunchConfig_st", + "CUlaunchConfig", + "CUlaunchAttribute_st", + "CUlaunchAttributeValue_union", + "CUlaunchAttributeValue", + "CUlaunchAttributeID_enum", + "CUlaunchAttributeID", + "CUlaunchAttribute", "CUjit_target_enum", "CUjit_target", "CUjit_fallback_enum", @@ -6670,6 +6817,8 
@@ sub warnUnsupportedFunctions { "CUexecAffinityParam", "CUevent_wait_flags_enum", "CUevent_wait_flags", + "CUevent_sched_flags_enum", + "CUevent_sched_flags", "CUevent_record_flags_enum", "CUevent_record_flags", "CUevent_flags_enum", @@ -6703,10 +6852,14 @@ sub warnUnsupportedFunctions { "CUd3d10DeviceList", "CUctx_flags_enum", "CUctx_flags", + "CUclusterSchedulingPolicy_enum", + "CUclusterSchedulingPolicy", "CUarray_cubemap_face_enum", "CUarray_cubemap_face", "CU_TRSF_SEAMLESS_CUBEMAP", "CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION", + "CU_TARGET_COMPUTE_90", + "CU_TARGET_COMPUTE_89", "CU_TARGET_COMPUTE_87", "CU_TARGET_COMPUTE_86", "CU_TARGET_COMPUTE_80", @@ -6778,7 +6931,18 @@ sub warnUnsupportedFunctions { "CU_LAUNCH_PARAM_END_AS_INT", "CU_LAUNCH_PARAM_BUFFER_SIZE_AS_INT", "CU_LAUNCH_PARAM_BUFFER_POINTER_AS_INT", + "CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY", + "CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION", + "CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT", + "CU_LAUNCH_ATTRIBUTE_PRIORITY", + "CU_LAUNCH_ATTRIBUTE_IGNORE", + "CU_LAUNCH_ATTRIBUTE_COOPERATIVE", + "CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", + "CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION", + "CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW", "CU_KERNEL_NODE_ATTRIBUTE_PRIORITY", + "CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", + "CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_DIMENSION", "CU_JIT_REFERENCED_VARIABLE_NAMES", "CU_JIT_REFERENCED_VARIABLE_COUNT", "CU_JIT_REFERENCED_KERNEL_NAMES", @@ -6825,6 +6989,12 @@ sub warnUnsupportedFunctions { "CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM", "CU_GET_PROC_ADDRESS_LEGACY_STREAM", "CU_GET_PROC_ADDRESS_DEFAULT", + "CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH", + "CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT", + "CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH", + "CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED", + "CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET", + "CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", 
"CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_OWNER", "CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_ALL_DEVICES", "CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TARGET_CURRENT_CTX", @@ -6841,6 +7011,10 @@ sub warnUnsupportedFunctions { "CU_EXEC_AFFINITY_TYPE_MAX", "CU_EVENT_WAIT_EXTERNAL", "CU_EVENT_WAIT_DEFAULT", + "CU_EVENT_SCHED_YIELD", + "CU_EVENT_SCHED_SPIN", + "CU_EVENT_SCHED_BLOCKING_SYNC", + "CU_EVENT_SCHED_AUTO", "CU_EVENT_RECORD_EXTERNAL", "CU_EVENT_RECORD_DEFAULT", "CU_EGL_RESOURCE_LOCATION_VIDMEM", @@ -6947,6 +7121,7 @@ sub warnUnsupportedFunctions { "CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED", "CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED", "CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED", + "CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH", "CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR_V2", "CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS", "CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS_V2", @@ -6979,6 +7154,9 @@ sub warnUnsupportedFunctions { "CU_CUBEMAP_FACE_NEGATIVE_Y", "CU_CUBEMAP_FACE_NEGATIVE_X", "CU_CTX_FLAGS_MASK", + "CU_CLUSTER_SCHEDULING_POLICY_SPREAD", + "CU_CLUSTER_SCHEDULING_POLICY_LOAD_BALANCING", + "CU_CLUSTER_SCHEDULING_POLICY_DEFAULT", "CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL", "CU_AD_FORMAT_UNORM_INT8X4", "CU_AD_FORMAT_UNORM_INT8X2", @@ -7065,6 +7243,8 @@ sub warnUnsupportedFunctions { "CUFFT_XT_FORMAT_INPUT", "CUFFT_XT_FORMAT_INPLACE_SHUFFLED", "CUFFT_XT_FORMAT_INPLACE", + "CUFFT_XT_FORMAT_DISTRIBUTED_OUTPUT", + "CUFFT_XT_FORMAT_DISTRIBUTED_INPUT", "CUFFT_XT_FORMAT_1D_INPUT_SHUFFLED", "CUFFT_WORKAREA_USER", "CUFFT_WORKAREA_PERFORMANCE", @@ -7084,23 +7264,33 @@ sub warnUnsupportedFunctions { "CUFFT_COPY_DEVICE_TO_DEVICE", "CUFFT_COMPATIBILITY_FFTW_PADDING", "CUFFT_COMPATIBILITY_DEFAULT", + "CUDNN_ZERO_PAD", "CUDNN_WGRAD_MODE_SET", "CUDNN_WGRAD_MODE_ADD", "CUDNN_TYPE_VOID_PTR", + "CUDNN_TYPE_TENSOR_REORDERING_MODE", + "CUDNN_TYPE_SIGNAL_MODE", + "CUDNN_TYPE_RESAMPLE_MODE", "CUDNN_TYPE_REDUCTION_OPERATOR_TYPE", "CUDNN_TYPE_POINTWISE_MODE", + 
"CUDNN_TYPE_PADDING_MODE", "CUDNN_TYPE_NUMERICAL_NOTE", + "CUDNN_TYPE_NORM_MODE", + "CUDNN_TYPE_NORM_FWD_PHASE", "CUDNN_TYPE_NAN_PROPOGATION", "CUDNN_TYPE_LAYOUT_TYPE", "CUDNN_TYPE_KNOB_TYPE", "CUDNN_TYPE_INT64", + "CUDNN_TYPE_INT32", "CUDNN_TYPE_HEUR_MODE", "CUDNN_TYPE_HANDLE", "CUDNN_TYPE_GENSTATS_MODE", + "CUDNN_TYPE_FRACTION", "CUDNN_TYPE_FLOAT", "CUDNN_TYPE_DOUBLE", "CUDNN_TYPE_DATA_TYPE", "CUDNN_TYPE_CONVOLUTION_MODE", + "CUDNN_TYPE_CHAR", "CUDNN_TYPE_BOOLEAN", "CUDNN_TYPE_BN_FINALIZE_STATS_MODE", "CUDNN_TYPE_BEHAVIOR_NOTE", @@ -7108,10 +7298,14 @@ sub warnUnsupportedFunctions { "CUDNN_TYPE_ATTRIB_NAME", "CUDNN_TRANSFORM_UNFOLD", "CUDNN_TRANSFORM_FOLD", + "CUDNN_TENSOR_REORDERING_NONE", + "CUDNN_TENSOR_REORDERING_INT8x32", "CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION", "CUDNN_STATUS_VERSION_MISMATCH", "CUDNN_STATUS_RUNTIME_IN_PROGRESS", "CUDNN_STATUS_RUNTIME_FP_OVERFLOW", + "CUDNN_SIGNAL_WAIT", + "CUDNN_SIGNAL_SET", "CUDNN_SEV_WARNING_EN", "CUDNN_SEV_WARNING", "CUDNN_SEV_INFO_EN", @@ -7137,6 +7331,12 @@ sub warnUnsupportedFunctions { "CUDNN_RNN_CLIP_NONE", "CUDNN_RNN_CLIP_MINMAX", "CUDNN_RNN_ALGO_COUNT", + "CUDNN_RESAMPLE_NEAREST", + "CUDNN_RESAMPLE_MAXPOOL", + "CUDNN_RESAMPLE_BILINEAR", + "CUDNN_RESAMPLE_AVGPOOL_INCLUDE_PADDING", + "CUDNN_RESAMPLE_AVGPOOL_EXCLUDE_PADDING", + "CUDNN_RESAMPLE_AVGPOOL", "CUDNN_PTR_ZDATA", "CUDNN_PTR_YSUM", "CUDNN_PTR_YSQSUM", @@ -7166,23 +7366,53 @@ sub warnUnsupportedFunctions { "CUDNN_PTR_16B_ALIGNED", "CUDNN_POINTWISE_TANH_FWD", "CUDNN_POINTWISE_TANH_BWD", + "CUDNN_POINTWISE_TAN", "CUDNN_POINTWISE_SWISH_FWD", "CUDNN_POINTWISE_SWISH_BWD", + "CUDNN_POINTWISE_SUB", "CUDNN_POINTWISE_SQRT", "CUDNN_POINTWISE_SOFTPLUS_FWD", "CUDNN_POINTWISE_SOFTPLUS_BWD", + "CUDNN_POINTWISE_SIN", "CUDNN_POINTWISE_SIGMOID_FWD", "CUDNN_POINTWISE_SIGMOID_BWD", + "CUDNN_POINTWISE_RSQRT", "CUDNN_POINTWISE_RELU_FWD", "CUDNN_POINTWISE_RELU_BWD", + "CUDNN_POINTWISE_POW", + "CUDNN_POINTWISE_NEG", "CUDNN_POINTWISE_MUL", + "CUDNN_POINTWISE_MOD", 
"CUDNN_POINTWISE_MIN", "CUDNN_POINTWISE_MAX", + "CUDNN_POINTWISE_LOGICAL_OR", + "CUDNN_POINTWISE_LOGICAL_NOT", + "CUDNN_POINTWISE_LOGICAL_AND", + "CUDNN_POINTWISE_LOG", + "CUDNN_POINTWISE_IDENTITY", + "CUDNN_POINTWISE_GEN_INDEX", "CUDNN_POINTWISE_GELU_FWD", "CUDNN_POINTWISE_GELU_BWD", + "CUDNN_POINTWISE_GELU_APPROX_TANH_FWD", + "CUDNN_POINTWISE_GELU_APPROX_TANH_BWD", + "CUDNN_POINTWISE_FLOOR", + "CUDNN_POINTWISE_EXP", + "CUDNN_POINTWISE_ERF", "CUDNN_POINTWISE_ELU_FWD", "CUDNN_POINTWISE_ELU_BWD", + "CUDNN_POINTWISE_DIV", + "CUDNN_POINTWISE_COS", + "CUDNN_POINTWISE_CMP_NEQ", + "CUDNN_POINTWISE_CMP_LT", + "CUDNN_POINTWISE_CMP_LE", + "CUDNN_POINTWISE_CMP_GT", + "CUDNN_POINTWISE_CMP_GE", + "CUDNN_POINTWISE_CMP_EQ", + "CUDNN_POINTWISE_CEIL", + "CUDNN_POINTWISE_BINARY_SELECT", + "CUDNN_POINTWISE_ADD_SQUARE", "CUDNN_POINTWISE_ADD", + "CUDNN_POINTWISE_ABS", "CUDNN_PATCHLEVEL", "CUDNN_PARAM_ZDESC", "CUDNN_PARAM_ZDATA_PLACEHOLDER", @@ -7230,6 +7460,9 @@ sub warnUnsupportedFunctions { "CUDNN_OPS_INFER_PATCH", "CUDNN_OPS_INFER_MINOR", "CUDNN_OPS_INFER_MAJOR", + "CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_6x6", + "CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_4x4", + "CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_13x13", "CUDNN_NUMERICAL_NOTE_WINOGRAD", "CUDNN_NUMERICAL_NOTE_TYPE_COUNT", "CUDNN_NUMERICAL_NOTE_TENSOR_CORE", @@ -7243,9 +7476,12 @@ sub warnUnsupportedFunctions { "CUDNN_NORM_OPS_NORM_ADD_ACTIVATION", "CUDNN_NORM_OPS_NORM_ACTIVATION", "CUDNN_NORM_OPS_NORM", + "CUDNN_NORM_FWD_TRAINING", + "CUDNN_NORM_FWD_INFERENCE", "CUDNN_NORM_ALGO_STANDARD", "CUDNN_NORM_ALGO_PERSIST", "CUDNN_NON_DETERMINISTIC", + "CUDNN_NEG_INF_PAD", "CUDNN_MINOR", "CUDNN_MH_ATTN_V_WEIGHTS", "CUDNN_MH_ATTN_V_BIASES", @@ -7267,9 +7503,14 @@ sub warnUnsupportedFunctions { "CUDNN_LAYOUT_TYPE_PREFERRED_NHWC", "CUDNN_LAYOUT_TYPE_PREFERRED_NCHW", "CUDNN_LAYOUT_TYPE_COUNT", + "CUDNN_LAYER_NORM", + "CUDNN_KNOB_TYPE_WORKSPACE", "CUDNN_KNOB_TYPE_WINO_TILE", "CUDNN_KNOB_TYPE_USE_TEX", "CUDNN_KNOB_TYPE_TILE_SIZE", + 
"CUDNN_KNOB_TYPE_TILE_CGA_N", + "CUDNN_KNOB_TYPE_TILE_CGA_M", + "CUDNN_KNOB_TYPE_TILE_CGA", "CUDNN_KNOB_TYPE_TILEK", "CUDNN_KNOB_TYPE_SWIZZLE", "CUDNN_KNOB_TYPE_STAGES", @@ -7293,9 +7534,13 @@ sub warnUnsupportedFunctions { "CUDNN_KNOB_TYPE_CTA_SPLIT_K_MODE", "CUDNN_KNOB_TYPE_COUNTS", "CUDNN_KNOB_TYPE_CHUNK_K", + "CUDNN_INSTANCE_NORM", "CUDNN_HEUR_MODE_INSTANT", + "CUDNN_HEUR_MODE_FALLBACK", "CUDNN_HEUR_MODE_B", + "CUDNN_HEUR_MODE_A", "CUDNN_HEUR_MODES_COUNT", + "CUDNN_GROUP_NORM", "CUDNN_GENSTATS_SUM_SQSUM", "CUDNN_FWD_MODE_TRAINING", "CUDNN_FWD_MODE_INFERENCE", @@ -7310,6 +7555,7 @@ sub warnUnsupportedFunctions { "CUDNN_ERRQUERY_RAWCODE", "CUDNN_ERRQUERY_NONBLOCKING", "CUDNN_ERRQUERY_BLOCKING", + "CUDNN_EDGE_VAL_PAD", "CUDNN_DIVNORM_PRECOMPUTED_MEANS", "CUDNN_DIM_MAX", "CUDNN_DETERMINISTIC", @@ -7318,6 +7564,9 @@ sub warnUnsupportedFunctions { "CUDNN_DATA_UINT8", "CUDNN_DATA_INT8x32", "CUDNN_DATA_INT64", + "CUDNN_DATA_FP8_E5M2", + "CUDNN_DATA_FP8_E4M3", + "CUDNN_DATA_BOOLEAN", "CUDNN_DATA_BFLOAT16", "CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC", "CUDNN_CTC_LOSS_ALGO_DETERMINISTIC", @@ -7331,20 +7580,30 @@ sub warnUnsupportedFunctions { "CUDNN_BN_FINALIZE_STATISTICS_INFERENCE", "CUDNN_BEHAVIOR_NOTE_TYPE_COUNT", "CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION", + "CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER", + "CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER", + "CUDNN_BATCH_NORM", "CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION", "CUDNN_BATCHNORM_OPS_BN_ACTIVATION", "CUDNN_BATCHNORM_OPS_BN", "CUDNN_BACKEND_VARIANT_PACK_DESCRIPTOR", "CUDNN_BACKEND_TENSOR_DESCRIPTOR", + "CUDNN_BACKEND_RESAMPLE_DESCRIPTOR", "CUDNN_BACKEND_REDUCTION_DESCRIPTOR", "CUDNN_BACKEND_POINTWISE_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_SIGNAL_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_RESAMPLE_FWD_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_RESAMPLE_BWD_DESCRIPTOR", "CUDNN_BACKEND_OPERATION_REDUCTION_DESCRIPTOR", "CUDNN_BACKEND_OPERATION_POINTWISE_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_NORM_FORWARD_DESCRIPTOR", 
+ "CUDNN_BACKEND_OPERATION_NORM_BACKWARD_DESCRIPTOR", "CUDNN_BACKEND_OPERATION_MATMUL_DESCRIPTOR", "CUDNN_BACKEND_OPERATION_GEN_STATS_DESCRIPTOR", "CUDNN_BACKEND_OPERATION_CONVOLUTION_FORWARD_DESCRIPTOR", "CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_FILTER_DESCRIPTOR", "CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_DATA_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_CONCAT_DESCRIPTOR", "CUDNN_BACKEND_OPERATION_BN_FINALIZE_STATISTICS_DESCRIPTOR", "CUDNN_BACKEND_OPERATION_BN_BWD_WEIGHTS_DESCRIPTOR", "CUDNN_BACKEND_OPERATIONGRAPH_DESCRIPTOR", @@ -7366,11 +7625,21 @@ sub warnUnsupportedFunctions { "CUDNN_ATTR_TENSOR_VECTORIZED_DIMENSION", "CUDNN_ATTR_TENSOR_UNIQUE_ID", "CUDNN_ATTR_TENSOR_STRIDES", + "CUDNN_ATTR_TENSOR_REORDERING_MODE", "CUDNN_ATTR_TENSOR_IS_VIRTUAL", "CUDNN_ATTR_TENSOR_IS_BY_VALUE", "CUDNN_ATTR_TENSOR_DIMENSIONS", "CUDNN_ATTR_TENSOR_DATA_TYPE", "CUDNN_ATTR_TENSOR_BYTE_ALIGNMENT", + "CUDNN_ATTR_RESAMPLE_WINDOW_DIMS", + "CUDNN_ATTR_RESAMPLE_STRIDES", + "CUDNN_ATTR_RESAMPLE_SPATIAL_DIMS", + "CUDNN_ATTR_RESAMPLE_PRE_PADDINGS", + "CUDNN_ATTR_RESAMPLE_POST_PADDINGS", + "CUDNN_ATTR_RESAMPLE_PADDING_MODE", + "CUDNN_ATTR_RESAMPLE_NAN_PROPAGATION", + "CUDNN_ATTR_RESAMPLE_MODE", + "CUDNN_ATTR_RESAMPLE_COMP_TYPE", "CUDNN_ATTR_REDUCTION_OPERATOR", "CUDNN_ATTR_REDUCTION_COMP_TYPE", "CUDNN_ATTR_POINTWISE_SWISH_BETA", @@ -7382,17 +7651,62 @@ sub warnUnsupportedFunctions { "CUDNN_ATTR_POINTWISE_MODE", "CUDNN_ATTR_POINTWISE_MATH_PREC", "CUDNN_ATTR_POINTWISE_ELU_ALPHA", + "CUDNN_ATTR_POINTWISE_AXIS", + "CUDNN_ATTR_OPERATION_SIGNAL_YDESC", + "CUDNN_ATTR_OPERATION_SIGNAL_XDESC", + "CUDNN_ATTR_OPERATION_SIGNAL_VALUE", + "CUDNN_ATTR_OPERATION_SIGNAL_MODE", + "CUDNN_ATTR_OPERATION_SIGNAL_FLAGDESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_YDESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_XDESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_IDXDESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_DESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_BETA", + "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_ALPHA", + 
"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_IDXDESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DYDESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DXDESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_BETA", + "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_ALPHA", "CUDNN_ATTR_OPERATION_REDUCTION_YDESC", "CUDNN_ATTR_OPERATION_REDUCTION_XDESC", "CUDNN_ATTR_OPERATION_REDUCTION_DESC", "CUDNN_ATTR_OPERATION_POINTWISE_YDESC", "CUDNN_ATTR_OPERATION_POINTWISE_XDESC", + "CUDNN_ATTR_OPERATION_POINTWISE_TDESC", "CUDNN_ATTR_OPERATION_POINTWISE_PW_DESCRIPTOR", "CUDNN_ATTR_OPERATION_POINTWISE_DYDESC", "CUDNN_ATTR_OPERATION_POINTWISE_DXDESC", "CUDNN_ATTR_OPERATION_POINTWISE_BDESC", "CUDNN_ATTR_OPERATION_POINTWISE_ALPHA2", "CUDNN_ATTR_OPERATION_POINTWISE_ALPHA1", + "CUDNN_ATTR_OPERATION_NORM_FWD_YDESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_XDESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_SCALE_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_PHASE", + "CUDNN_ATTR_OPERATION_NORM_FWD_PEER_STAT_DESCS", + "CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_VAR_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_MEAN_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_MODE", + "CUDNN_ATTR_OPERATION_NORM_FWD_MEAN_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_INV_VARIANCE_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_VAR_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_MEAN_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_EXP_AVG_FACTOR_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_EPSILON_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_BIAS_DESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_XDESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_SCALE_DESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_PEER_STAT_DESCS", + "CUDNN_ATTR_OPERATION_NORM_BWD_MODE", + "CUDNN_ATTR_OPERATION_NORM_BWD_MEAN_DESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_INV_VARIANCE_DESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_EPSILON_DESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_DYDESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_DXDESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_DSCALE_DESC", + 
"CUDNN_ATTR_OPERATION_NORM_BWD_DBIAS_DESC", "CUDNN_ATTR_OPERATION_MATMUL_IRREGULARLY_STRIDED_BATCH_COUNT", "CUDNN_ATTR_OPERATION_MATMUL_DESC", "CUDNN_ATTR_OPERATION_MATMUL_CDESC", @@ -7421,6 +7735,10 @@ sub warnUnsupportedFunctions { "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_CONV_DESC", "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_BETA", "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_ALPHA", + "CUDNN_ATTR_OPERATION_CONCAT_OUTPUT_DESC", + "CUDNN_ATTR_OPERATION_CONCAT_INPUT_DESCS", + "CUDNN_ATTR_OPERATION_CONCAT_INPLACE_INDEX", + "CUDNN_ATTR_OPERATION_CONCAT_AXIS", "CUDNN_ATTR_OPERATION_BN_FINALIZE_Y_SUM_DESC", "CUDNN_ATTR_OPERATION_BN_FINALIZE_Y_SQ_SUM_DESC", "CUDNN_ATTR_OPERATION_BN_FINALIZE_UPDATED_RUNNING_VAR_DESC", @@ -7467,6 +7785,7 @@ sub warnUnsupportedFunctions { "CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_ATTRIBUTES", "CUDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE", "CUDNN_ATTR_EXECUTION_PLAN_RUN_ONLY_INTERMEDIATE_UIDS", + "CUDNN_ATTR_EXECUTION_PLAN_JSON_REPRESENTATION", "CUDNN_ATTR_EXECUTION_PLAN_HANDLE", "CUDNN_ATTR_EXECUTION_PLAN_ENGINE_CONFIG", "CUDNN_ATTR_EXECUTION_PLAN_COMPUTED_INTERMEDIATE_UIDS", @@ -7500,12 +7819,6 @@ sub warnUnsupportedFunctions { "CUDNN_ADV_INFER_PATCH", "CUDNN_ADV_INFER_MINOR", "CUDNN_ADV_INFER_MAJOR", - "CUDA_R_64U", - "CUDA_R_64I", - "CUDA_R_4U", - "CUDA_R_4I", - "CUDA_R_16U", - "CUDA_R_16I", "CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_v1", "CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st", "CUDA_POINTER_ATTRIBUTE_P2P_TOKENS", @@ -7548,11 +7861,13 @@ sub warnUnsupportedFunctions { "CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED", "CUDA_ERROR_MPS_MAX_CLIENTS_REACHED", "CUDA_ERROR_MPS_CONNECTION_FAILED", + "CUDA_ERROR_MPS_CLIENT_TERMINATED", "CUDA_ERROR_MISALIGNED_ADDRESS", "CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING", "CUDA_ERROR_JIT_COMPILER_NOT_FOUND", "CUDA_ERROR_JIT_COMPILATION_DISABLED", "CUDA_ERROR_INVALID_PC", + "CUDA_ERROR_INVALID_CLUSTER_SIZE", "CUDA_ERROR_INVALID_ADDRESS_SPACE", "CUDA_ERROR_ILLEGAL_INSTRUCTION", "CUDA_ERROR_HARDWARE_STACK_ERROR", @@ -7561,12 
+7876,6 @@ sub warnUnsupportedFunctions { "CUDA_ERROR_DEVICE_NOT_LICENSED", "CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE", "CUDA_EGL_MAX_PLANES", - "CUDA_C_64U", - "CUDA_C_64I", - "CUDA_C_4U", - "CUDA_C_4I", - "CUDA_C_16U", - "CUDA_C_16I", "CUDA_CB", "CUDA_BATCH_MEM_OP_NODE_PARAMS_st", "CUDA_BATCH_MEM_OP_NODE_PARAMS", @@ -7651,6 +7960,8 @@ sub warnHipOnlyUnsupportedFunctions { my $k = 0; foreach $func ( "cublasZtrttp", + "cublasZtrmm_v2", + "cublasZtrmm", "cublasZtpttr", "cublasZmatinvBatched", "cublasZgemm3m", @@ -7659,6 +7970,8 @@ sub warnHipOnlyUnsupportedFunctions { "cublasUint8gemmBias", "cublasSwapEx", "cublasStrttp", + "cublasStrmm_v2", + "cublasStrmm", "cublasStpttr", "cublasSmatinvBatched", "cublasShutdown", @@ -7690,16 +8003,19 @@ sub warnHipOnlyUnsupportedFunctions { "cublasGetCudartVersion", "cublasFree", "cublasDtrttp", + "cublasDtrmm_v2", + "cublasDtrmm", "cublasDtpttr", "cublasDmatinvBatched", "cublasDgelsBatched", "cublasCtrttp", + "cublasCtrmm_v2", + "cublasCtrmm", "cublasCtpttr", "cublasCsyrkEx", "cublasCsyrk3mEx", "cublasCopyEx", "cublasContext", - "cublasComputeType_t", "cublasCmatinvBatched", "cublasCherkEx", "cublasCherk3mEx", @@ -7711,6 +8027,20 @@ sub warnHipOnlyUnsupportedFunctions { "cublasCgelsBatched", "cublasAsumEx", "cublasAlloc", + "CUDA_R_8F_E5M2", + "CUDA_R_8F_E4M3", + "CUDA_R_64U", + "CUDA_R_64I", + "CUDA_R_4U", + "CUDA_R_4I", + "CUDA_R_16U", + "CUDA_R_16I", + "CUDA_C_64U", + "CUDA_C_64I", + "CUDA_C_4U", + "CUDA_C_4I", + "CUDA_C_16U", + "CUDA_C_16I", "CUBLAS_VER_PATCH", "CUBLAS_VER_MINOR", "CUBLAS_VER_MAJOR", @@ -7816,7 +8146,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasSetMathMode", "cublasSetLoggerCallback", "cublasSetKernelStream", - "cublasSetAtomicsMode", "cublasRotmgEx", "cublasRotmEx", "cublasRotgEx", @@ -7824,12 +8153,10 @@ sub warnRocOnlyUnsupportedFunctions { "cublasMath_t", "cublasLoggerConfigure", "cublasLogCallback", - "cublasInit", "cublasIaminEx", "cublasIamaxEx", "cublasGetVersion_v2", "cublasGetVersion", - 
"cublasGetStatusString", "cublasGetStatusName", "cublasGetSmCountTarget", "cublasGetProperty", @@ -7837,7 +8164,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasGetLoggerCallback", "cublasGetError", "cublasGetCudartVersion", - "cublasGetAtomicsMode", "cublasFree", "cublasDtrttp", "cublasDtpttr", @@ -7852,7 +8178,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasCsyrkEx", "cublasCsyrk3mEx", "cublasCopyEx", - "cublasComputeType_t", "cublasCmatinvBatched", "cublasCherkEx", "cublasCherk3mEx", @@ -7868,6 +8193,20 @@ sub warnRocOnlyUnsupportedFunctions { "cublasCgelsBatched", "cublasAsumEx", "cublasAlloc", + "CUDA_R_8F_E5M2", + "CUDA_R_8F_E4M3", + "CUDA_R_64U", + "CUDA_R_64I", + "CUDA_R_4U", + "CUDA_R_4I", + "CUDA_R_16U", + "CUDA_R_16I", + "CUDA_C_64U", + "CUDA_C_64I", + "CUDA_C_4U", + "CUDA_C_4I", + "CUDA_C_16U", + "CUDA_C_16I", "CUBLAS_VER_PATCH", "CUBLAS_VER_MINOR", "CUBLAS_VER_MAJOR", @@ -7955,7 +8294,7 @@ if ($help) { print STDERR "$USAGE\n"; } if ($version) { - print STDERR "HIP version 5.3.0\n"; + print STDERR "HIP version 5.4.0\n"; } while (@ARGV) { $fileName=shift (@ARGV); @@ -8084,7 +8423,7 @@ while (@ARGV) { transformHostFunctions(); # TODO: would like to move this code outside loop but it uses $_ which contains the whole file unless ($no_output) { - my $apiCalls = $ft{'error'} + $ft{'init'} + $ft{'version'} + $ft{'device'} + $ft{'context'} + $ft{'module'} + $ft{'memory'} + $ft{'virtual_memory'} + $ft{'stream_ordered_memory'} + $ft{'addressing'} + $ft{'stream'} + $ft{'event'} + $ft{'external_resource_interop'} + $ft{'stream_memory'} + $ft{'execution'} + $ft{'graph'} + $ft{'occupancy'} + $ft{'texture'} + $ft{'surface'} + $ft{'peer'} + $ft{'graphics'} + $ft{'interactions'} + $ft{'profiler'} + $ft{'openGL'} + $ft{'D3D9'} + $ft{'D3D10'} + $ft{'D3D11'} + $ft{'VDPAU'} + $ft{'EGL'} + $ft{'thread'} + $ft{'complex'} + $ft{'library'} + $ft{'device_library'} + $ft{'include'} + $ft{'include_cuda_main_header'} + $ft{'include_cuda_main_header_v2'} + $ft{'type'} + 
$ft{'literal'} + $ft{'numeric_literal'} + $ft{'define'}; + my $apiCalls = $ft{'error'} + $ft{'init'} + $ft{'version'} + $ft{'device'} + $ft{'context'} + $ft{'module'} + $ft{'memory'} + $ft{'virtual_memory'} + $ft{'stream_ordered_memory'} + $ft{'addressing'} + $ft{'stream'} + $ft{'event'} + $ft{'external_resource_interop'} + $ft{'stream_memory'} + $ft{'execution'} + $ft{'graph'} + $ft{'occupancy'} + $ft{'texture'} + $ft{'surface'} + $ft{'peer'} + $ft{'graphics'} + $ft{'interactions'} + $ft{'profiler'} + $ft{'openGL'} + $ft{'D3D9'} + $ft{'D3D10'} + $ft{'D3D11'} + $ft{'VDPAU'} + $ft{'EGL'} + $ft{'thread'} + $ft{'complex'} + $ft{'library'} + $ft{'device_library'} + $ft{'device_type'} + $ft{'include'} + $ft{'include_cuda_main_header'} + $ft{'include_cuda_main_header_v2'} + $ft{'type'} + $ft{'literal'} + $ft{'numeric_literal'} + $ft{'define'}; my $kernStuff = $hasDeviceCode + $ft{'kernel_launch'} + $ft{'device_function'}; my $totalCalls = $apiCalls + $kernStuff; $is_dos = m/\r\n$/; diff --git a/doc/markdown/CUBLAS_API_supported_by_HIP.md b/doc/markdown/CUBLAS_API_supported_by_HIP.md index 238db117..52f99a91 100644 --- a/doc/markdown/CUBLAS_API_supported_by_HIP.md +++ b/doc/markdown/CUBLAS_API_supported_by_HIP.md @@ -96,7 +96,7 @@ |`CUBLAS_VER_MINOR`|10.1| | | | | | | | |`CUBLAS_VER_PATCH`|10.1| | | | | | | | |`cublasAtomicsMode_t`| | | |`hipblasAtomicsMode_t`|3.10.0| | | | -|`cublasComputeType_t`|11.0| | | | | | | | +|`cublasComputeType_t`|11.0| | |`hipblasDatatype_t`|1.8.2| | | | |`cublasContext`| | | | | | | | | |`cublasDataType_t`|7.5| | |`hipblasDatatype_t`|1.8.2| | | | |`cublasDiagType_t`| | | |`hipblasDiagType_t`|1.8.2| | | | @@ -140,6 +140,8 @@ |`CUDA_R_64F`|8.0| | |`HIPBLAS_R_64F`|1.8.2| | | | |`CUDA_R_64I`|11.0| | | | | | | | |`CUDA_R_64U`|11.0| | | | | | | | +|`CUDA_R_8F_E4M3`|11.8| | | | | | | | +|`CUDA_R_8F_E5M2`|11.8| | | | | | | | |`CUDA_R_8I`|8.0| | |`HIPBLAS_R_8I`|3.0.0| | | | |`CUDA_R_8U`|8.0| | |`HIPBLAS_R_8U`|3.0.0| | | | |`cudaDataType`|8.0| | 
|`hipblasDatatype_t`|1.8.2| | | | @@ -483,8 +485,8 @@ |`cublasCsyrk`| | | |`hipblasCsyrk`|3.5.0| | | | |`cublasCsyrk_v2`| | | |`hipblasCsyrk`|3.5.0| | | | |`cublasCsyrkx`| | | |`hipblasCsyrkx`|3.5.0| | | | -|`cublasCtrmm`| | | |`hipblasCtrmm`|3.5.0| | | | -|`cublasCtrmm_v2`| | | |`hipblasCtrmm`|3.5.0| | | | +|`cublasCtrmm`| | | | | | | | | +|`cublasCtrmm_v2`| | | | | | | | | |`cublasCtrsm`| | | |`hipblasCtrsm`|3.5.0| | | | |`cublasCtrsm_v2`| | | |`hipblasCtrsm`|3.5.0| | | | |`cublasDgemm`| | | |`hipblasDgemm`|1.8.2| | | | @@ -498,8 +500,8 @@ |`cublasDsyrk`| | | |`hipblasDsyrk`|3.5.0| | | | |`cublasDsyrk_v2`| | | |`hipblasDsyrk`|3.5.0| | | | |`cublasDsyrkx`| | | |`hipblasDsyrkx`|3.5.0| | | | -|`cublasDtrmm`| | | |`hipblasDtrmm`|3.2.0| | | | -|`cublasDtrmm_v2`| | | |`hipblasDtrmm`|3.2.0| | | | +|`cublasDtrmm`| | | | | | | | | +|`cublasDtrmm_v2`| | | | | | | | | |`cublasDtrsm`| | | |`hipblasDtrsm`|1.8.2| | | | |`cublasDtrsm_v2`| | | |`hipblasDtrsm`|1.8.2| | | | |`cublasHgemm`|7.5| | |`hipblasHgemm`|1.8.2| | | | @@ -516,8 +518,8 @@ |`cublasSsyrk`| | | |`hipblasSsyrk`|3.5.0| | | | |`cublasSsyrk_v2`| | | |`hipblasSsyrk`|3.5.0| | | | |`cublasSsyrkx`| | | |`hipblasSsyrkx`|3.5.0| | | | -|`cublasStrmm`| | | |`hipblasStrmm`|3.2.0| | | | -|`cublasStrmm_v2`| | | |`hipblasStrmm`|3.2.0| | | | +|`cublasStrmm`| | | | | | | | | +|`cublasStrmm_v2`| | | | | | | | | |`cublasStrsm`| | | |`hipblasStrsm`|1.8.2| | | | |`cublasStrsm_v2`| | | |`hipblasStrsm`|1.8.2| | | | |`cublasZgemm`| | | |`hipblasZgemm`|1.8.2| | | | @@ -539,8 +541,8 @@ |`cublasZsyrk`| | | |`hipblasZsyrk`|3.5.0| | | | |`cublasZsyrk_v2`| | | |`hipblasZsyrk`|3.5.0| | | | |`cublasZsyrkx`| | | |`hipblasZsyrkx`|3.5.0| | | | -|`cublasZtrmm`| | | |`hipblasZtrmm`|3.5.0| | | | -|`cublasZtrmm_v2`| | | |`hipblasZtrmm`|3.5.0| | | | +|`cublasZtrmm`| | | | | | | | | +|`cublasZtrmm_v2`| | | | | | | | | |`cublasZtrsm`| | | |`hipblasZtrsm`|3.5.0| | | | |`cublasZtrsm_v2`| | | |`hipblasZtrsm`|3.5.0| | | | diff --git 
a/doc/markdown/CUDA_Device_API_supported_by_HIP.md b/doc/markdown/CUDA_Device_API_supported_by_HIP.md index f9310b6e..a8275f1b 100644 --- a/doc/markdown/CUDA_Device_API_supported_by_HIP.md +++ b/doc/markdown/CUDA_Device_API_supported_by_HIP.md @@ -10,6 +10,35 @@ |`__assert_fail`| | | |`__assert_fail`|1.9.0| | | | |`__assertfail`| | | |`__assertfail`|1.9.0| | | | |`__ballot`| | | |`__ballot`|1.6.0| | | | +|`__bfloat1622float2`|11.0| | | | | | | | +|`__bfloat162bfloat162`|11.0| | | | | | | | +|`__bfloat162float`|11.0| | | | | | | | +|`__bfloat162int_rd`|11.0| | | | | | | | +|`__bfloat162int_rn`|11.0| | | | | | | | +|`__bfloat162int_ru`|11.0| | | | | | | | +|`__bfloat162int_rz`|11.0| | | | | | | | +|`__bfloat162ll_rd`|11.0| | | | | | | | +|`__bfloat162ll_rn`|11.0| | | | | | | | +|`__bfloat162ll_ru`|11.0| | | | | | | | +|`__bfloat162ll_rz`|11.0| | | | | | | | +|`__bfloat162short_rd`|11.0| | | | | | | | +|`__bfloat162short_rn`|11.0| | | | | | | | +|`__bfloat162short_ru`|11.0| | | | | | | | +|`__bfloat162short_rz`|11.0| | | | | | | | +|`__bfloat162uint_rd`|11.0| | | | | | | | +|`__bfloat162uint_rn`|11.0| | | | | | | | +|`__bfloat162uint_ru`|11.0| | | | | | | | +|`__bfloat162uint_rz`|11.0| | | | | | | | +|`__bfloat162ull_rd`|11.0| | | | | | | | +|`__bfloat162ull_rn`|11.0| | | | | | | | +|`__bfloat162ull_ru`|11.0| | | | | | | | +|`__bfloat162ull_rz`|11.0| | | | | | | | +|`__bfloat162ushort_rd`|11.0| | | | | | | | +|`__bfloat162ushort_rn`|11.0| | | | | | | | +|`__bfloat162ushort_ru`|11.0| | | | | | | | +|`__bfloat162ushort_rz`|11.0| | | | | | | | +|`__bfloat16_as_short`|11.0| | | | | | | | +|`__bfloat16_as_ushort`|11.0| | | | | | | | |`__brev`| | | |`__brev`|1.6.0| | | | |`__brevll`| | | |`__brevll`|1.6.0| | | | |`__brkpt`| | | | | | | | | @@ -29,10 +58,12 @@ |`__dmul_rn`| | | |`__dmul_rn`|1.6.0| | | | |`__dmul_ru`| | | | | | | | | |`__dmul_rz`| | | | | | | | | +|`__double2bfloat16`|11.0| | | | | | | | |`__double2float_rd`| | | |`__double2float_rd`|1.6.0| | | | 
|`__double2float_rn`| | | |`__double2float_rn`|1.6.0| | | | |`__double2float_ru`| | | |`__double2float_ru`|1.6.0| | | | |`__double2float_rz`| | | |`__double2float_rz`|1.6.0| | | | +|`__double2half`|11.0| | | | | | | | |`__double2hiint`| | | |`__double2hiint`|1.6.0| | | | |`__double2int_rd`| | | |`__double2int_rd`|1.6.0| | | | |`__double2int_rn`| | | |`__double2int_rn`|1.6.0| | | | @@ -81,6 +112,12 @@ |`__finitef`| | | | | | | | | |`__finitel`| | | | | | | | | |`__float22half2_rn`| | | |`__float22half2_rn`|1.6.0| | | | +|`__float2bfloat16`|11.0| | | | | | | | +|`__float2bfloat162_rn`|11.0| | | | | | | | +|`__float2bfloat16_rd`|11.0| | | | | | | | +|`__float2bfloat16_rn`|11.0| | | | | | | | +|`__float2bfloat16_ru`|11.0| | | | | | | | +|`__float2bfloat16_rz`|11.0| | | | | | | | |`__float2half`| | | |`__float2half`|1.6.0| | | | |`__float2half2_rn`| | | |`__float2half2_rn`|1.6.0| | | | |`__float2half_rd`| | | |`__float2half_rd`|1.6.0| | | | @@ -105,6 +142,7 @@ |`__float2ull_rz`| | | |`__float2ull_rz`|1.6.0| | | | |`__float_as_int`| | | |`__float_as_int`|1.6.0| | | | |`__float_as_uint`| | | |`__float_as_uint`|1.6.0| | | | +|`__floats2bfloat162_rn`|11.0| | | | | | | | |`__floats2half2_rn`| | | |`__floats2half2_rn`|1.6.0| | | | |`__fma_rd`| | | | | | | | | |`__fma_rn`| | | |`__fma_rn`|1.6.0| | | | @@ -140,7 +178,9 @@ |`__habs2`| | | |`__habs2`|3.5.0| | | | |`__hadd`| | | |`__hadd`|1.6.0| | | | |`__hadd2`| | | |`__hadd2`|1.6.0| | | | +|`__hadd2_rn`|11.6| | | | | | | | |`__hadd2_sat`| | | |`__hadd2_sat`|1.6.0| | | | +|`__hadd_rn`|11.6| | | | | | | | |`__hadd_sat`| | | |`__hadd_sat`|1.6.0| | | | |`__half22float2`| | | |`__half22float2`|1.6.0| | | | |`__half2float`| | | |`__half2float`|1.6.0| | | | @@ -171,6 +211,7 @@ |`__half2ushort_rz`| | | |`__half2ushort_rz`|1.6.0| | | | |`__half_as_short`| | | |`__half_as_short`|1.6.0| | | | |`__half_as_ushort`| | | |`__half_as_ushort`|1.6.0| | | | +|`__halves2bfloat162`|11.0| | | | | | | | |`__halves2half2`| | | |`__halves2half2`|1.6.0| 
| | | |`__hbeq2`| | | |`__hbeq2`|1.6.0| | | | |`__hbequ2`| | | |`__hbequ2`|1.9.0| | | | @@ -184,6 +225,7 @@ |`__hbltu2`| | | |`__hbltu2`|1.9.0| | | | |`__hbne2`| | | |`__hbne2`|1.6.0| | | | |`__hbneu2`| | | |`__hbneu2`|1.9.0| | | | +|`__hcmadd`|11.1| | | | | | | | |`__hdiv`| | | |`__hdiv`|1.9.0| | | | |`__heq`| | | |`__heq`|1.6.0| | | | |`__heq2`| | | |`__heq2`|1.6.0| | | | @@ -191,7 +233,9 @@ |`__hequ2`| | | |`__hequ2`|1.9.0| | | | |`__hfma`| | | |`__hfma`|1.6.0| | | | |`__hfma2`| | | |`__hfma2`|1.6.0| | | | +|`__hfma2_relu`|11.0| | | | | | | | |`__hfma2_sat`| | | |`__hfma2_sat`|1.6.0| | | | +|`__hfma_relu`|11.0| | | | | | | | |`__hfma_sat`| | | |`__hfma_sat`|1.6.0| | | | |`__hge`| | | |`__hge`|1.6.0| | | | |`__hge2`| | | |`__hge2`|1.6.0| | | | @@ -201,9 +245,12 @@ |`__hgt2`| | | |`__hgt2`|1.6.0| | | | |`__hgtu`| | | |`__hgtu`|1.9.0| | | | |`__hgtu2`| | | |`__hgtu2`|1.9.0| | | | +|`__high2bfloat16`|11.0| | | | | | | | +|`__high2bfloat162`|11.0| | | | | | | | |`__high2float`| | | |`__high2float`|1.6.0| | | | |`__high2half`| | | |`__high2half`|1.6.0| | | | |`__high2half2`| | | |`__high2half2`|1.6.0| | | | +|`__highs2bfloat162`|11.0| | | | | | | | |`__highs2half2`| | | |`__highs2half2`|1.6.0| | | | |`__hiloint2double`| | | |`__hiloint2double`|1.6.0| | | | |`__hisinf`| | | |`__hisinf`|1.6.0| | | | @@ -217,9 +264,19 @@ |`__hlt2`| | | |`__hlt2`|1.6.0| | | | |`__hltu`| | | |`__hltu`|1.9.0| | | | |`__hltu2`| | | |`__hltu2`|1.9.0| | | | +|`__hmax`|11.0| | | | | | | | +|`__hmax2`|11.0| | | | | | | | +|`__hmax2_nan`|11.0| | | | | | | | +|`__hmax_nan`|11.0| | | | | | | | +|`__hmin`|11.0| | | | | | | | +|`__hmin2`|11.0| | | | | | | | +|`__hmin2_nan`|11.0| | | | | | | | +|`__hmin_nan`|11.0| | | | | | | | |`__hmul`| | | |`__hmul`|1.6.0| | | | |`__hmul2`| | | |`__hmul2`|1.6.0| | | | +|`__hmul2_rn`|11.6| | | | | | | | |`__hmul2_sat`| | | |`__hmul2_sat`|1.6.0| | | | +|`__hmul_rn`|11.6| | | | | | | | |`__hmul_sat`| | | |`__hmul_sat`|1.6.0| | | | |`__hne`| | | |`__hne`|1.6.0| | | | 
|`__hne2`| | | |`__hne2`|1.6.0| | | | @@ -229,8 +286,14 @@ |`__hneu2`| | | |`__hneu2`|1.9.0| | | | |`__hsub`| | | |`__hsub`|1.6.0| | | | |`__hsub2`| | | |`__hsub2`|1.6.0| | | | +|`__hsub2_rn`|11.6| | | | | | | | |`__hsub2_sat`| | | |`__hsub2_sat`|1.6.0| | | | +|`__hsub_rn`|11.6| | | | | | | | |`__hsub_sat`| | | |`__hsub_sat`|1.6.0| | | | +|`__int2bfloat16_rd`|11.0| | | | | | | | +|`__int2bfloat16_rn`|11.0| | | | | | | | +|`__int2bfloat16_ru`|11.0| | | | | | | | +|`__int2bfloat16_rz`|11.0| | | | | | | | |`__int2double_rn`| | | |`__int2double_rn`|1.6.0| | | | |`__int2float_rd`| | | |`__int2float_rd`|1.6.0| | | | |`__int2float_rn`| | | |`__int2float_rn`|1.6.0| | | | @@ -250,7 +313,13 @@ |`__ldca`| | | |`__ldca`|1.9.0| | | | |`__ldcg`| | | |`__ldcg`|1.9.0| | | | |`__ldcs`| | | |`__ldcs`|1.9.0| | | | +|`__ldcv`|11.0| | | | | | | | |`__ldg`| | | |`__ldg`|1.6.0| | | | +|`__ldlu`|11.0| | | | | | | | +|`__ll2bfloat16_rd`|11.0| | | | | | | | +|`__ll2bfloat16_rn`|11.0| | | | | | | | +|`__ll2bfloat16_ru`|11.0| | | | | | | | +|`__ll2bfloat16_rz`|11.0| | | | | | | | |`__ll2double_rd`| | | |`__ll2double_rd`|1.6.0| | | | |`__ll2double_rn`| | | |`__ll2double_rn`|1.6.0| | | | |`__ll2double_ru`| | | |`__ll2double_ru`|1.6.0| | | | @@ -267,14 +336,27 @@ |`__log2f`| | | |`__log2f`|1.6.0| | | | |`__logf`| | | |`__logf`|1.6.0| | | | |`__longlong_as_double`| | | |`__longlong_as_double`|1.6.0| | | | +|`__low2bfloat16`|11.0| | | | | | | | +|`__low2bfloat162`|11.0| | | | | | | | |`__low2float`| | | |`__low2float`|1.6.0| | | | |`__low2half`| | | |`__low2half`|1.6.0| | | | |`__low2half2`| | | |`__low2half2`|1.6.0| | | | |`__lowhigh2highlow`| | | |`__lowhigh2highlow`|1.6.0| | | | +|`__lows2bfloat162`|11.0| | | | | | | | |`__lows2half2`| | | |`__lows2half2`|1.6.0| | | | |`__mul24`| | | |`__mul24`|1.6.0| | | | |`__mul64hi`| | | |`__mul64hi`|1.6.0| | | | |`__mulhi`| | | |`__mulhi`|1.6.0| | | | +|`__nv_cvt_bfloat16raw2_to_fp8x2`|11.8| | | | | | | | +|`__nv_cvt_bfloat16raw_to_fp8`|11.8| | | | | | | | 
+|`__nv_cvt_double2_to_fp8x2`|11.8| | | | | | | | +|`__nv_cvt_double_to_fp8`|11.8| | | | | | | | +|`__nv_cvt_float2_to_fp8x2`|11.8| | | | | | | | +|`__nv_cvt_float_to_fp8`|11.8| | | | | | | | +|`__nv_cvt_fp8_to_halfraw`|11.8| | | | | | | | +|`__nv_cvt_fp8x2_to_halfraw2`|11.8| | | | | | | | +|`__nv_cvt_halfraw2_to_fp8x2`|11.8| | | | | | | | +|`__nv_cvt_halfraw_to_fp8`|11.8| | | | | | | | |`__pm0`| | | | | | | | | |`__pm1`| | | | | | | | | |`__pm2`| | | | | | | | | @@ -294,16 +376,25 @@ |`__shfl_up_sync`| | | | | | | | | |`__shfl_xor`|7.5|9.0| |`__shfl_xor`|1.6.0| | | | |`__shfl_xor_sync`| | | | | | | | | +|`__short2bfloat16_rd`|11.0| | | | | | | | +|`__short2bfloat16_rn`|11.0| | | | | | | | +|`__short2bfloat16_ru`|11.0| | | | | | | | +|`__short2bfloat16_rz`|11.0| | | | | | | | |`__short2half_rd`| | | |`__short2half_rd`|1.6.0| | | | |`__short2half_rn`| | | |`__short2half_rn`|1.6.0| | | | |`__short2half_ru`| | | |`__short2half_ru`|1.6.0| | | | |`__short2half_rz`| | | |`__short2half_rz`|1.6.0| | | | +|`__short_as_bfloat16`|11.0| | | | | | | | |`__short_as_half`| | | |`__short_as_half`|1.9.0| | | | |`__signbit`| | | | | | | | | |`__signbitf`| | | | | | | | | |`__signbitl`| | | | | | | | | |`__sincosf`| | | |`__sincosf`|1.6.0| | | | |`__sinf`| | | |`__sinf`|1.6.0| | | | +|`__stcg`|11.0| | | | | | | | +|`__stcs`|11.0| | | | | | | | +|`__stwb`|11.0| | | | | | | | +|`__stwt`|11.0| | | | | | | | |`__syncthreads`| | | |`__syncthreads`|1.6.0| | | | |`__syncthreads_and`| | | |`__syncthreads_and`|3.7.0| | | | |`__syncthreads_count`| | | |`__syncthreads_count`|3.7.0| | | | @@ -314,6 +405,10 @@ |`__threadfence_system`| | | |`__threadfence_system`|1.6.0| | | | |`__trap`| | | | | | | | | |`__uhadd`| | | |`__uhadd`|1.6.0| | | | +|`__uint2bfloat16_rd`|11.0| | | | | | | | +|`__uint2bfloat16_rn`|11.0| | | | | | | | +|`__uint2bfloat16_ru`|11.0| | | | | | | | +|`__uint2bfloat16_rz`|11.0| | | | | | | | |`__uint2double_rn`| | | |`__uint2double_rn`|1.6.0| | | | |`__uint2float_rd`| | | 
|`__uint2float_rd`|1.6.0| | | | |`__uint2float_rn`| | | |`__uint2float_rn`|1.6.0| | | | @@ -324,6 +419,10 @@ |`__uint2half_ru`| | | |`__uint2half_ru`|1.6.0| | | | |`__uint2half_rz`| | | |`__uint2half_rz`|1.6.0| | | | |`__uint_as_float`| | | |`__uint_as_float`|1.6.0| | | | +|`__ull2bfloat16_rd`|11.0| | | | | | | | +|`__ull2bfloat16_rn`|11.0| | | | | | | | +|`__ull2bfloat16_ru`|11.0| | | | | | | | +|`__ull2bfloat16_rz`|11.0| | | | | | | | |`__ull2double_rd`| | | |`__ull2double_rd`|1.6.0| | | | |`__ull2double_rn`| | | |`__ull2double_rn`|1.6.0| | | | |`__ull2double_ru`| | | |`__ull2double_ru`|1.6.0| | | | @@ -341,10 +440,15 @@ |`__umulhi`| | | |`__umulhi`|1.6.0| | | | |`__urhadd`| | | |`__urhadd`|1.6.0| | | | |`__usad`| | | |`__usad`|1.6.0| | | | +|`__ushort2bfloat16_rd`|11.0| | | | | | | | +|`__ushort2bfloat16_rn`|11.0| | | | | | | | +|`__ushort2bfloat16_ru`|11.0| | | | | | | | +|`__ushort2bfloat16_rz`|11.0| | | | | | | | |`__ushort2half_rd`| | | |`__ushort2half_rd`|1.6.0| | | | |`__ushort2half_rn`| | | |`__ushort2half_rn`|1.6.0| | | | |`__ushort2half_ru`| | | |`__ushort2half_ru`|1.6.0| | | | |`__ushort2half_rz`| | | |`__ushort2half_rz`|1.6.0| | | | +|`__ushort_as_bfloat16`|11.0| | | | | | | | |`__ushort_as_half`| | | |`__ushort_as_half`|1.6.0| | | | |`__vabs2`| | | | | | | | | |`__vabs4`| | | | | | | | | @@ -676,5 +780,35 @@ |`yn`| | | |`yn`|1.6.0| | | | |`ynf`| | | |`ynf`|1.6.0| | | | +## **2. 
Device Types** + +|**CUDA**|**A**|**D**|**R**|**HIP**|**A**|**D**|**R**|**E**| +|:--|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:| +|`__NV_E4M3`|11.8| | | | | | | | +|`__NV_E5M2`|11.8| | | | | | | | +|`__NV_NOSAT`|11.8| | | | | | | | +|`__NV_SATFINITE`|11.8| | | | | | | | +|`__half`| | | |`__half`|1.6.0| | | | +|`__half2`| | | |`__half2`|1.6.0| | | | +|`__half2_raw`| | | |`__half2_raw`|1.9.0| | | | +|`__half_raw`| | | |`__half_raw`|1.9.0| | | | +|`__nv_bfloat16`|11.0| | | | | | | | +|`__nv_bfloat162`|11.0| | | | | | | | +|`__nv_bfloat162_raw`|11.0| | | | | | | | +|`__nv_bfloat16_raw`|11.0| | | | | | | | +|`__nv_fp8_e4m3`|11.8| | | | | | | | +|`__nv_fp8_e5m2`|11.8| | | | | | | | +|`__nv_fp8_interpretation_t`|11.8| | | | | | | | +|`__nv_fp8_storage_t`|11.8| | | | | | | | +|`__nv_fp8x2_e4m3`|11.8| | | | | | | | +|`__nv_fp8x2_e5m2`|11.8| | | | | | | | +|`__nv_fp8x2_storage_t`|11.8| | | | | | | | +|`__nv_fp8x4_e4m3`|11.8| | | | | | | | +|`__nv_fp8x4_e5m2`|11.8| | | | | | | | +|`__nv_fp8x4_storage_t`|11.8| | | | | | | | +|`__nv_saturation_t`|11.8| | | | | | | | +|`nv_bfloat16`|11.0| | | | | | | | +|`nv_bfloat162`|11.0| | | | | | | | + \*A - Added; D - Deprecated; R - Removed; E - Experimental \ No newline at end of file diff --git a/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md b/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md index 55ebfb57..510b653a 100644 --- a/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md +++ b/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md @@ -56,6 +56,7 @@ |`CUDA_ERROR_ILLEGAL_INSTRUCTION`| | | | | | | | | |`CUDA_ERROR_ILLEGAL_STATE`|10.0| | |`hipErrorIllegalState`|5.0.0| | | | |`CUDA_ERROR_INVALID_ADDRESS_SPACE`| | | | | | | | | +|`CUDA_ERROR_INVALID_CLUSTER_SIZE`|11.8| | | | | | | | |`CUDA_ERROR_INVALID_CONTEXT`| | | |`hipErrorInvalidContext`|1.6.0| | | | |`CUDA_ERROR_INVALID_DEVICE`| | | |`hipErrorInvalidDevice`|1.6.0| | | | |`CUDA_ERROR_INVALID_GRAPHICS_CONTEXT`| | | |`hipErrorInvalidGraphicsContext`|1.6.0| | 
| | @@ -73,6 +74,7 @@ |`CUDA_ERROR_LAUNCH_TIMEOUT`| | | |`hipErrorLaunchTimeOut`|1.6.0| | | | |`CUDA_ERROR_MAP_FAILED`| | | |`hipErrorMapFailed`|1.6.0| | | | |`CUDA_ERROR_MISALIGNED_ADDRESS`| | | | | | | | | +|`CUDA_ERROR_MPS_CLIENT_TERMINATED`|11.8| | | | | | | | |`CUDA_ERROR_MPS_CONNECTION_FAILED`|11.4| | | | | | | | |`CUDA_ERROR_MPS_MAX_CLIENTS_REACHED`|11.4| | | | | | | | |`CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED`|11.4| | | | | | | | @@ -239,6 +241,9 @@ |`CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL`|11.1| | | | | | | | |`CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL`|11.1| | |`hipArraySparseSubresourceTypeMiptail`|5.2.0| | | | |`CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL`|11.1| | |`hipArraySparseSubresourceTypeSparseLevel`|5.2.0| | | | +|`CU_CLUSTER_SCHEDULING_POLICY_DEFAULT`|11.8| | | | | | | | +|`CU_CLUSTER_SCHEDULING_POLICY_LOAD_BALANCING`|11.8| | | | | | | | +|`CU_CLUSTER_SCHEDULING_POLICY_SPREAD`|11.8| | | | | | | | |`CU_COMPUTEMODE_DEFAULT`| | | |`hipComputeModeDefault`|1.9.0| | | | |`CU_COMPUTEMODE_EXCLUSIVE`| | |8.0|`hipComputeModeExclusive`|1.9.0| | | | |`CU_COMPUTEMODE_EXCLUSIVE_PROCESS`| | | |`hipComputeModeExclusiveProcess`|2.0.0| | | | @@ -288,6 +293,7 @@ |`CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR`|9.0| | |`hipDeviceAttributeCanUseStreamWaitValue`|4.3.0| | | | |`CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR_V2`|11.7| | | | | | | | |`CU_DEVICE_ATTRIBUTE_CLOCK_RATE`| | | |`hipDeviceAttributeClockRate`|1.6.0| | | | +|`CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH`|11.8| | | | | | | | |`CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR`| | | |`hipDeviceAttributeComputeCapabilityMajor`|1.6.0| | | | |`CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR`| | | |`hipDeviceAttributeComputeCapabilityMinor`|1.6.0| | | | |`CU_DEVICE_ATTRIBUTE_COMPUTE_MODE`| | | |`hipDeviceAttributeComputeMode`|1.6.0| | | | @@ -405,7 +411,7 @@ |`CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY`| | | |`hipDeviceAttributeTotalConstantMemory`|1.6.0| | | | |`CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING`| | | 
|`hipDeviceAttributeUnifiedAddressing`|4.3.0| | | | |`CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED`|10.2|11.2| | | | | | | -|`CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED`|11.2| | |`hipDeviceAttributeVirtualMemoryManagementSupported`|5.3.0| | |5.3.0| +|`CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED`|11.2| | |`hipDeviceAttributeVirtualMemoryManagementSupported`|5.3.0| | | | |`CU_DEVICE_ATTRIBUTE_WARP_SIZE`| | | |`hipDeviceAttributeWarpSize`|1.6.0| | | | |`CU_DEVICE_CPU`|8.0| | |`hipCpuDeviceId`|3.7.0| | | | |`CU_DEVICE_INVALID`|8.0| | |`hipInvalidDeviceId`|3.7.0| | | | @@ -498,6 +504,10 @@ |`CU_EVENT_INTERPROCESS`| | | |`hipEventInterprocess`|1.6.0| | | | |`CU_EVENT_RECORD_DEFAULT`|11.1| | | | | | | | |`CU_EVENT_RECORD_EXTERNAL`|11.1| | | | | | | | +|`CU_EVENT_SCHED_AUTO`|11.8| | | | | | | | +|`CU_EVENT_SCHED_BLOCKING_SYNC`|11.8| | | | | | | | +|`CU_EVENT_SCHED_SPIN`|11.8| | | | | | | | +|`CU_EVENT_SCHED_YIELD`|11.8| | | | | | | | |`CU_EVENT_WAIT_DEFAULT`|11.1| | | | | | | | |`CU_EVENT_WAIT_EXTERNAL`|11.1| | | | | | | | |`CU_EXEC_AFFINITY_TYPE_MAX`|11.4| | | | | | | | @@ -527,14 +537,20 @@ |`CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_OWNER`|11.3| | | | | | | | |`CU_FUNC_ATTRIBUTE_BINARY_VERSION`| | | |`HIP_FUNC_ATTRIBUTE_BINARY_VERSION`|2.8.0| | | | |`CU_FUNC_ATTRIBUTE_CACHE_MODE_CA`| | | |`HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA`|2.8.0| | | | +|`CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE`|11.8| | | | | | | | +|`CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET`|11.8| | | | | | | | |`CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES`| | | |`HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES`|2.8.0| | | | |`CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES`| | | |`HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES`|2.8.0| | | | |`CU_FUNC_ATTRIBUTE_MAX`| | | |`HIP_FUNC_ATTRIBUTE_MAX`|2.8.0| | | | |`CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES`|9.0| | |`HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES`|2.8.0| | | | |`CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK`| | | 
|`HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK`|2.8.0| | | | +|`CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED`|11.8| | | | | | | | |`CU_FUNC_ATTRIBUTE_NUM_REGS`| | | |`HIP_FUNC_ATTRIBUTE_NUM_REGS`|2.8.0| | | | |`CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT`|9.0| | |`HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT`|2.8.0| | | | |`CU_FUNC_ATTRIBUTE_PTX_VERSION`| | | |`HIP_FUNC_ATTRIBUTE_PTX_VERSION`|2.8.0| | | | +|`CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH`|11.8| | | | | | | | +|`CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT`|11.8| | | | | | | | +|`CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH`|11.8| | | | | | | | |`CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES`| | | |`HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES`|2.8.0| | | | |`CU_FUNC_CACHE_PREFER_EQUAL`| | | |`hipFuncCachePreferEqual`|1.6.0| | | | |`CU_FUNC_CACHE_PREFER_L1`| | | |`hipFuncCachePreferL1`|1.6.0| | | | @@ -583,16 +599,16 @@ |`CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED`|10.2| | |`hipGraphExecUpdateErrorTopologyChanged`|4.3.0| | | | |`CU_GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE`|11.2| | |`hipGraphExecUpdateErrorUnsupportedFunctionChange`|4.3.0| | | | |`CU_GRAPH_EXEC_UPDATE_SUCCESS`|10.2| | |`hipGraphExecUpdateSuccess`|4.3.0| | | | -|`CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT`|11.4| | |`hipGraphMemAttrReservedMemCurrent`|5.3.0| | |5.3.0| -|`CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH`|11.4| | |`hipGraphMemAttrReservedMemHigh`|5.3.0| | |5.3.0| -|`CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT`|11.4| | |`hipGraphMemAttrUsedMemCurrent`|5.3.0| | |5.3.0| -|`CU_GRAPH_MEM_ATTR_USED_MEM_HIGH`|11.4| | |`hipGraphMemAttrUsedMemHigh`|5.3.0| | |5.3.0| +|`CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT`|11.4| | |`hipGraphMemAttrReservedMemCurrent`|5.3.0| | | | +|`CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH`|11.4| | |`hipGraphMemAttrReservedMemHigh`|5.3.0| | | | +|`CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT`|11.4| | |`hipGraphMemAttrUsedMemCurrent`|5.3.0| | | | +|`CU_GRAPH_MEM_ATTR_USED_MEM_HIGH`|11.4| | |`hipGraphMemAttrUsedMemHigh`|5.3.0| | | | 
|`CU_GRAPH_NODE_TYPE_BATCH_MEM_OP`|11.7| | | | | | | | |`CU_GRAPH_NODE_TYPE_COUNT`|10.0| |11.0|`hipGraphNodeTypeCount`|4.3.0| | | | |`CU_GRAPH_NODE_TYPE_EMPTY`|10.0| | |`hipGraphNodeTypeEmpty`|4.3.0| | | | |`CU_GRAPH_NODE_TYPE_EVENT_RECORD`|11.1| | |`hipGraphNodeTypeEventRecord`|4.3.0| | | | -|`CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL`|11.2| | |`hipGraphNodeTypeExtSemaphoreSignal`|5.3.0| | |5.3.0| -|`CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT`|11.2| | |`hipGraphNodeTypeExtSemaphoreWait`|5.3.0| | |5.3.0| +|`CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL`|11.2| | |`hipGraphNodeTypeExtSemaphoreSignal`|5.3.0| | | | +|`CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT`|11.2| | |`hipGraphNodeTypeExtSemaphoreWait`|5.3.0| | | | |`CU_GRAPH_NODE_TYPE_GRAPH`|10.0| | |`hipGraphNodeTypeGraph`|4.3.0| | | | |`CU_GRAPH_NODE_TYPE_HOST`|10.0| | |`hipGraphNodeTypeHost`|4.3.0| | | | |`CU_GRAPH_NODE_TYPE_KERNEL`|10.0| | |`hipGraphNodeTypeKernel`|4.3.0| | | | @@ -601,39 +617,39 @@ |`CU_GRAPH_NODE_TYPE_MEM_ALLOC`|11.4| | | | | | | | |`CU_GRAPH_NODE_TYPE_MEM_FREE`|11.4| | | | | | | | |`CU_GRAPH_NODE_TYPE_WAIT_EVENT`|11.1| | |`hipGraphNodeTypeWaitEvent`|4.3.0| | | | -|`CU_GRAPH_USER_OBJECT_MOVE`|11.3| | |`hipGraphUserObjectMove`|5.3.0| | |5.3.0| +|`CU_GRAPH_USER_OBJECT_MOVE`|11.3| | |`hipGraphUserObjectMove`|5.3.0| | | | |`CU_IPC_HANDLE_SIZE`| | | |`HIP_IPC_HANDLE_SIZE`|1.6.0| | | | |`CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS`| | | |`hipIpcMemLazyEnablePeerAccess`|1.6.0| | | | -|`CU_JIT_CACHE_MODE`| | | |`hipJitOptionCacheMode`|1.6.0| | | | +|`CU_JIT_CACHE_MODE`| | | |`HIPRTC_JIT_CACHE_MODE`|1.6.0| | | | |`CU_JIT_CACHE_OPTION_CA`| | | | | | | | | |`CU_JIT_CACHE_OPTION_CG`| | | | | | | | | |`CU_JIT_CACHE_OPTION_NONE`| | | | | | | | | -|`CU_JIT_ERROR_LOG_BUFFER`| | | |`hipJitOptionErrorLogBuffer`|1.6.0| | | | -|`CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES`| | | |`hipJitOptionErrorLogBufferSizeBytes`|1.6.0| | | | -|`CU_JIT_FALLBACK_STRATEGY`| | | |`hipJitOptionFallbackStrategy`|1.6.0| | | | -|`CU_JIT_FAST_COMPILE`| | | 
|`hipJitOptionFastCompile`|1.6.0| | | | +|`CU_JIT_ERROR_LOG_BUFFER`| | | |`HIPRTC_JIT_ERROR_LOG_BUFFER`|1.6.0| | | | +|`CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES`| | | |`HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES`|1.6.0| | | | +|`CU_JIT_FALLBACK_STRATEGY`| | | |`HIPRTC_JIT_FALLBACK_STRATEGY`|1.6.0| | | | +|`CU_JIT_FAST_COMPILE`| | | |`HIPRTC_JIT_FAST_COMPILE`|1.6.0| | | | |`CU_JIT_FMA`|11.4| | | | | | | | |`CU_JIT_FTZ`|11.4| | | | | | | | -|`CU_JIT_GENERATE_DEBUG_INFO`| | | |`hipJitOptionGenerateDebugInfo`|1.6.0| | | | -|`CU_JIT_GENERATE_LINE_INFO`| | | |`hipJitOptionGenerateLineInfo`|1.6.0| | | | +|`CU_JIT_GENERATE_DEBUG_INFO`| | | |`HIPRTC_JIT_GENERATE_DEBUG_INFO`|1.6.0| | | | +|`CU_JIT_GENERATE_LINE_INFO`| | | |`HIPRTC_JIT_GENERATE_LINE_INFO`|1.6.0| | | | |`CU_JIT_GLOBAL_SYMBOL_ADDRESSES`| | | | | | | | | |`CU_JIT_GLOBAL_SYMBOL_COUNT`| | | | | | | | | |`CU_JIT_GLOBAL_SYMBOL_NAMES`| | | | | | | | | -|`CU_JIT_INFO_LOG_BUFFER`| | | |`hipJitOptionInfoLogBuffer`|1.6.0| | | | -|`CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES`| | | |`hipJitOptionInfoLogBufferSizeBytes`|1.6.0| | | | -|`CU_JIT_INPUT_CUBIN`| | | |`HIPRTC_JIT_INPUT_CUBIN`|5.3.0| | |5.3.0| -|`CU_JIT_INPUT_FATBINARY`| | | |`HIPRTC_JIT_INPUT_FATBINARY`|5.3.0| | |5.3.0| -|`CU_JIT_INPUT_LIBRARY`| | | |`HIPRTC_JIT_INPUT_LIBRARY`|5.3.0| | |5.3.0| -|`CU_JIT_INPUT_NVVM`|11.4| | |`HIPRTC_JIT_INPUT_NVVM`|5.3.0| | |5.3.0| -|`CU_JIT_INPUT_OBJECT`| | | |`HIPRTC_JIT_INPUT_OBJECT`|5.3.0| | |5.3.0| -|`CU_JIT_INPUT_PTX`| | | |`HIPRTC_JIT_INPUT_PTX`|5.3.0| | |5.3.0| -|`CU_JIT_LOG_VERBOSE`| | | |`hipJitOptionLogVerbose`|1.6.0| | | | +|`CU_JIT_INFO_LOG_BUFFER`| | | |`HIPRTC_JIT_INFO_LOG_BUFFER`|1.6.0| | | | +|`CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES`| | | |`HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES`|1.6.0| | | | +|`CU_JIT_INPUT_CUBIN`| | | |`HIPRTC_JIT_INPUT_CUBIN`|5.3.0| | | | +|`CU_JIT_INPUT_FATBINARY`| | | |`HIPRTC_JIT_INPUT_FATBINARY`|5.3.0| | | | +|`CU_JIT_INPUT_LIBRARY`| | | |`HIPRTC_JIT_INPUT_LIBRARY`|5.3.0| | | | +|`CU_JIT_INPUT_NVVM`|11.4| | 
|`HIPRTC_JIT_INPUT_NVVM`|5.3.0| | | | +|`CU_JIT_INPUT_OBJECT`| | | |`HIPRTC_JIT_INPUT_OBJECT`|5.3.0| | | | +|`CU_JIT_INPUT_PTX`| | | |`HIPRTC_JIT_INPUT_PTX`|5.3.0| | | | +|`CU_JIT_LOG_VERBOSE`| | | |`HIPRTC_JIT_LOG_VERBOSE`|1.6.0| | | | |`CU_JIT_LTO`|11.4| | | | | | | | -|`CU_JIT_MAX_REGISTERS`| | | |`hipJitOptionMaxRegisters`|1.6.0| | | | -|`CU_JIT_NEW_SM3X_OPT`| | | |`hipJitOptionSm3xOpt`|1.6.0| | | | -|`CU_JIT_NUM_INPUT_TYPES`| | | |`HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES`|5.3.0| | |5.3.0| -|`CU_JIT_NUM_OPTIONS`| | | |`hipJitOptionNumOptions`|1.6.0| | | | -|`CU_JIT_OPTIMIZATION_LEVEL`| | | |`hipJitOptionOptimizationLevel`|1.6.0| | | | +|`CU_JIT_MAX_REGISTERS`| | | |`HIPRTC_JIT_MAX_REGISTERS`|1.6.0| | | | +|`CU_JIT_NEW_SM3X_OPT`| | | |`HIPRTC_JIT_NEW_SM3X_OPT`|1.6.0| | | | +|`CU_JIT_NUM_INPUT_TYPES`| | | |`HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES`|5.3.0| | | | +|`CU_JIT_NUM_OPTIONS`| | | |`HIPRTC_JIT_NUM_OPTIONS`|1.6.0| | | | +|`CU_JIT_OPTIMIZATION_LEVEL`| | | |`HIPRTC_JIT_OPTIMIZATION_LEVEL`|1.6.0| | | | |`CU_JIT_OPTIMIZE_UNUSED_DEVICE_VARIABLES`|11.7| | | | | | | | |`CU_JIT_PREC_DIV`|11.4| | | | | | | | |`CU_JIT_PREC_SQRT`|11.4| | | | | | | | @@ -641,13 +657,24 @@ |`CU_JIT_REFERENCED_KERNEL_NAMES`|11.7| | | | | | | | |`CU_JIT_REFERENCED_VARIABLE_COUNT`|11.7| | | | | | | | |`CU_JIT_REFERENCED_VARIABLE_NAMES`|11.7| | | | | | | | -|`CU_JIT_TARGET`| | | |`hipJitOptionTarget`|1.6.0| | | | -|`CU_JIT_TARGET_FROM_CUCONTEXT`| | | |`hipJitOptionTargetFromContext`|1.6.0| | | | -|`CU_JIT_THREADS_PER_BLOCK`| | | |`hipJitOptionThreadsPerBlock`|1.6.0| | | | -|`CU_JIT_WALL_TIME`| | | |`hipJitOptionWallTime`|1.6.0| | | | +|`CU_JIT_TARGET`| | | |`HIPRTC_JIT_TARGET`|1.6.0| | | | +|`CU_JIT_TARGET_FROM_CUCONTEXT`| | | |`HIPRTC_JIT_TARGET_FROM_HIPCONTEXT`|1.6.0| | | | +|`CU_JIT_THREADS_PER_BLOCK`| | | |`HIPRTC_JIT_THREADS_PER_BLOCK`|1.6.0| | | | +|`CU_JIT_WALL_TIME`| | | |`HIPRTC_JIT_WALL_TIME`|1.6.0| | | | |`CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW`|11.0| | 
|`hipKernelNodeAttributeAccessPolicyWindow`|5.2.0| | | | +|`CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_DIMENSION`|11.8| | | | | | | | +|`CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE`|11.8| | | | | | | | |`CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE`|11.0| | |`hipKernelNodeAttributeCooperative`|5.2.0| | | | |`CU_KERNEL_NODE_ATTRIBUTE_PRIORITY`|11.7| | | | | | | | +|`CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW`|11.8| | | | | | | | +|`CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION`|11.8| | | | | | | | +|`CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE`|11.8| | | | | | | | +|`CU_LAUNCH_ATTRIBUTE_COOPERATIVE`|11.8| | | | | | | | +|`CU_LAUNCH_ATTRIBUTE_IGNORE`|11.8| | | | | | | | +|`CU_LAUNCH_ATTRIBUTE_PRIORITY`|11.8| | | | | | | | +|`CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT`|11.8| | | | | | | | +|`CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION`|11.8| | | | | | | | +|`CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY`|11.8| | | | | | | | |`CU_LAUNCH_PARAM_BUFFER_POINTER`| | | |`HIP_LAUNCH_PARAM_BUFFER_POINTER`|1.6.0| | | | |`CU_LAUNCH_PARAM_BUFFER_POINTER_AS_INT`|11.7| | | | | | | | |`CU_LAUNCH_PARAM_BUFFER_SIZE`| | | |`HIP_LAUNCH_PARAM_BUFFER_SIZE`|1.6.0| | | | @@ -661,7 +688,7 @@ |`CU_LIMIT_MAX_L2_FETCH_GRANULARITY`|10.0| | | | | | | | |`CU_LIMIT_PERSISTING_L2_CACHE_SIZE`|11.0| | | | | | | | |`CU_LIMIT_PRINTF_FIFO_SIZE`| | | |`hipLimitPrintfFifoSize`|4.5.0| | | | -|`CU_LIMIT_STACK_SIZE`| | | |`hipLimitStackSize`|5.3.0| | |5.3.0| +|`CU_LIMIT_STACK_SIZE`| | | |`hipLimitStackSize`|5.3.0| | | | |`CU_MEMHOSTALLOC_DEVICEMAP`| | | |`hipHostMallocMapped`|1.6.0| | | | |`CU_MEMHOSTALLOC_PORTABLE`| | | |`hipHostMallocPortable`|1.6.0| | | | |`CU_MEMHOSTALLOC_WRITECOMBINED`| | | |`hipHostMallocWriteCombined`|1.6.0| | | | @@ -848,6 +875,8 @@ |`CU_TARGET_COMPUTE_80`|11.0| | | | | | | | |`CU_TARGET_COMPUTE_86`|11.1| | | | | | | | |`CU_TARGET_COMPUTE_87`|11.7| | | | | | | | +|`CU_TARGET_COMPUTE_89`|11.8| | | | | | | | +|`CU_TARGET_COMPUTE_90`|11.8| | | | | | | | |`CU_TRSA_OVERRIDE_FORMAT`| 
| | |`HIP_TRSA_OVERRIDE_FORMAT`|1.7.0| | | | |`CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION`|11.0| | | | | | | | |`CU_TRSF_NORMALIZED_COORDINATES`| | | |`HIP_TRSF_NORMALIZED_COORDINATES`|1.7.0| | | | @@ -860,7 +889,7 @@ |`CU_TR_ADDRESS_MODE_WRAP`| | | |`HIP_TR_ADDRESS_MODE_WRAP`|3.5.0| | | | |`CU_TR_FILTER_MODE_LINEAR`| | | |`HIP_TR_FILTER_MODE_LINEAR`|3.5.0| | | | |`CU_TR_FILTER_MODE_POINT`| | | |`HIP_TR_FILTER_MODE_POINT`|3.5.0| | | | -|`CU_USER_OBJECT_NO_DESTRUCTOR_SYNC`|11.3| | |`hipUserObjectNoDestructorSync`|5.3.0| | |5.3.0| +|`CU_USER_OBJECT_NO_DESTRUCTOR_SYNC`|11.3| | |`hipUserObjectNoDestructorSync`|5.3.0| | | | |`CUaccessPolicyWindow`|11.0| | |`hipAccessPolicyWindow`|5.2.0| | | | |`CUaccessPolicyWindow_st`|11.0| | |`hipAccessPolicyWindow`|5.2.0| | | | |`CUaccessProperty`|11.0| | |`hipAccessProperty`|5.2.0| | | | @@ -878,6 +907,8 @@ |`CUarray_format`| | | |`hipArray_Format`|1.7.0| | | | |`CUarray_format_enum`| | | |`hipArray_Format`|1.7.0| | | | |`CUarray_st`| | | |`hipArray`|1.7.0| | | | +|`CUclusterSchedulingPolicy`|11.8| | | | | | | | +|`CUclusterSchedulingPolicy_enum`|11.8| | | | | | | | |`CUcomputemode`| | | |`hipComputeMode`|1.9.0| | | | |`CUcomputemode_enum`| | | |`hipComputeMode`|1.9.0| | | | |`CUcontext`| | | |`hipCtx_t`|1.6.0| | | | @@ -925,6 +956,8 @@ |`CUevent_flags_enum`| | | | | | | | | |`CUevent_record_flags`|11.1| | | | | | | | |`CUevent_record_flags_enum`|11.1| | | | | | | | +|`CUevent_sched_flags`|11.8| | | | | | | | +|`CUevent_sched_flags_enum`|11.8| | | | | | | | |`CUevent_st`| | | |`ihipEvent_t`|1.6.0| | | | |`CUevent_wait_flags`|11.1| | | | | | | | |`CUevent_wait_flags_enum`| | | | | | | | | @@ -967,8 +1000,8 @@ |`CUgraphExec_st`|10.0| | |`hipGraphExec`|4.3.0| | | | |`CUgraphInstantiate_flags`|11.4| | |`hipGraphInstantiateFlags`|5.2.0| | | | |`CUgraphInstantiate_flags_enum`|11.4| | |`hipGraphInstantiateFlags`|5.2.0| | | | -|`CUgraphMem_attribute`|11.4| | |`hipGraphMemAttributeType`|5.3.0| | |5.3.0| -|`CUgraphMem_attribute_enum`|11.4| | 
|`hipGraphMemAttributeType`|5.3.0| | |5.3.0| +|`CUgraphMem_attribute`|11.4| | |`hipGraphMemAttributeType`|5.3.0| | | | +|`CUgraphMem_attribute_enum`|11.4| | |`hipGraphMemAttributeType`|5.3.0| | | | |`CUgraphNode`|10.0| | |`hipGraphNode_t`|4.3.0| | | | |`CUgraphNodeType`|10.0| | |`hipGraphNodeType`|4.3.0| | | | |`CUgraphNodeType_enum`|10.0| | |`hipGraphNodeType`|4.3.0| | | | @@ -989,8 +1022,8 @@ |`CUipcMemHandle_v1`|11.3| | |`hipIpcMemHandle_t`|1.6.0| | | | |`CUipcMem_flags`| | | | | | | | | |`CUipcMem_flags_enum`| | | | | | | | | -|`CUjitInputType`| | | |`hiprtcJITInputType`|5.3.0| | |5.3.0| -|`CUjitInputType_enum`| | | |`hiprtcJITInputType`|5.3.0| | |5.3.0| +|`CUjitInputType`| | | |`hiprtcJITInputType`|5.3.0| | | | +|`CUjitInputType_enum`| | | |`hiprtcJITInputType`|5.3.0| | | | |`CUjit_cacheMode`| | | | | | | | | |`CUjit_cacheMode_enum`| | | | | | | | | |`CUjit_fallback`| | | | | | | | | @@ -1000,14 +1033,22 @@ |`CUjit_target`| | | | | | | | | |`CUjit_target_enum`| | | | | | | | | |`CUkernelNodeAttrID`|11.0| | |`hipKernelNodeAttrID`|5.2.0| | | | -|`CUkernelNodeAttrID_enum`|11.0| | |`hipKernelNodeAttrID`|5.2.0| | | | +|`CUkernelNodeAttrID_enum`|11.0| |11.8|`hipKernelNodeAttrID`|5.2.0| | | | |`CUkernelNodeAttrValue`|11.0| | |`hipKernelNodeAttrValue`|5.2.0| | | | -|`CUkernelNodeAttrValue_union`|11.0| | |`hipKernelNodeAttrValue`|5.2.0| | | | +|`CUkernelNodeAttrValue_union`|11.0| |11.8|`hipKernelNodeAttrValue`|5.2.0| | | | |`CUkernelNodeAttrValue_v1`|11.3| | |`hipKernelNodeAttrValue`|5.2.0| | | | +|`CUlaunchAttribute`|11.8| | | | | | | | +|`CUlaunchAttributeID`|11.8| | | | | | | | +|`CUlaunchAttributeID_enum`|11.8| | | | | | | | +|`CUlaunchAttributeValue`|11.8| | | | | | | | +|`CUlaunchAttributeValue_union`|11.8| | | | | | | | +|`CUlaunchAttribute_st`|11.8| | | | | | | | +|`CUlaunchConfig`|11.8| | | | | | | | +|`CUlaunchConfig_st`|11.8| | | | | | | | |`CUlimit`| | | |`hipLimit_t`|1.6.0| | | | |`CUlimit_enum`| | | |`hipLimit_t`|1.6.0| | | | -|`CUlinkState`| | | 
|`hiprtcLinkState`|5.3.0| | |5.3.0| -|`CUlinkState_st`| | | |`ihiprtcLinkState`|5.3.0| | |5.3.0| +|`CUlinkState`| | | |`hiprtcLinkState`|5.3.0| | | | +|`CUlinkState_st`| | | |`ihiprtcLinkState`|5.3.0| | | | |`CUmemAccessDesc`|10.2| | |`hipMemAccessDesc`|5.2.0| | | | |`CUmemAccessDesc_st`|10.2| | |`hipMemAccessDesc`|5.2.0| | | | |`CUmemAccessDesc_v1`|11.3| | |`hipMemAccessDesc`|5.2.0| | | | @@ -1111,17 +1152,29 @@ |`CUtexObject_v1`|11.3| | |`hipTextureObject_t`|1.7.0| | | | |`CUtexref`| | | |`hipTexRef`|3.10.0| | | | |`CUtexref_st`| | | |`textureReference`|1.6.0| | | | -|`CUuserObject`|11.3| | |`hipUserObject_t`|5.3.0| | |5.3.0| -|`CUuserObjectRetain_flags`|11.3| | |`hipUserObjectRetainFlags`|5.3.0| | |5.3.0| -|`CUuserObjectRetain_flags_enum`|11.3| | |`hipUserObjectRetainFlags`|5.3.0| | |5.3.0| -|`CUuserObject_flags`|11.3| | |`hipUserObjectFlags`|5.3.0| | |5.3.0| -|`CUuserObject_flags_enum`|11.3| | |`hipUserObjectFlags`|5.3.0| | |5.3.0| -|`CUuserObject_st`|11.3| | |`hipUserObject`|5.3.0| | |5.3.0| +|`CUuserObject`|11.3| | |`hipUserObject_t`|5.3.0| | | | +|`CUuserObjectRetain_flags`|11.3| | |`hipUserObjectRetainFlags`|5.3.0| | | | +|`CUuserObjectRetain_flags_enum`|11.3| | |`hipUserObjectRetainFlags`|5.3.0| | | | +|`CUuserObject_flags`|11.3| | |`hipUserObjectFlags`|5.3.0| | | | +|`CUuserObject_flags_enum`|11.3| | |`hipUserObjectFlags`|5.3.0| | | | +|`CUuserObject_st`|11.3| | |`hipUserObject`|5.3.0| | | | |`CUuuid`| | | |`hipUUID`|5.2.0| | | | |`CUuuid_st`| | | |`hipUUID_t`|5.2.0| | | | |`GLenum`| | | |`GLenum`|5.1.0| | | | |`GLuint`| | | |`GLuint`|5.1.0| | | | +|`NVCL_CTX_SCHED_AUTO`|11.8| | | | | | | | +|`NVCL_CTX_SCHED_BLOCKING_SYNC`|11.8| | | | | | | | +|`NVCL_CTX_SCHED_SPIN`|11.8| | | | | | | | +|`NVCL_CTX_SCHED_YIELD`|11.8| | | | | | | | +|`NVCL_EVENT_SCHED_AUTO`|11.8| | | | | | | | +|`NVCL_EVENT_SCHED_BLOCKING_SYNC`|11.8| | | | | | | | +|`NVCL_EVENT_SCHED_SPIN`|11.8| | | | | | | | +|`NVCL_EVENT_SCHED_YIELD`|11.8| | | | | | | | |`__CUDACC__`| | | 
|`__HIPCC__`|1.6.0| | | | +|`cl_context_flags`|11.8| | | | | | | | +|`cl_context_flags_enum`|11.8| | | | | | | | +|`cl_event_flags`|11.8| | | | | | | | +|`cl_event_flags_enum`|11.8| | | | | | | | |`cudaError_enum`| | | |`hipError_t`|1.5.0| | | | |`memoryBarrier`|11.7| | | | | | | | @@ -1129,8 +1182,8 @@ |**CUDA**|**A**|**D**|**R**|**HIP**|**A**|**D**|**R**|**E**| |:--|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:| -|`cuGetErrorName`| | | | | | | | | -|`cuGetErrorString`| | | | | | | | | +|`cuGetErrorName`| | | |`hipDrvGetErrorName`|5.4.0| | |5.4.0| +|`cuGetErrorString`| | | |`hipDrvGetErrorString`|5.4.0| | |5.4.0| ## **3. Initialization** @@ -1209,7 +1262,7 @@ |`cuCtxResetPersistingL2Cache`|11.0| | | | | | | | |`cuCtxSetCacheConfig`| | | |`hipCtxSetCacheConfig`|1.9.0|1.9.0| | | |`cuCtxSetCurrent`| | | |`hipCtxSetCurrent`|1.6.0|1.9.0| | | -|`cuCtxSetLimit`| | | |`hipDeviceSetLimit`|5.3.0| | |5.3.0| +|`cuCtxSetLimit`| | | |`hipDeviceSetLimit`|5.3.0| | | | |`cuCtxSetSharedMemConfig`| | | |`hipCtxSetSharedMemConfig`|1.9.0|1.9.0| | | |`cuCtxSynchronize`| | | |`hipCtxSynchronize`|1.9.0|1.9.0| | | @@ -1224,14 +1277,14 @@ |**CUDA**|**A**|**D**|**R**|**HIP**|**A**|**D**|**R**|**E**| |:--|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:| -|`cuLinkAddData`| | | |`hiprtcLinkAddData`|5.3.0| | |5.3.0| -|`cuLinkAddData_v2`| | | |`hiprtcLinkAddData`|5.3.0| | |5.3.0| -|`cuLinkAddFile`| | | |`hiprtcLinkAddFile`|5.3.0| | |5.3.0| -|`cuLinkAddFile_v2`| | | |`hiprtcLinkAddFile`|5.3.0| | |5.3.0| -|`cuLinkComplete`| | | |`hiprtcLinkComplete`|5.3.0| | |5.3.0| -|`cuLinkCreate`| | | |`hiprtcLinkCreate`|5.3.0| | |5.3.0| -|`cuLinkCreate_v2`| | | |`hiprtcLinkCreate`|5.3.0| | |5.3.0| -|`cuLinkDestroy`| | | |`hiprtcLinkDestroy`|5.3.0| | |5.3.0| +|`cuLinkAddData`| | | |`hiprtcLinkAddData`|5.3.0| | | | +|`cuLinkAddData_v2`| | | |`hiprtcLinkAddData`|5.3.0| | | | +|`cuLinkAddFile`| | | |`hiprtcLinkAddFile`|5.3.0| | | | +|`cuLinkAddFile_v2`| | | |`hiprtcLinkAddFile`|5.3.0| | | | +|`cuLinkComplete`| | | |`hiprtcLinkComplete`|5.3.0| 
| | | +|`cuLinkCreate`| | | |`hiprtcLinkCreate`|5.3.0| | | | +|`cuLinkCreate_v2`| | | |`hiprtcLinkCreate`|5.3.0| | | | +|`cuLinkDestroy`| | | |`hiprtcLinkDestroy`|5.3.0| | | | |`cuModuleGetFunction`| | | |`hipModuleGetFunction`|1.6.0| | | | |`cuModuleGetGlobal`| | | |`hipModuleGetGlobal`|1.6.0| | | | |`cuModuleGetGlobal_v2`| | | |`hipModuleGetGlobal`|1.6.0| | | | @@ -1487,6 +1540,7 @@ |`cuLaunchCooperativeKernelMultiDevice`|9.0|11.3| | | | | | | |`cuLaunchHostFunc`|10.0| | |`hipLaunchHostFunc`|5.2.0| | | | |`cuLaunchKernel`| | | |`hipModuleLaunchKernel`|1.6.0| | | | +|`cuLaunchKernelEx`|11.8| | | | | | | | ## **20. Execution Control [DEPRECATED]** @@ -1507,9 +1561,9 @@ |**CUDA**|**A**|**D**|**R**|**HIP**|**A**|**D**|**R**|**E**| |:--|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:| -|`cuDeviceGetGraphMemAttribute`|11.4| | |`hipDeviceGetGraphMemAttribute`|5.3.0| | |5.3.0| -|`cuDeviceGraphMemTrim`|11.4| | |`hipDeviceGraphMemTrim`|5.3.0| | |5.3.0| -|`cuDeviceSetGraphMemAttribute`|11.4| | |`hipDeviceSetGraphMemAttribute`|5.3.0| | |5.3.0| +|`cuDeviceGetGraphMemAttribute`|11.4| | |`hipDeviceGetGraphMemAttribute`|5.3.0| | | | +|`cuDeviceGraphMemTrim`|11.4| | |`hipDeviceGraphMemTrim`|5.3.0| | | | +|`cuDeviceSetGraphMemAttribute`|11.4| | |`hipDeviceSetGraphMemAttribute`|5.3.0| | | | |`cuGraphAddBatchMemOpNode`|11.7| | | | | | | | |`cuGraphAddChildGraphNode`|10.0| | |`hipGraphAddChildGraphNode`|5.0.0| | | | |`cuGraphAddDependencies`|10.0| | |`hipGraphAddDependencies`|4.5.0| | | | @@ -1578,13 +1632,13 @@ |`cuGraphNodeGetEnabled`|11.6| | | | | | | | |`cuGraphNodeGetType`|10.0| | |`hipGraphNodeGetType`|5.0.0| | | | |`cuGraphNodeSetEnabled`|11.6| | | | | | | | -|`cuGraphReleaseUserObject`|11.3| | |`hipGraphReleaseUserObject`|5.3.0| | |5.3.0| +|`cuGraphReleaseUserObject`|11.3| | |`hipGraphReleaseUserObject`|5.3.0| | | | |`cuGraphRemoveDependencies`|10.0| | |`hipGraphRemoveDependencies`|5.0.0| | | | -|`cuGraphRetainUserObject`|11.3| | |`hipGraphRetainUserObject`|5.3.0| | |5.3.0| 
-|`cuGraphUpload`|11.1| | |`hipGraphUpload`|5.3.0| | |5.3.0| -|`cuUserObjectCreate`|11.3| | |`hipUserObjectCreate`|5.3.0| | |5.3.0| -|`cuUserObjectRelease`|11.3| | |`hipUserObjectRelease`|5.3.0| | |5.3.0| -|`cuUserObjectRetain`|11.3| | |`hipUserObjectRetain`|5.3.0| | |5.3.0| +|`cuGraphRetainUserObject`|11.3| | |`hipGraphRetainUserObject`|5.3.0| | | | +|`cuGraphUpload`|11.1| | |`hipGraphUpload`|5.3.0| | | | +|`cuUserObjectCreate`|11.3| | |`hipUserObjectCreate`|5.3.0| | | | +|`cuUserObjectRelease`|11.3| | |`hipUserObjectRelease`|5.3.0| | | | +|`cuUserObjectRetain`|11.3| | |`hipUserObjectRetain`|5.3.0| | | | ## **22. Occupancy** @@ -1593,8 +1647,10 @@ |`cuOccupancyAvailableDynamicSMemPerBlock`|11.0| | | | | | | | |`cuOccupancyMaxActiveBlocksPerMultiprocessor`| | | |`hipModuleOccupancyMaxActiveBlocksPerMultiprocessor`|3.5.0| | | | |`cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags`| | | |`hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags`|3.5.0| | | | +|`cuOccupancyMaxActiveClusters`|11.8| | | | | | | | |`cuOccupancyMaxPotentialBlockSize`| | | |`hipModuleOccupancyMaxPotentialBlockSize`|3.5.0| | | | |`cuOccupancyMaxPotentialBlockSizeWithFlags`| | | |`hipModuleOccupancyMaxPotentialBlockSizeWithFlags`|3.5.0| | | | +|`cuOccupancyMaxPotentialClusterSize`|11.8| | | | | | | | ## **23. 
Texture Reference Management [DEPRECATED]** diff --git a/doc/markdown/CUDA_RTC_API_supported_by_HIP.md b/doc/markdown/CUDA_RTC_API_supported_by_HIP.md index 22d82bba..ec6c5c48 100644 --- a/doc/markdown/CUDA_RTC_API_supported_by_HIP.md +++ b/doc/markdown/CUDA_RTC_API_supported_by_HIP.md @@ -27,8 +27,8 @@ |`nvrtcCompileProgram`| | | |`hiprtcCompileProgram`|2.6.0| | | | |`nvrtcCreateProgram`| | | |`hiprtcCreateProgram`|2.6.0| | | | |`nvrtcDestroyProgram`| | | |`hiprtcDestroyProgram`|2.6.0| | | | -|`nvrtcGetCUBIN`|11.1| | |`hiprtcGetBitcode`|5.3.0| | |5.3.0| -|`nvrtcGetCUBINSize`|11.1| | |`hiprtcGetBitcodeSize`|5.3.0| | |5.3.0| +|`nvrtcGetCUBIN`|11.1| | |`hiprtcGetBitcode`|5.3.0| | | | +|`nvrtcGetCUBINSize`|11.1| | |`hiprtcGetBitcodeSize`|5.3.0| | | | |`nvrtcGetErrorString`| | | |`hiprtcGetErrorString`|2.6.0| | | | |`nvrtcGetLoweredName`|8.0| | |`hiprtcGetLoweredName`|2.6.0| | | | |`nvrtcGetNVVM`|11.4| | | | | | | | diff --git a/doc/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md b/doc/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md index a8314136..d9a3b2ce 100644 --- a/doc/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md +++ b/doc/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md @@ -20,7 +20,7 @@ |`cudaDeviceGetTexture1DLinearMaxWidth`|11.1| | | | | | | | |`cudaDeviceReset`| | | |`hipDeviceReset`|1.6.0| | | | |`cudaDeviceSetCacheConfig`| | | |`hipDeviceSetCacheConfig`|1.6.0| | | | -|`cudaDeviceSetLimit`| | | |`hipDeviceSetLimit`|5.3.0| | |5.3.0| +|`cudaDeviceSetLimit`| | | |`hipDeviceSetLimit`|5.3.0| | | | |`cudaDeviceSetMemPool`|11.2| | |`hipDeviceSetMemPool`|5.2.0| | | | |`cudaDeviceSetSharedMemConfig`| | | |`hipDeviceSetSharedMemConfig`|1.6.0| | | | |`cudaDeviceSynchronize`| | | |`hipDeviceSynchronize`|1.6.0| | | | @@ -122,6 +122,7 @@ |`cudaLaunchCooperativeKernelMultiDevice`|9.0|11.3| |`hipLaunchCooperativeKernelMultiDevice`|2.6.0| | | | |`cudaLaunchHostFunc`|10.0| | |`hipLaunchHostFunc`|5.2.0| | | | |`cudaLaunchKernel`| | | 
|`hipLaunchKernel`|1.6.0| | | | +|`cudaLaunchKernelExC`|11.8| | | | | | | | |`cudaSetDoubleForDevice`| |10.0| | | | | | | |`cudaSetDoubleForHost`| |10.0| | | | | | | @@ -132,10 +133,12 @@ |`cudaOccupancyAvailableDynamicSMemPerBlock`|11.0| | | | | | | | |`cudaOccupancyMaxActiveBlocksPerMultiprocessor`| | | |`hipOccupancyMaxActiveBlocksPerMultiprocessor`|1.6.0| | | | |`cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags`| | | |`hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags`|2.6.0| | | | +|`cudaOccupancyMaxActiveClusters`|11.8| | | | | | | | |`cudaOccupancyMaxPotentialBlockSize`| | | |`hipOccupancyMaxPotentialBlockSize`|1.6.0| | | | |`cudaOccupancyMaxPotentialBlockSizeVariableSMem`| | | | | | | | | |`cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags`| | | | | | | | | |`cudaOccupancyMaxPotentialBlockSizeWithFlags`| | | |`hipOccupancyMaxPotentialBlockSizeWithFlags`|3.5.0| | | | +|`cudaOccupancyMaxPotentialClusterSize`|11.8| | | | | | | | ## **9. Memory Management** @@ -393,10 +396,12 @@ |**CUDA**|**A**|**D**|**R**|**HIP**|**A**|**D**|**R**|**E**| |:--|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:| |`cudaCreateTextureObject`| | | |`hipCreateTextureObject`|1.7.0| | | | +|`cudaCreateTextureObject_v2`|11.8| | | | | | | | |`cudaDestroyTextureObject`| | | |`hipDestroyTextureObject`|1.7.0| | | | |`cudaGetTextureObjectResourceDesc`| | | |`hipGetTextureObjectResourceDesc`|1.7.0| | | | |`cudaGetTextureObjectResourceViewDesc`| | | |`hipGetTextureObjectResourceViewDesc`|1.7.0| | | | |`cudaGetTextureObjectTextureDesc`| | | |`hipGetTextureObjectTextureDesc`|1.7.0| | | | +|`cudaGetTextureObjectTextureDesc_v2`|11.8| | | | | | | | ## **28. 
Surface Object Management** @@ -417,9 +422,9 @@ |**CUDA**|**A**|**D**|**R**|**HIP**|**A**|**D**|**R**|**E**| |:--|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:| -|`cudaDeviceGetGraphMemAttribute`|11.4| | |`hipDeviceGetGraphMemAttribute`|5.3.0| | |5.3.0| -|`cudaDeviceGraphMemTrim`|11.4| | |`hipDeviceGraphMemTrim`|5.3.0| | |5.3.0| -|`cudaDeviceSetGraphMemAttribute`|11.4| | |`hipDeviceSetGraphMemAttribute`|5.3.0| | |5.3.0| +|`cudaDeviceGetGraphMemAttribute`|11.4| | |`hipDeviceGetGraphMemAttribute`|5.3.0| | | | +|`cudaDeviceGraphMemTrim`|11.4| | |`hipDeviceGraphMemTrim`|5.3.0| | | | +|`cudaDeviceSetGraphMemAttribute`|11.4| | |`hipDeviceSetGraphMemAttribute`|5.3.0| | | | |`cudaGraphAddChildGraphNode`|10.0| | |`hipGraphAddChildGraphNode`|5.0.0| | | | |`cudaGraphAddDependencies`|10.0| | |`hipGraphAddDependencies`|4.5.0| | | | |`cudaGraphAddEmptyNode`|10.0| | |`hipGraphAddEmptyNode`|4.5.0| | | | @@ -491,13 +496,13 @@ |`cudaGraphNodeGetDependentNodes`|11.0| | |`hipGraphNodeGetDependentNodes`|5.0.0| | | | |`cudaGraphNodeGetType`|11.0| | |`hipGraphNodeGetType`|5.0.0| | | | |`cudaGraphNodeSetEnabled`|11.6| | | | | | | | -|`cudaGraphReleaseUserObject`|11.3| | |`hipGraphReleaseUserObject`|5.3.0| | |5.3.0| +|`cudaGraphReleaseUserObject`|11.3| | |`hipGraphReleaseUserObject`|5.3.0| | | | |`cudaGraphRemoveDependencies`|11.0| | |`hipGraphRemoveDependencies`|5.0.0| | | | -|`cudaGraphRetainUserObject`|11.3| | |`hipGraphRetainUserObject`|5.3.0| | |5.3.0| -|`cudaGraphUpload`|11.1| | |`hipGraphUpload`|5.3.0| | |5.3.0| -|`cudaUserObjectCreate`|11.3| | |`hipUserObjectCreate`|5.3.0| | |5.3.0| -|`cudaUserObjectRelease`|11.3| | |`hipUserObjectRelease`|5.3.0| | |5.3.0| -|`cudaUserObjectRetain`|11.3| | |`hipUserObjectRetain`|5.3.0| | |5.3.0| +|`cudaGraphRetainUserObject`|11.3| | |`hipGraphRetainUserObject`|5.3.0| | | | +|`cudaGraphUpload`|11.1| | |`hipGraphUpload`|5.3.0| | | | +|`cudaUserObjectCreate`|11.3| | |`hipUserObjectCreate`|5.3.0| | | | +|`cudaUserObjectRelease`|11.3| | |`hipUserObjectRelease`|5.3.0| 
| | | +|`cudaUserObjectRetain`|11.3| | |`hipUserObjectRetain`|5.3.0| | | | ## **31. Driver Entry Point Access** @@ -610,6 +615,10 @@ Unsupported |`cudaChannelFormatKindUnsignedNormalized8X1`|11.5| | | | | | | | |`cudaChannelFormatKindUnsignedNormalized8X2`|11.5| | | | | | | | |`cudaChannelFormatKindUnsignedNormalized8X4`|11.5| | | | | | | | +|`cudaClusterSchedulingPolicy`|11.8| | | | | | | | +|`cudaClusterSchedulingPolicyDefault`|11.8| | | | | | | | +|`cudaClusterSchedulingPolicyLoadBalancing`|11.8| | | | | | | | +|`cudaClusterSchedulingPolicySpread`|11.8| | | | | | | | |`cudaComputeMode`| | | |`hipComputeMode`|1.9.0| | | | |`cudaComputeModeDefault`| | | |`hipComputeModeDefault`|1.9.0| | | | |`cudaComputeModeExclusive`| | | |`hipComputeModeExclusive`|1.9.0| | | | @@ -649,6 +658,7 @@ Unsupported |`cudaDevAttrCanMapHostMemory`| | | |`hipDeviceAttributeCanMapHostMemory`|2.10.0| | | | |`cudaDevAttrCanUseHostPointerForRegisteredMem`|8.0| | |`hipDeviceAttributeCanUseHostPointerForRegisteredMem`|4.3.0| | | | |`cudaDevAttrClockRate`| | | |`hipDeviceAttributeClockRate`|1.6.0| | | | +|`cudaDevAttrClusterLaunch`|11.8| | | | | | | | |`cudaDevAttrComputeCapabilityMajor`| | | |`hipDeviceAttributeComputeCapabilityMajor`|1.6.0| | | | |`cudaDevAttrComputeCapabilityMinor`| | | |`hipDeviceAttributeComputeCapabilityMinor`|1.6.0| | | | |`cudaDevAttrComputeMode`| | | |`hipDeviceAttributeComputeMode`|1.6.0| | | | @@ -898,6 +908,7 @@ Unsupported |`cudaErrorInsufficientDriver`| | | |`hipErrorInsufficientDriver`|1.7.0| | | | |`cudaErrorInvalidAddressSpace`| | | | | | | | | |`cudaErrorInvalidChannelDescriptor`| | | | | | | | | +|`cudaErrorInvalidClusterSize`|11.8| | | | | | | | |`cudaErrorInvalidConfiguration`| | | |`hipErrorInvalidConfiguration`|1.6.0| | | | |`cudaErrorInvalidDevice`| | | |`hipErrorInvalidDevice`|1.6.0| | | | |`cudaErrorInvalidDeviceFunction`| | | |`hipErrorInvalidDeviceFunction`|1.6.0| | | | @@ -934,6 +945,7 @@ Unsupported |`cudaErrorMisalignedAddress`| | | | | | | | | 
|`cudaErrorMissingConfiguration`| | | |`hipErrorMissingConfiguration`|1.6.0| | | | |`cudaErrorMixedDeviceExecution`| |3.1| | | | | | | +|`cudaErrorMpsClientTerminated`|11.8| | | | | | | | |`cudaErrorMpsConnectionFailed`|11.4| | | | | | | | |`cudaErrorMpsMaxClientsReached`|11.4| | | | | | | | |`cudaErrorMpsMaxConnectionsReached`|11.4| | | | | | | | @@ -1045,9 +1057,15 @@ Unsupported |`cudaFormatModeAuto`| | | | | | | | | |`cudaFormatModeForced`| | | | | | | | | |`cudaFuncAttribute`|9.0| | |`hipFuncAttribute`|3.9.0| | | | +|`cudaFuncAttributeClusterDimMustBeSet`|11.8| | | | | | | | +|`cudaFuncAttributeClusterSchedulingPolicyPreference`|11.8| | | | | | | | |`cudaFuncAttributeMax`|9.0| | |`hipFuncAttributeMax`|3.9.0| | | | |`cudaFuncAttributeMaxDynamicSharedMemorySize`|9.0| | |`hipFuncAttributeMaxDynamicSharedMemorySize`|3.9.0| | | | +|`cudaFuncAttributeNonPortableClusterSizeAllowed`|11.8| | | | | | | | |`cudaFuncAttributePreferredSharedMemoryCarveout`|9.0| | |`hipFuncAttributePreferredSharedMemoryCarveout`|3.9.0| | | | +|`cudaFuncAttributeRequiredClusterDepth`|11.8| | | | | | | | +|`cudaFuncAttributeRequiredClusterHeight`|11.8| | | | | | | | +|`cudaFuncAttributeRequiredClusterWidth`|11.8| | | | | | | | |`cudaFuncAttributes`| | | |`hipFuncAttributes`|1.9.0| | | | |`cudaFuncCache`| | | |`hipFuncCache_t`|1.6.0| | | | |`cudaFuncCachePreferEqual`| | | |`hipFuncCachePreferEqual`|1.6.0| | | | @@ -1092,17 +1110,17 @@ Unsupported |`cudaGraphInstantiateFlagAutoFreeOnLaunch`|11.4| | |`hipGraphInstantiateFlagAutoFreeOnLaunch`|5.2.0| | | | |`cudaGraphInstantiateFlagUseNodePriority`|11.7| | | | | | | | |`cudaGraphInstantiateFlags`|11.4| | |`hipGraphInstantiateFlags`|5.2.0| | | | -|`cudaGraphMemAttrReservedMemCurrent`|11.4| | |`hipGraphMemAttrReservedMemCurrent`|5.3.0| | |5.3.0| -|`cudaGraphMemAttrReservedMemHigh`|11.4| | |`hipGraphMemAttrReservedMemHigh`|5.3.0| | |5.3.0| -|`cudaGraphMemAttrUsedMemCurrent`|11.4| | |`hipGraphMemAttrUsedMemCurrent`|5.3.0| | |5.3.0| 
-|`cudaGraphMemAttrUsedMemHigh`|11.4| | |`hipGraphMemAttrUsedMemHigh`|5.3.0| | |5.3.0| -|`cudaGraphMemAttributeType`|11.4| | |`hipGraphMemAttributeType`|5.3.0| | |5.3.0| +|`cudaGraphMemAttrReservedMemCurrent`|11.4| | |`hipGraphMemAttrReservedMemCurrent`|5.3.0| | | | +|`cudaGraphMemAttrReservedMemHigh`|11.4| | |`hipGraphMemAttrReservedMemHigh`|5.3.0| | | | +|`cudaGraphMemAttrUsedMemCurrent`|11.4| | |`hipGraphMemAttrUsedMemCurrent`|5.3.0| | | | +|`cudaGraphMemAttrUsedMemHigh`|11.4| | |`hipGraphMemAttrUsedMemHigh`|5.3.0| | | | +|`cudaGraphMemAttributeType`|11.4| | |`hipGraphMemAttributeType`|5.3.0| | | | |`cudaGraphNodeType`|10.0| | |`hipGraphNodeType`|4.3.0| | | | |`cudaGraphNodeTypeCount`|10.0| | |`hipGraphNodeTypeCount`|4.3.0| | | | |`cudaGraphNodeTypeEmpty`|10.0| | |`hipGraphNodeTypeEmpty`|4.3.0| | | | |`cudaGraphNodeTypeEventRecord`|11.1| | |`hipGraphNodeTypeEventRecord`|4.3.0| | | | -|`cudaGraphNodeTypeExtSemaphoreSignal`|11.4| | |`hipGraphNodeTypeExtSemaphoreSignal`|5.3.0| | |5.3.0| -|`cudaGraphNodeTypeExtSemaphoreWait`|11.4| | |`hipGraphNodeTypeExtSemaphoreWait`|5.3.0| | |5.3.0| +|`cudaGraphNodeTypeExtSemaphoreSignal`|11.4| | |`hipGraphNodeTypeExtSemaphoreSignal`|5.3.0| | | | +|`cudaGraphNodeTypeExtSemaphoreWait`|11.4| | |`hipGraphNodeTypeExtSemaphoreWait`|5.3.0| | | | |`cudaGraphNodeTypeGraph`|10.0| | |`hipGraphNodeTypeGraph`|4.3.0| | | | |`cudaGraphNodeTypeHost`|10.0| | |`hipGraphNodeTypeHost`|4.3.0| | | | |`cudaGraphNodeTypeKernel`|10.0| | |`hipGraphNodeTypeKernel`|4.3.0| | | | @@ -1112,7 +1130,7 @@ Unsupported |`cudaGraphNodeTypeMemset`|10.0| | |`hipGraphNodeTypeMemset`|4.3.0| | | | |`cudaGraphNodeTypeWaitEvent`|11.1| | |`hipGraphNodeTypeWaitEvent`|4.3.0| | | | |`cudaGraphNode_t`|10.0| | |`hipGraphNode_t`|4.3.0| | | | -|`cudaGraphUserObjectMove`|11.3| | |`hipGraphUserObjectMove`|5.3.0| | |5.3.0| +|`cudaGraphUserObjectMove`|11.3| | |`hipGraphUserObjectMove`|5.3.0| | | | |`cudaGraph_t`|10.0| | |`hipGraph_t`|4.3.0| | | | |`cudaGraphicsCubeFace`| | | | | | | | 
| |`cudaGraphicsCubeFaceNegativeX`| | | | | | | | | @@ -1153,10 +1171,27 @@ Unsupported |`cudaKernelNodeAttrID`|11.0| | |`hipKernelNodeAttrID`|5.2.0| | | | |`cudaKernelNodeAttrValue`|11.0| | |`hipKernelNodeAttrValue`|5.2.0| | | | |`cudaKernelNodeAttributeAccessPolicyWindow`|11.0| | |`hipKernelNodeAttributeAccessPolicyWindow`|5.2.0| | | | +|`cudaKernelNodeAttributeClusterDimension`|11.8| | | | | | | | +|`cudaKernelNodeAttributeClusterSchedulingPolicyPreference`|11.8| | | | | | | | |`cudaKernelNodeAttributeCooperative`|11.0| | |`hipKernelNodeAttributeCooperative`|5.2.0| | | | |`cudaKernelNodeAttributePriority`|11.7| | | | | | | | |`cudaKernelNodeParams`|10.0| | |`hipKernelNodeParams`|4.3.0| | | | |`cudaKeyValuePair`| | | | | | | | | +|`cudaLaunchAttribute`|11.8| | | | | | | | +|`cudaLaunchAttributeAccessPolicyWindow`|11.8| | | | | | | | +|`cudaLaunchAttributeClusterDimension`|11.8| | | | | | | | +|`cudaLaunchAttributeClusterSchedulingPolicyPreference`|11.8| | | | | | | | +|`cudaLaunchAttributeCooperative`|11.8| | | | | | | | +|`cudaLaunchAttributeID`|11.8| | | | | | | | +|`cudaLaunchAttributeIgnore`|11.8| | | | | | | | +|`cudaLaunchAttributePriority`|11.8| | | | | | | | +|`cudaLaunchAttributeProgrammaticEvent`|11.8| | | | | | | | +|`cudaLaunchAttributeProgrammaticStreamSerialization`|11.8| | | | | | | | +|`cudaLaunchAttributeSynchronizationPolicy`|11.8| | | | | | | | +|`cudaLaunchAttributeValue`|11.8| | | | | | | | +|`cudaLaunchAttribute_st`|11.8| | | | | | | | +|`cudaLaunchConfig_st`|11.8| | | | | | | | +|`cudaLaunchConfig_t`|11.8| | | | | | | | |`cudaLaunchParams`|9.0| | |`hipLaunchParams`|2.6.0| | | | |`cudaLimit`| | | |`hipLimit_t`|1.6.0| | | | |`cudaLimitDevRuntimePendingLaunchCount`| | | | | | | | | @@ -1165,7 +1200,7 @@ Unsupported |`cudaLimitMaxL2FetchGranularity`|10.0| | | | | | | | |`cudaLimitPersistingL2CacheSize`|11.0| | | | | | | | |`cudaLimitPrintfFifoSize`| | | |`hipLimitPrintfFifoSize`|4.5.0| | | | -|`cudaLimitStackSize`| | | 
|`hipLimitStackSize`|5.3.0| | |5.3.0| +|`cudaLimitStackSize`| | | |`hipLimitStackSize`|5.3.0| | | | |`cudaMemAccessDesc`|11.2| | |`hipMemAccessDesc`|5.2.0| | | | |`cudaMemAccessFlags`|11.2| | |`hipMemAccessFlags`|5.2.0| | | | |`cudaMemAccessFlagsProtNone`|11.2| | |`hipMemAccessFlagsProtNone`|5.2.0| | | | @@ -1223,7 +1258,7 @@ Unsupported |`cudaMemoryType`| | | |`hipMemoryType`|1.6.0| | | | |`cudaMemoryTypeDevice`| | | |`hipMemoryTypeDevice`|1.6.0| | | | |`cudaMemoryTypeHost`| | | |`hipMemoryTypeHost`|1.6.0| | | | -|`cudaMemoryTypeManaged`|10.0| | |`hipMemoryTypeManaged`|5.3.0| | |5.3.0| +|`cudaMemoryTypeManaged`|10.0| | |`hipMemoryTypeManaged`|5.3.0| | | | |`cudaMemoryTypeUnregistered`| | | | | | | | | |`cudaMemsetParams`|10.0| | |`hipMemsetParams`|4.3.0| | | | |`cudaMipmappedArray`| | | |`hipMipmappedArray`|1.7.0| | | | @@ -1334,10 +1369,10 @@ Unsupported |`cudaTextureTypeCubemap`| | | |`hipTextureTypeCubemap`|1.7.0| | | | |`cudaTextureTypeCubemapLayered`| | | |`hipTextureTypeCubemapLayered`|1.7.0| | | | |`cudaUUID_t`| | | |`hipUUID`|5.2.0| | | | -|`cudaUserObjectFlags`|11.3| | |`hipUserObjectFlags`|5.3.0| | |5.3.0| -|`cudaUserObjectNoDestructorSync`|11.3| | |`hipUserObjectNoDestructorSync`|5.3.0| | |5.3.0| -|`cudaUserObjectRetainFlags`|11.3| | |`hipUserObjectRetainFlags`|5.3.0| | |5.3.0| -|`cudaUserObject_t`|11.3| | |`hipUserObject_t`|5.3.0| | |5.3.0| +|`cudaUserObjectFlags`|11.3| | |`hipUserObjectFlags`|5.3.0| | | | +|`cudaUserObjectNoDestructorSync`|11.3| | |`hipUserObjectNoDestructorSync`|5.3.0| | | | +|`cudaUserObjectRetainFlags`|11.3| | |`hipUserObjectRetainFlags`|5.3.0| | | | +|`cudaUserObject_t`|11.3| | |`hipUserObject_t`|5.3.0| | | | |`libraryPropertyType`|8.0| | | | | | | | |`libraryPropertyType_t`|8.0| | | | | | | | |`surfaceReference`| | | |`surfaceReference`|1.9.0| | | | diff --git a/doc/markdown/CUDNN_API_supported_by_HIP.md b/doc/markdown/CUDNN_API_supported_by_HIP.md index 7a5f5ae3..405b23c5 100644 --- a/doc/markdown/CUDNN_API_supported_by_HIP.md 
+++ b/doc/markdown/CUDNN_API_supported_by_HIP.md @@ -48,6 +48,7 @@ |`CUDNN_ATTR_EXECUTION_PLAN_COMPUTED_INTERMEDIATE_UIDS`|8.0.2| | | | | | | | |`CUDNN_ATTR_EXECUTION_PLAN_ENGINE_CONFIG`|8.0.1| | | | | | | | |`CUDNN_ATTR_EXECUTION_PLAN_HANDLE`|8.0.1| | | | | | | | +|`CUDNN_ATTR_EXECUTION_PLAN_JSON_REPRESENTATION`|8.4.0| | | | | | | | |`CUDNN_ATTR_EXECUTION_PLAN_RUN_ONLY_INTERMEDIATE_UIDS`|8.0.2| | | | | | | | |`CUDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE`|8.0.1| | | | | | | | |`CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_ATTRIBUTES`|8.0.2| | | | | | | | @@ -94,6 +95,10 @@ |`CUDNN_ATTR_OPERATION_BN_FINALIZE_UPDATED_RUNNING_VAR_DESC`|8.1.0| | | | | | | | |`CUDNN_ATTR_OPERATION_BN_FINALIZE_Y_SQ_SUM_DESC`|8.1.0| | | | | | | | |`CUDNN_ATTR_OPERATION_BN_FINALIZE_Y_SUM_DESC`|8.1.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_CONCAT_AXIS`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_CONCAT_INPLACE_INDEX`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_CONCAT_INPUT_DESCS`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_CONCAT_OUTPUT_DESC`|8.5.0| | | | | | | | |`CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_ALPHA`|8.0.1| | | | | | | | |`CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_BETA`|8.0.1| | | | | | | | |`CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_CONV_DESC`|8.0.1| | | | | | | | @@ -122,17 +127,62 @@ |`CUDNN_ATTR_OPERATION_MATMUL_CDESC`|8.1.0| | | | | | | | |`CUDNN_ATTR_OPERATION_MATMUL_DESC`|8.1.0| | | | | | | | |`CUDNN_ATTR_OPERATION_MATMUL_IRREGULARLY_STRIDED_BATCH_COUNT`|8.1.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_BWD_DBIAS_DESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_BWD_DSCALE_DESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_BWD_DXDESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_BWD_DYDESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_BWD_EPSILON_DESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_BWD_INV_VARIANCE_DESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_BWD_MEAN_DESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_BWD_MODE`|8.5.0| | | 
| | | | | +|`CUDNN_ATTR_OPERATION_NORM_BWD_PEER_STAT_DESCS`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_BWD_SCALE_DESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_BWD_XDESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_FWD_BIAS_DESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_FWD_EPSILON_DESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_FWD_EXP_AVG_FACTOR_DESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_MEAN_DESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_VAR_DESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_FWD_INV_VARIANCE_DESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_FWD_MEAN_DESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_FWD_MODE`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_MEAN_DESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_VAR_DESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_FWD_PEER_STAT_DESCS`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_FWD_PHASE`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_FWD_SCALE_DESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_FWD_XDESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_FWD_YDESC`|8.5.0| | | | | | | | |`CUDNN_ATTR_OPERATION_POINTWISE_ALPHA1`|8.0.1| | | | | | | | |`CUDNN_ATTR_OPERATION_POINTWISE_ALPHA2`|8.0.1| | | | | | | | |`CUDNN_ATTR_OPERATION_POINTWISE_BDESC`|8.0.1| | | | | | | | |`CUDNN_ATTR_OPERATION_POINTWISE_DXDESC`|8.1.0| | | | | | | | |`CUDNN_ATTR_OPERATION_POINTWISE_DYDESC`|8.1.0| | | | | | | | |`CUDNN_ATTR_OPERATION_POINTWISE_PW_DESCRIPTOR`|8.0.1| | | | | | | | +|`CUDNN_ATTR_OPERATION_POINTWISE_TDESC`|8.3.0| | | | | | | | |`CUDNN_ATTR_OPERATION_POINTWISE_XDESC`|8.0.1| | | | | | | | |`CUDNN_ATTR_OPERATION_POINTWISE_YDESC`|8.0.1| | | | | | | | |`CUDNN_ATTR_OPERATION_REDUCTION_DESC`|8.1.0| | | | | | | | |`CUDNN_ATTR_OPERATION_REDUCTION_XDESC`|8.1.0| | | | | | | | |`CUDNN_ATTR_OPERATION_REDUCTION_YDESC`|8.1.0| | | | | | | | 
+|`CUDNN_ATTR_OPERATION_RESAMPLE_BWD_ALPHA`|8.3.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_RESAMPLE_BWD_BETA`|8.3.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DESC`|8.3.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DXDESC`|8.3.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DYDESC`|8.3.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_RESAMPLE_BWD_IDXDESC`|8.3.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_RESAMPLE_FWD_ALPHA`|8.3.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_RESAMPLE_FWD_BETA`|8.3.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_RESAMPLE_FWD_DESC`|8.3.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_RESAMPLE_FWD_IDXDESC`|8.3.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_RESAMPLE_FWD_XDESC`|8.3.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_RESAMPLE_FWD_YDESC`|8.3.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_SIGNAL_FLAGDESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_SIGNAL_MODE`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_SIGNAL_VALUE`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_SIGNAL_XDESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_SIGNAL_YDESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_POINTWISE_AXIS`|8.4.0| | | | | | | | |`CUDNN_ATTR_POINTWISE_ELU_ALPHA`|8.1.0| | | | | | | | |`CUDNN_ATTR_POINTWISE_MATH_PREC`|8.0.1| | | | | | | | |`CUDNN_ATTR_POINTWISE_MODE`|8.0.1| | | | | | | | @@ -144,11 +194,21 @@ |`CUDNN_ATTR_POINTWISE_SWISH_BETA`|8.1.0| | | | | | | | |`CUDNN_ATTR_REDUCTION_COMP_TYPE`|8.1.0| | | | | | | | |`CUDNN_ATTR_REDUCTION_OPERATOR`|8.1.0| | | | | | | | +|`CUDNN_ATTR_RESAMPLE_COMP_TYPE`|8.3.0| | | | | | | | +|`CUDNN_ATTR_RESAMPLE_MODE`|8.3.0| | | | | | | | +|`CUDNN_ATTR_RESAMPLE_NAN_PROPAGATION`|8.3.0| | | | | | | | +|`CUDNN_ATTR_RESAMPLE_PADDING_MODE`|8.3.0| | | | | | | | +|`CUDNN_ATTR_RESAMPLE_POST_PADDINGS`|8.3.0| | | | | | | | +|`CUDNN_ATTR_RESAMPLE_PRE_PADDINGS`|8.3.0| | | | | | | | +|`CUDNN_ATTR_RESAMPLE_SPATIAL_DIMS`|8.3.0| | | | | | | | +|`CUDNN_ATTR_RESAMPLE_STRIDES`|8.3.0| | | | | | | | +|`CUDNN_ATTR_RESAMPLE_WINDOW_DIMS`|8.3.0| | | | | | | | 
|`CUDNN_ATTR_TENSOR_BYTE_ALIGNMENT`|8.0.1| | | | | | | | |`CUDNN_ATTR_TENSOR_DATA_TYPE`|8.0.1| | | | | | | | |`CUDNN_ATTR_TENSOR_DIMENSIONS`|8.0.1| | | | | | | | |`CUDNN_ATTR_TENSOR_IS_BY_VALUE`|8.1.0| | | | | | | | |`CUDNN_ATTR_TENSOR_IS_VIRTUAL`|8.0.1| | | | | | | | +|`CUDNN_ATTR_TENSOR_REORDERING_MODE`|8.3.0| | | | | | | | |`CUDNN_ATTR_TENSOR_STRIDES`|8.0.1| | | | | | | | |`CUDNN_ATTR_TENSOR_UNIQUE_ID`|8.0.1| | | | | | | | |`CUDNN_ATTR_TENSOR_VECTORIZED_DIMENSION`|8.0.1| | | | | | | | @@ -170,15 +230,22 @@ |`CUDNN_BACKEND_OPERATIONGRAPH_DESCRIPTOR`|8.0.1| | | | | | | | |`CUDNN_BACKEND_OPERATION_BN_BWD_WEIGHTS_DESCRIPTOR`| | | | | | | | | |`CUDNN_BACKEND_OPERATION_BN_FINALIZE_STATISTICS_DESCRIPTOR`|8.1.0| | | | | | | | +|`CUDNN_BACKEND_OPERATION_CONCAT_DESCRIPTOR`|8.5.0| | | | | | | | |`CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_DATA_DESCRIPTOR`|8.0.1| | | | | | | | |`CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_FILTER_DESCRIPTOR`|8.0.1| | | | | | | | |`CUDNN_BACKEND_OPERATION_CONVOLUTION_FORWARD_DESCRIPTOR`|8.0.1| | | | | | | | |`CUDNN_BACKEND_OPERATION_GEN_STATS_DESCRIPTOR`|8.0.1| | | | | | | | |`CUDNN_BACKEND_OPERATION_MATMUL_DESCRIPTOR`|8.1.0| | | | | | | | +|`CUDNN_BACKEND_OPERATION_NORM_BACKWARD_DESCRIPTOR`|8.5.0| | | | | | | | +|`CUDNN_BACKEND_OPERATION_NORM_FORWARD_DESCRIPTOR`|8.5.0| | | | | | | | |`CUDNN_BACKEND_OPERATION_POINTWISE_DESCRIPTOR`|8.0.1| | | | | | | | |`CUDNN_BACKEND_OPERATION_REDUCTION_DESCRIPTOR`|8.1.0| | | | | | | | +|`CUDNN_BACKEND_OPERATION_RESAMPLE_BWD_DESCRIPTOR`|8.3.0| | | | | | | | +|`CUDNN_BACKEND_OPERATION_RESAMPLE_FWD_DESCRIPTOR`|8.3.0| | | | | | | | +|`CUDNN_BACKEND_OPERATION_SIGNAL_DESCRIPTOR`|8.5.0| | | | | | | | |`CUDNN_BACKEND_POINTWISE_DESCRIPTOR`|8.0.1| | | | | | | | |`CUDNN_BACKEND_REDUCTION_DESCRIPTOR`|8.1.0| | | | | | | | +|`CUDNN_BACKEND_RESAMPLE_DESCRIPTOR`|8.3.0| | | | | | | | |`CUDNN_BACKEND_TENSOR_DESCRIPTOR`|8.0.1| | | | | | | | |`CUDNN_BACKEND_VARIANT_PACK_DESCRIPTOR`|8.0.1| | | | | | | | 
|`CUDNN_BATCHNORM_OPS_BN`|7.4.1| | | | | | | | @@ -187,6 +254,9 @@ |`CUDNN_BATCHNORM_PER_ACTIVATION`|4.0.0| | |`HIPDNN_BATCHNORM_PER_ACTIVATION`| | | | | |`CUDNN_BATCHNORM_SPATIAL`|4.0.0| | |`HIPDNN_BATCHNORM_SPATIAL`| | | | | |`CUDNN_BATCHNORM_SPATIAL_PERSISTENT`|7.0.5| | |`HIPDNN_BATCHNORM_SPATIAL_PERSISTENT`| | | | | +|`CUDNN_BATCH_NORM`|8.5.0| | | | | | | | +|`CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER`|8.3.0| | | | | | | | +|`CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER`|8.3.0| | | | | | | | |`CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION`|8.2.0| | | | | | | | |`CUDNN_BEHAVIOR_NOTE_TYPE_COUNT`|8.2.0| | | | | | | | |`CUDNN_BIDIRECTIONAL`|5.0.0| | |`HIPDNN_BIDIRECTIONAL`| | | | | @@ -237,8 +307,11 @@ |`CUDNN_CTC_LOSS_ALGO_DETERMINISTIC`|7.0.5| | | | | | | | |`CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC`|7.0.5| | | | | | | | |`CUDNN_DATA_BFLOAT16`|8.1.0| | | | | | | | +|`CUDNN_DATA_BOOLEAN`|8.3.0| | | | | | | | |`CUDNN_DATA_DOUBLE`|1.0.0| | |`HIPDNN_DATA_DOUBLE`| | | | | |`CUDNN_DATA_FLOAT`|1.0.0| | |`HIPDNN_DATA_FLOAT`| | | | | +|`CUDNN_DATA_FP8_E4M3`|8.6.0| | | | | | | | +|`CUDNN_DATA_FP8_E5M2`|8.6.0| | | | | | | | |`CUDNN_DATA_HALF`|3.0.0| | |`HIPDNN_DATA_HALF`| | | | | |`CUDNN_DATA_INT32`|6.0.0| | |`HIPDNN_DATA_INT32`| | | | | |`CUDNN_DATA_INT64`|8.1.0| | | | | | | | @@ -252,6 +325,7 @@ |`CUDNN_DETERMINISTIC`|6.0.0| | | | | | | | |`CUDNN_DIM_MAX`|4.0.0| | | | | | | | |`CUDNN_DIVNORM_PRECOMPUTED_MEANS`|3.0.0| | | | | | | | +|`CUDNN_EDGE_VAL_PAD`|8.3.0| | | | | | | | |`CUDNN_ERRQUERY_BLOCKING`|7.0.5| | | | | | | | |`CUDNN_ERRQUERY_NONBLOCKING`|7.0.5| | | | | | | | |`CUDNN_ERRQUERY_RAWCODE`|7.0.5| | | | | | | | @@ -266,10 +340,14 @@ |`CUDNN_FWD_MODE_INFERENCE`|8.0.1| | | | | | | | |`CUDNN_FWD_MODE_TRAINING`|8.0.1| | | | | | | | |`CUDNN_GENSTATS_SUM_SQSUM`|8.0.1| | | | | | | | +|`CUDNN_GROUP_NORM`|8.5.0| | | | | | | | |`CUDNN_GRU`|5.0.0| | |`HIPDNN_GRU`| | | | | |`CUDNN_HEUR_MODES_COUNT`|8.0.1| | | | | | | | +|`CUDNN_HEUR_MODE_A`|8.3.0| | | | | | | | 
|`CUDNN_HEUR_MODE_B`|8.0.1| | | | | | | | +|`CUDNN_HEUR_MODE_FALLBACK`|8.3.0| | | | | | | | |`CUDNN_HEUR_MODE_INSTANT`|8.0.1| | | | | | | | +|`CUDNN_INSTANCE_NORM`|8.5.0| | | | | | | | |`CUDNN_KNOB_TYPE_CHUNK_K`|8.0.1| | | | | | | | |`CUDNN_KNOB_TYPE_COUNTS`|8.0.1| | | | | | | | |`CUDNN_KNOB_TYPE_CTA_SPLIT_K_MODE`|8.0.1| | | | | | | | @@ -293,9 +371,14 @@ |`CUDNN_KNOB_TYPE_STAGES`|8.0.1| | | | | | | | |`CUDNN_KNOB_TYPE_SWIZZLE`|8.0.1| | | | | | | | |`CUDNN_KNOB_TYPE_TILEK`|8.0.1| | | | | | | | +|`CUDNN_KNOB_TYPE_TILE_CGA`|8.6.0| | | | | | | | +|`CUDNN_KNOB_TYPE_TILE_CGA_M`|8.6.0| | | | | | | | +|`CUDNN_KNOB_TYPE_TILE_CGA_N`|8.6.0| | | | | | | | |`CUDNN_KNOB_TYPE_TILE_SIZE`|8.0.1| | | | | | | | |`CUDNN_KNOB_TYPE_USE_TEX`|8.0.1| | | | | | | | |`CUDNN_KNOB_TYPE_WINO_TILE`|8.0.1| | | | | | | | +|`CUDNN_KNOB_TYPE_WORKSPACE`|8.4.0| | | | | | | | +|`CUDNN_LAYER_NORM`|8.5.0| | | | | | | | |`CUDNN_LAYOUT_TYPE_COUNT`|8.0.2| | | | | | | | |`CUDNN_LAYOUT_TYPE_PREFERRED_NCHW`|8.0.1| | | | | | | | |`CUDNN_LAYOUT_TYPE_PREFERRED_NHWC`|8.0.2| | | | | | | | @@ -320,9 +403,12 @@ |`CUDNN_MH_ATTN_V_BIASES`|7.6.3| | | | | | | | |`CUDNN_MH_ATTN_V_WEIGHTS`|7.5.0| | | | | | | | |`CUDNN_MINOR`|3.0.0| | | | | | | | +|`CUDNN_NEG_INF_PAD`|8.3.0| | | | | | | | |`CUDNN_NON_DETERMINISTIC`|6.0.0| | | | | | | | |`CUDNN_NORM_ALGO_PERSIST`|8.0.1| | | | | | | | |`CUDNN_NORM_ALGO_STANDARD`|8.0.1| | | | | | | | +|`CUDNN_NORM_FWD_INFERENCE`|8.5.0| | | | | | | | +|`CUDNN_NORM_FWD_TRAINING`|8.5.0| | | | | | | | |`CUDNN_NORM_OPS_NORM`|8.0.1| | | | | | | | |`CUDNN_NORM_OPS_NORM_ACTIVATION`|8.0.1| | | | | | | | |`CUDNN_NORM_OPS_NORM_ADD_ACTIVATION`|8.0.1| | | | | | | | @@ -337,6 +423,9 @@ |`CUDNN_NUMERICAL_NOTE_TENSOR_CORE`|8.0.1| | | | | | | | |`CUDNN_NUMERICAL_NOTE_TYPE_COUNT`|8.0.1| | | | | | | | |`CUDNN_NUMERICAL_NOTE_WINOGRAD`|8.0.1| | | | | | | | +|`CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_13x13`|8.3.0| | | | | | | | +|`CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_4x4`|8.3.0| | | | | | | | 
+|`CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_6x6`|8.3.0| | | | | | | | |`CUDNN_OPS_INFER_MAJOR`|8.0.1| | | | | | | | |`CUDNN_OPS_INFER_MINOR`|8.0.1| | | | | | | | |`CUDNN_OPS_INFER_PATCH`|8.0.1| | | | | | | | @@ -389,23 +478,53 @@ |`CUDNN_PARAM_ZDATA_PLACEHOLDER`|7.6.0| | | | | | | | |`CUDNN_PARAM_ZDESC`|7.6.0| | | | | | | | |`CUDNN_PATCHLEVEL`|3.0.0| | | | | | | | +|`CUDNN_POINTWISE_ABS`|8.3.0| | | | | | | | |`CUDNN_POINTWISE_ADD`|8.0.1| | | | | | | | +|`CUDNN_POINTWISE_ADD_SQUARE`|8.3.0| | | | | | | | +|`CUDNN_POINTWISE_BINARY_SELECT`|8.4.0| | | | | | | | +|`CUDNN_POINTWISE_CEIL`|8.3.0| | | | | | | | +|`CUDNN_POINTWISE_CMP_EQ`|8.3.0| | | | | | | | +|`CUDNN_POINTWISE_CMP_GE`|8.3.0| | | | | | | | +|`CUDNN_POINTWISE_CMP_GT`|8.3.0| | | | | | | | +|`CUDNN_POINTWISE_CMP_LE`|8.3.0| | | | | | | | +|`CUDNN_POINTWISE_CMP_LT`|8.3.0| | | | | | | | +|`CUDNN_POINTWISE_CMP_NEQ`|8.3.0| | | | | | | | +|`CUDNN_POINTWISE_COS`|8.3.0| | | | | | | | +|`CUDNN_POINTWISE_DIV`|8.3.0| | | | | | | | |`CUDNN_POINTWISE_ELU_BWD`|8.1.0| | | | | | | | |`CUDNN_POINTWISE_ELU_FWD`|8.0.1| | | | | | | | +|`CUDNN_POINTWISE_ERF`|8.5.0| | | | | | | | +|`CUDNN_POINTWISE_EXP`|8.3.0| | | | | | | | +|`CUDNN_POINTWISE_FLOOR`|8.3.0| | | | | | | | +|`CUDNN_POINTWISE_GELU_APPROX_TANH_BWD`|8.5.0| | | | | | | | +|`CUDNN_POINTWISE_GELU_APPROX_TANH_FWD`|8.5.0| | | | | | | | |`CUDNN_POINTWISE_GELU_BWD`|8.1.0| | | | | | | | |`CUDNN_POINTWISE_GELU_FWD`|8.1.0| | | | | | | | +|`CUDNN_POINTWISE_GEN_INDEX`|8.4.0| | | | | | | | +|`CUDNN_POINTWISE_IDENTITY`|8.5.0| | | | | | | | +|`CUDNN_POINTWISE_LOG`|8.3.0| | | | | | | | +|`CUDNN_POINTWISE_LOGICAL_AND`|8.3.0| | | | | | | | +|`CUDNN_POINTWISE_LOGICAL_NOT`|8.3.0| | | | | | | | +|`CUDNN_POINTWISE_LOGICAL_OR`|8.3.0| | | | | | | | |`CUDNN_POINTWISE_MAX`|8.0.1| | | | | | | | |`CUDNN_POINTWISE_MIN`|8.0.1| | | | | | | | +|`CUDNN_POINTWISE_MOD`|8.3.0| | | | | | | | |`CUDNN_POINTWISE_MUL`|8.0.1| | | | | | | | +|`CUDNN_POINTWISE_NEG`|8.3.0| | | | | | | | +|`CUDNN_POINTWISE_POW`|8.3.0| | | | 
| | | | |`CUDNN_POINTWISE_RELU_BWD`|8.1.0| | | | | | | | |`CUDNN_POINTWISE_RELU_FWD`|8.0.1| | | | | | | | +|`CUDNN_POINTWISE_RSQRT`|8.3.0| | | | | | | | |`CUDNN_POINTWISE_SIGMOID_BWD`|8.1.0| | | | | | | | |`CUDNN_POINTWISE_SIGMOID_FWD`|8.0.1| | | | | | | | +|`CUDNN_POINTWISE_SIN`|8.3.0| | | | | | | | |`CUDNN_POINTWISE_SOFTPLUS_BWD`|8.1.0| | | | | | | | |`CUDNN_POINTWISE_SOFTPLUS_FWD`|8.1.0| | | | | | | | |`CUDNN_POINTWISE_SQRT`|8.0.1| | | | | | | | +|`CUDNN_POINTWISE_SUB`|8.3.0| | | | | | | | |`CUDNN_POINTWISE_SWISH_BWD`|8.1.0| | | | | | | | |`CUDNN_POINTWISE_SWISH_FWD`|8.1.0| | | | | | | | +|`CUDNN_POINTWISE_TAN`|8.3.0| | | | | | | | |`CUDNN_POINTWISE_TANH_BWD`|8.1.0| | | | | | | | |`CUDNN_POINTWISE_TANH_FWD`|8.0.1| | | | | | | | |`CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING`|2.0.0| | |`HIPDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING`| | | | | @@ -451,6 +570,12 @@ |`CUDNN_REDUCE_TENSOR_NORM1`|6.0.0| | |`HIPDNN_REDUCE_TENSOR_NORM1`| | | | | |`CUDNN_REDUCE_TENSOR_NORM2`|6.0.0| | |`HIPDNN_REDUCE_TENSOR_NORM2`| | | | | |`CUDNN_REDUCE_TENSOR_NO_INDICES`|6.0.0| | |`HIPDNN_REDUCE_TENSOR_NO_INDICES`| | | | | +|`CUDNN_RESAMPLE_AVGPOOL`|8.3.0| | | | | | | | +|`CUDNN_RESAMPLE_AVGPOOL_EXCLUDE_PADDING`|8.6.0| | | | | | | | +|`CUDNN_RESAMPLE_AVGPOOL_INCLUDE_PADDING`|8.6.0| | | | | | | | +|`CUDNN_RESAMPLE_BILINEAR`|8.3.0| | | | | | | | +|`CUDNN_RESAMPLE_MAXPOOL`|8.3.0| | | | | | | | +|`CUDNN_RESAMPLE_NEAREST`|8.3.0| | | | | | | | |`CUDNN_RNN_ALGO_COUNT`|7.1.3| | | | | | | | |`CUDNN_RNN_ALGO_PERSIST_DYNAMIC`|6.0.0| | |`HIPDNN_RNN_ALGO_PERSIST_DYNAMIC`| | | | | |`CUDNN_RNN_ALGO_PERSIST_STATIC`|6.0.0| | |`HIPDNN_RNN_ALGO_PERSIST_STATIC`| | | | | @@ -486,6 +611,8 @@ |`CUDNN_SEV_INFO_EN`|7.1.3| | | | | | | | |`CUDNN_SEV_WARNING`|7.1.3| | | | | | | | |`CUDNN_SEV_WARNING_EN`|7.1.3| | | | | | | | +|`CUDNN_SIGNAL_SET`|8.5.0| | | | | | | | +|`CUDNN_SIGNAL_WAIT`|8.5.0| | | | | | | | |`CUDNN_SKIP_INPUT`|5.0.0| | |`HIPDNN_SKIP_INPUT`| | | | | |`CUDNN_SOFTMAX_ACCURATE`|1.0.0| | 
|`HIPDNN_SOFTMAX_ACCURATE`| | | | | |`CUDNN_SOFTMAX_FAST`|1.0.0| | |`HIPDNN_SOFTMAX_FAST`| | | | | @@ -512,6 +639,8 @@ |`CUDNN_TENSOR_NHWC`|1.0.0| | |`HIPDNN_TENSOR_NHWC`| | | | | |`CUDNN_TENSOR_OP_MATH`|7.0.5| | |`HIPDNN_TENSOR_OP_MATH`| | | | | |`CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION`|7.2.1| | | | | | | | +|`CUDNN_TENSOR_REORDERING_INT8x32`|8.3.0| | | | | | | | +|`CUDNN_TENSOR_REORDERING_NONE`|8.3.0| | | | | | | | |`CUDNN_TRANSFORM_FOLD`|7.5.0| | | | | | | | |`CUDNN_TRANSFORM_UNFOLD`|7.5.0| | | | | | | | |`CUDNN_TYPE_ATTRIB_NAME`|8.0.1| | | | | | | | @@ -519,25 +648,35 @@ |`CUDNN_TYPE_BEHAVIOR_NOTE`|8.2.0| | | | | | | | |`CUDNN_TYPE_BN_FINALIZE_STATS_MODE`|8.1.0| | | | | | | | |`CUDNN_TYPE_BOOLEAN`|8.0.1| | | | | | | | +|`CUDNN_TYPE_CHAR`|8.4.0| | | | | | | | |`CUDNN_TYPE_CONVOLUTION_MODE`|8.0.1| | | | | | | | |`CUDNN_TYPE_DATA_TYPE`|8.0.1| | | | | | | | |`CUDNN_TYPE_DOUBLE`|8.0.1| | | | | | | | |`CUDNN_TYPE_FLOAT`|8.0.1| | | | | | | | +|`CUDNN_TYPE_FRACTION`|8.5.0| | | | | | | | |`CUDNN_TYPE_GENSTATS_MODE`|8.0.1| | | | | | | | |`CUDNN_TYPE_HANDLE`|8.0.1| | | | | | | | |`CUDNN_TYPE_HEUR_MODE`|8.0.1| | | | | | | | +|`CUDNN_TYPE_INT32`|8.3.0| | | | | | | | |`CUDNN_TYPE_INT64`|8.0.1| | | | | | | | |`CUDNN_TYPE_KNOB_TYPE`|8.0.1| | | | | | | | |`CUDNN_TYPE_LAYOUT_TYPE`|8.0.2| | | | | | | | |`CUDNN_TYPE_NAN_PROPOGATION`|8.0.1| | | | | | | | +|`CUDNN_TYPE_NORM_FWD_PHASE`|8.5.0| | | | | | | | +|`CUDNN_TYPE_NORM_MODE`|8.5.0| | | | | | | | |`CUDNN_TYPE_NUMERICAL_NOTE`|8.0.1| | | | | | | | +|`CUDNN_TYPE_PADDING_MODE`|8.3.0| | | | | | | | |`CUDNN_TYPE_POINTWISE_MODE`|8.0.1| | | | | | | | |`CUDNN_TYPE_REDUCTION_OPERATOR_TYPE`|8.1.0| | | | | | | | +|`CUDNN_TYPE_RESAMPLE_MODE`|8.3.0| | | | | | | | +|`CUDNN_TYPE_SIGNAL_MODE`|8.5.0| | | | | | | | +|`CUDNN_TYPE_TENSOR_REORDERING_MODE`|8.3.0| | | | | | | | |`CUDNN_TYPE_VOID_PTR`|8.0.1| | | | | | | | |`CUDNN_UNIDIRECTIONAL`|5.0.0| | |`HIPDNN_UNIDIRECTIONAL`| | | | | |`CUDNN_VERSION`|2.0.0| | |`HIPDNN_VERSION`| | | | | 
|`CUDNN_WGRAD_MODE_ADD`|7.5.0| | | | | | | | |`CUDNN_WGRAD_MODE_SET`|7.5.0| | | | | | | | +|`CUDNN_ZERO_PAD`|8.3.0| | | | | | | | |`cudnnActivationDescriptor_t`|4.0.0| | |`hipdnnActivationDescriptor_t`| | | | | |`cudnnActivationMode_t`|1.0.0| | |`hipdnnActivationMode_t`| | | | | |`cudnnActivationStruct`|4.0.0| | | | | | | | @@ -558,7 +697,10 @@ |`cudnnBackendHeurMode_t`|8.0.1| | | | | | | | |`cudnnBackendKnobType_t`|8.0.1| | | | | | | | |`cudnnBackendLayoutType_t`|8.0.1| | | | | | | | +|`cudnnBackendNormFwdPhase_t`|8.5.0| | | | | | | | +|`cudnnBackendNormMode_t`|8.5.0| | | | | | | | |`cudnnBackendNumericalNote_t`|8.0.1| | | | | | | | +|`cudnnBackendTensorReordering_t`|8.3.0| | | | | | | | |`cudnnBatchNormMode_t`|4.0.0| | |`hipdnnBatchNormMode_t`| | | | | |`cudnnBatchNormOps_t`|7.4.1| | | | | | | | |`cudnnBnFinalizeStatsMode_t`|8.1.0| | | | | | | | @@ -595,6 +737,8 @@ |`cudnnFilterStruct`|1.0.0| | | | | | | | |`cudnnFoldingDirection_t`|7.5.0| | | | | | | | |`cudnnForwardMode_t`|8.0.1| | | | | | | | +|`cudnnFractionStruct`|8.5.0| | | | | | | | +|`cudnnFraction_t`|8.5.0| | | | | | | | |`cudnnFusedOpsConstParamLabel_t`|7.6.0| | | | | | | | |`cudnnFusedOpsConstParamPack_t`|7.6.0| | | | | | | | |`cudnnFusedOpsConstParamStruct`|7.6.0| | | | | | | | @@ -621,6 +765,7 @@ |`cudnnOpTensorDescriptor_t`|5.0.0| | |`hipdnnOpTensorDescriptor_t`| | | | | |`cudnnOpTensorOp_t`|5.0.0| | |`hipdnnOpTensorOp_t`| | | | | |`cudnnOpTensorStruct`|5.0.0| | | | | | | | +|`cudnnPaddingMode_t`|8.3.0| | | | | | | | |`cudnnPersistentRNNPlan`|6.0.0| | | | | | | | |`cudnnPersistentRNNPlan_t`|6.0.0| | |`hipdnnPersistentRNNPlan_t`| | | | | |`cudnnPointwiseMode_t`|8.0.1| | | | | | | | @@ -643,12 +788,14 @@ |`cudnnReduceTensorOp_t`|6.0.0| | |`hipdnnReduceTensorOp_t`| | | | | |`cudnnReduceTensorStruct`|6.0.0| | | | | | | | |`cudnnReorderType_t`|7.6.0| | | | | | | | +|`cudnnResampleMode_t`|8.3.0| | | | | | | | |`cudnnRuntimeTag_t`|7.0.5| | | | | | | | |`cudnnSamplerType_t`|5.0.0| | | | | | | | 
|`cudnnSeqDataAxis_t`|7.5.0| | | | | | | | |`cudnnSeqDataDescriptor_t`|7.5.0| | | | | | | | |`cudnnSeqDataStruct`|7.5.0| | | | | | | | |`cudnnSeverity_t`|7.1.3| | | | | | | | +|`cudnnSignalMode_t`|8.5.0| | | | | | | | |`cudnnSoftmaxAlgorithm_t`|1.0.0| | |`hipdnnSoftmaxAlgorithm_t`| | | | | |`cudnnSoftmaxMode_t`|1.0.0| | |`hipdnnSoftmaxMode_t`| | | | | |`cudnnSpatialTransformerDescriptor_t`|5.0.0| | | | | | | | @@ -804,6 +951,7 @@ |`cudnnGetFusedOpsConstParamPackAttribute`|7.6.0| | | | | | | | |`cudnnGetFusedOpsVariantParamPackAttribute`|7.6.0| | | | | | | | |`cudnnGetLRNDescriptor`|3.0.0| | |`hipdnnGetLRNDescriptor`| | | | | +|`cudnnGetMaxDeviceVersion`|8.6.0| | | | | | | | |`cudnnGetMultiHeadAttnBuffers`|7.5.0| | | | | | | | |`cudnnGetMultiHeadAttnWeights`|7.5.0| | | | | | | | |`cudnnGetNormalizationBackwardWorkspaceSize`|8.0.1| | | | | | | | diff --git a/doc/markdown/CUFFT_API_supported_by_HIP.md b/doc/markdown/CUFFT_API_supported_by_HIP.md index d9c3cbeb..163e5696 100644 --- a/doc/markdown/CUFFT_API_supported_by_HIP.md +++ b/doc/markdown/CUFFT_API_supported_by_HIP.md @@ -54,6 +54,8 @@ |`CUFFT_WORKAREA_PERFORMANCE`| | | | | | | | | |`CUFFT_WORKAREA_USER`|9.2| | | | | | | | |`CUFFT_XT_FORMAT_1D_INPUT_SHUFFLED`| | | | | | | | | +|`CUFFT_XT_FORMAT_DISTRIBUTED_INPUT`|11.8| | | | | | | | +|`CUFFT_XT_FORMAT_DISTRIBUTED_OUTPUT`|11.8| | | | | | | | |`CUFFT_XT_FORMAT_INPLACE`| | | | | | | | | |`CUFFT_XT_FORMAT_INPLACE_SHUFFLED`| | | | | | | | | |`CUFFT_XT_FORMAT_INPUT`| | | | | | | | | @@ -61,6 +63,8 @@ |`CUFFT_Z2D`| | | |`HIPFFT_Z2D`|1.7.0| | | | |`CUFFT_Z2Z`| | | |`HIPFFT_Z2Z`|1.7.0| | | | |`MAX_CUFFT_ERROR`| | | | | | | | | +|`cufftBox3d`|11.8| | | | | | | | +|`cufftBox3d_t`|11.8| | | | | | | | |`cufftCompatibility`| | | | | | | | | |`cufftCompatibility_t`| | | | | | | | | |`cufftComplex`| | | |`hipfftComplex`|1.7.0| | | | @@ -131,7 +135,7 @@ |`cufftSetWorkArea`| | | |`hipfftSetWorkArea`|1.7.0| | | | |`cufftXtClearCallback`| | | |`hipfftXtClearCallback`|4.3.0| | | | 
|`cufftXtExec`|8.0| | | | | | | | -|`cufftXtExecDescriptor`| | | | | | | | | +|`cufftXtExecDescriptor`|8.0| | | | | | | | |`cufftXtExecDescriptorC2C`| | | | | | | | | |`cufftXtExecDescriptorC2R`| | | | | | | | | |`cufftXtExecDescriptorD2Z`| | | | | | | | | @@ -146,6 +150,7 @@ |`cufftXtQueryPlan`| | | | | | | | | |`cufftXtSetCallback`| | | |`hipfftXtSetCallback`|4.3.0| | | | |`cufftXtSetCallbackSharedSize`| | | |`hipfftXtSetCallbackSharedSize`|4.3.0| | | | +|`cufftXtSetDistribution`|11.8| | | | | | | | |`cufftXtSetGPUs`| | | | | | | | | |`cufftXtSetWorkArea`| | | | | | | | | |`cufftXtSetWorkAreaPolicy`|9.2| | | | | | | | diff --git a/src/CUDA2HIP.cpp b/src/CUDA2HIP.cpp index 1711966c..e5e53e9c 100644 --- a/src/CUDA2HIP.cpp +++ b/src/CUDA2HIP.cpp @@ -25,63 +25,63 @@ THE SOFTWARE. // Maps CUDA header names to HIP header names const std::map CUDA_INCLUDE_MAP { // CUDA includes - {"cuda.h", {"hip/hip_runtime.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_DRIVER, 0}}, - {"cuda_runtime.h", {"hip/hip_runtime.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_RUNTIME, 0}}, - {"cuda_runtime_api.h", {"hip/hip_runtime_api.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, - {"channel_descriptor.h", {"hip/channel_descriptor.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, - {"device_functions.h", {"hip/device_functions.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, - {"driver_types.h", {"hip/driver_types.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, - {"cuda_fp16.h", {"hip/hip_fp16.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, - {"cuda_texture_types.h", {"hip/hip_texture_types.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, - {"texture_fetch_functions.h", {"", "", CONV_INCLUDE, API_RUNTIME, 0}}, - {"vector_types.h", {"hip/hip_vector_types.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, - {"cuda_profiler_api.h", {"hip/hip_runtime_api.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, - {"cooperative_groups.h", {"hip/hip_cooperative_groups.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, + {"cuda.h", {"hip/hip_runtime.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_DRIVER, 0}}, + 
{"cuda_runtime.h", {"hip/hip_runtime.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_RUNTIME, 0}}, + {"cuda_runtime_api.h", {"hip/hip_runtime_api.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, + {"channel_descriptor.h", {"hip/channel_descriptor.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, + {"device_functions.h", {"hip/device_functions.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, + {"driver_types.h", {"hip/driver_types.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, + {"cuda_fp16.h", {"hip/hip_fp16.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, + {"cuda_texture_types.h", {"hip/hip_texture_types.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, + {"texture_fetch_functions.h", {"", "", CONV_INCLUDE, API_RUNTIME, 0}}, + {"vector_types.h", {"hip/hip_vector_types.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, + {"cuda_profiler_api.h", {"hip/hip_runtime_api.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, + {"cooperative_groups.h", {"hip/hip_cooperative_groups.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, // cuComplex includes - {"cuComplex.h", {"hip/hip_complex.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_COMPLEX, 0}}, + {"cuComplex.h", {"hip/hip_complex.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_COMPLEX, 0}}, // cuBLAS includes - {"cublas.h", {"hipblas.h", "rocblas.h", CONV_INCLUDE_CUDA_MAIN_H, API_BLAS, 0}}, - {"cublas_v2.h", {"hipblas.h", "rocblas.h", CONV_INCLUDE_CUDA_MAIN_V2_H, API_BLAS, 0}}, - {"cublas_api.h", {"hipblas.h", "rocblas.h", CONV_INCLUDE, API_BLAS, 0}}, + {"cublas.h", {"hipblas.h", "rocblas.h", CONV_INCLUDE_CUDA_MAIN_H, API_BLAS, 0}}, + {"cublas_v2.h", {"hipblas.h", "rocblas.h", CONV_INCLUDE_CUDA_MAIN_V2_H, API_BLAS, 0}}, + {"cublas_api.h", {"hipblas.h", "rocblas.h", CONV_INCLUDE, API_BLAS, 0}}, // cuRAND includes - {"curand.h", {"hiprand.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_RAND, 0}}, - {"curand_kernel.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, - {"curand_discrete.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, - {"curand_discrete2.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, - 
{"curand_globals.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, - {"curand_lognormal.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, - {"curand_mrg32k3a.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, - {"curand_mtgp32.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, - {"curand_mtgp32_host.h", {"hiprand_mtgp32_host.h", "", CONV_INCLUDE, API_RAND, 0}}, - {"curand_mtgp32_kernel.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, - {"curand_mtgp32dc_p_11213.h", {"rocrand_mtgp32_11213.h", "", CONV_INCLUDE, API_RAND, 0}}, - {"curand_normal.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, - {"curand_normal_static.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, - {"curand_philox4x32_x.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, - {"curand_poisson.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, - {"curand_precalc.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, - {"curand_uniform.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, + {"curand.h", {"hiprand.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_RAND, 0}}, + {"curand_kernel.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, + {"curand_discrete.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, + {"curand_discrete2.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, + {"curand_globals.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, + {"curand_lognormal.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, + {"curand_mrg32k3a.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, + {"curand_mtgp32.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, + {"curand_mtgp32_host.h", {"hiprand_mtgp32_host.h", "", CONV_INCLUDE, API_RAND, 0}}, + {"curand_mtgp32_kernel.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, + {"curand_mtgp32dc_p_11213.h", {"rocrand_mtgp32_11213.h", "", CONV_INCLUDE, API_RAND, 0}}, + {"curand_normal.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, + 
{"curand_normal_static.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, + {"curand_philox4x32_x.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, + {"curand_poisson.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, + {"curand_precalc.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, + {"curand_uniform.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, // cuDNN includes - {"cudnn.h", {"hipDNN.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_DNN, 0}}, + {"cudnn.h", {"hipDNN.h", "miopen/miopen.h", CONV_INCLUDE_CUDA_MAIN_H, API_DNN, 0}}, // cuFFT includes - {"cufft.h", {"hipfft.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_FFT, 0}}, - {"cufftXt.h", {"hipfftXt.h", "", CONV_INCLUDE, API_FFT, 0}}, + {"cufft.h", {"hipfft.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_FFT, 0}}, + {"cufftXt.h", {"hipfftXt.h", "", CONV_INCLUDE, API_FFT, 0}}, // cuSPARSE includes - {"cusparse.h", {"hipsparse.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_SPARSE, 0}}, - {"cusparse_v2.h", {"hipsparse.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_SPARSE, 0}}, + {"cusparse.h", {"hipsparse.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_SPARSE, 0}}, + {"cusparse_v2.h", {"hipsparse.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_SPARSE, 0}}, // CUB includes - {"cub/cub.cuh", {"hipcub/hipcub.hpp", "", CONV_INCLUDE_CUDA_MAIN_H, API_CUB, 0}}, + {"cub/cub.cuh", {"hipcub/hipcub.hpp", "", CONV_INCLUDE_CUDA_MAIN_H, API_CUB, 0}}, // CAFFE2 includes - {"caffe2/core/common_gpu.h", {"caffe2/core/hip/common_gpu.h", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}}, - {"caffe2/core/context_gpu.h", {"caffe2/core/hip/context_gpu.h", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}}, - {"caffe2/operators/operator_fallback_gpu.h", {"", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}}, - {"caffe2/operators/spatial_batch_norm_op.h", {"caffe2/operators/hip/spatial_batch_norm_op_miopen.hip", "", CONV_INCLUDE, API_CAFFE2, 0}}, - {"caffe2/operators/generate_proposals_op_util_nms_gpu.h", {"", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}}, - 
{"caffe2/operators/max_pool_with_index_gpu.h", {"", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}}, - {"caffe2/operators/rnn/recurrent_network_executor_gpu.h", {"", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}}, - {"caffe2/utils/math/reduce.cuh", {"caffe2/utils/math/hip/reduce.cuh", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}}, - {"caffe2/operators/gather_op.cuh", {"caffe2/operators/math/gather_op.cuh", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}}, - {"caffe2/core/common_cudnn.h", {"caffe2/core/hip/common_miopen.h", "", CONV_INCLUDE, API_CAFFE2, 0}}, + {"caffe2/core/common_gpu.h", {"caffe2/core/hip/common_gpu.h", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}}, + {"caffe2/core/context_gpu.h", {"caffe2/core/hip/context_gpu.h", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}}, + {"caffe2/operators/operator_fallback_gpu.h", {"", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}}, + {"caffe2/operators/spatial_batch_norm_op.h", {"caffe2/operators/hip/spatial_batch_norm_op_miopen.hip", "", CONV_INCLUDE, API_CAFFE2, 0}}, + {"caffe2/operators/generate_proposals_op_util_nms_gpu.h", {"", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}}, + {"caffe2/operators/max_pool_with_index_gpu.h", {"", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}}, + {"caffe2/operators/rnn/recurrent_network_executor_gpu.h", {"", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}}, + {"caffe2/utils/math/reduce.cuh", {"caffe2/utils/math/hip/reduce.cuh", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}}, + {"caffe2/operators/gather_op.cuh", {"caffe2/operators/math/gather_op.cuh", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}}, + {"caffe2/core/common_cudnn.h", {"caffe2/core/hip/common_miopen.h", "", CONV_INCLUDE, API_CAFFE2, 0}}, // RTC includes {"nvrtc.h", {"hiprtc.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_RTC, 0}}, }; @@ -113,6 +113,7 @@ const std::map &CUDA_RENAMES_MAP() { ret.insert(CUDA_CUB_FUNCTION_MAP.begin(), CUDA_CUB_FUNCTION_MAP.end()); ret.insert(CUDA_RTC_TYPE_NAME_MAP.begin(), CUDA_RTC_TYPE_NAME_MAP.end()); 
ret.insert(CUDA_RTC_FUNCTION_MAP.begin(), CUDA_RTC_FUNCTION_MAP.end()); + ret.insert(CUDA_DEVICE_TYPE_NAME_MAP.begin(), CUDA_DEVICE_TYPE_NAME_MAP.end()); return ret; }; diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index b36ae477..a912fa1f 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -26,22 +26,22 @@ THE SOFTWARE. const std::map CUDA_BLAS_FUNCTION_MAP { // Blas management functions - {"cublasInit", {"hipblasInit", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, + {"cublasInit", {"hipblasInit", "rocblas_initialize", CONV_LIB_FUNC, API_BLAS, 4, HIP_UNSUPPORTED}}, {"cublasShutdown", {"hipblasShutdown", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, {"cublasGetVersion", {"hipblasGetVersion", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, {"cublasGetError", {"hipblasGetError", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, {"cublasAlloc", {"hipblasAlloc", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, {"cublasFree", {"hipblasFree", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, {"cublasSetKernelStream", {"hipblasSetKernelStream", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, - {"cublasGetAtomicsMode", {"hipblasGetAtomicsMode", "", CONV_LIB_FUNC, API_BLAS, 4, ROC_UNSUPPORTED}}, - {"cublasSetAtomicsMode", {"hipblasSetAtomicsMode", "", CONV_LIB_FUNC, API_BLAS, 4, ROC_UNSUPPORTED}}, + {"cublasGetAtomicsMode", {"hipblasGetAtomicsMode", "rocblas_get_atomics_mode", CONV_LIB_FUNC, API_BLAS, 4}}, + {"cublasSetAtomicsMode", {"hipblasSetAtomicsMode", "rocblas_set_atomics_mode", CONV_LIB_FUNC, API_BLAS, 4}}, {"cublasGetMathMode", {"hipblasGetMathMode", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, {"cublasSetMathMode", {"hipblasSetMathMode", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, {"cublasMigrateComputeType", {"hipblasMigrateComputeType", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, {"cublasGetSmCountTarget", {"hipblasGetSmCountTarget", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, 
{"cublasSetSmCountTarget", {"hipblasSetSmCountTarget", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, {"cublasGetStatusName", {"hipblasGetStatusName", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, - {"cublasGetStatusString", {"hipblasGetStatusString", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, + {"cublasGetStatusString", {"hipblasGetStatusString", "rocblas_status_to_string", CONV_LIB_FUNC, API_BLAS, 4, HIP_UNSUPPORTED}}, // Blas logging {"cublasLogCallback", {"hipblasLogCallback", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, @@ -288,20 +288,20 @@ const std::map CUDA_BLAS_FUNCTION_MAP { {"cublasHgemmStridedBatched", {"hipblasHgemmStridedBatched", "rocblas_hgemm_strided_batched", CONV_LIB_FUNC, API_BLAS, 7}}, // SYRK - {"cublasSsyrk", {"hipblasSsyrk", "rocblas_ssyrk", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasDsyrk", {"hipblasDsyrk", "rocblas_dsyrk", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasCsyrk", {"hipblasCsyrk", "rocblas_csyrk", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasZsyrk", {"hipblasZsyrk", "rocblas_zsyrk", CONV_LIB_FUNC, API_BLAS, 7}}, + {"cublasSsyrk", {"hipblasSsyrk", "rocblas_ssyrk", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDsyrk", {"hipblasDsyrk", "rocblas_dsyrk", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, + {"cublasCsyrk", {"hipblasCsyrk", "rocblas_csyrk", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZsyrk", {"hipblasZsyrk", "rocblas_zsyrk", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, // HERK - {"cublasCherk", {"hipblasCherk", "rocblas_cherk", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasZherk", {"hipblasZherk", "rocblas_zherk", CONV_LIB_FUNC, API_BLAS, 7}}, + {"cublasCherk", {"hipblasCherk", "rocblas_cherk", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZherk", {"hipblasZherk", "rocblas_zherk", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, // SYR2K - {"cublasSsyr2k", {"hipblasSsyr2k", "rocblas_ssyr2k", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasDsyr2k", {"hipblasDsyr2k", 
"rocblas_dsyr2k", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasCsyr2k", {"hipblasCsyr2k", "rocblas_csyr2k", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasZsyr2k", {"hipblasZsyr2k", "rocblas_zsyr2k", CONV_LIB_FUNC, API_BLAS, 7}}, + {"cublasSsyr2k", {"hipblasSsyr2k", "rocblas_ssyr2k", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDsyr2k", {"hipblasDsyr2k", "rocblas_dsyr2k", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, + {"cublasCsyr2k", {"hipblasCsyr2k", "rocblas_csyr2k", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZsyr2k", {"hipblasZsyr2k", "rocblas_zsyr2k", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, // SYRKX - eXtended SYRK {"cublasSsyrkx", {"hipblasSsyrkx", "rocblas_ssyrkx", CONV_LIB_FUNC, API_BLAS, 7}}, @@ -310,34 +310,34 @@ const std::map CUDA_BLAS_FUNCTION_MAP { {"cublasZsyrkx", {"hipblasZsyrkx", "rocblas_zsyrkx", CONV_LIB_FUNC, API_BLAS, 7}}, // HER2K - {"cublasCher2k", {"hipblasCher2k", "rocblas_cher2k", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasZher2k", {"hipblasZher2k", "rocblas_zher2k", CONV_LIB_FUNC, API_BLAS, 7}}, + {"cublasCher2k", {"hipblasCher2k", "rocblas_cher2k", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZher2k", {"hipblasZher2k", "rocblas_zher2k", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, // HERKX - eXtended HERK {"cublasCherkx", {"hipblasCherkx", "rocblas_cherkx", CONV_LIB_FUNC, API_BLAS, 7}}, {"cublasZherkx", {"hipblasZherkx", "rocblas_zherkx", CONV_LIB_FUNC, API_BLAS, 7}}, // SYMM - {"cublasSsymm", {"hipblasSsymm", "rocblas_ssymm", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasDsymm", {"hipblasDsymm", "rocblas_dsymm", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasCsymm", {"hipblasCsymm", "rocblas_csymm", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasZsymm", {"hipblasZsymm", "rocblas_zsymm", CONV_LIB_FUNC, API_BLAS, 7}}, + {"cublasSsymm", {"hipblasSsymm", "rocblas_ssymm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDsymm", {"hipblasDsymm", "rocblas_dsymm", CONV_LIB_FUNC, 
API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, + {"cublasCsymm", {"hipblasCsymm", "rocblas_csymm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZsymm", {"hipblasZsymm", "rocblas_zsymm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, // HEMM - {"cublasChemm", {"hipblasChemm", "rocblas_chemm", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasZhemm", {"hipblasZhemm", "rocblas_zhemm", CONV_LIB_FUNC, API_BLAS, 7}}, + {"cublasChemm", {"hipblasChemm", "rocblas_chemm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZhemm", {"hipblasZhemm", "rocblas_zhemm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, // TRSM - {"cublasStrsm", {"hipblasStrsm", "rocblas_strsm", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasDtrsm", {"hipblasDtrsm", "rocblas_dtrsm", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasCtrsm", {"hipblasCtrsm", "rocblas_ctrsm", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasZtrsm", {"hipblasZtrsm", "rocblas_ztrsm", CONV_LIB_FUNC, API_BLAS, 7}}, + {"cublasStrsm", {"hipblasStrsm", "rocblas_strsm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDtrsm", {"hipblasDtrsm", "rocblas_dtrsm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, + {"cublasCtrsm", {"hipblasCtrsm", "rocblas_ctrsm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZtrsm", {"hipblasZtrsm", "rocblas_ztrsm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, // TRMM - {"cublasStrmm", {"hipblasStrmm", "rocblas_strmm", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasDtrmm", {"hipblasDtrmm", "rocblas_dtrmm", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasCtrmm", {"hipblasCtrmm", "rocblas_ctrmm", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasZtrmm", {"hipblasZtrmm", "rocblas_ztrmm", CONV_LIB_FUNC, API_BLAS, 7}}, + {"cublasStrmm", {"hipblasStrmm", "rocblas_strmm_outofplace", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY | HIP_UNSUPPORTED}}, + {"cublasDtrmm", {"hipblasDtrmm", "rocblas_dtrmm_outofplace", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY | HIP_UNSUPPORTED}}, + 
{"cublasCtrmm", {"hipblasCtrmm", "rocblas_ctrmm_outofplace", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY | HIP_UNSUPPORTED}}, + {"cublasZtrmm", {"hipblasZtrmm", "rocblas_ztrmm_outofplace", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY | HIP_UNSUPPORTED}}, // ------------------------ CUBLAS BLAS - like extension (cublas_api.h) // GEAM @@ -541,7 +541,7 @@ const std::map CUDA_BLAS_FUNCTION_MAP { {"cublasCsyrk3mEx", {"hipblasCsyrk3mEx", "", CONV_LIB_FUNC, API_BLAS, 8, UNSUPPORTED}}, // HERK - {"cublasCherk_v2", {"hipblasCherk", "rocblas_cherkx", CONV_LIB_FUNC, API_BLAS, 7}}, + {"cublasCherk_v2", {"hipblasCherk", "rocblas_cherk", CONV_LIB_FUNC, API_BLAS, 7}}, // IO in Int8 complex/cuComplex, computation in cuComplex {"cublasCherkEx", {"hipblasCherkEx", "", CONV_LIB_FUNC, API_BLAS, 8, UNSUPPORTED}}, // IO in Int8 complex/cuComplex, computation in cuComplex, Gaussian math @@ -575,10 +575,10 @@ const std::map CUDA_BLAS_FUNCTION_MAP { {"cublasZtrsm_v2", {"hipblasZtrsm", "rocblas_ztrsm", CONV_LIB_FUNC, API_BLAS, 7}}, // TRMM - {"cublasStrmm_v2", {"hipblasStrmm", "rocblas_strmm", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasDtrmm_v2", {"hipblasDtrmm", "rocblas_dtrmm", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasCtrmm_v2", {"hipblasCtrmm", "rocblas_ctrmm", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasZtrmm_v2", {"hipblasZtrmm", "rocblas_ztrmm", CONV_LIB_FUNC, API_BLAS, 7}}, + {"cublasStrmm_v2", {"hipblasStrmm", "rocblas_strmm_outofplace", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY | HIP_UNSUPPORTED}}, + {"cublasDtrmm_v2", {"hipblasDtrmm", "rocblas_dtrmm_outofplace", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY | HIP_UNSUPPORTED}}, + {"cublasCtrmm_v2", {"hipblasCtrmm", "rocblas_ctrmm_outofplace", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY | HIP_UNSUPPORTED}}, + {"cublasZtrmm_v2", {"hipblasZtrmm", "rocblas_ztrmm_outofplace", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY | HIP_UNSUPPORTED}}, // NRM2 {"cublasSnrm2_v2", {"hipblasSnrm2", "rocblas_snrm2", 
CONV_LIB_FUNC, API_BLAS, 5}}, @@ -953,6 +953,219 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"hipblasNrm2Ex", {HIP_4010, HIP_0, HIP_0 }}, {"hipblasRotEx", {HIP_4010, HIP_0, HIP_0 }}, {"hipblasScalEx", {HIP_4010, HIP_0, HIP_0 }}, + {"rocblas_status_to_string", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_sscal", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_dscal", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_cscal", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_zscal", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_csscal", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zdscal", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_scopy", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_dcopy", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_ccopy", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_zcopy", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_sdot", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_ddot", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_hdot", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_cdotu", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_zdotu", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_cdotc", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zdotc", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_sswap", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_dswap", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_cswap", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_zswap", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_saxpy", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_daxpy", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_caxpy", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_zaxpy", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_sasum", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_dasum", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_scasum", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_dzasum", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_snrm2", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_dnrm2", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_scnrm2", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_dznrm2", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_isamax", {HIP_1064, HIP_0, HIP_0 }}, + {"rocblas_idamax", {HIP_1064, HIP_0, HIP_0 }}, + {"rocblas_icamax", 
{HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_izamax", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_isamin", {HIP_1064, HIP_0, HIP_0 }}, + {"rocblas_idamin", {HIP_1064, HIP_0, HIP_0 }}, + {"rocblas_icamin", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_izamin", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_srot", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_drot", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_crot", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_csrot", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zrot", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zdrot", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_srotg", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_drotg", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_crotg", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zrotg", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_srotm", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_drotm", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_srotmg", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_drotmg", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_sgbmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dgbmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_cgbmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zgbmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_sgemv", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_dgemv", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_cgemv", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_zgemv", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_chbmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zhbmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_chemv", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_zhemv", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_cher", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zher", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_cher2", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zher2", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_chpmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zhpmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_chpr", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zhpr", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_chpr2", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zhpr2", {HIP_3050, HIP_0, HIP_0 }}, + 
{"rocblas_strmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dtrmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ctrmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ztrmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_stpmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dtpmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ctpmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ztpmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_stbmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dtbmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ctbmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ztbmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_stbsv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dtbsv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ctbsv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ztbsv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_strsv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dtrsv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ctrsv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ztrsv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_stpsv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dtpsv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ctpsv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ztpsv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ssymv", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_dsymv", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_csymv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zsymv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_sspmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dspmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ssbmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dsbmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_sger", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_dger", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_cgeru", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zgeru", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_cgerc", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zgerc", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_sspr", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dspr", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_sspr2", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dspr2", {HIP_3050, HIP_0, 
HIP_0 }}, + {"rocblas_ssyr", {HIP_1071, HIP_0, HIP_0 }}, + {"rocblas_dsyr", {HIP_1071, HIP_0, HIP_0 }}, + {"rocblas_csyr", {HIP_1071, HIP_0, HIP_0 }}, + {"rocblas_zsyr", {HIP_1071, HIP_0, HIP_0 }}, + {"rocblas_ssyr2", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dsyr2", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_csyr2", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zsyr2", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_chemm", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zhemm", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_cherk", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zherk", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_cher2k", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zher2k", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_cherkx", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zherkx", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ssymm", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dsymm", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_csymm", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zsymm", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ssyrk", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dsyrk", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_csyrk", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zsyrk", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ssyr2k", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dsyr2k", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_csyr2k", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zsyr2k", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ssyrkx", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dsyrkx", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_csyrkx", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zsyrkx", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_strmm_outofplace", {HIP_5000, HIP_0, HIP_0 }}, + {"rocblas_dtrmm_outofplace", {HIP_5000, HIP_0, HIP_0 }}, + {"rocblas_ctrmm_outofplace", {HIP_5000, HIP_0, HIP_0 }}, + {"rocblas_ztrmm_outofplace", {HIP_5000, HIP_0, HIP_0 }}, + {"rocblas_strsm", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_dtrsm", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_ctrsm", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ztrsm", {HIP_3050, HIP_0, HIP_0 }}, + 
{"rocblas_strsm_batched", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dtrsm_batched", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ctrsm_batched", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ztrsm_batched", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_sgemm", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_dgemm", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_hgemm", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_cgemm", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_zgemm", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_sgemm_batched", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dgemm_batched", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_hgemm_batched", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_cgemm_batched", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zgemm_batched", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_sgemm_strided_batched", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_dgemm_strided_batched", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_hgemm_strided_batched", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_cgemm_strided_batched", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_zgemm_strided_batched", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_sdgmm", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ddgmm", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_cdgmm", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zdgmm", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_sgeam", {HIP_1064, HIP_0, HIP_0 }}, + {"rocblas_dgeam", {HIP_1064, HIP_0, HIP_0 }}, + {"rocblas_cgeam", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zgeam", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_gemm_ex", {HIP_1082, HIP_0, HIP_0 }}, + {"rocblas_gemm_batched_ex", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_gemm_strided_batched_ex", {HIP_1090, HIP_0, HIP_0 }}, + {"rocblas_axpy_ex", {HIP_3090, HIP_0, HIP_0 }}, + {"rocblas_dot_ex", {HIP_4010, HIP_0, HIP_0 }}, + {"rocblas_dotc_ex", {HIP_4010, HIP_0, HIP_0 }}, + {"rocblas_nrm2_ex", {HIP_4010, HIP_0, HIP_0 }}, + {"rocblas_rot_ex", {HIP_4010, HIP_0, HIP_0 }}, + {"rocblas_scal_ex", {HIP_4010, HIP_0, HIP_0 }}, + {"rocblas_initialize", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_create_handle", 
{HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_destroy_handle", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_set_stream", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_get_stream", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_set_pointer_mode", {HIP_1060, HIP_0, HIP_0 }}, + {"rocblas_get_pointer_mode", {HIP_1060, HIP_0, HIP_0 }}, + {"rocblas_set_atomics_mode", {HIP_3080, HIP_0, HIP_0 }}, + {"rocblas_get_atomics_mode", {HIP_3080, HIP_0, HIP_0 }}, + {"rocblas_set_vector", {HIP_1060, HIP_0, HIP_0 }}, + {"rocblas_get_vector", {HIP_1060, HIP_0, HIP_0 }}, + {"rocblas_set_matrix", {HIP_1060, HIP_0, HIP_0 }}, + {"rocblas_get_matrix", {HIP_1060, HIP_0, HIP_0 }}, + {"rocblas_set_vector_async", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_get_vector_async", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_set_matrix_async", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_get_matrix_async", {HIP_3050, HIP_0, HIP_0 }}, }; const std::map CUDA_BLAS_API_SECTION_MAP { diff --git a/src/CUDA2HIP_BLAS_API_types.cpp b/src/CUDA2HIP_BLAS_API_types.cpp index 997183f1..cd94f7b6 100644 --- a/src/CUDA2HIP_BLAS_API_types.cpp +++ b/src/CUDA2HIP_BLAS_API_types.cpp @@ -138,42 +138,46 @@ const std::map CUDA_BLAS_TYPE_NAME_MAP { {"CUBLAS_GEMM_ALGO15_TENSOR_OP", {"HIPBLAS_GEMM_ALGO15_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 115 // TODO: rename hipblasDatatype_t to hipDataType_t and move from hipBLAS to HIP - {"cudaDataType_t", {"hipblasDatatype_t", "rocblas_datatype_", CONV_TYPE, API_RUNTIME, 3}}, - {"cudaDataType", {"hipblasDatatype_t", "rocblas_datatype", CONV_TYPE, API_RUNTIME, 3}}, - {"CUDA_R_16F", {"HIPBLAS_R_16F", "rocblas_datatype_f16_r", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 2 // 150 - {"CUDA_C_16F", {"HIPBLAS_C_16F", "rocblas_datatype_f16_c", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 6 // 153 - {"CUDA_R_32F", {"HIPBLAS_R_32F", "rocblas_datatype_f32_r", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 0 // 151 - {"CUDA_C_32F", {"HIPBLAS_C_32F", "rocblas_datatype_f32_c", CONV_NUMERIC_LITERAL, API_RUNTIME, 
3}}, // 4 // 154 - {"CUDA_R_64F", {"HIPBLAS_R_64F", "rocblas_datatype_f64_r", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 1 // 152 - {"CUDA_C_64F", {"HIPBLAS_C_64F", "rocblas_datatype_f64_c", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 5 // 155 - {"CUDA_R_8I", {"HIPBLAS_R_8I", "rocblas_datatype_i8_r", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 3 // 160 - {"CUDA_C_8I", {"HIPBLAS_C_8I", "rocblas_datatype_i8_c", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 7 // 164 - {"CUDA_R_8U", {"HIPBLAS_R_8U", "rocblas_datatype_u8_r", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 8 // 161 - {"CUDA_C_8U", {"HIPBLAS_C_8U", "rocblas_datatype_u8_c", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 9 // 165 - {"CUDA_R_32I", {"HIPBLAS_R_32I", "rocblas_datatype_i32_r", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 10 // 162 - {"CUDA_C_32I", {"HIPBLAS_C_32I", "rocblas_datatype_i32_c", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 11 // 166 - {"CUDA_R_32U", {"HIPBLAS_R_32U", "rocblas_datatype_u32_r", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 12 // 163 - {"CUDA_C_32U", {"HIPBLAS_C_32U", "rocblas_datatype_u32_c", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 13 // 167 - {"CUDA_R_16BF", {"HIPBLAS_R_16B", "rocblas_datatype_bf16_r", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 14 // 168 - {"CUDA_C_16BF", {"HIPBLAS_C_16B", "rocblas_datatype_bf16_c", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 15 // 169 - {"CUDA_R_4I", {"HIPBLAS_R_4I", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 16 - {"CUDA_C_4I", {"HIPBLAS_C_4I", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 17 - {"CUDA_R_4U", {"HIPBLAS_R_4U", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 18 - {"CUDA_C_4U", {"HIPBLAS_C_4U", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 19 - {"CUDA_R_16I", {"HIPBLAS_R_16I", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 20 - {"CUDA_C_16I", {"HIPBLAS_C_16I", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 21 - {"CUDA_R_16U", {"HIPBLAS_R_16U", "", 
CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 22 - {"CUDA_C_16U", {"HIPBLAS_C_16U", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 23 - {"CUDA_R_64I", {"HIPBLAS_R_64I", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 24 - {"CUDA_C_64I", {"HIPBLAS_C_64I", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 25 - {"CUDA_R_64U", {"HIPBLAS_R_64U", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 26 - {"CUDA_C_64U", {"HIPBLAS_C_64U", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 27 + {"cudaDataType_t", {"hipblasDatatype_t", "rocblas_datatype_", CONV_TYPE, API_BLAS, 3}}, + {"cudaDataType", {"hipblasDatatype_t", "rocblas_datatype", CONV_TYPE, API_BLAS, 3}}, + {"CUDA_R_16F", {"HIPBLAS_R_16F", "rocblas_datatype_f16_r", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 2 // 150 + {"CUDA_C_16F", {"HIPBLAS_C_16F", "rocblas_datatype_f16_c", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 6 // 153 + {"CUDA_R_32F", {"HIPBLAS_R_32F", "rocblas_datatype_f32_r", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 0 // 151 + {"CUDA_C_32F", {"HIPBLAS_C_32F", "rocblas_datatype_f32_c", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 4 // 154 + {"CUDA_R_64F", {"HIPBLAS_R_64F", "rocblas_datatype_f64_r", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 1 // 152 + {"CUDA_C_64F", {"HIPBLAS_C_64F", "rocblas_datatype_f64_c", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 5 // 155 + {"CUDA_R_8I", {"HIPBLAS_R_8I", "rocblas_datatype_i8_r", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 3 // 160 + {"CUDA_C_8I", {"HIPBLAS_C_8I", "rocblas_datatype_i8_c", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 7 // 164 + {"CUDA_R_8U", {"HIPBLAS_R_8U", "rocblas_datatype_u8_r", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 8 // 161 + {"CUDA_C_8U", {"HIPBLAS_C_8U", "rocblas_datatype_u8_c", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 9 // 165 + {"CUDA_R_32I", {"HIPBLAS_R_32I", "rocblas_datatype_i32_r", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 10 // 162 + {"CUDA_C_32I", {"HIPBLAS_C_32I", "rocblas_datatype_i32_c", 
CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 11 // 166 + {"CUDA_R_32U", {"HIPBLAS_R_32U", "rocblas_datatype_u32_r", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 12 // 163 + {"CUDA_C_32U", {"HIPBLAS_C_32U", "rocblas_datatype_u32_c", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 13 // 167 + {"CUDA_R_16BF", {"HIPBLAS_R_16B", "rocblas_datatype_bf16_r", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 14 // 168 + {"CUDA_C_16BF", {"HIPBLAS_C_16B", "rocblas_datatype_bf16_c", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 15 // 169 + {"CUDA_R_4I", {"HIPBLAS_R_4I", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 16 + {"CUDA_C_4I", {"HIPBLAS_C_4I", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 17 + {"CUDA_R_4U", {"HIPBLAS_R_4U", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 18 + {"CUDA_C_4U", {"HIPBLAS_C_4U", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 19 + {"CUDA_R_16I", {"HIPBLAS_R_16I", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 20 + {"CUDA_C_16I", {"HIPBLAS_C_16I", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 21 + {"CUDA_R_16U", {"HIPBLAS_R_16U", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 22 + {"CUDA_C_16U", {"HIPBLAS_C_16U", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 23 + {"CUDA_R_64I", {"HIPBLAS_R_64I", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 24 + {"CUDA_C_64I", {"HIPBLAS_C_64I", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 25 + {"CUDA_R_64U", {"HIPBLAS_R_64U", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 26 + {"CUDA_C_64U", {"HIPBLAS_C_64U", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 27 + {"CUDA_R_8F_E4M3", {"HIPBLAS_R_8F_E4M3", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 28 + {"CUDA_R_8F_E5M2", {"HIPBLAS_R_8F_E5M2", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 29 {"cublasHandle_t", {"hipblasHandle_t", "rocblas_handle", CONV_TYPE, API_BLAS, 2}}, // TODO: dereferencing: typedef struct cublasContext *cublasHandle_t; 
{"cublasContext", {"hipblasContext", "_rocblas_handle", CONV_TYPE, API_BLAS, 2, HIP_UNSUPPORTED}}, - {"cublasComputeType_t", {"hipblasComputeType_t", "", CONV_TYPE, API_BLAS, 2, UNSUPPORTED}}, + // NOTE: renamed UNSUPPORTED hipblasComputeType_t to the HIP supported hipblasDatatype_t (workaround) + // TODO: change the type to the correct one after fixing https://github.com/ROCmSoftwarePlatform/hipBLAS/issues/529 + {"cublasComputeType_t", {"hipblasDatatype_t", "", CONV_TYPE, API_BLAS, 2}}, {"CUBLAS_COMPUTE_16F", {"HIPBLAS_COMPUTE_16F", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 64 {"CUBLAS_COMPUTE_16F_PEDANTIC", {"HIPBLAS_COMPUTE_16F_PEDANTIC", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 65 {"CUBLAS_COMPUTE_32F", {"HIPBLAS_COMPUTE_32F", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 68 @@ -288,6 +292,8 @@ const std::map CUDA_BLAS_TYPE_NAME_VER_MAP { {"CUDA_C_64I", {CUDA_110, CUDA_0, CUDA_0}}, {"CUDA_R_64U", {CUDA_110, CUDA_0, CUDA_0}}, {"CUDA_C_64U", {CUDA_110, CUDA_0, CUDA_0}}, + {"CUDA_R_8F_E4M3", {CUDA_118, CUDA_0, CUDA_0}}, + {"CUDA_R_8F_E5M2", {CUDA_118, CUDA_0, CUDA_0}}, }; const std::map HIP_BLAS_TYPE_NAME_VER_MAP { diff --git a/src/CUDA2HIP_DNN_API_functions.cpp b/src/CUDA2HIP_DNN_API_functions.cpp index 3babf0da..d41f2dfc 100644 --- a/src/CUDA2HIP_DNN_API_functions.cpp +++ b/src/CUDA2HIP_DNN_API_functions.cpp @@ -27,6 +27,7 @@ const std::map CUDA_DNN_FUNCTION_MAP { {"cudnnGetVersion", {"hipdnnGetVersion", "", CONV_LIB_FUNC, API_DNN, 2}}, {"cudnnGetCudartVersion", {"hipdnnGetCudartVersion", "", CONV_LIB_FUNC, API_DNN, 2, HIP_UNSUPPORTED}}, + {"cudnnGetMaxDeviceVersion", {"hipdnnGetMaxDeviceVersion", "", CONV_LIB_FUNC, API_DNN, 2, HIP_UNSUPPORTED}}, {"cudnnQueryRuntimeError", {"hipdnnQueryRuntimeError", "", CONV_LIB_FUNC, API_DNN, 2, HIP_UNSUPPORTED}}, {"cudnnGetProperty", {"hipdnnGetProperty", "", CONV_LIB_FUNC, API_DNN, 2, HIP_UNSUPPORTED}}, {"cudnnGetErrorString", {"hipdnnGetErrorString", "", CONV_LIB_FUNC, API_DNN, 2}}, 
@@ -608,6 +609,7 @@ const std::map CUDA_DNN_FUNCTION_VER_MAP { {"cudnnSetRNNDescriptor_v5", {CUDNN_705, CUDNN_765, CUDNN_801}}, {"cudnnSetActivationDescriptorSwishBeta", {CUDNN_820, CUDA_0, CUDA_0}}, {"cudnnGetActivationDescriptorSwishBeta", {CUDNN_820, CUDA_0, CUDA_0}}, + {"cudnnGetMaxDeviceVersion", {CUDNN_860, CUDA_0, CUDA_0}}, }; const std::map HIP_DNN_FUNCTION_VER_MAP { diff --git a/src/CUDA2HIP_DNN_API_types.cpp b/src/CUDA2HIP_DNN_API_types.cpp index 98580c33..de977487 100644 --- a/src/CUDA2HIP_DNN_API_types.cpp +++ b/src/CUDA2HIP_DNN_API_types.cpp @@ -59,22 +59,22 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_OPS_TRAIN_PATCH", {"HIPDNN_OPS_TRAIN_PATCH", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // cuDNN enums - {"cudnnStatus_t", {"hipdnnStatus_t", "", CONV_TYPE, API_DNN, 1}}, - {"CUDNN_STATUS_SUCCESS", {"HIPDNN_STATUS_SUCCESS", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 0 - {"CUDNN_STATUS_NOT_INITIALIZED", {"HIPDNN_STATUS_NOT_INITIALIZED", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 1 - {"CUDNN_STATUS_ALLOC_FAILED", {"HIPDNN_STATUS_ALLOC_FAILED", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 2 - {"CUDNN_STATUS_BAD_PARAM", {"HIPDNN_STATUS_BAD_PARAM", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 3 - {"CUDNN_STATUS_INTERNAL_ERROR", {"HIPDNN_STATUS_INTERNAL_ERROR", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 4 - {"CUDNN_STATUS_INVALID_VALUE", {"HIPDNN_STATUS_INVALID_VALUE", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 5 - {"CUDNN_STATUS_ARCH_MISMATCH", {"HIPDNN_STATUS_ARCH_MISMATCH", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 6 - {"CUDNN_STATUS_MAPPING_ERROR", {"HIPDNN_STATUS_MAPPING_ERROR", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 7 - {"CUDNN_STATUS_EXECUTION_FAILED", {"HIPDNN_STATUS_EXECUTION_FAILED", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 8 - {"CUDNN_STATUS_NOT_SUPPORTED", {"HIPDNN_STATUS_NOT_SUPPORTED", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 9 - {"CUDNN_STATUS_LICENSE_ERROR", {"HIPDNN_STATUS_LICENSE_ERROR", "", CONV_NUMERIC_LITERAL, 
API_DNN, 1}}, // 10 - {"CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING", {"HIPDNN_STATUS_RUNTIME_PREREQUISITE_MISSING", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 11 - {"CUDNN_STATUS_RUNTIME_IN_PROGRESS", {"HIPDNN_STATUS_RUNTIME_IN_PROGRESS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 12 - {"CUDNN_STATUS_RUNTIME_FP_OVERFLOW", {"HIPDNN_STATUS_RUNTIME_FP_OVERFLOW", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 13 - {"CUDNN_STATUS_VERSION_MISMATCH", {"HIPDNN_STATUS_VERSION_MISMATCH", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 14 + {"cudnnStatus_t", {"hipdnnStatus_t", "miopenStatus_t", CONV_TYPE, API_DNN, 1}}, + {"CUDNN_STATUS_SUCCESS", {"HIPDNN_STATUS_SUCCESS", "miopenStatusSuccess", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 0 + {"CUDNN_STATUS_NOT_INITIALIZED", {"HIPDNN_STATUS_NOT_INITIALIZED", "miopenStatusNotInitialized", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 1 + {"CUDNN_STATUS_ALLOC_FAILED", {"HIPDNN_STATUS_ALLOC_FAILED", "miopenStatusAllocFailed", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 2 // 4 + {"CUDNN_STATUS_BAD_PARAM", {"HIPDNN_STATUS_BAD_PARAM", "miopenStatusBadParm", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 3 + {"CUDNN_STATUS_INTERNAL_ERROR", {"HIPDNN_STATUS_INTERNAL_ERROR", "miopenStatusInternalError", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 4 // 5 + {"CUDNN_STATUS_INVALID_VALUE", {"HIPDNN_STATUS_INVALID_VALUE", "miopenStatusInvalidValue", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 5 // 2 + {"CUDNN_STATUS_ARCH_MISMATCH", {"HIPDNN_STATUS_ARCH_MISMATCH", "", CONV_NUMERIC_LITERAL, API_DNN, 1, ROC_UNSUPPORTED}}, // 6 + {"CUDNN_STATUS_MAPPING_ERROR", {"HIPDNN_STATUS_MAPPING_ERROR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, ROC_UNSUPPORTED}}, // 7 + {"CUDNN_STATUS_EXECUTION_FAILED", {"HIPDNN_STATUS_EXECUTION_FAILED", "", CONV_NUMERIC_LITERAL, API_DNN, 1, ROC_UNSUPPORTED}}, // 8 + {"CUDNN_STATUS_NOT_SUPPORTED", {"HIPDNN_STATUS_NOT_SUPPORTED", "miopenStatusUnsupportedOp", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 9 // 8 + 
{"CUDNN_STATUS_LICENSE_ERROR", {"HIPDNN_STATUS_LICENSE_ERROR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, ROC_UNSUPPORTED}}, // 10 + {"CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING", {"HIPDNN_STATUS_RUNTIME_PREREQUISITE_MISSING", "", CONV_NUMERIC_LITERAL, API_DNN, 1, ROC_UNSUPPORTED}}, // 11 + {"CUDNN_STATUS_RUNTIME_IN_PROGRESS", {"HIPDNN_STATUS_RUNTIME_IN_PROGRESS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 12 + {"CUDNN_STATUS_RUNTIME_FP_OVERFLOW", {"HIPDNN_STATUS_RUNTIME_FP_OVERFLOW", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 13 + {"CUDNN_STATUS_VERSION_MISMATCH", {"HIPDNN_STATUS_VERSION_MISMATCH", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 14 {"cudnnRuntimeTag_t", {"hipdnnRuntimeTag_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, {"cudnnConvolutionMode_t", {"hipdnnConvolutionMode_t", "", CONV_TYPE, API_DNN, 1}}, {"CUDNN_CONVOLUTION", {"HIPDNN_CONVOLUTION", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 0 @@ -98,6 +98,9 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_DATA_INT8x32", {"HIPDNN_DATA_INT8x32", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 8 {"CUDNN_DATA_BFLOAT16", {"HIPDNN_DATA_BFLOAT16", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 9 {"CUDNN_DATA_INT64", {"HIPDNN_DATA_INT64", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 10 + {"CUDNN_DATA_BOOLEAN", {"HIPDNN_DATA_BOOLEAN", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 11 + {"CUDNN_DATA_FP8_E4M3", {"HIPDNN_DATA_FP8_E4M3", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 12 + {"CUDNN_DATA_FP8_E5M2", {"HIPDNN_DATA_FP8_E5M2", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 13 {"cudnnErrQueryMode_t", {"hipdnnErrQueryMode_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_ERRQUERY_RAWCODE", {"HIPDNN_ERRQUERY_RAWCODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 0 {"CUDNN_ERRQUERY_NONBLOCKING", {"HIPDNN_ERRQUERY_NONBLOCKING", "", CONV_NUMERIC_LITERAL, API_DNN, 1, 
HIP_UNSUPPORTED}}, // 1 @@ -370,6 +373,23 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_POINTWISE_MIN", {"HIPDNN_POINTWISE_MIN", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2 {"CUDNN_POINTWISE_MAX", {"HIPDNN_POINTWISE_MAX", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 3 {"CUDNN_POINTWISE_SQRT", {"HIPDNN_POINTWISE_SQRT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 4 + {"CUDNN_POINTWISE_ADD_SQUARE", {"HIPDNN_POINTWISE_ADD_SQUARE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 5 + {"CUDNN_POINTWISE_DIV", {"HIPDNN_POINTWISE_DIV", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 6 + {"CUDNN_POINTWISE_MOD", {"HIPDNN_POINTWISE_MOD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 7 + {"CUDNN_POINTWISE_POW", {"HIPDNN_POINTWISE_POW", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 8 + {"CUDNN_POINTWISE_SUB", {"HIPDNN_POINTWISE_SUB", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 9 + {"CUDNN_POINTWISE_ABS", {"HIPDNN_POINTWISE_ABS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 10 + {"CUDNN_POINTWISE_CEIL", {"HIPDNN_POINTWISE_CEIL", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 11 + {"CUDNN_POINTWISE_COS", {"HIPDNN_POINTWISE_COS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 12 + {"CUDNN_POINTWISE_EXP", {"HIPDNN_POINTWISE_EXP", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 13 + {"CUDNN_POINTWISE_FLOOR", {"HIPDNN_POINTWISE_FLOOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 14 + {"CUDNN_POINTWISE_LOG", {"HIPDNN_POINTWISE_LOG", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 15 + {"CUDNN_POINTWISE_NEG", {"HIPDNN_POINTWISE_NEG", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 16 + {"CUDNN_POINTWISE_RSQRT", {"HIPDNN_POINTWISE_RSQRT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 17 + {"CUDNN_POINTWISE_SIN", {"HIPDNN_POINTWISE_SIN", "", CONV_NUMERIC_LITERAL, 
API_DNN, 1, HIP_UNSUPPORTED}}, // 18 + {"CUDNN_POINTWISE_TAN", {"HIPDNN_POINTWISE_TAN", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 19 + {"CUDNN_POINTWISE_ERF", {"HIPDNN_POINTWISE_ERF", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 20 + {"CUDNN_POINTWISE_IDENTITY", {"HIPDNN_POINTWISE_IDENTITY", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 21 {"CUDNN_POINTWISE_RELU_FWD", {"HIPDNN_POINTWISE_RELU_FWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 100 {"CUDNN_POINTWISE_TANH_FWD", {"HIPDNN_POINTWISE_TANH_FWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 101 {"CUDNN_POINTWISE_SIGMOID_FWD", {"HIPDNN_POINTWISE_SIGMOID_FWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 102 @@ -377,6 +397,7 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_POINTWISE_GELU_FWD", {"HIPDNN_POINTWISE_GELU_FWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 104 {"CUDNN_POINTWISE_SOFTPLUS_FWD", {"HIPDNN_POINTWISE_SOFTPLUS_FWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 105 {"CUDNN_POINTWISE_SWISH_FWD", {"HIPDNN_POINTWISE_SWISH_FWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 106 + {"CUDNN_POINTWISE_GELU_APPROX_TANH_FWD", {"HIPDNN_POINTWISE_GELU_APPROX_TANH_FWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 107 {"CUDNN_POINTWISE_RELU_BWD", {"HIPDNN_POINTWISE_RELU_BWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 200 {"CUDNN_POINTWISE_TANH_BWD", {"HIPDNN_POINTWISE_TANH_BWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 201 {"CUDNN_POINTWISE_SIGMOID_BWD", {"HIPDNN_POINTWISE_SIGMOID_BWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 202 @@ -384,6 +405,18 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_POINTWISE_GELU_BWD", {"HIPDNN_POINTWISE_GELU_BWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 204 {"CUDNN_POINTWISE_SOFTPLUS_BWD", {"HIPDNN_POINTWISE_SOFTPLUS_BWD", "", 
CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 205 {"CUDNN_POINTWISE_SWISH_BWD", {"HIPDNN_POINTWISE_SWISH_BWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 206 + {"CUDNN_POINTWISE_GELU_APPROX_TANH_BWD", {"HIPDNN_POINTWISE_GELU_APPROX_TANH_BWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 207 + {"CUDNN_POINTWISE_CMP_EQ", {"HIPDNN_POINTWISE_CMP_EQ", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 300 + {"CUDNN_POINTWISE_CMP_NEQ", {"HIPDNN_POINTWISE_CMP_NEQ", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 301 + {"CUDNN_POINTWISE_CMP_GT", {"HIPDNN_POINTWISE_CMP_GT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 302 + {"CUDNN_POINTWISE_CMP_GE", {"HIPDNN_POINTWISE_CMP_GE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 303 + {"CUDNN_POINTWISE_CMP_LT", {"HIPDNN_POINTWISE_CMP_LT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 304 + {"CUDNN_POINTWISE_CMP_LE", {"HIPDNN_POINTWISE_CMP_LE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 305 + {"CUDNN_POINTWISE_LOGICAL_AND", {"HIPDNN_POINTWISE_LOGICAL_AND", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 400 + {"CUDNN_POINTWISE_LOGICAL_OR", {"HIPDNN_POINTWISE_LOGICAL_OR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 401 + {"CUDNN_POINTWISE_LOGICAL_NOT", {"HIPDNN_POINTWISE_LOGICAL_NOT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 402 + {"CUDNN_POINTWISE_GEN_INDEX", {"HIPDNN_POINTWISE_GEN_INDEX", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 501 + {"CUDNN_POINTWISE_BINARY_SELECT", {"HIPDNN_POINTWISE_BINARY_SELECT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 601 {"cudnnGenStatsMode_t", {"hipdnnGenStatsMode_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_GENSTATS_SUM_SQSUM", {"HIPDNN_GENSTATS_SUM_SQSUM", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 0 {"cudnnBackendAttributeName_t", {"hipdnnBackendAttributeName_t", "", 
CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, @@ -396,6 +429,7 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_ATTR_POINTWISE_ELU_ALPHA", {"HIPDNN_ATTR_POINTWISE_ELU_ALPHA", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 6 {"CUDNN_ATTR_POINTWISE_SOFTPLUS_BETA", {"HIPDNN_ATTR_POINTWISE_SOFTPLUS_BETA", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 7 {"CUDNN_ATTR_POINTWISE_SWISH_BETA", {"HIPDNN_ATTR_POINTWISE_SWISH_BETA", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 8 + {"CUDNN_ATTR_POINTWISE_AXIS", {"HIPDNN_ATTR_POINTWISE_AXIS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 9 {"CUDNN_ATTR_CONVOLUTION_COMP_TYPE", {"HIPDNN_ATTR_CONVOLUTION_COMP_TYPE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 100 {"CUDNN_ATTR_CONVOLUTION_CONV_MODE", {"HIPDNN_ATTR_CONVOLUTION_CONV_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 101 {"CUDNN_ATTR_CONVOLUTION_DILATIONS", {"HIPDNN_ATTR_CONVOLUTION_DILATIONS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 102 @@ -414,6 +448,7 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE", {"HIPDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 402 {"CUDNN_ATTR_EXECUTION_PLAN_COMPUTED_INTERMEDIATE_UIDS", {"HIPDNN_ATTR_EXECUTION_PLAN_COMPUTED_INTERMEDIATE_UIDS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 403 {"CUDNN_ATTR_EXECUTION_PLAN_RUN_ONLY_INTERMEDIATE_UIDS", {"HIPDNN_ATTR_EXECUTION_PLAN_RUN_ONLY_INTERMEDIATE_UIDS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 404 + {"CUDNN_ATTR_EXECUTION_PLAN_JSON_REPRESENTATION", {"HIPDNN_ATTR_EXECUTION_PLAN_JSON_REPRESENTATION", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 405 {"CUDNN_ATTR_INTERMEDIATE_INFO_UNIQUE_ID", {"HIPDNN_ATTR_INTERMEDIATE_INFO_UNIQUE_ID", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 500 {"CUDNN_ATTR_INTERMEDIATE_INFO_SIZE", 
{"HIPDNN_ATTR_INTERMEDIATE_INFO_SIZE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 501 {"CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_DATA_UIDS", {"HIPDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_DATA_UIDS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 502 @@ -446,6 +481,7 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_ATTR_OPERATION_POINTWISE_ALPHA2", {"HIPDNN_ATTR_OPERATION_POINTWISE_ALPHA2", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 755 {"CUDNN_ATTR_OPERATION_POINTWISE_DXDESC", {"HIPDNN_ATTR_OPERATION_POINTWISE_DXDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 756 {"CUDNN_ATTR_OPERATION_POINTWISE_DYDESC", {"HIPDNN_ATTR_OPERATION_POINTWISE_DYDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 757 + {"CUDNN_ATTR_OPERATION_POINTWISE_TDESC", {"HIPDNN_ATTR_OPERATION_POINTWISE_TDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 758 {"CUDNN_ATTR_OPERATION_GENSTATS_MODE", {"HIPDNN_ATTR_OPERATION_GENSTATS_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 770 {"CUDNN_ATTR_OPERATION_GENSTATS_MATH_PREC", {"HIPDNN_ATTR_OPERATION_GENSTATS_MATH_PREC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 771 {"CUDNN_ATTR_OPERATION_GENSTATS_XDESC", {"HIPDNN_ATTR_OPERATION_GENSTATS_XDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 772 @@ -480,6 +516,7 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_ATTR_TENSOR_UNIQUE_ID", {"HIPDNN_ATTR_TENSOR_UNIQUE_ID", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 906 {"CUDNN_ATTR_TENSOR_IS_VIRTUAL", {"HIPDNN_ATTR_TENSOR_IS_VIRTUAL", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 907 {"CUDNN_ATTR_TENSOR_IS_BY_VALUE", {"HIPDNN_ATTR_TENSOR_IS_BY_VALUE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 908 + {"CUDNN_ATTR_TENSOR_REORDERING_MODE", {"HIPDNN_ATTR_TENSOR_REORDERING_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 909 
{"CUDNN_ATTR_VARIANT_PACK_UNIQUE_IDS", {"HIPDNN_ATTR_VARIANT_PACK_UNIQUE_IDS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1000 {"CUDNN_ATTR_VARIANT_PACK_DATA_POINTERS", {"HIPDNN_ATTR_VARIANT_PACK_DATA_POINTERS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1001 {"CUDNN_ATTR_VARIANT_PACK_INTERMEDIATES", {"HIPDNN_ATTR_VARIANT_PACK_INTERMEDIATES", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1002 @@ -518,6 +555,62 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_DY_SCALE_DESC", {"HIPDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_DY_SCALE_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1628 {"CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_X_SCALE_DESC", {"HIPDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_X_SCALE_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1629 {"CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_BIAS", {"HIPDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_BIAS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1630 + {"CUDNN_ATTR_RESAMPLE_MODE", {"HIPDNN_ATTR_RESAMPLE_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1700 + {"CUDNN_ATTR_RESAMPLE_COMP_TYPE", {"HIPDNN_ATTR_RESAMPLE_COMP_TYPE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1701 + {"CUDNN_ATTR_RESAMPLE_SPATIAL_DIMS", {"HIPDNN_ATTR_RESAMPLE_SPATIAL_DIMS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1702 + {"CUDNN_ATTR_RESAMPLE_POST_PADDINGS", {"HIPDNN_ATTR_RESAMPLE_POST_PADDINGS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1703 + {"CUDNN_ATTR_RESAMPLE_PRE_PADDINGS", {"HIPDNN_ATTR_RESAMPLE_PRE_PADDINGS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1704 + {"CUDNN_ATTR_RESAMPLE_STRIDES", {"HIPDNN_ATTR_RESAMPLE_STRIDES", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1705 + {"CUDNN_ATTR_RESAMPLE_WINDOW_DIMS", {"HIPDNN_ATTR_RESAMPLE_WINDOW_DIMS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1706 + 
{"CUDNN_ATTR_RESAMPLE_NAN_PROPAGATION", {"HIPDNN_ATTR_RESAMPLE_NAN_PROPAGATION", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1707 + {"CUDNN_ATTR_RESAMPLE_PADDING_MODE", {"HIPDNN_ATTR_RESAMPLE_PADDING_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1708 + {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_XDESC", {"HIPDNN_ATTR_OPERATION_RESAMPLE_FWD_XDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1710 + {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_YDESC", {"HIPDNN_ATTR_OPERATION_RESAMPLE_FWD_YDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1711 + {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_IDXDESC", {"HIPDNN_ATTR_OPERATION_RESAMPLE_FWD_IDXDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1712 + {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_ALPHA", {"HIPDNN_ATTR_OPERATION_RESAMPLE_FWD_ALPHA", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1713 + {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_BETA", {"HIPDNN_ATTR_OPERATION_RESAMPLE_FWD_BETA", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1714 + {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_DESC", {"HIPDNN_ATTR_OPERATION_RESAMPLE_FWD_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1716 + {"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DXDESC", {"HIPDNN_ATTR_OPERATION_RESAMPLE_BWD_DXDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1720 + {"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DYDESC", {"HIPDNN_ATTR_OPERATION_RESAMPLE_BWD_DYDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1721 + {"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_IDXDESC", {"HIPDNN_ATTR_OPERATION_RESAMPLE_BWD_IDXDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1722 + {"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_ALPHA", {"HIPDNN_ATTR_OPERATION_RESAMPLE_BWD_ALPHA", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1723 + {"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_BETA", {"HIPDNN_ATTR_OPERATION_RESAMPLE_BWD_BETA", "", CONV_NUMERIC_LITERAL, API_DNN, 1, 
HIP_UNSUPPORTED}}, // 1724 + {"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DESC", {"HIPDNN_ATTR_OPERATION_RESAMPLE_BWD_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1725 + {"CUDNN_ATTR_OPERATION_CONCAT_AXIS", {"HIPDNN_ATTR_OPERATION_CONCAT_AXIS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1800 + {"CUDNN_ATTR_OPERATION_CONCAT_INPUT_DESCS", {"HIPDNN_ATTR_OPERATION_CONCAT_INPUT_DESCS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1801 + {"CUDNN_ATTR_OPERATION_CONCAT_INPLACE_INDEX", {"HIPDNN_ATTR_OPERATION_CONCAT_INPLACE_INDEX", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1802 + {"CUDNN_ATTR_OPERATION_CONCAT_OUTPUT_DESC", {"HIPDNN_ATTR_OPERATION_CONCAT_OUTPUT_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1803 + {"CUDNN_ATTR_OPERATION_SIGNAL_MODE", {"HIPDNN_ATTR_OPERATION_SIGNAL_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1900 + {"CUDNN_ATTR_OPERATION_SIGNAL_FLAGDESC", {"HIPDNN_ATTR_OPERATION_SIGNAL_FLAGDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1901 + {"CUDNN_ATTR_OPERATION_SIGNAL_VALUE", {"HIPDNN_ATTR_OPERATION_SIGNAL_VALUE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1902 + {"CUDNN_ATTR_OPERATION_SIGNAL_XDESC", {"HIPDNN_ATTR_OPERATION_SIGNAL_XDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1903 + {"CUDNN_ATTR_OPERATION_SIGNAL_YDESC", {"HIPDNN_ATTR_OPERATION_SIGNAL_YDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1904 + {"CUDNN_ATTR_OPERATION_NORM_FWD_MODE", {"HIPDNN_ATTR_OPERATION_NORM_FWD_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2000 + {"CUDNN_ATTR_OPERATION_NORM_FWD_PHASE", {"HIPDNN_ATTR_OPERATION_NORM_FWD_PHASE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2001 + {"CUDNN_ATTR_OPERATION_NORM_FWD_XDESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_XDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2002 + 
{"CUDNN_ATTR_OPERATION_NORM_FWD_MEAN_DESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_MEAN_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2003 + {"CUDNN_ATTR_OPERATION_NORM_FWD_INV_VARIANCE_DESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_INV_VARIANCE_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2004 + {"CUDNN_ATTR_OPERATION_NORM_FWD_SCALE_DESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_SCALE_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2005 + {"CUDNN_ATTR_OPERATION_NORM_FWD_BIAS_DESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_BIAS_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2006 + {"CUDNN_ATTR_OPERATION_NORM_FWD_EPSILON_DESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_EPSILON_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2007 + {"CUDNN_ATTR_OPERATION_NORM_FWD_EXP_AVG_FACTOR_DESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_EXP_AVG_FACTOR_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2008 + {"CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_MEAN_DESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_MEAN_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2009 + {"CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_VAR_DESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_VAR_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2010 + {"CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_MEAN_DESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_MEAN_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2011 + {"CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_VAR_DESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_VAR_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2012 + {"CUDNN_ATTR_OPERATION_NORM_FWD_YDESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_YDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2013 + {"CUDNN_ATTR_OPERATION_NORM_FWD_PEER_STAT_DESCS", {"HIPDNN_ATTR_OPERATION_NORM_FWD_PEER_STAT_DESCS", "", CONV_NUMERIC_LITERAL, API_DNN, 
1, HIP_UNSUPPORTED}}, // 2014 + {"CUDNN_ATTR_OPERATION_NORM_BWD_MODE", {"HIPDNN_ATTR_OPERATION_NORM_BWD_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2100 + {"CUDNN_ATTR_OPERATION_NORM_BWD_XDESC", {"HIPDNN_ATTR_OPERATION_NORM_BWD_XDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2101 + {"CUDNN_ATTR_OPERATION_NORM_BWD_MEAN_DESC", {"HIPDNN_ATTR_OPERATION_NORM_BWD_MEAN_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2102 + {"CUDNN_ATTR_OPERATION_NORM_BWD_INV_VARIANCE_DESC", {"HIPDNN_ATTR_OPERATION_NORM_BWD_INV_VARIANCE_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2103 + {"CUDNN_ATTR_OPERATION_NORM_BWD_DYDESC", {"HIPDNN_ATTR_OPERATION_NORM_BWD_DYDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2104 + {"CUDNN_ATTR_OPERATION_NORM_BWD_SCALE_DESC", {"HIPDNN_ATTR_OPERATION_NORM_BWD_SCALE_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2105 + {"CUDNN_ATTR_OPERATION_NORM_BWD_EPSILON_DESC", {"HIPDNN_ATTR_OPERATION_NORM_BWD_EPSILON_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2106 + {"CUDNN_ATTR_OPERATION_NORM_BWD_DSCALE_DESC", {"HIPDNN_ATTR_OPERATION_NORM_BWD_DSCALE_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2107 + {"CUDNN_ATTR_OPERATION_NORM_BWD_DBIAS_DESC", {"HIPDNN_ATTR_OPERATION_NORM_BWD_DBIAS_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2108 + {"CUDNN_ATTR_OPERATION_NORM_BWD_DXDESC", {"HIPDNN_ATTR_OPERATION_NORM_BWD_DXDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2109 + {"CUDNN_ATTR_OPERATION_NORM_BWD_PEER_STAT_DESCS", {"HIPDNN_ATTR_OPERATION_NORM_BWD_PEER_STAT_DESCS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2110 {"cudnnBackendAttributeType_t", {"hipdnnBackendAttributeType_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_TYPE_HANDLE", {"HIPDNN_TYPE_HANDLE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_TYPE_DATA_TYPE", 
{"HIPDNN_TYPE_DATA_TYPE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, @@ -539,6 +632,15 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_TYPE_BN_FINALIZE_STATS_MODE", {"HIPDNN_TYPE_BN_FINALIZE_STATS_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_TYPE_REDUCTION_OPERATOR_TYPE", {"HIPDNN_TYPE_REDUCTION_OPERATOR_TYPE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_TYPE_BEHAVIOR_NOTE", {"HIPDNN_TYPE_BEHAVIOR_NOTE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_TYPE_TENSOR_REORDERING_MODE", {"HIPDNN_TYPE_TENSOR_REORDERING_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_TYPE_RESAMPLE_MODE", {"HIPDNN_TYPE_RESAMPLE_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_TYPE_PADDING_MODE", {"HIPDNN_TYPE_PADDING_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_TYPE_INT32", {"HIPDNN_TYPE_INT32", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_TYPE_CHAR", {"HIPDNN_TYPE_CHAR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_TYPE_SIGNAL_MODE", {"HIPDNN_TYPE_SIGNAL_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_TYPE_FRACTION", {"HIPDNN_TYPE_FRACTION", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_TYPE_NORM_MODE", {"HIPDNN_TYPE_NORM_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_TYPE_NORM_FWD_PHASE", {"HIPDNN_TYPE_NORM_FWD_PHASE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"cudnnBackendDescriptorType_t", {"hipdnnBackendDescriptorType_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_BACKEND_POINTWISE_DESCRIPTOR", {"HIPDNN_BACKEND_POINTWISE_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_BACKEND_CONVOLUTION_DESCRIPTOR", {"HIPDNN_BACKEND_CONVOLUTION_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, @@ -564,6 +666,13 @@ const std::map 
CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_BACKEND_REDUCTION_DESCRIPTOR", {"HIPDNN_BACKEND_REDUCTION_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_BACKEND_OPERATION_REDUCTION_DESCRIPTOR", {"HIPDNN_BACKEND_OPERATION_REDUCTION_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_BACKEND_OPERATION_BN_BWD_WEIGHTS_DESCRIPTOR", {"HIPDNN_BACKEND_OPERATION_BN_BWD_WEIGHTS_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_BACKEND_RESAMPLE_DESCRIPTOR", {"HIPDNN_BACKEND_RESAMPLE_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_BACKEND_OPERATION_RESAMPLE_FWD_DESCRIPTOR", {"HIPDNN_BACKEND_OPERATION_RESAMPLE_FWD_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_BACKEND_OPERATION_RESAMPLE_BWD_DESCRIPTOR", {"HIPDNN_BACKEND_OPERATION_RESAMPLE_BWD_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_BACKEND_OPERATION_CONCAT_DESCRIPTOR", {"HIPDNN_BACKEND_OPERATION_CONCAT_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_BACKEND_OPERATION_SIGNAL_DESCRIPTOR", {"HIPDNN_BACKEND_OPERATION_SIGNAL_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_BACKEND_OPERATION_NORM_FORWARD_DESCRIPTOR", {"HIPDNN_BACKEND_OPERATION_NORM_FORWARD_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_BACKEND_OPERATION_NORM_BACKWARD_DESCRIPTOR", {"HIPDNN_BACKEND_OPERATION_NORM_BACKWARD_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"cudnnBackendNumericalNote_t", {"hipdnnBackendNumericalNote_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_NUMERICAL_NOTE_TENSOR_CORE", {"HIPDNN_NUMERICAL_NOTE_TENSOR_CORE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_NUMERICAL_NOTE_DOWN_CONVERT_INPUTS", {"HIPDNN_NUMERICAL_NOTE_DOWN_CONVERT_INPUTS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, @@ -571,6 +680,9 @@ const 
std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_NUMERICAL_NOTE_FFT", {"HIPDNN_NUMERICAL_NOTE_FFT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_NUMERICAL_NOTE_NONDETERMINISTIC", {"HIPDNN_NUMERICAL_NOTE_NONDETERMINISTIC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_NUMERICAL_NOTE_WINOGRAD", {"HIPDNN_NUMERICAL_NOTE_WINOGRAD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_4x4", {"HIPDNN_NUMERICAL_NOTE_WINOGRAD_TILE_4x4", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_6x6", {"HIPDNN_NUMERICAL_NOTE_WINOGRAD_TILE_6x6", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_13x13", {"HIPDNN_NUMERICAL_NOTE_WINOGRAD_TILE_13x13", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_NUMERICAL_NOTE_TYPE_COUNT", {"HIPDNN_NUMERICAL_NOTE_TYPE_COUNT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"cudnnBackendLayoutType_t", {"hipdnnBackendLayoutType_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_LAYOUT_TYPE_PREFERRED_NCHW", {"HIPDNN_LAYOUT_TYPE_PREFERRED_NCHW", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, @@ -604,10 +716,16 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_KNOB_TYPE_LDGC", {"HIPDNN_KNOB_TYPE_LDGC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_KNOB_TYPE_SPECFILT", {"HIPDNN_KNOB_TYPE_SPECFILT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_KNOB_TYPE_KERNEL_CFG", {"HIPDNN_KNOB_TYPE_KERNEL_CFG", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_KNOB_TYPE_WORKSPACE", {"HIPDNN_KNOB_TYPE_WORKSPACE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_KNOB_TYPE_TILE_CGA", {"HIPDNN_KNOB_TYPE_TILE_CGA", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_KNOB_TYPE_TILE_CGA_M", {"HIPDNN_KNOB_TYPE_TILE_CGA_M", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + 
{"CUDNN_KNOB_TYPE_TILE_CGA_N", {"HIPDNN_KNOB_TYPE_TILE_CGA_N", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_KNOB_TYPE_COUNTS", {"HIPDNN_KNOB_TYPE_COUNTS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"cudnnBackendHeurMode_t", {"hipdnnBackendHeurMode_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_HEUR_MODE_INSTANT", {"HIPDNN_HEUR_MODE_INSTANT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_HEUR_MODE_B", {"HIPDNN_HEUR_MODE_B", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_HEUR_MODE_FALLBACK", {"HIPDNN_HEUR_MODE_FALLBACK", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_HEUR_MODE_A", {"HIPDNN_HEUR_MODE_A", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_HEUR_MODES_COUNT", {"HIPDNN_HEUR_MODES_COUNT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"cudnnNormMode_t", {"hipdnnNormMode_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_NORM_PER_ACTIVATION", {"HIPDNN_NORM_PER_ACTIVATION", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, @@ -624,7 +742,34 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_BN_FINALIZE_STATISTICS_INFERENCE", {"HIPDNN_BN_FINALIZE_STATISTICS_INFERENCE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"cudnnBackendBehaviorNote_t", {"hipdnnBackendBehaviorNote_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION", {"HIPDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 0 + {"CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER", {"HIPDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1 + {"CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER", {"HIPDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2 {"CUDNN_BEHAVIOR_NOTE_TYPE_COUNT", {"HIPDNN_BEHAVIOR_NOTE_TYPE_COUNT", "", CONV_NUMERIC_LITERAL, 
API_DNN, 1, HIP_UNSUPPORTED}}, + {"cudnnResampleMode_t", {"hipdnnResampleMode_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_RESAMPLE_NEAREST", {"HIPDNN_RESAMPLE_NEAREST", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_RESAMPLE_BILINEAR", {"HIPDNN_RESAMPLE_BILINEAR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_RESAMPLE_AVGPOOL", {"HIPDNN_RESAMPLE_AVGPOOL", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_RESAMPLE_AVGPOOL_INCLUDE_PADDING", {"HIPDNN_RESAMPLE_AVGPOOL_INCLUDE_PADDING", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_RESAMPLE_AVGPOOL_EXCLUDE_PADDING", {"HIPDNN_RESAMPLE_AVGPOOL_EXCLUDE_PADDING", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_RESAMPLE_MAXPOOL", {"HIPDNN_RESAMPLE_MAXPOOL", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"cudnnSignalMode_t", {"hipdnnSignalMode_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_SIGNAL_SET", {"HIPDNN_SIGNAL_SET", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_SIGNAL_WAIT", {"HIPDNN_SIGNAL_WAIT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"cudnnBackendTensorReordering_t", {"hipdnnBackendTensorReordering_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_TENSOR_REORDERING_NONE", {"HIPDNN_TENSOR_REORDERING_NONE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_TENSOR_REORDERING_INT8x32", {"HIPDNN_TENSOR_REORDERING_INT8x32", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"cudnnPaddingMode_t", {"hipdnnPaddingMode_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_ZERO_PAD", {"HIPDNN_ZERO_PAD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_NEG_INF_PAD", {"HIPDNN_NEG_INF_PAD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_EDGE_VAL_PAD", {"HIPDNN_EDGE_VAL_PAD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"cudnnBackendNormMode_t", 
{"hipdnnBackendNormMode_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_LAYER_NORM", {"HIPDNN_LAYER_NORM", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_INSTANCE_NORM", {"HIPDNN_INSTANCE_NORM", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_BATCH_NORM", {"HIPDNN_BATCH_NORM", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_GROUP_NORM", {"HIPDNN_GROUP_NORM", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"cudnnBackendNormFwdPhase_t", {"hipdnnBackendNormFwdPhase_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_NORM_FWD_INFERENCE", {"HIPDNN_NORM_FWD_INFERENCE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_NORM_FWD_TRAINING", {"HIPDNN_NORM_FWD_TRAINING", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // cuDNN types {"cudnnContext", {"hipdnnContext", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, @@ -687,6 +832,8 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"cudnnBackendDescriptor_t", {"hipdnnBackendDescriptor_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, {"libraryPropertyType", {"hipdnnLibraryPropertyType", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, {"libraryPropertyType_t", {"hipdnnLibraryPropertyType_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, + {"cudnnFractionStruct", {"hipdnnFractionStruct", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, + {"cudnnFraction_t", {"hipdnnFraction_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, }; const std::map CUDA_DNN_TYPE_NAME_VER_MAP { @@ -1350,6 +1497,153 @@ const std::map CUDA_DNN_TYPE_NAME_VER_MAP { {"cudnnBackendBehaviorNote_t", {CUDNN_820, CUDA_0, CUDA_0 }}, {"CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION", {CUDNN_820, CUDA_0, CUDA_0 }}, {"CUDNN_BEHAVIOR_NOTE_TYPE_COUNT", {CUDNN_820, CUDA_0, CUDA_0 }}, + {"CUDNN_DATA_BOOLEAN", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_ADD_SQUARE", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_DIV", {CUDNN_830, CUDA_0, CUDA_0 }}, + 
{"CUDNN_POINTWISE_MOD", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_POW", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_SUB", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_ABS", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_CEIL", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_COS", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_EXP", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_FLOOR", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_LOG", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_NEG", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_RSQRT", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_SIN", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_TAN", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_ERF", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_IDENTITY", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_GELU_APPROX_TANH_FWD", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_GELU_APPROX_TANH_BWD", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_CMP_EQ", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_CMP_NEQ", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_CMP_GT", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_CMP_GE", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_CMP_LT", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_CMP_LE", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_LOGICAL_AND", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_LOGICAL_OR", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_LOGICAL_NOT", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_GEN_INDEX", {CUDNN_840, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_BINARY_SELECT", {CUDNN_840, CUDA_0, CUDA_0 }}, + {"cudnnFractionStruct", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"cudnnFraction_t", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"cudnnResampleMode_t", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_RESAMPLE_NEAREST", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_RESAMPLE_BILINEAR", {CUDNN_830, CUDA_0, CUDA_0 }}, + 
{"CUDNN_RESAMPLE_AVGPOOL", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_RESAMPLE_MAXPOOL", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"cudnnSignalMode_t", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_SIGNAL_SET", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_SIGNAL_WAIT", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_POINTWISE_AXIS", {CUDNN_840, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_EXECUTION_PLAN_JSON_REPRESENTATION", {CUDNN_840, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_POINTWISE_TDESC", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_TENSOR_REORDERING_MODE", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_RESAMPLE_MODE", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_RESAMPLE_COMP_TYPE", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_RESAMPLE_SPATIAL_DIMS", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_RESAMPLE_POST_PADDINGS", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_RESAMPLE_PRE_PADDINGS", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_RESAMPLE_STRIDES", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_RESAMPLE_WINDOW_DIMS", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_RESAMPLE_NAN_PROPAGATION", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_RESAMPLE_PADDING_MODE", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_XDESC", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_YDESC", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_IDXDESC", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_ALPHA", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_BETA", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_DESC", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DXDESC", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DYDESC", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_IDXDESC", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_ALPHA", {CUDNN_830, CUDA_0, CUDA_0 }}, + 
{"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_BETA", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DESC", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_CONCAT_AXIS", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_CONCAT_INPUT_DESCS", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_CONCAT_INPLACE_INDEX", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_CONCAT_OUTPUT_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_SIGNAL_MODE", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_SIGNAL_FLAGDESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_SIGNAL_VALUE", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_SIGNAL_XDESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_SIGNAL_YDESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_FWD_MODE", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_FWD_PHASE", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_FWD_XDESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_FWD_MEAN_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_FWD_INV_VARIANCE_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_FWD_SCALE_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_FWD_BIAS_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_FWD_EPSILON_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_FWD_EXP_AVG_FACTOR_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_MEAN_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_VAR_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_MEAN_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_VAR_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_FWD_YDESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + 
{"CUDNN_ATTR_OPERATION_NORM_FWD_PEER_STAT_DESCS", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_BWD_MODE", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_BWD_XDESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_BWD_MEAN_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_BWD_INV_VARIANCE_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_BWD_DYDESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_BWD_SCALE_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_BWD_EPSILON_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_BWD_DSCALE_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_BWD_DBIAS_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_BWD_DXDESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_BWD_PEER_STAT_DESCS", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_TYPE_TENSOR_REORDERING_MODE", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_TYPE_RESAMPLE_MODE", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_TYPE_PADDING_MODE", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_TYPE_INT32", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_TYPE_CHAR", {CUDNN_840, CUDA_0, CUDA_0 }}, + {"CUDNN_TYPE_SIGNAL_MODE", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_TYPE_FRACTION", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_TYPE_NORM_MODE", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_TYPE_NORM_FWD_PHASE", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_BACKEND_RESAMPLE_DESCRIPTOR", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_BACKEND_OPERATION_RESAMPLE_FWD_DESCRIPTOR", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_BACKEND_OPERATION_RESAMPLE_BWD_DESCRIPTOR", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_BACKEND_OPERATION_CONCAT_DESCRIPTOR", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_BACKEND_OPERATION_SIGNAL_DESCRIPTOR", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_BACKEND_OPERATION_NORM_FORWARD_DESCRIPTOR", {CUDNN_850, CUDA_0, CUDA_0 }}, + 
{"CUDNN_BACKEND_OPERATION_NORM_BACKWARD_DESCRIPTOR", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_4x4", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_6x6", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_13x13", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_KNOB_TYPE_WORKSPACE", {CUDNN_840, CUDA_0, CUDA_0 }}, + {"CUDNN_HEUR_MODE_FALLBACK", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_HEUR_MODE_A", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"cudnnBackendTensorReordering_t", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_TENSOR_REORDERING_NONE", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_TENSOR_REORDERING_INT8x32", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"cudnnPaddingMode_t", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ZERO_PAD", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_NEG_INF_PAD", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_EDGE_VAL_PAD", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"cudnnBackendNormMode_t", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_LAYER_NORM", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_INSTANCE_NORM", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_BATCH_NORM", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_GROUP_NORM", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"cudnnBackendNormFwdPhase_t", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_NORM_FWD_INFERENCE", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_NORM_FWD_TRAINING", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_RESAMPLE_AVGPOOL_INCLUDE_PADDING", {CUDNN_860, CUDA_0, CUDA_0 }}, + {"CUDNN_RESAMPLE_AVGPOOL_EXCLUDE_PADDING", {CUDNN_860, CUDA_0, CUDA_0 }}, + {"CUDNN_KNOB_TYPE_TILE_CGA", {CUDNN_860, CUDA_0, CUDA_0 }}, + {"CUDNN_KNOB_TYPE_TILE_CGA_M", {CUDNN_860, CUDA_0, CUDA_0 }}, + {"CUDNN_KNOB_TYPE_TILE_CGA_N", {CUDNN_860, CUDA_0, CUDA_0 }}, + {"CUDNN_DATA_FP8_E4M3", {CUDNN_860, CUDA_0, CUDA_0 }}, + {"CUDNN_DATA_FP8_E5M2", {CUDNN_860, 
CUDA_0, CUDA_0 }}, }; const std::map HIP_DNN_TYPE_NAME_VER_MAP { diff --git a/src/CUDA2HIP_Device_functions.cpp b/src/CUDA2HIP_Device_functions.cpp index b54a8780..07c2f797 100644 --- a/src/CUDA2HIP_Device_functions.cpp +++ b/src/CUDA2HIP_Device_functions.cpp @@ -25,1210 +25,1412 @@ THE SOFTWARE. // Maps CUDA header names to HIP header names const std::map CUDA_DEVICE_FUNCTION_MAP { // math functions - {"abs", {"abs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"labs", {"labs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"llabs", {"llabs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"fabs", {"fabs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"fabsf", {"fabsf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"min", {"min", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"fminf", {"fminf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"fmin", {"fmin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"max", {"max", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"fmaxf", {"fmaxf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"fmax", {"fmax", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"sin", {"sin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"cos", {"cos", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"sincos", {"sincos", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"sincosf", {"sincosf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"tan", {"tan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"sqrt", {"sqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"rsqrt", {"rsqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"rsqrtf", {"rsqrtf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"log2", {"log2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"exp2", {"exp2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"exp2f", {"exp2f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"exp10", {"exp10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"exp10f", {"exp10f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"expm1", {"expm1", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"expm1f", {"expm1f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - 
{"log2f", {"log2f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"log10", {"log10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"log", {"log", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"log1p", {"log1p", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"log1pf", {"log1pf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"floor", {"floor", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"exp", {"exp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"cosh", {"cosh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"sinh", {"sinh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"tanh", {"tanh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"acosh", {"acosh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"acoshf", {"acoshf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"asinh", {"asinh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"asinhf", {"asinhf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atanh", {"atanh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atanhf", {"atanhf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"ldexp", {"ldexp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"ldexpf", {"ldexpf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"logb", {"logb", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"logbf", {"logbf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"ilogb", {"ilogb", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"ilogbf", {"ilogbf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"scalbn", {"scalbn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"scalbnf", {"scalbnf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"scalbln", {"scalbln", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"scalblnf", {"scalblnf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"frexp", {"frexp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"frexpf", {"frexpf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"round", {"round", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"roundf", {"roundf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"lround", {"lround", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"lroundf", {"lroundf", "", CONV_DEVICE_FUNC, API_RUNTIME, 
1}}, - {"llround", {"llround", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"llroundf", {"llroundf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"rint", {"rint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"rintf", {"rintf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"lrint", {"lrint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"lrintf", {"lrintf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"llrint", {"llrint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"llrintf", {"llrintf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"nearbyint", {"nearbyint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"nearbyintf", {"nearbyintf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"ceil", {"ceil", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"trunc", {"trunc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"truncf", {"truncf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"fdim", {"fdim", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"fdimf", {"fdimf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atan2", {"atan2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atan", {"atan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"acos", {"acos", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"asin", {"asin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"hypot", {"hypot", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"rhypot", {"rhypot", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"hypotf", {"hypotf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"rhypotf", {"rhypotf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"norm3d", {"norm3d", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"rnorm3d", {"rnorm3d", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"norm4d", {"norm4d", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"rnorm4d", {"rnorm4d", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"norm", {"norm", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"rnorm", {"rnorm", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"rnormf", {"rnormf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"normf", {"normf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"norm3df", {"norm3df", 
"", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"rnorm3df", {"rnorm3df", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"norm4df", {"norm4df", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"rnorm4df", {"rnorm4df", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"cbrt", {"cbrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"cbrtf", {"cbrtf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"rcbrt", {"rcbrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"rcbrtf", {"rcbrtf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"sinpi", {"sinpi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"sinpif", {"sinpif", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"cospi", {"cospi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"cospif", {"cospif", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"sincospi", {"sincospi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"sincospif", {"sincospif", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"pow", {"pow", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"modf", {"modf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"fmod", {"fmod", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"remainder", {"remainder", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"remainderf", {"remainderf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"remquo", {"remquo", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"remquof", {"remquof", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"j0", {"j0", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"j0f", {"j0f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"j1", {"j1", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"j1f", {"j1f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"jn", {"jn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"jnf", {"jnf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"y0", {"y0", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"y0f", {"y0f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"y1", {"y1", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"y1f", {"y1f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"yn", {"yn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"ynf", {"ynf", "", CONV_DEVICE_FUNC, 
API_RUNTIME, 1}}, - {"cyl_bessel_i0", {"cyl_bessel_i0", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"cyl_bessel_i0f", {"cyl_bessel_i0f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"cyl_bessel_i1", {"cyl_bessel_i1", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"cyl_bessel_i1f", {"cyl_bessel_i1f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"erf", {"erf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"erff", {"erff", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"erfinv", {"erfinv", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"erfinvf", {"erfinvf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"erfc", {"erfc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"erfcf", {"erfcf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"lgamma", {"lgamma", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"erfcinv", {"erfcinv", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"erfcinvf", {"erfcinvf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"normcdfinv", {"normcdfinv", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"normcdfinvf", {"normcdfinvf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"normcdf", {"normcdf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"normcdff", {"normcdff", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"erfcx", {"erfcx", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"erfcxf", {"erfcxf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"lgammaf", {"lgammaf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"tgamma", {"tgamma", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"tgammaf", {"tgammaf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"copysign", {"copysign", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"copysignf", {"copysignf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"nextafter", {"nextafter", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"nextafterf", {"nextafterf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"nan", {"nan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"nanf", {"nanf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"fma", {"fma", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"fmaf", {"fmaf", "", 
CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"acosf", {"acosf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"asinf", {"asinf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atanf", {"atanf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atan2f", {"atan2f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"cosf", {"cosf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"sinf", {"sinf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"tanf", {"tanf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"coshf", {"coshf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"sinhf", {"sinhf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"tanhf", {"tanhf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"expf", {"expf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"logf", {"logf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"log10f", {"log10f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"modff", {"modff", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"powf", {"powf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"sqrtf", {"sqrtf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"ceilf", {"ceilf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"floorf", {"floorf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"fmodf", {"fmodf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"signbit", {"signbit", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"isfinite", {"isfinite", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"isnan", {"isnan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"isinf", {"isinf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"umin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"llmin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"ullmin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"umax", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"llmax", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"ullmax", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__isinff", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__isnanf", {"", "", 
CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__finite", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__finitef", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__signbit", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__isnan", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__isinf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__signbitf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__signbitl", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__finitel", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__isinfl", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__isnanl", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"_ldsign", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"_fdsign", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"_Pow_int", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"abs", {"abs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"labs", {"labs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"llabs", {"llabs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"fabs", {"fabs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"fabsf", {"fabsf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"min", {"min", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"fminf", {"fminf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"fmin", {"fmin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"max", {"max", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"fmaxf", {"fmaxf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"fmax", {"fmax", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"sin", {"sin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"cos", {"cos", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"sincos", {"sincos", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"sincosf", {"sincosf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"tan", {"tan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"sqrt", {"sqrt", "", 
CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"rsqrt", {"rsqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"rsqrtf", {"rsqrtf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"log2", {"log2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"exp2", {"exp2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"exp2f", {"exp2f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"exp10", {"exp10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"exp10f", {"exp10f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"expm1", {"expm1", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"expm1f", {"expm1f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"log2f", {"log2f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"log10", {"log10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"log", {"log", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"log1p", {"log1p", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"log1pf", {"log1pf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"floor", {"floor", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"exp", {"exp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"cosh", {"cosh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"sinh", {"sinh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"tanh", {"tanh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"acosh", {"acosh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"acoshf", {"acoshf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"asinh", {"asinh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"asinhf", {"asinhf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atanh", {"atanh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atanhf", {"atanhf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"ldexp", {"ldexp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"ldexpf", {"ldexpf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"logb", {"logb", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"logbf", {"logbf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"ilogb", {"ilogb", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"ilogbf", {"ilogbf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"scalbn", {"scalbn", "", 
CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"scalbnf", {"scalbnf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"scalbln", {"scalbln", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"scalblnf", {"scalblnf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"frexp", {"frexp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"frexpf", {"frexpf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"round", {"round", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"roundf", {"roundf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"lround", {"lround", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"lroundf", {"lroundf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"llround", {"llround", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"llroundf", {"llroundf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"rint", {"rint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"rintf", {"rintf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"lrint", {"lrint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"lrintf", {"lrintf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"llrint", {"llrint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"llrintf", {"llrintf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"nearbyint", {"nearbyint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"nearbyintf", {"nearbyintf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"ceil", {"ceil", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"trunc", {"trunc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"truncf", {"truncf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"fdim", {"fdim", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"fdimf", {"fdimf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atan2", {"atan2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atan", {"atan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"acos", {"acos", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"asin", {"asin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"hypot", {"hypot", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"rhypot", {"rhypot", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"hypotf", {"hypotf", "", CONV_DEVICE_FUNC, 
API_RUNTIME, 1}}, + {"rhypotf", {"rhypotf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"norm3d", {"norm3d", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"rnorm3d", {"rnorm3d", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"norm4d", {"norm4d", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"rnorm4d", {"rnorm4d", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"norm", {"norm", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"rnorm", {"rnorm", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"rnormf", {"rnormf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"normf", {"normf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"norm3df", {"norm3df", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"rnorm3df", {"rnorm3df", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"norm4df", {"norm4df", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"rnorm4df", {"rnorm4df", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"cbrt", {"cbrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"cbrtf", {"cbrtf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"rcbrt", {"rcbrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"rcbrtf", {"rcbrtf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"sinpi", {"sinpi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"sinpif", {"sinpif", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"cospi", {"cospi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"cospif", {"cospif", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"sincospi", {"sincospi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"sincospif", {"sincospif", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"pow", {"pow", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"modf", {"modf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"fmod", {"fmod", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"remainder", {"remainder", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"remainderf", {"remainderf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"remquo", {"remquo", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"remquof", {"remquof", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"j0", {"j0", "", CONV_DEVICE_FUNC, API_RUNTIME, 
1}}, + {"j0f", {"j0f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"j1", {"j1", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"j1f", {"j1f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"jn", {"jn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"jnf", {"jnf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"y0", {"y0", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"y0f", {"y0f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"y1", {"y1", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"y1f", {"y1f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"yn", {"yn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"ynf", {"ynf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"cyl_bessel_i0", {"cyl_bessel_i0", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"cyl_bessel_i0f", {"cyl_bessel_i0f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"cyl_bessel_i1", {"cyl_bessel_i1", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"cyl_bessel_i1f", {"cyl_bessel_i1f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"erf", {"erf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"erff", {"erff", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"erfinv", {"erfinv", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"erfinvf", {"erfinvf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"erfc", {"erfc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"erfcf", {"erfcf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"lgamma", {"lgamma", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"erfcinv", {"erfcinv", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"erfcinvf", {"erfcinvf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"normcdfinv", {"normcdfinv", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"normcdfinvf", {"normcdfinvf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"normcdf", {"normcdf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"normcdff", {"normcdff", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"erfcx", {"erfcx", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"erfcxf", {"erfcxf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"lgammaf", {"lgammaf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + 
{"tgamma", {"tgamma", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"tgammaf", {"tgammaf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"copysign", {"copysign", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"copysignf", {"copysignf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"nextafter", {"nextafter", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"nextafterf", {"nextafterf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"nan", {"nan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"nanf", {"nanf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"fma", {"fma", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"fmaf", {"fmaf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"acosf", {"acosf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"asinf", {"asinf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atanf", {"atanf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atan2f", {"atan2f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"cosf", {"cosf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"sinf", {"sinf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"tanf", {"tanf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"coshf", {"coshf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"sinhf", {"sinhf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"tanhf", {"tanhf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"expf", {"expf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"logf", {"logf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"log10f", {"log10f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"modff", {"modff", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"powf", {"powf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"sqrtf", {"sqrtf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"ceilf", {"ceilf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"floorf", {"floorf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"fmodf", {"fmodf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"signbit", {"signbit", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"isfinite", {"isfinite", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"isnan", {"isnan", "", CONV_DEVICE_FUNC, 
API_RUNTIME, 1}}, + {"isinf", {"isinf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"umin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"llmin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"ullmin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"umax", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"llmax", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"ullmax", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__isinff", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__isnanf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__finite", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__finitef", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__signbit", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__isnan", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__isinf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__signbitf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__signbitl", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__finitel", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__isinfl", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__isnanl", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"_ldsign", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"_fdsign", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"_Pow_int", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, // static math functions declared in device-functions.h - {"mulhi", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"mul64hi", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"float_as_int", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"int_as_float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"float_as_uint", {"", "", CONV_DEVICE_FUNC, 
API_RUNTIME, 1, UNSUPPORTED}}, - {"uint_as_float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"saturate", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"mul24", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"umul24", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"float2int", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"int2float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"uint2float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"mulhi", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"mul64hi", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"float_as_int", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"int_as_float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"float_as_uint", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"uint_as_float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"saturate", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"mul24", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"umul24", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"float2int", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"int2float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"uint2float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, // device functions - {"__mulhi", {"__mulhi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__umulhi", {"__umulhi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__mul64hi", {"__mul64hi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__umul64hi", {"__umul64hi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__int_as_float", {"__int_as_float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float_as_int", {"__float_as_int", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__uint_as_float", {"__uint_as_float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float_as_uint", 
{"__float_as_uint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__syncthreads", {"__syncthreads", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__syncthreads_count", {"__syncthreads_count", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__syncthreads_and", {"__syncthreads_and", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__syncthreads_or", {"__syncthreads_or", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__threadfence", {"__threadfence", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__threadfence_block", {"__threadfence_block", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__threadfence_system",{"__threadfence_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__saturatef", {"__saturatef", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__sad", {"__sad", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__usad", {"__usad", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__mul24", {"__mul24", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__umul24", {"__umul24", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"fdividef", {"fdividef", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__fdividef", {"__fdividef", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"fdivide", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__sinf", {"__sinf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__cosf", {"__cosf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__tanf", {"__tanf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__sincosf", {"__sincosf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__expf", {"__expf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__exp10f", {"__exp10f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__log2f", {"__log2f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__log10f", {"__log10f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__logf", {"__logf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__powf", {"__powf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2int_rn", {"__float2int_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2int_rz", {"__float2int_rz", "", 
CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2int_ru", {"__float2int_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2int_rd", {"__float2int_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2uint_rn", {"__float2uint_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2uint_rz", {"__float2uint_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2uint_ru", {"__float2uint_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2uint_rd", {"__float2uint_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__int2float_rn", {"__int2float_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__int2float_rz", {"__int2float_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__int2float_ru", {"__int2float_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__int2float_rd", {"__int2float_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__uint2float_rn", {"__uint2float_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__uint2float_rz", {"__uint2float_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__uint2float_ru", {"__uint2float_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__uint2float_rd", {"__uint2float_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2ll_rn", {"__float2ll_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2ll_rz", {"__float2ll_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2ll_ru", {"__float2ll_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2ll_rd", {"__float2ll_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2ull_rn", {"__float2ull_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2ull_rz", {"__float2ull_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2ull_ru", {"__float2ull_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2ull_rd", {"__float2ull_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ll2float_rn", {"__ll2float_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ll2float_rz", {"__ll2float_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ll2float_ru", {"__ll2float_ru", 
"", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ll2float_rd", {"__ll2float_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ull2float_rn", {"__ull2float_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ull2float_rz", {"__ull2float_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ull2float_ru", {"__ull2float_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ull2float_rd", {"__ull2float_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__fadd_rn", {"__fadd_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__fadd_rz", {"__fadd_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fadd_ru", {"__fadd_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fadd_rd", {"__fadd_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fsub_rn", {"__fsub_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__fsub_rz", {"__fsub_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fsub_ru", {"__fsub_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fsub_rd", {"__fsub_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fmul_rn", {"__fmul_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__fmul_rz", {"__fmul_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fmul_ru", {"__fmul_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fmul_rd", {"__fmul_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fmaf_rn", {"__fmaf_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__fmaf_rz", {"__fmaf_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fmaf_ru", {"__fmaf_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fmaf_rd", {"__fmaf_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__frcp_rn", {"__frcp_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__frcp_rz", {"__frcp_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__frcp_ru", {"__frcp_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__frcp_rd", {"__frcp_rd", "", 
CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fsqrt_rn", {"__fsqrt_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__fsqrt_rz", {"__fsqrt_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fsqrt_ru", {"__fsqrt_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fsqrt_rd", {"__fsqrt_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__frsqrt_rn", {"__frsqrt_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__fdiv_rn", {"__fdiv_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__fdiv_rz", {"__fdiv_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fdiv_ru", {"__fdiv_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fdiv_rd", {"__fdiv_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__clz", {"__clz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ffs", {"__ffs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__popc", {"__popc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__brev", {"__brev", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__clzll", {"__clzll", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ffsll", {"__ffsll", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__popcll", {"__popcll", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__brevll", {"__brevll", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__byte_perm", {"__byte_perm", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hadd", {"__hadd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__rhadd", {"__rhadd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__uhadd", {"__uhadd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__urhadd", {"__urhadd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__prof_trigger", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__trap", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__brkpt", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__pm0", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__pm1", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - 
{"__pm2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__pm3", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__mulhi", {"__mulhi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__umulhi", {"__umulhi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__mul64hi", {"__mul64hi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__umul64hi", {"__umul64hi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__int_as_float", {"__int_as_float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float_as_int", {"__float_as_int", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__uint_as_float", {"__uint_as_float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float_as_uint", {"__float_as_uint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__syncthreads", {"__syncthreads", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__syncthreads_count", {"__syncthreads_count", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__syncthreads_and", {"__syncthreads_and", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__syncthreads_or", {"__syncthreads_or", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__threadfence", {"__threadfence", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__threadfence_block", {"__threadfence_block", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__threadfence_system", {"__threadfence_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__saturatef", {"__saturatef", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__sad", {"__sad", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__usad", {"__usad", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__mul24", {"__mul24", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__umul24", {"__umul24", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"fdividef", {"fdividef", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__fdividef", {"__fdividef", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"fdivide", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__sinf", {"__sinf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__cosf", {"__cosf", "", CONV_DEVICE_FUNC, 
API_RUNTIME, 1}}, + {"__tanf", {"__tanf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__sincosf", {"__sincosf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__expf", {"__expf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__exp10f", {"__exp10f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__log2f", {"__log2f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__log10f", {"__log10f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__logf", {"__logf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__powf", {"__powf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2int_rn", {"__float2int_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2int_rz", {"__float2int_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2int_ru", {"__float2int_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2int_rd", {"__float2int_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2uint_rn", {"__float2uint_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2uint_rz", {"__float2uint_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2uint_ru", {"__float2uint_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2uint_rd", {"__float2uint_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__int2float_rn", {"__int2float_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__int2float_rz", {"__int2float_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__int2float_ru", {"__int2float_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__int2float_rd", {"__int2float_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__uint2float_rn", {"__uint2float_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__uint2float_rz", {"__uint2float_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__uint2float_ru", {"__uint2float_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__uint2float_rd", {"__uint2float_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2ll_rn", {"__float2ll_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2ll_rz", {"__float2ll_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + 
{"__float2ll_ru", {"__float2ll_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2ll_rd", {"__float2ll_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2ull_rn", {"__float2ull_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2ull_rz", {"__float2ull_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2ull_ru", {"__float2ull_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2ull_rd", {"__float2ull_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ll2float_rn", {"__ll2float_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ll2float_rz", {"__ll2float_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ll2float_ru", {"__ll2float_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ll2float_rd", {"__ll2float_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ull2float_rn", {"__ull2float_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ull2float_rz", {"__ull2float_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ull2float_ru", {"__ull2float_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ull2float_rd", {"__ull2float_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__fadd_rn", {"__fadd_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__fadd_rz", {"__fadd_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fadd_ru", {"__fadd_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fadd_rd", {"__fadd_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fsub_rn", {"__fsub_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__fsub_rz", {"__fsub_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fsub_ru", {"__fsub_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fsub_rd", {"__fsub_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fmul_rn", {"__fmul_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__fmul_rz", {"__fmul_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fmul_ru", {"__fmul_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fmul_rd", 
{"__fmul_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fmaf_rn", {"__fmaf_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__fmaf_rz", {"__fmaf_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fmaf_ru", {"__fmaf_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fmaf_rd", {"__fmaf_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__frcp_rn", {"__frcp_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__frcp_rz", {"__frcp_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__frcp_ru", {"__frcp_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__frcp_rd", {"__frcp_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fsqrt_rn", {"__fsqrt_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__fsqrt_rz", {"__fsqrt_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fsqrt_ru", {"__fsqrt_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fsqrt_rd", {"__fsqrt_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__frsqrt_rn", {"__frsqrt_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__fdiv_rn", {"__fdiv_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__fdiv_rz", {"__fdiv_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fdiv_ru", {"__fdiv_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fdiv_rd", {"__fdiv_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__clz", {"__clz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ffs", {"__ffs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__popc", {"__popc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__brev", {"__brev", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__clzll", {"__clzll", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ffsll", {"__ffsll", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__popcll", {"__popcll", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__brevll", {"__brevll", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__byte_perm", {"__byte_perm", "", 
CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hadd", {"__hadd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hadd_rn", {"__hadd_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__rhadd", {"__rhadd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__uhadd", {"__uhadd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__urhadd", {"__urhadd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__prof_trigger", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__trap", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__brkpt", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__pm0", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__pm1", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__pm2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__pm3", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, // device double functions - {"__dadd_rz", {"__dadd_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__dadd_ru", {"__dadd_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__dadd_rd", {"__dadd_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__dadd_rn", {"__dadd_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ddiv_rz", {"__ddiv_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__ddiv_ru", {"__ddiv_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__ddiv_rd", {"__ddiv_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__ddiv_rn", {"__ddiv_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__dmul_rz", {"__dmul_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__dmul_ru", {"__dmul_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__dmul_rd", {"__dmul_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__dmul_rn", {"__dmul_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__drcp_rz", {"__drcp_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__drcp_ru", {"__drcp_ru", "", 
CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__drcp_rd", {"__drcp_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__drcp_rn", {"__drcp_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__dsqrt_rz", {"__dsqrt_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__dsqrt_ru", {"__dsqrt_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__dsqrt_rd", {"__dsqrt_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__dsqrt_rn", {"__dsqrt_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__dsub_rz", {"__dsub_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__dsub_ru", {"__dsub_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__dsub_rd", {"__dsub_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__dsub_rn", {"__dsub_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__fma_rz", {"__fma_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fma_ru", {"__fma_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fma_rd", {"__fma_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fma_rn", {"__fma_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2float_rd", {"__double2float_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2float_rn", {"__double2float_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2float_ru", {"__double2float_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2float_rz", {"__double2float_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2hiint", {"__double2hiint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2loint", {"__double2loint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2int_rd", {"__double2int_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2int_rn", {"__double2int_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2int_ru", {"__double2int_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2int_rz", {"__double2int_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - 
{"__double2ll_rd", {"__double2ll_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2ll_rn", {"__double2ll_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2ll_ru", {"__double2ll_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2ll_rz", {"__double2ll_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2uint_rd", {"__double2uint_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2uint_rn", {"__double2uint_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2uint_ru", {"__double2uint_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2uint_rz", {"__double2uint_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2ull_rd", {"__double2ull_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2ull_rn", {"__double2ull_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2ull_ru", {"__double2ull_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2ull_rz", {"__double2ull_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double_as_longlong",{"__double_as_longlong", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hiloint2double", {"__hiloint2double", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__int2double_rn", {"__int2double_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ll2double_rd", {"__ll2double_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ll2double_rn", {"__ll2double_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ll2double_ru", {"__ll2double_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ll2double_rz", {"__ll2double_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__longlong_as_double",{"__longlong_as_double", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__uint2double_rn", {"__uint2double_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ull2double_rd", {"__ull2double_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ull2double_rn", {"__ull2double_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ull2double_ru", {"__ull2double_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - 
{"__ull2double_rz", {"__ull2double_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__dadd_rz", {"__dadd_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__dadd_ru", {"__dadd_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__dadd_rd", {"__dadd_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__dadd_rn", {"__dadd_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ddiv_rz", {"__ddiv_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__ddiv_ru", {"__ddiv_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__ddiv_rd", {"__ddiv_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__ddiv_rn", {"__ddiv_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__dmul_rz", {"__dmul_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__dmul_ru", {"__dmul_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__dmul_rd", {"__dmul_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__dmul_rn", {"__dmul_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__drcp_rz", {"__drcp_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__drcp_ru", {"__drcp_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__drcp_rd", {"__drcp_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__drcp_rn", {"__drcp_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__dsqrt_rz", {"__dsqrt_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__dsqrt_ru", {"__dsqrt_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__dsqrt_rd", {"__dsqrt_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__dsqrt_rn", {"__dsqrt_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__dsub_rz", {"__dsub_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__dsub_ru", {"__dsub_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__dsub_rd", {"__dsub_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__dsub_rn", {"__dsub_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 
1}}, + {"__fma_rz", {"__fma_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fma_ru", {"__fma_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fma_rd", {"__fma_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fma_rn", {"__fma_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2float_rd", {"__double2float_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2float_rn", {"__double2float_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2float_ru", {"__double2float_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2float_rz", {"__double2float_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2hiint", {"__double2hiint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2loint", {"__double2loint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2int_rd", {"__double2int_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2int_rn", {"__double2int_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2int_ru", {"__double2int_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2int_rz", {"__double2int_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2ll_rd", {"__double2ll_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2ll_rn", {"__double2ll_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2ll_ru", {"__double2ll_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2ll_rz", {"__double2ll_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2uint_rd", {"__double2uint_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2uint_rn", {"__double2uint_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2uint_ru", {"__double2uint_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2uint_rz", {"__double2uint_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2ull_rd", {"__double2ull_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2ull_rn", {"__double2ull_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2ull_ru", 
{"__double2ull_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2ull_rz", {"__double2ull_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double_as_longlong", {"__double_as_longlong", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hiloint2double", {"__hiloint2double", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__int2double_rn", {"__int2double_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ll2double_rd", {"__ll2double_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ll2double_rn", {"__ll2double_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ll2double_ru", {"__ll2double_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ll2double_rz", {"__ll2double_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__longlong_as_double", {"__longlong_as_double", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__uint2double_rn", {"__uint2double_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ull2double_rd", {"__ull2double_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ull2double_rn", {"__ull2double_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ull2double_ru", {"__ull2double_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ull2double_rz", {"__ull2double_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, // SIMD functions - {"__vabs2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vabsss2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vadd2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vaddss2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vaddus2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vavgs2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vavgu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vhaddu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmpeq2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmpges2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmpgeu2", {"", 
"", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmpgts2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmpgtu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmples2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmplts2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmpltu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmpne2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vabsdiffu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vmaxs2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vmaxu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vmins2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vminu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vseteq2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsetges2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsetgeu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsetgts2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsetles2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsetleu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsetlts2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsetltu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsetne2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsadu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsub2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsubss2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsubus2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vneg2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vnegss2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - 
{"__vabsdiffs2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsads2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vabs4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vabsss4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vadd4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vaddss4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vaddus4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vavgs4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vavgu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vhaddu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmpeq4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmpges4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmpgeu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmpgts4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmpgtu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmples4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmpleu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmplts4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmpltu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmpne4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vabsdiffu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vmaxs4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vmaxu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vmins4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vminu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vseteq4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsetles4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, 
- {"__vsetleu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsetlts4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsetltu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsetges4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsetgeu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsetgts4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsetgtu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsetne4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsadu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsub4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsubss4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsubus4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vneg4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vnegss4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vabsdiffs4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsads4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vabs2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vabsss2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vadd2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vaddss2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vaddus2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vavgs2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vavgu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vhaddu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmpeq2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmpges2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmpgeu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + 
{"__vcmpgts2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmpgtu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmples2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmplts2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmpltu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmpne2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vabsdiffu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vmaxs2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vmaxu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vmins2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vminu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vseteq2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsetges2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsetgeu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsetgts2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsetles2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsetleu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsetlts2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsetltu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsetne2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsadu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsub2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsubss2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsubus2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vneg2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vnegss2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vabsdiffs2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, 
UNSUPPORTED}}, + {"__vsads2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vabs4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vabsss4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vadd4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vaddss4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vaddus4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vavgs4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vavgu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vhaddu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmpeq4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmpges4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmpgeu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmpgts4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmpgtu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmples4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmpleu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmplts4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmpltu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmpne4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vabsdiffu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vmaxs4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vmaxu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vmins4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vminu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vseteq4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsetles4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsetleu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, 
UNSUPPORTED}}, + {"__vsetlts4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsetltu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsetges4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsetgeu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsetgts4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsetgtu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsetne4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsadu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsub4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsubss4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsubus4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vneg4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vnegss4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vabsdiffs4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsads4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, // fp16 functions - {"__float2half", {"__float2half", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2half_rn", {"__float2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2half_rz", {"__float2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2half_rd", {"__float2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2half_ru", {"__float2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2float", {"__half2float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2half2_rn", {"__float2half2_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__floats2half2_rn", {"__floats2half2_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__low2float", {"__low2float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__high2float", {"__high2float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float22half2_rn", {"__float22half2_rn", "", 
CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half22float2", {"__half22float2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2int_rn", {"__half2int_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2int_rz", {"__half2int_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2int_rd", {"__half2int_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2int_ru", {"__half2int_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__int2half_rn", {"__int2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__int2half_rz", {"__int2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__int2half_rd", {"__int2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__int2half_ru", {"__int2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2short_rn", {"__half2short_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2short_rz", {"__half2short_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2short_rd", {"__half2short_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2short_ru", {"__half2short_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__short2half_rn", {"__short2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__short2half_rz", {"__short2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__short2half_rd", {"__short2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__short2half_ru", {"__short2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2uint_rn", {"__half2uint_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2uint_rz", {"__half2uint_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2uint_rd", {"__half2uint_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2uint_ru", {"__half2uint_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__uint2half_rn", {"__uint2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__uint2half_rz", {"__uint2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__uint2half_rd", {"__uint2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__uint2half_ru", {"__uint2half_ru", "", 
CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2ushort_rn", {"__half2ushort_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2ushort_rz", {"__half2ushort_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2ushort_rd", {"__half2ushort_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2ushort_ru", {"__half2ushort_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ushort2half_rn", {"__ushort2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ushort2half_rz", {"__ushort2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ushort2half_rd", {"__ushort2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ushort2half_ru", {"__ushort2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2ull_rn", {"__half2ull_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2ull_rz", {"__half2ull_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2ull_rd", {"__half2ull_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2ull_ru", {"__half2ull_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ull2half_rn", {"__ull2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ull2half_rz", {"__ull2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ull2half_rd", {"__ull2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ull2half_ru", {"__ull2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2ll_rn", {"__half2ll_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2ll_rz", {"__half2ll_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2ll_rd", {"__half2ll_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2ll_ru", {"__half2ll_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ll2half_rn", {"__ll2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ll2half_rz", {"__ll2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ll2half_rd", {"__ll2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ll2half_ru", {"__ll2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"htrunc", {"htrunc", "", CONV_DEVICE_FUNC, API_RUNTIME, 
1}}, - {"hceil", {"hceil", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"hfloor", {"hfloor", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"hrint", {"hrint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"h2trunc", {"h2trunc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"h2ceil", {"h2ceil", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"h2floor", {"h2floor", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"h2rint", {"h2rint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2half2", {"__half2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__lowhigh2highlow", {"__lowhigh2highlow", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__lows2half2", {"__lows2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__highs2half2", {"__highs2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__high2half", {"__high2half", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__low2half", {"__low2half", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hisinf", {"__hisinf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__halves2half2", {"__halves2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__low2half2", {"__low2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__high2half2", {"__high2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half_as_short", {"__half_as_short", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half_as_ushort", {"__half_as_ushort", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__short_as_half", {"__short_as_half", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ushort_as_half", {"__ushort_as_half", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ldg", {"__ldg", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ldcg", {"__ldcg", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ldca", {"__ldca", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ldcs", {"__ldcs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__heq2", {"__heq2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hne2", {"__hne2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hle2", {"__hle2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - 
{"__hge2", {"__hge2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hlt2", {"__hlt2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hgt2", {"__hgt2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hequ2", {"__hequ2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hneu2", {"__hneu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hleu2", {"__hleu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hgeu2", {"__hgeu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hltu2", {"__hltu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hgtu2", {"__hgtu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hisnan2", {"__hisnan2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hadd2", {"__hadd2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hsub2", {"__hsub2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hmul2", {"__hmul2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__h2div", {"__h2div", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hadd2_sat", {"__hadd2_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hsub2_sat", {"__hsub2_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hmul2_sat", {"__hmul2_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hfma2", {"__hfma2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hfma2_sat", {"__hfma2_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hneg2", {"__hneg2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hsub", {"__hsub", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hmul", {"__hmul", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hdiv", {"__hdiv", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hadd_sat", {"__hadd_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hsub_sat", {"__hsub_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hmul_sat", {"__hmul_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hfma", {"__hfma", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hfma_sat", {"__hfma_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hneg", {"__hneg", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__habs2", {"__habs2", "", 
CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__habs", {"__habs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hbeq2", {"__hbeq2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hbne2", {"__hbne2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hble2", {"__hble2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hbge2", {"__hbge2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hblt2", {"__hblt2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hbgt2", {"__hbgt2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hbequ2", {"__hbequ2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hbneu2", {"__hbneu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hbleu2", {"__hbleu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hbgeu2", {"__hbgeu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hbltu2", {"__hbltu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hbgtu2", {"__hbgtu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__heq", {"__heq", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hne", {"__hne", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hle", {"__hle", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hge", {"__hge", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hlt", {"__hlt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hgt", {"__hgt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hequ", {"__hequ", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hneu", {"__hneu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hleu", {"__hleu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hgeu", {"__hgeu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hltu", {"__hltu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hgtu", {"__hgtu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hisnan", {"__hisnan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"hsqrt", {"hsqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"hrsqrt", {"hrsqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"hrcp", {"hrcp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"hlog", {"hlog", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"hlog2", {"hlog2", "", 
CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"hlog10", {"hlog10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"hexp", {"hexp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"hexp2", {"hexp2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"hexp10", {"hexp10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"hcos", {"hcos", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"hsin", {"hsin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"h2sqrt", {"h2sqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"h2rsqrt", {"h2rsqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"h2rcp", {"h2rcp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"h2log", {"h2log", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"h2log2", {"h2log2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"h2log10", {"h2log10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"h2exp", {"h2exp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"h2exp2", {"h2exp2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"h2exp10", {"h2exp10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"h2cos", {"h2cos", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"h2sin", {"h2sin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__shfl_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__shfl", {"__shfl", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}}, - {"__shfl_up_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__shfl_up", {"__shfl_up", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}}, - {"__shfl_down_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__shfl_down", {"__shfl_down", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}}, - {"__shfl_xor_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__shfl_xor", {"__shfl_xor", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}}, - {"__funnelshift_l", {"__funnelshift_l", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__funnelshift_lc", {"__funnelshift_lc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__funnelshift_r", {"__funnelshift_r", "", CONV_DEVICE_FUNC, 
API_RUNTIME, 1}}, - {"__funnelshift_rc", {"__funnelshift_rc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2half", {"__float2half", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2half_rn", {"__float2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2half_rz", {"__float2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2half_rd", {"__float2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2half_ru", {"__float2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2float", {"__half2float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2half2_rn", {"__float2half2_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__floats2half2_rn", {"__floats2half2_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__low2float", {"__low2float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__high2float", {"__high2float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float22half2_rn", {"__float22half2_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half22float2", {"__half22float2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2int_rn", {"__half2int_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2int_rz", {"__half2int_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2int_rd", {"__half2int_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2int_ru", {"__half2int_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__int2half_rn", {"__int2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__int2half_rz", {"__int2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__int2half_rd", {"__int2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__int2half_ru", {"__int2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2short_rn", {"__half2short_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2short_rz", {"__half2short_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2short_rd", {"__half2short_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2short_ru", {"__half2short_ru", "", 
CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__short2half_rn", {"__short2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__short2half_rz", {"__short2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__short2half_rd", {"__short2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__short2half_ru", {"__short2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2uint_rn", {"__half2uint_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2uint_rz", {"__half2uint_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2uint_rd", {"__half2uint_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2uint_ru", {"__half2uint_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__uint2half_rn", {"__uint2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__uint2half_rz", {"__uint2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__uint2half_rd", {"__uint2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__uint2half_ru", {"__uint2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2ushort_rn", {"__half2ushort_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2ushort_rz", {"__half2ushort_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2ushort_rd", {"__half2ushort_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2ushort_ru", {"__half2ushort_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ushort2half_rn", {"__ushort2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ushort2half_rz", {"__ushort2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ushort2half_rd", {"__ushort2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ushort2half_ru", {"__ushort2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2ull_rn", {"__half2ull_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2ull_rz", {"__half2ull_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2ull_rd", {"__half2ull_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2ull_ru", {"__half2ull_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + 
{"__ull2half_rn", {"__ull2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ull2half_rz", {"__ull2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ull2half_rd", {"__ull2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ull2half_ru", {"__ull2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2ll_rn", {"__half2ll_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2ll_rz", {"__half2ll_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2ll_rd", {"__half2ll_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2ll_ru", {"__half2ll_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ll2half_rn", {"__ll2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ll2half_rz", {"__ll2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ll2half_rd", {"__ll2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ll2half_ru", {"__ll2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"htrunc", {"htrunc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"hceil", {"hceil", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"hfloor", {"hfloor", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"hrint", {"hrint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"h2trunc", {"h2trunc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"h2ceil", {"h2ceil", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"h2floor", {"h2floor", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"h2rint", {"h2rint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2half2", {"__half2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__lowhigh2highlow", {"__lowhigh2highlow", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__lows2half2", {"__lows2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__highs2half2", {"__highs2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__high2half", {"__high2half", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__low2half", {"__low2half", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hisinf", {"__hisinf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__halves2half2", {"__halves2half2", 
"", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__low2half2", {"__low2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__high2half2", {"__high2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half_as_short", {"__half_as_short", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half_as_ushort", {"__half_as_ushort", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__short_as_half", {"__short_as_half", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ushort_as_half", {"__ushort_as_half", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ldg", {"__ldg", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ldcg", {"__ldcg", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ldca", {"__ldca", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ldcs", {"__ldcs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ldlu", {"__ldlu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__ldcv", {"__ldcv", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__heq2", {"__heq2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hne2", {"__hne2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hle2", {"__hle2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hge2", {"__hge2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hlt2", {"__hlt2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hgt2", {"__hgt2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hequ2", {"__hequ2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hneu2", {"__hneu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hleu2", {"__hleu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hgeu2", {"__hgeu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hltu2", {"__hltu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hgtu2", {"__hgtu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hisnan2", {"__hisnan2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hadd2", {"__hadd2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hadd2_rn", {"__hadd2_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__hsub2", {"__hsub2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + 
{"__hsub2_rn", {"__hsub2_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__hmul2", {"__hmul2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hmul2_rn", {"__hmul2_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__h2div", {"__h2div", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hadd2_sat", {"__hadd2_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hsub2_sat", {"__hsub2_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hmul2_sat", {"__hmul2_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hfma2", {"__hfma2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hfma2_sat", {"__hfma2_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hfma2_relu", {"__hfma2_relu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__hneg2", {"__hneg2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hsub", {"__hsub", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hsub_rn", {"__hsub_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__hmul", {"__hmul", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hmul_rn", {"__hmul_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__hdiv", {"__hdiv", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hadd_sat", {"__hadd_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hsub_sat", {"__hsub_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hmul_sat", {"__hmul_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hfma", {"__hfma", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hfma_relu", {"__hfma_relu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__hfma_sat", {"__hfma_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hneg", {"__hneg", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__habs2", {"__habs2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__habs", {"__habs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hbeq2", {"__hbeq2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hbne2", {"__hbne2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hble2", {"__hble2", "", CONV_DEVICE_FUNC, 
API_RUNTIME, 1}}, + {"__hbge2", {"__hbge2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hblt2", {"__hblt2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hbgt2", {"__hbgt2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hbequ2", {"__hbequ2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hbneu2", {"__hbneu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hbleu2", {"__hbleu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hbgeu2", {"__hbgeu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hbltu2", {"__hbltu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hbgtu2", {"__hbgtu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__heq", {"__heq", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hne", {"__hne", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hle", {"__hle", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hge", {"__hge", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hlt", {"__hlt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hgt", {"__hgt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hequ", {"__hequ", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hneu", {"__hneu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hleu", {"__hleu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hgeu", {"__hgeu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hltu", {"__hltu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hgtu", {"__hgtu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hisnan", {"__hisnan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"hsqrt", {"hsqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"hrsqrt", {"hrsqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"hrcp", {"hrcp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"hlog", {"hlog", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"hlog2", {"hlog2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"hlog10", {"hlog10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"hexp", {"hexp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"hexp2", {"hexp2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"hexp10", {"hexp10", "", CONV_DEVICE_FUNC, 
API_RUNTIME, 1}}, + {"hcos", {"hcos", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"hsin", {"hsin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"h2sqrt", {"h2sqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"h2rsqrt", {"h2rsqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"h2rcp", {"h2rcp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"h2log", {"h2log", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"h2log2", {"h2log2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"h2log10", {"h2log10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"h2exp", {"h2exp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"h2exp2", {"h2exp2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"h2exp10", {"h2exp10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"h2cos", {"h2cos", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"h2sin", {"h2sin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__shfl_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__shfl", {"__shfl", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}}, + {"__shfl_up_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__shfl_up", {"__shfl_up", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}}, + {"__shfl_down_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__shfl_down", {"__shfl_down", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}}, + {"__shfl_xor_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__shfl_xor", {"__shfl_xor", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}}, + {"__funnelshift_l", {"__funnelshift_l", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__funnelshift_lc", {"__funnelshift_lc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__funnelshift_r", {"__funnelshift_r", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__funnelshift_rc", {"__funnelshift_rc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2half", {"__double2half", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__hmax", {"__hmax", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, 
UNSUPPORTED}}, + {"__hmax_nan", {"__hmax_nan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__hmax2", {"__hmax2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__hmax2_nan", {"__hmax2_nan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__hmin", {"__hmin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__hmin_nan", {"__hmin_nan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__hmin2", {"__hmin2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__hmin2_nan", {"__hmin2_nan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__stwb", {"__stwb", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__stcg", {"__stcg", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__stcs", {"__stcs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__stwt", {"__stwt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__hcmadd", {"__hcmadd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + // bfp16 functions + {"__double2bfloat16", {"__double2bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__float2bfloat16", {"__float2bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__float2bfloat16_rn", {"__float2bfloat16_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__float2bfloat16_rz", {"__float2bfloat16_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__float2bfloat16_rd", {"__float2bfloat16_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__float2bfloat16_ru", {"__float2bfloat16_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162float", {"__bfloat162float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__float2bfloat162_rn", {"__float2bfloat162_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__floats2bfloat162_rn", {"__floats2bfloat162_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat1622float2", {"__bfloat1622float2", "", CONV_DEVICE_FUNC, 
API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162int_rn", {"__bfloat162int_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162int_rz", {"__bfloat162int_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162int_rd", {"__bfloat162int_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162int_ru", {"__bfloat162int_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__int2bfloat16_rn", {"__int2bfloat16_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__int2bfloat16_rz", {"__int2bfloat16_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__int2bfloat16_rd", {"__int2bfloat16_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__int2bfloat16_ru", {"__int2bfloat16_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162short_rn", {"__bfloat162short_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162short_rz", {"__bfloat162short_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162short_rd", {"__bfloat162short_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162short_ru", {"__bfloat162short_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__short2bfloat16_rn", {"__short2bfloat16_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__short2bfloat16_rz", {"__short2bfloat16_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__short2bfloat16_rd", {"__short2bfloat16_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__short2bfloat16_ru", {"__short2bfloat16_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162uint_rn", {"__bfloat162uint_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162uint_rz", {"__bfloat162uint_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162uint_rd", {"__bfloat162uint_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162uint_ru", {"__bfloat162uint_ru", 
"", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__uint2bfloat16_rn", {"__uint2bfloat16_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__uint2bfloat16_rz", {"__uint2bfloat16_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__uint2bfloat16_rd", {"__uint2bfloat16_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__uint2bfloat16_ru", {"__uint2bfloat16_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162ushort_rn", {"__bfloat162ushort_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162ushort_rz", {"__bfloat162ushort_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162ushort_rd", {"__bfloat162ushort_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162ushort_ru", {"__bfloat162ushort_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__ushort2bfloat16_rn", {"__ushort2bfloat16_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__ushort2bfloat16_rz", {"__ushort2bfloat16_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__ushort2bfloat16_rd", {"__ushort2bfloat16_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__ushort2bfloat16_ru", {"__ushort2bfloat16_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162ull_rn", {"__bfloat162ull_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162ull_rz", {"__bfloat162ull_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162ull_rd", {"__bfloat162ull_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162ull_ru", {"__bfloat162ull_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__ull2bfloat16_rn", {"__ull2bfloat16_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__ull2bfloat16_rz", {"__ull2bfloat16_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__ull2bfloat16_rd", {"__ull2bfloat16_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + 
{"__ull2bfloat16_ru", {"__ull2bfloat16_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162ll_rn", {"__bfloat162ll_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162ll_rz", {"__bfloat162ll_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162ll_rd", {"__bfloat162ll_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162ll_ru", {"__bfloat162ll_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__ll2bfloat16_rn", {"__ll2bfloat16_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__ll2bfloat16_rz", {"__ll2bfloat16_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__ll2bfloat16_rd", {"__ll2bfloat16_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__ll2bfloat16_ru", {"__ll2bfloat16_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162bfloat162", {"__bfloat162bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__lows2bfloat162", {"__lows2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__highs2bfloat162", {"__highs2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__high2bfloat16", {"__high2bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__low2bfloat16", {"__low2bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__halves2bfloat162", {"__halves2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__low2bfloat162", {"__low2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__high2bfloat162", {"__high2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat16_as_short", {"__bfloat16_as_short", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat16_as_ushort", {"__bfloat16_as_ushort", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__short_as_bfloat16", {"__short_as_bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__ushort_as_bfloat16", 
{"__ushort_as_bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, // atomic functions - {"atomicAdd", {"atomicAdd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicAdd_system", {"atomicAdd_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicSub", {"atomicSub", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicSub_system", {"atomicSub_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicExch", {"atomicExch", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicExch_system", {"atomicExch_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicMin", {"atomicMin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicMin_system", {"atomicMin_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicMax", {"atomicMax", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicMax_system", {"atomicMax_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicInc", {"atomicInc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicDec", {"atomicDec", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicAnd", {"atomicAnd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicAnd_system", {"atomicAnd_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicOr", {"atomicOr", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicOr_system", {"atomicOr_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicXor", {"atomicXor", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicXor_system", {"atomicXor_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicCAS", {"atomicCAS", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicCAS_system", {"atomicCAS_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__all", {"__all", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__any", {"__any", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ballot", {"__ballot", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicAdd", {"atomicAdd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicAdd_system", {"atomicAdd_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicSub", {"atomicSub", 
"", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicSub_system", {"atomicSub_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicExch", {"atomicExch", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicExch_system", {"atomicExch_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicMin", {"atomicMin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicMin_system", {"atomicMin_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicMax", {"atomicMax", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicMax_system", {"atomicMax_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicInc", {"atomicInc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicDec", {"atomicDec", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicAnd", {"atomicAnd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicAnd_system", {"atomicAnd_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicOr", {"atomicOr", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicOr_system", {"atomicOr_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicXor", {"atomicXor", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicXor_system", {"atomicXor_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicCAS", {"atomicCAS", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicCAS_system", {"atomicCAS_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__all", {"__all", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__any", {"__any", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ballot", {"__ballot", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, // clock functions - {"clock64", {"clock64", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"clock", {"clock", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"clock64", {"clock64", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"clock", {"clock", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, // common functions - {"__assert_fail", {"__assert_fail", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__assertfail", {"__assertfail", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + 
{"__assert_fail", {"__assert_fail", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__assertfail", {"__assertfail", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + // fp8 functions + {"__nv_cvt_double_to_fp8", {"__hip_cvt_double_to_fp8", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__nv_cvt_double2_to_fp8x2", {"__hip_cvt_double2_to_fp8x2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__nv_cvt_float_to_fp8", {"__hip_cvt_float_to_fp8", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__nv_cvt_float2_to_fp8x2", {"__hip_cvt_float2_to_fp8x2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__nv_cvt_halfraw_to_fp8", {"__hip_cvt_halfraw_to_fp8", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__nv_cvt_halfraw2_to_fp8x2", {"__hip_cvt_halfraw2_to_fp8x2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__nv_cvt_bfloat16raw_to_fp8", {"__hip_cvt_bfloat16raw_to_fp8", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__nv_cvt_bfloat16raw2_to_fp8x2", {"__hip_cvt_bfloat16raw2_to_fp8x2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__nv_cvt_fp8_to_halfraw", {"__hip_cvt_fp8_to_halfraw", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__nv_cvt_fp8x2_to_halfraw2", {"__hip_cvt_fp8x2_to_halfraw2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, }; const std::map CUDA_DEVICE_FUNCTION_VER_MAP { - {"__shfl", {CUDA_75, CUDA_90, CUDA_0 }}, - {"__shfl_up", {CUDA_75, CUDA_90, CUDA_0 }}, - {"__shfl_down", {CUDA_75, CUDA_90, CUDA_0 }}, - {"__shfl_xor", {CUDA_75, CUDA_90, CUDA_0 }}, + {"__shfl", {CUDA_75, CUDA_90, CUDA_0 }}, + {"__shfl_up", {CUDA_75, CUDA_90, CUDA_0 }}, + {"__shfl_down", {CUDA_75, CUDA_90, CUDA_0 }}, + {"__shfl_xor", {CUDA_75, CUDA_90, CUDA_0 }}, + {"__double2half", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__hmax", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__hmax2", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__hmin", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__hmin2", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__ldlu", {CUDA_110, CUDA_0, 
CUDA_0 }}, + {"__ldcv", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__stwb", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__stcg", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__stcs", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__stwt", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__hmax_nan", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__hmin_nan", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__hmax2_nan", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__hmin2_nan", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__hfma_relu", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__hfma2_relu", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__double2bfloat16", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__float2bfloat16", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__float2bfloat16_rn", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__float2bfloat16_rz", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__float2bfloat16_rd", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__float2bfloat16_ru", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162float", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__float2bfloat162_rn", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__floats2bfloat162_rn", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat1622float2", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162int_rn", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162int_rz", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162int_rd", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162int_ru", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__int2bfloat16_rn", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__int2bfloat16_rz", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__int2bfloat16_rd", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__int2bfloat16_ru", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162short_rn", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162short_rz", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162short_rd", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162short_ru", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__short2bfloat16_rn", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__short2bfloat16_rz", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__short2bfloat16_rd", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__short2bfloat16_ru", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162uint_rn", {CUDA_110, CUDA_0, CUDA_0 
}}, + {"__bfloat162uint_rz", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162uint_rd", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162uint_ru", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__uint2bfloat16_rn", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__uint2bfloat16_rz", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__uint2bfloat16_rd", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__uint2bfloat16_ru", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162ushort_rn", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162ushort_rz", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162ushort_rd", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162ushort_ru", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__ushort2bfloat16_rn", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__ushort2bfloat16_rz", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__ushort2bfloat16_rd", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__ushort2bfloat16_ru", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162ull_rn", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162ull_rz", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162ull_rd", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162ull_ru", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__ull2bfloat16_rn", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__ull2bfloat16_rz", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__ull2bfloat16_rd", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__ull2bfloat16_ru", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162ll_rn", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162ll_rz", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162ll_rd", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162ll_ru", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__ll2bfloat16_rn", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__ll2bfloat16_rz", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__ll2bfloat16_rd", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__ll2bfloat16_ru", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162bfloat162", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__lows2bfloat162", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__highs2bfloat162", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__high2bfloat16", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__low2bfloat16", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__halves2bfloat162", 
{CUDA_110, CUDA_0, CUDA_0 }}, + {"__low2bfloat162", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__high2bfloat162", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat16_as_short", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat16_as_ushort", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__short_as_bfloat16", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__ushort_as_bfloat16", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__hcmadd", {CUDA_111, CUDA_0, CUDA_0 }}, + {"__hadd2_rn", {CUDA_116, CUDA_0, CUDA_0 }}, + {"__hsub2_rn", {CUDA_116, CUDA_0, CUDA_0 }}, + {"__hmul2_rn", {CUDA_116, CUDA_0, CUDA_0 }}, + {"__hadd_rn", {CUDA_116, CUDA_0, CUDA_0 }}, + {"__hsub_rn", {CUDA_116, CUDA_0, CUDA_0 }}, + {"__hmul_rn", {CUDA_116, CUDA_0, CUDA_0 }}, + {"__nv_cvt_double_to_fp8", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_cvt_double2_to_fp8x2", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_cvt_float_to_fp8", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_cvt_float2_to_fp8x2", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_cvt_halfraw_to_fp8", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_cvt_halfraw2_to_fp8x2", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_cvt_bfloat16raw_to_fp8", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_cvt_bfloat16raw2_to_fp8x2", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_cvt_fp8_to_halfraw", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_cvt_fp8x2_to_halfraw2", {CUDA_118, CUDA_0, CUDA_0 }}, }; const std::map HIP_DEVICE_FUNCTION_VER_MAP { - {"abs", {HIP_1060, HIP_0, HIP_0 }}, - {"labs", {HIP_1090, HIP_0, HIP_0 }}, - {"llabs", {HIP_1090, HIP_0, HIP_0 }}, - {"fabs", {HIP_1060, HIP_0, HIP_0 }}, - {"fabsf", {HIP_1060, HIP_0, HIP_0 }}, - {"min", {HIP_1060, HIP_0, HIP_0 }}, - {"fminf", {HIP_1060, HIP_0, HIP_0 }}, - {"fmin", {HIP_1060, HIP_0, HIP_0 }}, - {"max", {HIP_1060, HIP_0, HIP_0 }}, - {"fmaxf", {HIP_1060, HIP_0, HIP_0 }}, - {"fmax", {HIP_1060, HIP_0, HIP_0 }}, - {"sin", {HIP_1060, HIP_0, HIP_0 }}, - {"cos", {HIP_1060, HIP_0, HIP_0 }}, - {"sincos", {HIP_1060, HIP_0, HIP_0 }}, - {"sincosf", {HIP_1060, HIP_0, HIP_0 }}, - {"tan", {HIP_1060, HIP_0, HIP_0 }}, - {"sqrt", {HIP_1060, 
HIP_0, HIP_0 }}, - {"rsqrt", {HIP_1060, HIP_0, HIP_0 }}, - {"rsqrtf", {HIP_1060, HIP_0, HIP_0 }}, - {"log2", {HIP_1060, HIP_0, HIP_0 }}, - {"exp2", {HIP_1060, HIP_0, HIP_0 }}, - {"exp2f", {HIP_1060, HIP_0, HIP_0 }}, - {"exp10", {HIP_1060, HIP_0, HIP_0 }}, - {"exp10f", {HIP_1060, HIP_0, HIP_0 }}, - {"expm1", {HIP_1060, HIP_0, HIP_0 }}, - {"expm1f", {HIP_1060, HIP_0, HIP_0 }}, - {"log2f", {HIP_1060, HIP_0, HIP_0 }}, - {"log10", {HIP_1060, HIP_0, HIP_0 }}, - {"log", {HIP_1060, HIP_0, HIP_0 }}, - {"log1p", {HIP_1060, HIP_0, HIP_0 }}, - {"log1pf", {HIP_1060, HIP_0, HIP_0 }}, - {"floor", {HIP_1060, HIP_0, HIP_0 }}, - {"exp", {HIP_1060, HIP_0, HIP_0 }}, - {"cosh", {HIP_1060, HIP_0, HIP_0 }}, - {"sinh", {HIP_1060, HIP_0, HIP_0 }}, - {"tanh", {HIP_1060, HIP_0, HIP_0 }}, - {"acosh", {HIP_1060, HIP_0, HIP_0 }}, - {"acoshf", {HIP_1060, HIP_0, HIP_0 }}, - {"asinh", {HIP_1060, HIP_0, HIP_0 }}, - {"asinhf", {HIP_1060, HIP_0, HIP_0 }}, - {"atanh", {HIP_1060, HIP_0, HIP_0 }}, - {"atanhf", {HIP_1060, HIP_0, HIP_0 }}, - {"ldexp", {HIP_1060, HIP_0, HIP_0 }}, - {"ldexpf", {HIP_1060, HIP_0, HIP_0 }}, - {"logb", {HIP_1060, HIP_0, HIP_0 }}, - {"logbf", {HIP_1060, HIP_0, HIP_0 }}, - {"ilogb", {HIP_1060, HIP_0, HIP_0 }}, - {"ilogbf", {HIP_1060, HIP_0, HIP_0 }}, - {"scalbn", {HIP_1060, HIP_0, HIP_0 }}, - {"scalbnf", {HIP_1060, HIP_0, HIP_0 }}, - {"scalbln", {HIP_1060, HIP_0, HIP_0 }}, - {"scalblnf", {HIP_1060, HIP_0, HIP_0 }}, - {"frexp", {HIP_1060, HIP_0, HIP_0 }}, - {"frexpf", {HIP_1060, HIP_0, HIP_0 }}, - {"round", {HIP_1060, HIP_0, HIP_0 }}, - {"roundf", {HIP_1060, HIP_0, HIP_0 }}, - {"lround", {HIP_1060, HIP_0, HIP_0 }}, - {"lroundf", {HIP_1060, HIP_0, HIP_0 }}, - {"llround", {HIP_1060, HIP_0, HIP_0 }}, - {"llroundf", {HIP_1060, HIP_0, HIP_0 }}, - {"rint", {HIP_1060, HIP_0, HIP_0 }}, - {"rintf", {HIP_1060, HIP_0, HIP_0 }}, - {"lrint", {HIP_1060, HIP_0, HIP_0 }}, - {"lrintf", {HIP_1060, HIP_0, HIP_0 }}, - {"llrint", {HIP_1060, HIP_0, HIP_0 }}, - {"llrintf", {HIP_1060, HIP_0, HIP_0 }}, - 
{"nearbyint", {HIP_1060, HIP_0, HIP_0 }}, - {"nearbyintf", {HIP_1060, HIP_0, HIP_0 }}, - {"ceil", {HIP_1060, HIP_0, HIP_0 }}, - {"trunc", {HIP_1060, HIP_0, HIP_0 }}, - {"truncf", {HIP_1060, HIP_0, HIP_0 }}, - {"fdim", {HIP_1060, HIP_0, HIP_0 }}, - {"fdimf", {HIP_1060, HIP_0, HIP_0 }}, - {"atan2", {HIP_1060, HIP_0, HIP_0 }}, - {"atan", {HIP_1060, HIP_0, HIP_0 }}, - {"acos", {HIP_1060, HIP_0, HIP_0 }}, - {"asin", {HIP_1060, HIP_0, HIP_0 }}, - {"hypot", {HIP_1060, HIP_0, HIP_0 }}, - {"rhypot", {HIP_1060, HIP_0, HIP_0 }}, - {"hypotf", {HIP_1060, HIP_0, HIP_0 }}, - {"rhypotf", {HIP_1060, HIP_0, HIP_0 }}, - {"norm3d", {HIP_1060, HIP_0, HIP_0 }}, - {"rnorm3d", {HIP_1060, HIP_0, HIP_0 }}, - {"norm4d", {HIP_1060, HIP_0, HIP_0 }}, - {"rnorm4d", {HIP_1060, HIP_0, HIP_0 }}, - {"norm", {HIP_1060, HIP_0, HIP_0 }}, - {"rnorm", {HIP_1060, HIP_0, HIP_0 }}, - {"rnormf", {HIP_1060, HIP_0, HIP_0 }}, - {"normf", {HIP_1060, HIP_0, HIP_0 }}, - {"norm3df", {HIP_1060, HIP_0, HIP_0 }}, - {"rnorm3df", {HIP_1060, HIP_0, HIP_0 }}, - {"norm4df", {HIP_1060, HIP_0, HIP_0 }}, - {"rnorm4df", {HIP_1060, HIP_0, HIP_0 }}, - {"cbrt", {HIP_1060, HIP_0, HIP_0 }}, - {"cbrtf", {HIP_1060, HIP_0, HIP_0 }}, - {"rcbrt", {HIP_1060, HIP_0, HIP_0 }}, - {"rcbrtf", {HIP_1060, HIP_0, HIP_0 }}, - {"sinpi", {HIP_1060, HIP_0, HIP_0 }}, - {"sinpif", {HIP_1060, HIP_0, HIP_0 }}, - {"cospi", {HIP_1060, HIP_0, HIP_0 }}, - {"cospif", {HIP_1060, HIP_0, HIP_0 }}, - {"sincospi", {HIP_1060, HIP_0, HIP_0 }}, - {"sincospif", {HIP_1060, HIP_0, HIP_0 }}, - {"pow", {HIP_1060, HIP_0, HIP_0 }}, - {"modf", {HIP_1090, HIP_0, HIP_0 }}, - {"fmod", {HIP_1060, HIP_0, HIP_0 }}, - {"remainder", {HIP_1060, HIP_0, HIP_0 }}, - {"remainderf", {HIP_1060, HIP_0, HIP_0 }}, - {"remquo", {HIP_1090, HIP_0, HIP_0 }}, - {"remquof", {HIP_1060, HIP_0, HIP_0 }}, - {"j0", {HIP_1060, HIP_0, HIP_0 }}, - {"j0f", {HIP_1060, HIP_0, HIP_0 }}, - {"j1", {HIP_1060, HIP_0, HIP_0 }}, - {"j1f", {HIP_1060, HIP_0, HIP_0 }}, - {"jn", {HIP_1060, HIP_0, HIP_0 }}, - {"jnf", 
{HIP_1060, HIP_0, HIP_0 }}, - {"y0", {HIP_1060, HIP_0, HIP_0 }}, - {"y0f", {HIP_1060, HIP_0, HIP_0 }}, - {"y1", {HIP_1060, HIP_0, HIP_0 }}, - {"y1f", {HIP_1060, HIP_0, HIP_0 }}, - {"yn", {HIP_1060, HIP_0, HIP_0 }}, - {"ynf", {HIP_1060, HIP_0, HIP_0 }}, - {"cyl_bessel_i0", {HIP_1090, HIP_0, HIP_0 }}, - {"cyl_bessel_i0f", {HIP_1090, HIP_0, HIP_0 }}, - {"cyl_bessel_i1", {HIP_1090, HIP_0, HIP_0 }}, - {"cyl_bessel_i1f", {HIP_1090, HIP_0, HIP_0 }}, - {"erf", {HIP_1060, HIP_0, HIP_0 }}, - {"erff", {HIP_1060, HIP_0, HIP_0 }}, - {"erfinv", {HIP_1060, HIP_0, HIP_0 }}, - {"erfinvf", {HIP_1060, HIP_0, HIP_0 }}, - {"erfc", {HIP_1060, HIP_0, HIP_0 }}, - {"erfcf", {HIP_1060, HIP_0, HIP_0 }}, - {"lgamma", {HIP_1060, HIP_0, HIP_0 }}, - {"erfcinv", {HIP_1060, HIP_0, HIP_0 }}, - {"erfcinvf", {HIP_1060, HIP_0, HIP_0 }}, - {"normcdfinv", {HIP_1060, HIP_0, HIP_0 }}, - {"normcdfinvf", {HIP_1060, HIP_0, HIP_0 }}, - {"normcdf", {HIP_1060, HIP_0, HIP_0 }}, - {"normcdff", {HIP_1060, HIP_0, HIP_0 }}, - {"erfcx", {HIP_1060, HIP_0, HIP_0 }}, - {"erfcxf", {HIP_1060, HIP_0, HIP_0 }}, - {"lgammaf", {HIP_1060, HIP_0, HIP_0 }}, - {"tgamma", {HIP_1060, HIP_0, HIP_0 }}, - {"tgammaf", {HIP_1060, HIP_0, HIP_0 }}, - {"copysign", {HIP_1060, HIP_0, HIP_0 }}, - {"copysignf", {HIP_1060, HIP_0, HIP_0 }}, - {"nextafter", {HIP_1060, HIP_0, HIP_0 }}, - {"nextafterf", {HIP_1090, HIP_0, HIP_0 }}, - {"nan", {HIP_1060, HIP_0, HIP_0 }}, - {"nanf", {HIP_1060, HIP_0, HIP_0 }}, - {"fma", {HIP_1060, HIP_0, HIP_0 }}, - {"fmaf", {HIP_1060, HIP_0, HIP_0 }}, - {"acosf", {HIP_1060, HIP_0, HIP_0 }}, - {"asinf", {HIP_1060, HIP_0, HIP_0 }}, - {"atanf", {HIP_1060, HIP_0, HIP_0 }}, - {"atan2f", {HIP_1060, HIP_0, HIP_0 }}, - {"cosf", {HIP_1060, HIP_0, HIP_0 }}, - {"sinf", {HIP_1060, HIP_0, HIP_0 }}, - {"tanf", {HIP_1060, HIP_0, HIP_0 }}, - {"coshf", {HIP_1060, HIP_0, HIP_0 }}, - {"sinhf", {HIP_1060, HIP_0, HIP_0 }}, - {"tanhf", {HIP_1060, HIP_0, HIP_0 }}, - {"expf", {HIP_1060, HIP_0, HIP_0 }}, - {"logf", {HIP_1060, HIP_0, HIP_0 }}, 
- {"log10f", {HIP_1060, HIP_0, HIP_0 }}, - {"modff", {HIP_1090, HIP_0, HIP_0 }}, - {"powf", {HIP_1060, HIP_0, HIP_0 }}, - {"sqrtf", {HIP_1060, HIP_0, HIP_0 }}, - {"ceilf", {HIP_1060, HIP_0, HIP_0 }}, - {"floorf", {HIP_1060, HIP_0, HIP_0 }}, - {"fmodf", {HIP_1060, HIP_0, HIP_0 }}, - {"signbit", {HIP_1060, HIP_0, HIP_0 }}, - {"isfinite", {HIP_1060, HIP_0, HIP_0 }}, - {"isnan", {HIP_1060, HIP_0, HIP_0 }}, - {"isinf", {HIP_1060, HIP_0, HIP_0 }}, - {"__mulhi", {HIP_1060, HIP_0, HIP_0 }}, - {"__umulhi", {HIP_1060, HIP_0, HIP_0 }}, - {"__mul64hi", {HIP_1060, HIP_0, HIP_0 }}, - {"__umul64hi", {HIP_1060, HIP_0, HIP_0 }}, - {"__int_as_float", {HIP_1060, HIP_0, HIP_0 }}, - {"__float_as_int", {HIP_1060, HIP_0, HIP_0 }}, - {"__float_as_uint", {HIP_1060, HIP_0, HIP_0 }}, - {"__uint_as_float", {HIP_1060, HIP_0, HIP_0 }}, - {"__syncthreads", {HIP_1060, HIP_0, HIP_0 }}, - {"__syncthreads_count", {HIP_3070, HIP_0, HIP_0 }}, - {"__syncthreads_and", {HIP_3070, HIP_0, HIP_0 }}, - {"__syncthreads_or", {HIP_3070, HIP_0, HIP_0 }}, - {"__threadfence", {HIP_1060, HIP_0, HIP_0 }}, - {"__threadfence_block", {HIP_1060, HIP_0, HIP_0 }}, - {"__threadfence_system",{HIP_1060, HIP_0, HIP_0 }}, - {"__saturatef", {HIP_1060, HIP_0, HIP_0 }}, - {"__sad", {HIP_1060, HIP_0, HIP_0 }}, - {"__usad", {HIP_1060, HIP_0, HIP_0 }}, - {"__mul24", {HIP_1060, HIP_0, HIP_0 }}, - {"__umul24", {HIP_1060, HIP_0, HIP_0 }}, - {"fdividef", {HIP_1060, HIP_0, HIP_0 }}, - {"__fdividef", {HIP_1060, HIP_0, HIP_0 }}, - {"__sinf", {HIP_1060, HIP_0, HIP_0 }}, - {"__cosf", {HIP_1060, HIP_0, HIP_0 }}, - {"__tanf", {HIP_1060, HIP_0, HIP_0 }}, - {"__sincosf", {HIP_1060, HIP_0, HIP_0 }}, - {"__expf", {HIP_1060, HIP_0, HIP_0 }}, - {"__exp10f", {HIP_1060, HIP_0, HIP_0 }}, - {"__log2f", {HIP_1060, HIP_0, HIP_0 }}, - {"__log10f", {HIP_1060, HIP_0, HIP_0 }}, - {"__logf", {HIP_1060, HIP_0, HIP_0 }}, - {"__powf", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2int_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2int_rz", {HIP_1060, HIP_0, HIP_0 }}, - 
{"__float2int_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2int_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2uint_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2uint_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2uint_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2uint_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__int2float_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__int2float_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__int2float_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__int2float_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__uint2float_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__uint2float_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__uint2float_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__uint2float_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2ll_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2ll_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2ll_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2ll_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2ull_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2ull_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2ull_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2ull_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__ll2float_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__ll2float_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__ll2float_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__ll2float_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__ull2float_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__ull2float_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__ull2float_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__ull2float_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__fadd_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__fsub_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__fmul_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__fmaf_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__frcp_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__fsqrt_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__frsqrt_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__fdiv_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__clz", {HIP_1060, HIP_0, HIP_0 }}, - {"__ffs", {HIP_1060, HIP_0, HIP_0 }}, - {"__popc", {HIP_1060, HIP_0, HIP_0 }}, - {"__brev", {HIP_1060, HIP_0, HIP_0 }}, - {"__clzll", 
{HIP_1060, HIP_0, HIP_0 }}, - {"__ffsll", {HIP_1060, HIP_0, HIP_0 }}, - {"__popcll", {HIP_1060, HIP_0, HIP_0 }}, - {"__brevll", {HIP_1060, HIP_0, HIP_0 }}, - {"__byte_perm", {HIP_1060, HIP_0, HIP_0 }}, - {"__hadd", {HIP_1060, HIP_0, HIP_0 }}, - {"__rhadd", {HIP_1060, HIP_0, HIP_0 }}, - {"__uhadd", {HIP_1060, HIP_0, HIP_0 }}, - {"__urhadd", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2float_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2float_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2float_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2float_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2hiint", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2loint", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2int_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2int_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2int_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2int_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2ll_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2ll_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2ll_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2ll_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2uint_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2uint_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2uint_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2uint_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2ull_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2ull_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2ull_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2ull_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__double_as_longlong",{HIP_1060, HIP_0, HIP_0 }}, - {"__hiloint2double", {HIP_1060, HIP_0, HIP_0 }}, - {"__int2double_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__ll2double_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__ll2double_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__ll2double_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__ll2double_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__longlong_as_double",{HIP_1060, HIP_0, HIP_0 }}, - {"__uint2double_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__ull2double_rd", {HIP_1060, HIP_0, HIP_0 }}, - 
{"__ull2double_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__ull2double_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__ull2double_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2half", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2half_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2half_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2half_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2half_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2float", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2half2_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__floats2half2_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__low2float", {HIP_1060, HIP_0, HIP_0 }}, - {"__high2float", {HIP_1060, HIP_0, HIP_0 }}, - {"__float22half2_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__half22float2", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2int_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2int_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2int_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2int_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__int2half_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__int2half_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__int2half_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__int2half_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2short_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2short_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2short_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2short_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__short2half_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__short2half_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__short2half_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__short2half_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2uint_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2uint_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2uint_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2uint_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__uint2half_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__uint2half_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__uint2half_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__uint2half_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2ushort_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2ushort_rz", {HIP_1060, 
HIP_0, HIP_0 }}, - {"__half2ushort_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2ushort_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__ushort2half_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__ushort2half_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__ushort2half_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__ushort2half_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2ull_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2ull_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2ull_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2ull_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__ull2half_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__ull2half_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__ull2half_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__ull2half_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2ll_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2ll_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2ll_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2ll_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__ll2half_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__ll2half_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__ll2half_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__ll2half_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"htrunc", {HIP_1060, HIP_0, HIP_0 }}, - {"hceil", {HIP_1060, HIP_0, HIP_0 }}, - {"hfloor", {HIP_1060, HIP_0, HIP_0 }}, - {"hrint", {HIP_1060, HIP_0, HIP_0 }}, - {"h2trunc", {HIP_1060, HIP_0, HIP_0 }}, - {"h2ceil", {HIP_1060, HIP_0, HIP_0 }}, - {"h2floor", {HIP_1060, HIP_0, HIP_0 }}, - {"h2rint", {HIP_1090, HIP_0, HIP_0 }}, - {"__half2half2", {HIP_1090, HIP_0, HIP_0 }}, - {"__lowhigh2highlow", {HIP_1060, HIP_0, HIP_0 }}, - {"__lows2half2", {HIP_1060, HIP_0, HIP_0 }}, - {"__highs2half2", {HIP_1060, HIP_0, HIP_0 }}, - {"__high2half", {HIP_1060, HIP_0, HIP_0 }}, - {"__low2half", {HIP_1060, HIP_0, HIP_0 }}, - {"__hisinf", {HIP_1060, HIP_0, HIP_0 }}, - {"__halves2half2", {HIP_1060, HIP_0, HIP_0 }}, - {"__low2half2", {HIP_1060, HIP_0, HIP_0 }}, - {"__high2half2", {HIP_1060, HIP_0, HIP_0 }}, - {"__half_as_short", {HIP_1060, HIP_0, HIP_0 }}, - {"__half_as_ushort", {HIP_1060, HIP_0, HIP_0 }}, - 
{"__short_as_half", {HIP_1090, HIP_0, HIP_0 }}, - {"__ushort_as_half", {HIP_1060, HIP_0, HIP_0 }}, - {"__ldg", {HIP_1060, HIP_0, HIP_0 }}, - {"__ldcg", {HIP_1090, HIP_0, HIP_0 }}, - {"__ldca", {HIP_1090, HIP_0, HIP_0 }}, - {"__ldcs", {HIP_1090, HIP_0, HIP_0 }}, - {"__heq2", {HIP_1060, HIP_0, HIP_0 }}, - {"__hne2", {HIP_1060, HIP_0, HIP_0 }}, - {"__hle2", {HIP_1060, HIP_0, HIP_0 }}, - {"__hge2", {HIP_1060, HIP_0, HIP_0 }}, - {"__hlt2", {HIP_1060, HIP_0, HIP_0 }}, - {"__hgt2", {HIP_1060, HIP_0, HIP_0 }}, - {"__hequ2", {HIP_1090, HIP_0, HIP_0 }}, - {"__hneu2", {HIP_1090, HIP_0, HIP_0 }}, - {"__hleu2", {HIP_1090, HIP_0, HIP_0 }}, - {"__hgeu2", {HIP_1090, HIP_0, HIP_0 }}, - {"__hltu2", {HIP_1090, HIP_0, HIP_0 }}, - {"__hgtu2", {HIP_1090, HIP_0, HIP_0 }}, - {"__hisnan2", {HIP_1060, HIP_0, HIP_0 }}, - {"__hadd2", {HIP_1060, HIP_0, HIP_0 }}, - {"__hsub2", {HIP_1060, HIP_0, HIP_0 }}, - {"__hmul2", {HIP_1060, HIP_0, HIP_0 }}, - {"__h2div", {HIP_1090, HIP_0, HIP_0 }}, - {"__hadd2_sat", {HIP_1060, HIP_0, HIP_0 }}, - {"__hsub2_sat", {HIP_1060, HIP_0, HIP_0 }}, - {"__hmul2_sat", {HIP_1060, HIP_0, HIP_0 }}, - {"__hfma2", {HIP_1060, HIP_0, HIP_0 }}, - {"__hfma2_sat", {HIP_1060, HIP_0, HIP_0 }}, - {"__hneg2", {HIP_1060, HIP_0, HIP_0 }}, - {"__hsub", {HIP_1060, HIP_0, HIP_0 }}, - {"__hmul", {HIP_1060, HIP_0, HIP_0 }}, - {"__hdiv", {HIP_1090, HIP_0, HIP_0 }}, - {"__hadd_sat", {HIP_1060, HIP_0, HIP_0 }}, - {"__hsub_sat", {HIP_1060, HIP_0, HIP_0 }}, - {"__hmul_sat", {HIP_1060, HIP_0, HIP_0 }}, - {"__hfma", {HIP_1060, HIP_0, HIP_0 }}, - {"__hfma_sat", {HIP_1060, HIP_0, HIP_0 }}, - {"__hneg", {HIP_1060, HIP_0, HIP_0 }}, - {"__habs2", {HIP_3050, HIP_0, HIP_0 }}, - {"__habs", {HIP_3050, HIP_0, HIP_0 }}, - {"__hbeq2", {HIP_1060, HIP_0, HIP_0 }}, - {"__hbne2", {HIP_1060, HIP_0, HIP_0 }}, - {"__hble2", {HIP_1060, HIP_0, HIP_0 }}, - {"__hbge2", {HIP_1060, HIP_0, HIP_0 }}, - {"__hblt2", {HIP_1060, HIP_0, HIP_0 }}, - {"__hbgt2", {HIP_1060, HIP_0, HIP_0 }}, - {"__hbequ2", {HIP_1090, HIP_0, HIP_0 
}}, - {"__hbneu2", {HIP_1090, HIP_0, HIP_0 }}, - {"__hbleu2", {HIP_1090, HIP_0, HIP_0 }}, - {"__hbgeu2", {HIP_1090, HIP_0, HIP_0 }}, - {"__hbltu2", {HIP_1090, HIP_0, HIP_0 }}, - {"__hbgtu2", {HIP_1090, HIP_0, HIP_0 }}, - {"__heq", {HIP_1060, HIP_0, HIP_0 }}, - {"__hne", {HIP_1060, HIP_0, HIP_0 }}, - {"__hle", {HIP_1060, HIP_0, HIP_0 }}, - {"__hge", {HIP_1060, HIP_0, HIP_0 }}, - {"__hlt", {HIP_1060, HIP_0, HIP_0 }}, - {"__hgt", {HIP_1060, HIP_0, HIP_0 }}, - {"__hequ", {HIP_1090, HIP_0, HIP_0 }}, - {"__hneu", {HIP_1090, HIP_0, HIP_0 }}, - {"__hleu", {HIP_1090, HIP_0, HIP_0 }}, - {"__hgeu", {HIP_1090, HIP_0, HIP_0 }}, - {"__hltu", {HIP_1090, HIP_0, HIP_0 }}, - {"__hgtu", {HIP_1090, HIP_0, HIP_0 }}, - {"__hisnan", {HIP_1060, HIP_0, HIP_0 }}, - {"hsqrt", {HIP_1060, HIP_0, HIP_0 }}, - {"hrsqrt", {HIP_1060, HIP_0, HIP_0 }}, - {"hrcp", {HIP_1090, HIP_0, HIP_0 }}, - {"hlog", {HIP_1060, HIP_0, HIP_0 }}, - {"hlog2", {HIP_1060, HIP_0, HIP_0 }}, - {"hlog10", {HIP_1060, HIP_0, HIP_0 }}, - {"hexp", {HIP_1060, HIP_0, HIP_0 }}, - {"hexp2", {HIP_1060, HIP_0, HIP_0 }}, - {"hexp10", {HIP_1060, HIP_0, HIP_0 }}, - {"hcos", {HIP_1060, HIP_0, HIP_0 }}, - {"hsin", {HIP_1060, HIP_0, HIP_0 }}, - {"h2sqrt", {HIP_1060, HIP_0, HIP_0 }}, - {"h2rsqrt", {HIP_1060, HIP_0, HIP_0 }}, - {"h2rcp", {HIP_1060, HIP_0, HIP_0 }}, - {"h2log", {HIP_1060, HIP_0, HIP_0 }}, - {"h2log2", {HIP_1060, HIP_0, HIP_0 }}, - {"h2log10", {HIP_1060, HIP_0, HIP_0 }}, - {"h2exp", {HIP_1060, HIP_0, HIP_0 }}, - {"h2exp2", {HIP_1060, HIP_0, HIP_0 }}, - {"h2exp10", {HIP_1060, HIP_0, HIP_0 }}, - {"h2cos", {HIP_1060, HIP_0, HIP_0 }}, - {"h2sin", {HIP_1060, HIP_0, HIP_0 }}, - {"__shfl", {HIP_1060, HIP_0, HIP_0 }}, - {"__shfl_up", {HIP_1060, HIP_0, HIP_0 }}, - {"__shfl_down", {HIP_1060, HIP_0, HIP_0 }}, - {"__shfl_xor", {HIP_1060, HIP_0, HIP_0 }}, - {"atomicAdd", {HIP_1060, HIP_0, HIP_0 }}, - {"atomicSub", {HIP_1060, HIP_0, HIP_0 }}, - {"atomicExch", {HIP_1060, HIP_0, HIP_0 }}, - {"atomicMin", {HIP_1060, HIP_0, HIP_0 }}, - 
{"atomicMax", {HIP_1060, HIP_0, HIP_0 }}, - {"atomicInc", {HIP_1060, HIP_0, HIP_0 }}, - {"atomicDec", {HIP_1060, HIP_0, HIP_0 }}, - {"atomicAnd", {HIP_1060, HIP_0, HIP_0 }}, - {"atomicOr", {HIP_1060, HIP_0, HIP_0 }}, - {"atomicXor", {HIP_1060, HIP_0, HIP_0 }}, - {"atomicCAS", {HIP_1060, HIP_0, HIP_0 }}, - {"__all", {HIP_1060, HIP_0, HIP_0 }}, - {"__any", {HIP_1060, HIP_0, HIP_0 }}, - {"__ballot", {HIP_1060, HIP_0, HIP_0 }}, - {"clock64", {HIP_1060, HIP_0, HIP_0 }}, - {"clock", {HIP_1060, HIP_0, HIP_0 }}, - {"__dadd_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__ddiv_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__dmul_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__drcp_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__dsqrt_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__dsub_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__fma_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__assert_fail", {HIP_1090, HIP_0, HIP_0 }}, - {"__assertfail", {HIP_1090, HIP_0, HIP_0 }}, - {"atomicCAS_system", {HIP_4030, HIP_0, HIP_0 }}, - {"atomicSub_system", {HIP_4030, HIP_0, HIP_0 }}, - {"atomicAdd_system", {HIP_4030, HIP_0, HIP_0 }}, - {"atomicExch_system", {HIP_4030, HIP_0, HIP_0 }}, - {"atomicMin_system", {HIP_4030, HIP_0, HIP_0 }}, - {"atomicMax_system", {HIP_4030, HIP_0, HIP_0 }}, - {"atomicAnd_system", {HIP_4030, HIP_0, HIP_0 }}, - {"atomicOr_system", {HIP_4030, HIP_0, HIP_0 }}, - {"atomicXor_system", {HIP_4030, HIP_0, HIP_0 }}, - {"__funnelshift_l", {HIP_4040, HIP_0, HIP_0 }}, - {"__funnelshift_lc", {HIP_4040, HIP_0, HIP_0 }}, - {"__funnelshift_r", {HIP_4040, HIP_0, HIP_0 }}, - {"__funnelshift_rc", {HIP_4040, HIP_0, HIP_0 }}, -}; - -const std::map CUDA_DEVICE_TYPE_NAME_MAP { -}; - -const std::map CUDA_DEVICE_TYPE_NAME_VER_MAP { -}; - -const std::map HIP_DEVICE_TYPE_NAME_VER_MAP { + {"abs", {HIP_1060, HIP_0, HIP_0 }}, + {"labs", {HIP_1090, HIP_0, HIP_0 }}, + {"llabs", {HIP_1090, HIP_0, HIP_0 }}, + {"fabs", {HIP_1060, HIP_0, HIP_0 }}, + {"fabsf", {HIP_1060, HIP_0, HIP_0 }}, + {"min", {HIP_1060, HIP_0, HIP_0 }}, + {"fminf", {HIP_1060, HIP_0, 
HIP_0 }}, + {"fmin", {HIP_1060, HIP_0, HIP_0 }}, + {"max", {HIP_1060, HIP_0, HIP_0 }}, + {"fmaxf", {HIP_1060, HIP_0, HIP_0 }}, + {"fmax", {HIP_1060, HIP_0, HIP_0 }}, + {"sin", {HIP_1060, HIP_0, HIP_0 }}, + {"cos", {HIP_1060, HIP_0, HIP_0 }}, + {"sincos", {HIP_1060, HIP_0, HIP_0 }}, + {"sincosf", {HIP_1060, HIP_0, HIP_0 }}, + {"tan", {HIP_1060, HIP_0, HIP_0 }}, + {"sqrt", {HIP_1060, HIP_0, HIP_0 }}, + {"rsqrt", {HIP_1060, HIP_0, HIP_0 }}, + {"rsqrtf", {HIP_1060, HIP_0, HIP_0 }}, + {"log2", {HIP_1060, HIP_0, HIP_0 }}, + {"exp2", {HIP_1060, HIP_0, HIP_0 }}, + {"exp2f", {HIP_1060, HIP_0, HIP_0 }}, + {"exp10", {HIP_1060, HIP_0, HIP_0 }}, + {"exp10f", {HIP_1060, HIP_0, HIP_0 }}, + {"expm1", {HIP_1060, HIP_0, HIP_0 }}, + {"expm1f", {HIP_1060, HIP_0, HIP_0 }}, + {"log2f", {HIP_1060, HIP_0, HIP_0 }}, + {"log10", {HIP_1060, HIP_0, HIP_0 }}, + {"log", {HIP_1060, HIP_0, HIP_0 }}, + {"log1p", {HIP_1060, HIP_0, HIP_0 }}, + {"log1pf", {HIP_1060, HIP_0, HIP_0 }}, + {"floor", {HIP_1060, HIP_0, HIP_0 }}, + {"exp", {HIP_1060, HIP_0, HIP_0 }}, + {"cosh", {HIP_1060, HIP_0, HIP_0 }}, + {"sinh", {HIP_1060, HIP_0, HIP_0 }}, + {"tanh", {HIP_1060, HIP_0, HIP_0 }}, + {"acosh", {HIP_1060, HIP_0, HIP_0 }}, + {"acoshf", {HIP_1060, HIP_0, HIP_0 }}, + {"asinh", {HIP_1060, HIP_0, HIP_0 }}, + {"asinhf", {HIP_1060, HIP_0, HIP_0 }}, + {"atanh", {HIP_1060, HIP_0, HIP_0 }}, + {"atanhf", {HIP_1060, HIP_0, HIP_0 }}, + {"ldexp", {HIP_1060, HIP_0, HIP_0 }}, + {"ldexpf", {HIP_1060, HIP_0, HIP_0 }}, + {"logb", {HIP_1060, HIP_0, HIP_0 }}, + {"logbf", {HIP_1060, HIP_0, HIP_0 }}, + {"ilogb", {HIP_1060, HIP_0, HIP_0 }}, + {"ilogbf", {HIP_1060, HIP_0, HIP_0 }}, + {"scalbn", {HIP_1060, HIP_0, HIP_0 }}, + {"scalbnf", {HIP_1060, HIP_0, HIP_0 }}, + {"scalbln", {HIP_1060, HIP_0, HIP_0 }}, + {"scalblnf", {HIP_1060, HIP_0, HIP_0 }}, + {"frexp", {HIP_1060, HIP_0, HIP_0 }}, + {"frexpf", {HIP_1060, HIP_0, HIP_0 }}, + {"round", {HIP_1060, HIP_0, HIP_0 }}, + {"roundf", {HIP_1060, HIP_0, HIP_0 }}, + {"lround", {HIP_1060, 
HIP_0, HIP_0 }}, + {"lroundf", {HIP_1060, HIP_0, HIP_0 }}, + {"llround", {HIP_1060, HIP_0, HIP_0 }}, + {"llroundf", {HIP_1060, HIP_0, HIP_0 }}, + {"rint", {HIP_1060, HIP_0, HIP_0 }}, + {"rintf", {HIP_1060, HIP_0, HIP_0 }}, + {"lrint", {HIP_1060, HIP_0, HIP_0 }}, + {"lrintf", {HIP_1060, HIP_0, HIP_0 }}, + {"llrint", {HIP_1060, HIP_0, HIP_0 }}, + {"llrintf", {HIP_1060, HIP_0, HIP_0 }}, + {"nearbyint", {HIP_1060, HIP_0, HIP_0 }}, + {"nearbyintf", {HIP_1060, HIP_0, HIP_0 }}, + {"ceil", {HIP_1060, HIP_0, HIP_0 }}, + {"trunc", {HIP_1060, HIP_0, HIP_0 }}, + {"truncf", {HIP_1060, HIP_0, HIP_0 }}, + {"fdim", {HIP_1060, HIP_0, HIP_0 }}, + {"fdimf", {HIP_1060, HIP_0, HIP_0 }}, + {"atan2", {HIP_1060, HIP_0, HIP_0 }}, + {"atan", {HIP_1060, HIP_0, HIP_0 }}, + {"acos", {HIP_1060, HIP_0, HIP_0 }}, + {"asin", {HIP_1060, HIP_0, HIP_0 }}, + {"hypot", {HIP_1060, HIP_0, HIP_0 }}, + {"rhypot", {HIP_1060, HIP_0, HIP_0 }}, + {"hypotf", {HIP_1060, HIP_0, HIP_0 }}, + {"rhypotf", {HIP_1060, HIP_0, HIP_0 }}, + {"norm3d", {HIP_1060, HIP_0, HIP_0 }}, + {"rnorm3d", {HIP_1060, HIP_0, HIP_0 }}, + {"norm4d", {HIP_1060, HIP_0, HIP_0 }}, + {"rnorm4d", {HIP_1060, HIP_0, HIP_0 }}, + {"norm", {HIP_1060, HIP_0, HIP_0 }}, + {"rnorm", {HIP_1060, HIP_0, HIP_0 }}, + {"rnormf", {HIP_1060, HIP_0, HIP_0 }}, + {"normf", {HIP_1060, HIP_0, HIP_0 }}, + {"norm3df", {HIP_1060, HIP_0, HIP_0 }}, + {"rnorm3df", {HIP_1060, HIP_0, HIP_0 }}, + {"norm4df", {HIP_1060, HIP_0, HIP_0 }}, + {"rnorm4df", {HIP_1060, HIP_0, HIP_0 }}, + {"cbrt", {HIP_1060, HIP_0, HIP_0 }}, + {"cbrtf", {HIP_1060, HIP_0, HIP_0 }}, + {"rcbrt", {HIP_1060, HIP_0, HIP_0 }}, + {"rcbrtf", {HIP_1060, HIP_0, HIP_0 }}, + {"sinpi", {HIP_1060, HIP_0, HIP_0 }}, + {"sinpif", {HIP_1060, HIP_0, HIP_0 }}, + {"cospi", {HIP_1060, HIP_0, HIP_0 }}, + {"cospif", {HIP_1060, HIP_0, HIP_0 }}, + {"sincospi", {HIP_1060, HIP_0, HIP_0 }}, + {"sincospif", {HIP_1060, HIP_0, HIP_0 }}, + {"pow", {HIP_1060, HIP_0, HIP_0 }}, + {"modf", {HIP_1090, HIP_0, HIP_0 }}, + {"fmod", {HIP_1060, 
HIP_0, HIP_0 }}, + {"remainder", {HIP_1060, HIP_0, HIP_0 }}, + {"remainderf", {HIP_1060, HIP_0, HIP_0 }}, + {"remquo", {HIP_1090, HIP_0, HIP_0 }}, + {"remquof", {HIP_1060, HIP_0, HIP_0 }}, + {"j0", {HIP_1060, HIP_0, HIP_0 }}, + {"j0f", {HIP_1060, HIP_0, HIP_0 }}, + {"j1", {HIP_1060, HIP_0, HIP_0 }}, + {"j1f", {HIP_1060, HIP_0, HIP_0 }}, + {"jn", {HIP_1060, HIP_0, HIP_0 }}, + {"jnf", {HIP_1060, HIP_0, HIP_0 }}, + {"y0", {HIP_1060, HIP_0, HIP_0 }}, + {"y0f", {HIP_1060, HIP_0, HIP_0 }}, + {"y1", {HIP_1060, HIP_0, HIP_0 }}, + {"y1f", {HIP_1060, HIP_0, HIP_0 }}, + {"yn", {HIP_1060, HIP_0, HIP_0 }}, + {"ynf", {HIP_1060, HIP_0, HIP_0 }}, + {"cyl_bessel_i0", {HIP_1090, HIP_0, HIP_0 }}, + {"cyl_bessel_i0f", {HIP_1090, HIP_0, HIP_0 }}, + {"cyl_bessel_i1", {HIP_1090, HIP_0, HIP_0 }}, + {"cyl_bessel_i1f", {HIP_1090, HIP_0, HIP_0 }}, + {"erf", {HIP_1060, HIP_0, HIP_0 }}, + {"erff", {HIP_1060, HIP_0, HIP_0 }}, + {"erfinv", {HIP_1060, HIP_0, HIP_0 }}, + {"erfinvf", {HIP_1060, HIP_0, HIP_0 }}, + {"erfc", {HIP_1060, HIP_0, HIP_0 }}, + {"erfcf", {HIP_1060, HIP_0, HIP_0 }}, + {"lgamma", {HIP_1060, HIP_0, HIP_0 }}, + {"erfcinv", {HIP_1060, HIP_0, HIP_0 }}, + {"erfcinvf", {HIP_1060, HIP_0, HIP_0 }}, + {"normcdfinv", {HIP_1060, HIP_0, HIP_0 }}, + {"normcdfinvf", {HIP_1060, HIP_0, HIP_0 }}, + {"normcdf", {HIP_1060, HIP_0, HIP_0 }}, + {"normcdff", {HIP_1060, HIP_0, HIP_0 }}, + {"erfcx", {HIP_1060, HIP_0, HIP_0 }}, + {"erfcxf", {HIP_1060, HIP_0, HIP_0 }}, + {"lgammaf", {HIP_1060, HIP_0, HIP_0 }}, + {"tgamma", {HIP_1060, HIP_0, HIP_0 }}, + {"tgammaf", {HIP_1060, HIP_0, HIP_0 }}, + {"copysign", {HIP_1060, HIP_0, HIP_0 }}, + {"copysignf", {HIP_1060, HIP_0, HIP_0 }}, + {"nextafter", {HIP_1060, HIP_0, HIP_0 }}, + {"nextafterf", {HIP_1090, HIP_0, HIP_0 }}, + {"nan", {HIP_1060, HIP_0, HIP_0 }}, + {"nanf", {HIP_1060, HIP_0, HIP_0 }}, + {"fma", {HIP_1060, HIP_0, HIP_0 }}, + {"fmaf", {HIP_1060, HIP_0, HIP_0 }}, + {"acosf", {HIP_1060, HIP_0, HIP_0 }}, + {"asinf", {HIP_1060, HIP_0, HIP_0 }}, + 
{"atanf", {HIP_1060, HIP_0, HIP_0 }}, + {"atan2f", {HIP_1060, HIP_0, HIP_0 }}, + {"cosf", {HIP_1060, HIP_0, HIP_0 }}, + {"sinf", {HIP_1060, HIP_0, HIP_0 }}, + {"tanf", {HIP_1060, HIP_0, HIP_0 }}, + {"coshf", {HIP_1060, HIP_0, HIP_0 }}, + {"sinhf", {HIP_1060, HIP_0, HIP_0 }}, + {"tanhf", {HIP_1060, HIP_0, HIP_0 }}, + {"expf", {HIP_1060, HIP_0, HIP_0 }}, + {"logf", {HIP_1060, HIP_0, HIP_0 }}, + {"log10f", {HIP_1060, HIP_0, HIP_0 }}, + {"modff", {HIP_1090, HIP_0, HIP_0 }}, + {"powf", {HIP_1060, HIP_0, HIP_0 }}, + {"sqrtf", {HIP_1060, HIP_0, HIP_0 }}, + {"ceilf", {HIP_1060, HIP_0, HIP_0 }}, + {"floorf", {HIP_1060, HIP_0, HIP_0 }}, + {"fmodf", {HIP_1060, HIP_0, HIP_0 }}, + {"signbit", {HIP_1060, HIP_0, HIP_0 }}, + {"isfinite", {HIP_1060, HIP_0, HIP_0 }}, + {"isnan", {HIP_1060, HIP_0, HIP_0 }}, + {"isinf", {HIP_1060, HIP_0, HIP_0 }}, + {"__mulhi", {HIP_1060, HIP_0, HIP_0 }}, + {"__umulhi", {HIP_1060, HIP_0, HIP_0 }}, + {"__mul64hi", {HIP_1060, HIP_0, HIP_0 }}, + {"__umul64hi", {HIP_1060, HIP_0, HIP_0 }}, + {"__int_as_float", {HIP_1060, HIP_0, HIP_0 }}, + {"__float_as_int", {HIP_1060, HIP_0, HIP_0 }}, + {"__float_as_uint", {HIP_1060, HIP_0, HIP_0 }}, + {"__uint_as_float", {HIP_1060, HIP_0, HIP_0 }}, + {"__syncthreads", {HIP_1060, HIP_0, HIP_0 }}, + {"__syncthreads_count", {HIP_3070, HIP_0, HIP_0 }}, + {"__syncthreads_and", {HIP_3070, HIP_0, HIP_0 }}, + {"__syncthreads_or", {HIP_3070, HIP_0, HIP_0 }}, + {"__threadfence", {HIP_1060, HIP_0, HIP_0 }}, + {"__threadfence_block", {HIP_1060, HIP_0, HIP_0 }}, + {"__threadfence_system", {HIP_1060, HIP_0, HIP_0 }}, + {"__saturatef", {HIP_1060, HIP_0, HIP_0 }}, + {"__sad", {HIP_1060, HIP_0, HIP_0 }}, + {"__usad", {HIP_1060, HIP_0, HIP_0 }}, + {"__mul24", {HIP_1060, HIP_0, HIP_0 }}, + {"__umul24", {HIP_1060, HIP_0, HIP_0 }}, + {"fdividef", {HIP_1060, HIP_0, HIP_0 }}, + {"__fdividef", {HIP_1060, HIP_0, HIP_0 }}, + {"__sinf", {HIP_1060, HIP_0, HIP_0 }}, + {"__cosf", {HIP_1060, HIP_0, HIP_0 }}, + {"__tanf", {HIP_1060, HIP_0, HIP_0 }}, + 
{"__sincosf", {HIP_1060, HIP_0, HIP_0 }}, + {"__expf", {HIP_1060, HIP_0, HIP_0 }}, + {"__exp10f", {HIP_1060, HIP_0, HIP_0 }}, + {"__log2f", {HIP_1060, HIP_0, HIP_0 }}, + {"__log10f", {HIP_1060, HIP_0, HIP_0 }}, + {"__logf", {HIP_1060, HIP_0, HIP_0 }}, + {"__powf", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2int_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2int_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2int_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2int_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2uint_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2uint_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2uint_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2uint_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__int2float_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__int2float_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__int2float_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__int2float_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__uint2float_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__uint2float_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__uint2float_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__uint2float_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2ll_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2ll_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2ll_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2ll_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2ull_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2ull_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2ull_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2ull_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__ll2float_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__ll2float_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__ll2float_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__ll2float_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__ull2float_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__ull2float_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__ull2float_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__ull2float_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__fadd_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__fsub_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__fmul_rn", {HIP_1060, HIP_0, HIP_0 }}, + 
{"__fmaf_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__frcp_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__fsqrt_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__frsqrt_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__fdiv_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__clz", {HIP_1060, HIP_0, HIP_0 }}, + {"__ffs", {HIP_1060, HIP_0, HIP_0 }}, + {"__popc", {HIP_1060, HIP_0, HIP_0 }}, + {"__brev", {HIP_1060, HIP_0, HIP_0 }}, + {"__clzll", {HIP_1060, HIP_0, HIP_0 }}, + {"__ffsll", {HIP_1060, HIP_0, HIP_0 }}, + {"__popcll", {HIP_1060, HIP_0, HIP_0 }}, + {"__brevll", {HIP_1060, HIP_0, HIP_0 }}, + {"__byte_perm", {HIP_1060, HIP_0, HIP_0 }}, + {"__hadd", {HIP_1060, HIP_0, HIP_0 }}, + {"__rhadd", {HIP_1060, HIP_0, HIP_0 }}, + {"__uhadd", {HIP_1060, HIP_0, HIP_0 }}, + {"__urhadd", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2float_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2float_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2float_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2float_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2hiint", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2loint", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2int_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2int_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2int_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2int_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2ll_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2ll_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2ll_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2ll_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2uint_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2uint_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2uint_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2uint_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2ull_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2ull_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2ull_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2ull_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__double_as_longlong", {HIP_1060, HIP_0, HIP_0 }}, + {"__hiloint2double", {HIP_1060, HIP_0, HIP_0 }}, + 
{"__int2double_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__ll2double_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__ll2double_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__ll2double_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__ll2double_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__longlong_as_double", {HIP_1060, HIP_0, HIP_0 }}, + {"__uint2double_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__ull2double_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__ull2double_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__ull2double_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__ull2double_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2half", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2half_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2half_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2half_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2half_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2float", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2half2_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__floats2half2_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__low2float", {HIP_1060, HIP_0, HIP_0 }}, + {"__high2float", {HIP_1060, HIP_0, HIP_0 }}, + {"__float22half2_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__half22float2", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2int_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2int_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2int_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2int_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__int2half_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__int2half_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__int2half_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__int2half_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2short_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2short_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2short_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2short_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__short2half_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__short2half_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__short2half_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__short2half_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2uint_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2uint_rz", 
{HIP_1060, HIP_0, HIP_0 }}, + {"__half2uint_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2uint_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__uint2half_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__uint2half_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__uint2half_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__uint2half_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2ushort_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2ushort_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2ushort_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2ushort_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__ushort2half_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__ushort2half_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__ushort2half_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__ushort2half_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2ull_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2ull_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2ull_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2ull_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__ull2half_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__ull2half_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__ull2half_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__ull2half_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2ll_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2ll_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2ll_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2ll_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__ll2half_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__ll2half_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__ll2half_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__ll2half_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"htrunc", {HIP_1060, HIP_0, HIP_0 }}, + {"hceil", {HIP_1060, HIP_0, HIP_0 }}, + {"hfloor", {HIP_1060, HIP_0, HIP_0 }}, + {"hrint", {HIP_1060, HIP_0, HIP_0 }}, + {"h2trunc", {HIP_1060, HIP_0, HIP_0 }}, + {"h2ceil", {HIP_1060, HIP_0, HIP_0 }}, + {"h2floor", {HIP_1060, HIP_0, HIP_0 }}, + {"h2rint", {HIP_1090, HIP_0, HIP_0 }}, + {"__half2half2", {HIP_1090, HIP_0, HIP_0 }}, + {"__lowhigh2highlow", {HIP_1060, HIP_0, HIP_0 }}, + {"__lows2half2", {HIP_1060, HIP_0, HIP_0 }}, + {"__highs2half2", {HIP_1060, 
HIP_0, HIP_0 }}, + {"__high2half", {HIP_1060, HIP_0, HIP_0 }}, + {"__low2half", {HIP_1060, HIP_0, HIP_0 }}, + {"__hisinf", {HIP_1060, HIP_0, HIP_0 }}, + {"__halves2half2", {HIP_1060, HIP_0, HIP_0 }}, + {"__low2half2", {HIP_1060, HIP_0, HIP_0 }}, + {"__high2half2", {HIP_1060, HIP_0, HIP_0 }}, + {"__half_as_short", {HIP_1060, HIP_0, HIP_0 }}, + {"__half_as_ushort", {HIP_1060, HIP_0, HIP_0 }}, + {"__short_as_half", {HIP_1090, HIP_0, HIP_0 }}, + {"__ushort_as_half", {HIP_1060, HIP_0, HIP_0 }}, + {"__ldg", {HIP_1060, HIP_0, HIP_0 }}, + {"__ldcg", {HIP_1090, HIP_0, HIP_0 }}, + {"__ldca", {HIP_1090, HIP_0, HIP_0 }}, + {"__ldcs", {HIP_1090, HIP_0, HIP_0 }}, + {"__heq2", {HIP_1060, HIP_0, HIP_0 }}, + {"__hne2", {HIP_1060, HIP_0, HIP_0 }}, + {"__hle2", {HIP_1060, HIP_0, HIP_0 }}, + {"__hge2", {HIP_1060, HIP_0, HIP_0 }}, + {"__hlt2", {HIP_1060, HIP_0, HIP_0 }}, + {"__hgt2", {HIP_1060, HIP_0, HIP_0 }}, + {"__hequ2", {HIP_1090, HIP_0, HIP_0 }}, + {"__hneu2", {HIP_1090, HIP_0, HIP_0 }}, + {"__hleu2", {HIP_1090, HIP_0, HIP_0 }}, + {"__hgeu2", {HIP_1090, HIP_0, HIP_0 }}, + {"__hltu2", {HIP_1090, HIP_0, HIP_0 }}, + {"__hgtu2", {HIP_1090, HIP_0, HIP_0 }}, + {"__hisnan2", {HIP_1060, HIP_0, HIP_0 }}, + {"__hadd2", {HIP_1060, HIP_0, HIP_0 }}, + {"__hsub2", {HIP_1060, HIP_0, HIP_0 }}, + {"__hmul2", {HIP_1060, HIP_0, HIP_0 }}, + {"__h2div", {HIP_1090, HIP_0, HIP_0 }}, + {"__hadd2_sat", {HIP_1060, HIP_0, HIP_0 }}, + {"__hsub2_sat", {HIP_1060, HIP_0, HIP_0 }}, + {"__hmul2_sat", {HIP_1060, HIP_0, HIP_0 }}, + {"__hfma2", {HIP_1060, HIP_0, HIP_0 }}, + {"__hfma2_sat", {HIP_1060, HIP_0, HIP_0 }}, + {"__hneg2", {HIP_1060, HIP_0, HIP_0 }}, + {"__hsub", {HIP_1060, HIP_0, HIP_0 }}, + {"__hmul", {HIP_1060, HIP_0, HIP_0 }}, + {"__hdiv", {HIP_1090, HIP_0, HIP_0 }}, + {"__hadd_sat", {HIP_1060, HIP_0, HIP_0 }}, + {"__hsub_sat", {HIP_1060, HIP_0, HIP_0 }}, + {"__hmul_sat", {HIP_1060, HIP_0, HIP_0 }}, + {"__hfma", {HIP_1060, HIP_0, HIP_0 }}, + {"__hfma_sat", {HIP_1060, HIP_0, HIP_0 }}, + {"__hneg", 
{HIP_1060, HIP_0, HIP_0 }}, + {"__habs2", {HIP_3050, HIP_0, HIP_0 }}, + {"__habs", {HIP_3050, HIP_0, HIP_0 }}, + {"__hbeq2", {HIP_1060, HIP_0, HIP_0 }}, + {"__hbne2", {HIP_1060, HIP_0, HIP_0 }}, + {"__hble2", {HIP_1060, HIP_0, HIP_0 }}, + {"__hbge2", {HIP_1060, HIP_0, HIP_0 }}, + {"__hblt2", {HIP_1060, HIP_0, HIP_0 }}, + {"__hbgt2", {HIP_1060, HIP_0, HIP_0 }}, + {"__hbequ2", {HIP_1090, HIP_0, HIP_0 }}, + {"__hbneu2", {HIP_1090, HIP_0, HIP_0 }}, + {"__hbleu2", {HIP_1090, HIP_0, HIP_0 }}, + {"__hbgeu2", {HIP_1090, HIP_0, HIP_0 }}, + {"__hbltu2", {HIP_1090, HIP_0, HIP_0 }}, + {"__hbgtu2", {HIP_1090, HIP_0, HIP_0 }}, + {"__heq", {HIP_1060, HIP_0, HIP_0 }}, + {"__hne", {HIP_1060, HIP_0, HIP_0 }}, + {"__hle", {HIP_1060, HIP_0, HIP_0 }}, + {"__hge", {HIP_1060, HIP_0, HIP_0 }}, + {"__hlt", {HIP_1060, HIP_0, HIP_0 }}, + {"__hgt", {HIP_1060, HIP_0, HIP_0 }}, + {"__hequ", {HIP_1090, HIP_0, HIP_0 }}, + {"__hneu", {HIP_1090, HIP_0, HIP_0 }}, + {"__hleu", {HIP_1090, HIP_0, HIP_0 }}, + {"__hgeu", {HIP_1090, HIP_0, HIP_0 }}, + {"__hltu", {HIP_1090, HIP_0, HIP_0 }}, + {"__hgtu", {HIP_1090, HIP_0, HIP_0 }}, + {"__hisnan", {HIP_1060, HIP_0, HIP_0 }}, + {"hsqrt", {HIP_1060, HIP_0, HIP_0 }}, + {"hrsqrt", {HIP_1060, HIP_0, HIP_0 }}, + {"hrcp", {HIP_1090, HIP_0, HIP_0 }}, + {"hlog", {HIP_1060, HIP_0, HIP_0 }}, + {"hlog2", {HIP_1060, HIP_0, HIP_0 }}, + {"hlog10", {HIP_1060, HIP_0, HIP_0 }}, + {"hexp", {HIP_1060, HIP_0, HIP_0 }}, + {"hexp2", {HIP_1060, HIP_0, HIP_0 }}, + {"hexp10", {HIP_1060, HIP_0, HIP_0 }}, + {"hcos", {HIP_1060, HIP_0, HIP_0 }}, + {"hsin", {HIP_1060, HIP_0, HIP_0 }}, + {"h2sqrt", {HIP_1060, HIP_0, HIP_0 }}, + {"h2rsqrt", {HIP_1060, HIP_0, HIP_0 }}, + {"h2rcp", {HIP_1060, HIP_0, HIP_0 }}, + {"h2log", {HIP_1060, HIP_0, HIP_0 }}, + {"h2log2", {HIP_1060, HIP_0, HIP_0 }}, + {"h2log10", {HIP_1060, HIP_0, HIP_0 }}, + {"h2exp", {HIP_1060, HIP_0, HIP_0 }}, + {"h2exp2", {HIP_1060, HIP_0, HIP_0 }}, + {"h2exp10", {HIP_1060, HIP_0, HIP_0 }}, + {"h2cos", {HIP_1060, HIP_0, HIP_0 }}, + 
{"h2sin", {HIP_1060, HIP_0, HIP_0 }}, + {"__shfl", {HIP_1060, HIP_0, HIP_0 }}, + {"__shfl_up", {HIP_1060, HIP_0, HIP_0 }}, + {"__shfl_down", {HIP_1060, HIP_0, HIP_0 }}, + {"__shfl_xor", {HIP_1060, HIP_0, HIP_0 }}, + {"atomicAdd", {HIP_1060, HIP_0, HIP_0 }}, + {"atomicSub", {HIP_1060, HIP_0, HIP_0 }}, + {"atomicExch", {HIP_1060, HIP_0, HIP_0 }}, + {"atomicMin", {HIP_1060, HIP_0, HIP_0 }}, + {"atomicMax", {HIP_1060, HIP_0, HIP_0 }}, + {"atomicInc", {HIP_1060, HIP_0, HIP_0 }}, + {"atomicDec", {HIP_1060, HIP_0, HIP_0 }}, + {"atomicAnd", {HIP_1060, HIP_0, HIP_0 }}, + {"atomicOr", {HIP_1060, HIP_0, HIP_0 }}, + {"atomicXor", {HIP_1060, HIP_0, HIP_0 }}, + {"atomicCAS", {HIP_1060, HIP_0, HIP_0 }}, + {"__all", {HIP_1060, HIP_0, HIP_0 }}, + {"__any", {HIP_1060, HIP_0, HIP_0 }}, + {"__ballot", {HIP_1060, HIP_0, HIP_0 }}, + {"clock64", {HIP_1060, HIP_0, HIP_0 }}, + {"clock", {HIP_1060, HIP_0, HIP_0 }}, + {"__dadd_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__ddiv_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__dmul_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__drcp_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__dsqrt_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__dsub_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__fma_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__assert_fail", {HIP_1090, HIP_0, HIP_0 }}, + {"__assertfail", {HIP_1090, HIP_0, HIP_0 }}, + {"atomicCAS_system", {HIP_4030, HIP_0, HIP_0 }}, + {"atomicSub_system", {HIP_4030, HIP_0, HIP_0 }}, + {"atomicAdd_system", {HIP_4030, HIP_0, HIP_0 }}, + {"atomicExch_system", {HIP_4030, HIP_0, HIP_0 }}, + {"atomicMin_system", {HIP_4030, HIP_0, HIP_0 }}, + {"atomicMax_system", {HIP_4030, HIP_0, HIP_0 }}, + {"atomicAnd_system", {HIP_4030, HIP_0, HIP_0 }}, + {"atomicOr_system", {HIP_4030, HIP_0, HIP_0 }}, + {"atomicXor_system", {HIP_4030, HIP_0, HIP_0 }}, + {"__funnelshift_l", {HIP_4040, HIP_0, HIP_0 }}, + {"__funnelshift_lc", {HIP_4040, HIP_0, HIP_0 }}, + {"__funnelshift_r", {HIP_4040, HIP_0, HIP_0 }}, + {"__funnelshift_rc", {HIP_4040, HIP_0, HIP_0 }}, }; const std::map 
CUDA_DEVICE_FUNCTION_API_SECTION_MAP { {1, "Device Functions"}, + {2, "Device Types"}, }; diff --git a/src/CUDA2HIP_Device_types.cpp b/src/CUDA2HIP_Device_types.cpp new file mode 100644 index 00000000..f621707a --- /dev/null +++ b/src/CUDA2HIP_Device_types.cpp @@ -0,0 +1,86 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/
+
+#include "CUDA2HIP.h"
+
+// Maps the names of CUDA Device/Host types to the corresponding HIP types; each value is {HIP name, "", conversion kind, API, section, [flags]} (field meanings presumed from usage here - confirm against CUDA2HIP.h)
+const std::map CUDA_DEVICE_TYPE_NAME_MAP {
+  // float16 Precision Device types
+  {"__half",                     {"__half",                      "", CONV_DEVICE_TYPE, API_RUNTIME, 2}},
+  {"__half_raw",                 {"__half_raw",                  "", CONV_DEVICE_TYPE, API_RUNTIME, 2}},
+  {"__half2",                    {"__half2",                     "", CONV_DEVICE_TYPE, API_RUNTIME, 2}},
+  {"__half2_raw",                {"__half2_raw",                 "", CONV_DEVICE_TYPE, API_RUNTIME, 2}},
+  // Bfloat16 Precision Device types
+  {"__nv_bfloat16",              {"__hip_bfloat16",              "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
+  {"nv_bfloat16",                {"hip_bfloat16",                "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
+  {"__nv_bfloat16_raw",          {"__hip_bfloat16_raw",          "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
+  {"__nv_bfloat162",             {"__hip_bfloat162",             "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
+  {"nv_bfloat162",               {"hip_bfloat162",               "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
+  {"__nv_bfloat162_raw",         {"__hip_bfloat162_raw",         "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
+  // float8 Precision Device types (type names use CONV_DEVICE_TYPE, enumerators use CONV_NUMERIC_LITERAL)
+  {"__nv_fp8_storage_t",         {"__hip_fp8_storage_t",         "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
+  {"__nv_fp8x2_storage_t",       {"__hip_fp8x2_storage_t",       "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
+  {"__nv_fp8x4_storage_t",       {"__hip_fp8x4_storage_t",       "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
+  {"__nv_fp8_e5m2",              {"__hip_fp8_e5m2",              "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
+  {"__nv_fp8x2_e5m2",            {"__hip_fp8x2_e5m2",            "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
+  {"__nv_fp8_e4m3",              {"__hip_fp8_e4m3",              "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
+  {"__nv_fp8x2_e4m3",            {"__hip_fp8x2_e4m3",            "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
+  {"__nv_fp8x4_e4m3",            {"__hip_fp8x4_e4m3",            "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
+  {"__nv_saturation_t",          {"__hip_saturation_t",          "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
+  {"__NV_NOSAT",                 {"__HIP_NOSAT",                 "", CONV_NUMERIC_LITERAL, API_RUNTIME, 2, UNSUPPORTED}},
+  {"__NV_SATFINITE",             {"__HIP_SATFINITE",             "", CONV_NUMERIC_LITERAL, API_RUNTIME, 2, UNSUPPORTED}},
+  {"__nv_fp8_interpretation_t",  {"__hip_fp8_interpretation_t",  "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
+  {"__NV_E4M3",                  {"__HIP_E4M3",                  "", CONV_NUMERIC_LITERAL, API_RUNTIME, 2, UNSUPPORTED}},
+  {"__NV_E5M2",                  {"__HIP_E5M2",                  "", CONV_NUMERIC_LITERAL, API_RUNTIME, 2, UNSUPPORTED}},
+  {"__nv_fp8x4_e5m2",            {"__hip_fp8x4_e5m2",            "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, // a type name like its __nv_fp8x4_e4m3 sibling, not an enumerator
+};
+
+const std::map CUDA_DEVICE_TYPE_NAME_VER_MAP { // CUDA version triple per type; presumably {added, deprecated, removed} with CUDA_0 meaning "n/a" - confirm
+  {"__nv_bfloat16",              {CUDA_110, CUDA_0, CUDA_0 }},
+  {"nv_bfloat16",                {CUDA_110, CUDA_0, CUDA_0 }},
+  {"__nv_bfloat16_raw",          {CUDA_110, CUDA_0, CUDA_0 }},
+  {"__nv_bfloat162",             {CUDA_110, CUDA_0, CUDA_0 }},
+  {"nv_bfloat162",               {CUDA_110, CUDA_0, CUDA_0 }},
+  {"__nv_bfloat162_raw",         {CUDA_110, CUDA_0, CUDA_0 }},
+  {"__nv_fp8_storage_t",         {CUDA_118, CUDA_0, CUDA_0 }},
+  {"__nv_fp8x2_storage_t",       {CUDA_118, CUDA_0, CUDA_0 }},
+  {"__nv_fp8x4_storage_t",       {CUDA_118, CUDA_0, CUDA_0 }},
+  {"__nv_fp8_e5m2",              {CUDA_118, CUDA_0, CUDA_0 }},
+  {"__nv_fp8x2_e5m2",            {CUDA_118, CUDA_0, CUDA_0 }},
+  {"__nv_fp8_e4m3",              {CUDA_118, CUDA_0, CUDA_0 }},
+  {"__nv_fp8x2_e4m3",            {CUDA_118, CUDA_0, CUDA_0 }},
+  {"__nv_fp8x4_e4m3",            {CUDA_118, CUDA_0, CUDA_0 }},
+  {"__nv_saturation_t",          {CUDA_118, CUDA_0, CUDA_0 }},
+  {"__NV_NOSAT",                 {CUDA_118, CUDA_0, CUDA_0 }},
+  {"__NV_SATFINITE",             {CUDA_118, CUDA_0, CUDA_0 }},
+  {"__nv_fp8_interpretation_t",  {CUDA_118, CUDA_0, CUDA_0 }},
+  {"__NV_E4M3",                  {CUDA_118, CUDA_0, CUDA_0 }},
+  {"__NV_E5M2",                  {CUDA_118, CUDA_0, CUDA_0 }},
+  {"__nv_fp8x4_e5m2",            {CUDA_118, CUDA_0, CUDA_0 }},
+};
+
+const std::map HIP_DEVICE_TYPE_NAME_VER_MAP { // HIP version triple for the mapped HIP names; only the __half family is listed, matching the entries above that carry no UNSUPPORTED flag
+  {"__half",                     {HIP_1060, HIP_0, HIP_0 }},
+  {"__half2",                    {HIP_1060, HIP_0, HIP_0 }},
+  {"__half_raw",                 {HIP_1090, HIP_0, HIP_0 }},
+  {"__half2_raw",                {HIP_1090, HIP_0, HIP_0 }},
+};
diff --git
a/src/CUDA2HIP_Driver_API_functions.cpp b/src/CUDA2HIP_Driver_API_functions.cpp index be0b0684..eff0c0c9 100644 --- a/src/CUDA2HIP_Driver_API_functions.cpp +++ b/src/CUDA2HIP_Driver_API_functions.cpp @@ -27,10 +27,10 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { // 2. Error Handling // no analogue // NOTE: cudaGetErrorName and cuGetErrorName have different signatures - {"cuGetErrorName", {"hipDrvGetErrorName", "", CONV_ERROR, API_DRIVER, 2, HIP_UNSUPPORTED}}, + {"cuGetErrorName", {"hipDrvGetErrorName", "", CONV_ERROR, API_DRIVER, 2, HIP_EXPERIMENTAL}}, // no analogue // NOTE: cudaGetErrorString and cuGetErrorString have different signatures - {"cuGetErrorString", {"hipDrvGetErrorString", "", CONV_ERROR, API_DRIVER, 2, HIP_UNSUPPORTED}}, + {"cuGetErrorString", {"hipDrvGetErrorString", "", CONV_ERROR, API_DRIVER, 2, HIP_EXPERIMENTAL}}, // 3. Initialization // no analogue @@ -119,7 +119,7 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { {"cuCtxResetPersistingL2Cache", {"hipCtxResetPersistingL2Cache", "", CONV_CONTEXT, API_DRIVER, 8, HIP_UNSUPPORTED}}, {"cuCtxSetCurrent", {"hipCtxSetCurrent", "", CONV_CONTEXT, API_DRIVER, 8, HIP_DEPRECATED}}, // cudaDeviceSetLimit - {"cuCtxSetLimit", {"hipDeviceSetLimit", "", CONV_CONTEXT, API_DRIVER, 8, HIP_EXPERIMENTAL}}, + {"cuCtxSetLimit", {"hipDeviceSetLimit", "", CONV_CONTEXT, API_DRIVER, 8}}, // cudaDeviceSetSharedMemConfig // TODO: rename to hipDeviceSetSharedMemConfig {"cuCtxSetSharedMemConfig", {"hipCtxSetSharedMemConfig", "", CONV_CONTEXT, API_DRIVER, 8, HIP_DEPRECATED}}, @@ -136,14 +136,14 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { // 10. 
Module Management // no analogues - {"cuLinkAddData", {"hiprtcLinkAddData", "", CONV_MODULE, API_DRIVER, 10, HIP_EXPERIMENTAL}}, - {"cuLinkAddData_v2", {"hiprtcLinkAddData", "", CONV_MODULE, API_DRIVER, 10, HIP_EXPERIMENTAL}}, - {"cuLinkAddFile", {"hiprtcLinkAddFile", "", CONV_MODULE, API_DRIVER, 10, HIP_EXPERIMENTAL}}, - {"cuLinkAddFile_v2", {"hiprtcLinkAddFile", "", CONV_MODULE, API_DRIVER, 10, HIP_EXPERIMENTAL}}, - {"cuLinkComplete", {"hiprtcLinkComplete", "", CONV_MODULE, API_DRIVER, 10, HIP_EXPERIMENTAL}}, - {"cuLinkCreate", {"hiprtcLinkCreate", "", CONV_MODULE, API_DRIVER, 10, HIP_EXPERIMENTAL}}, - {"cuLinkCreate_v2", {"hiprtcLinkCreate", "", CONV_MODULE, API_DRIVER, 10, HIP_EXPERIMENTAL}}, - {"cuLinkDestroy", {"hiprtcLinkDestroy", "", CONV_MODULE, API_DRIVER, 10, HIP_EXPERIMENTAL}}, + {"cuLinkAddData", {"hiprtcLinkAddData", "", CONV_MODULE, API_DRIVER, 10}}, + {"cuLinkAddData_v2", {"hiprtcLinkAddData", "", CONV_MODULE, API_DRIVER, 10}}, + {"cuLinkAddFile", {"hiprtcLinkAddFile", "", CONV_MODULE, API_DRIVER, 10}}, + {"cuLinkAddFile_v2", {"hiprtcLinkAddFile", "", CONV_MODULE, API_DRIVER, 10}}, + {"cuLinkComplete", {"hiprtcLinkComplete", "", CONV_MODULE, API_DRIVER, 10}}, + {"cuLinkCreate", {"hiprtcLinkCreate", "", CONV_MODULE, API_DRIVER, 10}}, + {"cuLinkCreate_v2", {"hiprtcLinkCreate", "", CONV_MODULE, API_DRIVER, 10}}, + {"cuLinkDestroy", {"hiprtcLinkDestroy", "", CONV_MODULE, API_DRIVER, 10}}, {"cuModuleGetFunction", {"hipModuleGetFunction", "", CONV_MODULE, API_DRIVER, 10}}, {"cuModuleGetGlobal", {"hipModuleGetGlobal", "", CONV_MODULE, API_DRIVER, 10}}, {"cuModuleGetGlobal_v2", {"hipModuleGetGlobal", "", CONV_MODULE, API_DRIVER, 10}}, @@ -543,6 +543,9 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { // no analogue // NOTE: Not equal to cudaLaunchKernel due to different signatures {"cuLaunchKernel", {"hipModuleLaunchKernel", "", CONV_EXECUTION, API_DRIVER, 19}}, + // no analogue + // NOTE: Not equal to cudaLaunchKernelExC due to different signatures + 
{"cuLaunchKernelEx", {"hipLaunchKernelEx", "", CONV_EXECUTION, API_DRIVER, 19, HIP_UNSUPPORTED}}, // 20. Execution Control [DEPRECATED] // no analogue @@ -684,7 +687,7 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { // cudaGraphExecEventWaitNodeSetEvent {"cuGraphExecEventWaitNodeSetEvent", {"hipGraphExecEventWaitNodeSetEvent", "", CONV_GRAPH, API_DRIVER, 21}}, // cudaGraphUpload - {"cuGraphUpload", {"hipGraphUpload", "", CONV_GRAPH, API_DRIVER, 21, HIP_EXPERIMENTAL}}, + {"cuGraphUpload", {"hipGraphUpload", "", CONV_GRAPH, API_DRIVER, 21}}, // cudaGraphAddExternalSemaphoresSignalNode {"cuGraphAddExternalSemaphoresSignalNode", {"hipGraphAddExternalSemaphoresSignalNode", "", CONV_GRAPH, API_DRIVER, 21, HIP_UNSUPPORTED}}, // cudaGraphExternalSemaphoresSignalNodeGetParams @@ -702,15 +705,15 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { // cudaGraphExecExternalSemaphoresWaitNodeSetParams {"cuGraphExecExternalSemaphoresWaitNodeSetParams", {"hipGraphExecExternalSemaphoresWaitNodeSetParams", "", CONV_GRAPH, API_DRIVER, 21, HIP_UNSUPPORTED}}, // cudaUserObjectCreate - {"cuUserObjectCreate", {"hipUserObjectCreate", "", CONV_GRAPH, API_DRIVER, 21, HIP_EXPERIMENTAL}}, + {"cuUserObjectCreate", {"hipUserObjectCreate", "", CONV_GRAPH, API_DRIVER, 21}}, // cudaUserObjectRetain - {"cuUserObjectRetain", {"hipUserObjectRetain", "", CONV_GRAPH, API_DRIVER, 21, HIP_EXPERIMENTAL}}, + {"cuUserObjectRetain", {"hipUserObjectRetain", "", CONV_GRAPH, API_DRIVER, 21}}, // cudaUserObjectRelease - {"cuUserObjectRelease", {"hipUserObjectRelease", "", CONV_GRAPH, API_DRIVER, 21, HIP_EXPERIMENTAL}}, + {"cuUserObjectRelease", {"hipUserObjectRelease", "", CONV_GRAPH, API_DRIVER, 21}}, // cudaGraphRetainUserObject - {"cuGraphRetainUserObject", {"hipGraphRetainUserObject", "", CONV_GRAPH, API_DRIVER, 21, HIP_EXPERIMENTAL}}, + {"cuGraphRetainUserObject", {"hipGraphRetainUserObject", "", CONV_GRAPH, API_DRIVER, 21}}, // cudaGraphReleaseUserObject - {"cuGraphReleaseUserObject", {"hipGraphReleaseUserObject", 
"", CONV_GRAPH, API_DRIVER, 21, HIP_EXPERIMENTAL}}, + {"cuGraphReleaseUserObject", {"hipGraphReleaseUserObject", "", CONV_GRAPH, API_DRIVER, 21}}, // cudaGraphAddMemAllocNode {"cuGraphAddMemAllocNode", {"hipGraphAddMemAllocNode", "", CONV_GRAPH, API_DRIVER, 21, HIP_UNSUPPORTED}}, // cudaGraphMemAllocNodeGetParams @@ -720,11 +723,11 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { // cudaGraphMemFreeNodeGetParams {"cuGraphMemFreeNodeGetParams", {"hipGraphMemFreeNodeGetParams", "", CONV_GRAPH, API_DRIVER, 21, HIP_UNSUPPORTED}}, // cudaDeviceGraphMemTrim - {"cuDeviceGraphMemTrim", {"hipDeviceGraphMemTrim", "", CONV_GRAPH, API_DRIVER, 21, HIP_EXPERIMENTAL}}, + {"cuDeviceGraphMemTrim", {"hipDeviceGraphMemTrim", "", CONV_GRAPH, API_DRIVER, 21}}, // cudaDeviceGetGraphMemAttribute - {"cuDeviceGetGraphMemAttribute", {"hipDeviceGetGraphMemAttribute", "", CONV_GRAPH, API_DRIVER, 21, HIP_EXPERIMENTAL}}, + {"cuDeviceGetGraphMemAttribute", {"hipDeviceGetGraphMemAttribute", "", CONV_GRAPH, API_DRIVER, 21}}, // cudaDeviceSetGraphMemAttribute - {"cuDeviceSetGraphMemAttribute", {"hipDeviceSetGraphMemAttribute", "", CONV_GRAPH, API_DRIVER, 21, HIP_EXPERIMENTAL}}, + {"cuDeviceSetGraphMemAttribute", {"hipDeviceSetGraphMemAttribute", "", CONV_GRAPH, API_DRIVER, 21}}, // cudaGraphInstantiateWithFlags {"cuGraphInstantiateWithFlags", {"hipGraphInstantiateWithFlags", "", CONV_GRAPH, API_DRIVER, 21}}, // @@ -747,6 +750,10 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { {"cuOccupancyMaxPotentialBlockSize", {"hipModuleOccupancyMaxPotentialBlockSize", "", CONV_OCCUPANCY, API_DRIVER, 22}}, // cudaOccupancyMaxPotentialBlockSizeWithFlags {"cuOccupancyMaxPotentialBlockSizeWithFlags", {"hipModuleOccupancyMaxPotentialBlockSizeWithFlags", "", CONV_OCCUPANCY, API_DRIVER, 22}}, + // cudaOccupancyMaxPotentialClusterSize + {"cuOccupancyMaxPotentialClusterSize", {"hipOccupancyMaxPotentialClusterSize", "", CONV_OCCUPANCY, API_DRIVER, 22, HIP_UNSUPPORTED}}, + // cudaOccupancyMaxActiveClusters + 
{"cuOccupancyMaxActiveClusters", {"hipOccupancyMaxActiveClusters", "", CONV_OCCUPANCY, API_DRIVER, 22, HIP_UNSUPPORTED}}, // 23. Texture Reference Management [DEPRECATED] // no analogues @@ -1282,6 +1289,9 @@ const std::map CUDA_DRIVER_FUNCTION_VER_MAP { {"cuGraphBatchMemOpNodeGetParams", {CUDA_117, CUDA_0, CUDA_0 }}, {"cuGraphBatchMemOpNodeSetParams", {CUDA_117, CUDA_0, CUDA_0 }}, {"cuGraphExecBatchMemOpNodeSetParams", {CUDA_117, CUDA_0, CUDA_0 }}, + {"cuLaunchKernelEx", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cuOccupancyMaxPotentialClusterSize", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cuOccupancyMaxActiveClusters", {CUDA_118, CUDA_0, CUDA_0 }}, }; const std::map HIP_DRIVER_FUNCTION_VER_MAP { @@ -1410,11 +1420,13 @@ const std::map HIP_DRIVER_FUNCTION_VER_MAP { {"hipMemRetainAllocationHandle", {HIP_5020, HIP_0, HIP_0 }}, {"hipMemSetAccess", {HIP_5020, HIP_0, HIP_0 }}, {"hipMemUnmap", {HIP_5020, HIP_0, HIP_0 }}, - {"hiprtcLinkCreate", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hiprtcLinkAddFile", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hiprtcLinkAddData", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hiprtcLinkComplete", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hiprtcLinkDestroy", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, + {"hiprtcLinkCreate", {HIP_5030, HIP_0, HIP_0 }}, + {"hiprtcLinkAddFile", {HIP_5030, HIP_0, HIP_0 }}, + {"hiprtcLinkAddData", {HIP_5030, HIP_0, HIP_0 }}, + {"hiprtcLinkComplete", {HIP_5030, HIP_0, HIP_0 }}, + {"hiprtcLinkDestroy", {HIP_5030, HIP_0, HIP_0 }}, + {"hipDrvGetErrorName", {HIP_5040, HIP_0, HIP_0, HIP_LATEST}}, + {"hipDrvGetErrorString", {HIP_5040, HIP_0, HIP_0, HIP_LATEST}}, }; const std::map CUDA_DRIVER_API_SECTION_MAP { diff --git a/src/CUDA2HIP_Driver_API_types.cpp b/src/CUDA2HIP_Driver_API_types.cpp index 633a70b4..2af7b1b7 100644 --- a/src/CUDA2HIP_Driver_API_types.cpp +++ b/src/CUDA2HIP_Driver_API_types.cpp @@ -281,9 +281,9 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CUmemPoolPtrExportData_v1", {"hipMemPoolPtrExportData", "", CONV_TYPE, 
API_DRIVER, 1}}, // - {"CUuserObject_st", {"hipUserObject", "", CONV_TYPE, API_DRIVER, 1, HIP_EXPERIMENTAL}}, + {"CUuserObject_st", {"hipUserObject", "", CONV_TYPE, API_DRIVER, 1}}, // cudaUserObject_t - {"CUuserObject", {"hipUserObject_t", "", CONV_TYPE, API_DRIVER, 1, HIP_EXPERIMENTAL}}, + {"CUuserObject", {"hipUserObject_t", "", CONV_TYPE, API_DRIVER, 1}}, // {"CUexecAffinitySmCount_st", {"hipExecAffinitySmCount", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, @@ -312,6 +312,16 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CUDA_BATCH_MEM_OP_NODE_PARAMS_st", {"HIP_BATCH_MEM_OP_NODE_PARAMS", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, {"CUDA_BATCH_MEM_OP_NODE_PARAMS", {"HIP_BATCH_MEM_OP_NODE_PARAMS", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // cudaLaunchAttribute_st + {"CUlaunchAttribute_st", {"hipLaunchAttribute", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // cudaLaunchAttribute + {"CUlaunchAttribute", {"hipLaunchAttribute", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + + // cudaLaunchConfig_st + {"CUlaunchConfig_st", {"hipLaunchConfig", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // cudaLaunchConfig_t + {"CUlaunchConfig", {"hipLaunchConfig", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // 2. 
Unions {"CUstreamBatchMemOpParams", {"hipStreamBatchMemOpParams", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, @@ -319,9 +329,12 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CUstreamBatchMemOpParams_union", {"hipStreamBatchMemOpParams", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, // cudaKernelNodeAttrValue + // NOTE: Starting from CUDA 11.8 CUlaunchAttributeValue is used instead of CUkernelNodeAttrValue: + // typedef CUlaunchAttributeValue CUkernelNodeAttrValue_v1; + // typedef CUkernelNodeAttrValue_v1 CUkernelNodeAttrValue; {"CUkernelNodeAttrValue", {"hipKernelNodeAttrValue", "", CONV_TYPE, API_DRIVER, 1}}, {"CUkernelNodeAttrValue_v1", {"hipKernelNodeAttrValue", "", CONV_TYPE, API_DRIVER, 1}}, - {"CUkernelNodeAttrValue_union", {"hipKernelNodeAttrValue", "", CONV_TYPE, API_DRIVER, 1}}, + {"CUkernelNodeAttrValue_union", {"hipKernelNodeAttrValue", "", CONV_TYPE, API_DRIVER, 1, CUDA_REMOVED}}, // cudaStreamAttrValue {"CUstreamAttrValue", {"hipStreamAttrValue", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, @@ -332,6 +345,10 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CUlinkState_st", {"ihiprtcLinkState", "", CONV_TYPE, API_DRIVER, 1}}, {"CUlinkState", {"hiprtcLinkState", "", CONV_TYPE, API_DRIVER, 1}}, + // cudaLaunchAttributeValue + {"CUlaunchAttributeValue", {"hipLaunchAttributeValue", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + {"CUlaunchAttributeValue_union", {"hipLaunchAttributeValue", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // 3. Enums // TODO: HIPaddress_mode_enum and all its values should be hipTextureAddressMode as long as they are equal. 
{"CUaddress_mode", {"HIPaddress_mode", "", CONV_TYPE, API_DRIVER, 1}}, @@ -697,7 +714,7 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { // no analogue {"CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED", {"hipDeviceAttributeVirtualAddressManagementSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED | CUDA_DEPRECATED}}, // 102 // no analogue - {"CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED", {"hipDeviceAttributeVirtualMemoryManagementSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, // 102 + {"CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED", {"hipDeviceAttributeVirtualMemoryManagementSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 102 // no analogue {"CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED", {"hipDeviceAttributeHandleTypePosixFileDescriptorSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 103 // no analogue @@ -733,6 +750,8 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING", {"hipDeviceAttributeGpuDirectRdmaWritesOrdering", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 118 // cudaDevAttrMemoryPoolSupportedHandleTypes {"CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES", {"hipDeviceAttributeMempoolSupportedHandleTypes", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 119 + // cudaDevAttrClusterLaunch + {"CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH", {"hipDeviceAttributeClusterLaunch", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 120 // cudaDevAttrDeferredMappingCudaArraySupported {"CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED", {"hipDeviceAttributeDeferredMappingCudaArraySupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 121 // @@ -1038,8 +1057,20 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES", {"HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES", "", 
CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 8 // cudaFuncAttributePreferredSharedMemoryCarveout {"CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT", {"HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 9 + // cudaFuncAttributeClusterDimMustBeSet + {"CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET", {"HIP_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 10 + // cudaFuncAttributeRequiredClusterWidth + {"CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH", {"HIP_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 11 + // cudaFuncAttributeRequiredClusterHeight + {"CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT", {"HIP_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 12 + // cudaFuncAttributeRequiredClusterDepth + {"CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH", {"HIP_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 13 + // cudaFuncAttributeNonPortableClusterSizeAllowed + {"CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED", {"HIP_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 14 + // cudaFuncAttributeClusterSchedulingPolicyPreference + {"CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", {"HIP_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 15 // cudaFuncAttributeMax - {"CU_FUNC_ATTRIBUTE_MAX", {"HIP_FUNC_ATTRIBUTE_MAX", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 10 + {"CU_FUNC_ATTRIBUTE_MAX", {"HIP_FUNC_ATTRIBUTE_MAX", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 16 // cudaGraphicsMapFlags {"CUgraphicsMapResourceFlags", {"hipGraphicsMapFlags", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, @@ -1088,9 +1119,9 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { // cudaGraphNodeTypeEventRecord 
= 0x07 {"CU_GRAPH_NODE_TYPE_EVENT_RECORD", {"hipGraphNodeTypeEventRecord", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 7 // cudaGraphNodeTypeExtSemaphoreSignal - {"CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL", {"hipGraphNodeTypeExtSemaphoreSignal", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, // 8 + {"CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL", {"hipGraphNodeTypeExtSemaphoreSignal", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 8 // cudaGraphNodeTypeExtSemaphoreWait - {"CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT", {"hipGraphNodeTypeExtSemaphoreWait", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, // 9 + {"CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT", {"hipGraphNodeTypeExtSemaphoreWait", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 9 // cudaGraphNodeTypeMemAlloc {"CU_GRAPH_NODE_TYPE_MEM_ALLOC", {"hipGraphNodeTypeMemAlloc", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 10 // cudaGraphNodeTypeMemFree @@ -1153,23 +1184,23 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CUjit_option", {"hipJitOption", "", CONV_TYPE, API_DRIVER, 1}}, {"CUjit_option_enum", {"hipJitOption", "", CONV_TYPE, API_DRIVER, 1}}, // CUjit_option enum values - {"CU_JIT_MAX_REGISTERS", {"hipJitOptionMaxRegisters", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 0 - {"CU_JIT_THREADS_PER_BLOCK", {"hipJitOptionThreadsPerBlock", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, - {"CU_JIT_WALL_TIME", {"hipJitOptionWallTime", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, - {"CU_JIT_INFO_LOG_BUFFER", {"hipJitOptionInfoLogBuffer", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, - {"CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES", {"hipJitOptionInfoLogBufferSizeBytes", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, - {"CU_JIT_ERROR_LOG_BUFFER", {"hipJitOptionErrorLogBuffer", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, - {"CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES", {"hipJitOptionErrorLogBufferSizeBytes", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, - {"CU_JIT_OPTIMIZATION_LEVEL", {"hipJitOptionOptimizationLevel", "", 
CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, - {"CU_JIT_TARGET_FROM_CUCONTEXT", {"hipJitOptionTargetFromContext", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, - {"CU_JIT_TARGET", {"hipJitOptionTarget", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, - {"CU_JIT_FALLBACK_STRATEGY", {"hipJitOptionFallbackStrategy", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, - {"CU_JIT_GENERATE_DEBUG_INFO", {"hipJitOptionGenerateDebugInfo", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, - {"CU_JIT_LOG_VERBOSE", {"hipJitOptionLogVerbose", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, - {"CU_JIT_GENERATE_LINE_INFO", {"hipJitOptionGenerateLineInfo", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, - {"CU_JIT_CACHE_MODE", {"hipJitOptionCacheMode", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, - {"CU_JIT_NEW_SM3X_OPT", {"hipJitOptionSm3xOpt", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, - {"CU_JIT_FAST_COMPILE", {"hipJitOptionFastCompile", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_MAX_REGISTERS", {"HIPRTC_JIT_MAX_REGISTERS", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 0 + {"CU_JIT_THREADS_PER_BLOCK", {"HIPRTC_JIT_THREADS_PER_BLOCK", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_WALL_TIME", {"HIPRTC_JIT_WALL_TIME", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_INFO_LOG_BUFFER", {"HIPRTC_JIT_INFO_LOG_BUFFER", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES", {"HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_ERROR_LOG_BUFFER", {"HIPRTC_JIT_ERROR_LOG_BUFFER", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES", {"HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_OPTIMIZATION_LEVEL", {"HIPRTC_JIT_OPTIMIZATION_LEVEL", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_TARGET_FROM_CUCONTEXT", {"HIPRTC_JIT_TARGET_FROM_HIPCONTEXT", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_TARGET", {"HIPRTC_JIT_TARGET", "", CONV_NUMERIC_LITERAL, 
API_DRIVER, 1}}, + {"CU_JIT_FALLBACK_STRATEGY", {"HIPRTC_JIT_FALLBACK_STRATEGY", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_GENERATE_DEBUG_INFO", {"HIPRTC_JIT_GENERATE_DEBUG_INFO", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_LOG_VERBOSE", {"HIPRTC_JIT_LOG_VERBOSE", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_GENERATE_LINE_INFO", {"HIPRTC_JIT_GENERATE_LINE_INFO", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_CACHE_MODE", {"HIPRTC_JIT_CACHE_MODE", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_NEW_SM3X_OPT", {"HIPRTC_JIT_NEW_SM3X_OPT", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_FAST_COMPILE", {"HIPRTC_JIT_FAST_COMPILE", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, {"CU_JIT_GLOBAL_SYMBOL_NAMES", {"hipJitGlobalSymbolNames", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, {"CU_JIT_GLOBAL_SYMBOL_ADDRESSES", {"hipJitGlobalSymbolAddresses", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, {"CU_JIT_GLOBAL_SYMBOL_COUNT", {"hipJitGlobalSymbolCount", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, @@ -1183,7 +1214,7 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CU_JIT_REFERENCED_VARIABLE_NAMES", {"hipJitReferencedVariableNames", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, {"CU_JIT_REFERENCED_VARIABLE_COUNT", {"hipJitReferencedVariableCount", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, {"CU_JIT_OPTIMIZE_UNUSED_DEVICE_VARIABLES", {"hipJitOptimizeUnusedDeviceVariables", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, - {"CU_JIT_NUM_OPTIONS", {"hipJitOptionNumOptions", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_NUM_OPTIONS", {"HIPRTC_JIT_NUM_OPTIONS", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // no analogue {"CUjit_target", {"hipJitTarget", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, @@ -1217,25 +1248,27 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CU_TARGET_COMPUTE_80", {"hipJitTargetCompute80", "", CONV_NUMERIC_LITERAL, 
API_DRIVER, 1, HIP_UNSUPPORTED}}, // 80 {"CU_TARGET_COMPUTE_86", {"hipJitTargetCompute86", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 86 {"CU_TARGET_COMPUTE_87", {"hipJitTargetCompute87", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 87 + {"CU_TARGET_COMPUTE_89", {"hipJitTargetCompute89", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 89 + {"CU_TARGET_COMPUTE_90", {"hipJitTargetCompute90", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 90 // no analogue - {"CUjitInputType", {"hiprtcJITInputType", "", CONV_TYPE, API_DRIVER, 1, HIP_EXPERIMENTAL}}, - {"CUjitInputType_enum", {"hiprtcJITInputType", "", CONV_TYPE, API_DRIVER, 1, HIP_EXPERIMENTAL}}, + {"CUjitInputType", {"hiprtcJITInputType", "", CONV_TYPE, API_DRIVER, 1}}, + {"CUjitInputType_enum", {"hiprtcJITInputType", "", CONV_TYPE, API_DRIVER, 1}}, // CUjitInputType enum values - {"CU_JIT_INPUT_CUBIN", {"HIPRTC_JIT_INPUT_CUBIN", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, // 0 - {"CU_JIT_INPUT_PTX", {"HIPRTC_JIT_INPUT_PTX", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, - {"CU_JIT_INPUT_FATBINARY", {"HIPRTC_JIT_INPUT_FATBINARY", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, - {"CU_JIT_INPUT_OBJECT", {"HIPRTC_JIT_INPUT_OBJECT", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, - {"CU_JIT_INPUT_LIBRARY", {"HIPRTC_JIT_INPUT_LIBRARY", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, - {"CU_JIT_INPUT_NVVM", {"HIPRTC_JIT_INPUT_NVVM", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, - {"CU_JIT_NUM_INPUT_TYPES", {"HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, + {"CU_JIT_INPUT_CUBIN", {"HIPRTC_JIT_INPUT_CUBIN", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 0 + {"CU_JIT_INPUT_PTX", {"HIPRTC_JIT_INPUT_PTX", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_INPUT_FATBINARY", {"HIPRTC_JIT_INPUT_FATBINARY", "", 
CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_INPUT_OBJECT", {"HIPRTC_JIT_INPUT_OBJECT", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_INPUT_LIBRARY", {"HIPRTC_JIT_INPUT_LIBRARY", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_INPUT_NVVM", {"HIPRTC_JIT_INPUT_NVVM", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_NUM_INPUT_TYPES", {"HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // cudaLimit {"CUlimit", {"hipLimit_t", "", CONV_TYPE, API_DRIVER, 1}}, {"CUlimit_enum", {"hipLimit_t", "", CONV_TYPE, API_DRIVER, 1}}, // CUlimit enum values // cudaLimitStackSize - {"CU_LIMIT_STACK_SIZE", {"hipLimitStackSize", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, // 0x00 + {"CU_LIMIT_STACK_SIZE", {"hipLimitStackSize", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 0x00 // cudaLimitPrintfFifoSize {"CU_LIMIT_PRINTF_FIFO_SIZE", {"hipLimitPrintfFifoSize", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 0x01 // cudaLimitMallocHeapSize @@ -1584,6 +1617,8 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CUDA_ERROR_MPS_MAX_CLIENTS_REACHED", {"hipErrorMpsMaxClientsReached", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 808 // cudaErrorMpsMaxConnectionsReached {"CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED", {"hipErrorMpsMaxConnectionsReached", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 809 + // cudaErrorMpsClientTerminated + {"CUDA_ERROR_MPS_CLIENT_TERMINATED", {"hipErrorMpsClientTerminated", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 810 // cudaErrorStreamCaptureUnsupported {"CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED", {"hipErrorStreamCaptureUnsupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 900 // cudaErrorStreamCaptureInvalidated @@ -1608,6 +1643,8 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE", {"hipErrorGraphExecUpdateFailure", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 910 // cudaErrorExternalDevice 
{"CUDA_ERROR_EXTERNAL_DEVICE", {"hipErrorExternalDevice", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 911 + // cudaErrorInvalidClusterSize + {"CUDA_ERROR_INVALID_CLUSTER_SIZE", {"hipErrorInvalidClusterSize", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 912 // cudaErrorUnknown {"CUDA_ERROR_UNKNOWN", {"hipErrorUnknown", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 999 @@ -1871,7 +1908,7 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { // cudaKernelNodeAttrID {"CUkernelNodeAttrID", {"hipKernelNodeAttrID", "", CONV_TYPE, API_DRIVER, 1}}, - {"CUkernelNodeAttrID_enum", {"hipKernelNodeAttrID", "", CONV_TYPE, API_DRIVER, 1}}, + {"CUkernelNodeAttrID_enum", {"hipKernelNodeAttrID", "", CONV_TYPE, API_DRIVER, 1, CUDA_REMOVED}}, // CUkernelNodeAttrID_enum enum values // cudaKernelNodeAttributeAccessPolicyWindow {"CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW", {"hipKernelNodeAttributeAccessPolicyWindow", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 1 @@ -2046,18 +2083,18 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CU_GRAPH_DEBUG_DOT_FLAGS_BATCH_MEM_OP_NODE_PARAMS", {"hipGraphDebugDotFlagsBatchMemOpNodeParams", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 1<<13 // cudaUserObjectFlags - {"CUuserObject_flags", {"hipUserObjectFlags", "", CONV_TYPE, API_DRIVER, 1, HIP_EXPERIMENTAL}}, - {"CUuserObject_flags_enum", {"hipUserObjectFlags", "", CONV_TYPE, API_DRIVER, 1, HIP_EXPERIMENTAL}}, + {"CUuserObject_flags", {"hipUserObjectFlags", "", CONV_TYPE, API_DRIVER, 1}}, + {"CUuserObject_flags_enum", {"hipUserObjectFlags", "", CONV_TYPE, API_DRIVER, 1}}, // CUuserObject_flags enum values // cudaUserObjectNoDestructorSync - {"CU_USER_OBJECT_NO_DESTRUCTOR_SYNC", {"hipUserObjectNoDestructorSync", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, // 1 + {"CU_USER_OBJECT_NO_DESTRUCTOR_SYNC", {"hipUserObjectNoDestructorSync", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 1 // cudaUserObjectRetainFlags - 
{"CUuserObjectRetain_flags", {"hipUserObjectRetainFlags", "", CONV_TYPE, API_DRIVER, 1, HIP_EXPERIMENTAL}}, - {"CUuserObjectRetain_flags_enum", {"hipUserObjectRetainFlags", "", CONV_TYPE, API_DRIVER, 1, HIP_EXPERIMENTAL}}, + {"CUuserObjectRetain_flags", {"hipUserObjectRetainFlags", "", CONV_TYPE, API_DRIVER, 1}}, + {"CUuserObjectRetain_flags_enum", {"hipUserObjectRetainFlags", "", CONV_TYPE, API_DRIVER, 1}}, // CUuserObjectRetain_flags enum values // cudaGraphUserObjectMove - {"CU_GRAPH_USER_OBJECT_MOVE", {"hipGraphUserObjectMove", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, // 1 + {"CU_GRAPH_USER_OBJECT_MOVE", {"hipGraphUserObjectMove", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 1 // no analogue {"CUexecAffinityType", {"hipExecAffinityType", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, @@ -2067,17 +2104,17 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CU_EXEC_AFFINITY_TYPE_MAX", {"hipExecAffinityTypeMax", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // // cudaGraphMemAttributeType - {"CUgraphMem_attribute", {"hipGraphMemAttributeType", "", CONV_TYPE, API_DRIVER, 1, HIP_EXPERIMENTAL}}, - {"CUgraphMem_attribute_enum", {"hipGraphMemAttributeType", "", CONV_TYPE, API_DRIVER, 1, HIP_EXPERIMENTAL}}, + {"CUgraphMem_attribute", {"hipGraphMemAttributeType", "", CONV_TYPE, API_DRIVER, 1}}, + {"CUgraphMem_attribute_enum", {"hipGraphMemAttributeType", "", CONV_TYPE, API_DRIVER, 1}}, // CUgraphMem_attribute enum values // cudaGraphMemAttrUsedMemCurrent - {"CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT", {"hipGraphMemAttrUsedMemCurrent", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, + {"CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT", {"hipGraphMemAttrUsedMemCurrent", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // cudaGraphMemAttrUsedMemHigh - {"CU_GRAPH_MEM_ATTR_USED_MEM_HIGH", {"hipGraphMemAttrUsedMemHigh", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, + {"CU_GRAPH_MEM_ATTR_USED_MEM_HIGH", {"hipGraphMemAttrUsedMemHigh", "", 
CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // cudaGraphMemAttrReservedMemCurrent - {"CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT", {"hipGraphMemAttrReservedMemCurrent", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, + {"CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT", {"hipGraphMemAttrReservedMemCurrent", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // cudaGraphMemAttrReservedMemHigh - {"CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH", {"hipGraphMemAttrReservedMemHigh", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, + {"CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH", {"hipGraphMemAttrReservedMemHigh", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // cudaGraphInstantiateFlags {"CUgraphInstantiate_flags", {"hipGraphInstantiateFlags", "", CONV_TYPE, API_DRIVER, 1}}, @@ -2109,6 +2146,79 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CU_MODULE_EAGER_LOADING", {"HIP_MODULE_EAGER_LOADING", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, {"CU_MODULE_LAZY_LOADING", {"HIP_MODULE_LAZY_LOADING", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // + {"CUevent_sched_flags", {"hipEventSchedFlags", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + {"CUevent_sched_flags_enum", {"hipEventSchedFlags", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // CUevent_sched_flags enum values + // + {"CU_EVENT_SCHED_AUTO", {"HIP_EVENT_SCHED_AUTO", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // + {"CU_EVENT_SCHED_SPIN", {"HIP_EVENT_SCHED_SPIN", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // + {"CU_EVENT_SCHED_YIELD", {"HIP_EVENT_SCHED_YIELD", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // + {"CU_EVENT_SCHED_BLOCKING_SYNC", {"HIP_EVENT_SCHED_BLOCKING_SYNC", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + + // + {"cl_event_flags", {"hipClEventFlags", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + {"cl_event_flags_enum", {"hipClEventFlags", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // cl_event_flags 
enum values + // + {"NVCL_EVENT_SCHED_AUTO", {"HIP_CL_EVENT_SCHED_AUTO", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // + {"NVCL_EVENT_SCHED_SPIN", {"HIP_CL_EVENT_SCHED_SPIN", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // + {"NVCL_EVENT_SCHED_YIELD", {"HIP_CL_EVENT_SCHED_YIELD", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // + {"NVCL_EVENT_SCHED_BLOCKING_SYNC", {"HIP_CL_EVENT_SCHED_BLOCKING_SYNC", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + + // + {"cl_context_flags", {"hipClContextFlags", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + {"cl_context_flags_enum", {"hipClContextFlags", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // cl_context_flags enum values + // + {"NVCL_CTX_SCHED_AUTO", {"HIP_CL_CTX_SCHED_AUTO", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // + {"NVCL_CTX_SCHED_SPIN", {"HIP_CL_CTX_SCHED_SPIN", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // + {"NVCL_CTX_SCHED_YIELD", {"HIP_CL_CTX_SCHED_YIELD", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // + {"NVCL_CTX_SCHED_BLOCKING_SYNC", {"HIP_CL_CTX_SCHED_BLOCKING_SYNC", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + + // cudaClusterSchedulingPolicy + {"CUclusterSchedulingPolicy", {"hipClusterSchedulingPolicy", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + {"CUclusterSchedulingPolicy_enum", {"hipClusterSchedulingPolicy", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // CUclusterSchedulingPolicy enum values + // cudaClusterSchedulingPolicyDefault + {"CU_CLUSTER_SCHEDULING_POLICY_DEFAULT", {"hipClusterSchedulingPolicyDefault", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // cudaClusterSchedulingPolicySpread + {"CU_CLUSTER_SCHEDULING_POLICY_SPREAD", {"hipClusterSchedulingPolicySpread", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // cudaClusterSchedulingPolicyLoadBalancing + {"CU_CLUSTER_SCHEDULING_POLICY_LOAD_BALANCING", 
{"hipClusterSchedulingPolicyLoadBalancing", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + + // cudaLaunchAttributeID + {"CUlaunchAttributeID", {"hipLaunchAttributeID", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + {"CUlaunchAttributeID_enum", {"hipLaunchAttributeID", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // CUlaunchAttributeID enum values + // cudaLaunchAttributeIgnore + {"CU_LAUNCH_ATTRIBUTE_IGNORE", {"hipLaunchAttributeIgnore", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // cudaLaunchAttributeAccessPolicyWindow + {"CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW", {"hipLaunchAttributeAccessPolicyWindow", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // cudaLaunchAttributeCooperative + {"CU_LAUNCH_ATTRIBUTE_COOPERATIVE", {"hipLaunchAttributeCooperative", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // cudaLaunchAttributeSynchronizationPolicy + {"CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY", {"hipLaunchAttributeSynchronizationPolicy", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // cudaLaunchAttributeClusterDimension + {"CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION", {"hipLaunchAttributeClusterDimension", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // cudaLaunchAttributeClusterSchedulingPolicyPreference + {"CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", {"hipLaunchAttributeClusterSchedulingPolicyPreference", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // cudaLaunchAttributeProgrammaticStreamSerialization + {"CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION", {"hipLaunchAttributeProgrammaticStreamSerialization", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // cudaLaunchAttributeProgrammaticEvent + {"CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT", {"hipLaunchAttributeProgrammaticEvent", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // cudaLaunchAttributePriority + {"CU_LAUNCH_ATTRIBUTE_PRIORITY", 
{"hipLaunchAttributePriority", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // 4. Typedefs // no analogue @@ -2227,6 +2337,10 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CU_LAUNCH_PARAM_BUFFER_POINTER_AS_INT", {"HIP_LAUNCH_PARAM_BUFFER_POINTER_AS_INT", "", CONV_DEFINE, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 0x01 // {"CU_LAUNCH_PARAM_BUFFER_SIZE_AS_INT", {"HIP_LAUNCH_PARAM_BUFFER_SIZE_AS_INT", "", CONV_DEFINE, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 0x02 + // cudaKernelNodeAttributeClusterDimension + {"CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_DIMENSION", {"hipKernelNodeAttributeClusterDimension", "", CONV_DEFINE, API_DRIVER, 1, HIP_UNSUPPORTED}}, // CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION + // cudaKernelNodeAttributeClusterSchedulingPolicyPreference + {"CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", {"hipKernelNodeAttributeClusterSchedulingPolicyPreference", "", CONV_DEFINE, API_DRIVER, 1, HIP_UNSUPPORTED}}, // CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE }; const std::map CUDA_DRIVER_TYPE_NAME_VER_MAP { @@ -2274,7 +2388,7 @@ const std::map CUDA_DRIVER_TYPE_NAME_VER_MAP { {"CUstreamBatchMemOpParams", {CUDA_80, CUDA_0, CUDA_0 }}, {"CUstreamBatchMemOpParams_union", {CUDA_80, CUDA_0, CUDA_0 }}, {"CUkernelNodeAttrValue", {CUDA_110, CUDA_0, CUDA_0 }}, - {"CUkernelNodeAttrValue_union", {CUDA_110, CUDA_0, CUDA_0 }}, + {"CUkernelNodeAttrValue_union", {CUDA_110, CUDA_0, CUDA_118}}, {"CUstreamAttrValue", {CUDA_110, CUDA_0, CUDA_0 }}, {"CUstreamAttrValue_union", {CUDA_110, CUDA_0, CUDA_0 }}, {"CU_COMPUTEMODE_EXCLUSIVE", {CUDA_0, CUDA_0, CUDA_80 }}, @@ -2557,7 +2671,7 @@ const std::map CUDA_DRIVER_TYPE_NAME_VER_MAP { {"CU_SYNC_POLICY_YIELD", {CUDA_110, CUDA_0, CUDA_0 }}, {"CU_SYNC_POLICY_BLOCKING_SYNC", {CUDA_110, CUDA_0, CUDA_0 }}, {"CUkernelNodeAttrID", {CUDA_110, CUDA_0, CUDA_0 }}, - {"CUkernelNodeAttrID_enum", {CUDA_110, CUDA_0, CUDA_0 }}, + {"CUkernelNodeAttrID_enum", {CUDA_110, CUDA_0, CUDA_118}}, 
{"CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW", {CUDA_110, CUDA_0, CUDA_0 }}, {"CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE", {CUDA_110, CUDA_0, CUDA_0 }}, {"CUstreamAttrID", {CUDA_110, CUDA_0, CUDA_0 }}, @@ -2863,6 +2977,59 @@ const std::map CUDA_DRIVER_TYPE_NAME_VER_MAP { {"CUmoduleLoadingMode_enum", {CUDA_117, CUDA_0, CUDA_0 }}, {"CU_MODULE_EAGER_LOADING", {CUDA_117, CUDA_0, CUDA_0 }}, {"CU_MODULE_LAZY_LOADING", {CUDA_117, CUDA_0, CUDA_0 }}, + {"CUevent_sched_flags", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CUevent_sched_flags_enum", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_EVENT_SCHED_AUTO", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_EVENT_SCHED_SPIN", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_EVENT_SCHED_YIELD", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_EVENT_SCHED_BLOCKING_SYNC", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cl_event_flags", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cl_event_flags_enum", {CUDA_118, CUDA_0, CUDA_0 }}, + {"NVCL_EVENT_SCHED_AUTO", {CUDA_118, CUDA_0, CUDA_0 }}, + {"NVCL_EVENT_SCHED_SPIN", {CUDA_118, CUDA_0, CUDA_0 }}, + {"NVCL_EVENT_SCHED_YIELD", {CUDA_118, CUDA_0, CUDA_0 }}, + {"NVCL_EVENT_SCHED_BLOCKING_SYNC", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cl_context_flags", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cl_context_flags_enum", {CUDA_118, CUDA_0, CUDA_0 }}, + {"NVCL_CTX_SCHED_AUTO", {CUDA_118, CUDA_0, CUDA_0 }}, + {"NVCL_CTX_SCHED_SPIN", {CUDA_118, CUDA_0, CUDA_0 }}, + {"NVCL_CTX_SCHED_YIELD", {CUDA_118, CUDA_0, CUDA_0 }}, + {"NVCL_CTX_SCHED_BLOCKING_SYNC", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED", {CUDA_118, CUDA_0, CUDA_0 }}, + 
{"CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_TARGET_COMPUTE_89", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_TARGET_COMPUTE_90", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CUclusterSchedulingPolicy", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CUclusterSchedulingPolicy_enum", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_CLUSTER_SCHEDULING_POLICY_DEFAULT", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_CLUSTER_SCHEDULING_POLICY_SPREAD", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_CLUSTER_SCHEDULING_POLICY_LOAD_BALANCING", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CUlaunchAttributeID", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CUlaunchAttributeID_enum", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_LAUNCH_ATTRIBUTE_IGNORE", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_LAUNCH_ATTRIBUTE_COOPERATIVE", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_LAUNCH_ATTRIBUTE_PRIORITY", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CUlaunchAttributeValue", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CUlaunchAttributeValue_union", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CUlaunchAttribute", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CUlaunchAttribute_st", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CUlaunchConfig", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CUlaunchConfig_st", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_DIMENSION", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CUDA_ERROR_MPS_CLIENT_TERMINATED", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CUDA_ERROR_INVALID_CLUSTER_SIZE", {CUDA_118, 
CUDA_0, CUDA_0 }}, }; const std::map HIP_DRIVER_TYPE_NAME_VER_MAP { @@ -2941,24 +3108,24 @@ const std::map HIP_DRIVER_TYPE_NAME_VER_MAP { {"hipMemRangeAttributeAccessedBy", {HIP_3070, HIP_0, HIP_0 }}, {"hipMemRangeAttributeLastPrefetchLocation", {HIP_3070, HIP_0, HIP_0 }}, {"hipJitOption", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionMaxRegisters", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionThreadsPerBlock", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionWallTime", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionInfoLogBuffer", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionInfoLogBufferSizeBytes", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionErrorLogBuffer", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionErrorLogBufferSizeBytes", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionOptimizationLevel", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionTargetFromContext", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionTarget", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionFallbackStrategy", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionGenerateDebugInfo", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionLogVerbose", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionGenerateLineInfo", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionCacheMode", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionSm3xOpt", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionFastCompile", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionNumOptions", {HIP_1060, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_MAX_REGISTERS", {HIP_1060, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_THREADS_PER_BLOCK", {HIP_1060, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_WALL_TIME", {HIP_1060, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_INFO_LOG_BUFFER", {HIP_1060, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES", {HIP_1060, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_ERROR_LOG_BUFFER", {HIP_1060, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES", {HIP_1060, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_OPTIMIZATION_LEVEL", {HIP_1060, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_TARGET_FROM_HIPCONTEXT", {HIP_1060, HIP_0, HIP_0 }}, + 
{"HIPRTC_JIT_TARGET", {HIP_1060, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_FALLBACK_STRATEGY", {HIP_1060, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_GENERATE_DEBUG_INFO", {HIP_1060, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_LOG_VERBOSE", {HIP_1060, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_GENERATE_LINE_INFO", {HIP_1060, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_CACHE_MODE", {HIP_1060, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_NEW_SM3X_OPT", {HIP_1060, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_FAST_COMPILE", {HIP_1060, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_NUM_OPTIONS", {HIP_1060, HIP_0, HIP_0 }}, {"hipFuncCache_t", {HIP_1060, HIP_0, HIP_0 }}, {"hipFuncCachePreferNone", {HIP_1060, HIP_0, HIP_0 }}, {"hipFuncCachePreferShared", {HIP_1060, HIP_0, HIP_0 }}, @@ -3288,17 +3455,17 @@ const std::map HIP_DRIVER_TYPE_NAME_VER_MAP { {"hipArraySparseSubresourceType", {HIP_5020, HIP_0, HIP_0 }}, {"hipArraySparseSubresourceTypeSparseLevel", {HIP_5020, HIP_0, HIP_0 }}, {"hipArraySparseSubresourceTypeMiptail", {HIP_5020, HIP_0, HIP_0 }}, - {"hipDeviceAttributeVirtualMemoryManagementSupported", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipUserObject", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipUserObject_t", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hiprtcJITInputType", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"HIPRTC_JIT_INPUT_CUBIN", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"HIPRTC_JIT_INPUT_PTX", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"HIPRTC_JIT_INPUT_FATBINARY", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"HIPRTC_JIT_INPUT_OBJECT", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"HIPRTC_JIT_INPUT_LIBRARY", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"HIPRTC_JIT_INPUT_NVVM", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"ihiprtcLinkState", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hiprtcLinkState", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipDeviceAttributeVirtualMemoryManagementSupported", {HIP_5030, HIP_0, HIP_0 }}, + {"hipUserObject", {HIP_5030, HIP_0, HIP_0 }}, + 
{"hipUserObject_t", {HIP_5030, HIP_0, HIP_0 }}, + {"hiprtcJITInputType", {HIP_5030, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_INPUT_CUBIN", {HIP_5030, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_INPUT_PTX", {HIP_5030, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_INPUT_FATBINARY", {HIP_5030, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_INPUT_OBJECT", {HIP_5030, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_INPUT_LIBRARY", {HIP_5030, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_INPUT_NVVM", {HIP_5030, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES", {HIP_5030, HIP_0, HIP_0 }}, + {"ihiprtcLinkState", {HIP_5030, HIP_0, HIP_0 }}, + {"hiprtcLinkState", {HIP_5030, HIP_0, HIP_0 }}, }; diff --git a/src/CUDA2HIP_FFT_API_functions.cpp b/src/CUDA2HIP_FFT_API_functions.cpp index a4db4976..01f9a2e2 100644 --- a/src/CUDA2HIP_FFT_API_functions.cpp +++ b/src/CUDA2HIP_FFT_API_functions.cpp @@ -84,6 +84,7 @@ const std::map CUDA_FFT_FUNCTION_MAP { {"cufftXtExec", {"hipfftXtExec", "", CONV_LIB_FUNC, API_FFT, 2, HIP_UNSUPPORTED}}, {"cufftXtExecDescriptor", {"hipfftXtExecDescriptor", "", CONV_LIB_FUNC, API_FFT, 2, HIP_UNSUPPORTED}}, {"cufftXtSetWorkAreaPolicy", {"hipfftXtSetWorkAreaPolicy", "", CONV_LIB_FUNC, API_FFT, 2, HIP_UNSUPPORTED}}, + {"cufftXtSetDistribution", {"hipfftXtSetDistribution", "", CONV_LIB_FUNC, API_FFT, 2, HIP_UNSUPPORTED}}, }; const std::map CUDA_FFT_FUNCTION_VER_MAP { @@ -93,7 +94,9 @@ const std::map CUDA_FFT_FUNCTION_VER_MAP { {"cufftXtMakePlanMany", {CUDA_80, CUDA_0, CUDA_0}}, {"cufftXtGetSizeMany", {CUDA_80, CUDA_0, CUDA_0}}, {"cufftXtExec", {CUDA_80, CUDA_0, CUDA_0}}, + {"cufftXtExecDescriptor", {CUDA_80, CUDA_0, CUDA_0}}, {"cufftXtSetWorkAreaPolicy", {CUDA_92, CUDA_0, CUDA_0}}, + {"cufftXtSetDistribution", {CUDA_118, CUDA_0, CUDA_0}}, }; const std::map HIP_FFT_FUNCTION_VER_MAP { diff --git a/src/CUDA2HIP_FFT_API_types.cpp b/src/CUDA2HIP_FFT_API_types.cpp index 9b003869..8831f73f 100644 --- a/src/CUDA2HIP_FFT_API_types.cpp +++ b/src/CUDA2HIP_FFT_API_types.cpp @@ -26,155 +26,163 @@ THE SOFTWARE. 
const std::map CUDA_FFT_TYPE_NAME_MAP { // cuFFT defines - {"CUFFT_VER_MAJOR", {"HIPRAND_VER_MAJOR", "", CONV_DEFINE, API_FFT, 1, HIP_UNSUPPORTED}}, - {"CUFFT_VER_MINOR", {"HIPRAND_VER_MINOR", "", CONV_DEFINE, API_FFT, 1, HIP_UNSUPPORTED}}, - {"CUFFT_VER_PATCH", {"HIPRAND_VER_PATCH", "", CONV_DEFINE, API_FFT, 1, HIP_UNSUPPORTED}}, - {"CUFFT_VER_BUILD", {"HIPRAND_VER_BUILD", "", CONV_DEFINE, API_FFT, 1, HIP_UNSUPPORTED}}, - {"CUFFT_VERSION", {"HIPRAND_VERSION", "", CONV_DEFINE, API_FFT, 1, HIP_UNSUPPORTED}}, - {"CUFFT_FORWARD", {"HIPFFT_FORWARD", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // -1 - {"CUFFT_INVERSE", {"HIPFFT_BACKWARD", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 1 - {"CUFFT_COMPATIBILITY_DEFAULT", {"HIPFFT_COMPATIBILITY_DEFAULT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // CUFFT_COMPATIBILITY_FFTW_PADDING - {"MAX_CUFFT_ERROR", {"HIPFFT_MAX_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x11 + {"CUFFT_VER_MAJOR", {"HIPRAND_VER_MAJOR", "", CONV_DEFINE, API_FFT, 1, UNSUPPORTED}}, + {"CUFFT_VER_MINOR", {"HIPRAND_VER_MINOR", "", CONV_DEFINE, API_FFT, 1, UNSUPPORTED}}, + {"CUFFT_VER_PATCH", {"HIPRAND_VER_PATCH", "", CONV_DEFINE, API_FFT, 1, UNSUPPORTED}}, + {"CUFFT_VER_BUILD", {"HIPRAND_VER_BUILD", "", CONV_DEFINE, API_FFT, 1, UNSUPPORTED}}, + {"CUFFT_VERSION", {"HIPRAND_VERSION", "", CONV_DEFINE, API_FFT, 1, UNSUPPORTED}}, + {"CUFFT_FORWARD", {"HIPFFT_FORWARD", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // -1 + {"CUFFT_INVERSE", {"HIPFFT_BACKWARD", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 1 + {"CUFFT_COMPATIBILITY_DEFAULT", {"HIPFFT_COMPATIBILITY_DEFAULT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // CUFFT_COMPATIBILITY_FFTW_PADDING + {"MAX_CUFFT_ERROR", {"HIPFFT_MAX_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x11 // cuFFT enums - {"cufftResult_t", {"hipfftResult_t", "", CONV_TYPE, API_FFT, 1}}, - {"cufftResult", {"hipfftResult", "", CONV_TYPE, API_FFT, 1}}, - {"CUFFT_SUCCESS", 
{"HIPFFT_SUCCESS", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x0 0 - {"CUFFT_INVALID_PLAN", {"HIPFFT_INVALID_PLAN", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x1 1 - {"CUFFT_ALLOC_FAILED", {"HIPFFT_ALLOC_FAILED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x2 2 - {"CUFFT_INVALID_TYPE", {"HIPFFT_INVALID_TYPE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x3 3 - {"CUFFT_INVALID_VALUE", {"HIPFFT_INVALID_VALUE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x4 4 - {"CUFFT_INTERNAL_ERROR", {"HIPFFT_INTERNAL_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x5 5 - {"CUFFT_EXEC_FAILED", {"HIPFFT_EXEC_FAILED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x6 6 - {"CUFFT_SETUP_FAILED", {"HIPFFT_SETUP_FAILED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x7 7 - {"CUFFT_INVALID_SIZE", {"HIPFFT_INVALID_SIZE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x8 8 - {"CUFFT_UNALIGNED_DATA", {"HIPFFT_UNALIGNED_DATA", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x9 9 - {"CUFFT_INCOMPLETE_PARAMETER_LIST", {"HIPFFT_INCOMPLETE_PARAMETER_LIST", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xA 10 - {"CUFFT_INVALID_DEVICE", {"HIPFFT_INVALID_DEVICE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xB 11 - {"CUFFT_PARSE_ERROR", {"HIPFFT_PARSE_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xC 12 - {"CUFFT_NO_WORKSPACE", {"HIPFFT_NO_WORKSPACE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xD 13 - {"CUFFT_NOT_IMPLEMENTED", {"HIPFFT_NOT_IMPLEMENTED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xE 14 - {"CUFFT_LICENSE_ERROR", {"HIPFFT_LICENSE_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, - {"CUFFT_NOT_SUPPORTED", {"HIPFFT_NOT_SUPPORTED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x10 16 - - {"cufftType_t", {"hipfftType_t", "", CONV_TYPE, API_FFT, 1}}, - {"cufftType", {"hipfftType", "", CONV_TYPE, API_FFT, 1}}, - {"CUFFT_R2C", {"HIPFFT_R2C", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x2a - {"CUFFT_C2R", {"HIPFFT_C2R", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x2c - {"CUFFT_C2C", 
{"HIPFFT_C2C", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x29 - {"CUFFT_D2Z", {"HIPFFT_D2Z", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x6a - {"CUFFT_Z2D", {"HIPFFT_Z2D", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x6c - {"CUFFT_Z2Z", {"HIPFFT_Z2Z", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x69 - - {"cufftCompatibility_t", {"hipfftCompatibility_t", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, - {"cufftCompatibility", {"hipfftCompatibility", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, - {"CUFFT_COMPATIBILITY_FFTW_PADDING", {"HIPFFT_COMPATIBILITY_FFTW_PADDING", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x01 - - {"cufftXtSubFormat_t", {"hipfftXtSubFormat_t", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, - {"cufftXtSubFormat", {"hipfftXtSubFormat", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, - {"CUFFT_XT_FORMAT_INPUT", {"HIPFFT_XT_FORMAT_INPUT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x00 - {"CUFFT_XT_FORMAT_OUTPUT", {"HIPFFT_XT_FORMAT_OUTPUT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x01 - {"CUFFT_XT_FORMAT_INPLACE", {"HIPFFT_XT_FORMAT_INPLACE", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x02 - {"CUFFT_XT_FORMAT_INPLACE_SHUFFLED", {"HIPFFT_XT_FORMAT_INPLACE_SHUFFLED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x03 - {"CUFFT_XT_FORMAT_1D_INPUT_SHUFFLED",{"HIPFFT_XT_FORMAT_1D_INPUT_SHUFFLED","", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x04 - {"CUFFT_FORMAT_UNDEFINED", {"HIPFFT_FORMAT_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x05 - - {"cufftXtCopyType_t", {"hipfftXtCopyType_t", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, - {"cufftXtCopyType", {"hipfftXtCopyType", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, - {"CUFFT_COPY_HOST_TO_DEVICE", {"HIPFFT_COPY_HOST_TO_DEVICE", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x00 - {"CUFFT_COPY_DEVICE_TO_HOST", {"HIPFFT_COPY_DEVICE_TO_HOST", "", CONV_NUMERIC_LITERAL, 
API_FFT, 1, HIP_UNSUPPORTED}}, // 0x01 - {"CUFFT_COPY_DEVICE_TO_DEVICE", {"HIPFFT_COPY_DEVICE_TO_DEVICE", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x02 - {"CUFFT_COPY_UNDEFINED", {"HIPFFT_COPY_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x03 - - {"cufftXtQueryType_t", {"hipfftXtQueryType_t", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, - {"cufftXtQueryType", {"hipfftXtQueryType", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, - {"CUFFT_QUERY_1D_FACTORS", {"HIPFFT_QUERY_1D_FACTORS", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x00 - {"CUFFT_QUERY_UNDEFINED", {"HIPFFT_QUERY_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x01 - - {"cufftXtWorkAreaPolicy_t", {"hipfftXtWorkAreaPolicy_t", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, - {"cufftXtWorkAreaPolicy", {"hipfftXtWorkAreaPolicy", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, - {"CUFFT_WORKAREA_MINIMAL", {"HIPFFT_WORKAREA_MINIMAL", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0 - {"CUFFT_WORKAREA_USER", {"HIPFFT_WORKAREA_USER", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 1 - {"CUFFT_WORKAREA_PERFORMANCE", {"HIPFFT_WORKAREA_PERFORMANCE", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 2 - - {"cufftXtCallbackType_t", {"hipfftXtCallbackType_t", "", CONV_TYPE, API_FFT, 1}}, - {"cufftXtCallbackType", {"hipfftXtCallbackType", "", CONV_TYPE, API_FFT, 1}}, - {"CUFFT_CB_LD_COMPLEX", {"HIPFFT_CB_LD_COMPLEX", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x0 - {"CUFFT_CB_LD_COMPLEX_DOUBLE", {"HIPFFT_CB_LD_COMPLEX_DOUBLE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x1 - {"CUFFT_CB_LD_REAL", {"HIPFFT_CB_LD_REAL", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x2 - {"CUFFT_CB_LD_REAL_DOUBLE", {"HIPFFT_CB_LD_REAL_DOUBLE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x3 - {"CUFFT_CB_ST_COMPLEX", {"HIPFFT_CB_ST_COMPLEX", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x4 - {"CUFFT_CB_ST_COMPLEX_DOUBLE", 
{"HIPFFT_CB_ST_COMPLEX_DOUBLE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x5 - {"CUFFT_CB_ST_REAL", {"HIPFFT_CB_ST_REAL", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x6 - {"CUFFT_CB_ST_REAL_DOUBLE", {"HIPFFT_CB_ST_REAL_DOUBLE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x7 - {"CUFFT_CB_UNDEFINED", {"HIPFFT_CB_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x7 + {"cufftResult_t", {"hipfftResult_t", "", CONV_TYPE, API_FFT, 1}}, + {"cufftResult", {"hipfftResult", "", CONV_TYPE, API_FFT, 1}}, + {"CUFFT_SUCCESS", {"HIPFFT_SUCCESS", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x0 0 + {"CUFFT_INVALID_PLAN", {"HIPFFT_INVALID_PLAN", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x1 1 + {"CUFFT_ALLOC_FAILED", {"HIPFFT_ALLOC_FAILED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x2 2 + {"CUFFT_INVALID_TYPE", {"HIPFFT_INVALID_TYPE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x3 3 + {"CUFFT_INVALID_VALUE", {"HIPFFT_INVALID_VALUE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x4 4 + {"CUFFT_INTERNAL_ERROR", {"HIPFFT_INTERNAL_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x5 5 + {"CUFFT_EXEC_FAILED", {"HIPFFT_EXEC_FAILED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x6 6 + {"CUFFT_SETUP_FAILED", {"HIPFFT_SETUP_FAILED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x7 7 + {"CUFFT_INVALID_SIZE", {"HIPFFT_INVALID_SIZE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x8 8 + {"CUFFT_UNALIGNED_DATA", {"HIPFFT_UNALIGNED_DATA", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x9 9 + {"CUFFT_INCOMPLETE_PARAMETER_LIST", {"HIPFFT_INCOMPLETE_PARAMETER_LIST", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xA 10 + {"CUFFT_INVALID_DEVICE", {"HIPFFT_INVALID_DEVICE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xB 11 + {"CUFFT_PARSE_ERROR", {"HIPFFT_PARSE_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xC 12 + {"CUFFT_NO_WORKSPACE", {"HIPFFT_NO_WORKSPACE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xD 13 + {"CUFFT_NOT_IMPLEMENTED", {"HIPFFT_NOT_IMPLEMENTED", "", CONV_NUMERIC_LITERAL, API_FFT, 
1}}, // 0xE 14 + {"CUFFT_LICENSE_ERROR", {"HIPFFT_LICENSE_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, + {"CUFFT_NOT_SUPPORTED", {"HIPFFT_NOT_SUPPORTED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x10 16 + + {"cufftType_t", {"hipfftType_t", "", CONV_TYPE, API_FFT, 1}}, + {"cufftType", {"hipfftType", "", CONV_TYPE, API_FFT, 1}}, + {"CUFFT_R2C", {"HIPFFT_R2C", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x2a + {"CUFFT_C2R", {"HIPFFT_C2R", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x2c + {"CUFFT_C2C", {"HIPFFT_C2C", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x29 + {"CUFFT_D2Z", {"HIPFFT_D2Z", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x6a + {"CUFFT_Z2D", {"HIPFFT_Z2D", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x6c + {"CUFFT_Z2Z", {"HIPFFT_Z2Z", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x69 + + {"cufftCompatibility_t", {"hipfftCompatibility_t", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"cufftCompatibility", {"hipfftCompatibility", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"CUFFT_COMPATIBILITY_FFTW_PADDING", {"HIPFFT_COMPATIBILITY_FFTW_PADDING", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x01 + + {"cufftXtSubFormat_t", {"hipfftXtSubFormat_t", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"cufftXtSubFormat", {"hipfftXtSubFormat", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"CUFFT_XT_FORMAT_INPUT", {"HIPFFT_XT_FORMAT_INPUT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x00 + {"CUFFT_XT_FORMAT_OUTPUT", {"HIPFFT_XT_FORMAT_OUTPUT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x01 + {"CUFFT_XT_FORMAT_INPLACE", {"HIPFFT_XT_FORMAT_INPLACE", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x02 + {"CUFFT_XT_FORMAT_INPLACE_SHUFFLED", {"HIPFFT_XT_FORMAT_INPLACE_SHUFFLED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x03 + {"CUFFT_XT_FORMAT_1D_INPUT_SHUFFLED", {"HIPFFT_XT_FORMAT_1D_INPUT_SHUFFLED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x04 + {"CUFFT_XT_FORMAT_DISTRIBUTED_INPUT", 
{"HIPFFT_XT_FORMAT_DISTRIBUTED_INPUT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x05 + {"CUFFT_XT_FORMAT_DISTRIBUTED_OUTPUT", {"HIPFFT_XT_FORMAT_DISTRIBUTED_OUTPUT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x06 + {"CUFFT_FORMAT_UNDEFINED", {"HIPFFT_FORMAT_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x07 + + {"cufftXtCopyType_t", {"hipfftXtCopyType_t", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"cufftXtCopyType", {"hipfftXtCopyType", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"CUFFT_COPY_HOST_TO_DEVICE", {"HIPFFT_COPY_HOST_TO_DEVICE", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x00 + {"CUFFT_COPY_DEVICE_TO_HOST", {"HIPFFT_COPY_DEVICE_TO_HOST", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x01 + {"CUFFT_COPY_DEVICE_TO_DEVICE", {"HIPFFT_COPY_DEVICE_TO_DEVICE", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x02 + {"CUFFT_COPY_UNDEFINED", {"HIPFFT_COPY_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x03 + + {"cufftXtQueryType_t", {"hipfftXtQueryType_t", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"cufftXtQueryType", {"hipfftXtQueryType", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"CUFFT_QUERY_1D_FACTORS", {"HIPFFT_QUERY_1D_FACTORS", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x00 + {"CUFFT_QUERY_UNDEFINED", {"HIPFFT_QUERY_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x01 + + {"cufftXtWorkAreaPolicy_t", {"hipfftXtWorkAreaPolicy_t", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"cufftXtWorkAreaPolicy", {"hipfftXtWorkAreaPolicy", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"CUFFT_WORKAREA_MINIMAL", {"HIPFFT_WORKAREA_MINIMAL", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0 + {"CUFFT_WORKAREA_USER", {"HIPFFT_WORKAREA_USER", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 1 + {"CUFFT_WORKAREA_PERFORMANCE", {"HIPFFT_WORKAREA_PERFORMANCE", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 2 + + 
{"cufftXtCallbackType_t", {"hipfftXtCallbackType_t", "", CONV_TYPE, API_FFT, 1}}, + {"cufftXtCallbackType", {"hipfftXtCallbackType", "", CONV_TYPE, API_FFT, 1}}, + {"CUFFT_CB_LD_COMPLEX", {"HIPFFT_CB_LD_COMPLEX", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x0 + {"CUFFT_CB_LD_COMPLEX_DOUBLE", {"HIPFFT_CB_LD_COMPLEX_DOUBLE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x1 + {"CUFFT_CB_LD_REAL", {"HIPFFT_CB_LD_REAL", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x2 + {"CUFFT_CB_LD_REAL_DOUBLE", {"HIPFFT_CB_LD_REAL_DOUBLE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x3 + {"CUFFT_CB_ST_COMPLEX", {"HIPFFT_CB_ST_COMPLEX", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x4 + {"CUFFT_CB_ST_COMPLEX_DOUBLE", {"HIPFFT_CB_ST_COMPLEX_DOUBLE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x5 + {"CUFFT_CB_ST_REAL", {"HIPFFT_CB_ST_REAL", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x6 + {"CUFFT_CB_ST_REAL_DOUBLE", {"HIPFFT_CB_ST_REAL_DOUBLE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x7 + {"CUFFT_CB_UNDEFINED", {"HIPFFT_CB_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x7 // cuFFT types - {"cufftReal", {"hipfftReal", "", CONV_TYPE, API_FFT, 1}}, - {"cufftDoubleReal", {"hipfftDoubleReal", "", CONV_TYPE, API_FFT, 1}}, - {"cufftComplex", {"hipfftComplex", "", CONV_TYPE, API_FFT, 1}}, - {"cufftDoubleComplex", {"hipfftDoubleComplex", "", CONV_TYPE, API_FFT, 1}}, - {"cufftHandle", {"hipfftHandle", "", CONV_TYPE, API_FFT, 1}}, - {"cufftXt1dFactors_t", {"hipfftXt1dFactors_t", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, - {"cufftXt1dFactors", {"hipfftXt1dFactors", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, + {"cufftReal", {"hipfftReal", "", CONV_TYPE, API_FFT, 1}}, + {"cufftDoubleReal", {"hipfftDoubleReal", "", CONV_TYPE, API_FFT, 1}}, + {"cufftComplex", {"hipfftComplex", "", CONV_TYPE, API_FFT, 1}}, + {"cufftDoubleComplex", {"hipfftDoubleComplex", "", CONV_TYPE, API_FFT, 1}}, + {"cufftHandle", {"hipfftHandle", "", CONV_TYPE, API_FFT, 1}}, + {"cufftXt1dFactors_t", 
{"hipfftXt1dFactors_t", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"cufftXt1dFactors", {"hipfftXt1dFactors", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"cufftBox3d_t", {"hipfftBox3d_t", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"cufftBox3d", {"hipfftBox3d", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, }; const std::map CUDA_FFT_TYPE_NAME_VER_MAP { - {"CUFFT_VER_MAJOR", {CUDA_102, CUDA_0, CUDA_0}}, - {"CUFFT_VER_MINOR", {CUDA_102, CUDA_0, CUDA_0}}, - {"CUFFT_VER_PATCH", {CUDA_102, CUDA_0, CUDA_0}}, - {"CUFFT_VER_BUILD", {CUDA_102, CUDA_0, CUDA_0}}, - {"CUFFT_VERSION", {CUDA_102, CUDA_0, CUDA_0}}, - {"CUFFT_NOT_SUPPORTED", {CUDA_80, CUDA_0, CUDA_0}}, - {"cufftXtWorkAreaPolicy_t", {CUDA_92, CUDA_0, CUDA_0}}, - {"cufftXtWorkAreaPolicy", {CUDA_92, CUDA_0, CUDA_0}}, - {"CUFFT_WORKAREA_MINIMAL", {CUDA_92, CUDA_0, CUDA_0}}, - {"CUFFT_WORKAREA_USER", {CUDA_92, CUDA_0, CUDA_0}}, + {"CUFFT_VER_MAJOR", {CUDA_102, CUDA_0, CUDA_0}}, + {"CUFFT_VER_MINOR", {CUDA_102, CUDA_0, CUDA_0}}, + {"CUFFT_VER_PATCH", {CUDA_102, CUDA_0, CUDA_0}}, + {"CUFFT_VER_BUILD", {CUDA_102, CUDA_0, CUDA_0}}, + {"CUFFT_VERSION", {CUDA_102, CUDA_0, CUDA_0}}, + {"CUFFT_NOT_SUPPORTED", {CUDA_80, CUDA_0, CUDA_0}}, + {"cufftXtWorkAreaPolicy_t", {CUDA_92, CUDA_0, CUDA_0}}, + {"cufftXtWorkAreaPolicy", {CUDA_92, CUDA_0, CUDA_0}}, + {"CUFFT_WORKAREA_MINIMAL", {CUDA_92, CUDA_0, CUDA_0}}, + {"CUFFT_WORKAREA_USER", {CUDA_92, CUDA_0, CUDA_0}}, + {"CUFFT_XT_FORMAT_DISTRIBUTED_INPUT", {CUDA_118, CUDA_0, CUDA_0}}, + {"CUFFT_XT_FORMAT_DISTRIBUTED_OUTPUT", {CUDA_118, CUDA_0, CUDA_0}}, + {"cufftBox3d_t", {CUDA_118, CUDA_0, CUDA_0}}, + {"cufftBox3d", {CUDA_118, CUDA_0, CUDA_0}}, }; const std::map HIP_FFT_TYPE_NAME_VER_MAP { - {"HIPFFT_FORWARD", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_BACKWARD", {HIP_1070, HIP_0, HIP_0 }}, - {"hipfftResult_t", {HIP_1070, HIP_0, HIP_0 }}, - {"hipfftResult", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_SUCCESS", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_INVALID_PLAN", {HIP_1070, HIP_0, HIP_0 }}, - 
{"HIPFFT_ALLOC_FAILED", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_INVALID_TYPE", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_INVALID_VALUE", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_INTERNAL_ERROR", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_EXEC_FAILED", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_SETUP_FAILED", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_INVALID_SIZE", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_UNALIGNED_DATA", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_INCOMPLETE_PARAMETER_LIST", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_INVALID_DEVICE", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_PARSE_ERROR", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_NO_WORKSPACE", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_NOT_IMPLEMENTED", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_NOT_SUPPORTED", {HIP_1070, HIP_0, HIP_0 }}, - {"hipfftType_t", {HIP_1070, HIP_0, HIP_0 }}, - {"hipfftType", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_R2C", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_C2R", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_C2C", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_D2Z", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_Z2D", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_Z2Z", {HIP_1070, HIP_0, HIP_0 }}, - {"hipfftReal", {HIP_1070, HIP_0, HIP_0 }}, - {"hipfftDoubleReal", {HIP_1070, HIP_0, HIP_0 }}, - {"hipfftComplex", {HIP_1070, HIP_0, HIP_0 }}, - {"hipfftDoubleComplex", {HIP_1070, HIP_0, HIP_0 }}, - {"hipfftHandle", {HIP_1070, HIP_0, HIP_0 }}, - {"hipfftXtCallbackType_t", {HIP_4030, HIP_0, HIP_0 }}, - {"hipfftXtCallbackType", {HIP_4030, HIP_0, HIP_0 }}, - {"HIPFFT_CB_LD_COMPLEX", {HIP_4030, HIP_0, HIP_0 }}, - {"HIPFFT_CB_LD_COMPLEX_DOUBLE", {HIP_4030, HIP_0, HIP_0 }}, - {"HIPFFT_CB_LD_REAL", {HIP_4030, HIP_0, HIP_0 }}, - {"HIPFFT_CB_LD_REAL_DOUBLE", {HIP_4030, HIP_0, HIP_0 }}, - {"HIPFFT_CB_ST_COMPLEX", {HIP_4030, HIP_0, HIP_0 }}, - {"HIPFFT_CB_ST_COMPLEX_DOUBLE", {HIP_4030, HIP_0, HIP_0 }}, - {"HIPFFT_CB_ST_REAL", {HIP_4030, HIP_0, HIP_0 }}, - {"HIPFFT_CB_ST_REAL_DOUBLE", {HIP_4030, HIP_0, HIP_0 }}, - {"HIPFFT_CB_UNDEFINED", {HIP_4030, HIP_0, 
HIP_0 }}, + {"HIPFFT_FORWARD", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_BACKWARD", {HIP_1070, HIP_0, HIP_0 }}, + {"hipfftResult_t", {HIP_1070, HIP_0, HIP_0 }}, + {"hipfftResult", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_SUCCESS", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_INVALID_PLAN", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_ALLOC_FAILED", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_INVALID_TYPE", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_INVALID_VALUE", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_INTERNAL_ERROR", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_EXEC_FAILED", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_SETUP_FAILED", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_INVALID_SIZE", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_UNALIGNED_DATA", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_INCOMPLETE_PARAMETER_LIST", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_INVALID_DEVICE", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_PARSE_ERROR", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_NO_WORKSPACE", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_NOT_IMPLEMENTED", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_NOT_SUPPORTED", {HIP_1070, HIP_0, HIP_0 }}, + {"hipfftType_t", {HIP_1070, HIP_0, HIP_0 }}, + {"hipfftType", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_R2C", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_C2R", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_C2C", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_D2Z", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_Z2D", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_Z2Z", {HIP_1070, HIP_0, HIP_0 }}, + {"hipfftReal", {HIP_1070, HIP_0, HIP_0 }}, + {"hipfftDoubleReal", {HIP_1070, HIP_0, HIP_0 }}, + {"hipfftComplex", {HIP_1070, HIP_0, HIP_0 }}, + {"hipfftDoubleComplex", {HIP_1070, HIP_0, HIP_0 }}, + {"hipfftHandle", {HIP_1070, HIP_0, HIP_0 }}, + {"hipfftXtCallbackType_t", {HIP_4030, HIP_0, HIP_0 }}, + {"hipfftXtCallbackType", {HIP_4030, HIP_0, HIP_0 }}, + {"HIPFFT_CB_LD_COMPLEX", {HIP_4030, HIP_0, HIP_0 }}, + {"HIPFFT_CB_LD_COMPLEX_DOUBLE", {HIP_4030, HIP_0, HIP_0 }}, + {"HIPFFT_CB_LD_REAL", {HIP_4030, HIP_0, HIP_0 }}, + 
{"HIPFFT_CB_LD_REAL_DOUBLE", {HIP_4030, HIP_0, HIP_0 }}, + {"HIPFFT_CB_ST_COMPLEX", {HIP_4030, HIP_0, HIP_0 }}, + {"HIPFFT_CB_ST_COMPLEX_DOUBLE", {HIP_4030, HIP_0, HIP_0 }}, + {"HIPFFT_CB_ST_REAL", {HIP_4030, HIP_0, HIP_0 }}, + {"HIPFFT_CB_ST_REAL_DOUBLE", {HIP_4030, HIP_0, HIP_0 }}, + {"HIPFFT_CB_UNDEFINED", {HIP_4030, HIP_0, HIP_0 }}, }; diff --git a/src/CUDA2HIP_RTC_API_functions.cpp b/src/CUDA2HIP_RTC_API_functions.cpp index 2763485b..263c1811 100644 --- a/src/CUDA2HIP_RTC_API_functions.cpp +++ b/src/CUDA2HIP_RTC_API_functions.cpp @@ -33,8 +33,8 @@ const std::map CUDA_RTC_FUNCTION_MAP { {"nvrtcCompileProgram", {"hiprtcCompileProgram", "", CONV_LIB_FUNC, API_RTC, 2}}, {"nvrtcGetPTXSize", {"hiprtcGetCodeSize", "", CONV_LIB_FUNC, API_RTC, 2}}, {"nvrtcGetPTX", {"hiprtcGetCode", "", CONV_LIB_FUNC, API_RTC, 2}}, - {"nvrtcGetCUBINSize", {"hiprtcGetBitcodeSize", "", CONV_LIB_FUNC, API_RTC, 2, HIP_EXPERIMENTAL}}, - {"nvrtcGetCUBIN", {"hiprtcGetBitcode", "", CONV_LIB_FUNC, API_RTC, 2, HIP_EXPERIMENTAL}}, + {"nvrtcGetCUBINSize", {"hiprtcGetBitcodeSize", "", CONV_LIB_FUNC, API_RTC, 2}}, + {"nvrtcGetCUBIN", {"hiprtcGetBitcode", "", CONV_LIB_FUNC, API_RTC, 2}}, {"nvrtcGetNVVMSize", {"hiprtcGetNVVMSize", "", CONV_LIB_FUNC, API_RTC, 2, HIP_UNSUPPORTED}}, {"nvrtcGetNVVM", {"hiprtcGetNVVM", "", CONV_LIB_FUNC, API_RTC, 2, HIP_UNSUPPORTED}}, {"nvrtcGetProgramLogSize", {"hiprtcGetProgramLogSize", "", CONV_LIB_FUNC, API_RTC, 2}}, @@ -66,8 +66,8 @@ const std::map HIP_RTC_FUNCTION_VER_MAP { {"hiprtcGetProgramLog", {HIP_2060, HIP_0, HIP_0 }}, {"hiprtcAddNameExpression", {HIP_2060, HIP_0, HIP_0 }}, {"hiprtcGetLoweredName", {HIP_2060, HIP_0, HIP_0 }}, - {"hiprtcGetBitcode", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hiprtcGetBitcodeSize", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, + {"hiprtcGetBitcode", {HIP_5030, HIP_0, HIP_0 }}, + {"hiprtcGetBitcodeSize", {HIP_5030, HIP_0, HIP_0 }}, }; const std::map CUDA_RTC_API_SECTION_MAP { diff --git a/src/CUDA2HIP_Runtime_API_functions.cpp 
b/src/CUDA2HIP_Runtime_API_functions.cpp index 4ead60ca..4a9ef995 100644 --- a/src/CUDA2HIP_Runtime_API_functions.cpp +++ b/src/CUDA2HIP_Runtime_API_functions.cpp @@ -52,7 +52,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // no analogue {"cudaDeviceSetCacheConfig", {"hipDeviceSetCacheConfig", "", CONV_DEVICE, API_RUNTIME, 1}}, // cuCtxSetLimit - {"cudaDeviceSetLimit", {"hipDeviceSetLimit", "", CONV_DEVICE, API_RUNTIME, 1, HIP_EXPERIMENTAL}}, + {"cudaDeviceSetLimit", {"hipDeviceSetLimit", "", CONV_DEVICE, API_RUNTIME, 1}}, // cuCtxSetSharedMemConfig {"cudaDeviceSetSharedMemConfig", {"hipDeviceSetSharedMemConfig", "", CONV_DEVICE, API_RUNTIME, 1}}, // cuCtxSynchronize @@ -229,6 +229,9 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { {"cudaSetDoubleForDevice", {"hipSetDoubleForDevice", "", CONV_EXECUTION, API_RUNTIME, 7, HIP_UNSUPPORTED | CUDA_DEPRECATED}}, // no analogue {"cudaSetDoubleForHost", {"hipSetDoubleForHost", "", CONV_EXECUTION, API_RUNTIME, 7, HIP_UNSUPPORTED | CUDA_DEPRECATED}}, + // no analogue + // NOTE: Not equal to cuLaunchKernelEx due to different signatures + {"cudaLaunchKernelExC", {"hipLaunchKernelExC", "", CONV_EXECUTION, API_RUNTIME, 7, HIP_UNSUPPORTED}}, // 8. 
Occupancy // cuOccupancyAvailableDynamicSMemPerBlock @@ -245,6 +248,10 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { {"cudaOccupancyMaxPotentialBlockSizeVariableSMem", {"hipOccupancyMaxPotentialBlockSizeVariableSMem", "", CONV_OCCUPANCY, API_RUNTIME, 8, HIP_UNSUPPORTED}}, // no analogue {"cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags", {"hipOccupancyMaxPotentialBlockSizeVariableSMemWithFlags", "", CONV_OCCUPANCY, API_RUNTIME, 8, HIP_UNSUPPORTED}}, + // cuOccupancyMaxPotentialClusterSize + {"cudaOccupancyMaxPotentialClusterSize", {"hipOccupancyMaxPotentialClusterSize", "", CONV_OCCUPANCY, API_RUNTIME, 8, HIP_UNSUPPORTED}}, + // cuOccupancyMaxActiveClusters + {"cudaOccupancyMaxActiveClusters", {"hipOccupancyMaxActiveClusters", "", CONV_OCCUPANCY, API_RUNTIME, 8, HIP_UNSUPPORTED}}, // 9. Memory Management // no analogue @@ -642,6 +649,10 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // no analogue // NOTE: Not equal to cuTexObjectGetTextureDesc due to different signatures {"cudaGetTextureObjectTextureDesc", {"hipGetTextureObjectTextureDesc", "", CONV_TEXTURE, API_RUNTIME, 27}}, + // + {"cudaCreateTextureObject_v2", {"hipCreateTextureObject_v2", "", CONV_TEXTURE, API_RUNTIME, 27, HIP_UNSUPPORTED}}, + // + {"cudaGetTextureObjectTextureDesc_v2", {"hipGetTextureObjectTextureDesc_v2", "", CONV_TEXTURE, API_RUNTIME, 27, HIP_UNSUPPORTED}}, // 28. 
Surface Object Management // no analogue @@ -789,7 +800,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // cuGraphExecEventWaitNodeSetEvent {"cudaGraphExecEventWaitNodeSetEvent", {"hipGraphExecEventWaitNodeSetEvent", "", CONV_GRAPH, API_RUNTIME, 30}}, // cuGraphUpload - {"cudaGraphUpload", {"hipGraphUpload", "", CONV_GRAPH, API_RUNTIME, 30, HIP_EXPERIMENTAL}}, + {"cudaGraphUpload", {"hipGraphUpload", "", CONV_GRAPH, API_RUNTIME, 30}}, // cuGraphAddExternalSemaphoresSignalNode {"cudaGraphAddExternalSemaphoresSignalNode", {"hipGraphAddExternalSemaphoresSignalNode", "", CONV_GRAPH, API_RUNTIME, 30, HIP_UNSUPPORTED}}, // cuGraphExternalSemaphoresSignalNodeGetParams @@ -807,15 +818,15 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // cuGraphExecExternalSemaphoresWaitNodeSetParams {"cudaGraphExecExternalSemaphoresWaitNodeSetParams", {"hipGraphExecExternalSemaphoresWaitNodeSetParams", "", CONV_GRAPH, API_RUNTIME, 30, HIP_UNSUPPORTED}}, // cuUserObjectCreate - {"cudaUserObjectCreate", {"hipUserObjectCreate", "", CONV_GRAPH, API_RUNTIME, 30, HIP_EXPERIMENTAL}}, + {"cudaUserObjectCreate", {"hipUserObjectCreate", "", CONV_GRAPH, API_RUNTIME, 30}}, // cuUserObjectRetain - {"cudaUserObjectRetain", {"hipUserObjectRetain", "", CONV_GRAPH, API_RUNTIME, 30, HIP_EXPERIMENTAL}}, + {"cudaUserObjectRetain", {"hipUserObjectRetain", "", CONV_GRAPH, API_RUNTIME, 30}}, // cuUserObjectRelease - {"cudaUserObjectRelease", {"hipUserObjectRelease", "", CONV_GRAPH, API_RUNTIME, 30, HIP_EXPERIMENTAL}}, + {"cudaUserObjectRelease", {"hipUserObjectRelease", "", CONV_GRAPH, API_RUNTIME, 30}}, // cuGraphRetainUserObject - {"cudaGraphRetainUserObject", {"hipGraphRetainUserObject", "", CONV_GRAPH, API_RUNTIME, 30, HIP_EXPERIMENTAL}}, + {"cudaGraphRetainUserObject", {"hipGraphRetainUserObject", "", CONV_GRAPH, API_RUNTIME, 30}}, // cuGraphReleaseUserObject - {"cudaGraphReleaseUserObject", {"hipGraphReleaseUserObject", "", CONV_GRAPH, API_RUNTIME, 30, HIP_EXPERIMENTAL}}, + {"cudaGraphReleaseUserObject", 
{"hipGraphReleaseUserObject", "", CONV_GRAPH, API_RUNTIME, 30}}, // cuGraphAddMemAllocNode {"cudaGraphAddMemAllocNode", {"hipGraphAddMemAllocNode", "", CONV_GRAPH, API_RUNTIME, 30, HIP_UNSUPPORTED}}, // cuGraphMemAllocNodeGetParams @@ -825,11 +836,11 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // cuGraphMemFreeNodeGetParams {"cudaGraphMemFreeNodeGetParams", {"hipGraphMemFreeNodeGetParams", "", CONV_GRAPH, API_RUNTIME, 30, HIP_UNSUPPORTED}}, // cuDeviceGraphMemTrim - {"cudaDeviceGraphMemTrim", {"hipDeviceGraphMemTrim", "", CONV_GRAPH, API_RUNTIME, 30, HIP_EXPERIMENTAL}}, + {"cudaDeviceGraphMemTrim", {"hipDeviceGraphMemTrim", "", CONV_GRAPH, API_RUNTIME, 30}}, // cuDeviceGetGraphMemAttribute - {"cudaDeviceGetGraphMemAttribute", {"hipDeviceGetGraphMemAttribute", "", CONV_GRAPH, API_RUNTIME, 30, HIP_EXPERIMENTAL}}, + {"cudaDeviceGetGraphMemAttribute", {"hipDeviceGetGraphMemAttribute", "", CONV_GRAPH, API_RUNTIME, 30}}, // cuDeviceSetGraphMemAttribute - {"cudaDeviceSetGraphMemAttribute", {"hipDeviceSetGraphMemAttribute", "", CONV_GRAPH, API_RUNTIME, 30, HIP_EXPERIMENTAL}}, + {"cudaDeviceSetGraphMemAttribute", {"hipDeviceSetGraphMemAttribute", "", CONV_GRAPH, API_RUNTIME, 30}}, // cuGraphInstantiateWithFlags {"cudaGraphInstantiateWithFlags", {"hipGraphInstantiateWithFlags", "", CONV_GRAPH, API_RUNTIME, 30}}, // cuGraphNodeSetEnabled @@ -1073,6 +1084,11 @@ const std::map CUDA_RUNTIME_FUNCTION_VER_MAP { {"cudaGraphInstantiateWithFlags", {CUDA_114, CUDA_0, CUDA_0 }}, {"cudaArrayGetMemoryRequirements", {CUDA_116, CUDA_0, CUDA_0 }}, {"cudaGraphNodeSetEnabled", {CUDA_116, CUDA_0, CUDA_0 }}, + {"cudaLaunchKernelExC", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaOccupancyMaxPotentialClusterSize", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaOccupancyMaxActiveClusters", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaCreateTextureObject_v2", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaGetTextureObjectTextureDesc_v2", {CUDA_118, CUDA_0, CUDA_0 }}, }; const std::map HIP_RUNTIME_FUNCTION_VER_MAP { @@ -1304,16 
+1320,16 @@ const std::map HIP_RUNTIME_FUNCTION_VER_MAP { {"hipThreadExchangeStreamCaptureMode", {HIP_5020, HIP_0, HIP_0 }}, {"hipGraphKernelNodeSetAttribute", {HIP_5020, HIP_0, HIP_0 }}, {"hipGraphKernelNodeGetAttribute", {HIP_5020, HIP_0, HIP_0 }}, - {"hipDeviceSetLimit", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipGraphUpload", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipDeviceGetGraphMemAttribute", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipDeviceSetGraphMemAttribute", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipDeviceGraphMemTrim", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipUserObjectCreate", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipUserObjectRelease", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipUserObjectRetain", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipGraphRetainUserObject", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipGraphReleaseUserObject", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipDeviceSetLimit", {HIP_5030, HIP_0, HIP_0 }}, + {"hipGraphUpload", {HIP_5030, HIP_0, HIP_0 }}, + {"hipDeviceGetGraphMemAttribute", {HIP_5030, HIP_0, HIP_0 }}, + {"hipDeviceSetGraphMemAttribute", {HIP_5030, HIP_0, HIP_0 }}, + {"hipDeviceGraphMemTrim", {HIP_5030, HIP_0, HIP_0 }}, + {"hipUserObjectCreate", {HIP_5030, HIP_0, HIP_0 }}, + {"hipUserObjectRelease", {HIP_5030, HIP_0, HIP_0 }}, + {"hipUserObjectRetain", {HIP_5030, HIP_0, HIP_0 }}, + {"hipGraphRetainUserObject", {HIP_5030, HIP_0, HIP_0 }}, + {"hipGraphReleaseUserObject", {HIP_5030, HIP_0, HIP_0 }}, }; const std::map CUDA_RUNTIME_API_SECTION_MAP { diff --git a/src/CUDA2HIP_Runtime_API_types.cpp b/src/CUDA2HIP_Runtime_API_types.cpp index 9cc43d89..ff3c4986 100644 --- a/src/CUDA2HIP_Runtime_API_types.cpp +++ b/src/CUDA2HIP_Runtime_API_types.cpp @@ -225,6 +225,19 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { // CUkernelNodeAttrValue {"cudaKernelNodeAttrValue", {"hipKernelNodeAttrValue", "", CONV_TYPE, API_RUNTIME, 36}}, + // CUlaunchAttributeValue + {"cudaLaunchAttributeValue", 
{"hipLaunchAttributeValue", "", CONV_TYPE, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + + // CUlaunchAttribute_st + {"cudaLaunchAttribute_st", {"hipLaunchAttribute", "", CONV_TYPE, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + // CUlaunchAttribute + {"cudaLaunchAttribute", {"hipLaunchAttribute", "", CONV_TYPE, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + + // CUlaunchConfig_st + {"cudaLaunchConfig_st", {"hipLaunchConfig", "", CONV_TYPE, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + // CUlaunchConfig + {"cudaLaunchConfig_t", {"hipLaunchConfig", "", CONV_TYPE, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + // 3. Enums // no analogue @@ -535,7 +548,7 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { // CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED {"cudaDevAttrMaxTimelineSemaphoreInteropSupported", {"hipDeviceAttributeMaxTimelineSemaphoreInteropSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED | CUDA_DEPRECATED}}, // 114 // CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED - {"cudaDevAttrTimelineSemaphoreInteropSupported", {"hipDevAttrTimelineSemaphoreInteropSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 114 + {"cudaDevAttrTimelineSemaphoreInteropSupported", {"hipDeviceAttributeTimelineSemaphoreInteropSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 114 // CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED {"cudaDevAttrMemoryPoolsSupported", {"hipDeviceAttributeMemoryPoolsSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 115 // CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED @@ -546,6 +559,8 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { {"cudaDevAttrGPUDirectRDMAWritesOrdering", {"hipDeviceAttributeGpuDirectRdmaWritesOrdering", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 118 // CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES {"cudaDevAttrMemoryPoolSupportedHandleTypes", {"hipDeviceAttributeMempoolSupportedHandleTypes", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 119 + 
// CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH + {"cudaDevAttrClusterLaunch", {"hipDeviceAttributeClusterLaunch", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 120 // CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED {"cudaDevAttrDeferredMappingCudaArraySupported", {"hipDeviceAttributeDeferredMappingCudaArraySupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 121 // CU_DEVICE_ATTRIBUTE_MAX @@ -963,6 +978,8 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { {"cudaErrorMpsMaxClientsReached", {"hipErrorMpsMaxClientsReached", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 808 // CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED {"cudaErrorMpsMaxConnectionsReached", {"hipErrorMpsMaxConnectionsReached", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 809 + // CUDA_ERROR_MPS_CLIENT_TERMINATED + {"cudaErrorMpsClientTerminated", {"hipErrorMpsClientTerminated", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 810 // CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED {"cudaErrorStreamCaptureUnsupported", {"hipErrorStreamCaptureUnsupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 900 // CUDA_ERROR_STREAM_CAPTURE_INVALIDATED @@ -987,6 +1004,8 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { {"cudaErrorGraphExecUpdateFailure", {"hipErrorGraphExecUpdateFailure", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 910 // CUDA_ERROR_EXTERNAL_DEVICE {"cudaErrorExternalDevice", {"hipErrorExternalDevice", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 911 + // CUDA_ERROR_INVALID_CLUSTER_SIZE + {"cudaErrorInvalidClusterSize", {"hipErrorInvalidClusterSize", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 912 // CUDA_ERROR_UNKNOWN {"cudaErrorUnknown", {"hipErrorUnknown", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 999 // Deprecated since CUDA 4.1 @@ -1044,8 +1063,20 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { {"cudaFuncAttributeMaxDynamicSharedMemorySize", 
{"hipFuncAttributeMaxDynamicSharedMemorySize", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 8 // CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT {"cudaFuncAttributePreferredSharedMemoryCarveout", {"hipFuncAttributePreferredSharedMemoryCarveout", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 9 + // CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET + {"cudaFuncAttributeClusterDimMustBeSet", {"hipFuncAttributeClusterDimMustBeSet", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 10 + // CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH + {"cudaFuncAttributeRequiredClusterWidth", {"hipFuncAttributeRequiredClusterWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 11 + // CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT + {"cudaFuncAttributeRequiredClusterHeight", {"hipFuncAttributeRequiredClusterHeight", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 12 + // CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH + {"cudaFuncAttributeRequiredClusterDepth", {"hipFuncAttributeRequiredClusterDepth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 13 + // CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED + {"cudaFuncAttributeNonPortableClusterSizeAllowed", {"hipFuncAttributeNonPortableClusterSizeAllowed", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 14 + // CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE + {"cudaFuncAttributeClusterSchedulingPolicyPreference", {"hipFuncAttributeClusterSchedulingPolicyPreference", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 15 // CU_FUNC_ATTRIBUTE_MAX - {"cudaFuncAttributeMax", {"hipFuncAttributeMax", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 10 + {"cudaFuncAttributeMax", {"hipFuncAttributeMax", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 16 // CUfunc_cache {"cudaFuncCache", {"hipFuncCache_t", "", CONV_TYPE, API_RUNTIME, 36}}, @@ -1119,9 +1150,9 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { // CU_GRAPH_NODE_TYPE_EVENT_RECORD = 7 
{"cudaGraphNodeTypeEventRecord", {"hipGraphNodeTypeEventRecord", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 0x07 // CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL = 8 - {"cudaGraphNodeTypeExtSemaphoreSignal", {"hipGraphNodeTypeExtSemaphoreSignal", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_EXPERIMENTAL}}, // 0x08 + {"cudaGraphNodeTypeExtSemaphoreSignal", {"hipGraphNodeTypeExtSemaphoreSignal", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 0x08 // CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT = 9 - {"cudaGraphNodeTypeExtSemaphoreWait", {"hipGraphNodeTypeExtSemaphoreWait", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_EXPERIMENTAL}}, // 0x09 + {"cudaGraphNodeTypeExtSemaphoreWait", {"hipGraphNodeTypeExtSemaphoreWait", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 0x09 // CU_GRAPH_NODE_TYPE_MEM_ALLOC = 10 {"cudaGraphNodeTypeMemAlloc", {"hipGraphNodeTypeMemAlloc", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 0x0a // CU_GRAPH_NODE_TYPE_MEM_FREE = 11 @@ -1155,7 +1186,7 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { {"cudaLimit", {"hipLimit_t", "", CONV_TYPE, API_RUNTIME, 36}}, // cudaLimit enum values // CU_LIMIT_STACK_SIZE - {"cudaLimitStackSize", {"hipLimitStackSize", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_EXPERIMENTAL}}, // 0x00 + {"cudaLimitStackSize", {"hipLimitStackSize", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 0x00 // CU_LIMIT_PRINTF_FIFO_SIZE {"cudaLimitPrintfFifoSize", {"hipLimitPrintfFifoSize", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 0x01 // CU_LIMIT_MALLOC_HEAP_SIZE @@ -1201,7 +1232,7 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { {"cudaMemoryTypeUnregistered", {"hipMemoryTypeUnregistered", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 0 {"cudaMemoryTypeHost", {"hipMemoryTypeHost", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 1 {"cudaMemoryTypeDevice", {"hipMemoryTypeDevice", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 2 - {"cudaMemoryTypeManaged", {"hipMemoryTypeManaged", "", CONV_NUMERIC_LITERAL, 
API_RUNTIME, 36, HIP_EXPERIMENTAL}}, // 3 + {"cudaMemoryTypeManaged", {"hipMemoryTypeManaged", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 3 // CUmem_range_attribute {"cudaMemRangeAttribute", {"hipMemRangeAttribute", "", CONV_TYPE, API_RUNTIME, 36}}, @@ -1517,7 +1548,7 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { // CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE {"cudaKernelNodeAttributeCooperative", {"hipKernelNodeAttributeCooperative", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 2 // CU_KERNEL_NODE_ATTRIBUTE_PRIORITY - {"cudaKernelNodeAttributePriority", {"hipKernelNodeAttributePriority", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 8 + {"cudaKernelNodeAttributePriority", {"hipKernelNodeAttributePriority", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 8 // CUmemPool_attribute {"cudaMemPoolAttr", {"hipMemPoolAttr", "", CONV_TYPE, API_RUNTIME, 36}}, @@ -1588,16 +1619,16 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { {"cudaStreamSetCaptureDependencies", {"hipStreamSetCaptureDependencies", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 0x1 // CUuserObject_flags - {"cudaUserObjectFlags", {"hipUserObjectFlags", "", CONV_TYPE, API_RUNTIME, 36, HIP_EXPERIMENTAL}}, + {"cudaUserObjectFlags", {"hipUserObjectFlags", "", CONV_TYPE, API_RUNTIME, 36}}, // cudaUserObjectFlags enum values // CU_USER_OBJECT_NO_DESTRUCTOR_SYNC - {"cudaUserObjectNoDestructorSync", {"hipUserObjectNoDestructorSync", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_EXPERIMENTAL}}, // 0x1 + {"cudaUserObjectNoDestructorSync", {"hipUserObjectNoDestructorSync", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 0x1 // CUuserObjectRetain_flags - {"cudaUserObjectRetainFlags", {"hipUserObjectRetainFlags", "", CONV_TYPE, API_RUNTIME, 36, HIP_EXPERIMENTAL}}, + {"cudaUserObjectRetainFlags", {"hipUserObjectRetainFlags", "", CONV_TYPE, API_RUNTIME, 36}}, // cudaUserObjectRetainFlags enum values // CU_GRAPH_USER_OBJECT_MOVE - {"cudaGraphUserObjectMove", {"hipGraphUserObjectMove", 
"", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_EXPERIMENTAL}}, // 0x1 + {"cudaGraphUserObjectMove", {"hipGraphUserObjectMove", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 0x1 // CUflushGPUDirectRDMAWritesOptions {"cudaFlushGPUDirectRDMAWritesOptions", {"hipFlushGPUDirectRDMAWritesOptions", "", CONV_TYPE, API_RUNTIME, 36, HIP_UNSUPPORTED}}, @@ -1666,16 +1697,16 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { {"cudaGraphDebugDotFlagsHandles", {"hipGraphDebugDotFlagsHandles", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 1<<10 // CUgraphMem_attribute - {"cudaGraphMemAttributeType", {"hipGraphMemAttributeType", "", CONV_TYPE, API_RUNTIME, 36, HIP_EXPERIMENTAL}}, + {"cudaGraphMemAttributeType", {"hipGraphMemAttributeType", "", CONV_TYPE, API_RUNTIME, 36}}, // cudaGraphMemAttributeType enum values // CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT - {"cudaGraphMemAttrUsedMemCurrent", {"hipGraphMemAttrUsedMemCurrent", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_EXPERIMENTAL}}, + {"cudaGraphMemAttrUsedMemCurrent", {"hipGraphMemAttrUsedMemCurrent", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // CU_GRAPH_MEM_ATTR_USED_MEM_HIGH - {"cudaGraphMemAttrUsedMemHigh", {"hipGraphMemAttrUsedMemHigh", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_EXPERIMENTAL}}, + {"cudaGraphMemAttrUsedMemHigh", {"hipGraphMemAttrUsedMemHigh", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT - {"cudaGraphMemAttrReservedMemCurrent", {"hipGraphMemAttrReservedMemCurrent", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_EXPERIMENTAL}}, + {"cudaGraphMemAttrReservedMemCurrent", {"hipGraphMemAttrReservedMemCurrent", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH - {"cudaGraphMemAttrReservedMemHigh", {"hipGraphMemAttrReservedMemHigh", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_EXPERIMENTAL}}, + {"cudaGraphMemAttrReservedMemHigh", {"hipGraphMemAttrReservedMemHigh", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 
CUgraphInstantiate_flags {"cudaGraphInstantiateFlags", {"hipGraphInstantiateFlags", "", CONV_TYPE, API_RUNTIME, 36}}, @@ -1685,6 +1716,38 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { // CUDA_GRAPH_INSTANTIATE_FLAG_USE_NODE_PRIORITY {"cudaGraphInstantiateFlagUseNodePriority", {"hipGraphInstantiateFlagUseNodePriority", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + // CUclusterSchedulingPolicy + {"cudaClusterSchedulingPolicy", {"hipClusterSchedulingPolicy", "", CONV_TYPE, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + // cudaClusterSchedulingPolicy enum values + // CU_CLUSTER_SCHEDULING_POLICY_DEFAULT + {"cudaClusterSchedulingPolicyDefault", {"hipClusterSchedulingPolicyDefault", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + // CU_CLUSTER_SCHEDULING_POLICY_SPREAD + {"cudaClusterSchedulingPolicySpread", {"hipClusterSchedulingPolicySpread", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + // CU_CLUSTER_SCHEDULING_POLICY_LOAD_BALANCING + {"cudaClusterSchedulingPolicyLoadBalancing", {"hipClusterSchedulingPolicyLoadBalancing", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + + // CUlaunchAttributeID + {"cudaLaunchAttributeID", {"hipLaunchAttributeID", "", CONV_TYPE, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + // cudaLaunchAttributeID enum values + // CU_LAUNCH_ATTRIBUTE_IGNORE + {"cudaLaunchAttributeIgnore", {"hipLaunchAttributeIgnore", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + // CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW + {"cudaLaunchAttributeAccessPolicyWindow", {"hipLaunchAttributeAccessPolicyWindow", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + // CU_LAUNCH_ATTRIBUTE_COOPERATIVE + {"cudaLaunchAttributeCooperative", {"hipLaunchAttributeCooperative", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + // CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY + {"cudaLaunchAttributeSynchronizationPolicy", {"hipLaunchAttributeSynchronizationPolicy", "", CONV_NUMERIC_LITERAL, 
API_RUNTIME, 36, HIP_UNSUPPORTED}}, + // CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION + {"cudaLaunchAttributeClusterDimension", {"hipLaunchAttributeClusterDimension", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + // CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE + {"cudaLaunchAttributeClusterSchedulingPolicyPreference", {"hipLaunchAttributeClusterSchedulingPolicyPreference", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + // CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION + {"cudaLaunchAttributeProgrammaticStreamSerialization", {"hipLaunchAttributeProgrammaticStreamSerialization", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + // CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT + {"cudaLaunchAttributeProgrammaticEvent", {"hipLaunchAttributeProgrammaticEvent", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + // CU_LAUNCH_ATTRIBUTE_PRIORITY + {"cudaLaunchAttributePriority", {"hipLaunchAttributePriority", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + // 4. Typedefs // CUhostFn @@ -1706,7 +1769,7 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { {"cudaMemPool_t", {"hipMemPool_t", "", CONV_TYPE, API_RUNTIME, 36}}, // CUuserObject - {"cudaUserObject_t", {"hipUserObject_t", "", CONV_TYPE, API_RUNTIME, 36, HIP_EXPERIMENTAL}}, + {"cudaUserObject_t", {"hipUserObject_t", "", CONV_TYPE, API_RUNTIME, 36}}, // 5. 
Defines @@ -1840,6 +1903,10 @@ const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_TYPE_NAME_MAP { {"cudaStreamPerThread", {"hipStreamPerThread", "", CONV_DEFINE, API_RUNTIME, 36}}, // ((cudaStream_t)0x2) // CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL {"cudaArraySparsePropertiesSingleMipTail", {"hipArraySparsePropertiesSingleMipTail", "", CONV_DEFINE, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 0x1 + // CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_DIMENSION + {"cudaKernelNodeAttributeClusterDimension", {"hipKernelNodeAttributeClusterDimension", "", CONV_DEFINE, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // cudaLaunchAttributeClusterDimension + // CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE + {"cudaKernelNodeAttributeClusterSchedulingPolicyPreference", {"hipKernelNodeAttributeClusterSchedulingPolicyPreference", "", CONV_DEFINE, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // cudaLaunchAttributeClusterSchedulingPolicyPreference }; const std::map<llvm::StringRef, cudaAPIversions> CUDA_RUNTIME_TYPE_NAME_VER_MAP { @@ -2272,6 +2339,36 @@ const std::map<llvm::StringRef, cudaAPIversions> CUDA_RUNTIME_TYPE_NAME_VER_MAP {"cudaGraphExecUpdateErrorAttributesChanged", {CUDA_116, CUDA_0, CUDA_0 }}, {"cudaKernelNodeAttributePriority", {CUDA_117, CUDA_0, CUDA_0 }}, {"cudaGraphInstantiateFlagUseNodePriority", {CUDA_117, CUDA_0, CUDA_0 }}, + {"cudaErrorMpsClientTerminated", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaErrorInvalidClusterSize", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaClusterSchedulingPolicy", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaClusterSchedulingPolicyDefault", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaClusterSchedulingPolicySpread", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaClusterSchedulingPolicyLoadBalancing", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaFuncAttributeClusterDimMustBeSet", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaFuncAttributeRequiredClusterWidth", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaFuncAttributeRequiredClusterHeight", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaFuncAttributeRequiredClusterDepth", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaFuncAttributeNonPortableClusterSizeAllowed",
{CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaFuncAttributeClusterSchedulingPolicyPreference", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaDevAttrClusterLaunch", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaLaunchAttributeID", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaLaunchAttributeIgnore", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaLaunchAttributeAccessPolicyWindow", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaLaunchAttributeCooperative", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaLaunchAttributeSynchronizationPolicy", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaLaunchAttributeClusterDimension", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaLaunchAttributeClusterSchedulingPolicyPreference", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaLaunchAttributeProgrammaticStreamSerialization", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaLaunchAttributeProgrammaticEvent", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaLaunchAttributePriority", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaLaunchAttributeValue", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaLaunchAttribute_st", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaLaunchAttribute", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaLaunchConfig_st", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaLaunchConfig_t", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaKernelNodeAttributeClusterDimension", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaKernelNodeAttributeClusterSchedulingPolicyPreference", {CUDA_118, CUDA_0, CUDA_0 }}, }; const std::map<llvm::StringRef, hipAPIversions> HIP_RUNTIME_TYPE_NAME_VER_MAP { @@ -2477,17 +2574,17 @@ const std::map<llvm::StringRef, hipAPIversions> HIP_RUNTIME_TYPE_NAME_VER_MAP { {"hipMemPoolPtrExportData", {HIP_5020, HIP_0, HIP_0 }}, {"hipGraphInstantiateFlags", {HIP_5020, HIP_0, HIP_0 }}, {"hipGraphInstantiateFlagAutoFreeOnLaunch", {HIP_5020, HIP_0, HIP_0 }}, - {"hipMemoryTypeManaged", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipLimitStackSize", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipGraphNodeTypeExtSemaphoreSignal", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipGraphNodeTypeExtSemaphoreWait", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipGraphMemAttributeType", {HIP_5030, HIP_0,
HIP_0, HIP_LATEST}}, - {"hipGraphMemAttrUsedMemCurrent", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipGraphMemAttrUsedMemHigh", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipGraphMemAttrReservedMemCurrent", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipGraphMemAttrReservedMemHigh", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipUserObjectFlags", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipUserObjectNoDestructorSync", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipUserObjectRetainFlags", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipGraphUserObjectMove", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipMemoryTypeManaged", {HIP_5030, HIP_0, HIP_0 }}, + {"hipLimitStackSize", {HIP_5030, HIP_0, HIP_0 }}, + {"hipGraphNodeTypeExtSemaphoreSignal", {HIP_5030, HIP_0, HIP_0 }}, + {"hipGraphNodeTypeExtSemaphoreWait", {HIP_5030, HIP_0, HIP_0 }}, + {"hipGraphMemAttributeType", {HIP_5030, HIP_0, HIP_0 }}, + {"hipGraphMemAttrUsedMemCurrent", {HIP_5030, HIP_0, HIP_0 }}, + {"hipGraphMemAttrUsedMemHigh", {HIP_5030, HIP_0, HIP_0 }}, + {"hipGraphMemAttrReservedMemCurrent", {HIP_5030, HIP_0, HIP_0 }}, + {"hipGraphMemAttrReservedMemHigh", {HIP_5030, HIP_0, HIP_0 }}, + {"hipUserObjectFlags", {HIP_5030, HIP_0, HIP_0 }}, + {"hipUserObjectNoDestructorSync", {HIP_5030, HIP_0, HIP_0 }}, + {"hipUserObjectRetainFlags", {HIP_5030, HIP_0, HIP_0 }}, + {"hipGraphUserObjectMove", {HIP_5030, HIP_0, HIP_0 }}, }; diff --git a/src/Statistics.cpp b/src/Statistics.cpp index ed6aea21..7925cbde 100644 --- a/src/Statistics.cpp +++ b/src/Statistics.cpp @@ -62,6 +62,7 @@ const char *counterNames[NUM_CONV_TYPES] = { "library", // CONV_LIB_FUNC "device_library", // CONV_LIB_DEVICE_FUNC "device_function", // CONV_DEVICE_FUNC + "device_type", // CONV_DEVICE_TYPE "include", // CONV_INCLUDE "include_cuda_main_header", // CONV_INCLUDE_CUDA_MAIN_H "include_cuda_main_header_v2", // CONV_INCLUDE_CUDA_MAIN_V2_H @@ -348,7 +349,7 @@ void Statistics::setActive(const std::string &name) { } bool Statistics::isToRoc(const 
hipCounter &counter) { - return TranslateToRoc && counter.apiType == API_BLAS; + return TranslateToRoc && (counter.apiType == API_BLAS || counter.apiType == API_DNN); } bool Statistics::isHipExperimental(const hipCounter& counter) { @@ -442,6 +443,7 @@ std::string Statistics::getCudaVersion(const cudaVersions& ver) { case CUDA_115: return "11.5"; case CUDA_116: return "11.6"; case CUDA_117: return "11.7"; + case CUDA_118: return "11.8"; case CUDNN_10: return "1.0.0"; case CUDNN_20: return "2.0.0"; case CUDNN_30: return "3.0.0"; @@ -475,6 +477,10 @@ std::string Statistics::getCudaVersion(const cudaVersions& ver) { case CUDNN_810: return "8.1.0"; case CUDNN_811: return "8.1.1"; case CUDNN_820: return "8.2.0"; + case CUDNN_830: return "8.3.0"; + case CUDNN_840: return "8.4.0"; + case CUDNN_850: return "8.5.0"; + case CUDNN_860: return "8.6.0"; } return ""; } @@ -488,6 +494,7 @@ std::string Statistics::getHipVersion(const hipVersions& ver) { case HIP_1052: return "1.5.2"; case HIP_1060: return "1.6.0"; case HIP_1061: return "1.6.1"; + case HIP_1064: return "1.6.4"; case HIP_1070: return "1.7.0"; case HIP_1071: return "1.7.1"; case HIP_1080: return "1.8.0"; @@ -538,6 +545,7 @@ std::string Statistics::getHipVersion(const hipVersions& ver) { case HIP_5011: return "5.1.1"; case HIP_5020: return "5.2.0"; case HIP_5030: return "5.3.0"; + case HIP_5040: return "5.4.0"; } return ""; } diff --git a/src/Statistics.h b/src/Statistics.h index 02b4da7d..79dd6a78 100644 --- a/src/Statistics.h +++ b/src/Statistics.h @@ -119,6 +119,7 @@ enum ConvTypes { CONV_LIB_FUNC, CONV_LIB_DEVICE_FUNC, CONV_DEVICE_FUNC, + CONV_DEVICE_TYPE, CONV_INCLUDE, CONV_INCLUDE_CUDA_MAIN_H, CONV_INCLUDE_CUDA_MAIN_V2_H, @@ -198,6 +199,7 @@ enum cudaVersions { CUDA_115 = 11050, CUDA_116 = 11060, CUDA_117 = 11070, + CUDA_118 = 11080, CUDNN_10 = 100, CUDNN_20 = 200, CUDNN_30 = 300, @@ -231,6 +233,10 @@ enum cudaVersions { CUDNN_810 = 810, CUDNN_811 = 811, CUDNN_820 = 820, + CUDNN_830 = 830, + CUDNN_840 = 840, + 
CUDNN_850 = 850, + CUDNN_860 = 860, }; enum hipVersions { @@ -240,6 +246,7 @@ enum hipVersions { HIP_1052 = 1052, HIP_1060 = 1060, HIP_1061 = 1061, + HIP_1064 = 1064, HIP_1070 = 1070, HIP_1071 = 1071, HIP_1080 = 1080, @@ -290,7 +297,8 @@ enum hipVersions { HIP_5011 = 5011, HIP_5020 = 5020, HIP_5030 = 5030, - HIP_LATEST = HIP_5030, + HIP_5040 = 5040, + HIP_LATEST = HIP_5040, }; struct cudaAPIversions { diff --git a/tests/unit_tests/synthetic/driver_enums.cu b/tests/unit_tests/synthetic/driver_enums.cu index b97398c3..8368e648 100644 --- a/tests/unit_tests/synthetic/driver_enums.cu +++ b/tests/unit_tests/synthetic/driver_enums.cu @@ -313,21 +313,21 @@ int main() { // CHECK: hipJitOption jit_option; // CHECK-NEXT: hipJitOption jit_option_enum; - // CHECK-NEXT: hipJitOption JIT_MAX_REGISTERS = hipJitOptionMaxRegisters; - // CHECK-NEXT: hipJitOption JIT_THREADS_PER_BLOCK = hipJitOptionThreadsPerBlock; - // CHECK-NEXT: hipJitOption JIT_WALL_TIME = hipJitOptionWallTime; - // CHECK-NEXT: hipJitOption JIT_INFO_LOG_BUFFER = hipJitOptionInfoLogBuffer; - // CHECK-NEXT: hipJitOption JIT_INFO_LOG_BUFFER_SIZE_BYTES = hipJitOptionInfoLogBufferSizeBytes; - // CHECK-NEXT: hipJitOption JIT_ERROR_LOG_BUFFER = hipJitOptionErrorLogBuffer; - // CHECK-NEXT: hipJitOption JIT_ERROR_LOG_BUFFER_SIZE_BYTES = hipJitOptionErrorLogBufferSizeBytes; - // CHECK-NEXT: hipJitOption JIT_OPTIMIZATION_LEVEL = hipJitOptionOptimizationLevel; - // CHECK-NEXT: hipJitOption JIT_TARGET_FROM_CUCONTEXT = hipJitOptionTargetFromContext; - // CHECK-NEXT: hipJitOption JIT_TARGET = hipJitOptionTarget; - // CHECK-NEXT: hipJitOption JIT_FALLBACK_STRATEGY = hipJitOptionFallbackStrategy; - // CHECK-NEXT: hipJitOption JIT_GENERATE_DEBUG_INFO = hipJitOptionGenerateDebugInfo; - // CHECK-NEXT: hipJitOption JIT_LOG_VERBOSE = hipJitOptionLogVerbose; - // CHECK-NEXT: hipJitOption JIT_GENERATE_LINE_INFO = hipJitOptionGenerateLineInfo; - // CHECK-NEXT: hipJitOption JIT_CACHE_MODE = hipJitOptionCacheMode; + // CHECK-NEXT: 
hipJitOption JIT_MAX_REGISTERS = HIPRTC_JIT_MAX_REGISTERS; + // CHECK-NEXT: hipJitOption JIT_THREADS_PER_BLOCK = HIPRTC_JIT_THREADS_PER_BLOCK; + // CHECK-NEXT: hipJitOption JIT_WALL_TIME = HIPRTC_JIT_WALL_TIME; + // CHECK-NEXT: hipJitOption JIT_INFO_LOG_BUFFER = HIPRTC_JIT_INFO_LOG_BUFFER; + // CHECK-NEXT: hipJitOption JIT_INFO_LOG_BUFFER_SIZE_BYTES = HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES; + // CHECK-NEXT: hipJitOption JIT_ERROR_LOG_BUFFER = HIPRTC_JIT_ERROR_LOG_BUFFER; + // CHECK-NEXT: hipJitOption JIT_ERROR_LOG_BUFFER_SIZE_BYTES = HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES; + // CHECK-NEXT: hipJitOption JIT_OPTIMIZATION_LEVEL = HIPRTC_JIT_OPTIMIZATION_LEVEL; + // CHECK-NEXT: hipJitOption JIT_TARGET_FROM_CUCONTEXT = HIPRTC_JIT_TARGET_FROM_HIPCONTEXT; + // CHECK-NEXT: hipJitOption JIT_TARGET = HIPRTC_JIT_TARGET; + // CHECK-NEXT: hipJitOption JIT_FALLBACK_STRATEGY = HIPRTC_JIT_FALLBACK_STRATEGY; + // CHECK-NEXT: hipJitOption JIT_GENERATE_DEBUG_INFO = HIPRTC_JIT_GENERATE_DEBUG_INFO; + // CHECK-NEXT: hipJitOption JIT_LOG_VERBOSE = HIPRTC_JIT_LOG_VERBOSE; + // CHECK-NEXT: hipJitOption JIT_GENERATE_LINE_INFO = HIPRTC_JIT_GENERATE_LINE_INFO; + // CHECK-NEXT: hipJitOption JIT_CACHE_MODE = HIPRTC_JIT_CACHE_MODE; CUjit_option jit_option; CUjit_option_enum jit_option_enum; CUjit_option JIT_MAX_REGISTERS = CU_JIT_MAX_REGISTERS; @@ -346,7 +346,7 @@ int main() { CUjit_option JIT_GENERATE_LINE_INFO = CU_JIT_GENERATE_LINE_INFO; CUjit_option JIT_CACHE_MODE = CU_JIT_CACHE_MODE; - // CHECK: hipJitOption JIT_NUM_OPTIONS = hipJitOptionNumOptions; + // CHECK: hipJitOption JIT_NUM_OPTIONS = HIPRTC_JIT_NUM_OPTIONS; CUjit_option JIT_NUM_OPTIONS = CU_JIT_NUM_OPTIONS; // CHECK: hipLimit_t limit; @@ -642,8 +642,8 @@ int main() { CUdevice_P2PAttribute DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED = CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED; CUdevice_P2PAttribute DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED = CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED; - // CHECK: hipJitOption JIT_NEW_SM3X_OPT = 
hipJitOptionSm3xOpt; - // CHECK-NEXT: hipJitOption JIT_FAST_COMPILE = hipJitOptionFastCompile; + // CHECK: hipJitOption JIT_NEW_SM3X_OPT = HIPRTC_JIT_NEW_SM3X_OPT; + // CHECK-NEXT: hipJitOption JIT_FAST_COMPILE = HIPRTC_JIT_FAST_COMPILE; CUjit_option JIT_NEW_SM3X_OPT = CU_JIT_NEW_SM3X_OPT; CUjit_option JIT_FAST_COMPILE = CU_JIT_FAST_COMPILE; @@ -913,15 +913,6 @@ int main() { // CHECK: hipDeviceAttribute_t DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR = hipDeviceAttributeMaxBlocksPerMultiprocessor; CUdevice_attribute DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR = CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR; - // CHECK: hipKernelNodeAttrID kernelNodeAttrID; - // CHECK-NEXT: hipKernelNodeAttrID kernelNodeAttrID_enum; - // CHECK-NEXT: hipKernelNodeAttrID KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW = hipKernelNodeAttributeAccessPolicyWindow; - // CHECK-NEXT: hipKernelNodeAttrID KERNEL_NODE_ATTRIBUTE_COOPERATIVE = hipKernelNodeAttributeCooperative; - CUkernelNodeAttrID kernelNodeAttrID; - CUkernelNodeAttrID_enum kernelNodeAttrID_enum; - CUkernelNodeAttrID KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW = CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW; - CUkernelNodeAttrID KERNEL_NODE_ATTRIBUTE_COOPERATIVE = CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE; - // CHECK: hipAccessProperty accessProperty; // CHECK-NEXT: hipAccessProperty accessProperty_enum; // CHECK-NEXT: hipAccessProperty ACCESS_PROPERTY_NORMAL = hipAccessPropertyNormal; @@ -937,7 +928,19 @@ int main() { CUpointer_attribute POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE = CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE; #endif +#if CUDA_VERSION >= 11000 && CUDA_VERSION < 11080 + // CHECK: hipKernelNodeAttrID kernelNodeAttrID_enum; + CUkernelNodeAttrID_enum kernelNodeAttrID_enum; +#endif + #if CUDA_VERSION >= 11010 + // CHECK: hipKernelNodeAttrID kernelNodeAttrID; + // CHECK-NEXT: hipKernelNodeAttrID KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW = hipKernelNodeAttributeAccessPolicyWindow; + // CHECK-NEXT: 
hipKernelNodeAttrID KERNEL_NODE_ATTRIBUTE_COOPERATIVE = hipKernelNodeAttributeCooperative; + CUkernelNodeAttrID kernelNodeAttrID; + CUkernelNodeAttrID KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW = CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW; + CUkernelNodeAttrID KERNEL_NODE_ATTRIBUTE_COOPERATIVE = CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE; + // CHECK: hipGraphNodeType GRAPH_NODE_TYPE_WAIT_EVENT = hipGraphNodeTypeWaitEvent; // CHECK-NEXT: hipGraphNodeType GRAPH_NODE_TYPE_EVENT_RECORD = hipGraphNodeTypeEventRecord; CUgraphNodeType GRAPH_NODE_TYPE_WAIT_EVENT = CU_GRAPH_NODE_TYPE_WAIT_EVENT; diff --git a/tests/unit_tests/synthetic/driver_functions.cu b/tests/unit_tests/synthetic/driver_functions.cu index 736e7bdc..d77eee21 100644 --- a/tests/unit_tests/synthetic/driver_functions.cu +++ b/tests/unit_tests/synthetic/driver_functions.cu @@ -527,6 +527,16 @@ int main() { // CHECK: result = hipDevicePrimaryCtxSetFlags(device, flags); result = cuDevicePrimaryCtxSetFlags_v2(device, flags); + // CUDA: CUresult CUDAAPI cuMemRetainAllocationHandle(CUmemGenericAllocationHandle *handle, void *addr); + // HIP: hipError_t hipMemRetainAllocationHandle(hipMemGenericAllocationHandle_t* handle, void* addr); + // CHECK: result = hipMemRetainAllocationHandle(&memGenericAllocationHandle_t, image); + result = cuMemRetainAllocationHandle(&memGenericAllocationHandle_t, image); + + // CHECK: result = hipGraphInstantiate(&graphExec, graph, &graphNode, nullptr, bytes); + result = cuGraphInstantiate_v2(&graphExec, graph, &graphNode, nullptr, bytes); +#endif + +#if CUDA_VERSION >= 11000 // CHECK: hipKernelNodeAttrID kernelNodeAttrID; CUkernelNodeAttrID kernelNodeAttrID; // CHECK: hipKernelNodeAttrValue kernelNodeAttrValue; @@ -542,16 +552,6 @@ int main() { // CHECK: result = hipGraphKernelNodeGetAttribute(graphNode, kernelNodeAttrID, &kernelNodeAttrValue); result = cuGraphKernelNodeGetAttribute(graphNode, kernelNodeAttrID, &kernelNodeAttrValue); - // CUDA: CUresult CUDAAPI 
cuMemRetainAllocationHandle(CUmemGenericAllocationHandle *handle, void *addr); - // HIP: hipError_t hipMemRetainAllocationHandle(hipMemGenericAllocationHandle_t* handle, void* addr); - // CHECK: result = hipMemRetainAllocationHandle(&memGenericAllocationHandle_t, image); - result = cuMemRetainAllocationHandle(&memGenericAllocationHandle_t, image); - - // CHECK: result = hipGraphInstantiate(&graphExec, graph, &graphNode, nullptr, bytes); - result = cuGraphInstantiate_v2(&graphExec, graph, &graphNode, nullptr, bytes); -#endif - -#if CUDA_VERSION >= 11010 // CUDA: CUresult CUDAAPI cuGraphExecChildGraphNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, CUgraph childGraph); // HIP: hipError_t hipGraphExecChildGraphNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, hipGraph_t childGraph); // CHECK: result = hipGraphExecChildGraphNodeSetParams(graphExec, graphNode, graph); diff --git a/tests/unit_tests/synthetic/driver_unions.cu b/tests/unit_tests/synthetic/driver_unions.cu index 1d9bddc4..0c375f96 100644 --- a/tests/unit_tests/synthetic/driver_unions.cu +++ b/tests/unit_tests/synthetic/driver_unions.cu @@ -9,8 +9,11 @@ int main() { #if CUDA_VERSION >= 11000 // CHECK: hipKernelNodeAttrValue kernelNodeAttrValue; - // CHECK-NEXT: hipKernelNodeAttrValue kernelNodeAttrValue_union; CUkernelNodeAttrValue kernelNodeAttrValue; +#endif + +#if CUDA_VERSION >= 11000 && CUDA_VERSION < 11080 + // CHECK: hipKernelNodeAttrValue kernelNodeAttrValue_union; CUkernelNodeAttrValue_union kernelNodeAttrValue_union; #endif diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu index 61643cdd..0a24616d 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu @@ -144,6 +144,7 @@ int main() { blasStatus = cublasGetPointerMode_v2(blasHandle, &blasPointerMode); int n = 0; + int nrhs = 0; int m = 0; int num = 0; int lda = 0; @@ -156,6 +157,8 @@ 
int main() { int kl = 0; int ku = 0; int batchCount = 0; + int P = 0; + int info = 0; void* image = nullptr; void* image_2 = nullptr; void* deviceptr = nullptr; @@ -221,6 +224,7 @@ int main() { float** fAarray = 0; float** fBarray = 0; float** fCarray = 0; + float** fTauarray = 0; double da = 0; double dA = 0; @@ -240,6 +244,11 @@ int main() { double** dAarray = 0; double** dBarray = 0; double** dCarray = 0; + double** dTauarray = 0; + + void** voidAarray = nullptr; + void** voidBarray = nullptr; + void** voidCarray = nullptr; // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSnrm2_v2(cublasHandle_t handle, int n, const float* x, int incx, float* result); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSnrm2(hipblasHandle_t handle, int n, const float* x, int incx, float* result); @@ -263,16 +272,20 @@ int main() { // CHECK: hipComplex** complexAarray = 0; // CHECK-NEXT: hipComplex** complexBarray = 0; // CHECK-NEXT: hipComplex** complexCarray = 0; + // CHECK-NEXT: hipComplex** complexTauarray = 0; cuComplex** complexAarray = 0; cuComplex** complexBarray = 0; cuComplex** complexCarray = 0; + cuComplex** complexTauarray = 0; // CHECK: hipDoubleComplex** dcomplexAarray = 0; // CHECK-NEXT: hipDoubleComplex** dcomplexBarray = 0; // CHECK-NEXT: hipDoubleComplex** dcomplexCarray = 0; + // CHECK-NEXT: hipDoubleComplex** dcomplexTauarray = 0; cuDoubleComplex** dcomplexAarray = 0; cuDoubleComplex** dcomplexBarray = 0; cuDoubleComplex** dcomplexCarray = 0; + cuDoubleComplex** dcomplexTauarray = 0; // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasScnrm2_v2(cublasHandle_t handle, int n, const cuComplex* x, int incx, float* result); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasScnrm2(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, float* result); @@ -1192,6 +1205,330 @@ int main() { // CHECK: blasStatus = hipblasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, &dcomplexb, dcomplexCarray, ldc, 
batchCount); blasStatus = cublasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyrk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const float* alpha, const float* A, int lda, const float* beta, float* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSsyrk(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* AP, int lda, const float* beta, float* CP, int ldc); + // CHECK: blasStatus = hipblasSsyrk(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fb, &fC, ldc); + // CHECK-NEXT: blasStatus = hipblasSsyrk(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fb, &fC, ldc); + blasStatus = cublasSsyrk(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fb, &fC, ldc); + blasStatus = cublasSsyrk_v2(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fb, &fC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyrk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const double* alpha, const double* A, int lda, const double* beta, double* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDsyrk(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* AP, int lda, const double* beta, double* CP, int ldc); + // CHECK: blasStatus = hipblasDsyrk(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &db, &dC, ldc); + // CHECK-NEXT: blasStatus = hipblasDsyrk(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &db, &dC, ldc); + blasStatus = cublasDsyrk(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &db, &dC, ldc); + blasStatus = cublasDsyrk_v2(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &db, &dC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t 
CUBLASWINAPI cublasCsyrk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* beta, cuComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCsyrk(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* AP, int lda, const hipblasComplex* beta, hipblasComplex* CP, int ldc); + // CHECK: blasStatus = hipblasCsyrk(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, &complexC, ldc); + // CHECK-NEXT: blasStatus = hipblasCsyrk(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, &complexC, ldc); + blasStatus = cublasCsyrk(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, &complexC, ldc); + blasStatus = cublasCsyrk_v2(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, &complexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyrk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZsyrk(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int lda, const hipblasDoubleComplex* beta, hipblasDoubleComplex* CP, int ldc); + // CHECK: blasStatus = hipblasZsyrk(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, &dcomplexC, ldc); + // CHECK-NEXT: blasStatus = hipblasZsyrk(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, &dcomplexC, ldc); + blasStatus = cublasZsyrk(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, &dcomplexC, ldc); + blasStatus = 
cublasZsyrk_v2(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, &dcomplexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCherk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const float* alpha, const cuComplex* A, int lda, const float* beta, cuComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCherk(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const hipblasComplex* AP, int lda, const float* beta, hipblasComplex* CP, int ldc); + // CHECK: blasStatus = hipblasCherk(blasHandle, blasFillMode, transa, n, k, &fa, &complexA, lda, &fb, &complexC, ldc); + // CHECK-NEXT: blasStatus = hipblasCherk(blasHandle, blasFillMode, transa, n, k, &fa, &complexA, lda, &fb, &complexC, ldc); + blasStatus = cublasCherk(blasHandle, blasFillMode, transa, n, k, &fa, &complexA, lda, &fb, &complexC, ldc); + blasStatus = cublasCherk_v2(blasHandle, blasFillMode, transa, n, k, &fa, &complexA, lda, &fb, &complexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZherk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const double* alpha, const cuDoubleComplex* A, int lda, const double* beta, cuDoubleComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZherk(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const hipblasDoubleComplex* AP, int lda, const double* beta, hipblasDoubleComplex* CP, int ldc); + // CHECK: blasStatus = hipblasZherk(blasHandle, blasFillMode, transa, n, k, &da, &dcomplexA, lda, &db, &dcomplexC, ldc); + // CHECK-NEXT: blasStatus = hipblasZherk(blasHandle, blasFillMode, transa, n, k, &da, &dcomplexA, lda, &db, &dcomplexC, ldc); + blasStatus = cublasZherk(blasHandle, blasFillMode, transa, n, k, &da, &dcomplexA, lda, &db, &dcomplexC, ldc); + blasStatus = cublasZherk_v2(blasHandle, 
blasFillMode, transa, n, k, &da, &dcomplexA, lda, &db, &dcomplexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyr2k_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const float* alpha, const float* A, int lda, const float* B, int ldb, const float* beta, float* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSsyr2k(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* AP, int lda, const float* BP, int ldb, const float* beta, float* CP, int ldc); + // CHECK: blasStatus = hipblasSsyr2k(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fb, ldb, &fb, &fC, ldc); + // CHECK-NEXT: blasStatus = hipblasSsyr2k(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fb, ldb, &fb, &fC, ldc); + blasStatus = cublasSsyr2k(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fb, ldb, &fb, &fC, ldc); + blasStatus = cublasSsyr2k_v2(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fb, ldb, &fb, &fC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyr2k_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const double* alpha, const double* A, int lda, const double* B, int ldb, const double* beta, double* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDsyr2k(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* AP, int lda, const double* BP, int ldb, const double* beta, double* CP, int ldc); + // CHECK: blasStatus = hipblasDsyr2k(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &db, ldb, &db, &dC, ldc); + // CHECK-NEXT: blasStatus = hipblasDsyr2k(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &db, ldb, &db, &dC, ldc); + blasStatus = cublasDsyr2k(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &db, ldb, &db, &dC, ldc); + blasStatus = cublasDsyr2k_v2(blasHandle, blasFillMode, 
transa, n, k, &da, &dA, lda, &db, ldb, &db, &dC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyr2k_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, const cuComplex* beta, cuComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCsyr2k(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* AP, int lda, const hipblasComplex* BP, int ldb, const hipblasComplex* beta, hipblasComplex* CP, int ldc); + // CHECK: blasStatus = hipblasCsyr2k(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, ldb, &complexb, &complexC, ldc); + // CHECK-NEXT: blasStatus = hipblasCsyr2k(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, ldb, &complexb, &complexC, ldc); + blasStatus = cublasCsyr2k(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, ldb, &complexb, &complexC, ldc); + blasStatus = cublasCsyr2k_v2(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, ldb, &complexb, &complexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyr2k_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZsyr2k(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int lda, const hipblasDoubleComplex* BP, int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* CP, int ldc); + // CHECK: blasStatus = hipblasZsyr2k(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, ldb, 
&dcomplexb, &dcomplexC, ldc); + // CHECK-NEXT: blasStatus = hipblasZsyr2k(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, ldb, &dcomplexb, &dcomplexC, ldc); + blasStatus = cublasZsyr2k(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, ldb, &dcomplexb, &dcomplexC, ldc); + blasStatus = cublasZsyr2k_v2(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, ldb, &dcomplexb, &dcomplexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyrkx(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const float* alpha, const float* A, int lda, const float* B, int ldb, const float* beta, float* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSsyrkx(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* AP, int lda, const float* BP, int ldb, const float* beta, float* CP, int ldc); + // CHECK: blasStatus = hipblasSsyrkx(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); + blasStatus = cublasSsyrkx(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyrkx(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const double* alpha, const double* A, int lda, const double* B, int ldb, const double* beta, double* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDsyrkx(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* AP, int lda, const double* BP, int ldb, const double* beta, double* CP, int ldc); + // CHECK: blasStatus = hipblasDsyrkx(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); + blasStatus = cublasDsyrkx(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); + + // CUDA: CUBLASAPI 
cublasStatus_t CUBLASWINAPI cublasCsyrkx(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, const cuComplex* beta, cuComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCsyrkx(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* AP, int lda, const hipblasComplex* BP, int ldb, const hipblasComplex* beta, hipblasComplex* CP, int ldc); + // CHECK: blasStatus = hipblasCsyrkx(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + blasStatus = cublasCsyrkx(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyrkx(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZsyrkx(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int lda, const hipblasDoubleComplex* BP, int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* CP, int ldc); + // CHECK: blasStatus = hipblasZsyrkx(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + blasStatus = cublasZsyrkx(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCher2k_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuComplex* alpha, const cuComplex* A, int lda, const 
cuComplex* B, int ldb, const float* beta, cuComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCher2k(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* AP, int lda, const hipblasComplex* BP, int ldb, const float* beta, hipblasComplex* CP, int ldc); + // CHECK: blasStatus = hipblasCher2k(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, ldb, &fb, &complexC, ldc); + // CHECK-NEXT: blasStatus = hipblasCher2k(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, ldb, &fb, &complexC, ldc); + blasStatus = cublasCher2k(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, ldb, &fb, &complexC, ldc); + blasStatus = cublasCher2k_v2(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, ldb, &fb, &complexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZher2k_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, const double* beta, cuDoubleComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZher2k(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int lda, const hipblasDoubleComplex* BP, int ldb, const double* beta, hipblasDoubleComplex* CP, int ldc); + // CHECK: blasStatus = hipblasZher2k(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, ldb, &db, &dcomplexC, ldc); + // CHECK-NEXT: blasStatus = hipblasZher2k(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, ldb, &db, &dcomplexC, ldc); + blasStatus = cublasZher2k(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, ldb, &db, &dcomplexC, ldc); + blasStatus = 
cublasZher2k_v2(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, ldb, &db, &dcomplexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCherkx(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, const float* beta, cuComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCherkx(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* AP, int lda, const hipblasComplex* BP, int ldb, const float* beta, hipblasComplex* CP, int ldc); + // CHECK: blasStatus = hipblasCherkx(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexB, ldb, &fb, &complexC, ldc); + blasStatus = cublasCherkx(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexB, ldb, &fb, &complexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZherkx(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, const double* beta, cuDoubleComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZherkx(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int lda, const hipblasDoubleComplex* BP, int ldb, const double* beta, hipblasDoubleComplex* CP, int ldc); + // CHECK: blasStatus = hipblasZherkx(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &db, &dcomplexC, ldc); + blasStatus = cublasZherkx(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &db, &dcomplexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsymm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t 
uplo, int m, int n, const float* alpha, const float* A, int lda, const float* B, int ldb, const float* beta, float* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSsymm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const float* alpha, const float* AP, int lda, const float* BP, int ldb, const float* beta, float* CP, int ldc); + // CHECK: blasStatus = hipblasSsymm(blasHandle, blasSideMode, blasFillMode, m, n, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); + // CHECK-NEXT: blasStatus = hipblasSsymm(blasHandle, blasSideMode, blasFillMode, m, n, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); + blasStatus = cublasSsymm(blasHandle, blasSideMode, blasFillMode, m, n, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); + blasStatus = cublasSsymm_v2(blasHandle, blasSideMode, blasFillMode, m, n, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsymm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, int n, const double* alpha, const double* A, int lda, const double* B, int ldb, const double* beta, double* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDsymm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const double* alpha, const double* AP, int lda, const double* BP, int ldb, const double* beta, double* CP, int ldc); + // CHECK: blasStatus = hipblasDsymm(blasHandle, blasSideMode, blasFillMode, m, n, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); + // CHECK-NEXT: blasStatus = hipblasDsymm(blasHandle, blasSideMode, blasFillMode, m, n, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); + blasStatus = cublasDsymm(blasHandle, blasSideMode, blasFillMode, m, n, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); + blasStatus = cublasDsymm_v2(blasHandle, blasSideMode, blasFillMode, m, n, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsymm_v2(cublasHandle_t handle, cublasSideMode_t side, 
cublasFillMode_t uplo, int m, int n, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, const cuComplex* beta, cuComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCsymm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasComplex* alpha, const hipblasComplex* AP, int lda, const hipblasComplex* BP, int ldb, const hipblasComplex* beta, hipblasComplex* CP, int ldc); + // CHECK: blasStatus = hipblasCsymm(blasHandle, blasSideMode, blasFillMode, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + // CHECK-NEXT: blasStatus = hipblasCsymm(blasHandle, blasSideMode, blasFillMode, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + blasStatus = cublasCsymm(blasHandle, blasSideMode, blasFillMode, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + blasStatus = cublasCsymm_v2(blasHandle, blasSideMode, blasFillMode, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsymm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZsymm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int lda, const hipblasDoubleComplex* BP, int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* CP, int ldc); + // CHECK: blasStatus = hipblasZsymm(blasHandle, blasSideMode, blasFillMode, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + // CHECK-NEXT: blasStatus = hipblasZsymm(blasHandle, blasSideMode, blasFillMode, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, 
&dcomplexb, &dcomplexC, ldc); + blasStatus = cublasZsymm(blasHandle, blasSideMode, blasFillMode, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + blasStatus = cublasZsymm_v2(blasHandle, blasSideMode, blasFillMode, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChemm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, int n, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, const cuComplex* beta, cuComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasChemm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasComplex* alpha, const hipblasComplex* AP, int lda, const hipblasComplex* BP, int ldb, const hipblasComplex* beta, hipblasComplex* CP, int ldc); + // CHECK: blasStatus = hipblasChemm(blasHandle, blasSideMode, blasFillMode, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + // CHECK-NEXT: blasStatus = hipblasChemm(blasHandle, blasSideMode, blasFillMode, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + blasStatus = cublasChemm(blasHandle, blasSideMode, blasFillMode, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + blasStatus = cublasChemm_v2(blasHandle, blasSideMode, blasFillMode, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhemm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZhemm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasDoubleComplex* alpha, 
const hipblasDoubleComplex* AP, int lda, const hipblasDoubleComplex* BP, int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* CP, int ldc); + // CHECK: blasStatus = hipblasZhemm(blasHandle, blasSideMode, blasFillMode, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + // CHECK-NEXT: blasStatus = hipblasZhemm(blasHandle, blasSideMode, blasFillMode, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + blasStatus = cublasZhemm(blasHandle, blasSideMode, blasFillMode, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + blasStatus = cublasZhemm_v2(blasHandle, blasSideMode, blasFillMode, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const float* alpha, const float* A, int lda, float* B, int ldb); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasStrsm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float* alpha, float* AP, int lda, float* BP, int ldb); + // CHECK: blasStatus = hipblasStrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, &fA, lda, &fB, ldb); + // CHECK-NEXT: blasStatus = hipblasStrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, &fA, lda, &fB, ldb); + blasStatus = cublasStrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, &fA, lda, &fB, ldb); + blasStatus = cublasStrsm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, &fA, lda, &fB, ldb); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int 
n, const double* alpha, const double* A, int lda, double* B, int ldb); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDtrsm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double* alpha, double* AP, int lda, double* BP, int ldb); + // CHECK: blasStatus = hipblasDtrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb); + // CHECK-NEXT: blasStatus = hipblasDtrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb); + blasStatus = cublasDtrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb); + blasStatus = cublasDtrsm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuComplex* alpha, const cuComplex* A, int lda, cuComplex* B, int ldb); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCtrsm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, hipblasComplex* AP, int lda, hipblasComplex* BP, int ldb); + // CHECK: blasStatus = hipblasCtrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb); + // CHECK-NEXT: blasStatus = hipblasCtrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb); + blasStatus = cublasCtrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb); + blasStatus = cublasCtrsm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb); + + // CUDA: 
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, cuDoubleComplex* B, int ldb); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZtrsm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* AP, int lda, hipblasDoubleComplex* BP, int ldb); + // CHECK: blasStatus = hipblasZtrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb); + // CHECK-NEXT: blasStatus = hipblasZtrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb); + blasStatus = cublasZtrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb); + blasStatus = cublasZtrsm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgeam(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, const float* alpha, const float* A, int lda, const float* beta, const float* B, int ldb, float* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgeam(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const float* alpha, const float* AP, int lda, const float* beta, const float* BP, int ldb, float* CP, int ldc); + // CHECK: blasStatus = hipblasSgeam(blasHandle, transa, transb, m, n, &fa, &fA, lda, &fb, &fB, ldb, &fC, ldc); + blasStatus = cublasSgeam(blasHandle, transa, transb, m, n, &fa, &fA, lda, &fb, &fB, ldb, &fC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgeam(cublasHandle_t handle, 
cublasOperation_t transa, cublasOperation_t transb, int m, int n, const double* alpha, const double* A, int lda, const double* beta, const double* B, int ldb, double* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgeam(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const double* alpha, const double* AP, int lda, const double* beta, const double* BP, int ldb, double* CP, int ldc); + // CHECK: blasStatus = hipblasDgeam(blasHandle, transa, transb, m, n, &da, &dA, lda, &db, &dB, ldb, &dC, ldc); + blasStatus = cublasDgeam(blasHandle, transa, transb, m, n, &da, &dA, lda, &db, &dB, ldb, &dC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgeam(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* beta, const cuComplex* B, int ldb, cuComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgeam(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const hipblasComplex* alpha, const hipblasComplex* AP, int lda, const hipblasComplex* beta, const hipblasComplex* BP, int ldb, hipblasComplex* CP, int ldc); + // CHECK: blasStatus = hipblasCgeam(blasHandle, transa, transb, m, n, &complexa, &complexA, lda, &complexb, &complexB, ldb, &complexC, ldc); + blasStatus = cublasCgeam(blasHandle, transa, transb, m, n, &complexa, &complexA, lda, &complexb, &complexB, ldb, &complexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgeam(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* beta, const cuDoubleComplex* B, int ldb, cuDoubleComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgeam(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const 
hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int lda, const hipblasDoubleComplex* beta, const hipblasDoubleComplex* BP, int ldb, hipblasDoubleComplex* CP, int ldc); + // CHECK: blasStatus = hipblasZgeam(blasHandle, transa, transb, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexb, &dcomplexB, ldb, &dcomplexC, ldc); + blasStatus = cublasZgeam(blasHandle, transa, transb, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexb, &dcomplexB, ldb, &dcomplexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgetrfBatched(cublasHandle_t handle, int n, float* const A[], int lda, int* P, int* info, int batchSize); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgetrfBatched(hipblasHandle_t handle, const int n, float* const A[], const int lda, int* ipiv, int* info, const int batchCount); + // CHECK: blasStatus = hipblasSgetrfBatched(blasHandle, n, fAarray, lda, &P, &info, batchCount); + blasStatus = cublasSgetrfBatched(blasHandle, n, fAarray, lda, &P, &info, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgetrfBatched(cublasHandle_t handle, int n, double* const A[], int lda, int* P, int* info, int batchSize); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgetrfBatched(hipblasHandle_t handle, const int n, double* const A[], const int lda, int* ipiv, int* info, const int batchCount); + // CHECK: blasStatus = hipblasDgetrfBatched(blasHandle, n, dAarray, lda, &P, &info, batchCount); + blasStatus = cublasDgetrfBatched(blasHandle, n, dAarray, lda, &P, &info, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgetrfBatched(cublasHandle_t handle, int n, cuComplex* const A[], int lda, int* P, int* info, int batchSize); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgetrfBatched(hipblasHandle_t handle, const int n, hipblasComplex* const A[], const int lda, int* ipiv, int* info, const int batchCount); + // CHECK: blasStatus = hipblasCgetrfBatched(blasHandle, n, complexAarray, lda, &P, &info, batchCount); + blasStatus = 
cublasCgetrfBatched(blasHandle, n, complexAarray, lda, &P, &info, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgetrfBatched(cublasHandle_t handle, int n, cuDoubleComplex* const A[], int lda, int* P, int* info, int batchSize); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgetrfBatched(hipblasHandle_t handle, const int n, hipblasDoubleComplex* const A[], const int lda, int* ipiv, int* info, const int batchCount); + // CHECK: blasStatus = hipblasZgetrfBatched(blasHandle, n, dcomplexAarray, lda, &P, &info, batchCount); + blasStatus = cublasZgetrfBatched(blasHandle, n, dcomplexAarray, lda, &P, &info, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgetriBatched(cublasHandle_t handle, int n, const float* const A[], int lda, const int* P, float* const C[], int ldc, int* info, int batchSize); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgetriBatched(hipblasHandle_t handle, const int n, float* const A[], const int lda, int* ipiv, float* const C[], const int ldc, int* info, const int batchCount); + // CHECK: blasStatus = hipblasSgetriBatched(blasHandle, n, fAarray, lda, &P, fCarray, ldc, &info, batchCount); + blasStatus = cublasSgetriBatched(blasHandle, n, fAarray, lda, &P, fCarray, ldc, &info, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgetriBatched(cublasHandle_t handle, int n, const double* const A[], int lda, const int* P, double* const C[], int ldc, int* info, int batchSize); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgetriBatched(hipblasHandle_t handle, const int n, double* const A[], const int lda, int* ipiv, double* const C[], const int ldc, int* info, const int batchCount); + // CHECK: blasStatus = hipblasDgetriBatched(blasHandle, n, dAarray, lda, &P, dCarray, ldc, &info, batchCount); + blasStatus = cublasDgetriBatched(blasHandle, n, dAarray, lda, &P, dCarray, ldc, &info, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgetriBatched(cublasHandle_t handle, 
int n, const cuComplex* const A[], int lda, const int* P, cuComplex* const C[], int ldc, int* info, int batchSize); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgetriBatched(hipblasHandle_t handle, const int n, hipblasComplex* const A[], const int lda, int* ipiv, hipblasComplex* const C[], const int ldc, int* info, const int batchCount); + // CHECK: blasStatus = hipblasCgetriBatched(blasHandle, n, complexAarray, lda, &P, complexCarray, ldc, &info, batchCount); + blasStatus = cublasCgetriBatched(blasHandle, n, complexAarray, lda, &P, complexCarray, ldc, &info, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgetriBatched(cublasHandle_t handle, int n, const cuDoubleComplex* const A[], int lda, const int* P, cuDoubleComplex* const C[], int ldc, int* info, int batchSize); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgetriBatched(hipblasHandle_t handle, const int n, hipblasDoubleComplex* const A[], const int lda, int* ipiv, hipblasDoubleComplex* const C[], const int ldc, int* info, const int batchCount); + // CHECK: blasStatus = hipblasZgetriBatched(blasHandle, n, dcomplexAarray, lda, &P, dcomplexCarray, ldc, &info, batchCount); + blasStatus = cublasZgetriBatched(blasHandle, n, dcomplexAarray, lda, &P, dcomplexCarray, ldc, &info, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const float* const Aarray[], int lda, const int* devIpiv, float* const Barray[], int ldb, int* info, int batchSize); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, float* const A[], const int lda, const int* ipiv, float* const B[], const int ldb, int* info, const int batchCount); + // CHECK: blasStatus = hipblasSgetrsBatched(blasHandle, transa, n, nrhs, fAarray, lda, &P, fBarray, ldb, &info, batchCount); + blasStatus = cublasSgetrsBatched(blasHandle, transa, n, nrhs, 
fAarray, lda, &P, fBarray, ldb, &info, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const double* const Aarray[], int lda, const int* devIpiv, double* const Barray[], int ldb, int* info, int batchSize); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, double* const A[], const int lda, const int* ipiv, double* const B[], const int ldb, int* info, const int batchCount); + // CHECK: blasStatus = hipblasDgetrsBatched(blasHandle, transa, n, nrhs, dAarray, lda, &P, dBarray, ldb, &info, batchCount); + blasStatus = cublasDgetrsBatched(blasHandle, transa, n, nrhs, dAarray, lda, &P, dBarray, ldb, &info, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const cuComplex* const Aarray[], int lda, const int* devIpiv, cuComplex* const Barray[], int ldb, int* info, int batchSize); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasComplex* const A[], const int lda, const int* ipiv, hipblasComplex* const B[], const int ldb, int* info, const int batchCount); + // CHECK: blasStatus = hipblasCgetrsBatched(blasHandle, transa, n, nrhs, complexAarray, lda, &P, complexBarray, ldb, &info, batchCount); + blasStatus = cublasCgetrsBatched(blasHandle, transa, n, nrhs, complexAarray, lda, &P, complexBarray, ldb, &info, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const cuDoubleComplex* const Aarray[], int lda, const int* devIpiv, cuDoubleComplex* const Barray[], int ldb, int* info, int batchSize); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgetrsBatched(hipblasHandle_t handle, const 
hipblasOperation_t trans, const int n, const int nrhs, hipblasDoubleComplex* const A[], const int lda, const int* ipiv, hipblasDoubleComplex* const B[], const int ldb, int* info, const int batchCount); + // CHECK: blasStatus = hipblasZgetrsBatched(blasHandle, transa, n, nrhs, dcomplexAarray, lda, &P, dcomplexBarray, ldb, &info, batchCount); + blasStatus = cublasZgetrsBatched(blasHandle, transa, n, nrhs, dcomplexAarray, lda, &P, dcomplexBarray, ldb, &info, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const float* alpha, const float* const A[], int lda, float* const B[], int ldb, int batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasStrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float* alpha, float* const AP[], int lda, float* BP[], int ldb, int batchCount); + // CHECK: blasStatus = hipblasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray, lda, fBarray, ldb, batchCount); + blasStatus = cublasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray, lda, fBarray, ldb, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const double* alpha, const double* const A[], int lda, double* const B[], int ldb, int batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDtrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double* alpha, double* const AP[], int lda, double* BP[], int ldb, int batchCount); + // CHECK: blasStatus = 
hipblasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray, lda, dBarray, ldb, batchCount); + blasStatus = cublasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray, lda, dBarray, ldb, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuComplex* alpha, const cuComplex* const A[], int lda, cuComplex* const B[], int ldb, int batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCtrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, hipblasComplex* const AP[], int lda, hipblasComplex* BP[], int ldb, int batchCount); + // CHECK: blasStatus = hipblasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray, lda, complexBarray, ldb, batchCount); + blasStatus = cublasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray, lda, complexBarray, ldb, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* const A[], int lda, cuDoubleComplex* const B[], int ldb, int batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZtrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* const AP[], int lda, hipblasDoubleComplex* BP[], int ldb, int batchCount); + // CHECK: blasStatus = hipblasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, 
blasDiagType, m, n, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, batchCount); + blasStatus = cublasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgeqrfBatched(cublasHandle_t handle, int m, int n, float* const Aarray[], int lda, float* const TauArray[], int* info, int batchSize); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgeqrfBatched(hipblasHandle_t handle, const int m, const int n, float* const A[], const int lda, float* const ipiv[], int* info, const int batchCount); + // CHECK: blasStatus = hipblasSgeqrfBatched(blasHandle, m, n, fAarray, lda, fTauarray, &info, batchCount); + blasStatus = cublasSgeqrfBatched(blasHandle, m, n, fAarray, lda, fTauarray, &info, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgeqrfBatched(cublasHandle_t handle, int m, int n, double* const Aarray[], int lda, double* const TauArray[], int* info, int batchSize); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgeqrfBatched(hipblasHandle_t handle, const int m, const int n, double* const A[], const int lda, double* const ipiv[], int* info, const int batchCount); + // CHECK: blasStatus = hipblasDgeqrfBatched(blasHandle, m, n, dAarray, lda, dTauarray, &info, batchCount); + blasStatus = cublasDgeqrfBatched(blasHandle, m, n, dAarray, lda, dTauarray, &info, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgeqrfBatched(cublasHandle_t handle, int m, int n, cuComplex* const Aarray[], int lda, cuComplex* const TauArray[], int* info, int batchSize); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgeqrfBatched(hipblasHandle_t handle, const int m, const int n, hipblasComplex* const A[], const int lda, hipblasComplex* const ipiv[], int* info, const int batchCount); + // CHECK: blasStatus = hipblasCgeqrfBatched(blasHandle, m, n, complexAarray, lda, complexTauarray, &info, batchCount); + 
blasStatus = cublasCgeqrfBatched(blasHandle, m, n, complexAarray, lda, complexTauarray, &info, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgeqrfBatched(cublasHandle_t handle, int m, int n, cuDoubleComplex* const Aarray[], int lda, cuDoubleComplex* const TauArray[], int* info, int batchSize); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgeqrfBatched(hipblasHandle_t handle, const int m, const int n, hipblasDoubleComplex* const A[], const int lda, hipblasDoubleComplex* const ipiv[], int* info, const int batchCount); + // CHECK: blasStatus = hipblasZgeqrfBatched(blasHandle, m, n, dcomplexAarray, lda, dcomplexTauarray, &info, batchCount); + blasStatus = cublasZgeqrfBatched(blasHandle, m, n, dcomplexAarray, lda, dcomplexTauarray, &info, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSdgmm(cublasHandle_t handle, cublasSideMode_t mode, int m, int n, const float* A, int lda, const float* x, int incx, float* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSdgmm(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const float* AP, int lda, const float* x, int incx, float* CP, int ldc); + // CHECK: blasStatus = hipblasSdgmm(blasHandle, blasSideMode, m, n, &fa, lda, &fx, incx, &fC, ldc); + blasStatus = cublasSdgmm(blasHandle, blasSideMode, m, n, &fa, lda, &fx, incx, &fC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDdgmm(cublasHandle_t handle, cublasSideMode_t mode, int m, int n, const double* A, int lda, const double* x, int incx, double* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDdgmm(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const double* AP, int lda, const double* x, int incx, double* CP, int ldc); + // CHECK: blasStatus = hipblasDdgmm(blasHandle, blasSideMode, m, n, &da, lda, &dx, incx, &dC, ldc); + blasStatus = cublasDdgmm(blasHandle, blasSideMode, m, n, &da, lda, &dx, incx, &dC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI 
cublasCdgmm(cublasHandle_t handle, cublasSideMode_t mode, int m, int n, const cuComplex* A, int lda, const cuComplex* x, int incx, cuComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCdgmm(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const hipblasComplex* AP, int lda, const hipblasComplex* x, int incx, hipblasComplex* CP, int ldc); + // CHECK: blasStatus = hipblasCdgmm(blasHandle, blasSideMode, m, n, &complexa, lda, &complexx, incx, &complexC, ldc); + blasStatus = cublasCdgmm(blasHandle, blasSideMode, m, n, &complexa, lda, &complexx, incx, &complexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdgmm(cublasHandle_t handle, cublasSideMode_t mode, int m, int n, const cuDoubleComplex* A, int lda, const cuDoubleComplex* x, int incx, cuDoubleComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZdgmm(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const hipblasDoubleComplex* AP, int lda, const hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* CP, int ldc); + // CHECK: blasStatus = hipblasZdgmm(blasHandle, blasSideMode, m, n, &dcomplexa, lda, &dcomplexx, incx, &dcomplexC, ldc); + blasStatus = cublasZdgmm(blasHandle, blasSideMode, m, n, &dcomplexa, lda, &dcomplexx, incx, &dcomplexC, ldc); + long long int strideA = 0; long long int strideB = 0; long long int strideC = 0; @@ -1264,6 +1601,51 @@ int main() { // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgemmStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int lda, long long strideA, const hipblasDoubleComplex* BP, int ldb, long long strideB, const hipblasDoubleComplex* beta, hipblasDoubleComplex* CP, int ldc, long long strideC, int batchCount); // CHECK: blasStatus = hipblasZgemmStridedBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, strideA, &dcomplexB, ldb, strideB, &dcomplexb, 
&dcomplexC, ldc, strideC, batchCount); blasStatus = cublasZgemmStridedBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, strideA, &dcomplexB, ldb, strideB, &dcomplexb, &dcomplexC, ldc, strideC, batchCount); + + void* aptr = nullptr; + void* Aptr = nullptr; + void* bptr = nullptr; + void* Bptr = nullptr; + void* cptr = nullptr; + void* Cptr = nullptr; + void* xptr = nullptr; + void* yptr = nullptr; + void* sptr = nullptr; + + // CHECK: hipblasDatatype_t Atype; + // CHECK-NEXT: hipblasDatatype_t Btype; + // CHECK-NEXT: hipblasDatatype_t Ctype; + // CHECK-NEXT: hipblasDatatype_t Xtype; + // CHECK-NEXT: hipblasDatatype_t Ytype; + // CHECK-NEXT: hipblasDatatype_t CStype; + // CHECK-NEXT: hipblasDatatype_t Executiontype; + cudaDataType Atype; + cudaDataType Btype; + cudaDataType Ctype; + cudaDataType Xtype; + cudaDataType Ytype; + cudaDataType CStype; + cudaDataType Executiontype; + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasScalEx(cublasHandle_t handle, int n, const void* alpha, cudaDataType alphaType, void* x, cudaDataType xType, int incx, cudaDataType executionType); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasScalEx(hipblasHandle_t handle, int n, const void* alpha, hipblasDatatype_t alphaType, void* x, hipblasDatatype_t xType, int incx, hipblasDatatype_t executionType); + // CHECK: blasStatus = hipblasScalEx(blasHandle, n, aptr, Atype, xptr, Xtype, incx, Executiontype); + blasStatus = cublasScalEx(blasHandle, n, aptr, Atype, xptr, Xtype, incx, Executiontype); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasAxpyEx(cublasHandle_t handle, int n, const void* alpha, cudaDataType alphaType, const void* x, cudaDataType xType, int incx, void* y, cudaDataType yType, int incy, cudaDataType executiontype); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasAxpyEx(hipblasHandle_t handle, int n, const void* alpha, hipblasDatatype_t alphaType, const void* x, hipblasDatatype_t xType, int incx, void* y, hipblasDatatype_t yType, int incy, 
hipblasDatatype_t executionType); + // CHECK: blasStatus = hipblasAxpyEx(blasHandle, n, aptr, Atype, xptr, Xtype, incx, yptr, Ytype, incy, Executiontype); + blasStatus = cublasAxpyEx(blasHandle, n, aptr, Atype, xptr, Xtype, incx, yptr, Ytype, incy, Executiontype); +#endif + +#if CUDA_VERSION >= 8000 && CUDA_VERSION < 11000 + // CHECK: hipblasDatatype_t computeType; + cudaDataType computeType; + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, const void* B, cudaDataType Btype, int ldb, const void* beta, void* C, cudaDataType Ctype, int ldc, cudaDataType computeType, cublasGemmAlgo_t algo); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmEx(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A, hipblasDatatype_t aType, int lda, const void* B, hipblasDatatype_t bType, int ldb, const void* beta, void* C, hipblasDatatype_t cType, int ldc, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo); + // CHECK: blasStatus = hipblasGemmEx(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, Bptr, Btype, ldb, bptr, Cptr, Ctype, ldc, computeType, blasGemmAlgo); + blasStatus = cublasGemmEx(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, Bptr, Btype, ldb, bptr, Cptr, Ctype, ldc, computeType, blasGemmAlgo); #endif #if CUDA_VERSION >= 9000 @@ -1271,12 +1653,29 @@ int main() { cublasGemmAlgo_t BLAS_GEMM_DEFAULT = CUBLAS_GEMM_DEFAULT; #endif +#if CUDA_VERSION >= 9010 && CUDA_VERSION < 11000 + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* const Aarray[], cudaDataType Atype, int lda, const void* const Barray[], cudaDataType Btype, int ldb, const void* beta, void* const 
Carray[], cudaDataType Ctype, int ldc, int batchCount, cudaDataType computeType, cublasGemmAlgo_t algo); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmBatchedEx(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A[], hipblasDatatype_t aType, int lda, const void* B[], hipblasDatatype_t bType, int ldb, const void* beta, void* C[], hipblasDatatype_t cType, int ldc, int batchCount, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo); + // CHECK: blasStatus = hipblasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); + blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, long long int strideA, const void* B, cudaDataType Btype, int ldb, long long int strideB, const void* beta, void* C, cudaDataType Ctype, int ldc, long long int strideC, int batchCount, cudaDataType computeType, cublasGemmAlgo_t algo); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmStridedBatchedEx(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A, hipblasDatatype_t aType, int lda, hipblasStride strideA, const void* B, hipblasDatatype_t bType, int ldb, hipblasStride strideB, const void* beta, void* C, hipblasDatatype_t cType, int ldc, hipblasStride strideC, int batchCount, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo); + // CHECK: blasStatus = hipblasGemmStridedBatchedEx(blasHandle, transa, transb, m, n, k, aptr, Aptr, 
Atype, lda, strideA, Bptr, Btype, ldb, strideB, bptr, Cptr, Ctype, ldc, strideC, batchCount, computeType, blasGemmAlgo); + blasStatus = cublasGemmStridedBatchedEx(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, strideA, Bptr, Btype, ldb, strideB, bptr, Cptr, Ctype, ldc, strideC, batchCount, computeType, blasGemmAlgo); +#endif + #if CUDA_VERSION >= 10010 // CHECK: hipblasOperation_t BLAS_OP_HERMITAN = HIPBLAS_OP_C; cublasOperation_t BLAS_OP_HERMITAN = CUBLAS_OP_HERMITAN; // CHECK: hipblasFillMode_t BLAS_FILL_MODE_FULL = HIPBLAS_FILL_MODE_FULL; cublasFillMode_t BLAS_FILL_MODE_FULL = CUBLAS_FILL_MODE_FULL; + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasRotEx(cublasHandle_t handle, int n, void* x, cudaDataType xType, int incx, void* y, cudaDataType yType, int incy, const void* c, const void* s, cudaDataType csType, cudaDataType executiontype); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasRotEx(hipblasHandle_t handle, int n, void* x, hipblasDatatype_t xType, int incx, void* y, hipblasDatatype_t yType, int incy, const void* c, const void* s, hipblasDatatype_t csType, hipblasDatatype_t executionType); + // CHECK: blasStatus = hipblasRotEx(blasHandle, n, xptr, Xtype, incx, yptr, Ytype, incy, cptr, sptr, CStype, Executiontype); + blasStatus = cublasRotEx(blasHandle, n, xptr, Xtype, incx, yptr, Ytype, incy, cptr, sptr, CStype, Executiontype); #endif #if CUDA_VERSION >= 11000 @@ -1284,6 +1683,26 @@ int main() { // CHECK-NEXT: hipblasDatatype_t C_16BF = HIPBLAS_C_16B; cublasDataType_t R_16BF = CUDA_R_16BF; cublasDataType_t C_16BF = CUDA_C_16BF; + + // NOTE: WORKAROUND: cublasComputeType_t is not actually supported by hipBLAS + // TODO: Fix it after fixing https://github.com/ROCmSoftwarePlatform/hipBLAS/issues/529 + // CHECK: hipblasDatatype_t blasComputeType; + cublasComputeType_t blasComputeType; + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int 
k, const void* alpha, const void* A, cudaDataType Atype, int lda, const void* B, cudaDataType Btype, int ldb, const void* beta, void* C, cudaDataType Ctype, int ldc, cublasComputeType_t computeType, cublasGemmAlgo_t algo); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmEx(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A, hipblasDatatype_t aType, int lda, const void* B, hipblasDatatype_t bType, int ldb, const void* beta, void* C, hipblasDatatype_t cType, int ldc, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo); + // CHECK: blasStatus = hipblasGemmEx(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, Bptr, Btype, ldb, bptr, Cptr, Ctype, ldc, blasComputeType, blasGemmAlgo); + blasStatus = cublasGemmEx(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, Bptr, Btype, ldb, bptr, Cptr, Ctype, ldc, blasComputeType, blasGemmAlgo); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* const Aarray[], cudaDataType Atype, int lda, const void* const Barray[], cudaDataType Btype, int ldb, const void* beta, void* const Carray[], cudaDataType Ctype, int ldc, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmBatchedEx(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A[], hipblasDatatype_t aType, int lda, const void* B[], hipblasDatatype_t bType, int ldb, const void* beta, void* C[], hipblasDatatype_t cType, int ldc, int batchCount, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo); + // CHECK: blasStatus = hipblasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, 
blasComputeType, blasGemmAlgo); + blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, blasComputeType, blasGemmAlgo); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, long long int strideA, const void* B, cudaDataType Btype, int ldb, long long int strideB, const void* beta, void* C, cudaDataType Ctype, int ldc, long long int strideC, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmStridedBatchedEx(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A, hipblasDatatype_t aType, int lda, hipblasStride strideA, const void* B, hipblasDatatype_t bType, int ldb, hipblasStride strideB, const void* beta, void* C, hipblasDatatype_t cType, int ldc, hipblasStride strideC, int batchCount, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo); + // CHECK: blasStatus = hipblasGemmStridedBatchedEx(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, strideA, Bptr, Btype, ldb, strideB, bptr, Cptr, Ctype, ldc, strideC, batchCount, blasComputeType, blasGemmAlgo); + blasStatus = cublasGemmStridedBatchedEx(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, strideA, Bptr, Btype, ldb, strideB, bptr, Cptr, Ctype, ldc, strideC, batchCount, blasComputeType, blasGemmAlgo); #endif return 0; diff --git a/tests/unit_tests/synthetic/libraries/cudnn2miopen.cu b/tests/unit_tests/synthetic/libraries/cudnn2miopen.cu new file mode 100644 index 00000000..d7a9ebb3 --- /dev/null +++ b/tests/unit_tests/synthetic/libraries/cudnn2miopen.cu @@ -0,0 +1,30 @@ +// RUN: %run_test hipify "%s" "%t" %hipify_args 3 
--skip-excluded-preprocessor-conditional-blocks --experimental -roc %clang_args -D__CUDA_API_VERSION_INTERNAL + +// CHECK: #include <hip/hip_runtime.h> +#include <cuda_runtime.h> +#include <stdio.h> +// CHECK: #include "miopen/miopen.h" +#include "cudnn.h" + +int main() { + printf("15. cuDNN API to MIOpen API synthetic test\n"); + + // CHECK: miopenStatus_t dnnStatus_t; + // CHECK-NEXT: miopenStatus_t STATUS_SUCCESS = miopenStatusSuccess; + // CHECK-NEXT: miopenStatus_t STATUS_NOT_INITIALIZED = miopenStatusNotInitialized; + // CHECK-NEXT: miopenStatus_t STATUS_ALLOC_FAILED = miopenStatusAllocFailed; + // CHECK-NEXT: miopenStatus_t STATUS_BAD_PARAM = miopenStatusBadParm; + // CHECK-NEXT: miopenStatus_t STATUS_INTERNAL_ERROR = miopenStatusInternalError; + // CHECK-NEXT: miopenStatus_t STATUS_INVALID_VALUE = miopenStatusInvalidValue; + // CHECK-NEXT: miopenStatus_t STATUS_NOT_SUPPORTED = miopenStatusUnsupportedOp; + cudnnStatus_t dnnStatus_t; + cudnnStatus_t STATUS_SUCCESS = CUDNN_STATUS_SUCCESS; + cudnnStatus_t STATUS_NOT_INITIALIZED = CUDNN_STATUS_NOT_INITIALIZED; + cudnnStatus_t STATUS_ALLOC_FAILED = CUDNN_STATUS_ALLOC_FAILED; + cudnnStatus_t STATUS_BAD_PARAM = CUDNN_STATUS_BAD_PARAM; + cudnnStatus_t STATUS_INTERNAL_ERROR = CUDNN_STATUS_INTERNAL_ERROR; + cudnnStatus_t STATUS_INVALID_VALUE = CUDNN_STATUS_INVALID_VALUE; + cudnnStatus_t STATUS_NOT_SUPPORTED = CUDNN_STATUS_NOT_SUPPORTED; + + return 0; +}
11.7.111.7.1 works only with the patch
due to the clang's bug 54609
patch for 14.0.0**
patch for 14.0.1**
@@ -170,9 +170,13 @@ After applying all the matchers, the output HIP source is produced.
+
14.0.5, - 14.0.6, - 15.0.011.7.111.8.0 LATEST STABLE CONFIG