diff --git a/CMakeLists.txt b/CMakeLists.txt
index b04de886..0c6806bb 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -24,12 +24,12 @@ if(MSVC AND MSVC_VERSION VERSION_LESS "1900")
endif()
include_directories(${LLVM_INCLUDE_DIRS})
-link_directories(${LLVM_LIBRARY_DIRS})
add_definitions(${LLVM_DEFINITIONS})
file(GLOB_RECURSE HIPIFY_SOURCES src/*.cpp)
file(GLOB_RECURSE HIPIFY_HEADERS src/*.h)
add_llvm_executable(hipify-clang ${HIPIFY_SOURCES} ${HIPIFY_HEADERS})
+target_link_directories(hipify-clang PRIVATE ${LLVM_LIBRARY_DIRS})
set(CMAKE_CXX_COMPILER ${LLVM_TOOLS_BINARY_DIR}/clang++)
set(CMAKE_C_COMPILER ${LLVM_TOOLS_BINARY_DIR}/clang)
@@ -131,7 +131,14 @@ install(
PATTERN "openmp_wrappers" EXCLUDE)
option(FILE_REORG_BACKWARD_COMPATIBILITY "Enable File Reorg with backward compatibility" ON)
+
if(UNIX)
+
+ #get rid of any RPATH definitions already set
+ set_target_properties(hipify-clang PROPERTIES INSTALL_RPATH "")
+ #set RPATH for the binary so it looks for shared libraries in ../lib relative to its own location ($ORIGIN)
+ set_target_properties(hipify-clang PROPERTIES LINK_FLAGS "-Wl,--disable-new-dtags -Wl,--rpath,$ORIGIN/../lib" )
+
if(FILE_REORG_BACKWARD_COMPATIBILITY)
include(hipify-backward-compat.cmake)
endif()
diff --git a/README.md b/README.md
index 7e7f8871..2040e605 100644
--- a/README.md
+++ b/README.md
@@ -43,9 +43,9 @@ After applying all the matchers, the output HIP source is produced.
`hipify-clang` requires:
-1. [**LLVM+CLANG**](http://releases.llvm.org) of at least version [4.0.0](http://releases.llvm.org/download.html#4.0.0); the latest stable and recommended release: [**15.0.0**](https://github.com/llvm/llvm-project/releases/tag/llvmorg-15.0.0).
+1. [**LLVM+CLANG**](http://releases.llvm.org) of at least version [4.0.0](http://releases.llvm.org/download.html#4.0.0); the latest stable and recommended release: [**15.0.4**](https://github.com/llvm/llvm-project/releases/tag/llvmorg-15.0.4).
-2. [**CUDA**](https://developer.nvidia.com/cuda-downloads) of at least version [8.0](https://developer.nvidia.com/cuda-80-ga2-download-archive), the latest supported version is [**11.7.1**](https://developer.nvidia.com/cuda-downloads).
+2. [**CUDA**](https://developer.nvidia.com/cuda-downloads) of at least version [8.0](https://developer.nvidia.com/cuda-80-ga2-download-archive), the latest supported version is [**11.8.0**](https://developer.nvidia.com/cuda-downloads).
@@ -160,7 +160,7 @@ After applying all the matchers, the output HIP source is produced.
14.0.2,
14.0.3,
14.0.4
- 11.7.1 |
+ 11.7.1 |
works only with the patch due to the clang's bug 54609
patch for 14.0.0**
patch for 14.0.1**
@@ -170,9 +170,13 @@ After applying all the matchers, the output HIP source is produced.
| + |
14.0.5,
- 14.0.6,
- 15.0.0 |
- 11.7.1 |
+ 14.0.6,
+ 15.0.0,
+ 15.0.1,
+ 15.0.2,
+ 15.0.3,
+ 15.0.4
+ 11.8.0 |
LATEST STABLE CONFIG |
@@ -187,7 +191,7 @@ After applying all the matchers, the output HIP source is produced.
In most cases, you can get a suitable version of `LLVM+CLANG` with your package manager.
Failing that or having multiple versions of `LLVM`, you can [download a release archive](http://releases.llvm.org/), build or install it, and set
-[CMAKE_PREFIX_PATH](https://cmake.org/cmake/help/v3.5/variable/CMAKE_PREFIX_PATH.html) so `cmake` can find it; for instance: `-DCMAKE_PREFIX_PATH=d:\LLVM\15.0.0\dist`
+[CMAKE_PREFIX_PATH](https://cmake.org/cmake/help/v3.5/variable/CMAKE_PREFIX_PATH.html) so `cmake` can find it; for instance: `-DCMAKE_PREFIX_PATH=d:\LLVM\15.0.4\dist`
### hipify-clang: usage
@@ -196,14 +200,14 @@ To process a file, `hipify-clang` needs access to the same headers that would be
For example:
```shell
-./hipify-clang square.cu --cuda-path=/usr/local/cuda-11.7 -I /usr/local/cuda-11.7/samples/common/inc
+./hipify-clang square.cu --cuda-path=/usr/local/cuda-11.8 -I /usr/local/cuda-11.8/samples/common/inc
```
`hipify-clang` arguments are given first, followed by a separator `'--'`, and then the arguments you'd pass to `clang` if you
were compiling the input file. For example:
```bash
-./hipify-clang cpp17.cu --cuda-path=/usr/local/cuda-11.7 -- -std=c++17
+./hipify-clang cpp17.cu --cuda-path=/usr/local/cuda-11.8 -- -std=c++17
```
The [Clang manual for compiling CUDA](https://llvm.org/docs/CompileCudaWithLLVM.html#compiling-cuda-code) may be useful.
@@ -285,7 +289,7 @@ Run `Visual Studio 16 2019`, open the generated `LLVM.sln`, build all, and build
**LLVM >= 10.0.0:**
-1. download [`LLVM project`](https://github.com/llvm/llvm-project/releases/tag/llvmorg-15.0.0) sources;
+1. download [`LLVM project`](https://github.com/llvm/llvm-project/releases/tag/llvmorg-15.0.4) sources;
2. build [`LLVM project`](http://llvm.org/docs/CMake.html):
```bash
@@ -324,9 +328,9 @@ Run `Visual Studio 17 2022`, open the generated `LLVM.sln`, build all, build pro
- ***Linux***: `-DCUDA_TOOLKIT_ROOT_DIR=/usr/include`
- - ***Windows***: `-DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.7"`
+ - ***Windows***: `-DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8"`
- `-DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v11.7"`
+ `-DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v11.8"`
4. Ensure [`cuDNN`](https://developer.nvidia.com/rdp/cudnn-archive) of the version corresponding to CUDA's version is installed.
@@ -334,7 +338,7 @@ Run `Visual Studio 17 2022`, open the generated `LLVM.sln`, build all, build pro
- ***Linux***: `-DCUDA_DNN_ROOT_DIR=/usr/include`
- - ***Windows***: `-DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-11.7-windows-x64-v8.4.1`
+ - ***Windows***: `-DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-11.8-windows-x64-v8.6.0`
5. Ensure [`CUB`](https://github.com/NVlabs/cub) of the version corresponding to CUDA's version is installed.
@@ -350,21 +354,21 @@ Run `Visual Studio 17 2022`, open the generated `LLVM.sln`, build all, build pro
* Install `lit` into `python`:
- - ***Linux***: `python /usr/llvm/15.0.0/llvm-project/llvm/utils/lit/setup.py install`
+ - ***Linux***: `python /usr/llvm/15.0.4/llvm-project/llvm/utils/lit/setup.py install`
- - ***Windows***: `python d:/LLVM/15.0.0/llvm-project/llvm/utils/lit/setup.py install`
+ - ***Windows***: `python d:/LLVM/15.0.4/llvm-project/llvm/utils/lit/setup.py install`
* Starting with LLVM 6.0.1 path to `llvm-lit` python script should be specified by the `LLVM_EXTERNAL_LIT` option:
- - ***Linux***: `-DLLVM_EXTERNAL_LIT=/usr/llvm/15.0.0/build/bin/llvm-lit`
+ - ***Linux***: `-DLLVM_EXTERNAL_LIT=/usr/llvm/15.0.4/build/bin/llvm-lit`
- - ***Windows***: `-DLLVM_EXTERNAL_LIT=d:/LLVM/15.0.0/build/Release/bin/llvm-lit.py`
+ - ***Windows***: `-DLLVM_EXTERNAL_LIT=d:/LLVM/15.0.4/build/Release/bin/llvm-lit.py`
* `FileCheck`:
- - ***Linux***: copy from `/usr/llvm/15.0.0/build/bin/` to `CMAKE_INSTALL_PREFIX/dist/bin`
+ - ***Linux***: copy from `/usr/llvm/15.0.4/build/bin/` to `CMAKE_INSTALL_PREFIX/dist/bin`
- - ***Windows***: copy from `d:/LLVM/15.0.0/build/Release/bin` to `CMAKE_INSTALL_PREFIX/dist/bin`
+ - ***Windows***: copy from `d:/LLVM/15.0.4/build/Release/bin` to `CMAKE_INSTALL_PREFIX/dist/bin`
- Or specify the path to `FileCheck` in `CMAKE_INSTALL_PREFIX` option
@@ -386,7 +390,7 @@ Ubuntu 14: LLVM 4.0.0 - 7.1.0, CUDA 7.0 - 9.0, cuDNN 5.0.5 - 7.6.5
Ubuntu 16-18: LLVM 8.0.0 - 14.0.6, CUDA 8.0 - 10.2, cuDNN 5.1.10 - 8.0.5
-Ubuntu 20-21: LLVM 9.0.0 - 15.0.0, CUDA 8.0 - 11.7.1, cuDNN 5.1.10 - 8.4.1
+Ubuntu 20-21: LLVM 9.0.0 - 15.0.4, CUDA 8.0 - 11.8.0, cuDNN 5.1.10 - 8.6.0
Minimum build system requirements for the above configurations:
@@ -403,11 +407,11 @@ cmake
-DHIPIFY_CLANG_TESTS=1 \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=../dist \
- -DCMAKE_PREFIX_PATH=/usr/llvm/15.0.0/dist \
+ -DCMAKE_PREFIX_PATH=/usr/llvm/15.0.4/dist \
-DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda \
-DCUDA_DNN_ROOT_DIR=/usr/local/cuda \
-DCUDA_CUB_ROOT_DIR=/usr/CUB \
- -DLLVM_EXTERNAL_LIT=/usr/llvm/15.0.0/build/bin/llvm-lit \
+ -DLLVM_EXTERNAL_LIT=/usr/llvm/15.0.4/build/bin/llvm-lit \
../hipify
```
*A corresponding successful output:*
@@ -425,20 +429,20 @@ cmake
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- Found ZLIB: /usr/lib/x86_64-linux-gnu/libz.so (found version "1.2.11")
--- Found LLVM 15.0.0:
--- - CMake module path: /usr/llvm/15.0.0/dist/lib/cmake/llvm
--- - Include path : /usr/llvm/15.0.0/dist/include
--- - Binary path : /usr/llvm/15.0.0/dist/bin
+-- Found LLVM 15.0.4:
+-- - CMake module path: /usr/llvm/15.0.4/dist/lib/cmake/llvm
+-- - Include path : /usr/llvm/15.0.4/dist/include
+-- - Binary path : /usr/llvm/15.0.4/dist/bin
-- Linker detection: GNU ld
-- Found PythonInterp: /usr/bin/python (found suitable version "3.9.7", minimum required is "2.7")
-- Found lit: /usr/local/bin/lit
--- Found FileCheck: /usr/llvm/15.0.0/dist/bin/FileCheck
+-- Found FileCheck: /usr/llvm/15.0.4/dist/bin/FileCheck
-- Looking for pthread.h
-- Looking for pthread.h - found
-- Performing Test CMAKE_HAVE_LIBC_PTHREAD
-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Success
-- Found Threads: TRUE
--- Found CUDA: /usr/local/cuda (found version "11.7")
+-- Found CUDA: /usr/local/cuda (found version "11.8")
-- Configuring done
-- Generating done
-- Build files have been written to: /usr/hipify/build
@@ -450,8 +454,8 @@ make test-hipify
```shell
Running HIPify regression tests
========================================
-CUDA 11.7 - will be used for testing
-LLVM 15.0.0 - will be used for testing
+CUDA 11.8 - will be used for testing
+LLVM 15.0.4 - will be used for testing
x86_64 - Platform architecture
Linux 5.13.0-21-generic - Platform OS
64 - hipify-clang binary bitness
@@ -565,8 +569,8 @@ Testing Time: 6.22s
| 11.0.1 - 11.1.0 | 7.0 - 11.2.2 | 7.6.5 - 8.0.5 | 2017.15.9.31, 2019.16.8.4 | 3.19.3 | 3.9.2 |
| 12.0.0 - 13.0.1 | 7.0 - 11.5.1 | 7.6.5 - 8.3.2 | 2017.15.9.43, 2019.16.11.9 | 3.22.2 | 3.10.2 |
| 14.0.0 - 14.0.6 | 7.0 - 11.7.1 | 8.0.5 - 8.4.1 | 2017.15.9.49, 2019.16.11.17, 2022.17.2.6 | 3.24.0 | 3.10.6 |
-| 15.0.0 | 7.0 - 11.7.1 | 8.0.5 - 8.4.1 | 2017.15.9.50, 2019.16.11.18, 2022.17.3.3 | 3.24.1 | 3.10.6 |
-| 16.0.0git | 7.0 - 11.7.1 | 8.0.5 - 8.4.1 | 2017.15.9.50, 2019.16.11.18, 2022.17.3.3 | 3.24.1 | 3.10.6 |
+| 15.0.0 - 15.0.4 | 7.0 - 11.8.0 | 8.0.5 - 8.6.0 | 2017.15.9.50, 2019.16.11.20, 2022.17.3.6 | 3.24.2 | 3.11.0 |
+| 16.0.0git | 7.0 - 11.8.0 | 8.0.5 - 8.6.0 | 2017.15.9.50, 2019.16.11.20, 2022.17.3.6 | 3.24.2 | 3.11.0 |
*Building with testing support by `Visual Studio 17 2022` on `Windows 10`:*
@@ -578,24 +582,24 @@ cmake
-DHIPIFY_CLANG_TESTS=1 \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=../dist \
- -DCMAKE_PREFIX_PATH=d:/LLVM/15.0.0/dist \
- -DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.7" \
- -DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v11.7" \
- -DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-11.7-windows-x64-v8.4.1 \
+ -DCMAKE_PREFIX_PATH=d:/LLVM/15.0.4/dist \
+ -DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8" \
+ -DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v11.8" \
+ -DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-11.8-windows-x64-v8.6.0 \
-DCUDA_CUB_ROOT_DIR=d:/GIT/cub \
- -DLLVM_EXTERNAL_LIT=d:/LLVM/15.0.0/build/Release/bin/llvm-lit.py \
+ -DLLVM_EXTERNAL_LIT=d:/LLVM/15.0.4/build/Release/bin/llvm-lit.py \
../hipify
```
*A corresponding successful output:*
```shell
--- Found LLVM 15.0.0:
--- - CMake module path: d:/LLVM/15.0.0/dist/lib/cmake/llvm
--- - Include path : d:/LLVM/15.0.0/dist/include
--- - Binary path : d:/LLVM/15.0.0/dist/bin
--- Found PythonInterp: c:/Program Files/Python39/python.exe (found suitable version "3.9.5", minimum required is "3.6")
--- Found lit: c:/Program Files/Python39/Scripts/lit.exe
--- Found FileCheck: d:/LLVM/15.0.0/dist/bin/FileCheck.exe
--- Found CUDA: c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.7 (found version "11.7")
+-- Found LLVM 15.0.4:
+-- - CMake module path: d:/LLVM/15.0.4/dist/lib/cmake/llvm
+-- - Include path : d:/LLVM/15.0.4/dist/include
+-- - Binary path : d:/LLVM/15.0.4/dist/bin
+-- Found PythonInterp: c:/Program Files/Python311/python.exe (found suitable version "3.11.0", minimum required is "3.6")
+-- Found lit: c:/Program Files/Python311/Scripts/lit.exe
+-- Found FileCheck: d:/LLVM/15.0.4/dist/bin/FileCheck.exe
+-- Found CUDA: c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8 (found version "11.8")
-- Configuring done
-- Generating done
-- Build files have been written to: d:/hipify/build
diff --git a/bin/hipify-perl b/bin/hipify-perl
index 4dfd064d..23da6adc 100755
--- a/bin/hipify-perl
+++ b/bin/hipify-perl
@@ -682,6 +682,8 @@ my %removed_funcs = (
"cudaSetupArgument" => "10.1",
"cudaLaunch" => "10.1",
"cudaConfigureCall" => "10.1",
+ "CUkernelNodeAttrValue_union" => "11.8",
+ "CUkernelNodeAttrID_enum" => "11.8",
"CU_TARGET_COMPUTE_73" => "10.0",
"CU_TARGET_COMPUTE_13" => "9.0",
"CU_TARGET_COMPUTE_12" => "9.0",
@@ -715,77 +717,8 @@ my %removed_funcs = (
);
my %experimental_funcs = (
- "nvrtcGetCUBINSize" => "5.3.0",
- "nvrtcGetCUBIN" => "5.3.0",
- "cudaUserObject_t" => "5.3.0",
- "cudaUserObjectRetainFlags" => "5.3.0",
- "cudaUserObjectRetain" => "5.3.0",
- "cudaUserObjectRelease" => "5.3.0",
- "cudaUserObjectNoDestructorSync" => "5.3.0",
- "cudaUserObjectFlags" => "5.3.0",
- "cudaUserObjectCreate" => "5.3.0",
- "cudaMemoryTypeManaged" => "5.3.0",
- "cudaLimitStackSize" => "5.3.0",
- "cudaGraphUserObjectMove" => "5.3.0",
- "cudaGraphUpload" => "5.3.0",
- "cudaGraphRetainUserObject" => "5.3.0",
- "cudaGraphReleaseUserObject" => "5.3.0",
- "cudaGraphNodeTypeExtSemaphoreWait" => "5.3.0",
- "cudaGraphNodeTypeExtSemaphoreSignal" => "5.3.0",
- "cudaGraphMemAttributeType" => "5.3.0",
- "cudaGraphMemAttrUsedMemHigh" => "5.3.0",
- "cudaGraphMemAttrUsedMemCurrent" => "5.3.0",
- "cudaGraphMemAttrReservedMemHigh" => "5.3.0",
- "cudaGraphMemAttrReservedMemCurrent" => "5.3.0",
- "cudaDeviceSetLimit" => "5.3.0",
- "cudaDeviceSetGraphMemAttribute" => "5.3.0",
- "cudaDeviceGraphMemTrim" => "5.3.0",
- "cudaDeviceGetGraphMemAttribute" => "5.3.0",
- "cuUserObjectRetain" => "5.3.0",
- "cuUserObjectRelease" => "5.3.0",
- "cuUserObjectCreate" => "5.3.0",
- "cuLinkDestroy" => "5.3.0",
- "cuLinkCreate_v2" => "5.3.0",
- "cuLinkCreate" => "5.3.0",
- "cuLinkComplete" => "5.3.0",
- "cuLinkAddFile_v2" => "5.3.0",
- "cuLinkAddFile" => "5.3.0",
- "cuLinkAddData_v2" => "5.3.0",
- "cuLinkAddData" => "5.3.0",
- "cuGraphUpload" => "5.3.0",
- "cuGraphRetainUserObject" => "5.3.0",
- "cuGraphReleaseUserObject" => "5.3.0",
- "cuDeviceSetGraphMemAttribute" => "5.3.0",
- "cuDeviceGraphMemTrim" => "5.3.0",
- "cuDeviceGetGraphMemAttribute" => "5.3.0",
- "cuCtxSetLimit" => "5.3.0",
- "CUuserObject_st" => "5.3.0",
- "CUuserObject_flags_enum" => "5.3.0",
- "CUuserObject_flags" => "5.3.0",
- "CUuserObjectRetain_flags_enum" => "5.3.0",
- "CUuserObjectRetain_flags" => "5.3.0",
- "CUuserObject" => "5.3.0",
- "CUjitInputType_enum" => "5.3.0",
- "CUjitInputType" => "5.3.0",
- "CUgraphMem_attribute_enum" => "5.3.0",
- "CUgraphMem_attribute" => "5.3.0",
- "CU_USER_OBJECT_NO_DESTRUCTOR_SYNC" => "5.3.0",
- "CU_LIMIT_STACK_SIZE" => "5.3.0",
- "CU_JIT_NUM_INPUT_TYPES" => "5.3.0",
- "CU_JIT_INPUT_PTX" => "5.3.0",
- "CU_JIT_INPUT_OBJECT" => "5.3.0",
- "CU_JIT_INPUT_NVVM" => "5.3.0",
- "CU_JIT_INPUT_LIBRARY" => "5.3.0",
- "CU_JIT_INPUT_FATBINARY" => "5.3.0",
- "CU_JIT_INPUT_CUBIN" => "5.3.0",
- "CU_GRAPH_USER_OBJECT_MOVE" => "5.3.0",
- "CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT" => "5.3.0",
- "CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL" => "5.3.0",
- "CU_GRAPH_MEM_ATTR_USED_MEM_HIGH" => "5.3.0",
- "CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT" => "5.3.0",
- "CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH" => "5.3.0",
- "CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT" => "5.3.0",
- "CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED" => "5.3.0"
+ "cuGetErrorString" => "5.4.0",
+ "cuGetErrorName" => "5.4.0"
);
$print_stats = 1 if $examine;
@@ -853,7 +786,7 @@ push(@exclude_filelist, split(',', $exclude_files));
%exclude_dirhash = map { $_ => 1 } @exclude_dirlist;
%exclude_filehash = map { $_ => 1 } @exclude_filelist;
-@statNames = ("error", "init", "version", "device", "context", "module", "memory", "virtual_memory", "stream_ordered_memory", "addressing", "stream", "event", "external_resource_interop", "stream_memory", "execution", "graph", "occupancy", "texture", "surface", "peer", "graphics", "interactions", "profiler", "openGL", "D3D9", "D3D10", "D3D11", "VDPAU", "EGL", "thread", "complex", "library", "device_library", "device_function", "include", "include_cuda_main_header", "include_cuda_main_header_v2", "type", "literal", "numeric_literal", "define", "extern_shared", "kernel_launch");
+@statNames = ("error", "init", "version", "device", "context", "module", "memory", "virtual_memory", "stream_ordered_memory", "addressing", "stream", "event", "external_resource_interop", "stream_memory", "execution", "graph", "occupancy", "texture", "surface", "peer", "graphics", "interactions", "profiler", "openGL", "D3D9", "D3D10", "D3D11", "VDPAU", "EGL", "thread", "complex", "library", "device_library", "device_function", "device_type", "include", "include_cuda_main_header", "include_cuda_main_header_v2", "type", "literal", "numeric_literal", "define", "extern_shared", "kernel_launch");
sub totalStats {
my %count = %{shift()};
@@ -923,77 +856,8 @@ sub subst {
}
sub experimentalSubstitutions {
- subst("cudaDeviceSetLimit", "hipDeviceSetLimit", "device");
- subst("cuCtxSetLimit", "hipDeviceSetLimit", "context");
- subst("cuLinkAddData", "hiprtcLinkAddData", "module");
- subst("cuLinkAddData_v2", "hiprtcLinkAddData", "module");
- subst("cuLinkAddFile", "hiprtcLinkAddFile", "module");
- subst("cuLinkAddFile_v2", "hiprtcLinkAddFile", "module");
- subst("cuLinkComplete", "hiprtcLinkComplete", "module");
- subst("cuLinkCreate", "hiprtcLinkCreate", "module");
- subst("cuLinkCreate_v2", "hiprtcLinkCreate", "module");
- subst("cuLinkDestroy", "hiprtcLinkDestroy", "module");
- subst("cuDeviceGetGraphMemAttribute", "hipDeviceGetGraphMemAttribute", "graph");
- subst("cuDeviceGraphMemTrim", "hipDeviceGraphMemTrim", "graph");
- subst("cuDeviceSetGraphMemAttribute", "hipDeviceSetGraphMemAttribute", "graph");
- subst("cuGraphReleaseUserObject", "hipGraphReleaseUserObject", "graph");
- subst("cuGraphRetainUserObject", "hipGraphRetainUserObject", "graph");
- subst("cuGraphUpload", "hipGraphUpload", "graph");
- subst("cuUserObjectCreate", "hipUserObjectCreate", "graph");
- subst("cuUserObjectRelease", "hipUserObjectRelease", "graph");
- subst("cuUserObjectRetain", "hipUserObjectRetain", "graph");
- subst("cudaDeviceGetGraphMemAttribute", "hipDeviceGetGraphMemAttribute", "graph");
- subst("cudaDeviceGraphMemTrim", "hipDeviceGraphMemTrim", "graph");
- subst("cudaDeviceSetGraphMemAttribute", "hipDeviceSetGraphMemAttribute", "graph");
- subst("cudaGraphReleaseUserObject", "hipGraphReleaseUserObject", "graph");
- subst("cudaGraphRetainUserObject", "hipGraphRetainUserObject", "graph");
- subst("cudaGraphUpload", "hipGraphUpload", "graph");
- subst("cudaUserObjectCreate", "hipUserObjectCreate", "graph");
- subst("cudaUserObjectRelease", "hipUserObjectRelease", "graph");
- subst("cudaUserObjectRetain", "hipUserObjectRetain", "graph");
- subst("nvrtcGetCUBIN", "hiprtcGetBitcode", "library");
- subst("nvrtcGetCUBINSize", "hiprtcGetBitcodeSize", "library");
- subst("CUgraphMem_attribute", "hipGraphMemAttributeType", "type");
- subst("CUgraphMem_attribute_enum", "hipGraphMemAttributeType", "type");
- subst("CUjitInputType", "hiprtcJITInputType", "type");
- subst("CUjitInputType_enum", "hiprtcJITInputType", "type");
- subst("CUuserObject", "hipUserObject_t", "type");
- subst("CUuserObjectRetain_flags", "hipUserObjectRetainFlags", "type");
- subst("CUuserObjectRetain_flags_enum", "hipUserObjectRetainFlags", "type");
- subst("CUuserObject_flags", "hipUserObjectFlags", "type");
- subst("CUuserObject_flags_enum", "hipUserObjectFlags", "type");
- subst("CUuserObject_st", "hipUserObject", "type");
- subst("cudaGraphMemAttributeType", "hipGraphMemAttributeType", "type");
- subst("cudaUserObjectFlags", "hipUserObjectFlags", "type");
- subst("cudaUserObjectRetainFlags", "hipUserObjectRetainFlags", "type");
- subst("cudaUserObject_t", "hipUserObject_t", "type");
- subst("CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED", "hipDeviceAttributeVirtualMemoryManagementSupported", "numeric_literal");
- subst("CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT", "hipGraphMemAttrReservedMemCurrent", "numeric_literal");
- subst("CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH", "hipGraphMemAttrReservedMemHigh", "numeric_literal");
- subst("CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT", "hipGraphMemAttrUsedMemCurrent", "numeric_literal");
- subst("CU_GRAPH_MEM_ATTR_USED_MEM_HIGH", "hipGraphMemAttrUsedMemHigh", "numeric_literal");
- subst("CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL", "hipGraphNodeTypeExtSemaphoreSignal", "numeric_literal");
- subst("CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT", "hipGraphNodeTypeExtSemaphoreWait", "numeric_literal");
- subst("CU_GRAPH_USER_OBJECT_MOVE", "hipGraphUserObjectMove", "numeric_literal");
- subst("CU_JIT_INPUT_CUBIN", "HIPRTC_JIT_INPUT_CUBIN", "numeric_literal");
- subst("CU_JIT_INPUT_FATBINARY", "HIPRTC_JIT_INPUT_FATBINARY", "numeric_literal");
- subst("CU_JIT_INPUT_LIBRARY", "HIPRTC_JIT_INPUT_LIBRARY", "numeric_literal");
- subst("CU_JIT_INPUT_NVVM", "HIPRTC_JIT_INPUT_NVVM", "numeric_literal");
- subst("CU_JIT_INPUT_OBJECT", "HIPRTC_JIT_INPUT_OBJECT", "numeric_literal");
- subst("CU_JIT_INPUT_PTX", "HIPRTC_JIT_INPUT_PTX", "numeric_literal");
- subst("CU_JIT_NUM_INPUT_TYPES", "HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES", "numeric_literal");
- subst("CU_LIMIT_STACK_SIZE", "hipLimitStackSize", "numeric_literal");
- subst("CU_USER_OBJECT_NO_DESTRUCTOR_SYNC", "hipUserObjectNoDestructorSync", "numeric_literal");
- subst("cudaGraphMemAttrReservedMemCurrent", "hipGraphMemAttrReservedMemCurrent", "numeric_literal");
- subst("cudaGraphMemAttrReservedMemHigh", "hipGraphMemAttrReservedMemHigh", "numeric_literal");
- subst("cudaGraphMemAttrUsedMemCurrent", "hipGraphMemAttrUsedMemCurrent", "numeric_literal");
- subst("cudaGraphMemAttrUsedMemHigh", "hipGraphMemAttrUsedMemHigh", "numeric_literal");
- subst("cudaGraphNodeTypeExtSemaphoreSignal", "hipGraphNodeTypeExtSemaphoreSignal", "numeric_literal");
- subst("cudaGraphNodeTypeExtSemaphoreWait", "hipGraphNodeTypeExtSemaphoreWait", "numeric_literal");
- subst("cudaGraphUserObjectMove", "hipGraphUserObjectMove", "numeric_literal");
- subst("cudaLimitStackSize", "hipLimitStackSize", "numeric_literal");
- subst("cudaMemoryTypeManaged", "hipMemoryTypeManaged", "numeric_literal");
- subst("cudaUserObjectNoDestructorSync", "hipUserObjectNoDestructorSync", "numeric_literal");
+ subst("cuGetErrorName", "hipDrvGetErrorName", "error");
+ subst("cuGetErrorString", "hipDrvGetErrorString", "error");
}
sub rocSubstitutions {
@@ -1033,7 +897,7 @@ sub rocSubstitutions {
subst("cublasCher2k_v2", "rocblas_cher2k", "library");
subst("cublasCher_v2", "rocblas_cher", "library");
subst("cublasCherk", "rocblas_cherk", "library");
- subst("cublasCherk_v2", "rocblas_cherkx", "library");
+ subst("cublasCherk_v2", "rocblas_cherk", "library");
subst("cublasCherkx", "rocblas_cherkx", "library");
subst("cublasChpmv", "rocblas_chpmv", "library");
subst("cublasChpmv_v2", "rocblas_chpmv", "library");
@@ -1076,8 +940,8 @@ sub rocSubstitutions {
subst("cublasCtpmv_v2", "rocblas_ctpmv", "library");
subst("cublasCtpsv", "rocblas_ctpsv", "library");
subst("cublasCtpsv_v2", "rocblas_ctpsv", "library");
- subst("cublasCtrmm", "rocblas_ctrmm", "library");
- subst("cublasCtrmm_v2", "rocblas_ctrmm", "library");
+ subst("cublasCtrmm", "rocblas_ctrmm_outofplace", "library");
+ subst("cublasCtrmm_v2", "rocblas_ctrmm_outofplace", "library");
subst("cublasCtrmv", "rocblas_ctrmv", "library");
subst("cublasCtrmv_v2", "rocblas_ctrmv", "library");
subst("cublasCtrsm", "rocblas_ctrsm", "library");
@@ -1152,8 +1016,8 @@ sub rocSubstitutions {
subst("cublasDtpmv_v2", "rocblas_dtpmv", "library");
subst("cublasDtpsv", "rocblas_dtpsv", "library");
subst("cublasDtpsv_v2", "rocblas_dtpsv", "library");
- subst("cublasDtrmm", "rocblas_dtrmm", "library");
- subst("cublasDtrmm_v2", "rocblas_dtrmm", "library");
+ subst("cublasDtrmm", "rocblas_dtrmm_outofplace", "library");
+ subst("cublasDtrmm_v2", "rocblas_dtrmm_outofplace", "library");
subst("cublasDtrmv", "rocblas_dtrmv", "library");
subst("cublasDtrmv_v2", "rocblas_dtrmv", "library");
subst("cublasDtrsm", "rocblas_dtrsm", "library");
@@ -1168,10 +1032,12 @@ sub rocSubstitutions {
subst("cublasGemmBatchedEx", "rocblas_gemm_batched_ex", "library");
subst("cublasGemmEx", "rocblas_gemm_ex", "library");
subst("cublasGemmStridedBatchedEx", "rocblas_gemm_strided_batched_ex", "library");
+ subst("cublasGetAtomicsMode", "rocblas_get_atomics_mode", "library");
subst("cublasGetMatrix", "rocblas_get_matrix", "library");
subst("cublasGetMatrixAsync", "rocblas_get_matrix_async", "library");
subst("cublasGetPointerMode", "rocblas_get_pointer_mode", "library");
subst("cublasGetPointerMode_v2", "rocblas_set_pointer_mode", "library");
+ subst("cublasGetStatusString", "rocblas_status_to_string", "library");
subst("cublasGetStream", "rocblas_get_stream", "library");
subst("cublasGetStream_v2", "rocblas_get_stream", "library");
subst("cublasGetVector", "rocblas_get_vector", "library");
@@ -1187,6 +1053,7 @@ sub rocSubstitutions {
subst("cublasIdamax_v2", "rocblas_idamax", "library");
subst("cublasIdamin", "rocblas_idamin", "library");
subst("cublasIdamin_v2", "rocblas_idamin", "library");
+ subst("cublasInit", "rocblas_initialize", "library");
subst("cublasIsamax", "rocblas_isamax", "library");
subst("cublasIsamax_v2", "rocblas_isamax", "library");
subst("cublasIsamin", "rocblas_isamin", "library");
@@ -1211,6 +1078,7 @@ sub rocSubstitutions {
subst("cublasSdgmm", "rocblas_sdgmm", "library");
subst("cublasSdot", "rocblas_sdot", "library");
subst("cublasSdot_v2", "rocblas_sdot", "library");
+ subst("cublasSetAtomicsMode", "rocblas_set_atomics_mode", "library");
subst("cublasSetMatrix", "rocblas_set_matrix", "library");
subst("cublasSetMatrixAsync", "rocblas_set_matrix_async", "library");
subst("cublasSetPointerMode", "rocblas_set_pointer_mode", "library");
@@ -1273,8 +1141,8 @@ sub rocSubstitutions {
subst("cublasStpmv_v2", "rocblas_stpmv", "library");
subst("cublasStpsv", "rocblas_stpsv", "library");
subst("cublasStpsv_v2", "rocblas_stpsv", "library");
- subst("cublasStrmm", "rocblas_strmm", "library");
- subst("cublasStrmm_v2", "rocblas_strmm", "library");
+ subst("cublasStrmm", "rocblas_strmm_outofplace", "library");
+ subst("cublasStrmm_v2", "rocblas_strmm_outofplace", "library");
subst("cublasStrmv", "rocblas_strmv", "library");
subst("cublasStrmv_v2", "rocblas_strmv", "library");
subst("cublasStrsm", "rocblas_strsm", "library");
@@ -1358,8 +1226,8 @@ sub rocSubstitutions {
subst("cublasZtpmv_v2", "rocblas_ztpmv", "library");
subst("cublasZtpsv", "rocblas_ztpsv", "library");
subst("cublasZtpsv_v2", "rocblas_ztpsv", "library");
- subst("cublasZtrmm", "rocblas_ztrmm", "library");
- subst("cublasZtrmm_v2", "rocblas_ztrmm", "library");
+ subst("cublasZtrmm", "rocblas_ztrmm_outofplace", "library");
+ subst("cublasZtrmm_v2", "rocblas_ztrmm_outofplace", "library");
subst("cublasZtrmv", "rocblas_ztrmv", "library");
subst("cublasZtrmv_v2", "rocblas_ztrmv", "library");
subst("cublasZtrsm", "rocblas_ztrsm", "library");
@@ -1381,6 +1249,8 @@ sub rocSubstitutions {
subst("cublasSideMode_t", "rocblas_side", "type");
subst("cublasStatus", "rocblas_status", "type");
subst("cublasStatus_t", "rocblas_status", "type");
+ subst("cudaDataType", "rocblas_datatype", "type");
+ subst("cudaDataType_t", "rocblas_datatype_", "type");
subst("CUBLAS_ATOMICS_ALLOWED", "rocblas_atomics_allowed", "numeric_literal");
subst("CUBLAS_ATOMICS_NOT_ALLOWED", "rocblas_atomics_not_allowed", "numeric_literal");
subst("CUBLAS_DIAG_NON_UNIT", "rocblas_diagonal_non_unit", "numeric_literal");
@@ -1407,6 +1277,22 @@ sub rocSubstitutions {
subst("CUBLAS_STATUS_NOT_INITIALIZED", "rocblas_status_invalid_handle", "numeric_literal");
subst("CUBLAS_STATUS_NOT_SUPPORTED", "rocblas_status_perf_degraded", "numeric_literal");
subst("CUBLAS_STATUS_SUCCESS", "rocblas_status_success", "numeric_literal");
+ subst("CUDA_C_16BF", "rocblas_datatype_bf16_c", "numeric_literal");
+ subst("CUDA_C_16F", "rocblas_datatype_f16_c", "numeric_literal");
+ subst("CUDA_C_32F", "rocblas_datatype_f32_c", "numeric_literal");
+ subst("CUDA_C_32I", "rocblas_datatype_i32_c", "numeric_literal");
+ subst("CUDA_C_32U", "rocblas_datatype_u32_c", "numeric_literal");
+ subst("CUDA_C_64F", "rocblas_datatype_f64_c", "numeric_literal");
+ subst("CUDA_C_8I", "rocblas_datatype_i8_c", "numeric_literal");
+ subst("CUDA_C_8U", "rocblas_datatype_u8_c", "numeric_literal");
+ subst("CUDA_R_16BF", "rocblas_datatype_bf16_r", "numeric_literal");
+ subst("CUDA_R_16F", "rocblas_datatype_f16_r", "numeric_literal");
+ subst("CUDA_R_32F", "rocblas_datatype_f32_r", "numeric_literal");
+ subst("CUDA_R_32I", "rocblas_datatype_i32_r", "numeric_literal");
+ subst("CUDA_R_32U", "rocblas_datatype_u32_r", "numeric_literal");
+ subst("CUDA_R_64F", "rocblas_datatype_f64_r", "numeric_literal");
+ subst("CUDA_R_8I", "rocblas_datatype_i8_r", "numeric_literal");
+ subst("CUDA_R_8U", "rocblas_datatype_u8_r", "numeric_literal");
}
sub simpleSubstitutions {
@@ -1443,6 +1329,7 @@ sub simpleSubstitutions {
subst("cudaDeviceGetStreamPriorityRange", "hipDeviceGetStreamPriorityRange", "device");
subst("cudaDeviceReset", "hipDeviceReset", "device");
subst("cudaDeviceSetCacheConfig", "hipDeviceSetCacheConfig", "device");
+ subst("cudaDeviceSetLimit", "hipDeviceSetLimit", "device");
subst("cudaDeviceSetMemPool", "hipDeviceSetMemPool", "device");
subst("cudaDeviceSetSharedMemConfig", "hipDeviceSetSharedMemConfig", "device");
subst("cudaDeviceSynchronize", "hipDeviceSynchronize", "device");
@@ -1476,6 +1363,7 @@ sub simpleSubstitutions {
subst("cuCtxPushCurrent_v2", "hipCtxPushCurrent", "context");
subst("cuCtxSetCacheConfig", "hipCtxSetCacheConfig", "context");
subst("cuCtxSetCurrent", "hipCtxSetCurrent", "context");
+ subst("cuCtxSetLimit", "hipDeviceSetLimit", "context");
subst("cuCtxSetSharedMemConfig", "hipCtxSetSharedMemConfig", "context");
subst("cuCtxSynchronize", "hipCtxSynchronize", "context");
subst("cuDevicePrimaryCtxGetState", "hipDevicePrimaryCtxGetState", "context");
@@ -1486,6 +1374,14 @@ sub simpleSubstitutions {
subst("cuDevicePrimaryCtxRetain", "hipDevicePrimaryCtxRetain", "context");
subst("cuDevicePrimaryCtxSetFlags", "hipDevicePrimaryCtxSetFlags", "context");
subst("cuDevicePrimaryCtxSetFlags_v2", "hipDevicePrimaryCtxSetFlags", "context");
+ subst("cuLinkAddData", "hiprtcLinkAddData", "module");
+ subst("cuLinkAddData_v2", "hiprtcLinkAddData", "module");
+ subst("cuLinkAddFile", "hiprtcLinkAddFile", "module");
+ subst("cuLinkAddFile_v2", "hiprtcLinkAddFile", "module");
+ subst("cuLinkComplete", "hiprtcLinkComplete", "module");
+ subst("cuLinkCreate", "hiprtcLinkCreate", "module");
+ subst("cuLinkCreate_v2", "hiprtcLinkCreate", "module");
+ subst("cuLinkDestroy", "hiprtcLinkDestroy", "module");
subst("cuModuleGetFunction", "hipModuleGetFunction", "module");
subst("cuModuleGetGlobal", "hipModuleGetGlobal", "module");
subst("cuModuleGetGlobal_v2", "hipModuleGetGlobal", "module");
@@ -1750,6 +1646,9 @@ sub simpleSubstitutions {
subst("cudaLaunchHostFunc", "hipLaunchHostFunc", "execution");
subst("cudaLaunchKernel", "hipLaunchKernel", "execution");
subst("cudaSetupArgument", "hipSetupArgument", "execution");
+ subst("cuDeviceGetGraphMemAttribute", "hipDeviceGetGraphMemAttribute", "graph");
+ subst("cuDeviceGraphMemTrim", "hipDeviceGraphMemTrim", "graph");
+ subst("cuDeviceSetGraphMemAttribute", "hipDeviceSetGraphMemAttribute", "graph");
subst("cuGraphAddChildGraphNode", "hipGraphAddChildGraphNode", "graph");
subst("cuGraphAddDependencies", "hipGraphAddDependencies", "graph");
subst("cuGraphAddEmptyNode", "hipGraphAddEmptyNode", "graph");
@@ -1794,7 +1693,16 @@ sub simpleSubstitutions {
subst("cuGraphNodeGetDependencies", "hipGraphNodeGetDependencies", "graph");
subst("cuGraphNodeGetDependentNodes", "hipGraphNodeGetDependentNodes", "graph");
subst("cuGraphNodeGetType", "hipGraphNodeGetType", "graph");
+ subst("cuGraphReleaseUserObject", "hipGraphReleaseUserObject", "graph");
subst("cuGraphRemoveDependencies", "hipGraphRemoveDependencies", "graph");
+ subst("cuGraphRetainUserObject", "hipGraphRetainUserObject", "graph");
+ subst("cuGraphUpload", "hipGraphUpload", "graph");
+ subst("cuUserObjectCreate", "hipUserObjectCreate", "graph");
+ subst("cuUserObjectRelease", "hipUserObjectRelease", "graph");
+ subst("cuUserObjectRetain", "hipUserObjectRetain", "graph");
+ subst("cudaDeviceGetGraphMemAttribute", "hipDeviceGetGraphMemAttribute", "graph");
+ subst("cudaDeviceGraphMemTrim", "hipDeviceGraphMemTrim", "graph");
+ subst("cudaDeviceSetGraphMemAttribute", "hipDeviceSetGraphMemAttribute", "graph");
subst("cudaGraphAddChildGraphNode", "hipGraphAddChildGraphNode", "graph");
subst("cudaGraphAddDependencies", "hipGraphAddDependencies", "graph");
subst("cudaGraphAddEmptyNode", "hipGraphAddEmptyNode", "graph");
@@ -1851,7 +1759,13 @@ sub simpleSubstitutions {
subst("cudaGraphNodeGetDependencies", "hipGraphNodeGetDependencies", "graph");
subst("cudaGraphNodeGetDependentNodes", "hipGraphNodeGetDependentNodes", "graph");
subst("cudaGraphNodeGetType", "hipGraphNodeGetType", "graph");
+ subst("cudaGraphReleaseUserObject", "hipGraphReleaseUserObject", "graph");
subst("cudaGraphRemoveDependencies", "hipGraphRemoveDependencies", "graph");
+ subst("cudaGraphRetainUserObject", "hipGraphRetainUserObject", "graph");
+ subst("cudaGraphUpload", "hipGraphUpload", "graph");
+ subst("cudaUserObjectCreate", "hipUserObjectCreate", "graph");
+ subst("cudaUserObjectRelease", "hipUserObjectRelease", "graph");
+ subst("cudaUserObjectRetain", "hipUserObjectRetain", "graph");
subst("cuOccupancyMaxActiveBlocksPerMultiprocessor", "hipModuleOccupancyMaxActiveBlocksPerMultiprocessor", "occupancy");
subst("cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "occupancy");
subst("cuOccupancyMaxPotentialBlockSize", "hipModuleOccupancyMaxPotentialBlockSize", "occupancy");
@@ -2047,8 +1961,6 @@ sub simpleSubstitutions {
subst("cublasCtpmv_v2", "hipblasCtpmv", "library");
subst("cublasCtpsv", "hipblasCtpsv", "library");
subst("cublasCtpsv_v2", "hipblasCtpsv", "library");
- subst("cublasCtrmm", "hipblasCtrmm", "library");
- subst("cublasCtrmm_v2", "hipblasCtrmm", "library");
subst("cublasCtrmv", "hipblasCtrmv", "library");
subst("cublasCtrmv_v2", "hipblasCtrmv", "library");
subst("cublasCtrsm", "hipblasCtrsm", "library");
@@ -2127,8 +2039,6 @@ sub simpleSubstitutions {
subst("cublasDtpmv_v2", "hipblasDtpmv", "library");
subst("cublasDtpsv", "hipblasDtpsv", "library");
subst("cublasDtpsv_v2", "hipblasDtpsv", "library");
- subst("cublasDtrmm", "hipblasDtrmm", "library");
- subst("cublasDtrmm_v2", "hipblasDtrmm", "library");
subst("cublasDtrmv", "hipblasDtrmv", "library");
subst("cublasDtrmv_v2", "hipblasDtrmv", "library");
subst("cublasDtrsm", "hipblasDtrsm", "library");
@@ -2254,8 +2164,6 @@ sub simpleSubstitutions {
subst("cublasStpmv_v2", "hipblasStpmv", "library");
subst("cublasStpsv", "hipblasStpsv", "library");
subst("cublasStpsv_v2", "hipblasStpsv", "library");
- subst("cublasStrmm", "hipblasStrmm", "library");
- subst("cublasStrmm_v2", "hipblasStrmm", "library");
subst("cublasStrmv", "hipblasStrmv", "library");
subst("cublasStrmv_v2", "hipblasStrmv", "library");
subst("cublasStrsm", "hipblasStrsm", "library");
@@ -2343,8 +2251,6 @@ sub simpleSubstitutions {
subst("cublasZtpmv_v2", "hipblasZtpmv", "library");
subst("cublasZtpsv", "hipblasZtpsv", "library");
subst("cublasZtpsv_v2", "hipblasZtpsv", "library");
- subst("cublasZtrmm", "hipblasZtrmm", "library");
- subst("cublasZtrmm_v2", "hipblasZtrmm", "library");
subst("cublasZtrmv", "hipblasZtrmv", "library");
subst("cublasZtrmv_v2", "hipblasZtrmv", "library");
subst("cublasZtrsm", "hipblasZtrsm", "library");
@@ -3034,6 +2940,8 @@ sub simpleSubstitutions {
subst("nvrtcCompileProgram", "hiprtcCompileProgram", "library");
subst("nvrtcCreateProgram", "hiprtcCreateProgram", "library");
subst("nvrtcDestroyProgram", "hiprtcDestroyProgram", "library");
+ subst("nvrtcGetCUBIN", "hiprtcGetBitcode", "library");
+ subst("nvrtcGetCUBINSize", "hiprtcGetBitcodeSize", "library");
subst("nvrtcGetErrorString", "hiprtcGetErrorString", "library");
subst("nvrtcGetLoweredName", "hiprtcGetLoweredName", "library");
subst("nvrtcGetPTX", "hiprtcGetCode", "library");
@@ -3064,6 +2972,10 @@ sub simpleSubstitutions {
subst("curand_uniform4", "hiprand_uniform4", "device_library");
subst("curand_uniform4_double", "hiprand_uniform4_double", "device_library");
subst("curand_uniform_double", "hiprand_uniform_double", "device_library");
+ subst("__half", "__half", "device_type");
+ subst("__half2", "__half2", "device_type");
+ subst("__half2_raw", "__half2_raw", "device_type");
+ subst("__half_raw", "__half_raw", "device_type");
subst("caffe2\/core\/common_cudnn.h", "caffe2\/core\/hip\/common_miopen.h", "include");
subst("caffe2\/operators\/spatial_batch_norm_op.h", "caffe2\/operators\/hip\/spatial_batch_norm_op_miopen.hip", "include");
subst("channel_descriptor.h", "hip\/channel_descriptor.h", "include");
@@ -3211,6 +3123,8 @@ sub simpleSubstitutions {
subst("CUgraphExec_st", "hipGraphExec", "type");
subst("CUgraphInstantiate_flags", "hipGraphInstantiateFlags", "type");
subst("CUgraphInstantiate_flags_enum", "hipGraphInstantiateFlags", "type");
+ subst("CUgraphMem_attribute", "hipGraphMemAttributeType", "type");
+ subst("CUgraphMem_attribute_enum", "hipGraphMemAttributeType", "type");
subst("CUgraphNode", "hipGraphNode_t", "type");
subst("CUgraphNodeType", "hipGraphNodeType", "type");
subst("CUgraphNodeType_enum", "hipGraphNodeType", "type");
@@ -3227,6 +3141,8 @@ sub simpleSubstitutions {
subst("CUipcMemHandle", "hipIpcMemHandle_t", "type");
subst("CUipcMemHandle_st", "hipIpcMemHandle_st", "type");
subst("CUipcMemHandle_v1", "hipIpcMemHandle_t", "type");
+ subst("CUjitInputType", "hiprtcJITInputType", "type");
+ subst("CUjitInputType_enum", "hiprtcJITInputType", "type");
subst("CUjit_option", "hipJitOption", "type");
subst("CUjit_option_enum", "hipJitOption", "type");
subst("CUkernelNodeAttrID", "hipKernelNodeAttrID", "type");
@@ -3308,6 +3224,12 @@ sub simpleSubstitutions {
subst("CUtexObject_v1", "hipTextureObject_t", "type");
subst("CUtexref", "hipTexRef", "type");
subst("CUtexref_st", "textureReference", "type");
+ subst("CUuserObject", "hipUserObject_t", "type");
+ subst("CUuserObjectRetain_flags", "hipUserObjectRetainFlags", "type");
+ subst("CUuserObjectRetain_flags_enum", "hipUserObjectRetainFlags", "type");
+ subst("CUuserObject_flags", "hipUserObjectFlags", "type");
+ subst("CUuserObject_flags_enum", "hipUserObjectFlags", "type");
+ subst("CUuserObject_st", "hipUserObject", "type");
subst("CUuuid", "hipUUID", "type");
subst("CUuuid_st", "hipUUID_t", "type");
subst("GLenum", "GLenum", "type");
@@ -3327,6 +3249,7 @@ sub simpleSubstitutions {
subst("cuDoubleComplex", "hipDoubleComplex", "type");
subst("cuFloatComplex", "hipFloatComplex", "type");
subst("cublasAtomicsMode_t", "hipblasAtomicsMode_t", "type");
+ subst("cublasComputeType_t", "hipblasDatatype_t", "type");
subst("cublasDataType_t", "hipblasDatatype_t", "type");
subst("cublasDiagType_t", "hipblasDiagType_t", "type");
subst("cublasFillMode_t", "hipblasFillMode_t", "type");
@@ -3374,6 +3297,7 @@ sub simpleSubstitutions {
subst("cudaGraphExecUpdateResult", "hipGraphExecUpdateResult", "type");
subst("cudaGraphExec_t", "hipGraphExec_t", "type");
subst("cudaGraphInstantiateFlags", "hipGraphInstantiateFlags", "type");
+ subst("cudaGraphMemAttributeType", "hipGraphMemAttributeType", "type");
subst("cudaGraphNodeType", "hipGraphNodeType", "type");
subst("cudaGraphNode_t", "hipGraphNode_t", "type");
subst("cudaGraph_t", "hipGraph_t", "type");
@@ -3431,6 +3355,9 @@ sub simpleSubstitutions {
subst("cudaTextureObject_t", "hipTextureObject_t", "type");
subst("cudaTextureReadMode", "hipTextureReadMode", "type");
subst("cudaUUID_t", "hipUUID", "type");
+ subst("cudaUserObjectFlags", "hipUserObjectFlags", "type");
+ subst("cudaUserObjectRetainFlags", "hipUserObjectRetainFlags", "type");
+ subst("cudaUserObject_t", "hipUserObject_t", "type");
subst("cudnnActivationDescriptor_t", "hipdnnActivationDescriptor_t", "type");
subst("cudnnActivationMode_t", "hipdnnActivationMode_t", "type");
subst("cudnnBatchNormMode_t", "hipdnnBatchNormMode_t", "type");
@@ -4028,6 +3955,7 @@ sub simpleSubstitutions {
subst("CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT", "hipDeviceAttributeTexturePitchAlignment", "numeric_literal");
subst("CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY", "hipDeviceAttributeTotalConstantMemory", "numeric_literal");
subst("CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING", "hipDeviceAttributeUnifiedAddressing", "numeric_literal");
+ subst("CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED", "hipDeviceAttributeVirtualMemoryManagementSupported", "numeric_literal");
subst("CU_DEVICE_ATTRIBUTE_WARP_SIZE", "hipDeviceAttributeWarpSize", "numeric_literal");
subst("CU_DEVICE_P2P_ATTRIBUTE_ACCESS_ACCESS_SUPPORTED", "hipDevP2PAttrHipArrayAccessSupported", "numeric_literal");
subst("CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED", "hipDevP2PAttrAccessSupported", "numeric_literal");
@@ -4081,38 +4009,53 @@ sub simpleSubstitutions {
subst("CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED", "hipGraphExecUpdateErrorTopologyChanged", "numeric_literal");
subst("CU_GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE", "hipGraphExecUpdateErrorUnsupportedFunctionChange", "numeric_literal");
subst("CU_GRAPH_EXEC_UPDATE_SUCCESS", "hipGraphExecUpdateSuccess", "numeric_literal");
+ subst("CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT", "hipGraphMemAttrReservedMemCurrent", "numeric_literal");
+ subst("CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH", "hipGraphMemAttrReservedMemHigh", "numeric_literal");
+ subst("CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT", "hipGraphMemAttrUsedMemCurrent", "numeric_literal");
+ subst("CU_GRAPH_MEM_ATTR_USED_MEM_HIGH", "hipGraphMemAttrUsedMemHigh", "numeric_literal");
subst("CU_GRAPH_NODE_TYPE_COUNT", "hipGraphNodeTypeCount", "numeric_literal");
subst("CU_GRAPH_NODE_TYPE_EMPTY", "hipGraphNodeTypeEmpty", "numeric_literal");
subst("CU_GRAPH_NODE_TYPE_EVENT_RECORD", "hipGraphNodeTypeEventRecord", "numeric_literal");
+ subst("CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL", "hipGraphNodeTypeExtSemaphoreSignal", "numeric_literal");
+ subst("CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT", "hipGraphNodeTypeExtSemaphoreWait", "numeric_literal");
subst("CU_GRAPH_NODE_TYPE_GRAPH", "hipGraphNodeTypeGraph", "numeric_literal");
subst("CU_GRAPH_NODE_TYPE_HOST", "hipGraphNodeTypeHost", "numeric_literal");
subst("CU_GRAPH_NODE_TYPE_KERNEL", "hipGraphNodeTypeKernel", "numeric_literal");
subst("CU_GRAPH_NODE_TYPE_MEMCPY", "hipGraphNodeTypeMemcpy", "numeric_literal");
subst("CU_GRAPH_NODE_TYPE_MEMSET", "hipGraphNodeTypeMemset", "numeric_literal");
subst("CU_GRAPH_NODE_TYPE_WAIT_EVENT", "hipGraphNodeTypeWaitEvent", "numeric_literal");
+ subst("CU_GRAPH_USER_OBJECT_MOVE", "hipGraphUserObjectMove", "numeric_literal");
subst("CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS", "hipIpcMemLazyEnablePeerAccess", "numeric_literal");
- subst("CU_JIT_CACHE_MODE", "hipJitOptionCacheMode", "numeric_literal");
- subst("CU_JIT_ERROR_LOG_BUFFER", "hipJitOptionErrorLogBuffer", "numeric_literal");
- subst("CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES", "hipJitOptionErrorLogBufferSizeBytes", "numeric_literal");
- subst("CU_JIT_FALLBACK_STRATEGY", "hipJitOptionFallbackStrategy", "numeric_literal");
- subst("CU_JIT_FAST_COMPILE", "hipJitOptionFastCompile", "numeric_literal");
- subst("CU_JIT_GENERATE_DEBUG_INFO", "hipJitOptionGenerateDebugInfo", "numeric_literal");
- subst("CU_JIT_GENERATE_LINE_INFO", "hipJitOptionGenerateLineInfo", "numeric_literal");
- subst("CU_JIT_INFO_LOG_BUFFER", "hipJitOptionInfoLogBuffer", "numeric_literal");
- subst("CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES", "hipJitOptionInfoLogBufferSizeBytes", "numeric_literal");
- subst("CU_JIT_LOG_VERBOSE", "hipJitOptionLogVerbose", "numeric_literal");
- subst("CU_JIT_MAX_REGISTERS", "hipJitOptionMaxRegisters", "numeric_literal");
- subst("CU_JIT_NEW_SM3X_OPT", "hipJitOptionSm3xOpt", "numeric_literal");
- subst("CU_JIT_NUM_OPTIONS", "hipJitOptionNumOptions", "numeric_literal");
- subst("CU_JIT_OPTIMIZATION_LEVEL", "hipJitOptionOptimizationLevel", "numeric_literal");
- subst("CU_JIT_TARGET", "hipJitOptionTarget", "numeric_literal");
- subst("CU_JIT_TARGET_FROM_CUCONTEXT", "hipJitOptionTargetFromContext", "numeric_literal");
- subst("CU_JIT_THREADS_PER_BLOCK", "hipJitOptionThreadsPerBlock", "numeric_literal");
- subst("CU_JIT_WALL_TIME", "hipJitOptionWallTime", "numeric_literal");
+ subst("CU_JIT_CACHE_MODE", "HIPRTC_JIT_CACHE_MODE", "numeric_literal");
+ subst("CU_JIT_ERROR_LOG_BUFFER", "HIPRTC_JIT_ERROR_LOG_BUFFER", "numeric_literal");
+ subst("CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES", "HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES", "numeric_literal");
+ subst("CU_JIT_FALLBACK_STRATEGY", "HIPRTC_JIT_FALLBACK_STRATEGY", "numeric_literal");
+ subst("CU_JIT_FAST_COMPILE", "HIPRTC_JIT_FAST_COMPILE", "numeric_literal");
+ subst("CU_JIT_GENERATE_DEBUG_INFO", "HIPRTC_JIT_GENERATE_DEBUG_INFO", "numeric_literal");
+ subst("CU_JIT_GENERATE_LINE_INFO", "HIPRTC_JIT_GENERATE_LINE_INFO", "numeric_literal");
+ subst("CU_JIT_INFO_LOG_BUFFER", "HIPRTC_JIT_INFO_LOG_BUFFER", "numeric_literal");
+ subst("CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES", "HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES", "numeric_literal");
+ subst("CU_JIT_INPUT_CUBIN", "HIPRTC_JIT_INPUT_CUBIN", "numeric_literal");
+ subst("CU_JIT_INPUT_FATBINARY", "HIPRTC_JIT_INPUT_FATBINARY", "numeric_literal");
+ subst("CU_JIT_INPUT_LIBRARY", "HIPRTC_JIT_INPUT_LIBRARY", "numeric_literal");
+ subst("CU_JIT_INPUT_NVVM", "HIPRTC_JIT_INPUT_NVVM", "numeric_literal");
+ subst("CU_JIT_INPUT_OBJECT", "HIPRTC_JIT_INPUT_OBJECT", "numeric_literal");
+ subst("CU_JIT_INPUT_PTX", "HIPRTC_JIT_INPUT_PTX", "numeric_literal");
+ subst("CU_JIT_LOG_VERBOSE", "HIPRTC_JIT_LOG_VERBOSE", "numeric_literal");
+ subst("CU_JIT_MAX_REGISTERS", "HIPRTC_JIT_MAX_REGISTERS", "numeric_literal");
+ subst("CU_JIT_NEW_SM3X_OPT", "HIPRTC_JIT_NEW_SM3X_OPT", "numeric_literal");
+ subst("CU_JIT_NUM_INPUT_TYPES", "HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES", "numeric_literal");
+ subst("CU_JIT_NUM_OPTIONS", "HIPRTC_JIT_NUM_OPTIONS", "numeric_literal");
+ subst("CU_JIT_OPTIMIZATION_LEVEL", "HIPRTC_JIT_OPTIMIZATION_LEVEL", "numeric_literal");
+ subst("CU_JIT_TARGET", "HIPRTC_JIT_TARGET", "numeric_literal");
+ subst("CU_JIT_TARGET_FROM_CUCONTEXT", "HIPRTC_JIT_TARGET_FROM_HIPCONTEXT", "numeric_literal");
+ subst("CU_JIT_THREADS_PER_BLOCK", "HIPRTC_JIT_THREADS_PER_BLOCK", "numeric_literal");
+ subst("CU_JIT_WALL_TIME", "HIPRTC_JIT_WALL_TIME", "numeric_literal");
subst("CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW", "hipKernelNodeAttributeAccessPolicyWindow", "numeric_literal");
subst("CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE", "hipKernelNodeAttributeCooperative", "numeric_literal");
subst("CU_LIMIT_MALLOC_HEAP_SIZE", "hipLimitMallocHeapSize", "numeric_literal");
subst("CU_LIMIT_PRINTF_FIFO_SIZE", "hipLimitPrintfFifoSize", "numeric_literal");
+ subst("CU_LIMIT_STACK_SIZE", "hipLimitStackSize", "numeric_literal");
subst("CU_MEMORYTYPE_ARRAY", "hipMemoryTypeArray", "numeric_literal");
subst("CU_MEMORYTYPE_DEVICE", "hipMemoryTypeDevice", "numeric_literal");
subst("CU_MEMORYTYPE_HOST", "hipMemoryTypeHost", "numeric_literal");
@@ -4235,6 +4178,7 @@ sub simpleSubstitutions {
subst("CU_TR_ADDRESS_MODE_WRAP", "HIP_TR_ADDRESS_MODE_WRAP", "numeric_literal");
subst("CU_TR_FILTER_MODE_LINEAR", "HIP_TR_FILTER_MODE_LINEAR", "numeric_literal");
subst("CU_TR_FILTER_MODE_POINT", "HIP_TR_FILTER_MODE_POINT", "numeric_literal");
+ subst("CU_USER_OBJECT_NO_DESTRUCTOR_SYNC", "hipUserObjectNoDestructorSync", "numeric_literal");
subst("NVRTC_ERROR_BUILTIN_OPERATION_FAILURE", "HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE", "numeric_literal");
subst("NVRTC_ERROR_COMPILATION", "HIPRTC_ERROR_COMPILATION", "numeric_literal");
subst("NVRTC_ERROR_INTERNAL_ERROR", "HIPRTC_ERROR_INTERNAL_ERROR", "numeric_literal");
@@ -4462,15 +4406,22 @@ sub simpleSubstitutions {
subst("cudaGraphExecUpdateErrorUnsupportedFunctionChange", "hipGraphExecUpdateErrorUnsupportedFunctionChange", "numeric_literal");
subst("cudaGraphExecUpdateSuccess", "hipGraphExecUpdateSuccess", "numeric_literal");
subst("cudaGraphInstantiateFlagAutoFreeOnLaunch", "hipGraphInstantiateFlagAutoFreeOnLaunch", "numeric_literal");
+ subst("cudaGraphMemAttrReservedMemCurrent", "hipGraphMemAttrReservedMemCurrent", "numeric_literal");
+ subst("cudaGraphMemAttrReservedMemHigh", "hipGraphMemAttrReservedMemHigh", "numeric_literal");
+ subst("cudaGraphMemAttrUsedMemCurrent", "hipGraphMemAttrUsedMemCurrent", "numeric_literal");
+ subst("cudaGraphMemAttrUsedMemHigh", "hipGraphMemAttrUsedMemHigh", "numeric_literal");
subst("cudaGraphNodeTypeCount", "hipGraphNodeTypeCount", "numeric_literal");
subst("cudaGraphNodeTypeEmpty", "hipGraphNodeTypeEmpty", "numeric_literal");
subst("cudaGraphNodeTypeEventRecord", "hipGraphNodeTypeEventRecord", "numeric_literal");
+ subst("cudaGraphNodeTypeExtSemaphoreSignal", "hipGraphNodeTypeExtSemaphoreSignal", "numeric_literal");
+ subst("cudaGraphNodeTypeExtSemaphoreWait", "hipGraphNodeTypeExtSemaphoreWait", "numeric_literal");
subst("cudaGraphNodeTypeGraph", "hipGraphNodeTypeGraph", "numeric_literal");
subst("cudaGraphNodeTypeHost", "hipGraphNodeTypeHost", "numeric_literal");
subst("cudaGraphNodeTypeKernel", "hipGraphNodeTypeKernel", "numeric_literal");
subst("cudaGraphNodeTypeMemcpy", "hipGraphNodeTypeMemcpy", "numeric_literal");
subst("cudaGraphNodeTypeMemset", "hipGraphNodeTypeMemset", "numeric_literal");
subst("cudaGraphNodeTypeWaitEvent", "hipGraphNodeTypeWaitEvent", "numeric_literal");
+ subst("cudaGraphUserObjectMove", "hipGraphUserObjectMove", "numeric_literal");
subst("cudaGraphicsRegisterFlagsNone", "hipGraphicsRegisterFlagsNone", "numeric_literal");
subst("cudaGraphicsRegisterFlagsReadOnly", "hipGraphicsRegisterFlagsReadOnly", "numeric_literal");
subst("cudaGraphicsRegisterFlagsSurfaceLoadStore", "hipGraphicsRegisterFlagsSurfaceLoadStore", "numeric_literal");
@@ -4480,6 +4431,7 @@ sub simpleSubstitutions {
subst("cudaKernelNodeAttributeCooperative", "hipKernelNodeAttributeCooperative", "numeric_literal");
subst("cudaLimitMallocHeapSize", "hipLimitMallocHeapSize", "numeric_literal");
subst("cudaLimitPrintfFifoSize", "hipLimitPrintfFifoSize", "numeric_literal");
+ subst("cudaLimitStackSize", "hipLimitStackSize", "numeric_literal");
subst("cudaMemAccessFlagsProtNone", "hipMemAccessFlagsProtNone", "numeric_literal");
subst("cudaMemAccessFlagsProtRead", "hipMemAccessFlagsProtRead", "numeric_literal");
subst("cudaMemAccessFlagsProtReadWrite", "hipMemAccessFlagsProtReadWrite", "numeric_literal");
@@ -4517,6 +4469,7 @@ sub simpleSubstitutions {
subst("cudaMemcpyHostToHost", "hipMemcpyHostToHost", "numeric_literal");
subst("cudaMemoryTypeDevice", "hipMemoryTypeDevice", "numeric_literal");
subst("cudaMemoryTypeHost", "hipMemoryTypeHost", "numeric_literal");
+ subst("cudaMemoryTypeManaged", "hipMemoryTypeManaged", "numeric_literal");
subst("cudaReadModeElementType", "hipReadModeElementType", "numeric_literal");
subst("cudaReadModeNormalizedFloat", "hipReadModeNormalizedFloat", "numeric_literal");
subst("cudaResViewFormatFloat1", "hipResViewFormatFloat1", "numeric_literal");
@@ -4570,6 +4523,7 @@ sub simpleSubstitutions {
subst("cudaStreamCaptureStatusNone", "hipStreamCaptureStatusNone", "numeric_literal");
subst("cudaStreamSetCaptureDependencies", "hipStreamSetCaptureDependencies", "numeric_literal");
subst("cudaSuccess", "hipSuccess", "numeric_literal");
+ subst("cudaUserObjectNoDestructorSync", "hipUserObjectNoDestructorSync", "numeric_literal");
subst("CUB_MAX", "CUB_MAX", "define");
subst("CUB_MIN", "CUB_MIN", "define");
subst("CUB_NAMESPACE_BEGIN", "BEGIN_HIPCUB_NAMESPACE", "define");
@@ -5380,10 +5334,32 @@ sub warnUnsupportedDeviceFunctions {
"__vabsdiffs2",
"__vabs4",
"__vabs2",
+ "__ushort_as_bfloat16",
+ "__ushort2bfloat16_rz",
+ "__ushort2bfloat16_ru",
+ "__ushort2bfloat16_rn",
+ "__ushort2bfloat16_rd",
+ "__ull2bfloat16_rz",
+ "__ull2bfloat16_ru",
+ "__ull2bfloat16_rn",
+ "__ull2bfloat16_rd",
+ "__uint2bfloat16_rz",
+ "__uint2bfloat16_ru",
+ "__uint2bfloat16_rn",
+ "__uint2bfloat16_rd",
"__trap",
+ "__stwt",
+ "__stwb",
+ "__stcs",
+ "__stcg",
"__signbitl",
"__signbitf",
"__signbit",
+ "__short_as_bfloat16",
+ "__short2bfloat16_rz",
+ "__short2bfloat16_ru",
+ "__short2bfloat16_rn",
+ "__short2bfloat16_rd",
"__shfl_xor_sync",
"__shfl_up_sync",
"__shfl_sync",
@@ -5393,12 +5369,56 @@ sub warnUnsupportedDeviceFunctions {
"__pm2",
"__pm1",
"__pm0",
+ "__nv_cvt_halfraw_to_fp8",
+ "__nv_cvt_halfraw2_to_fp8x2",
+ "__nv_cvt_fp8x2_to_halfraw2",
+ "__nv_cvt_fp8_to_halfraw",
+ "__nv_cvt_float_to_fp8",
+ "__nv_cvt_float2_to_fp8x2",
+ "__nv_cvt_double_to_fp8",
+ "__nv_cvt_double2_to_fp8x2",
+ "__nv_cvt_bfloat16raw_to_fp8",
+ "__nv_cvt_bfloat16raw2_to_fp8x2",
+ "__lows2bfloat162",
+ "__low2bfloat162",
+ "__low2bfloat16",
+ "__ll2bfloat16_rz",
+ "__ll2bfloat16_ru",
+ "__ll2bfloat16_rn",
+ "__ll2bfloat16_rd",
+ "__ldlu",
+ "__ldcv",
"__isnanl",
"__isnanf",
"__isnan",
"__isinfl",
"__isinff",
"__isinf",
+ "__int2bfloat16_rz",
+ "__int2bfloat16_ru",
+ "__int2bfloat16_rn",
+ "__int2bfloat16_rd",
+ "__hsub_rn",
+ "__hsub2_rn",
+ "__hmul_rn",
+ "__hmul2_rn",
+ "__hmin_nan",
+ "__hmin2_nan",
+ "__hmin2",
+ "__hmin",
+ "__hmax_nan",
+ "__hmax2_nan",
+ "__hmax2",
+ "__hmax",
+ "__highs2bfloat162",
+ "__high2bfloat162",
+ "__high2bfloat16",
+ "__hfma_relu",
+ "__hfma2_relu",
+ "__hcmadd",
+ "__halves2bfloat162",
+ "__hadd_rn",
+ "__hadd2_rn",
"__fsub_rz",
"__fsub_ru",
"__fsub_rd",
@@ -5417,6 +5437,13 @@ sub warnUnsupportedDeviceFunctions {
"__fma_rz",
"__fma_ru",
"__fma_rd",
+ "__floats2bfloat162_rn",
+ "__float2bfloat16_rz",
+ "__float2bfloat16_ru",
+ "__float2bfloat16_rn",
+ "__float2bfloat16_rd",
+ "__float2bfloat162_rn",
+ "__float2bfloat16",
"__finitel",
"__finitef",
"__finite",
@@ -5435,6 +5462,8 @@ sub warnUnsupportedDeviceFunctions {
"__drcp_rz",
"__drcp_ru",
"__drcp_rd",
+ "__double2half",
+ "__double2bfloat16",
"__dmul_rz",
"__dmul_ru",
"__dmul_rd",
@@ -5445,6 +5474,35 @@ sub warnUnsupportedDeviceFunctions {
"__dadd_ru",
"__dadd_rd",
"__brkpt",
+ "__bfloat16_as_ushort",
+ "__bfloat16_as_short",
+ "__bfloat162ushort_rz",
+ "__bfloat162ushort_ru",
+ "__bfloat162ushort_rn",
+ "__bfloat162ushort_rd",
+ "__bfloat162ull_rz",
+ "__bfloat162ull_ru",
+ "__bfloat162ull_rn",
+ "__bfloat162ull_rd",
+ "__bfloat162uint_rz",
+ "__bfloat162uint_ru",
+ "__bfloat162uint_rn",
+ "__bfloat162uint_rd",
+ "__bfloat162short_rz",
+ "__bfloat162short_ru",
+ "__bfloat162short_rn",
+ "__bfloat162short_rd",
+ "__bfloat162ll_rz",
+ "__bfloat162ll_ru",
+ "__bfloat162ll_rn",
+ "__bfloat162ll_rd",
+ "__bfloat162int_rz",
+ "__bfloat162int_ru",
+ "__bfloat162int_rn",
+ "__bfloat162int_rd",
+ "__bfloat162float",
+ "__bfloat162bfloat162",
+ "__bfloat1622float2",
"_Pow_int"
)
{
@@ -5521,6 +5579,8 @@ sub warnUnsupportedFunctions {
"nvrtcGetNumSupportedArchs",
"nvrtcGetNVVMSize",
"nvrtcGetNVVM",
+ "nv_bfloat162",
+ "nv_bfloat16",
"memoryBarrier",
"libraryPropertyType_t",
"libraryPropertyType",
@@ -5714,6 +5774,7 @@ sub warnUnsupportedFunctions {
"cufftXtSetWorkAreaPolicy",
"cufftXtSetWorkArea",
"cufftXtSetGPUs",
+ "cufftXtSetDistribution",
"cufftXtQueryType_t",
"cufftXtQueryType",
"cufftXtQueryPlan",
@@ -5736,6 +5797,8 @@ sub warnUnsupportedFunctions {
"cufftXt1dFactors",
"cufftCompatibility_t",
"cufftCompatibility",
+ "cufftBox3d_t",
+ "cufftBox3d",
"cudnnWgradMode_t",
"cudnnTransformTensorEx",
"cudnnTransformTensor",
@@ -5749,6 +5812,7 @@ sub warnUnsupportedFunctions {
"cudnnSpatialTfSamplerBackward",
"cudnnSpatialTfGridGeneratorForward",
"cudnnSpatialTfGridGeneratorBackward",
+ "cudnnSignalMode_t",
"cudnnSeverity_t",
"cudnnSetTensorTransformDescriptor",
"cudnnSetTensorNdDescriptorEx",
@@ -5780,6 +5844,7 @@ sub warnUnsupportedFunctions {
"cudnnRuntimeTag_t",
"cudnnRestoreDropoutDescriptor",
"cudnnRestoreAlgorithm",
+ "cudnnResampleMode_t",
"cudnnReorderType_t",
"cudnnReorderFilterAndBias",
"cudnnReduceTensorStruct",
@@ -5804,6 +5869,7 @@ sub warnUnsupportedFunctions {
"cudnnPoolingStruct",
"cudnnPointwiseMode_t",
"cudnnPersistentRNNPlan",
+ "cudnnPaddingMode_t",
"cudnnOpsTrainVersionCheck",
"cudnnOpsInferVersionCheck",
"cudnnOpTensorStruct",
@@ -5848,6 +5914,7 @@ sub warnUnsupportedFunctions {
"cudnnGetNormalizationBackwardWorkspaceSize",
"cudnnGetMultiHeadAttnWeights",
"cudnnGetMultiHeadAttnBuffers",
+ "cudnnGetMaxDeviceVersion",
"cudnnGetFusedOpsVariantParamPackAttribute",
"cudnnGetFusedOpsConstParamPackAttribute",
"cudnnGetFoldedConvBackwardDataDescriptors",
@@ -5891,6 +5958,8 @@ sub warnUnsupportedFunctions {
"cudnnFusedOpsConstParamStruct",
"cudnnFusedOpsConstParamPack_t",
"cudnnFusedOpsConstParamLabel_t",
+ "cudnnFraction_t",
+ "cudnnFractionStruct",
"cudnnForwardMode_t",
"cudnnFoldingDirection_t",
"cudnnFindRNNForwardTrainingAlgorithmEx",
@@ -5949,8 +6018,11 @@ sub warnUnsupportedFunctions {
"cudnnBatchNormalizationForwardTrainingEx",
"cudnnBatchNormalizationBackwardEx",
"cudnnBatchNormOps_t",
+ "cudnnBackendTensorReordering_t",
"cudnnBackendSetAttribute",
"cudnnBackendNumericalNote_t",
+ "cudnnBackendNormMode_t",
+ "cudnnBackendNormFwdPhase_t",
"cudnnBackendLayoutType_t",
"cudnnBackendKnobType_t",
"cudnnBackendInitialize",
@@ -6006,8 +6078,10 @@ sub warnUnsupportedFunctions {
"cudaProfilerInitialize",
"cudaOutputMode_t",
"cudaOutputMode",
+ "cudaOccupancyMaxPotentialClusterSize",
"cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags",
"cudaOccupancyMaxPotentialBlockSizeVariableSMem",
+ "cudaOccupancyMaxActiveClusters",
"cudaOccupancyDisableCachingOverride",
"cudaOccupancyAvailableDynamicSMemPerBlock",
"cudaNvSciSyncAttrWait",
@@ -6025,8 +6099,26 @@ sub warnUnsupportedFunctions {
"cudaLimitMaxL2FetchGranularity",
"cudaLimitDevRuntimeSyncDepth",
"cudaLimitDevRuntimePendingLaunchCount",
+ "cudaLaunchKernelExC",
+ "cudaLaunchConfig_t",
+ "cudaLaunchConfig_st",
+ "cudaLaunchAttribute_st",
+ "cudaLaunchAttributeValue",
+ "cudaLaunchAttributeSynchronizationPolicy",
+ "cudaLaunchAttributeProgrammaticStreamSerialization",
+ "cudaLaunchAttributeProgrammaticEvent",
+ "cudaLaunchAttributePriority",
+ "cudaLaunchAttributeIgnore",
+ "cudaLaunchAttributeID",
+ "cudaLaunchAttributeCooperative",
+ "cudaLaunchAttributeClusterSchedulingPolicyPreference",
+ "cudaLaunchAttributeClusterDimension",
+ "cudaLaunchAttributeAccessPolicyWindow",
+ "cudaLaunchAttribute",
"cudaKeyValuePair",
"cudaKernelNodeAttributePriority",
+ "cudaKernelNodeAttributeClusterSchedulingPolicyPreference",
+ "cudaKernelNodeAttributeClusterDimension",
"cudaHostRegisterReadOnly",
"cudaGraphicsVDPAURegisterVideoSurface",
"cudaGraphicsVDPAURegisterOutputSurface",
@@ -6078,6 +6170,7 @@ sub warnUnsupportedFunctions {
"cudaGraphAddMemAllocNode",
"cudaGraphAddExternalSemaphoresWaitNode",
"cudaGraphAddExternalSemaphoresSignalNode",
+ "cudaGetTextureObjectTextureDesc_v2",
"cudaGetSurfaceReference",
"cudaGetSurfaceObjectResourceDesc",
"cudaGetParameterBufferV2",
@@ -6101,6 +6194,12 @@ sub warnUnsupportedFunctions {
"cudaGLMapFlags",
"cudaGLMapBufferObjectAsync",
"cudaGLMapBufferObject",
+ "cudaFuncAttributeRequiredClusterWidth",
+ "cudaFuncAttributeRequiredClusterHeight",
+ "cudaFuncAttributeRequiredClusterDepth",
+ "cudaFuncAttributeNonPortableClusterSizeAllowed",
+ "cudaFuncAttributeClusterSchedulingPolicyPreference",
+ "cudaFuncAttributeClusterDimMustBeSet",
"cudaFormatModeForced",
"cudaFormatModeAuto",
"cudaFlushGPUDirectRDMAWritesToOwner",
@@ -6152,6 +6251,7 @@ sub warnUnsupportedFunctions {
"cudaErrorMpsMaxConnectionsReached",
"cudaErrorMpsMaxClientsReached",
"cudaErrorMpsConnectionFailed",
+ "cudaErrorMpsClientTerminated",
"cudaErrorMixedDeviceExecution",
"cudaErrorMisalignedAddress",
"cudaErrorMemoryValueTooLarge",
@@ -6169,6 +6269,7 @@ sub warnUnsupportedFunctions {
"cudaErrorInvalidNormSetting",
"cudaErrorInvalidHostPointer",
"cudaErrorInvalidFilterSetting",
+ "cudaErrorInvalidClusterSize",
"cudaErrorInvalidChannelDescriptor",
"cudaErrorInvalidAddressSpace",
"cudaErrorIncompatibleDriverContext",
@@ -6307,6 +6408,7 @@ sub warnUnsupportedFunctions {
"cudaDevAttrGPUDirectRDMASupported",
"cudaDevAttrGPUDirectRDMAFlushWritesOptions",
"cudaDevAttrDeferredMappingCudaArraySupported",
+ "cudaDevAttrClusterLaunch",
"cudaDevAttrCanFlushRemoteWrites",
"cudaD3D9UnregisterResource",
"cudaD3D9UnmapResources",
@@ -6367,6 +6469,11 @@ sub warnUnsupportedFunctions {
"cudaD3D10DeviceListAll",
"cudaD3D10DeviceList",
"cudaCtxResetPersistingL2Cache",
+ "cudaCreateTextureObject_v2",
+ "cudaClusterSchedulingPolicySpread",
+ "cudaClusterSchedulingPolicyLoadBalancing",
+ "cudaClusterSchedulingPolicyDefault",
+ "cudaClusterSchedulingPolicy",
"cudaChannelFormatKindUnsignedNormalized8X4",
"cudaChannelFormatKindUnsignedNormalized8X2",
"cudaChannelFormatKindUnsignedNormalized8X1",
@@ -6435,6 +6542,8 @@ sub warnUnsupportedFunctions {
"cuParamSetf",
"cuParamSetTexRef",
"cuParamSetSize",
+ "cuOccupancyMaxPotentialClusterSize",
+ "cuOccupancyMaxActiveClusters",
"cuOccupancyAvailableDynamicSMemPerBlock",
"cuModuleLoadFatBinary",
"cuModuleGetSurfRef",
@@ -6466,6 +6575,7 @@ sub warnUnsupportedFunctions {
"cuMemcpy3DPeer",
"cuMemcpy",
"cuMemGetHandleForAddressRange",
+ "cuLaunchKernelEx",
"cuLaunchGridAsync",
"cuLaunchGrid",
"cuLaunchCooperativeKernelMultiDevice",
@@ -6506,8 +6616,6 @@ sub warnUnsupportedFunctions {
"cuGraphAddExternalSemaphoresSignalNode",
"cuGraphAddBatchMemOpNode",
"cuGetProcAddress",
- "cuGetErrorString",
- "cuGetErrorName",
"cuGLUnregisterBufferObject",
"cuGLUnmapBufferObjectAsync",
"cuGLUnmapBufferObject",
@@ -6592,14 +6700,45 @@ sub warnUnsupportedFunctions {
"csrsm2Info",
"csrilu02Info",
"csrgemm2Info",
+ "cl_event_flags_enum",
+ "cl_event_flags",
+ "cl_context_flags_enum",
+ "cl_context_flags",
"bsrsv2Info",
"bsrilu02Info",
"bsric02Info",
+ "__nv_saturation_t",
+ "__nv_fp8x4_storage_t",
+ "__nv_fp8x4_e5m2",
+ "__nv_fp8x4_e4m3",
+ "__nv_fp8x2_storage_t",
+ "__nv_fp8x2_e5m2",
+ "__nv_fp8x2_e4m3",
+ "__nv_fp8_storage_t",
+ "__nv_fp8_interpretation_t",
+ "__nv_fp8_e5m2",
+ "__nv_fp8_e4m3",
+ "__nv_bfloat16_raw",
+ "__nv_bfloat162_raw",
+ "__nv_bfloat162",
+ "__nv_bfloat16",
"__curand_umul",
+ "__NV_SATFINITE",
+ "__NV_NOSAT",
+ "__NV_E5M2",
+ "__NV_E4M3",
"__CUB_LP64__",
"_CUB_ASM_PTR_SIZE_",
"_CUB_ASM_PTR_",
"PATCH_LEVEL",
+ "NVCL_EVENT_SCHED_YIELD",
+ "NVCL_EVENT_SCHED_SPIN",
+ "NVCL_EVENT_SCHED_BLOCKING_SYNC",
+ "NVCL_EVENT_SCHED_AUTO",
+ "NVCL_CTX_SCHED_YIELD",
+ "NVCL_CTX_SCHED_SPIN",
+ "NVCL_CTX_SCHED_BLOCKING_SYNC",
+ "NVCL_CTX_SCHED_AUTO",
"MINOR_VERSION",
"MAX_CUFFT_ERROR",
"MAJOR_VERSION",
@@ -6638,6 +6777,14 @@ sub warnUnsupportedFunctions {
"CUmemRangeHandleType",
"CUmemAttach_flags_enum",
"CUmemAttach_flags",
+ "CUlaunchConfig_st",
+ "CUlaunchConfig",
+ "CUlaunchAttribute_st",
+ "CUlaunchAttributeValue_union",
+ "CUlaunchAttributeValue",
+ "CUlaunchAttributeID_enum",
+ "CUlaunchAttributeID",
+ "CUlaunchAttribute",
"CUjit_target_enum",
"CUjit_target",
"CUjit_fallback_enum",
@@ -6670,6 +6817,8 @@ sub warnUnsupportedFunctions {
"CUexecAffinityParam",
"CUevent_wait_flags_enum",
"CUevent_wait_flags",
+ "CUevent_sched_flags_enum",
+ "CUevent_sched_flags",
"CUevent_record_flags_enum",
"CUevent_record_flags",
"CUevent_flags_enum",
@@ -6703,10 +6852,14 @@ sub warnUnsupportedFunctions {
"CUd3d10DeviceList",
"CUctx_flags_enum",
"CUctx_flags",
+ "CUclusterSchedulingPolicy_enum",
+ "CUclusterSchedulingPolicy",
"CUarray_cubemap_face_enum",
"CUarray_cubemap_face",
"CU_TRSF_SEAMLESS_CUBEMAP",
"CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION",
+ "CU_TARGET_COMPUTE_90",
+ "CU_TARGET_COMPUTE_89",
"CU_TARGET_COMPUTE_87",
"CU_TARGET_COMPUTE_86",
"CU_TARGET_COMPUTE_80",
@@ -6778,7 +6931,18 @@ sub warnUnsupportedFunctions {
"CU_LAUNCH_PARAM_END_AS_INT",
"CU_LAUNCH_PARAM_BUFFER_SIZE_AS_INT",
"CU_LAUNCH_PARAM_BUFFER_POINTER_AS_INT",
+ "CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY",
+ "CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION",
+ "CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT",
+ "CU_LAUNCH_ATTRIBUTE_PRIORITY",
+ "CU_LAUNCH_ATTRIBUTE_IGNORE",
+ "CU_LAUNCH_ATTRIBUTE_COOPERATIVE",
+ "CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE",
+ "CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION",
+ "CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW",
"CU_KERNEL_NODE_ATTRIBUTE_PRIORITY",
+ "CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE",
+ "CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_DIMENSION",
"CU_JIT_REFERENCED_VARIABLE_NAMES",
"CU_JIT_REFERENCED_VARIABLE_COUNT",
"CU_JIT_REFERENCED_KERNEL_NAMES",
@@ -6825,6 +6989,12 @@ sub warnUnsupportedFunctions {
"CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM",
"CU_GET_PROC_ADDRESS_LEGACY_STREAM",
"CU_GET_PROC_ADDRESS_DEFAULT",
+ "CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH",
+ "CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT",
+ "CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH",
+ "CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED",
+ "CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET",
+ "CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE",
"CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_OWNER",
"CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_ALL_DEVICES",
"CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TARGET_CURRENT_CTX",
@@ -6841,6 +7011,10 @@ sub warnUnsupportedFunctions {
"CU_EXEC_AFFINITY_TYPE_MAX",
"CU_EVENT_WAIT_EXTERNAL",
"CU_EVENT_WAIT_DEFAULT",
+ "CU_EVENT_SCHED_YIELD",
+ "CU_EVENT_SCHED_SPIN",
+ "CU_EVENT_SCHED_BLOCKING_SYNC",
+ "CU_EVENT_SCHED_AUTO",
"CU_EVENT_RECORD_EXTERNAL",
"CU_EVENT_RECORD_DEFAULT",
"CU_EGL_RESOURCE_LOCATION_VIDMEM",
@@ -6947,6 +7121,7 @@ sub warnUnsupportedFunctions {
"CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED",
"CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED",
"CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED",
+ "CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH",
"CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR_V2",
"CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS",
"CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS_V2",
@@ -6979,6 +7154,9 @@ sub warnUnsupportedFunctions {
"CU_CUBEMAP_FACE_NEGATIVE_Y",
"CU_CUBEMAP_FACE_NEGATIVE_X",
"CU_CTX_FLAGS_MASK",
+ "CU_CLUSTER_SCHEDULING_POLICY_SPREAD",
+ "CU_CLUSTER_SCHEDULING_POLICY_LOAD_BALANCING",
+ "CU_CLUSTER_SCHEDULING_POLICY_DEFAULT",
"CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL",
"CU_AD_FORMAT_UNORM_INT8X4",
"CU_AD_FORMAT_UNORM_INT8X2",
@@ -7065,6 +7243,8 @@ sub warnUnsupportedFunctions {
"CUFFT_XT_FORMAT_INPUT",
"CUFFT_XT_FORMAT_INPLACE_SHUFFLED",
"CUFFT_XT_FORMAT_INPLACE",
+ "CUFFT_XT_FORMAT_DISTRIBUTED_OUTPUT",
+ "CUFFT_XT_FORMAT_DISTRIBUTED_INPUT",
"CUFFT_XT_FORMAT_1D_INPUT_SHUFFLED",
"CUFFT_WORKAREA_USER",
"CUFFT_WORKAREA_PERFORMANCE",
@@ -7084,23 +7264,33 @@ sub warnUnsupportedFunctions {
"CUFFT_COPY_DEVICE_TO_DEVICE",
"CUFFT_COMPATIBILITY_FFTW_PADDING",
"CUFFT_COMPATIBILITY_DEFAULT",
+ "CUDNN_ZERO_PAD",
"CUDNN_WGRAD_MODE_SET",
"CUDNN_WGRAD_MODE_ADD",
"CUDNN_TYPE_VOID_PTR",
+ "CUDNN_TYPE_TENSOR_REORDERING_MODE",
+ "CUDNN_TYPE_SIGNAL_MODE",
+ "CUDNN_TYPE_RESAMPLE_MODE",
"CUDNN_TYPE_REDUCTION_OPERATOR_TYPE",
"CUDNN_TYPE_POINTWISE_MODE",
+ "CUDNN_TYPE_PADDING_MODE",
"CUDNN_TYPE_NUMERICAL_NOTE",
+ "CUDNN_TYPE_NORM_MODE",
+ "CUDNN_TYPE_NORM_FWD_PHASE",
"CUDNN_TYPE_NAN_PROPOGATION",
"CUDNN_TYPE_LAYOUT_TYPE",
"CUDNN_TYPE_KNOB_TYPE",
"CUDNN_TYPE_INT64",
+ "CUDNN_TYPE_INT32",
"CUDNN_TYPE_HEUR_MODE",
"CUDNN_TYPE_HANDLE",
"CUDNN_TYPE_GENSTATS_MODE",
+ "CUDNN_TYPE_FRACTION",
"CUDNN_TYPE_FLOAT",
"CUDNN_TYPE_DOUBLE",
"CUDNN_TYPE_DATA_TYPE",
"CUDNN_TYPE_CONVOLUTION_MODE",
+ "CUDNN_TYPE_CHAR",
"CUDNN_TYPE_BOOLEAN",
"CUDNN_TYPE_BN_FINALIZE_STATS_MODE",
"CUDNN_TYPE_BEHAVIOR_NOTE",
@@ -7108,10 +7298,14 @@ sub warnUnsupportedFunctions {
"CUDNN_TYPE_ATTRIB_NAME",
"CUDNN_TRANSFORM_UNFOLD",
"CUDNN_TRANSFORM_FOLD",
+ "CUDNN_TENSOR_REORDERING_NONE",
+ "CUDNN_TENSOR_REORDERING_INT8x32",
"CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION",
"CUDNN_STATUS_VERSION_MISMATCH",
"CUDNN_STATUS_RUNTIME_IN_PROGRESS",
"CUDNN_STATUS_RUNTIME_FP_OVERFLOW",
+ "CUDNN_SIGNAL_WAIT",
+ "CUDNN_SIGNAL_SET",
"CUDNN_SEV_WARNING_EN",
"CUDNN_SEV_WARNING",
"CUDNN_SEV_INFO_EN",
@@ -7137,6 +7331,12 @@ sub warnUnsupportedFunctions {
"CUDNN_RNN_CLIP_NONE",
"CUDNN_RNN_CLIP_MINMAX",
"CUDNN_RNN_ALGO_COUNT",
+ "CUDNN_RESAMPLE_NEAREST",
+ "CUDNN_RESAMPLE_MAXPOOL",
+ "CUDNN_RESAMPLE_BILINEAR",
+ "CUDNN_RESAMPLE_AVGPOOL_INCLUDE_PADDING",
+ "CUDNN_RESAMPLE_AVGPOOL_EXCLUDE_PADDING",
+ "CUDNN_RESAMPLE_AVGPOOL",
"CUDNN_PTR_ZDATA",
"CUDNN_PTR_YSUM",
"CUDNN_PTR_YSQSUM",
@@ -7166,23 +7366,53 @@ sub warnUnsupportedFunctions {
"CUDNN_PTR_16B_ALIGNED",
"CUDNN_POINTWISE_TANH_FWD",
"CUDNN_POINTWISE_TANH_BWD",
+ "CUDNN_POINTWISE_TAN",
"CUDNN_POINTWISE_SWISH_FWD",
"CUDNN_POINTWISE_SWISH_BWD",
+ "CUDNN_POINTWISE_SUB",
"CUDNN_POINTWISE_SQRT",
"CUDNN_POINTWISE_SOFTPLUS_FWD",
"CUDNN_POINTWISE_SOFTPLUS_BWD",
+ "CUDNN_POINTWISE_SIN",
"CUDNN_POINTWISE_SIGMOID_FWD",
"CUDNN_POINTWISE_SIGMOID_BWD",
+ "CUDNN_POINTWISE_RSQRT",
"CUDNN_POINTWISE_RELU_FWD",
"CUDNN_POINTWISE_RELU_BWD",
+ "CUDNN_POINTWISE_POW",
+ "CUDNN_POINTWISE_NEG",
"CUDNN_POINTWISE_MUL",
+ "CUDNN_POINTWISE_MOD",
"CUDNN_POINTWISE_MIN",
"CUDNN_POINTWISE_MAX",
+ "CUDNN_POINTWISE_LOGICAL_OR",
+ "CUDNN_POINTWISE_LOGICAL_NOT",
+ "CUDNN_POINTWISE_LOGICAL_AND",
+ "CUDNN_POINTWISE_LOG",
+ "CUDNN_POINTWISE_IDENTITY",
+ "CUDNN_POINTWISE_GEN_INDEX",
"CUDNN_POINTWISE_GELU_FWD",
"CUDNN_POINTWISE_GELU_BWD",
+ "CUDNN_POINTWISE_GELU_APPROX_TANH_FWD",
+ "CUDNN_POINTWISE_GELU_APPROX_TANH_BWD",
+ "CUDNN_POINTWISE_FLOOR",
+ "CUDNN_POINTWISE_EXP",
+ "CUDNN_POINTWISE_ERF",
"CUDNN_POINTWISE_ELU_FWD",
"CUDNN_POINTWISE_ELU_BWD",
+ "CUDNN_POINTWISE_DIV",
+ "CUDNN_POINTWISE_COS",
+ "CUDNN_POINTWISE_CMP_NEQ",
+ "CUDNN_POINTWISE_CMP_LT",
+ "CUDNN_POINTWISE_CMP_LE",
+ "CUDNN_POINTWISE_CMP_GT",
+ "CUDNN_POINTWISE_CMP_GE",
+ "CUDNN_POINTWISE_CMP_EQ",
+ "CUDNN_POINTWISE_CEIL",
+ "CUDNN_POINTWISE_BINARY_SELECT",
+ "CUDNN_POINTWISE_ADD_SQUARE",
"CUDNN_POINTWISE_ADD",
+ "CUDNN_POINTWISE_ABS",
"CUDNN_PATCHLEVEL",
"CUDNN_PARAM_ZDESC",
"CUDNN_PARAM_ZDATA_PLACEHOLDER",
@@ -7230,6 +7460,9 @@ sub warnUnsupportedFunctions {
"CUDNN_OPS_INFER_PATCH",
"CUDNN_OPS_INFER_MINOR",
"CUDNN_OPS_INFER_MAJOR",
+ "CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_6x6",
+ "CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_4x4",
+ "CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_13x13",
"CUDNN_NUMERICAL_NOTE_WINOGRAD",
"CUDNN_NUMERICAL_NOTE_TYPE_COUNT",
"CUDNN_NUMERICAL_NOTE_TENSOR_CORE",
@@ -7243,9 +7476,12 @@ sub warnUnsupportedFunctions {
"CUDNN_NORM_OPS_NORM_ADD_ACTIVATION",
"CUDNN_NORM_OPS_NORM_ACTIVATION",
"CUDNN_NORM_OPS_NORM",
+ "CUDNN_NORM_FWD_TRAINING",
+ "CUDNN_NORM_FWD_INFERENCE",
"CUDNN_NORM_ALGO_STANDARD",
"CUDNN_NORM_ALGO_PERSIST",
"CUDNN_NON_DETERMINISTIC",
+ "CUDNN_NEG_INF_PAD",
"CUDNN_MINOR",
"CUDNN_MH_ATTN_V_WEIGHTS",
"CUDNN_MH_ATTN_V_BIASES",
@@ -7267,9 +7503,14 @@ sub warnUnsupportedFunctions {
"CUDNN_LAYOUT_TYPE_PREFERRED_NHWC",
"CUDNN_LAYOUT_TYPE_PREFERRED_NCHW",
"CUDNN_LAYOUT_TYPE_COUNT",
+ "CUDNN_LAYER_NORM",
+ "CUDNN_KNOB_TYPE_WORKSPACE",
"CUDNN_KNOB_TYPE_WINO_TILE",
"CUDNN_KNOB_TYPE_USE_TEX",
"CUDNN_KNOB_TYPE_TILE_SIZE",
+ "CUDNN_KNOB_TYPE_TILE_CGA_N",
+ "CUDNN_KNOB_TYPE_TILE_CGA_M",
+ "CUDNN_KNOB_TYPE_TILE_CGA",
"CUDNN_KNOB_TYPE_TILEK",
"CUDNN_KNOB_TYPE_SWIZZLE",
"CUDNN_KNOB_TYPE_STAGES",
@@ -7293,9 +7534,13 @@ sub warnUnsupportedFunctions {
"CUDNN_KNOB_TYPE_CTA_SPLIT_K_MODE",
"CUDNN_KNOB_TYPE_COUNTS",
"CUDNN_KNOB_TYPE_CHUNK_K",
+ "CUDNN_INSTANCE_NORM",
"CUDNN_HEUR_MODE_INSTANT",
+ "CUDNN_HEUR_MODE_FALLBACK",
"CUDNN_HEUR_MODE_B",
+ "CUDNN_HEUR_MODE_A",
"CUDNN_HEUR_MODES_COUNT",
+ "CUDNN_GROUP_NORM",
"CUDNN_GENSTATS_SUM_SQSUM",
"CUDNN_FWD_MODE_TRAINING",
"CUDNN_FWD_MODE_INFERENCE",
@@ -7310,6 +7555,7 @@ sub warnUnsupportedFunctions {
"CUDNN_ERRQUERY_RAWCODE",
"CUDNN_ERRQUERY_NONBLOCKING",
"CUDNN_ERRQUERY_BLOCKING",
+ "CUDNN_EDGE_VAL_PAD",
"CUDNN_DIVNORM_PRECOMPUTED_MEANS",
"CUDNN_DIM_MAX",
"CUDNN_DETERMINISTIC",
@@ -7318,6 +7564,9 @@ sub warnUnsupportedFunctions {
"CUDNN_DATA_UINT8",
"CUDNN_DATA_INT8x32",
"CUDNN_DATA_INT64",
+ "CUDNN_DATA_FP8_E5M2",
+ "CUDNN_DATA_FP8_E4M3",
+ "CUDNN_DATA_BOOLEAN",
"CUDNN_DATA_BFLOAT16",
"CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC",
"CUDNN_CTC_LOSS_ALGO_DETERMINISTIC",
@@ -7331,20 +7580,30 @@ sub warnUnsupportedFunctions {
"CUDNN_BN_FINALIZE_STATISTICS_INFERENCE",
"CUDNN_BEHAVIOR_NOTE_TYPE_COUNT",
"CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION",
+ "CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER",
+ "CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER",
+ "CUDNN_BATCH_NORM",
"CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION",
"CUDNN_BATCHNORM_OPS_BN_ACTIVATION",
"CUDNN_BATCHNORM_OPS_BN",
"CUDNN_BACKEND_VARIANT_PACK_DESCRIPTOR",
"CUDNN_BACKEND_TENSOR_DESCRIPTOR",
+ "CUDNN_BACKEND_RESAMPLE_DESCRIPTOR",
"CUDNN_BACKEND_REDUCTION_DESCRIPTOR",
"CUDNN_BACKEND_POINTWISE_DESCRIPTOR",
+ "CUDNN_BACKEND_OPERATION_SIGNAL_DESCRIPTOR",
+ "CUDNN_BACKEND_OPERATION_RESAMPLE_FWD_DESCRIPTOR",
+ "CUDNN_BACKEND_OPERATION_RESAMPLE_BWD_DESCRIPTOR",
"CUDNN_BACKEND_OPERATION_REDUCTION_DESCRIPTOR",
"CUDNN_BACKEND_OPERATION_POINTWISE_DESCRIPTOR",
+ "CUDNN_BACKEND_OPERATION_NORM_FORWARD_DESCRIPTOR",
+ "CUDNN_BACKEND_OPERATION_NORM_BACKWARD_DESCRIPTOR",
"CUDNN_BACKEND_OPERATION_MATMUL_DESCRIPTOR",
"CUDNN_BACKEND_OPERATION_GEN_STATS_DESCRIPTOR",
"CUDNN_BACKEND_OPERATION_CONVOLUTION_FORWARD_DESCRIPTOR",
"CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_FILTER_DESCRIPTOR",
"CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_DATA_DESCRIPTOR",
+ "CUDNN_BACKEND_OPERATION_CONCAT_DESCRIPTOR",
"CUDNN_BACKEND_OPERATION_BN_FINALIZE_STATISTICS_DESCRIPTOR",
"CUDNN_BACKEND_OPERATION_BN_BWD_WEIGHTS_DESCRIPTOR",
"CUDNN_BACKEND_OPERATIONGRAPH_DESCRIPTOR",
@@ -7366,11 +7625,21 @@ sub warnUnsupportedFunctions {
"CUDNN_ATTR_TENSOR_VECTORIZED_DIMENSION",
"CUDNN_ATTR_TENSOR_UNIQUE_ID",
"CUDNN_ATTR_TENSOR_STRIDES",
+ "CUDNN_ATTR_TENSOR_REORDERING_MODE",
"CUDNN_ATTR_TENSOR_IS_VIRTUAL",
"CUDNN_ATTR_TENSOR_IS_BY_VALUE",
"CUDNN_ATTR_TENSOR_DIMENSIONS",
"CUDNN_ATTR_TENSOR_DATA_TYPE",
"CUDNN_ATTR_TENSOR_BYTE_ALIGNMENT",
+ "CUDNN_ATTR_RESAMPLE_WINDOW_DIMS",
+ "CUDNN_ATTR_RESAMPLE_STRIDES",
+ "CUDNN_ATTR_RESAMPLE_SPATIAL_DIMS",
+ "CUDNN_ATTR_RESAMPLE_PRE_PADDINGS",
+ "CUDNN_ATTR_RESAMPLE_POST_PADDINGS",
+ "CUDNN_ATTR_RESAMPLE_PADDING_MODE",
+ "CUDNN_ATTR_RESAMPLE_NAN_PROPAGATION",
+ "CUDNN_ATTR_RESAMPLE_MODE",
+ "CUDNN_ATTR_RESAMPLE_COMP_TYPE",
"CUDNN_ATTR_REDUCTION_OPERATOR",
"CUDNN_ATTR_REDUCTION_COMP_TYPE",
"CUDNN_ATTR_POINTWISE_SWISH_BETA",
@@ -7382,17 +7651,62 @@ sub warnUnsupportedFunctions {
"CUDNN_ATTR_POINTWISE_MODE",
"CUDNN_ATTR_POINTWISE_MATH_PREC",
"CUDNN_ATTR_POINTWISE_ELU_ALPHA",
+ "CUDNN_ATTR_POINTWISE_AXIS",
+ "CUDNN_ATTR_OPERATION_SIGNAL_YDESC",
+ "CUDNN_ATTR_OPERATION_SIGNAL_XDESC",
+ "CUDNN_ATTR_OPERATION_SIGNAL_VALUE",
+ "CUDNN_ATTR_OPERATION_SIGNAL_MODE",
+ "CUDNN_ATTR_OPERATION_SIGNAL_FLAGDESC",
+ "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_YDESC",
+ "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_XDESC",
+ "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_IDXDESC",
+ "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_DESC",
+ "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_BETA",
+ "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_ALPHA",
+ "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_IDXDESC",
+ "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DYDESC",
+ "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DXDESC",
+ "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DESC",
+ "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_BETA",
+ "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_ALPHA",
"CUDNN_ATTR_OPERATION_REDUCTION_YDESC",
"CUDNN_ATTR_OPERATION_REDUCTION_XDESC",
"CUDNN_ATTR_OPERATION_REDUCTION_DESC",
"CUDNN_ATTR_OPERATION_POINTWISE_YDESC",
"CUDNN_ATTR_OPERATION_POINTWISE_XDESC",
+ "CUDNN_ATTR_OPERATION_POINTWISE_TDESC",
"CUDNN_ATTR_OPERATION_POINTWISE_PW_DESCRIPTOR",
"CUDNN_ATTR_OPERATION_POINTWISE_DYDESC",
"CUDNN_ATTR_OPERATION_POINTWISE_DXDESC",
"CUDNN_ATTR_OPERATION_POINTWISE_BDESC",
"CUDNN_ATTR_OPERATION_POINTWISE_ALPHA2",
"CUDNN_ATTR_OPERATION_POINTWISE_ALPHA1",
+ "CUDNN_ATTR_OPERATION_NORM_FWD_YDESC",
+ "CUDNN_ATTR_OPERATION_NORM_FWD_XDESC",
+ "CUDNN_ATTR_OPERATION_NORM_FWD_SCALE_DESC",
+ "CUDNN_ATTR_OPERATION_NORM_FWD_PHASE",
+ "CUDNN_ATTR_OPERATION_NORM_FWD_PEER_STAT_DESCS",
+ "CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_VAR_DESC",
+ "CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_MEAN_DESC",
+ "CUDNN_ATTR_OPERATION_NORM_FWD_MODE",
+ "CUDNN_ATTR_OPERATION_NORM_FWD_MEAN_DESC",
+ "CUDNN_ATTR_OPERATION_NORM_FWD_INV_VARIANCE_DESC",
+ "CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_VAR_DESC",
+ "CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_MEAN_DESC",
+ "CUDNN_ATTR_OPERATION_NORM_FWD_EXP_AVG_FACTOR_DESC",
+ "CUDNN_ATTR_OPERATION_NORM_FWD_EPSILON_DESC",
+ "CUDNN_ATTR_OPERATION_NORM_FWD_BIAS_DESC",
+ "CUDNN_ATTR_OPERATION_NORM_BWD_XDESC",
+ "CUDNN_ATTR_OPERATION_NORM_BWD_SCALE_DESC",
+ "CUDNN_ATTR_OPERATION_NORM_BWD_PEER_STAT_DESCS",
+ "CUDNN_ATTR_OPERATION_NORM_BWD_MODE",
+ "CUDNN_ATTR_OPERATION_NORM_BWD_MEAN_DESC",
+ "CUDNN_ATTR_OPERATION_NORM_BWD_INV_VARIANCE_DESC",
+ "CUDNN_ATTR_OPERATION_NORM_BWD_EPSILON_DESC",
+ "CUDNN_ATTR_OPERATION_NORM_BWD_DYDESC",
+ "CUDNN_ATTR_OPERATION_NORM_BWD_DXDESC",
+ "CUDNN_ATTR_OPERATION_NORM_BWD_DSCALE_DESC",
+ "CUDNN_ATTR_OPERATION_NORM_BWD_DBIAS_DESC",
"CUDNN_ATTR_OPERATION_MATMUL_IRREGULARLY_STRIDED_BATCH_COUNT",
"CUDNN_ATTR_OPERATION_MATMUL_DESC",
"CUDNN_ATTR_OPERATION_MATMUL_CDESC",
@@ -7421,6 +7735,10 @@ sub warnUnsupportedFunctions {
"CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_CONV_DESC",
"CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_BETA",
"CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_ALPHA",
+ "CUDNN_ATTR_OPERATION_CONCAT_OUTPUT_DESC",
+ "CUDNN_ATTR_OPERATION_CONCAT_INPUT_DESCS",
+ "CUDNN_ATTR_OPERATION_CONCAT_INPLACE_INDEX",
+ "CUDNN_ATTR_OPERATION_CONCAT_AXIS",
"CUDNN_ATTR_OPERATION_BN_FINALIZE_Y_SUM_DESC",
"CUDNN_ATTR_OPERATION_BN_FINALIZE_Y_SQ_SUM_DESC",
"CUDNN_ATTR_OPERATION_BN_FINALIZE_UPDATED_RUNNING_VAR_DESC",
@@ -7467,6 +7785,7 @@ sub warnUnsupportedFunctions {
"CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_ATTRIBUTES",
"CUDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE",
"CUDNN_ATTR_EXECUTION_PLAN_RUN_ONLY_INTERMEDIATE_UIDS",
+ "CUDNN_ATTR_EXECUTION_PLAN_JSON_REPRESENTATION",
"CUDNN_ATTR_EXECUTION_PLAN_HANDLE",
"CUDNN_ATTR_EXECUTION_PLAN_ENGINE_CONFIG",
"CUDNN_ATTR_EXECUTION_PLAN_COMPUTED_INTERMEDIATE_UIDS",
@@ -7500,12 +7819,6 @@ sub warnUnsupportedFunctions {
"CUDNN_ADV_INFER_PATCH",
"CUDNN_ADV_INFER_MINOR",
"CUDNN_ADV_INFER_MAJOR",
- "CUDA_R_64U",
- "CUDA_R_64I",
- "CUDA_R_4U",
- "CUDA_R_4I",
- "CUDA_R_16U",
- "CUDA_R_16I",
"CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_v1",
"CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st",
"CUDA_POINTER_ATTRIBUTE_P2P_TOKENS",
@@ -7548,11 +7861,13 @@ sub warnUnsupportedFunctions {
"CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED",
"CUDA_ERROR_MPS_MAX_CLIENTS_REACHED",
"CUDA_ERROR_MPS_CONNECTION_FAILED",
+ "CUDA_ERROR_MPS_CLIENT_TERMINATED",
"CUDA_ERROR_MISALIGNED_ADDRESS",
"CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING",
"CUDA_ERROR_JIT_COMPILER_NOT_FOUND",
"CUDA_ERROR_JIT_COMPILATION_DISABLED",
"CUDA_ERROR_INVALID_PC",
+ "CUDA_ERROR_INVALID_CLUSTER_SIZE",
"CUDA_ERROR_INVALID_ADDRESS_SPACE",
"CUDA_ERROR_ILLEGAL_INSTRUCTION",
"CUDA_ERROR_HARDWARE_STACK_ERROR",
@@ -7561,12 +7876,6 @@ sub warnUnsupportedFunctions {
"CUDA_ERROR_DEVICE_NOT_LICENSED",
"CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE",
"CUDA_EGL_MAX_PLANES",
- "CUDA_C_64U",
- "CUDA_C_64I",
- "CUDA_C_4U",
- "CUDA_C_4I",
- "CUDA_C_16U",
- "CUDA_C_16I",
"CUDA_CB",
"CUDA_BATCH_MEM_OP_NODE_PARAMS_st",
"CUDA_BATCH_MEM_OP_NODE_PARAMS",
@@ -7651,6 +7960,8 @@ sub warnHipOnlyUnsupportedFunctions {
my $k = 0;
foreach $func (
"cublasZtrttp",
+ "cublasZtrmm_v2",
+ "cublasZtrmm",
"cublasZtpttr",
"cublasZmatinvBatched",
"cublasZgemm3m",
@@ -7659,6 +7970,8 @@ sub warnHipOnlyUnsupportedFunctions {
"cublasUint8gemmBias",
"cublasSwapEx",
"cublasStrttp",
+ "cublasStrmm_v2",
+ "cublasStrmm",
"cublasStpttr",
"cublasSmatinvBatched",
"cublasShutdown",
@@ -7690,16 +8003,19 @@ sub warnHipOnlyUnsupportedFunctions {
"cublasGetCudartVersion",
"cublasFree",
"cublasDtrttp",
+ "cublasDtrmm_v2",
+ "cublasDtrmm",
"cublasDtpttr",
"cublasDmatinvBatched",
"cublasDgelsBatched",
"cublasCtrttp",
+ "cublasCtrmm_v2",
+ "cublasCtrmm",
"cublasCtpttr",
"cublasCsyrkEx",
"cublasCsyrk3mEx",
"cublasCopyEx",
"cublasContext",
- "cublasComputeType_t",
"cublasCmatinvBatched",
"cublasCherkEx",
"cublasCherk3mEx",
@@ -7711,6 +8027,20 @@ sub warnHipOnlyUnsupportedFunctions {
"cublasCgelsBatched",
"cublasAsumEx",
"cublasAlloc",
+ "CUDA_R_8F_E5M2",
+ "CUDA_R_8F_E4M3",
+ "CUDA_R_64U",
+ "CUDA_R_64I",
+ "CUDA_R_4U",
+ "CUDA_R_4I",
+ "CUDA_R_16U",
+ "CUDA_R_16I",
+ "CUDA_C_64U",
+ "CUDA_C_64I",
+ "CUDA_C_4U",
+ "CUDA_C_4I",
+ "CUDA_C_16U",
+ "CUDA_C_16I",
"CUBLAS_VER_PATCH",
"CUBLAS_VER_MINOR",
"CUBLAS_VER_MAJOR",
@@ -7816,7 +8146,6 @@ sub warnRocOnlyUnsupportedFunctions {
"cublasSetMathMode",
"cublasSetLoggerCallback",
"cublasSetKernelStream",
- "cublasSetAtomicsMode",
"cublasRotmgEx",
"cublasRotmEx",
"cublasRotgEx",
@@ -7824,12 +8153,10 @@ sub warnRocOnlyUnsupportedFunctions {
"cublasMath_t",
"cublasLoggerConfigure",
"cublasLogCallback",
- "cublasInit",
"cublasIaminEx",
"cublasIamaxEx",
"cublasGetVersion_v2",
"cublasGetVersion",
- "cublasGetStatusString",
"cublasGetStatusName",
"cublasGetSmCountTarget",
"cublasGetProperty",
@@ -7837,7 +8164,6 @@ sub warnRocOnlyUnsupportedFunctions {
"cublasGetLoggerCallback",
"cublasGetError",
"cublasGetCudartVersion",
- "cublasGetAtomicsMode",
"cublasFree",
"cublasDtrttp",
"cublasDtpttr",
@@ -7852,7 +8178,6 @@ sub warnRocOnlyUnsupportedFunctions {
"cublasCsyrkEx",
"cublasCsyrk3mEx",
"cublasCopyEx",
- "cublasComputeType_t",
"cublasCmatinvBatched",
"cublasCherkEx",
"cublasCherk3mEx",
@@ -7868,6 +8193,20 @@ sub warnRocOnlyUnsupportedFunctions {
"cublasCgelsBatched",
"cublasAsumEx",
"cublasAlloc",
+ "CUDA_R_8F_E5M2",
+ "CUDA_R_8F_E4M3",
+ "CUDA_R_64U",
+ "CUDA_R_64I",
+ "CUDA_R_4U",
+ "CUDA_R_4I",
+ "CUDA_R_16U",
+ "CUDA_R_16I",
+ "CUDA_C_64U",
+ "CUDA_C_64I",
+ "CUDA_C_4U",
+ "CUDA_C_4I",
+ "CUDA_C_16U",
+ "CUDA_C_16I",
"CUBLAS_VER_PATCH",
"CUBLAS_VER_MINOR",
"CUBLAS_VER_MAJOR",
@@ -7955,7 +8294,7 @@ if ($help) {
print STDERR "$USAGE\n";
}
if ($version) {
- print STDERR "HIP version 5.3.0\n";
+ print STDERR "HIP version 5.4.0\n";
}
while (@ARGV) {
$fileName=shift (@ARGV);
@@ -8084,7 +8423,7 @@ while (@ARGV) {
transformHostFunctions();
# TODO: would like to move this code outside loop but it uses $_ which contains the whole file
unless ($no_output) {
- my $apiCalls = $ft{'error'} + $ft{'init'} + $ft{'version'} + $ft{'device'} + $ft{'context'} + $ft{'module'} + $ft{'memory'} + $ft{'virtual_memory'} + $ft{'stream_ordered_memory'} + $ft{'addressing'} + $ft{'stream'} + $ft{'event'} + $ft{'external_resource_interop'} + $ft{'stream_memory'} + $ft{'execution'} + $ft{'graph'} + $ft{'occupancy'} + $ft{'texture'} + $ft{'surface'} + $ft{'peer'} + $ft{'graphics'} + $ft{'interactions'} + $ft{'profiler'} + $ft{'openGL'} + $ft{'D3D9'} + $ft{'D3D10'} + $ft{'D3D11'} + $ft{'VDPAU'} + $ft{'EGL'} + $ft{'thread'} + $ft{'complex'} + $ft{'library'} + $ft{'device_library'} + $ft{'include'} + $ft{'include_cuda_main_header'} + $ft{'include_cuda_main_header_v2'} + $ft{'type'} + $ft{'literal'} + $ft{'numeric_literal'} + $ft{'define'};
+ my $apiCalls = $ft{'error'} + $ft{'init'} + $ft{'version'} + $ft{'device'} + $ft{'context'} + $ft{'module'} + $ft{'memory'} + $ft{'virtual_memory'} + $ft{'stream_ordered_memory'} + $ft{'addressing'} + $ft{'stream'} + $ft{'event'} + $ft{'external_resource_interop'} + $ft{'stream_memory'} + $ft{'execution'} + $ft{'graph'} + $ft{'occupancy'} + $ft{'texture'} + $ft{'surface'} + $ft{'peer'} + $ft{'graphics'} + $ft{'interactions'} + $ft{'profiler'} + $ft{'openGL'} + $ft{'D3D9'} + $ft{'D3D10'} + $ft{'D3D11'} + $ft{'VDPAU'} + $ft{'EGL'} + $ft{'thread'} + $ft{'complex'} + $ft{'library'} + $ft{'device_library'} + $ft{'device_type'} + $ft{'include'} + $ft{'include_cuda_main_header'} + $ft{'include_cuda_main_header_v2'} + $ft{'type'} + $ft{'literal'} + $ft{'numeric_literal'} + $ft{'define'};
my $kernStuff = $hasDeviceCode + $ft{'kernel_launch'} + $ft{'device_function'};
my $totalCalls = $apiCalls + $kernStuff;
$is_dos = m/\r\n$/;
diff --git a/doc/markdown/CUBLAS_API_supported_by_HIP.md b/doc/markdown/CUBLAS_API_supported_by_HIP.md
index 238db117..52f99a91 100644
--- a/doc/markdown/CUBLAS_API_supported_by_HIP.md
+++ b/doc/markdown/CUBLAS_API_supported_by_HIP.md
@@ -96,7 +96,7 @@
|`CUBLAS_VER_MINOR`|10.1| | | | | | | |
|`CUBLAS_VER_PATCH`|10.1| | | | | | | |
|`cublasAtomicsMode_t`| | | |`hipblasAtomicsMode_t`|3.10.0| | | |
-|`cublasComputeType_t`|11.0| | | | | | | |
+|`cublasComputeType_t`|11.0| | |`hipblasDatatype_t`|1.8.2| | | |
|`cublasContext`| | | | | | | | |
|`cublasDataType_t`|7.5| | |`hipblasDatatype_t`|1.8.2| | | |
|`cublasDiagType_t`| | | |`hipblasDiagType_t`|1.8.2| | | |
@@ -140,6 +140,8 @@
|`CUDA_R_64F`|8.0| | |`HIPBLAS_R_64F`|1.8.2| | | |
|`CUDA_R_64I`|11.0| | | | | | | |
|`CUDA_R_64U`|11.0| | | | | | | |
+|`CUDA_R_8F_E4M3`|11.8| | | | | | | |
+|`CUDA_R_8F_E5M2`|11.8| | | | | | | |
|`CUDA_R_8I`|8.0| | |`HIPBLAS_R_8I`|3.0.0| | | |
|`CUDA_R_8U`|8.0| | |`HIPBLAS_R_8U`|3.0.0| | | |
|`cudaDataType`|8.0| | |`hipblasDatatype_t`|1.8.2| | | |
@@ -483,8 +485,8 @@
|`cublasCsyrk`| | | |`hipblasCsyrk`|3.5.0| | | |
|`cublasCsyrk_v2`| | | |`hipblasCsyrk`|3.5.0| | | |
|`cublasCsyrkx`| | | |`hipblasCsyrkx`|3.5.0| | | |
-|`cublasCtrmm`| | | |`hipblasCtrmm`|3.5.0| | | |
-|`cublasCtrmm_v2`| | | |`hipblasCtrmm`|3.5.0| | | |
+|`cublasCtrmm`| | | | | | | | |
+|`cublasCtrmm_v2`| | | | | | | | |
|`cublasCtrsm`| | | |`hipblasCtrsm`|3.5.0| | | |
|`cublasCtrsm_v2`| | | |`hipblasCtrsm`|3.5.0| | | |
|`cublasDgemm`| | | |`hipblasDgemm`|1.8.2| | | |
@@ -498,8 +500,8 @@
|`cublasDsyrk`| | | |`hipblasDsyrk`|3.5.0| | | |
|`cublasDsyrk_v2`| | | |`hipblasDsyrk`|3.5.0| | | |
|`cublasDsyrkx`| | | |`hipblasDsyrkx`|3.5.0| | | |
-|`cublasDtrmm`| | | |`hipblasDtrmm`|3.2.0| | | |
-|`cublasDtrmm_v2`| | | |`hipblasDtrmm`|3.2.0| | | |
+|`cublasDtrmm`| | | | | | | | |
+|`cublasDtrmm_v2`| | | | | | | | |
|`cublasDtrsm`| | | |`hipblasDtrsm`|1.8.2| | | |
|`cublasDtrsm_v2`| | | |`hipblasDtrsm`|1.8.2| | | |
|`cublasHgemm`|7.5| | |`hipblasHgemm`|1.8.2| | | |
@@ -516,8 +518,8 @@
|`cublasSsyrk`| | | |`hipblasSsyrk`|3.5.0| | | |
|`cublasSsyrk_v2`| | | |`hipblasSsyrk`|3.5.0| | | |
|`cublasSsyrkx`| | | |`hipblasSsyrkx`|3.5.0| | | |
-|`cublasStrmm`| | | |`hipblasStrmm`|3.2.0| | | |
-|`cublasStrmm_v2`| | | |`hipblasStrmm`|3.2.0| | | |
+|`cublasStrmm`| | | | | | | | |
+|`cublasStrmm_v2`| | | | | | | | |
|`cublasStrsm`| | | |`hipblasStrsm`|1.8.2| | | |
|`cublasStrsm_v2`| | | |`hipblasStrsm`|1.8.2| | | |
|`cublasZgemm`| | | |`hipblasZgemm`|1.8.2| | | |
@@ -539,8 +541,8 @@
|`cublasZsyrk`| | | |`hipblasZsyrk`|3.5.0| | | |
|`cublasZsyrk_v2`| | | |`hipblasZsyrk`|3.5.0| | | |
|`cublasZsyrkx`| | | |`hipblasZsyrkx`|3.5.0| | | |
-|`cublasZtrmm`| | | |`hipblasZtrmm`|3.5.0| | | |
-|`cublasZtrmm_v2`| | | |`hipblasZtrmm`|3.5.0| | | |
+|`cublasZtrmm`| | | | | | | | |
+|`cublasZtrmm_v2`| | | | | | | | |
|`cublasZtrsm`| | | |`hipblasZtrsm`|3.5.0| | | |
|`cublasZtrsm_v2`| | | |`hipblasZtrsm`|3.5.0| | | |
diff --git a/doc/markdown/CUDA_Device_API_supported_by_HIP.md b/doc/markdown/CUDA_Device_API_supported_by_HIP.md
index f9310b6e..a8275f1b 100644
--- a/doc/markdown/CUDA_Device_API_supported_by_HIP.md
+++ b/doc/markdown/CUDA_Device_API_supported_by_HIP.md
@@ -10,6 +10,35 @@
|`__assert_fail`| | | |`__assert_fail`|1.9.0| | | |
|`__assertfail`| | | |`__assertfail`|1.9.0| | | |
|`__ballot`| | | |`__ballot`|1.6.0| | | |
+|`__bfloat1622float2`|11.0| | | | | | | |
+|`__bfloat162bfloat162`|11.0| | | | | | | |
+|`__bfloat162float`|11.0| | | | | | | |
+|`__bfloat162int_rd`|11.0| | | | | | | |
+|`__bfloat162int_rn`|11.0| | | | | | | |
+|`__bfloat162int_ru`|11.0| | | | | | | |
+|`__bfloat162int_rz`|11.0| | | | | | | |
+|`__bfloat162ll_rd`|11.0| | | | | | | |
+|`__bfloat162ll_rn`|11.0| | | | | | | |
+|`__bfloat162ll_ru`|11.0| | | | | | | |
+|`__bfloat162ll_rz`|11.0| | | | | | | |
+|`__bfloat162short_rd`|11.0| | | | | | | |
+|`__bfloat162short_rn`|11.0| | | | | | | |
+|`__bfloat162short_ru`|11.0| | | | | | | |
+|`__bfloat162short_rz`|11.0| | | | | | | |
+|`__bfloat162uint_rd`|11.0| | | | | | | |
+|`__bfloat162uint_rn`|11.0| | | | | | | |
+|`__bfloat162uint_ru`|11.0| | | | | | | |
+|`__bfloat162uint_rz`|11.0| | | | | | | |
+|`__bfloat162ull_rd`|11.0| | | | | | | |
+|`__bfloat162ull_rn`|11.0| | | | | | | |
+|`__bfloat162ull_ru`|11.0| | | | | | | |
+|`__bfloat162ull_rz`|11.0| | | | | | | |
+|`__bfloat162ushort_rd`|11.0| | | | | | | |
+|`__bfloat162ushort_rn`|11.0| | | | | | | |
+|`__bfloat162ushort_ru`|11.0| | | | | | | |
+|`__bfloat162ushort_rz`|11.0| | | | | | | |
+|`__bfloat16_as_short`|11.0| | | | | | | |
+|`__bfloat16_as_ushort`|11.0| | | | | | | |
|`__brev`| | | |`__brev`|1.6.0| | | |
|`__brevll`| | | |`__brevll`|1.6.0| | | |
|`__brkpt`| | | | | | | | |
@@ -29,10 +58,12 @@
|`__dmul_rn`| | | |`__dmul_rn`|1.6.0| | | |
|`__dmul_ru`| | | | | | | | |
|`__dmul_rz`| | | | | | | | |
+|`__double2bfloat16`|11.0| | | | | | | |
|`__double2float_rd`| | | |`__double2float_rd`|1.6.0| | | |
|`__double2float_rn`| | | |`__double2float_rn`|1.6.0| | | |
|`__double2float_ru`| | | |`__double2float_ru`|1.6.0| | | |
|`__double2float_rz`| | | |`__double2float_rz`|1.6.0| | | |
+|`__double2half`|11.0| | | | | | | |
|`__double2hiint`| | | |`__double2hiint`|1.6.0| | | |
|`__double2int_rd`| | | |`__double2int_rd`|1.6.0| | | |
|`__double2int_rn`| | | |`__double2int_rn`|1.6.0| | | |
@@ -81,6 +112,12 @@
|`__finitef`| | | | | | | | |
|`__finitel`| | | | | | | | |
|`__float22half2_rn`| | | |`__float22half2_rn`|1.6.0| | | |
+|`__float2bfloat16`|11.0| | | | | | | |
+|`__float2bfloat162_rn`|11.0| | | | | | | |
+|`__float2bfloat16_rd`|11.0| | | | | | | |
+|`__float2bfloat16_rn`|11.0| | | | | | | |
+|`__float2bfloat16_ru`|11.0| | | | | | | |
+|`__float2bfloat16_rz`|11.0| | | | | | | |
|`__float2half`| | | |`__float2half`|1.6.0| | | |
|`__float2half2_rn`| | | |`__float2half2_rn`|1.6.0| | | |
|`__float2half_rd`| | | |`__float2half_rd`|1.6.0| | | |
@@ -105,6 +142,7 @@
|`__float2ull_rz`| | | |`__float2ull_rz`|1.6.0| | | |
|`__float_as_int`| | | |`__float_as_int`|1.6.0| | | |
|`__float_as_uint`| | | |`__float_as_uint`|1.6.0| | | |
+|`__floats2bfloat162_rn`|11.0| | | | | | | |
|`__floats2half2_rn`| | | |`__floats2half2_rn`|1.6.0| | | |
|`__fma_rd`| | | | | | | | |
|`__fma_rn`| | | |`__fma_rn`|1.6.0| | | |
@@ -140,7 +178,9 @@
|`__habs2`| | | |`__habs2`|3.5.0| | | |
|`__hadd`| | | |`__hadd`|1.6.0| | | |
|`__hadd2`| | | |`__hadd2`|1.6.0| | | |
+|`__hadd2_rn`|11.6| | | | | | | |
|`__hadd2_sat`| | | |`__hadd2_sat`|1.6.0| | | |
+|`__hadd_rn`|11.6| | | | | | | |
|`__hadd_sat`| | | |`__hadd_sat`|1.6.0| | | |
|`__half22float2`| | | |`__half22float2`|1.6.0| | | |
|`__half2float`| | | |`__half2float`|1.6.0| | | |
@@ -171,6 +211,7 @@
|`__half2ushort_rz`| | | |`__half2ushort_rz`|1.6.0| | | |
|`__half_as_short`| | | |`__half_as_short`|1.6.0| | | |
|`__half_as_ushort`| | | |`__half_as_ushort`|1.6.0| | | |
+|`__halves2bfloat162`|11.0| | | | | | | |
|`__halves2half2`| | | |`__halves2half2`|1.6.0| | | |
|`__hbeq2`| | | |`__hbeq2`|1.6.0| | | |
|`__hbequ2`| | | |`__hbequ2`|1.9.0| | | |
@@ -184,6 +225,7 @@
|`__hbltu2`| | | |`__hbltu2`|1.9.0| | | |
|`__hbne2`| | | |`__hbne2`|1.6.0| | | |
|`__hbneu2`| | | |`__hbneu2`|1.9.0| | | |
+|`__hcmadd`|11.1| | | | | | | |
|`__hdiv`| | | |`__hdiv`|1.9.0| | | |
|`__heq`| | | |`__heq`|1.6.0| | | |
|`__heq2`| | | |`__heq2`|1.6.0| | | |
@@ -191,7 +233,9 @@
|`__hequ2`| | | |`__hequ2`|1.9.0| | | |
|`__hfma`| | | |`__hfma`|1.6.0| | | |
|`__hfma2`| | | |`__hfma2`|1.6.0| | | |
+|`__hfma2_relu`|11.0| | | | | | | |
|`__hfma2_sat`| | | |`__hfma2_sat`|1.6.0| | | |
+|`__hfma_relu`|11.0| | | | | | | |
|`__hfma_sat`| | | |`__hfma_sat`|1.6.0| | | |
|`__hge`| | | |`__hge`|1.6.0| | | |
|`__hge2`| | | |`__hge2`|1.6.0| | | |
@@ -201,9 +245,12 @@
|`__hgt2`| | | |`__hgt2`|1.6.0| | | |
|`__hgtu`| | | |`__hgtu`|1.9.0| | | |
|`__hgtu2`| | | |`__hgtu2`|1.9.0| | | |
+|`__high2bfloat16`|11.0| | | | | | | |
+|`__high2bfloat162`|11.0| | | | | | | |
|`__high2float`| | | |`__high2float`|1.6.0| | | |
|`__high2half`| | | |`__high2half`|1.6.0| | | |
|`__high2half2`| | | |`__high2half2`|1.6.0| | | |
+|`__highs2bfloat162`|11.0| | | | | | | |
|`__highs2half2`| | | |`__highs2half2`|1.6.0| | | |
|`__hiloint2double`| | | |`__hiloint2double`|1.6.0| | | |
|`__hisinf`| | | |`__hisinf`|1.6.0| | | |
@@ -217,9 +264,19 @@
|`__hlt2`| | | |`__hlt2`|1.6.0| | | |
|`__hltu`| | | |`__hltu`|1.9.0| | | |
|`__hltu2`| | | |`__hltu2`|1.9.0| | | |
+|`__hmax`|11.0| | | | | | | |
+|`__hmax2`|11.0| | | | | | | |
+|`__hmax2_nan`|11.0| | | | | | | |
+|`__hmax_nan`|11.0| | | | | | | |
+|`__hmin`|11.0| | | | | | | |
+|`__hmin2`|11.0| | | | | | | |
+|`__hmin2_nan`|11.0| | | | | | | |
+|`__hmin_nan`|11.0| | | | | | | |
|`__hmul`| | | |`__hmul`|1.6.0| | | |
|`__hmul2`| | | |`__hmul2`|1.6.0| | | |
+|`__hmul2_rn`|11.6| | | | | | | |
|`__hmul2_sat`| | | |`__hmul2_sat`|1.6.0| | | |
+|`__hmul_rn`|11.6| | | | | | | |
|`__hmul_sat`| | | |`__hmul_sat`|1.6.0| | | |
|`__hne`| | | |`__hne`|1.6.0| | | |
|`__hne2`| | | |`__hne2`|1.6.0| | | |
@@ -229,8 +286,14 @@
|`__hneu2`| | | |`__hneu2`|1.9.0| | | |
|`__hsub`| | | |`__hsub`|1.6.0| | | |
|`__hsub2`| | | |`__hsub2`|1.6.0| | | |
+|`__hsub2_rn`|11.6| | | | | | | |
|`__hsub2_sat`| | | |`__hsub2_sat`|1.6.0| | | |
+|`__hsub_rn`|11.6| | | | | | | |
|`__hsub_sat`| | | |`__hsub_sat`|1.6.0| | | |
+|`__int2bfloat16_rd`|11.0| | | | | | | |
+|`__int2bfloat16_rn`|11.0| | | | | | | |
+|`__int2bfloat16_ru`|11.0| | | | | | | |
+|`__int2bfloat16_rz`|11.0| | | | | | | |
|`__int2double_rn`| | | |`__int2double_rn`|1.6.0| | | |
|`__int2float_rd`| | | |`__int2float_rd`|1.6.0| | | |
|`__int2float_rn`| | | |`__int2float_rn`|1.6.0| | | |
@@ -250,7 +313,13 @@
|`__ldca`| | | |`__ldca`|1.9.0| | | |
|`__ldcg`| | | |`__ldcg`|1.9.0| | | |
|`__ldcs`| | | |`__ldcs`|1.9.0| | | |
+|`__ldcv`|11.0| | | | | | | |
|`__ldg`| | | |`__ldg`|1.6.0| | | |
+|`__ldlu`|11.0| | | | | | | |
+|`__ll2bfloat16_rd`|11.0| | | | | | | |
+|`__ll2bfloat16_rn`|11.0| | | | | | | |
+|`__ll2bfloat16_ru`|11.0| | | | | | | |
+|`__ll2bfloat16_rz`|11.0| | | | | | | |
|`__ll2double_rd`| | | |`__ll2double_rd`|1.6.0| | | |
|`__ll2double_rn`| | | |`__ll2double_rn`|1.6.0| | | |
|`__ll2double_ru`| | | |`__ll2double_ru`|1.6.0| | | |
@@ -267,14 +336,27 @@
|`__log2f`| | | |`__log2f`|1.6.0| | | |
|`__logf`| | | |`__logf`|1.6.0| | | |
|`__longlong_as_double`| | | |`__longlong_as_double`|1.6.0| | | |
+|`__low2bfloat16`|11.0| | | | | | | |
+|`__low2bfloat162`|11.0| | | | | | | |
|`__low2float`| | | |`__low2float`|1.6.0| | | |
|`__low2half`| | | |`__low2half`|1.6.0| | | |
|`__low2half2`| | | |`__low2half2`|1.6.0| | | |
|`__lowhigh2highlow`| | | |`__lowhigh2highlow`|1.6.0| | | |
+|`__lows2bfloat162`|11.0| | | | | | | |
|`__lows2half2`| | | |`__lows2half2`|1.6.0| | | |
|`__mul24`| | | |`__mul24`|1.6.0| | | |
|`__mul64hi`| | | |`__mul64hi`|1.6.0| | | |
|`__mulhi`| | | |`__mulhi`|1.6.0| | | |
+|`__nv_cvt_bfloat16raw2_to_fp8x2`|11.8| | | | | | | |
+|`__nv_cvt_bfloat16raw_to_fp8`|11.8| | | | | | | |
+|`__nv_cvt_double2_to_fp8x2`|11.8| | | | | | | |
+|`__nv_cvt_double_to_fp8`|11.8| | | | | | | |
+|`__nv_cvt_float2_to_fp8x2`|11.8| | | | | | | |
+|`__nv_cvt_float_to_fp8`|11.8| | | | | | | |
+|`__nv_cvt_fp8_to_halfraw`|11.8| | | | | | | |
+|`__nv_cvt_fp8x2_to_halfraw2`|11.8| | | | | | | |
+|`__nv_cvt_halfraw2_to_fp8x2`|11.8| | | | | | | |
+|`__nv_cvt_halfraw_to_fp8`|11.8| | | | | | | |
|`__pm0`| | | | | | | | |
|`__pm1`| | | | | | | | |
|`__pm2`| | | | | | | | |
@@ -294,16 +376,25 @@
|`__shfl_up_sync`| | | | | | | | |
|`__shfl_xor`|7.5|9.0| |`__shfl_xor`|1.6.0| | | |
|`__shfl_xor_sync`| | | | | | | | |
+|`__short2bfloat16_rd`|11.0| | | | | | | |
+|`__short2bfloat16_rn`|11.0| | | | | | | |
+|`__short2bfloat16_ru`|11.0| | | | | | | |
+|`__short2bfloat16_rz`|11.0| | | | | | | |
|`__short2half_rd`| | | |`__short2half_rd`|1.6.0| | | |
|`__short2half_rn`| | | |`__short2half_rn`|1.6.0| | | |
|`__short2half_ru`| | | |`__short2half_ru`|1.6.0| | | |
|`__short2half_rz`| | | |`__short2half_rz`|1.6.0| | | |
+|`__short_as_bfloat16`|11.0| | | | | | | |
|`__short_as_half`| | | |`__short_as_half`|1.9.0| | | |
|`__signbit`| | | | | | | | |
|`__signbitf`| | | | | | | | |
|`__signbitl`| | | | | | | | |
|`__sincosf`| | | |`__sincosf`|1.6.0| | | |
|`__sinf`| | | |`__sinf`|1.6.0| | | |
+|`__stcg`|11.0| | | | | | | |
+|`__stcs`|11.0| | | | | | | |
+|`__stwb`|11.0| | | | | | | |
+|`__stwt`|11.0| | | | | | | |
|`__syncthreads`| | | |`__syncthreads`|1.6.0| | | |
|`__syncthreads_and`| | | |`__syncthreads_and`|3.7.0| | | |
|`__syncthreads_count`| | | |`__syncthreads_count`|3.7.0| | | |
@@ -314,6 +405,10 @@
|`__threadfence_system`| | | |`__threadfence_system`|1.6.0| | | |
|`__trap`| | | | | | | | |
|`__uhadd`| | | |`__uhadd`|1.6.0| | | |
+|`__uint2bfloat16_rd`|11.0| | | | | | | |
+|`__uint2bfloat16_rn`|11.0| | | | | | | |
+|`__uint2bfloat16_ru`|11.0| | | | | | | |
+|`__uint2bfloat16_rz`|11.0| | | | | | | |
|`__uint2double_rn`| | | |`__uint2double_rn`|1.6.0| | | |
|`__uint2float_rd`| | | |`__uint2float_rd`|1.6.0| | | |
|`__uint2float_rn`| | | |`__uint2float_rn`|1.6.0| | | |
@@ -324,6 +419,10 @@
|`__uint2half_ru`| | | |`__uint2half_ru`|1.6.0| | | |
|`__uint2half_rz`| | | |`__uint2half_rz`|1.6.0| | | |
|`__uint_as_float`| | | |`__uint_as_float`|1.6.0| | | |
+|`__ull2bfloat16_rd`|11.0| | | | | | | |
+|`__ull2bfloat16_rn`|11.0| | | | | | | |
+|`__ull2bfloat16_ru`|11.0| | | | | | | |
+|`__ull2bfloat16_rz`|11.0| | | | | | | |
|`__ull2double_rd`| | | |`__ull2double_rd`|1.6.0| | | |
|`__ull2double_rn`| | | |`__ull2double_rn`|1.6.0| | | |
|`__ull2double_ru`| | | |`__ull2double_ru`|1.6.0| | | |
@@ -341,10 +440,15 @@
|`__umulhi`| | | |`__umulhi`|1.6.0| | | |
|`__urhadd`| | | |`__urhadd`|1.6.0| | | |
|`__usad`| | | |`__usad`|1.6.0| | | |
+|`__ushort2bfloat16_rd`|11.0| | | | | | | |
+|`__ushort2bfloat16_rn`|11.0| | | | | | | |
+|`__ushort2bfloat16_ru`|11.0| | | | | | | |
+|`__ushort2bfloat16_rz`|11.0| | | | | | | |
|`__ushort2half_rd`| | | |`__ushort2half_rd`|1.6.0| | | |
|`__ushort2half_rn`| | | |`__ushort2half_rn`|1.6.0| | | |
|`__ushort2half_ru`| | | |`__ushort2half_ru`|1.6.0| | | |
|`__ushort2half_rz`| | | |`__ushort2half_rz`|1.6.0| | | |
+|`__ushort_as_bfloat16`|11.0| | | | | | | |
|`__ushort_as_half`| | | |`__ushort_as_half`|1.6.0| | | |
|`__vabs2`| | | | | | | | |
|`__vabs4`| | | | | | | | |
@@ -676,5 +780,35 @@
|`yn`| | | |`yn`|1.6.0| | | |
|`ynf`| | | |`ynf`|1.6.0| | | |
+## **2. Device Types**
+
+|**CUDA**|**A**|**D**|**R**|**HIP**|**A**|**D**|**R**|**E**|
+|:--|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|
+|`__NV_E4M3`|11.8| | | | | | | |
+|`__NV_E5M2`|11.8| | | | | | | |
+|`__NV_NOSAT`|11.8| | | | | | | |
+|`__NV_SATFINITE`|11.8| | | | | | | |
+|`__half`| | | |`__half`|1.6.0| | | |
+|`__half2`| | | |`__half2`|1.6.0| | | |
+|`__half2_raw`| | | |`__half2_raw`|1.9.0| | | |
+|`__half_raw`| | | |`__half_raw`|1.9.0| | | |
+|`__nv_bfloat16`|11.0| | | | | | | |
+|`__nv_bfloat162`|11.0| | | | | | | |
+|`__nv_bfloat162_raw`|11.0| | | | | | | |
+|`__nv_bfloat16_raw`|11.0| | | | | | | |
+|`__nv_fp8_e4m3`|11.8| | | | | | | |
+|`__nv_fp8_e5m2`|11.8| | | | | | | |
+|`__nv_fp8_interpretation_t`|11.8| | | | | | | |
+|`__nv_fp8_storage_t`|11.8| | | | | | | |
+|`__nv_fp8x2_e4m3`|11.8| | | | | | | |
+|`__nv_fp8x2_e5m2`|11.8| | | | | | | |
+|`__nv_fp8x2_storage_t`|11.8| | | | | | | |
+|`__nv_fp8x4_e4m3`|11.8| | | | | | | |
+|`__nv_fp8x4_e5m2`|11.8| | | | | | | |
+|`__nv_fp8x4_storage_t`|11.8| | | | | | | |
+|`__nv_saturation_t`|11.8| | | | | | | |
+|`nv_bfloat16`|11.0| | | | | | | |
+|`nv_bfloat162`|11.0| | | | | | | |
+
\*A - Added; D - Deprecated; R - Removed; E - Experimental
\ No newline at end of file
diff --git a/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md b/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md
index 55ebfb57..510b653a 100644
--- a/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md
+++ b/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md
@@ -56,6 +56,7 @@
|`CUDA_ERROR_ILLEGAL_INSTRUCTION`| | | | | | | | |
|`CUDA_ERROR_ILLEGAL_STATE`|10.0| | |`hipErrorIllegalState`|5.0.0| | | |
|`CUDA_ERROR_INVALID_ADDRESS_SPACE`| | | | | | | | |
+|`CUDA_ERROR_INVALID_CLUSTER_SIZE`|11.8| | | | | | | |
|`CUDA_ERROR_INVALID_CONTEXT`| | | |`hipErrorInvalidContext`|1.6.0| | | |
|`CUDA_ERROR_INVALID_DEVICE`| | | |`hipErrorInvalidDevice`|1.6.0| | | |
|`CUDA_ERROR_INVALID_GRAPHICS_CONTEXT`| | | |`hipErrorInvalidGraphicsContext`|1.6.0| | | |
@@ -73,6 +74,7 @@
|`CUDA_ERROR_LAUNCH_TIMEOUT`| | | |`hipErrorLaunchTimeOut`|1.6.0| | | |
|`CUDA_ERROR_MAP_FAILED`| | | |`hipErrorMapFailed`|1.6.0| | | |
|`CUDA_ERROR_MISALIGNED_ADDRESS`| | | | | | | | |
+|`CUDA_ERROR_MPS_CLIENT_TERMINATED`|11.8| | | | | | | |
|`CUDA_ERROR_MPS_CONNECTION_FAILED`|11.4| | | | | | | |
|`CUDA_ERROR_MPS_MAX_CLIENTS_REACHED`|11.4| | | | | | | |
|`CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED`|11.4| | | | | | | |
@@ -239,6 +241,9 @@
|`CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL`|11.1| | | | | | | |
|`CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL`|11.1| | |`hipArraySparseSubresourceTypeMiptail`|5.2.0| | | |
|`CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL`|11.1| | |`hipArraySparseSubresourceTypeSparseLevel`|5.2.0| | | |
+|`CU_CLUSTER_SCHEDULING_POLICY_DEFAULT`|11.8| | | | | | | |
+|`CU_CLUSTER_SCHEDULING_POLICY_LOAD_BALANCING`|11.8| | | | | | | |
+|`CU_CLUSTER_SCHEDULING_POLICY_SPREAD`|11.8| | | | | | | |
|`CU_COMPUTEMODE_DEFAULT`| | | |`hipComputeModeDefault`|1.9.0| | | |
|`CU_COMPUTEMODE_EXCLUSIVE`| | |8.0|`hipComputeModeExclusive`|1.9.0| | | |
|`CU_COMPUTEMODE_EXCLUSIVE_PROCESS`| | | |`hipComputeModeExclusiveProcess`|2.0.0| | | |
@@ -288,6 +293,7 @@
|`CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR`|9.0| | |`hipDeviceAttributeCanUseStreamWaitValue`|4.3.0| | | |
|`CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR_V2`|11.7| | | | | | | |
|`CU_DEVICE_ATTRIBUTE_CLOCK_RATE`| | | |`hipDeviceAttributeClockRate`|1.6.0| | | |
+|`CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH`|11.8| | | | | | | |
|`CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR`| | | |`hipDeviceAttributeComputeCapabilityMajor`|1.6.0| | | |
|`CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR`| | | |`hipDeviceAttributeComputeCapabilityMinor`|1.6.0| | | |
|`CU_DEVICE_ATTRIBUTE_COMPUTE_MODE`| | | |`hipDeviceAttributeComputeMode`|1.6.0| | | |
@@ -405,7 +411,7 @@
|`CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY`| | | |`hipDeviceAttributeTotalConstantMemory`|1.6.0| | | |
|`CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING`| | | |`hipDeviceAttributeUnifiedAddressing`|4.3.0| | | |
|`CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED`|10.2|11.2| | | | | | |
-|`CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED`|11.2| | |`hipDeviceAttributeVirtualMemoryManagementSupported`|5.3.0| | |5.3.0|
+|`CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED`|11.2| | |`hipDeviceAttributeVirtualMemoryManagementSupported`|5.3.0| | | |
|`CU_DEVICE_ATTRIBUTE_WARP_SIZE`| | | |`hipDeviceAttributeWarpSize`|1.6.0| | | |
|`CU_DEVICE_CPU`|8.0| | |`hipCpuDeviceId`|3.7.0| | | |
|`CU_DEVICE_INVALID`|8.0| | |`hipInvalidDeviceId`|3.7.0| | | |
@@ -498,6 +504,10 @@
|`CU_EVENT_INTERPROCESS`| | | |`hipEventInterprocess`|1.6.0| | | |
|`CU_EVENT_RECORD_DEFAULT`|11.1| | | | | | | |
|`CU_EVENT_RECORD_EXTERNAL`|11.1| | | | | | | |
+|`CU_EVENT_SCHED_AUTO`|11.8| | | | | | | |
+|`CU_EVENT_SCHED_BLOCKING_SYNC`|11.8| | | | | | | |
+|`CU_EVENT_SCHED_SPIN`|11.8| | | | | | | |
+|`CU_EVENT_SCHED_YIELD`|11.8| | | | | | | |
|`CU_EVENT_WAIT_DEFAULT`|11.1| | | | | | | |
|`CU_EVENT_WAIT_EXTERNAL`|11.1| | | | | | | |
|`CU_EXEC_AFFINITY_TYPE_MAX`|11.4| | | | | | | |
@@ -527,14 +537,20 @@
|`CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_OWNER`|11.3| | | | | | | |
|`CU_FUNC_ATTRIBUTE_BINARY_VERSION`| | | |`HIP_FUNC_ATTRIBUTE_BINARY_VERSION`|2.8.0| | | |
|`CU_FUNC_ATTRIBUTE_CACHE_MODE_CA`| | | |`HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA`|2.8.0| | | |
+|`CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE`|11.8| | | | | | | |
+|`CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET`|11.8| | | | | | | |
|`CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES`| | | |`HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES`|2.8.0| | | |
|`CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES`| | | |`HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES`|2.8.0| | | |
|`CU_FUNC_ATTRIBUTE_MAX`| | | |`HIP_FUNC_ATTRIBUTE_MAX`|2.8.0| | | |
|`CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES`|9.0| | |`HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES`|2.8.0| | | |
|`CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK`| | | |`HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK`|2.8.0| | | |
+|`CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED`|11.8| | | | | | | |
|`CU_FUNC_ATTRIBUTE_NUM_REGS`| | | |`HIP_FUNC_ATTRIBUTE_NUM_REGS`|2.8.0| | | |
|`CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT`|9.0| | |`HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT`|2.8.0| | | |
|`CU_FUNC_ATTRIBUTE_PTX_VERSION`| | | |`HIP_FUNC_ATTRIBUTE_PTX_VERSION`|2.8.0| | | |
+|`CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH`|11.8| | | | | | | |
+|`CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT`|11.8| | | | | | | |
+|`CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH`|11.8| | | | | | | |
|`CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES`| | | |`HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES`|2.8.0| | | |
|`CU_FUNC_CACHE_PREFER_EQUAL`| | | |`hipFuncCachePreferEqual`|1.6.0| | | |
|`CU_FUNC_CACHE_PREFER_L1`| | | |`hipFuncCachePreferL1`|1.6.0| | | |
@@ -583,16 +599,16 @@
|`CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED`|10.2| | |`hipGraphExecUpdateErrorTopologyChanged`|4.3.0| | | |
|`CU_GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE`|11.2| | |`hipGraphExecUpdateErrorUnsupportedFunctionChange`|4.3.0| | | |
|`CU_GRAPH_EXEC_UPDATE_SUCCESS`|10.2| | |`hipGraphExecUpdateSuccess`|4.3.0| | | |
-|`CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT`|11.4| | |`hipGraphMemAttrReservedMemCurrent`|5.3.0| | |5.3.0|
-|`CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH`|11.4| | |`hipGraphMemAttrReservedMemHigh`|5.3.0| | |5.3.0|
-|`CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT`|11.4| | |`hipGraphMemAttrUsedMemCurrent`|5.3.0| | |5.3.0|
-|`CU_GRAPH_MEM_ATTR_USED_MEM_HIGH`|11.4| | |`hipGraphMemAttrUsedMemHigh`|5.3.0| | |5.3.0|
+|`CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT`|11.4| | |`hipGraphMemAttrReservedMemCurrent`|5.3.0| | | |
+|`CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH`|11.4| | |`hipGraphMemAttrReservedMemHigh`|5.3.0| | | |
+|`CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT`|11.4| | |`hipGraphMemAttrUsedMemCurrent`|5.3.0| | | |
+|`CU_GRAPH_MEM_ATTR_USED_MEM_HIGH`|11.4| | |`hipGraphMemAttrUsedMemHigh`|5.3.0| | | |
|`CU_GRAPH_NODE_TYPE_BATCH_MEM_OP`|11.7| | | | | | | |
|`CU_GRAPH_NODE_TYPE_COUNT`|10.0| |11.0|`hipGraphNodeTypeCount`|4.3.0| | | |
|`CU_GRAPH_NODE_TYPE_EMPTY`|10.0| | |`hipGraphNodeTypeEmpty`|4.3.0| | | |
|`CU_GRAPH_NODE_TYPE_EVENT_RECORD`|11.1| | |`hipGraphNodeTypeEventRecord`|4.3.0| | | |
-|`CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL`|11.2| | |`hipGraphNodeTypeExtSemaphoreSignal`|5.3.0| | |5.3.0|
-|`CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT`|11.2| | |`hipGraphNodeTypeExtSemaphoreWait`|5.3.0| | |5.3.0|
+|`CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL`|11.2| | |`hipGraphNodeTypeExtSemaphoreSignal`|5.3.0| | | |
+|`CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT`|11.2| | |`hipGraphNodeTypeExtSemaphoreWait`|5.3.0| | | |
|`CU_GRAPH_NODE_TYPE_GRAPH`|10.0| | |`hipGraphNodeTypeGraph`|4.3.0| | | |
|`CU_GRAPH_NODE_TYPE_HOST`|10.0| | |`hipGraphNodeTypeHost`|4.3.0| | | |
|`CU_GRAPH_NODE_TYPE_KERNEL`|10.0| | |`hipGraphNodeTypeKernel`|4.3.0| | | |
@@ -601,39 +617,39 @@
|`CU_GRAPH_NODE_TYPE_MEM_ALLOC`|11.4| | | | | | | |
|`CU_GRAPH_NODE_TYPE_MEM_FREE`|11.4| | | | | | | |
|`CU_GRAPH_NODE_TYPE_WAIT_EVENT`|11.1| | |`hipGraphNodeTypeWaitEvent`|4.3.0| | | |
-|`CU_GRAPH_USER_OBJECT_MOVE`|11.3| | |`hipGraphUserObjectMove`|5.3.0| | |5.3.0|
+|`CU_GRAPH_USER_OBJECT_MOVE`|11.3| | |`hipGraphUserObjectMove`|5.3.0| | | |
|`CU_IPC_HANDLE_SIZE`| | | |`HIP_IPC_HANDLE_SIZE`|1.6.0| | | |
|`CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS`| | | |`hipIpcMemLazyEnablePeerAccess`|1.6.0| | | |
-|`CU_JIT_CACHE_MODE`| | | |`hipJitOptionCacheMode`|1.6.0| | | |
+|`CU_JIT_CACHE_MODE`| | | |`HIPRTC_JIT_CACHE_MODE`|1.6.0| | | |
|`CU_JIT_CACHE_OPTION_CA`| | | | | | | | |
|`CU_JIT_CACHE_OPTION_CG`| | | | | | | | |
|`CU_JIT_CACHE_OPTION_NONE`| | | | | | | | |
-|`CU_JIT_ERROR_LOG_BUFFER`| | | |`hipJitOptionErrorLogBuffer`|1.6.0| | | |
-|`CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES`| | | |`hipJitOptionErrorLogBufferSizeBytes`|1.6.0| | | |
-|`CU_JIT_FALLBACK_STRATEGY`| | | |`hipJitOptionFallbackStrategy`|1.6.0| | | |
-|`CU_JIT_FAST_COMPILE`| | | |`hipJitOptionFastCompile`|1.6.0| | | |
+|`CU_JIT_ERROR_LOG_BUFFER`| | | |`HIPRTC_JIT_ERROR_LOG_BUFFER`|1.6.0| | | |
+|`CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES`| | | |`HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES`|1.6.0| | | |
+|`CU_JIT_FALLBACK_STRATEGY`| | | |`HIPRTC_JIT_FALLBACK_STRATEGY`|1.6.0| | | |
+|`CU_JIT_FAST_COMPILE`| | | |`HIPRTC_JIT_FAST_COMPILE`|1.6.0| | | |
|`CU_JIT_FMA`|11.4| | | | | | | |
|`CU_JIT_FTZ`|11.4| | | | | | | |
-|`CU_JIT_GENERATE_DEBUG_INFO`| | | |`hipJitOptionGenerateDebugInfo`|1.6.0| | | |
-|`CU_JIT_GENERATE_LINE_INFO`| | | |`hipJitOptionGenerateLineInfo`|1.6.0| | | |
+|`CU_JIT_GENERATE_DEBUG_INFO`| | | |`HIPRTC_JIT_GENERATE_DEBUG_INFO`|1.6.0| | | |
+|`CU_JIT_GENERATE_LINE_INFO`| | | |`HIPRTC_JIT_GENERATE_LINE_INFO`|1.6.0| | | |
|`CU_JIT_GLOBAL_SYMBOL_ADDRESSES`| | | | | | | | |
|`CU_JIT_GLOBAL_SYMBOL_COUNT`| | | | | | | | |
|`CU_JIT_GLOBAL_SYMBOL_NAMES`| | | | | | | | |
-|`CU_JIT_INFO_LOG_BUFFER`| | | |`hipJitOptionInfoLogBuffer`|1.6.0| | | |
-|`CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES`| | | |`hipJitOptionInfoLogBufferSizeBytes`|1.6.0| | | |
-|`CU_JIT_INPUT_CUBIN`| | | |`HIPRTC_JIT_INPUT_CUBIN`|5.3.0| | |5.3.0|
-|`CU_JIT_INPUT_FATBINARY`| | | |`HIPRTC_JIT_INPUT_FATBINARY`|5.3.0| | |5.3.0|
-|`CU_JIT_INPUT_LIBRARY`| | | |`HIPRTC_JIT_INPUT_LIBRARY`|5.3.0| | |5.3.0|
-|`CU_JIT_INPUT_NVVM`|11.4| | |`HIPRTC_JIT_INPUT_NVVM`|5.3.0| | |5.3.0|
-|`CU_JIT_INPUT_OBJECT`| | | |`HIPRTC_JIT_INPUT_OBJECT`|5.3.0| | |5.3.0|
-|`CU_JIT_INPUT_PTX`| | | |`HIPRTC_JIT_INPUT_PTX`|5.3.0| | |5.3.0|
-|`CU_JIT_LOG_VERBOSE`| | | |`hipJitOptionLogVerbose`|1.6.0| | | |
+|`CU_JIT_INFO_LOG_BUFFER`| | | |`HIPRTC_JIT_INFO_LOG_BUFFER`|1.6.0| | | |
+|`CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES`| | | |`HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES`|1.6.0| | | |
+|`CU_JIT_INPUT_CUBIN`| | | |`HIPRTC_JIT_INPUT_CUBIN`|5.3.0| | | |
+|`CU_JIT_INPUT_FATBINARY`| | | |`HIPRTC_JIT_INPUT_FATBINARY`|5.3.0| | | |
+|`CU_JIT_INPUT_LIBRARY`| | | |`HIPRTC_JIT_INPUT_LIBRARY`|5.3.0| | | |
+|`CU_JIT_INPUT_NVVM`|11.4| | |`HIPRTC_JIT_INPUT_NVVM`|5.3.0| | | |
+|`CU_JIT_INPUT_OBJECT`| | | |`HIPRTC_JIT_INPUT_OBJECT`|5.3.0| | | |
+|`CU_JIT_INPUT_PTX`| | | |`HIPRTC_JIT_INPUT_PTX`|5.3.0| | | |
+|`CU_JIT_LOG_VERBOSE`| | | |`HIPRTC_JIT_LOG_VERBOSE`|1.6.0| | | |
|`CU_JIT_LTO`|11.4| | | | | | | |
-|`CU_JIT_MAX_REGISTERS`| | | |`hipJitOptionMaxRegisters`|1.6.0| | | |
-|`CU_JIT_NEW_SM3X_OPT`| | | |`hipJitOptionSm3xOpt`|1.6.0| | | |
-|`CU_JIT_NUM_INPUT_TYPES`| | | |`HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES`|5.3.0| | |5.3.0|
-|`CU_JIT_NUM_OPTIONS`| | | |`hipJitOptionNumOptions`|1.6.0| | | |
-|`CU_JIT_OPTIMIZATION_LEVEL`| | | |`hipJitOptionOptimizationLevel`|1.6.0| | | |
+|`CU_JIT_MAX_REGISTERS`| | | |`HIPRTC_JIT_MAX_REGISTERS`|1.6.0| | | |
+|`CU_JIT_NEW_SM3X_OPT`| | | |`HIPRTC_JIT_NEW_SM3X_OPT`|1.6.0| | | |
+|`CU_JIT_NUM_INPUT_TYPES`| | | |`HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES`|5.3.0| | | |
+|`CU_JIT_NUM_OPTIONS`| | | |`HIPRTC_JIT_NUM_OPTIONS`|1.6.0| | | |
+|`CU_JIT_OPTIMIZATION_LEVEL`| | | |`HIPRTC_JIT_OPTIMIZATION_LEVEL`|1.6.0| | | |
|`CU_JIT_OPTIMIZE_UNUSED_DEVICE_VARIABLES`|11.7| | | | | | | |
|`CU_JIT_PREC_DIV`|11.4| | | | | | | |
|`CU_JIT_PREC_SQRT`|11.4| | | | | | | |
@@ -641,13 +657,24 @@
|`CU_JIT_REFERENCED_KERNEL_NAMES`|11.7| | | | | | | |
|`CU_JIT_REFERENCED_VARIABLE_COUNT`|11.7| | | | | | | |
|`CU_JIT_REFERENCED_VARIABLE_NAMES`|11.7| | | | | | | |
-|`CU_JIT_TARGET`| | | |`hipJitOptionTarget`|1.6.0| | | |
-|`CU_JIT_TARGET_FROM_CUCONTEXT`| | | |`hipJitOptionTargetFromContext`|1.6.0| | | |
-|`CU_JIT_THREADS_PER_BLOCK`| | | |`hipJitOptionThreadsPerBlock`|1.6.0| | | |
-|`CU_JIT_WALL_TIME`| | | |`hipJitOptionWallTime`|1.6.0| | | |
+|`CU_JIT_TARGET`| | | |`HIPRTC_JIT_TARGET`|1.6.0| | | |
+|`CU_JIT_TARGET_FROM_CUCONTEXT`| | | |`HIPRTC_JIT_TARGET_FROM_HIPCONTEXT`|1.6.0| | | |
+|`CU_JIT_THREADS_PER_BLOCK`| | | |`HIPRTC_JIT_THREADS_PER_BLOCK`|1.6.0| | | |
+|`CU_JIT_WALL_TIME`| | | |`HIPRTC_JIT_WALL_TIME`|1.6.0| | | |
|`CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW`|11.0| | |`hipKernelNodeAttributeAccessPolicyWindow`|5.2.0| | | |
+|`CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_DIMENSION`|11.8| | | | | | | |
+|`CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE`|11.8| | | | | | | |
|`CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE`|11.0| | |`hipKernelNodeAttributeCooperative`|5.2.0| | | |
|`CU_KERNEL_NODE_ATTRIBUTE_PRIORITY`|11.7| | | | | | | |
+|`CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW`|11.8| | | | | | | |
+|`CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION`|11.8| | | | | | | |
+|`CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE`|11.8| | | | | | | |
+|`CU_LAUNCH_ATTRIBUTE_COOPERATIVE`|11.8| | | | | | | |
+|`CU_LAUNCH_ATTRIBUTE_IGNORE`|11.8| | | | | | | |
+|`CU_LAUNCH_ATTRIBUTE_PRIORITY`|11.8| | | | | | | |
+|`CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT`|11.8| | | | | | | |
+|`CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION`|11.8| | | | | | | |
+|`CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY`|11.8| | | | | | | |
|`CU_LAUNCH_PARAM_BUFFER_POINTER`| | | |`HIP_LAUNCH_PARAM_BUFFER_POINTER`|1.6.0| | | |
|`CU_LAUNCH_PARAM_BUFFER_POINTER_AS_INT`|11.7| | | | | | | |
|`CU_LAUNCH_PARAM_BUFFER_SIZE`| | | |`HIP_LAUNCH_PARAM_BUFFER_SIZE`|1.6.0| | | |
@@ -661,7 +688,7 @@
|`CU_LIMIT_MAX_L2_FETCH_GRANULARITY`|10.0| | | | | | | |
|`CU_LIMIT_PERSISTING_L2_CACHE_SIZE`|11.0| | | | | | | |
|`CU_LIMIT_PRINTF_FIFO_SIZE`| | | |`hipLimitPrintfFifoSize`|4.5.0| | | |
-|`CU_LIMIT_STACK_SIZE`| | | |`hipLimitStackSize`|5.3.0| | |5.3.0|
+|`CU_LIMIT_STACK_SIZE`| | | |`hipLimitStackSize`|5.3.0| | | |
|`CU_MEMHOSTALLOC_DEVICEMAP`| | | |`hipHostMallocMapped`|1.6.0| | | |
|`CU_MEMHOSTALLOC_PORTABLE`| | | |`hipHostMallocPortable`|1.6.0| | | |
|`CU_MEMHOSTALLOC_WRITECOMBINED`| | | |`hipHostMallocWriteCombined`|1.6.0| | | |
@@ -848,6 +875,8 @@
|`CU_TARGET_COMPUTE_80`|11.0| | | | | | | |
|`CU_TARGET_COMPUTE_86`|11.1| | | | | | | |
|`CU_TARGET_COMPUTE_87`|11.7| | | | | | | |
+|`CU_TARGET_COMPUTE_89`|11.8| | | | | | | |
+|`CU_TARGET_COMPUTE_90`|11.8| | | | | | | |
|`CU_TRSA_OVERRIDE_FORMAT`| | | |`HIP_TRSA_OVERRIDE_FORMAT`|1.7.0| | | |
|`CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION`|11.0| | | | | | | |
|`CU_TRSF_NORMALIZED_COORDINATES`| | | |`HIP_TRSF_NORMALIZED_COORDINATES`|1.7.0| | | |
@@ -860,7 +889,7 @@
|`CU_TR_ADDRESS_MODE_WRAP`| | | |`HIP_TR_ADDRESS_MODE_WRAP`|3.5.0| | | |
|`CU_TR_FILTER_MODE_LINEAR`| | | |`HIP_TR_FILTER_MODE_LINEAR`|3.5.0| | | |
|`CU_TR_FILTER_MODE_POINT`| | | |`HIP_TR_FILTER_MODE_POINT`|3.5.0| | | |
-|`CU_USER_OBJECT_NO_DESTRUCTOR_SYNC`|11.3| | |`hipUserObjectNoDestructorSync`|5.3.0| | |5.3.0|
+|`CU_USER_OBJECT_NO_DESTRUCTOR_SYNC`|11.3| | |`hipUserObjectNoDestructorSync`|5.3.0| | | |
|`CUaccessPolicyWindow`|11.0| | |`hipAccessPolicyWindow`|5.2.0| | | |
|`CUaccessPolicyWindow_st`|11.0| | |`hipAccessPolicyWindow`|5.2.0| | | |
|`CUaccessProperty`|11.0| | |`hipAccessProperty`|5.2.0| | | |
@@ -878,6 +907,8 @@
|`CUarray_format`| | | |`hipArray_Format`|1.7.0| | | |
|`CUarray_format_enum`| | | |`hipArray_Format`|1.7.0| | | |
|`CUarray_st`| | | |`hipArray`|1.7.0| | | |
+|`CUclusterSchedulingPolicy`|11.8| | | | | | | |
+|`CUclusterSchedulingPolicy_enum`|11.8| | | | | | | |
|`CUcomputemode`| | | |`hipComputeMode`|1.9.0| | | |
|`CUcomputemode_enum`| | | |`hipComputeMode`|1.9.0| | | |
|`CUcontext`| | | |`hipCtx_t`|1.6.0| | | |
@@ -925,6 +956,8 @@
|`CUevent_flags_enum`| | | | | | | | |
|`CUevent_record_flags`|11.1| | | | | | | |
|`CUevent_record_flags_enum`|11.1| | | | | | | |
+|`CUevent_sched_flags`|11.8| | | | | | | |
+|`CUevent_sched_flags_enum`|11.8| | | | | | | |
|`CUevent_st`| | | |`ihipEvent_t`|1.6.0| | | |
|`CUevent_wait_flags`|11.1| | | | | | | |
|`CUevent_wait_flags_enum`| | | | | | | | |
@@ -967,8 +1000,8 @@
|`CUgraphExec_st`|10.0| | |`hipGraphExec`|4.3.0| | | |
|`CUgraphInstantiate_flags`|11.4| | |`hipGraphInstantiateFlags`|5.2.0| | | |
|`CUgraphInstantiate_flags_enum`|11.4| | |`hipGraphInstantiateFlags`|5.2.0| | | |
-|`CUgraphMem_attribute`|11.4| | |`hipGraphMemAttributeType`|5.3.0| | |5.3.0|
-|`CUgraphMem_attribute_enum`|11.4| | |`hipGraphMemAttributeType`|5.3.0| | |5.3.0|
+|`CUgraphMem_attribute`|11.4| | |`hipGraphMemAttributeType`|5.3.0| | | |
+|`CUgraphMem_attribute_enum`|11.4| | |`hipGraphMemAttributeType`|5.3.0| | | |
|`CUgraphNode`|10.0| | |`hipGraphNode_t`|4.3.0| | | |
|`CUgraphNodeType`|10.0| | |`hipGraphNodeType`|4.3.0| | | |
|`CUgraphNodeType_enum`|10.0| | |`hipGraphNodeType`|4.3.0| | | |
@@ -989,8 +1022,8 @@
|`CUipcMemHandle_v1`|11.3| | |`hipIpcMemHandle_t`|1.6.0| | | |
|`CUipcMem_flags`| | | | | | | | |
|`CUipcMem_flags_enum`| | | | | | | | |
-|`CUjitInputType`| | | |`hiprtcJITInputType`|5.3.0| | |5.3.0|
-|`CUjitInputType_enum`| | | |`hiprtcJITInputType`|5.3.0| | |5.3.0|
+|`CUjitInputType`| | | |`hiprtcJITInputType`|5.3.0| | | |
+|`CUjitInputType_enum`| | | |`hiprtcJITInputType`|5.3.0| | | |
|`CUjit_cacheMode`| | | | | | | | |
|`CUjit_cacheMode_enum`| | | | | | | | |
|`CUjit_fallback`| | | | | | | | |
@@ -1000,14 +1033,22 @@
|`CUjit_target`| | | | | | | | |
|`CUjit_target_enum`| | | | | | | | |
|`CUkernelNodeAttrID`|11.0| | |`hipKernelNodeAttrID`|5.2.0| | | |
-|`CUkernelNodeAttrID_enum`|11.0| | |`hipKernelNodeAttrID`|5.2.0| | | |
+|`CUkernelNodeAttrID_enum`|11.0| |11.8|`hipKernelNodeAttrID`|5.2.0| | | |
|`CUkernelNodeAttrValue`|11.0| | |`hipKernelNodeAttrValue`|5.2.0| | | |
-|`CUkernelNodeAttrValue_union`|11.0| | |`hipKernelNodeAttrValue`|5.2.0| | | |
+|`CUkernelNodeAttrValue_union`|11.0| |11.8|`hipKernelNodeAttrValue`|5.2.0| | | |
|`CUkernelNodeAttrValue_v1`|11.3| | |`hipKernelNodeAttrValue`|5.2.0| | | |
+|`CUlaunchAttribute`|11.8| | | | | | | |
+|`CUlaunchAttributeID`|11.8| | | | | | | |
+|`CUlaunchAttributeID_enum`|11.8| | | | | | | |
+|`CUlaunchAttributeValue`|11.8| | | | | | | |
+|`CUlaunchAttributeValue_union`|11.8| | | | | | | |
+|`CUlaunchAttribute_st`|11.8| | | | | | | |
+|`CUlaunchConfig`|11.8| | | | | | | |
+|`CUlaunchConfig_st`|11.8| | | | | | | |
|`CUlimit`| | | |`hipLimit_t`|1.6.0| | | |
|`CUlimit_enum`| | | |`hipLimit_t`|1.6.0| | | |
-|`CUlinkState`| | | |`hiprtcLinkState`|5.3.0| | |5.3.0|
-|`CUlinkState_st`| | | |`ihiprtcLinkState`|5.3.0| | |5.3.0|
+|`CUlinkState`| | | |`hiprtcLinkState`|5.3.0| | | |
+|`CUlinkState_st`| | | |`ihiprtcLinkState`|5.3.0| | | |
|`CUmemAccessDesc`|10.2| | |`hipMemAccessDesc`|5.2.0| | | |
|`CUmemAccessDesc_st`|10.2| | |`hipMemAccessDesc`|5.2.0| | | |
|`CUmemAccessDesc_v1`|11.3| | |`hipMemAccessDesc`|5.2.0| | | |
@@ -1111,17 +1152,29 @@
|`CUtexObject_v1`|11.3| | |`hipTextureObject_t`|1.7.0| | | |
|`CUtexref`| | | |`hipTexRef`|3.10.0| | | |
|`CUtexref_st`| | | |`textureReference`|1.6.0| | | |
-|`CUuserObject`|11.3| | |`hipUserObject_t`|5.3.0| | |5.3.0|
-|`CUuserObjectRetain_flags`|11.3| | |`hipUserObjectRetainFlags`|5.3.0| | |5.3.0|
-|`CUuserObjectRetain_flags_enum`|11.3| | |`hipUserObjectRetainFlags`|5.3.0| | |5.3.0|
-|`CUuserObject_flags`|11.3| | |`hipUserObjectFlags`|5.3.0| | |5.3.0|
-|`CUuserObject_flags_enum`|11.3| | |`hipUserObjectFlags`|5.3.0| | |5.3.0|
-|`CUuserObject_st`|11.3| | |`hipUserObject`|5.3.0| | |5.3.0|
+|`CUuserObject`|11.3| | |`hipUserObject_t`|5.3.0| | | |
+|`CUuserObjectRetain_flags`|11.3| | |`hipUserObjectRetainFlags`|5.3.0| | | |
+|`CUuserObjectRetain_flags_enum`|11.3| | |`hipUserObjectRetainFlags`|5.3.0| | | |
+|`CUuserObject_flags`|11.3| | |`hipUserObjectFlags`|5.3.0| | | |
+|`CUuserObject_flags_enum`|11.3| | |`hipUserObjectFlags`|5.3.0| | | |
+|`CUuserObject_st`|11.3| | |`hipUserObject`|5.3.0| | | |
|`CUuuid`| | | |`hipUUID`|5.2.0| | | |
|`CUuuid_st`| | | |`hipUUID_t`|5.2.0| | | |
|`GLenum`| | | |`GLenum`|5.1.0| | | |
|`GLuint`| | | |`GLuint`|5.1.0| | | |
+|`NVCL_CTX_SCHED_AUTO`|11.8| | | | | | | |
+|`NVCL_CTX_SCHED_BLOCKING_SYNC`|11.8| | | | | | | |
+|`NVCL_CTX_SCHED_SPIN`|11.8| | | | | | | |
+|`NVCL_CTX_SCHED_YIELD`|11.8| | | | | | | |
+|`NVCL_EVENT_SCHED_AUTO`|11.8| | | | | | | |
+|`NVCL_EVENT_SCHED_BLOCKING_SYNC`|11.8| | | | | | | |
+|`NVCL_EVENT_SCHED_SPIN`|11.8| | | | | | | |
+|`NVCL_EVENT_SCHED_YIELD`|11.8| | | | | | | |
|`__CUDACC__`| | | |`__HIPCC__`|1.6.0| | | |
+|`cl_context_flags`|11.8| | | | | | | |
+|`cl_context_flags_enum`|11.8| | | | | | | |
+|`cl_event_flags`|11.8| | | | | | | |
+|`cl_event_flags_enum`|11.8| | | | | | | |
|`cudaError_enum`| | | |`hipError_t`|1.5.0| | | |
|`memoryBarrier`|11.7| | | | | | | |
@@ -1129,8 +1182,8 @@
|**CUDA**|**A**|**D**|**R**|**HIP**|**A**|**D**|**R**|**E**|
|:--|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|
-|`cuGetErrorName`| | | | | | | | |
-|`cuGetErrorString`| | | | | | | | |
+|`cuGetErrorName`| | | |`hipDrvGetErrorName`|5.4.0| | |5.4.0|
+|`cuGetErrorString`| | | |`hipDrvGetErrorString`|5.4.0| | |5.4.0|
## **3. Initialization**
@@ -1209,7 +1262,7 @@
|`cuCtxResetPersistingL2Cache`|11.0| | | | | | | |
|`cuCtxSetCacheConfig`| | | |`hipCtxSetCacheConfig`|1.9.0|1.9.0| | |
|`cuCtxSetCurrent`| | | |`hipCtxSetCurrent`|1.6.0|1.9.0| | |
-|`cuCtxSetLimit`| | | |`hipDeviceSetLimit`|5.3.0| | |5.3.0|
+|`cuCtxSetLimit`| | | |`hipDeviceSetLimit`|5.3.0| | | |
|`cuCtxSetSharedMemConfig`| | | |`hipCtxSetSharedMemConfig`|1.9.0|1.9.0| | |
|`cuCtxSynchronize`| | | |`hipCtxSynchronize`|1.9.0|1.9.0| | |
@@ -1224,14 +1277,14 @@
|**CUDA**|**A**|**D**|**R**|**HIP**|**A**|**D**|**R**|**E**|
|:--|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|
-|`cuLinkAddData`| | | |`hiprtcLinkAddData`|5.3.0| | |5.3.0|
-|`cuLinkAddData_v2`| | | |`hiprtcLinkAddData`|5.3.0| | |5.3.0|
-|`cuLinkAddFile`| | | |`hiprtcLinkAddFile`|5.3.0| | |5.3.0|
-|`cuLinkAddFile_v2`| | | |`hiprtcLinkAddFile`|5.3.0| | |5.3.0|
-|`cuLinkComplete`| | | |`hiprtcLinkComplete`|5.3.0| | |5.3.0|
-|`cuLinkCreate`| | | |`hiprtcLinkCreate`|5.3.0| | |5.3.0|
-|`cuLinkCreate_v2`| | | |`hiprtcLinkCreate`|5.3.0| | |5.3.0|
-|`cuLinkDestroy`| | | |`hiprtcLinkDestroy`|5.3.0| | |5.3.0|
+|`cuLinkAddData`| | | |`hiprtcLinkAddData`|5.3.0| | | |
+|`cuLinkAddData_v2`| | | |`hiprtcLinkAddData`|5.3.0| | | |
+|`cuLinkAddFile`| | | |`hiprtcLinkAddFile`|5.3.0| | | |
+|`cuLinkAddFile_v2`| | | |`hiprtcLinkAddFile`|5.3.0| | | |
+|`cuLinkComplete`| | | |`hiprtcLinkComplete`|5.3.0| | | |
+|`cuLinkCreate`| | | |`hiprtcLinkCreate`|5.3.0| | | |
+|`cuLinkCreate_v2`| | | |`hiprtcLinkCreate`|5.3.0| | | |
+|`cuLinkDestroy`| | | |`hiprtcLinkDestroy`|5.3.0| | | |
|`cuModuleGetFunction`| | | |`hipModuleGetFunction`|1.6.0| | | |
|`cuModuleGetGlobal`| | | |`hipModuleGetGlobal`|1.6.0| | | |
|`cuModuleGetGlobal_v2`| | | |`hipModuleGetGlobal`|1.6.0| | | |
@@ -1487,6 +1540,7 @@
|`cuLaunchCooperativeKernelMultiDevice`|9.0|11.3| | | | | | |
|`cuLaunchHostFunc`|10.0| | |`hipLaunchHostFunc`|5.2.0| | | |
|`cuLaunchKernel`| | | |`hipModuleLaunchKernel`|1.6.0| | | |
+|`cuLaunchKernelEx`|11.8| | | | | | | |
## **20. Execution Control [DEPRECATED]**
@@ -1507,9 +1561,9 @@
|**CUDA**|**A**|**D**|**R**|**HIP**|**A**|**D**|**R**|**E**|
|:--|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|
-|`cuDeviceGetGraphMemAttribute`|11.4| | |`hipDeviceGetGraphMemAttribute`|5.3.0| | |5.3.0|
-|`cuDeviceGraphMemTrim`|11.4| | |`hipDeviceGraphMemTrim`|5.3.0| | |5.3.0|
-|`cuDeviceSetGraphMemAttribute`|11.4| | |`hipDeviceSetGraphMemAttribute`|5.3.0| | |5.3.0|
+|`cuDeviceGetGraphMemAttribute`|11.4| | |`hipDeviceGetGraphMemAttribute`|5.3.0| | | |
+|`cuDeviceGraphMemTrim`|11.4| | |`hipDeviceGraphMemTrim`|5.3.0| | | |
+|`cuDeviceSetGraphMemAttribute`|11.4| | |`hipDeviceSetGraphMemAttribute`|5.3.0| | | |
|`cuGraphAddBatchMemOpNode`|11.7| | | | | | | |
|`cuGraphAddChildGraphNode`|10.0| | |`hipGraphAddChildGraphNode`|5.0.0| | | |
|`cuGraphAddDependencies`|10.0| | |`hipGraphAddDependencies`|4.5.0| | | |
@@ -1578,13 +1632,13 @@
|`cuGraphNodeGetEnabled`|11.6| | | | | | | |
|`cuGraphNodeGetType`|10.0| | |`hipGraphNodeGetType`|5.0.0| | | |
|`cuGraphNodeSetEnabled`|11.6| | | | | | | |
-|`cuGraphReleaseUserObject`|11.3| | |`hipGraphReleaseUserObject`|5.3.0| | |5.3.0|
+|`cuGraphReleaseUserObject`|11.3| | |`hipGraphReleaseUserObject`|5.3.0| | | |
|`cuGraphRemoveDependencies`|10.0| | |`hipGraphRemoveDependencies`|5.0.0| | | |
-|`cuGraphRetainUserObject`|11.3| | |`hipGraphRetainUserObject`|5.3.0| | |5.3.0|
-|`cuGraphUpload`|11.1| | |`hipGraphUpload`|5.3.0| | |5.3.0|
-|`cuUserObjectCreate`|11.3| | |`hipUserObjectCreate`|5.3.0| | |5.3.0|
-|`cuUserObjectRelease`|11.3| | |`hipUserObjectRelease`|5.3.0| | |5.3.0|
-|`cuUserObjectRetain`|11.3| | |`hipUserObjectRetain`|5.3.0| | |5.3.0|
+|`cuGraphRetainUserObject`|11.3| | |`hipGraphRetainUserObject`|5.3.0| | | |
+|`cuGraphUpload`|11.1| | |`hipGraphUpload`|5.3.0| | | |
+|`cuUserObjectCreate`|11.3| | |`hipUserObjectCreate`|5.3.0| | | |
+|`cuUserObjectRelease`|11.3| | |`hipUserObjectRelease`|5.3.0| | | |
+|`cuUserObjectRetain`|11.3| | |`hipUserObjectRetain`|5.3.0| | | |
## **22. Occupancy**
@@ -1593,8 +1647,10 @@
|`cuOccupancyAvailableDynamicSMemPerBlock`|11.0| | | | | | | |
|`cuOccupancyMaxActiveBlocksPerMultiprocessor`| | | |`hipModuleOccupancyMaxActiveBlocksPerMultiprocessor`|3.5.0| | | |
|`cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags`| | | |`hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags`|3.5.0| | | |
+|`cuOccupancyMaxActiveClusters`|11.8| | | | | | | |
|`cuOccupancyMaxPotentialBlockSize`| | | |`hipModuleOccupancyMaxPotentialBlockSize`|3.5.0| | | |
|`cuOccupancyMaxPotentialBlockSizeWithFlags`| | | |`hipModuleOccupancyMaxPotentialBlockSizeWithFlags`|3.5.0| | | |
+|`cuOccupancyMaxPotentialClusterSize`|11.8| | | | | | | |
## **23. Texture Reference Management [DEPRECATED]**
diff --git a/doc/markdown/CUDA_RTC_API_supported_by_HIP.md b/doc/markdown/CUDA_RTC_API_supported_by_HIP.md
index 22d82bba..ec6c5c48 100644
--- a/doc/markdown/CUDA_RTC_API_supported_by_HIP.md
+++ b/doc/markdown/CUDA_RTC_API_supported_by_HIP.md
@@ -27,8 +27,8 @@
|`nvrtcCompileProgram`| | | |`hiprtcCompileProgram`|2.6.0| | | |
|`nvrtcCreateProgram`| | | |`hiprtcCreateProgram`|2.6.0| | | |
|`nvrtcDestroyProgram`| | | |`hiprtcDestroyProgram`|2.6.0| | | |
-|`nvrtcGetCUBIN`|11.1| | |`hiprtcGetBitcode`|5.3.0| | |5.3.0|
-|`nvrtcGetCUBINSize`|11.1| | |`hiprtcGetBitcodeSize`|5.3.0| | |5.3.0|
+|`nvrtcGetCUBIN`|11.1| | |`hiprtcGetBitcode`|5.3.0| | | |
+|`nvrtcGetCUBINSize`|11.1| | |`hiprtcGetBitcodeSize`|5.3.0| | | |
|`nvrtcGetErrorString`| | | |`hiprtcGetErrorString`|2.6.0| | | |
|`nvrtcGetLoweredName`|8.0| | |`hiprtcGetLoweredName`|2.6.0| | | |
|`nvrtcGetNVVM`|11.4| | | | | | | |
diff --git a/doc/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md b/doc/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md
index a8314136..d9a3b2ce 100644
--- a/doc/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md
+++ b/doc/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md
@@ -20,7 +20,7 @@
|`cudaDeviceGetTexture1DLinearMaxWidth`|11.1| | | | | | | |
|`cudaDeviceReset`| | | |`hipDeviceReset`|1.6.0| | | |
|`cudaDeviceSetCacheConfig`| | | |`hipDeviceSetCacheConfig`|1.6.0| | | |
-|`cudaDeviceSetLimit`| | | |`hipDeviceSetLimit`|5.3.0| | |5.3.0|
+|`cudaDeviceSetLimit`| | | |`hipDeviceSetLimit`|5.3.0| | | |
|`cudaDeviceSetMemPool`|11.2| | |`hipDeviceSetMemPool`|5.2.0| | | |
|`cudaDeviceSetSharedMemConfig`| | | |`hipDeviceSetSharedMemConfig`|1.6.0| | | |
|`cudaDeviceSynchronize`| | | |`hipDeviceSynchronize`|1.6.0| | | |
@@ -122,6 +122,7 @@
|`cudaLaunchCooperativeKernelMultiDevice`|9.0|11.3| |`hipLaunchCooperativeKernelMultiDevice`|2.6.0| | | |
|`cudaLaunchHostFunc`|10.0| | |`hipLaunchHostFunc`|5.2.0| | | |
|`cudaLaunchKernel`| | | |`hipLaunchKernel`|1.6.0| | | |
+|`cudaLaunchKernelExC`|11.8| | | | | | | |
|`cudaSetDoubleForDevice`| |10.0| | | | | | |
|`cudaSetDoubleForHost`| |10.0| | | | | | |
@@ -132,10 +133,12 @@
|`cudaOccupancyAvailableDynamicSMemPerBlock`|11.0| | | | | | | |
|`cudaOccupancyMaxActiveBlocksPerMultiprocessor`| | | |`hipOccupancyMaxActiveBlocksPerMultiprocessor`|1.6.0| | | |
|`cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags`| | | |`hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags`|2.6.0| | | |
+|`cudaOccupancyMaxActiveClusters`|11.8| | | | | | | |
|`cudaOccupancyMaxPotentialBlockSize`| | | |`hipOccupancyMaxPotentialBlockSize`|1.6.0| | | |
|`cudaOccupancyMaxPotentialBlockSizeVariableSMem`| | | | | | | | |
|`cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags`| | | | | | | | |
|`cudaOccupancyMaxPotentialBlockSizeWithFlags`| | | |`hipOccupancyMaxPotentialBlockSizeWithFlags`|3.5.0| | | |
+|`cudaOccupancyMaxPotentialClusterSize`|11.8| | | | | | | |
## **9. Memory Management**
@@ -393,10 +396,12 @@
|**CUDA**|**A**|**D**|**R**|**HIP**|**A**|**D**|**R**|**E**|
|:--|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|
|`cudaCreateTextureObject`| | | |`hipCreateTextureObject`|1.7.0| | | |
+|`cudaCreateTextureObject_v2`|11.8| | | | | | | |
|`cudaDestroyTextureObject`| | | |`hipDestroyTextureObject`|1.7.0| | | |
|`cudaGetTextureObjectResourceDesc`| | | |`hipGetTextureObjectResourceDesc`|1.7.0| | | |
|`cudaGetTextureObjectResourceViewDesc`| | | |`hipGetTextureObjectResourceViewDesc`|1.7.0| | | |
|`cudaGetTextureObjectTextureDesc`| | | |`hipGetTextureObjectTextureDesc`|1.7.0| | | |
+|`cudaGetTextureObjectTextureDesc_v2`|11.8| | | | | | | |
## **28. Surface Object Management**
@@ -417,9 +422,9 @@
|**CUDA**|**A**|**D**|**R**|**HIP**|**A**|**D**|**R**|**E**|
|:--|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|
-|`cudaDeviceGetGraphMemAttribute`|11.4| | |`hipDeviceGetGraphMemAttribute`|5.3.0| | |5.3.0|
-|`cudaDeviceGraphMemTrim`|11.4| | |`hipDeviceGraphMemTrim`|5.3.0| | |5.3.0|
-|`cudaDeviceSetGraphMemAttribute`|11.4| | |`hipDeviceSetGraphMemAttribute`|5.3.0| | |5.3.0|
+|`cudaDeviceGetGraphMemAttribute`|11.4| | |`hipDeviceGetGraphMemAttribute`|5.3.0| | | |
+|`cudaDeviceGraphMemTrim`|11.4| | |`hipDeviceGraphMemTrim`|5.3.0| | | |
+|`cudaDeviceSetGraphMemAttribute`|11.4| | |`hipDeviceSetGraphMemAttribute`|5.3.0| | | |
|`cudaGraphAddChildGraphNode`|10.0| | |`hipGraphAddChildGraphNode`|5.0.0| | | |
|`cudaGraphAddDependencies`|10.0| | |`hipGraphAddDependencies`|4.5.0| | | |
|`cudaGraphAddEmptyNode`|10.0| | |`hipGraphAddEmptyNode`|4.5.0| | | |
@@ -491,13 +496,13 @@
|`cudaGraphNodeGetDependentNodes`|11.0| | |`hipGraphNodeGetDependentNodes`|5.0.0| | | |
|`cudaGraphNodeGetType`|11.0| | |`hipGraphNodeGetType`|5.0.0| | | |
|`cudaGraphNodeSetEnabled`|11.6| | | | | | | |
-|`cudaGraphReleaseUserObject`|11.3| | |`hipGraphReleaseUserObject`|5.3.0| | |5.3.0|
+|`cudaGraphReleaseUserObject`|11.3| | |`hipGraphReleaseUserObject`|5.3.0| | | |
|`cudaGraphRemoveDependencies`|11.0| | |`hipGraphRemoveDependencies`|5.0.0| | | |
-|`cudaGraphRetainUserObject`|11.3| | |`hipGraphRetainUserObject`|5.3.0| | |5.3.0|
-|`cudaGraphUpload`|11.1| | |`hipGraphUpload`|5.3.0| | |5.3.0|
-|`cudaUserObjectCreate`|11.3| | |`hipUserObjectCreate`|5.3.0| | |5.3.0|
-|`cudaUserObjectRelease`|11.3| | |`hipUserObjectRelease`|5.3.0| | |5.3.0|
-|`cudaUserObjectRetain`|11.3| | |`hipUserObjectRetain`|5.3.0| | |5.3.0|
+|`cudaGraphRetainUserObject`|11.3| | |`hipGraphRetainUserObject`|5.3.0| | | |
+|`cudaGraphUpload`|11.1| | |`hipGraphUpload`|5.3.0| | | |
+|`cudaUserObjectCreate`|11.3| | |`hipUserObjectCreate`|5.3.0| | | |
+|`cudaUserObjectRelease`|11.3| | |`hipUserObjectRelease`|5.3.0| | | |
+|`cudaUserObjectRetain`|11.3| | |`hipUserObjectRetain`|5.3.0| | | |
## **31. Driver Entry Point Access**
@@ -610,6 +615,10 @@ Unsupported
|`cudaChannelFormatKindUnsignedNormalized8X1`|11.5| | | | | | | |
|`cudaChannelFormatKindUnsignedNormalized8X2`|11.5| | | | | | | |
|`cudaChannelFormatKindUnsignedNormalized8X4`|11.5| | | | | | | |
+|`cudaClusterSchedulingPolicy`|11.8| | | | | | | |
+|`cudaClusterSchedulingPolicyDefault`|11.8| | | | | | | |
+|`cudaClusterSchedulingPolicyLoadBalancing`|11.8| | | | | | | |
+|`cudaClusterSchedulingPolicySpread`|11.8| | | | | | | |
|`cudaComputeMode`| | | |`hipComputeMode`|1.9.0| | | |
|`cudaComputeModeDefault`| | | |`hipComputeModeDefault`|1.9.0| | | |
|`cudaComputeModeExclusive`| | | |`hipComputeModeExclusive`|1.9.0| | | |
@@ -649,6 +658,7 @@ Unsupported
|`cudaDevAttrCanMapHostMemory`| | | |`hipDeviceAttributeCanMapHostMemory`|2.10.0| | | |
|`cudaDevAttrCanUseHostPointerForRegisteredMem`|8.0| | |`hipDeviceAttributeCanUseHostPointerForRegisteredMem`|4.3.0| | | |
|`cudaDevAttrClockRate`| | | |`hipDeviceAttributeClockRate`|1.6.0| | | |
+|`cudaDevAttrClusterLaunch`|11.8| | | | | | | |
|`cudaDevAttrComputeCapabilityMajor`| | | |`hipDeviceAttributeComputeCapabilityMajor`|1.6.0| | | |
|`cudaDevAttrComputeCapabilityMinor`| | | |`hipDeviceAttributeComputeCapabilityMinor`|1.6.0| | | |
|`cudaDevAttrComputeMode`| | | |`hipDeviceAttributeComputeMode`|1.6.0| | | |
@@ -898,6 +908,7 @@ Unsupported
|`cudaErrorInsufficientDriver`| | | |`hipErrorInsufficientDriver`|1.7.0| | | |
|`cudaErrorInvalidAddressSpace`| | | | | | | | |
|`cudaErrorInvalidChannelDescriptor`| | | | | | | | |
+|`cudaErrorInvalidClusterSize`|11.8| | | | | | | |
|`cudaErrorInvalidConfiguration`| | | |`hipErrorInvalidConfiguration`|1.6.0| | | |
|`cudaErrorInvalidDevice`| | | |`hipErrorInvalidDevice`|1.6.0| | | |
|`cudaErrorInvalidDeviceFunction`| | | |`hipErrorInvalidDeviceFunction`|1.6.0| | | |
@@ -934,6 +945,7 @@ Unsupported
|`cudaErrorMisalignedAddress`| | | | | | | | |
|`cudaErrorMissingConfiguration`| | | |`hipErrorMissingConfiguration`|1.6.0| | | |
|`cudaErrorMixedDeviceExecution`| |3.1| | | | | | |
+|`cudaErrorMpsClientTerminated`|11.8| | | | | | | |
|`cudaErrorMpsConnectionFailed`|11.4| | | | | | | |
|`cudaErrorMpsMaxClientsReached`|11.4| | | | | | | |
|`cudaErrorMpsMaxConnectionsReached`|11.4| | | | | | | |
@@ -1045,9 +1057,15 @@ Unsupported
|`cudaFormatModeAuto`| | | | | | | | |
|`cudaFormatModeForced`| | | | | | | | |
|`cudaFuncAttribute`|9.0| | |`hipFuncAttribute`|3.9.0| | | |
+|`cudaFuncAttributeClusterDimMustBeSet`|11.8| | | | | | | |
+|`cudaFuncAttributeClusterSchedulingPolicyPreference`|11.8| | | | | | | |
|`cudaFuncAttributeMax`|9.0| | |`hipFuncAttributeMax`|3.9.0| | | |
|`cudaFuncAttributeMaxDynamicSharedMemorySize`|9.0| | |`hipFuncAttributeMaxDynamicSharedMemorySize`|3.9.0| | | |
+|`cudaFuncAttributeNonPortableClusterSizeAllowed`|11.8| | | | | | | |
|`cudaFuncAttributePreferredSharedMemoryCarveout`|9.0| | |`hipFuncAttributePreferredSharedMemoryCarveout`|3.9.0| | | |
+|`cudaFuncAttributeRequiredClusterDepth`|11.8| | | | | | | |
+|`cudaFuncAttributeRequiredClusterHeight`|11.8| | | | | | | |
+|`cudaFuncAttributeRequiredClusterWidth`|11.8| | | | | | | |
|`cudaFuncAttributes`| | | |`hipFuncAttributes`|1.9.0| | | |
|`cudaFuncCache`| | | |`hipFuncCache_t`|1.6.0| | | |
|`cudaFuncCachePreferEqual`| | | |`hipFuncCachePreferEqual`|1.6.0| | | |
@@ -1092,17 +1110,17 @@ Unsupported
|`cudaGraphInstantiateFlagAutoFreeOnLaunch`|11.4| | |`hipGraphInstantiateFlagAutoFreeOnLaunch`|5.2.0| | | |
|`cudaGraphInstantiateFlagUseNodePriority`|11.7| | | | | | | |
|`cudaGraphInstantiateFlags`|11.4| | |`hipGraphInstantiateFlags`|5.2.0| | | |
-|`cudaGraphMemAttrReservedMemCurrent`|11.4| | |`hipGraphMemAttrReservedMemCurrent`|5.3.0| | |5.3.0|
-|`cudaGraphMemAttrReservedMemHigh`|11.4| | |`hipGraphMemAttrReservedMemHigh`|5.3.0| | |5.3.0|
-|`cudaGraphMemAttrUsedMemCurrent`|11.4| | |`hipGraphMemAttrUsedMemCurrent`|5.3.0| | |5.3.0|
-|`cudaGraphMemAttrUsedMemHigh`|11.4| | |`hipGraphMemAttrUsedMemHigh`|5.3.0| | |5.3.0|
-|`cudaGraphMemAttributeType`|11.4| | |`hipGraphMemAttributeType`|5.3.0| | |5.3.0|
+|`cudaGraphMemAttrReservedMemCurrent`|11.4| | |`hipGraphMemAttrReservedMemCurrent`|5.3.0| | | |
+|`cudaGraphMemAttrReservedMemHigh`|11.4| | |`hipGraphMemAttrReservedMemHigh`|5.3.0| | | |
+|`cudaGraphMemAttrUsedMemCurrent`|11.4| | |`hipGraphMemAttrUsedMemCurrent`|5.3.0| | | |
+|`cudaGraphMemAttrUsedMemHigh`|11.4| | |`hipGraphMemAttrUsedMemHigh`|5.3.0| | | |
+|`cudaGraphMemAttributeType`|11.4| | |`hipGraphMemAttributeType`|5.3.0| | | |
|`cudaGraphNodeType`|10.0| | |`hipGraphNodeType`|4.3.0| | | |
|`cudaGraphNodeTypeCount`|10.0| | |`hipGraphNodeTypeCount`|4.3.0| | | |
|`cudaGraphNodeTypeEmpty`|10.0| | |`hipGraphNodeTypeEmpty`|4.3.0| | | |
|`cudaGraphNodeTypeEventRecord`|11.1| | |`hipGraphNodeTypeEventRecord`|4.3.0| | | |
-|`cudaGraphNodeTypeExtSemaphoreSignal`|11.4| | |`hipGraphNodeTypeExtSemaphoreSignal`|5.3.0| | |5.3.0|
-|`cudaGraphNodeTypeExtSemaphoreWait`|11.4| | |`hipGraphNodeTypeExtSemaphoreWait`|5.3.0| | |5.3.0|
+|`cudaGraphNodeTypeExtSemaphoreSignal`|11.4| | |`hipGraphNodeTypeExtSemaphoreSignal`|5.3.0| | | |
+|`cudaGraphNodeTypeExtSemaphoreWait`|11.4| | |`hipGraphNodeTypeExtSemaphoreWait`|5.3.0| | | |
|`cudaGraphNodeTypeGraph`|10.0| | |`hipGraphNodeTypeGraph`|4.3.0| | | |
|`cudaGraphNodeTypeHost`|10.0| | |`hipGraphNodeTypeHost`|4.3.0| | | |
|`cudaGraphNodeTypeKernel`|10.0| | |`hipGraphNodeTypeKernel`|4.3.0| | | |
@@ -1112,7 +1130,7 @@ Unsupported
|`cudaGraphNodeTypeMemset`|10.0| | |`hipGraphNodeTypeMemset`|4.3.0| | | |
|`cudaGraphNodeTypeWaitEvent`|11.1| | |`hipGraphNodeTypeWaitEvent`|4.3.0| | | |
|`cudaGraphNode_t`|10.0| | |`hipGraphNode_t`|4.3.0| | | |
-|`cudaGraphUserObjectMove`|11.3| | |`hipGraphUserObjectMove`|5.3.0| | |5.3.0|
+|`cudaGraphUserObjectMove`|11.3| | |`hipGraphUserObjectMove`|5.3.0| | | |
|`cudaGraph_t`|10.0| | |`hipGraph_t`|4.3.0| | | |
|`cudaGraphicsCubeFace`| | | | | | | | |
|`cudaGraphicsCubeFaceNegativeX`| | | | | | | | |
@@ -1153,10 +1171,27 @@ Unsupported
|`cudaKernelNodeAttrID`|11.0| | |`hipKernelNodeAttrID`|5.2.0| | | |
|`cudaKernelNodeAttrValue`|11.0| | |`hipKernelNodeAttrValue`|5.2.0| | | |
|`cudaKernelNodeAttributeAccessPolicyWindow`|11.0| | |`hipKernelNodeAttributeAccessPolicyWindow`|5.2.0| | | |
+|`cudaKernelNodeAttributeClusterDimension`|11.8| | | | | | | |
+|`cudaKernelNodeAttributeClusterSchedulingPolicyPreference`|11.8| | | | | | | |
|`cudaKernelNodeAttributeCooperative`|11.0| | |`hipKernelNodeAttributeCooperative`|5.2.0| | | |
|`cudaKernelNodeAttributePriority`|11.7| | | | | | | |
|`cudaKernelNodeParams`|10.0| | |`hipKernelNodeParams`|4.3.0| | | |
|`cudaKeyValuePair`| | | | | | | | |
+|`cudaLaunchAttribute`|11.8| | | | | | | |
+|`cudaLaunchAttributeAccessPolicyWindow`|11.8| | | | | | | |
+|`cudaLaunchAttributeClusterDimension`|11.8| | | | | | | |
+|`cudaLaunchAttributeClusterSchedulingPolicyPreference`|11.8| | | | | | | |
+|`cudaLaunchAttributeCooperative`|11.8| | | | | | | |
+|`cudaLaunchAttributeID`|11.8| | | | | | | |
+|`cudaLaunchAttributeIgnore`|11.8| | | | | | | |
+|`cudaLaunchAttributePriority`|11.8| | | | | | | |
+|`cudaLaunchAttributeProgrammaticEvent`|11.8| | | | | | | |
+|`cudaLaunchAttributeProgrammaticStreamSerialization`|11.8| | | | | | | |
+|`cudaLaunchAttributeSynchronizationPolicy`|11.8| | | | | | | |
+|`cudaLaunchAttributeValue`|11.8| | | | | | | |
+|`cudaLaunchAttribute_st`|11.8| | | | | | | |
+|`cudaLaunchConfig_st`|11.8| | | | | | | |
+|`cudaLaunchConfig_t`|11.8| | | | | | | |
|`cudaLaunchParams`|9.0| | |`hipLaunchParams`|2.6.0| | | |
|`cudaLimit`| | | |`hipLimit_t`|1.6.0| | | |
|`cudaLimitDevRuntimePendingLaunchCount`| | | | | | | | |
@@ -1165,7 +1200,7 @@ Unsupported
|`cudaLimitMaxL2FetchGranularity`|10.0| | | | | | | |
|`cudaLimitPersistingL2CacheSize`|11.0| | | | | | | |
|`cudaLimitPrintfFifoSize`| | | |`hipLimitPrintfFifoSize`|4.5.0| | | |
-|`cudaLimitStackSize`| | | |`hipLimitStackSize`|5.3.0| | |5.3.0|
+|`cudaLimitStackSize`| | | |`hipLimitStackSize`|5.3.0| | | |
|`cudaMemAccessDesc`|11.2| | |`hipMemAccessDesc`|5.2.0| | | |
|`cudaMemAccessFlags`|11.2| | |`hipMemAccessFlags`|5.2.0| | | |
|`cudaMemAccessFlagsProtNone`|11.2| | |`hipMemAccessFlagsProtNone`|5.2.0| | | |
@@ -1223,7 +1258,7 @@ Unsupported
|`cudaMemoryType`| | | |`hipMemoryType`|1.6.0| | | |
|`cudaMemoryTypeDevice`| | | |`hipMemoryTypeDevice`|1.6.0| | | |
|`cudaMemoryTypeHost`| | | |`hipMemoryTypeHost`|1.6.0| | | |
-|`cudaMemoryTypeManaged`|10.0| | |`hipMemoryTypeManaged`|5.3.0| | |5.3.0|
+|`cudaMemoryTypeManaged`|10.0| | |`hipMemoryTypeManaged`|5.3.0| | | |
|`cudaMemoryTypeUnregistered`| | | | | | | | |
|`cudaMemsetParams`|10.0| | |`hipMemsetParams`|4.3.0| | | |
|`cudaMipmappedArray`| | | |`hipMipmappedArray`|1.7.0| | | |
@@ -1334,10 +1369,10 @@ Unsupported
|`cudaTextureTypeCubemap`| | | |`hipTextureTypeCubemap`|1.7.0| | | |
|`cudaTextureTypeCubemapLayered`| | | |`hipTextureTypeCubemapLayered`|1.7.0| | | |
|`cudaUUID_t`| | | |`hipUUID`|5.2.0| | | |
-|`cudaUserObjectFlags`|11.3| | |`hipUserObjectFlags`|5.3.0| | |5.3.0|
-|`cudaUserObjectNoDestructorSync`|11.3| | |`hipUserObjectNoDestructorSync`|5.3.0| | |5.3.0|
-|`cudaUserObjectRetainFlags`|11.3| | |`hipUserObjectRetainFlags`|5.3.0| | |5.3.0|
-|`cudaUserObject_t`|11.3| | |`hipUserObject_t`|5.3.0| | |5.3.0|
+|`cudaUserObjectFlags`|11.3| | |`hipUserObjectFlags`|5.3.0| | | |
+|`cudaUserObjectNoDestructorSync`|11.3| | |`hipUserObjectNoDestructorSync`|5.3.0| | | |
+|`cudaUserObjectRetainFlags`|11.3| | |`hipUserObjectRetainFlags`|5.3.0| | | |
+|`cudaUserObject_t`|11.3| | |`hipUserObject_t`|5.3.0| | | |
|`libraryPropertyType`|8.0| | | | | | | |
|`libraryPropertyType_t`|8.0| | | | | | | |
|`surfaceReference`| | | |`surfaceReference`|1.9.0| | | |
diff --git a/doc/markdown/CUDNN_API_supported_by_HIP.md b/doc/markdown/CUDNN_API_supported_by_HIP.md
index 7a5f5ae3..405b23c5 100644
--- a/doc/markdown/CUDNN_API_supported_by_HIP.md
+++ b/doc/markdown/CUDNN_API_supported_by_HIP.md
@@ -48,6 +48,7 @@
|`CUDNN_ATTR_EXECUTION_PLAN_COMPUTED_INTERMEDIATE_UIDS`|8.0.2| | | | | | | |
|`CUDNN_ATTR_EXECUTION_PLAN_ENGINE_CONFIG`|8.0.1| | | | | | | |
|`CUDNN_ATTR_EXECUTION_PLAN_HANDLE`|8.0.1| | | | | | | |
+|`CUDNN_ATTR_EXECUTION_PLAN_JSON_REPRESENTATION`|8.4.0| | | | | | | |
|`CUDNN_ATTR_EXECUTION_PLAN_RUN_ONLY_INTERMEDIATE_UIDS`|8.0.2| | | | | | | |
|`CUDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE`|8.0.1| | | | | | | |
|`CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_ATTRIBUTES`|8.0.2| | | | | | | |
@@ -94,6 +95,10 @@
|`CUDNN_ATTR_OPERATION_BN_FINALIZE_UPDATED_RUNNING_VAR_DESC`|8.1.0| | | | | | | |
|`CUDNN_ATTR_OPERATION_BN_FINALIZE_Y_SQ_SUM_DESC`|8.1.0| | | | | | | |
|`CUDNN_ATTR_OPERATION_BN_FINALIZE_Y_SUM_DESC`|8.1.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_CONCAT_AXIS`|8.5.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_CONCAT_INPLACE_INDEX`|8.5.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_CONCAT_INPUT_DESCS`|8.5.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_CONCAT_OUTPUT_DESC`|8.5.0| | | | | | | |
|`CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_ALPHA`|8.0.1| | | | | | | |
|`CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_BETA`|8.0.1| | | | | | | |
|`CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_CONV_DESC`|8.0.1| | | | | | | |
@@ -122,17 +127,62 @@
|`CUDNN_ATTR_OPERATION_MATMUL_CDESC`|8.1.0| | | | | | | |
|`CUDNN_ATTR_OPERATION_MATMUL_DESC`|8.1.0| | | | | | | |
|`CUDNN_ATTR_OPERATION_MATMUL_IRREGULARLY_STRIDED_BATCH_COUNT`|8.1.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_NORM_BWD_DBIAS_DESC`|8.5.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_NORM_BWD_DSCALE_DESC`|8.5.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_NORM_BWD_DXDESC`|8.5.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_NORM_BWD_DYDESC`|8.5.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_NORM_BWD_EPSILON_DESC`|8.5.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_NORM_BWD_INV_VARIANCE_DESC`|8.5.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_NORM_BWD_MEAN_DESC`|8.5.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_NORM_BWD_MODE`|8.5.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_NORM_BWD_PEER_STAT_DESCS`|8.5.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_NORM_BWD_SCALE_DESC`|8.5.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_NORM_BWD_XDESC`|8.5.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_NORM_FWD_BIAS_DESC`|8.5.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_NORM_FWD_EPSILON_DESC`|8.5.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_NORM_FWD_EXP_AVG_FACTOR_DESC`|8.5.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_MEAN_DESC`|8.5.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_VAR_DESC`|8.5.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_NORM_FWD_INV_VARIANCE_DESC`|8.5.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_NORM_FWD_MEAN_DESC`|8.5.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_NORM_FWD_MODE`|8.5.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_MEAN_DESC`|8.5.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_VAR_DESC`|8.5.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_NORM_FWD_PEER_STAT_DESCS`|8.5.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_NORM_FWD_PHASE`|8.5.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_NORM_FWD_SCALE_DESC`|8.5.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_NORM_FWD_XDESC`|8.5.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_NORM_FWD_YDESC`|8.5.0| | | | | | | |
|`CUDNN_ATTR_OPERATION_POINTWISE_ALPHA1`|8.0.1| | | | | | | |
|`CUDNN_ATTR_OPERATION_POINTWISE_ALPHA2`|8.0.1| | | | | | | |
|`CUDNN_ATTR_OPERATION_POINTWISE_BDESC`|8.0.1| | | | | | | |
|`CUDNN_ATTR_OPERATION_POINTWISE_DXDESC`|8.1.0| | | | | | | |
|`CUDNN_ATTR_OPERATION_POINTWISE_DYDESC`|8.1.0| | | | | | | |
|`CUDNN_ATTR_OPERATION_POINTWISE_PW_DESCRIPTOR`|8.0.1| | | | | | | |
+|`CUDNN_ATTR_OPERATION_POINTWISE_TDESC`|8.3.0| | | | | | | |
|`CUDNN_ATTR_OPERATION_POINTWISE_XDESC`|8.0.1| | | | | | | |
|`CUDNN_ATTR_OPERATION_POINTWISE_YDESC`|8.0.1| | | | | | | |
|`CUDNN_ATTR_OPERATION_REDUCTION_DESC`|8.1.0| | | | | | | |
|`CUDNN_ATTR_OPERATION_REDUCTION_XDESC`|8.1.0| | | | | | | |
|`CUDNN_ATTR_OPERATION_REDUCTION_YDESC`|8.1.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_RESAMPLE_BWD_ALPHA`|8.3.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_RESAMPLE_BWD_BETA`|8.3.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DESC`|8.3.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DXDESC`|8.3.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DYDESC`|8.3.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_RESAMPLE_BWD_IDXDESC`|8.3.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_RESAMPLE_FWD_ALPHA`|8.3.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_RESAMPLE_FWD_BETA`|8.3.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_RESAMPLE_FWD_DESC`|8.3.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_RESAMPLE_FWD_IDXDESC`|8.3.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_RESAMPLE_FWD_XDESC`|8.3.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_RESAMPLE_FWD_YDESC`|8.3.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_SIGNAL_FLAGDESC`|8.5.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_SIGNAL_MODE`|8.5.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_SIGNAL_VALUE`|8.5.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_SIGNAL_XDESC`|8.5.0| | | | | | | |
+|`CUDNN_ATTR_OPERATION_SIGNAL_YDESC`|8.5.0| | | | | | | |
+|`CUDNN_ATTR_POINTWISE_AXIS`|8.4.0| | | | | | | |
|`CUDNN_ATTR_POINTWISE_ELU_ALPHA`|8.1.0| | | | | | | |
|`CUDNN_ATTR_POINTWISE_MATH_PREC`|8.0.1| | | | | | | |
|`CUDNN_ATTR_POINTWISE_MODE`|8.0.1| | | | | | | |
@@ -144,11 +194,21 @@
|`CUDNN_ATTR_POINTWISE_SWISH_BETA`|8.1.0| | | | | | | |
|`CUDNN_ATTR_REDUCTION_COMP_TYPE`|8.1.0| | | | | | | |
|`CUDNN_ATTR_REDUCTION_OPERATOR`|8.1.0| | | | | | | |
+|`CUDNN_ATTR_RESAMPLE_COMP_TYPE`|8.3.0| | | | | | | |
+|`CUDNN_ATTR_RESAMPLE_MODE`|8.3.0| | | | | | | |
+|`CUDNN_ATTR_RESAMPLE_NAN_PROPAGATION`|8.3.0| | | | | | | |
+|`CUDNN_ATTR_RESAMPLE_PADDING_MODE`|8.3.0| | | | | | | |
+|`CUDNN_ATTR_RESAMPLE_POST_PADDINGS`|8.3.0| | | | | | | |
+|`CUDNN_ATTR_RESAMPLE_PRE_PADDINGS`|8.3.0| | | | | | | |
+|`CUDNN_ATTR_RESAMPLE_SPATIAL_DIMS`|8.3.0| | | | | | | |
+|`CUDNN_ATTR_RESAMPLE_STRIDES`|8.3.0| | | | | | | |
+|`CUDNN_ATTR_RESAMPLE_WINDOW_DIMS`|8.3.0| | | | | | | |
|`CUDNN_ATTR_TENSOR_BYTE_ALIGNMENT`|8.0.1| | | | | | | |
|`CUDNN_ATTR_TENSOR_DATA_TYPE`|8.0.1| | | | | | | |
|`CUDNN_ATTR_TENSOR_DIMENSIONS`|8.0.1| | | | | | | |
|`CUDNN_ATTR_TENSOR_IS_BY_VALUE`|8.1.0| | | | | | | |
|`CUDNN_ATTR_TENSOR_IS_VIRTUAL`|8.0.1| | | | | | | |
+|`CUDNN_ATTR_TENSOR_REORDERING_MODE`|8.3.0| | | | | | | |
|`CUDNN_ATTR_TENSOR_STRIDES`|8.0.1| | | | | | | |
|`CUDNN_ATTR_TENSOR_UNIQUE_ID`|8.0.1| | | | | | | |
|`CUDNN_ATTR_TENSOR_VECTORIZED_DIMENSION`|8.0.1| | | | | | | |
@@ -170,15 +230,22 @@
|`CUDNN_BACKEND_OPERATIONGRAPH_DESCRIPTOR`|8.0.1| | | | | | | |
|`CUDNN_BACKEND_OPERATION_BN_BWD_WEIGHTS_DESCRIPTOR`| | | | | | | | |
|`CUDNN_BACKEND_OPERATION_BN_FINALIZE_STATISTICS_DESCRIPTOR`|8.1.0| | | | | | | |
+|`CUDNN_BACKEND_OPERATION_CONCAT_DESCRIPTOR`|8.5.0| | | | | | | |
|`CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_DATA_DESCRIPTOR`|8.0.1| | | | | | | |
|`CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_FILTER_DESCRIPTOR`|8.0.1| | | | | | | |
|`CUDNN_BACKEND_OPERATION_CONVOLUTION_FORWARD_DESCRIPTOR`|8.0.1| | | | | | | |
|`CUDNN_BACKEND_OPERATION_GEN_STATS_DESCRIPTOR`|8.0.1| | | | | | | |
|`CUDNN_BACKEND_OPERATION_MATMUL_DESCRIPTOR`|8.1.0| | | | | | | |
+|`CUDNN_BACKEND_OPERATION_NORM_BACKWARD_DESCRIPTOR`|8.5.0| | | | | | | |
+|`CUDNN_BACKEND_OPERATION_NORM_FORWARD_DESCRIPTOR`|8.5.0| | | | | | | |
|`CUDNN_BACKEND_OPERATION_POINTWISE_DESCRIPTOR`|8.0.1| | | | | | | |
|`CUDNN_BACKEND_OPERATION_REDUCTION_DESCRIPTOR`|8.1.0| | | | | | | |
+|`CUDNN_BACKEND_OPERATION_RESAMPLE_BWD_DESCRIPTOR`|8.3.0| | | | | | | |
+|`CUDNN_BACKEND_OPERATION_RESAMPLE_FWD_DESCRIPTOR`|8.3.0| | | | | | | |
+|`CUDNN_BACKEND_OPERATION_SIGNAL_DESCRIPTOR`|8.5.0| | | | | | | |
|`CUDNN_BACKEND_POINTWISE_DESCRIPTOR`|8.0.1| | | | | | | |
|`CUDNN_BACKEND_REDUCTION_DESCRIPTOR`|8.1.0| | | | | | | |
+|`CUDNN_BACKEND_RESAMPLE_DESCRIPTOR`|8.3.0| | | | | | | |
|`CUDNN_BACKEND_TENSOR_DESCRIPTOR`|8.0.1| | | | | | | |
|`CUDNN_BACKEND_VARIANT_PACK_DESCRIPTOR`|8.0.1| | | | | | | |
|`CUDNN_BATCHNORM_OPS_BN`|7.4.1| | | | | | | |
@@ -187,6 +254,9 @@
|`CUDNN_BATCHNORM_PER_ACTIVATION`|4.0.0| | |`HIPDNN_BATCHNORM_PER_ACTIVATION`| | | | |
|`CUDNN_BATCHNORM_SPATIAL`|4.0.0| | |`HIPDNN_BATCHNORM_SPATIAL`| | | | |
|`CUDNN_BATCHNORM_SPATIAL_PERSISTENT`|7.0.5| | |`HIPDNN_BATCHNORM_SPATIAL_PERSISTENT`| | | | |
+|`CUDNN_BATCH_NORM`|8.5.0| | | | | | | |
+|`CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER`|8.3.0| | | | | | | |
+|`CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER`|8.3.0| | | | | | | |
|`CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION`|8.2.0| | | | | | | |
|`CUDNN_BEHAVIOR_NOTE_TYPE_COUNT`|8.2.0| | | | | | | |
|`CUDNN_BIDIRECTIONAL`|5.0.0| | |`HIPDNN_BIDIRECTIONAL`| | | | |
@@ -237,8 +307,11 @@
|`CUDNN_CTC_LOSS_ALGO_DETERMINISTIC`|7.0.5| | | | | | | |
|`CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC`|7.0.5| | | | | | | |
|`CUDNN_DATA_BFLOAT16`|8.1.0| | | | | | | |
+|`CUDNN_DATA_BOOLEAN`|8.3.0| | | | | | | |
|`CUDNN_DATA_DOUBLE`|1.0.0| | |`HIPDNN_DATA_DOUBLE`| | | | |
|`CUDNN_DATA_FLOAT`|1.0.0| | |`HIPDNN_DATA_FLOAT`| | | | |
+|`CUDNN_DATA_FP8_E4M3`|8.6.0| | | | | | | |
+|`CUDNN_DATA_FP8_E5M2`|8.6.0| | | | | | | |
|`CUDNN_DATA_HALF`|3.0.0| | |`HIPDNN_DATA_HALF`| | | | |
|`CUDNN_DATA_INT32`|6.0.0| | |`HIPDNN_DATA_INT32`| | | | |
|`CUDNN_DATA_INT64`|8.1.0| | | | | | | |
@@ -252,6 +325,7 @@
|`CUDNN_DETERMINISTIC`|6.0.0| | | | | | | |
|`CUDNN_DIM_MAX`|4.0.0| | | | | | | |
|`CUDNN_DIVNORM_PRECOMPUTED_MEANS`|3.0.0| | | | | | | |
+|`CUDNN_EDGE_VAL_PAD`|8.3.0| | | | | | | |
|`CUDNN_ERRQUERY_BLOCKING`|7.0.5| | | | | | | |
|`CUDNN_ERRQUERY_NONBLOCKING`|7.0.5| | | | | | | |
|`CUDNN_ERRQUERY_RAWCODE`|7.0.5| | | | | | | |
@@ -266,10 +340,14 @@
|`CUDNN_FWD_MODE_INFERENCE`|8.0.1| | | | | | | |
|`CUDNN_FWD_MODE_TRAINING`|8.0.1| | | | | | | |
|`CUDNN_GENSTATS_SUM_SQSUM`|8.0.1| | | | | | | |
+|`CUDNN_GROUP_NORM`|8.5.0| | | | | | | |
|`CUDNN_GRU`|5.0.0| | |`HIPDNN_GRU`| | | | |
|`CUDNN_HEUR_MODES_COUNT`|8.0.1| | | | | | | |
+|`CUDNN_HEUR_MODE_A`|8.3.0| | | | | | | |
|`CUDNN_HEUR_MODE_B`|8.0.1| | | | | | | |
+|`CUDNN_HEUR_MODE_FALLBACK`|8.3.0| | | | | | | |
|`CUDNN_HEUR_MODE_INSTANT`|8.0.1| | | | | | | |
+|`CUDNN_INSTANCE_NORM`|8.5.0| | | | | | | |
|`CUDNN_KNOB_TYPE_CHUNK_K`|8.0.1| | | | | | | |
|`CUDNN_KNOB_TYPE_COUNTS`|8.0.1| | | | | | | |
|`CUDNN_KNOB_TYPE_CTA_SPLIT_K_MODE`|8.0.1| | | | | | | |
@@ -293,9 +371,14 @@
|`CUDNN_KNOB_TYPE_STAGES`|8.0.1| | | | | | | |
|`CUDNN_KNOB_TYPE_SWIZZLE`|8.0.1| | | | | | | |
|`CUDNN_KNOB_TYPE_TILEK`|8.0.1| | | | | | | |
+|`CUDNN_KNOB_TYPE_TILE_CGA`|8.6.0| | | | | | | |
+|`CUDNN_KNOB_TYPE_TILE_CGA_M`|8.6.0| | | | | | | |
+|`CUDNN_KNOB_TYPE_TILE_CGA_N`|8.6.0| | | | | | | |
|`CUDNN_KNOB_TYPE_TILE_SIZE`|8.0.1| | | | | | | |
|`CUDNN_KNOB_TYPE_USE_TEX`|8.0.1| | | | | | | |
|`CUDNN_KNOB_TYPE_WINO_TILE`|8.0.1| | | | | | | |
+|`CUDNN_KNOB_TYPE_WORKSPACE`|8.4.0| | | | | | | |
+|`CUDNN_LAYER_NORM`|8.5.0| | | | | | | |
|`CUDNN_LAYOUT_TYPE_COUNT`|8.0.2| | | | | | | |
|`CUDNN_LAYOUT_TYPE_PREFERRED_NCHW`|8.0.1| | | | | | | |
|`CUDNN_LAYOUT_TYPE_PREFERRED_NHWC`|8.0.2| | | | | | | |
@@ -320,9 +403,12 @@
|`CUDNN_MH_ATTN_V_BIASES`|7.6.3| | | | | | | |
|`CUDNN_MH_ATTN_V_WEIGHTS`|7.5.0| | | | | | | |
|`CUDNN_MINOR`|3.0.0| | | | | | | |
+|`CUDNN_NEG_INF_PAD`|8.3.0| | | | | | | |
|`CUDNN_NON_DETERMINISTIC`|6.0.0| | | | | | | |
|`CUDNN_NORM_ALGO_PERSIST`|8.0.1| | | | | | | |
|`CUDNN_NORM_ALGO_STANDARD`|8.0.1| | | | | | | |
+|`CUDNN_NORM_FWD_INFERENCE`|8.5.0| | | | | | | |
+|`CUDNN_NORM_FWD_TRAINING`|8.5.0| | | | | | | |
|`CUDNN_NORM_OPS_NORM`|8.0.1| | | | | | | |
|`CUDNN_NORM_OPS_NORM_ACTIVATION`|8.0.1| | | | | | | |
|`CUDNN_NORM_OPS_NORM_ADD_ACTIVATION`|8.0.1| | | | | | | |
@@ -337,6 +423,9 @@
|`CUDNN_NUMERICAL_NOTE_TENSOR_CORE`|8.0.1| | | | | | | |
|`CUDNN_NUMERICAL_NOTE_TYPE_COUNT`|8.0.1| | | | | | | |
|`CUDNN_NUMERICAL_NOTE_WINOGRAD`|8.0.1| | | | | | | |
+|`CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_13x13`|8.3.0| | | | | | | |
+|`CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_4x4`|8.3.0| | | | | | | |
+|`CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_6x6`|8.3.0| | | | | | | |
|`CUDNN_OPS_INFER_MAJOR`|8.0.1| | | | | | | |
|`CUDNN_OPS_INFER_MINOR`|8.0.1| | | | | | | |
|`CUDNN_OPS_INFER_PATCH`|8.0.1| | | | | | | |
@@ -389,23 +478,53 @@
|`CUDNN_PARAM_ZDATA_PLACEHOLDER`|7.6.0| | | | | | | |
|`CUDNN_PARAM_ZDESC`|7.6.0| | | | | | | |
|`CUDNN_PATCHLEVEL`|3.0.0| | | | | | | |
+|`CUDNN_POINTWISE_ABS`|8.3.0| | | | | | | |
|`CUDNN_POINTWISE_ADD`|8.0.1| | | | | | | |
+|`CUDNN_POINTWISE_ADD_SQUARE`|8.3.0| | | | | | | |
+|`CUDNN_POINTWISE_BINARY_SELECT`|8.4.0| | | | | | | |
+|`CUDNN_POINTWISE_CEIL`|8.3.0| | | | | | | |
+|`CUDNN_POINTWISE_CMP_EQ`|8.3.0| | | | | | | |
+|`CUDNN_POINTWISE_CMP_GE`|8.3.0| | | | | | | |
+|`CUDNN_POINTWISE_CMP_GT`|8.3.0| | | | | | | |
+|`CUDNN_POINTWISE_CMP_LE`|8.3.0| | | | | | | |
+|`CUDNN_POINTWISE_CMP_LT`|8.3.0| | | | | | | |
+|`CUDNN_POINTWISE_CMP_NEQ`|8.3.0| | | | | | | |
+|`CUDNN_POINTWISE_COS`|8.3.0| | | | | | | |
+|`CUDNN_POINTWISE_DIV`|8.3.0| | | | | | | |
|`CUDNN_POINTWISE_ELU_BWD`|8.1.0| | | | | | | |
|`CUDNN_POINTWISE_ELU_FWD`|8.0.1| | | | | | | |
+|`CUDNN_POINTWISE_ERF`|8.5.0| | | | | | | |
+|`CUDNN_POINTWISE_EXP`|8.3.0| | | | | | | |
+|`CUDNN_POINTWISE_FLOOR`|8.3.0| | | | | | | |
+|`CUDNN_POINTWISE_GELU_APPROX_TANH_BWD`|8.5.0| | | | | | | |
+|`CUDNN_POINTWISE_GELU_APPROX_TANH_FWD`|8.5.0| | | | | | | |
|`CUDNN_POINTWISE_GELU_BWD`|8.1.0| | | | | | | |
|`CUDNN_POINTWISE_GELU_FWD`|8.1.0| | | | | | | |
+|`CUDNN_POINTWISE_GEN_INDEX`|8.4.0| | | | | | | |
+|`CUDNN_POINTWISE_IDENTITY`|8.5.0| | | | | | | |
+|`CUDNN_POINTWISE_LOG`|8.3.0| | | | | | | |
+|`CUDNN_POINTWISE_LOGICAL_AND`|8.3.0| | | | | | | |
+|`CUDNN_POINTWISE_LOGICAL_NOT`|8.3.0| | | | | | | |
+|`CUDNN_POINTWISE_LOGICAL_OR`|8.3.0| | | | | | | |
|`CUDNN_POINTWISE_MAX`|8.0.1| | | | | | | |
|`CUDNN_POINTWISE_MIN`|8.0.1| | | | | | | |
+|`CUDNN_POINTWISE_MOD`|8.3.0| | | | | | | |
|`CUDNN_POINTWISE_MUL`|8.0.1| | | | | | | |
+|`CUDNN_POINTWISE_NEG`|8.3.0| | | | | | | |
+|`CUDNN_POINTWISE_POW`|8.3.0| | | | | | | |
|`CUDNN_POINTWISE_RELU_BWD`|8.1.0| | | | | | | |
|`CUDNN_POINTWISE_RELU_FWD`|8.0.1| | | | | | | |
+|`CUDNN_POINTWISE_RSQRT`|8.3.0| | | | | | | |
|`CUDNN_POINTWISE_SIGMOID_BWD`|8.1.0| | | | | | | |
|`CUDNN_POINTWISE_SIGMOID_FWD`|8.0.1| | | | | | | |
+|`CUDNN_POINTWISE_SIN`|8.3.0| | | | | | | |
|`CUDNN_POINTWISE_SOFTPLUS_BWD`|8.1.0| | | | | | | |
|`CUDNN_POINTWISE_SOFTPLUS_FWD`|8.1.0| | | | | | | |
|`CUDNN_POINTWISE_SQRT`|8.0.1| | | | | | | |
+|`CUDNN_POINTWISE_SUB`|8.3.0| | | | | | | |
|`CUDNN_POINTWISE_SWISH_BWD`|8.1.0| | | | | | | |
|`CUDNN_POINTWISE_SWISH_FWD`|8.1.0| | | | | | | |
+|`CUDNN_POINTWISE_TAN`|8.3.0| | | | | | | |
|`CUDNN_POINTWISE_TANH_BWD`|8.1.0| | | | | | | |
|`CUDNN_POINTWISE_TANH_FWD`|8.0.1| | | | | | | |
|`CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING`|2.0.0| | |`HIPDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING`| | | | |
@@ -451,6 +570,12 @@
|`CUDNN_REDUCE_TENSOR_NORM1`|6.0.0| | |`HIPDNN_REDUCE_TENSOR_NORM1`| | | | |
|`CUDNN_REDUCE_TENSOR_NORM2`|6.0.0| | |`HIPDNN_REDUCE_TENSOR_NORM2`| | | | |
|`CUDNN_REDUCE_TENSOR_NO_INDICES`|6.0.0| | |`HIPDNN_REDUCE_TENSOR_NO_INDICES`| | | | |
+|`CUDNN_RESAMPLE_AVGPOOL`|8.3.0| | | | | | | |
+|`CUDNN_RESAMPLE_AVGPOOL_EXCLUDE_PADDING`|8.6.0| | | | | | | |
+|`CUDNN_RESAMPLE_AVGPOOL_INCLUDE_PADDING`|8.6.0| | | | | | | |
+|`CUDNN_RESAMPLE_BILINEAR`|8.3.0| | | | | | | |
+|`CUDNN_RESAMPLE_MAXPOOL`|8.3.0| | | | | | | |
+|`CUDNN_RESAMPLE_NEAREST`|8.3.0| | | | | | | |
|`CUDNN_RNN_ALGO_COUNT`|7.1.3| | | | | | | |
|`CUDNN_RNN_ALGO_PERSIST_DYNAMIC`|6.0.0| | |`HIPDNN_RNN_ALGO_PERSIST_DYNAMIC`| | | | |
|`CUDNN_RNN_ALGO_PERSIST_STATIC`|6.0.0| | |`HIPDNN_RNN_ALGO_PERSIST_STATIC`| | | | |
@@ -486,6 +611,8 @@
|`CUDNN_SEV_INFO_EN`|7.1.3| | | | | | | |
|`CUDNN_SEV_WARNING`|7.1.3| | | | | | | |
|`CUDNN_SEV_WARNING_EN`|7.1.3| | | | | | | |
+|`CUDNN_SIGNAL_SET`|8.5.0| | | | | | | |
+|`CUDNN_SIGNAL_WAIT`|8.5.0| | | | | | | |
|`CUDNN_SKIP_INPUT`|5.0.0| | |`HIPDNN_SKIP_INPUT`| | | | |
|`CUDNN_SOFTMAX_ACCURATE`|1.0.0| | |`HIPDNN_SOFTMAX_ACCURATE`| | | | |
|`CUDNN_SOFTMAX_FAST`|1.0.0| | |`HIPDNN_SOFTMAX_FAST`| | | | |
@@ -512,6 +639,8 @@
|`CUDNN_TENSOR_NHWC`|1.0.0| | |`HIPDNN_TENSOR_NHWC`| | | | |
|`CUDNN_TENSOR_OP_MATH`|7.0.5| | |`HIPDNN_TENSOR_OP_MATH`| | | | |
|`CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION`|7.2.1| | | | | | | |
+|`CUDNN_TENSOR_REORDERING_INT8x32`|8.3.0| | | | | | | |
+|`CUDNN_TENSOR_REORDERING_NONE`|8.3.0| | | | | | | |
|`CUDNN_TRANSFORM_FOLD`|7.5.0| | | | | | | |
|`CUDNN_TRANSFORM_UNFOLD`|7.5.0| | | | | | | |
|`CUDNN_TYPE_ATTRIB_NAME`|8.0.1| | | | | | | |
@@ -519,25 +648,35 @@
|`CUDNN_TYPE_BEHAVIOR_NOTE`|8.2.0| | | | | | | |
|`CUDNN_TYPE_BN_FINALIZE_STATS_MODE`|8.1.0| | | | | | | |
|`CUDNN_TYPE_BOOLEAN`|8.0.1| | | | | | | |
+|`CUDNN_TYPE_CHAR`|8.4.0| | | | | | | |
|`CUDNN_TYPE_CONVOLUTION_MODE`|8.0.1| | | | | | | |
|`CUDNN_TYPE_DATA_TYPE`|8.0.1| | | | | | | |
|`CUDNN_TYPE_DOUBLE`|8.0.1| | | | | | | |
|`CUDNN_TYPE_FLOAT`|8.0.1| | | | | | | |
+|`CUDNN_TYPE_FRACTION`|8.5.0| | | | | | | |
|`CUDNN_TYPE_GENSTATS_MODE`|8.0.1| | | | | | | |
|`CUDNN_TYPE_HANDLE`|8.0.1| | | | | | | |
|`CUDNN_TYPE_HEUR_MODE`|8.0.1| | | | | | | |
+|`CUDNN_TYPE_INT32`|8.3.0| | | | | | | |
|`CUDNN_TYPE_INT64`|8.0.1| | | | | | | |
|`CUDNN_TYPE_KNOB_TYPE`|8.0.1| | | | | | | |
|`CUDNN_TYPE_LAYOUT_TYPE`|8.0.2| | | | | | | |
|`CUDNN_TYPE_NAN_PROPOGATION`|8.0.1| | | | | | | |
+|`CUDNN_TYPE_NORM_FWD_PHASE`|8.5.0| | | | | | | |
+|`CUDNN_TYPE_NORM_MODE`|8.5.0| | | | | | | |
|`CUDNN_TYPE_NUMERICAL_NOTE`|8.0.1| | | | | | | |
+|`CUDNN_TYPE_PADDING_MODE`|8.3.0| | | | | | | |
|`CUDNN_TYPE_POINTWISE_MODE`|8.0.1| | | | | | | |
|`CUDNN_TYPE_REDUCTION_OPERATOR_TYPE`|8.1.0| | | | | | | |
+|`CUDNN_TYPE_RESAMPLE_MODE`|8.3.0| | | | | | | |
+|`CUDNN_TYPE_SIGNAL_MODE`|8.5.0| | | | | | | |
+|`CUDNN_TYPE_TENSOR_REORDERING_MODE`|8.3.0| | | | | | | |
|`CUDNN_TYPE_VOID_PTR`|8.0.1| | | | | | | |
|`CUDNN_UNIDIRECTIONAL`|5.0.0| | |`HIPDNN_UNIDIRECTIONAL`| | | | |
|`CUDNN_VERSION`|2.0.0| | |`HIPDNN_VERSION`| | | | |
|`CUDNN_WGRAD_MODE_ADD`|7.5.0| | | | | | | |
|`CUDNN_WGRAD_MODE_SET`|7.5.0| | | | | | | |
+|`CUDNN_ZERO_PAD`|8.3.0| | | | | | | |
|`cudnnActivationDescriptor_t`|4.0.0| | |`hipdnnActivationDescriptor_t`| | | | |
|`cudnnActivationMode_t`|1.0.0| | |`hipdnnActivationMode_t`| | | | |
|`cudnnActivationStruct`|4.0.0| | | | | | | |
@@ -558,7 +697,10 @@
|`cudnnBackendHeurMode_t`|8.0.1| | | | | | | |
|`cudnnBackendKnobType_t`|8.0.1| | | | | | | |
|`cudnnBackendLayoutType_t`|8.0.1| | | | | | | |
+|`cudnnBackendNormFwdPhase_t`|8.5.0| | | | | | | |
+|`cudnnBackendNormMode_t`|8.5.0| | | | | | | |
|`cudnnBackendNumericalNote_t`|8.0.1| | | | | | | |
+|`cudnnBackendTensorReordering_t`|8.3.0| | | | | | | |
|`cudnnBatchNormMode_t`|4.0.0| | |`hipdnnBatchNormMode_t`| | | | |
|`cudnnBatchNormOps_t`|7.4.1| | | | | | | |
|`cudnnBnFinalizeStatsMode_t`|8.1.0| | | | | | | |
@@ -595,6 +737,8 @@
|`cudnnFilterStruct`|1.0.0| | | | | | | |
|`cudnnFoldingDirection_t`|7.5.0| | | | | | | |
|`cudnnForwardMode_t`|8.0.1| | | | | | | |
+|`cudnnFractionStruct`|8.5.0| | | | | | | |
+|`cudnnFraction_t`|8.5.0| | | | | | | |
|`cudnnFusedOpsConstParamLabel_t`|7.6.0| | | | | | | |
|`cudnnFusedOpsConstParamPack_t`|7.6.0| | | | | | | |
|`cudnnFusedOpsConstParamStruct`|7.6.0| | | | | | | |
@@ -621,6 +765,7 @@
|`cudnnOpTensorDescriptor_t`|5.0.0| | |`hipdnnOpTensorDescriptor_t`| | | | |
|`cudnnOpTensorOp_t`|5.0.0| | |`hipdnnOpTensorOp_t`| | | | |
|`cudnnOpTensorStruct`|5.0.0| | | | | | | |
+|`cudnnPaddingMode_t`|8.3.0| | | | | | | |
|`cudnnPersistentRNNPlan`|6.0.0| | | | | | | |
|`cudnnPersistentRNNPlan_t`|6.0.0| | |`hipdnnPersistentRNNPlan_t`| | | | |
|`cudnnPointwiseMode_t`|8.0.1| | | | | | | |
@@ -643,12 +788,14 @@
|`cudnnReduceTensorOp_t`|6.0.0| | |`hipdnnReduceTensorOp_t`| | | | |
|`cudnnReduceTensorStruct`|6.0.0| | | | | | | |
|`cudnnReorderType_t`|7.6.0| | | | | | | |
+|`cudnnResampleMode_t`|8.3.0| | | | | | | |
|`cudnnRuntimeTag_t`|7.0.5| | | | | | | |
|`cudnnSamplerType_t`|5.0.0| | | | | | | |
|`cudnnSeqDataAxis_t`|7.5.0| | | | | | | |
|`cudnnSeqDataDescriptor_t`|7.5.0| | | | | | | |
|`cudnnSeqDataStruct`|7.5.0| | | | | | | |
|`cudnnSeverity_t`|7.1.3| | | | | | | |
+|`cudnnSignalMode_t`|8.5.0| | | | | | | |
|`cudnnSoftmaxAlgorithm_t`|1.0.0| | |`hipdnnSoftmaxAlgorithm_t`| | | | |
|`cudnnSoftmaxMode_t`|1.0.0| | |`hipdnnSoftmaxMode_t`| | | | |
|`cudnnSpatialTransformerDescriptor_t`|5.0.0| | | | | | | |
@@ -804,6 +951,7 @@
|`cudnnGetFusedOpsConstParamPackAttribute`|7.6.0| | | | | | | |
|`cudnnGetFusedOpsVariantParamPackAttribute`|7.6.0| | | | | | | |
|`cudnnGetLRNDescriptor`|3.0.0| | |`hipdnnGetLRNDescriptor`| | | | |
+|`cudnnGetMaxDeviceVersion`|8.6.0| | | | | | | |
|`cudnnGetMultiHeadAttnBuffers`|7.5.0| | | | | | | |
|`cudnnGetMultiHeadAttnWeights`|7.5.0| | | | | | | |
|`cudnnGetNormalizationBackwardWorkspaceSize`|8.0.1| | | | | | | |
diff --git a/doc/markdown/CUFFT_API_supported_by_HIP.md b/doc/markdown/CUFFT_API_supported_by_HIP.md
index d9c3cbeb..163e5696 100644
--- a/doc/markdown/CUFFT_API_supported_by_HIP.md
+++ b/doc/markdown/CUFFT_API_supported_by_HIP.md
@@ -54,6 +54,8 @@
|`CUFFT_WORKAREA_PERFORMANCE`| | | | | | | | |
|`CUFFT_WORKAREA_USER`|9.2| | | | | | | |
|`CUFFT_XT_FORMAT_1D_INPUT_SHUFFLED`| | | | | | | | |
+|`CUFFT_XT_FORMAT_DISTRIBUTED_INPUT`|11.8| | | | | | | |
+|`CUFFT_XT_FORMAT_DISTRIBUTED_OUTPUT`|11.8| | | | | | | |
|`CUFFT_XT_FORMAT_INPLACE`| | | | | | | | |
|`CUFFT_XT_FORMAT_INPLACE_SHUFFLED`| | | | | | | | |
|`CUFFT_XT_FORMAT_INPUT`| | | | | | | | |
@@ -61,6 +63,8 @@
|`CUFFT_Z2D`| | | |`HIPFFT_Z2D`|1.7.0| | | |
|`CUFFT_Z2Z`| | | |`HIPFFT_Z2Z`|1.7.0| | | |
|`MAX_CUFFT_ERROR`| | | | | | | | |
+|`cufftBox3d`|11.8| | | | | | | |
+|`cufftBox3d_t`|11.8| | | | | | | |
|`cufftCompatibility`| | | | | | | | |
|`cufftCompatibility_t`| | | | | | | | |
|`cufftComplex`| | | |`hipfftComplex`|1.7.0| | | |
@@ -131,7 +135,7 @@
|`cufftSetWorkArea`| | | |`hipfftSetWorkArea`|1.7.0| | | |
|`cufftXtClearCallback`| | | |`hipfftXtClearCallback`|4.3.0| | | |
|`cufftXtExec`|8.0| | | | | | | |
-|`cufftXtExecDescriptor`| | | | | | | | |
+|`cufftXtExecDescriptor`|8.0| | | | | | | |
|`cufftXtExecDescriptorC2C`| | | | | | | | |
|`cufftXtExecDescriptorC2R`| | | | | | | | |
|`cufftXtExecDescriptorD2Z`| | | | | | | | |
@@ -146,6 +150,7 @@
|`cufftXtQueryPlan`| | | | | | | | |
|`cufftXtSetCallback`| | | |`hipfftXtSetCallback`|4.3.0| | | |
|`cufftXtSetCallbackSharedSize`| | | |`hipfftXtSetCallbackSharedSize`|4.3.0| | | |
+|`cufftXtSetDistribution`|11.8| | | | | | | |
|`cufftXtSetGPUs`| | | | | | | | |
|`cufftXtSetWorkArea`| | | | | | | | |
|`cufftXtSetWorkAreaPolicy`|9.2| | | | | | | |
diff --git a/src/CUDA2HIP.cpp b/src/CUDA2HIP.cpp
index 1711966c..e5e53e9c 100644
--- a/src/CUDA2HIP.cpp
+++ b/src/CUDA2HIP.cpp
@@ -25,63 +25,63 @@ THE SOFTWARE.
// Maps CUDA header names to HIP header names
const std::map CUDA_INCLUDE_MAP {
// CUDA includes
- {"cuda.h", {"hip/hip_runtime.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_DRIVER, 0}},
- {"cuda_runtime.h", {"hip/hip_runtime.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_RUNTIME, 0}},
- {"cuda_runtime_api.h", {"hip/hip_runtime_api.h", "", CONV_INCLUDE, API_RUNTIME, 0}},
- {"channel_descriptor.h", {"hip/channel_descriptor.h", "", CONV_INCLUDE, API_RUNTIME, 0}},
- {"device_functions.h", {"hip/device_functions.h", "", CONV_INCLUDE, API_RUNTIME, 0}},
- {"driver_types.h", {"hip/driver_types.h", "", CONV_INCLUDE, API_RUNTIME, 0}},
- {"cuda_fp16.h", {"hip/hip_fp16.h", "", CONV_INCLUDE, API_RUNTIME, 0}},
- {"cuda_texture_types.h", {"hip/hip_texture_types.h", "", CONV_INCLUDE, API_RUNTIME, 0}},
- {"texture_fetch_functions.h", {"", "", CONV_INCLUDE, API_RUNTIME, 0}},
- {"vector_types.h", {"hip/hip_vector_types.h", "", CONV_INCLUDE, API_RUNTIME, 0}},
- {"cuda_profiler_api.h", {"hip/hip_runtime_api.h", "", CONV_INCLUDE, API_RUNTIME, 0}},
- {"cooperative_groups.h", {"hip/hip_cooperative_groups.h", "", CONV_INCLUDE, API_RUNTIME, 0}},
+ {"cuda.h", {"hip/hip_runtime.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_DRIVER, 0}},
+ {"cuda_runtime.h", {"hip/hip_runtime.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_RUNTIME, 0}},
+ {"cuda_runtime_api.h", {"hip/hip_runtime_api.h", "", CONV_INCLUDE, API_RUNTIME, 0}},
+ {"channel_descriptor.h", {"hip/channel_descriptor.h", "", CONV_INCLUDE, API_RUNTIME, 0}},
+ {"device_functions.h", {"hip/device_functions.h", "", CONV_INCLUDE, API_RUNTIME, 0}},
+ {"driver_types.h", {"hip/driver_types.h", "", CONV_INCLUDE, API_RUNTIME, 0}},
+ {"cuda_fp16.h", {"hip/hip_fp16.h", "", CONV_INCLUDE, API_RUNTIME, 0}},
+ {"cuda_texture_types.h", {"hip/hip_texture_types.h", "", CONV_INCLUDE, API_RUNTIME, 0}},
+ {"texture_fetch_functions.h", {"", "", CONV_INCLUDE, API_RUNTIME, 0}},
+ {"vector_types.h", {"hip/hip_vector_types.h", "", CONV_INCLUDE, API_RUNTIME, 0}},
+ {"cuda_profiler_api.h", {"hip/hip_runtime_api.h", "", CONV_INCLUDE, API_RUNTIME, 0}},
+ {"cooperative_groups.h", {"hip/hip_cooperative_groups.h", "", CONV_INCLUDE, API_RUNTIME, 0}},
// cuComplex includes
- {"cuComplex.h", {"hip/hip_complex.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_COMPLEX, 0}},
+ {"cuComplex.h", {"hip/hip_complex.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_COMPLEX, 0}},
// cuBLAS includes
- {"cublas.h", {"hipblas.h", "rocblas.h", CONV_INCLUDE_CUDA_MAIN_H, API_BLAS, 0}},
- {"cublas_v2.h", {"hipblas.h", "rocblas.h", CONV_INCLUDE_CUDA_MAIN_V2_H, API_BLAS, 0}},
- {"cublas_api.h", {"hipblas.h", "rocblas.h", CONV_INCLUDE, API_BLAS, 0}},
+ {"cublas.h", {"hipblas.h", "rocblas.h", CONV_INCLUDE_CUDA_MAIN_H, API_BLAS, 0}},
+ {"cublas_v2.h", {"hipblas.h", "rocblas.h", CONV_INCLUDE_CUDA_MAIN_V2_H, API_BLAS, 0}},
+ {"cublas_api.h", {"hipblas.h", "rocblas.h", CONV_INCLUDE, API_BLAS, 0}},
// cuRAND includes
- {"curand.h", {"hiprand.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_RAND, 0}},
- {"curand_kernel.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}},
- {"curand_discrete.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}},
- {"curand_discrete2.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}},
- {"curand_globals.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}},
- {"curand_lognormal.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}},
- {"curand_mrg32k3a.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}},
- {"curand_mtgp32.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}},
- {"curand_mtgp32_host.h", {"hiprand_mtgp32_host.h", "", CONV_INCLUDE, API_RAND, 0}},
- {"curand_mtgp32_kernel.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}},
- {"curand_mtgp32dc_p_11213.h", {"rocrand_mtgp32_11213.h", "", CONV_INCLUDE, API_RAND, 0}},
- {"curand_normal.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}},
- {"curand_normal_static.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}},
- {"curand_philox4x32_x.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}},
- {"curand_poisson.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}},
- {"curand_precalc.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}},
- {"curand_uniform.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}},
+ {"curand.h", {"hiprand.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_RAND, 0}},
+ {"curand_kernel.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}},
+ {"curand_discrete.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}},
+ {"curand_discrete2.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}},
+ {"curand_globals.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}},
+ {"curand_lognormal.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}},
+ {"curand_mrg32k3a.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}},
+ {"curand_mtgp32.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}},
+ {"curand_mtgp32_host.h", {"hiprand_mtgp32_host.h", "", CONV_INCLUDE, API_RAND, 0}},
+ {"curand_mtgp32_kernel.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}},
+ {"curand_mtgp32dc_p_11213.h", {"rocrand_mtgp32_11213.h", "", CONV_INCLUDE, API_RAND, 0}},
+ {"curand_normal.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}},
+ {"curand_normal_static.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}},
+ {"curand_philox4x32_x.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}},
+ {"curand_poisson.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}},
+ {"curand_precalc.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}},
+ {"curand_uniform.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}},
// cuDNN includes
- {"cudnn.h", {"hipDNN.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_DNN, 0}},
+ {"cudnn.h", {"hipDNN.h", "miopen/miopen.h", CONV_INCLUDE_CUDA_MAIN_H, API_DNN, 0}},
// cuFFT includes
- {"cufft.h", {"hipfft.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_FFT, 0}},
- {"cufftXt.h", {"hipfftXt.h", "", CONV_INCLUDE, API_FFT, 0}},
+ {"cufft.h", {"hipfft.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_FFT, 0}},
+ {"cufftXt.h", {"hipfftXt.h", "", CONV_INCLUDE, API_FFT, 0}},
// cuSPARSE includes
- {"cusparse.h", {"hipsparse.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_SPARSE, 0}},
- {"cusparse_v2.h", {"hipsparse.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_SPARSE, 0}},
+ {"cusparse.h", {"hipsparse.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_SPARSE, 0}},
+ {"cusparse_v2.h", {"hipsparse.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_SPARSE, 0}},
// CUB includes
- {"cub/cub.cuh", {"hipcub/hipcub.hpp", "", CONV_INCLUDE_CUDA_MAIN_H, API_CUB, 0}},
+ {"cub/cub.cuh", {"hipcub/hipcub.hpp", "", CONV_INCLUDE_CUDA_MAIN_H, API_CUB, 0}},
// CAFFE2 includes
- {"caffe2/core/common_gpu.h", {"caffe2/core/hip/common_gpu.h", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}},
- {"caffe2/core/context_gpu.h", {"caffe2/core/hip/context_gpu.h", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}},
- {"caffe2/operators/operator_fallback_gpu.h", {"", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}},
- {"caffe2/operators/spatial_batch_norm_op.h", {"caffe2/operators/hip/spatial_batch_norm_op_miopen.hip", "", CONV_INCLUDE, API_CAFFE2, 0}},
- {"caffe2/operators/generate_proposals_op_util_nms_gpu.h", {"", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}},
- {"caffe2/operators/max_pool_with_index_gpu.h", {"", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}},
- {"caffe2/operators/rnn/recurrent_network_executor_gpu.h", {"", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}},
- {"caffe2/utils/math/reduce.cuh", {"caffe2/utils/math/hip/reduce.cuh", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}},
- {"caffe2/operators/gather_op.cuh", {"caffe2/operators/math/gather_op.cuh", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}},
- {"caffe2/core/common_cudnn.h", {"caffe2/core/hip/common_miopen.h", "", CONV_INCLUDE, API_CAFFE2, 0}},
+ {"caffe2/core/common_gpu.h", {"caffe2/core/hip/common_gpu.h", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}},
+ {"caffe2/core/context_gpu.h", {"caffe2/core/hip/context_gpu.h", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}},
+ {"caffe2/operators/operator_fallback_gpu.h", {"", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}},
+ {"caffe2/operators/spatial_batch_norm_op.h", {"caffe2/operators/hip/spatial_batch_norm_op_miopen.hip", "", CONV_INCLUDE, API_CAFFE2, 0}},
+ {"caffe2/operators/generate_proposals_op_util_nms_gpu.h", {"", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}},
+ {"caffe2/operators/max_pool_with_index_gpu.h", {"", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}},
+ {"caffe2/operators/rnn/recurrent_network_executor_gpu.h", {"", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}},
+ {"caffe2/utils/math/reduce.cuh", {"caffe2/utils/math/hip/reduce.cuh", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}},
+ {"caffe2/operators/gather_op.cuh", {"caffe2/operators/math/gather_op.cuh", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}},
+ {"caffe2/core/common_cudnn.h", {"caffe2/core/hip/common_miopen.h", "", CONV_INCLUDE, API_CAFFE2, 0}},
// RTC includes
{"nvrtc.h", {"hiprtc.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_RTC, 0}},
};
@@ -113,6 +113,7 @@ const std::map &CUDA_RENAMES_MAP() {
ret.insert(CUDA_CUB_FUNCTION_MAP.begin(), CUDA_CUB_FUNCTION_MAP.end());
ret.insert(CUDA_RTC_TYPE_NAME_MAP.begin(), CUDA_RTC_TYPE_NAME_MAP.end());
ret.insert(CUDA_RTC_FUNCTION_MAP.begin(), CUDA_RTC_FUNCTION_MAP.end());
+ ret.insert(CUDA_DEVICE_TYPE_NAME_MAP.begin(), CUDA_DEVICE_TYPE_NAME_MAP.end());
return ret;
};
diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp
index b36ae477..a912fa1f 100644
--- a/src/CUDA2HIP_BLAS_API_functions.cpp
+++ b/src/CUDA2HIP_BLAS_API_functions.cpp
@@ -26,22 +26,22 @@ THE SOFTWARE.
const std::map CUDA_BLAS_FUNCTION_MAP {
// Blas management functions
- {"cublasInit", {"hipblasInit", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}},
+ {"cublasInit", {"hipblasInit", "rocblas_initialize", CONV_LIB_FUNC, API_BLAS, 4, HIP_UNSUPPORTED}},
{"cublasShutdown", {"hipblasShutdown", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}},
{"cublasGetVersion", {"hipblasGetVersion", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}},
{"cublasGetError", {"hipblasGetError", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}},
{"cublasAlloc", {"hipblasAlloc", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}},
{"cublasFree", {"hipblasFree", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}},
{"cublasSetKernelStream", {"hipblasSetKernelStream", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}},
- {"cublasGetAtomicsMode", {"hipblasGetAtomicsMode", "", CONV_LIB_FUNC, API_BLAS, 4, ROC_UNSUPPORTED}},
- {"cublasSetAtomicsMode", {"hipblasSetAtomicsMode", "", CONV_LIB_FUNC, API_BLAS, 4, ROC_UNSUPPORTED}},
+ {"cublasGetAtomicsMode", {"hipblasGetAtomicsMode", "rocblas_get_atomics_mode", CONV_LIB_FUNC, API_BLAS, 4}},
+ {"cublasSetAtomicsMode", {"hipblasSetAtomicsMode", "rocblas_set_atomics_mode", CONV_LIB_FUNC, API_BLAS, 4}},
{"cublasGetMathMode", {"hipblasGetMathMode", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}},
{"cublasSetMathMode", {"hipblasSetMathMode", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}},
{"cublasMigrateComputeType", {"hipblasMigrateComputeType", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}},
{"cublasGetSmCountTarget", {"hipblasGetSmCountTarget", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}},
{"cublasSetSmCountTarget", {"hipblasSetSmCountTarget", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}},
{"cublasGetStatusName", {"hipblasGetStatusName", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}},
- {"cublasGetStatusString", {"hipblasGetStatusString", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}},
+ {"cublasGetStatusString", {"hipblasGetStatusString", "rocblas_status_to_string", CONV_LIB_FUNC, API_BLAS, 4, HIP_UNSUPPORTED}},
// Blas logging
{"cublasLogCallback", {"hipblasLogCallback", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}},
@@ -288,20 +288,20 @@ const std::map CUDA_BLAS_FUNCTION_MAP {
{"cublasHgemmStridedBatched", {"hipblasHgemmStridedBatched", "rocblas_hgemm_strided_batched", CONV_LIB_FUNC, API_BLAS, 7}},
// SYRK
- {"cublasSsyrk", {"hipblasSsyrk", "rocblas_ssyrk", CONV_LIB_FUNC, API_BLAS, 7}},
- {"cublasDsyrk", {"hipblasDsyrk", "rocblas_dsyrk", CONV_LIB_FUNC, API_BLAS, 7}},
- {"cublasCsyrk", {"hipblasCsyrk", "rocblas_csyrk", CONV_LIB_FUNC, API_BLAS, 7}},
- {"cublasZsyrk", {"hipblasZsyrk", "rocblas_zsyrk", CONV_LIB_FUNC, API_BLAS, 7}},
+ {"cublasSsyrk", {"hipblasSsyrk", "rocblas_ssyrk", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}},
+ {"cublasDsyrk", {"hipblasDsyrk", "rocblas_dsyrk", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}},
+ {"cublasCsyrk", {"hipblasCsyrk", "rocblas_csyrk", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}},
+ {"cublasZsyrk", {"hipblasZsyrk", "rocblas_zsyrk", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}},
// HERK
- {"cublasCherk", {"hipblasCherk", "rocblas_cherk", CONV_LIB_FUNC, API_BLAS, 7}},
- {"cublasZherk", {"hipblasZherk", "rocblas_zherk", CONV_LIB_FUNC, API_BLAS, 7}},
+ {"cublasCherk", {"hipblasCherk", "rocblas_cherk", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}},
+ {"cublasZherk", {"hipblasZherk", "rocblas_zherk", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}},
// SYR2K
- {"cublasSsyr2k", {"hipblasSsyr2k", "rocblas_ssyr2k", CONV_LIB_FUNC, API_BLAS, 7}},
- {"cublasDsyr2k", {"hipblasDsyr2k", "rocblas_dsyr2k", CONV_LIB_FUNC, API_BLAS, 7}},
- {"cublasCsyr2k", {"hipblasCsyr2k", "rocblas_csyr2k", CONV_LIB_FUNC, API_BLAS, 7}},
- {"cublasZsyr2k", {"hipblasZsyr2k", "rocblas_zsyr2k", CONV_LIB_FUNC, API_BLAS, 7}},
+ {"cublasSsyr2k", {"hipblasSsyr2k", "rocblas_ssyr2k", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}},
+ {"cublasDsyr2k", {"hipblasDsyr2k", "rocblas_dsyr2k", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}},
+ {"cublasCsyr2k", {"hipblasCsyr2k", "rocblas_csyr2k", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}},
+ {"cublasZsyr2k", {"hipblasZsyr2k", "rocblas_zsyr2k", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}},
// SYRKX - eXtended SYRK
{"cublasSsyrkx", {"hipblasSsyrkx", "rocblas_ssyrkx", CONV_LIB_FUNC, API_BLAS, 7}},
@@ -310,34 +310,34 @@ const std::map CUDA_BLAS_FUNCTION_MAP {
{"cublasZsyrkx", {"hipblasZsyrkx", "rocblas_zsyrkx", CONV_LIB_FUNC, API_BLAS, 7}},
// HER2K
- {"cublasCher2k", {"hipblasCher2k", "rocblas_cher2k", CONV_LIB_FUNC, API_BLAS, 7}},
- {"cublasZher2k", {"hipblasZher2k", "rocblas_zher2k", CONV_LIB_FUNC, API_BLAS, 7}},
+ {"cublasCher2k", {"hipblasCher2k", "rocblas_cher2k", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}},
+ {"cublasZher2k", {"hipblasZher2k", "rocblas_zher2k", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}},
// HERKX - eXtended HERK
{"cublasCherkx", {"hipblasCherkx", "rocblas_cherkx", CONV_LIB_FUNC, API_BLAS, 7}},
{"cublasZherkx", {"hipblasZherkx", "rocblas_zherkx", CONV_LIB_FUNC, API_BLAS, 7}},
// SYMM
- {"cublasSsymm", {"hipblasSsymm", "rocblas_ssymm", CONV_LIB_FUNC, API_BLAS, 7}},
- {"cublasDsymm", {"hipblasDsymm", "rocblas_dsymm", CONV_LIB_FUNC, API_BLAS, 7}},
- {"cublasCsymm", {"hipblasCsymm", "rocblas_csymm", CONV_LIB_FUNC, API_BLAS, 7}},
- {"cublasZsymm", {"hipblasZsymm", "rocblas_zsymm", CONV_LIB_FUNC, API_BLAS, 7}},
+ {"cublasSsymm", {"hipblasSsymm", "rocblas_ssymm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}},
+ {"cublasDsymm", {"hipblasDsymm", "rocblas_dsymm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}},
+ {"cublasCsymm", {"hipblasCsymm", "rocblas_csymm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}},
+ {"cublasZsymm", {"hipblasZsymm", "rocblas_zsymm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}},
// HEMM
- {"cublasChemm", {"hipblasChemm", "rocblas_chemm", CONV_LIB_FUNC, API_BLAS, 7}},
- {"cublasZhemm", {"hipblasZhemm", "rocblas_zhemm", CONV_LIB_FUNC, API_BLAS, 7}},
+ {"cublasChemm", {"hipblasChemm", "rocblas_chemm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}},
+ {"cublasZhemm", {"hipblasZhemm", "rocblas_zhemm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}},
// TRSM
- {"cublasStrsm", {"hipblasStrsm", "rocblas_strsm", CONV_LIB_FUNC, API_BLAS, 7}},
- {"cublasDtrsm", {"hipblasDtrsm", "rocblas_dtrsm", CONV_LIB_FUNC, API_BLAS, 7}},
- {"cublasCtrsm", {"hipblasCtrsm", "rocblas_ctrsm", CONV_LIB_FUNC, API_BLAS, 7}},
- {"cublasZtrsm", {"hipblasZtrsm", "rocblas_ztrsm", CONV_LIB_FUNC, API_BLAS, 7}},
+ {"cublasStrsm", {"hipblasStrsm", "rocblas_strsm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}},
+ {"cublasDtrsm", {"hipblasDtrsm", "rocblas_dtrsm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}},
+ {"cublasCtrsm", {"hipblasCtrsm", "rocblas_ctrsm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}},
+ {"cublasZtrsm", {"hipblasZtrsm", "rocblas_ztrsm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}},
// TRMM
- {"cublasStrmm", {"hipblasStrmm", "rocblas_strmm", CONV_LIB_FUNC, API_BLAS, 7}},
- {"cublasDtrmm", {"hipblasDtrmm", "rocblas_dtrmm", CONV_LIB_FUNC, API_BLAS, 7}},
- {"cublasCtrmm", {"hipblasCtrmm", "rocblas_ctrmm", CONV_LIB_FUNC, API_BLAS, 7}},
- {"cublasZtrmm", {"hipblasZtrmm", "rocblas_ztrmm", CONV_LIB_FUNC, API_BLAS, 7}},
+ {"cublasStrmm", {"hipblasStrmm", "rocblas_strmm_outofplace", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY | HIP_UNSUPPORTED}},
+ {"cublasDtrmm", {"hipblasDtrmm", "rocblas_dtrmm_outofplace", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY | HIP_UNSUPPORTED}},
+ {"cublasCtrmm", {"hipblasCtrmm", "rocblas_ctrmm_outofplace", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY | HIP_UNSUPPORTED}},
+ {"cublasZtrmm", {"hipblasZtrmm", "rocblas_ztrmm_outofplace", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY | HIP_UNSUPPORTED}},
// ------------------------ CUBLAS BLAS - like extension (cublas_api.h)
// GEAM
@@ -541,7 +541,7 @@ const std::map CUDA_BLAS_FUNCTION_MAP {
{"cublasCsyrk3mEx", {"hipblasCsyrk3mEx", "", CONV_LIB_FUNC, API_BLAS, 8, UNSUPPORTED}},
// HERK
- {"cublasCherk_v2", {"hipblasCherk", "rocblas_cherkx", CONV_LIB_FUNC, API_BLAS, 7}},
+ {"cublasCherk_v2", {"hipblasCherk", "rocblas_cherk", CONV_LIB_FUNC, API_BLAS, 7}},
// IO in Int8 complex/cuComplex, computation in cuComplex
{"cublasCherkEx", {"hipblasCherkEx", "", CONV_LIB_FUNC, API_BLAS, 8, UNSUPPORTED}},
// IO in Int8 complex/cuComplex, computation in cuComplex, Gaussian math
@@ -575,10 +575,10 @@ const std::map CUDA_BLAS_FUNCTION_MAP {
{"cublasZtrsm_v2", {"hipblasZtrsm", "rocblas_ztrsm", CONV_LIB_FUNC, API_BLAS, 7}},
// TRMM
- {"cublasStrmm_v2", {"hipblasStrmm", "rocblas_strmm", CONV_LIB_FUNC, API_BLAS, 7}},
- {"cublasDtrmm_v2", {"hipblasDtrmm", "rocblas_dtrmm", CONV_LIB_FUNC, API_BLAS, 7}},
- {"cublasCtrmm_v2", {"hipblasCtrmm", "rocblas_ctrmm", CONV_LIB_FUNC, API_BLAS, 7}},
- {"cublasZtrmm_v2", {"hipblasZtrmm", "rocblas_ztrmm", CONV_LIB_FUNC, API_BLAS, 7}},
+ {"cublasStrmm_v2", {"hipblasStrmm", "rocblas_strmm_outofplace", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY | HIP_UNSUPPORTED}},
+ {"cublasDtrmm_v2", {"hipblasDtrmm", "rocblas_dtrmm_outofplace", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY | HIP_UNSUPPORTED}},
+ {"cublasCtrmm_v2", {"hipblasCtrmm", "rocblas_ctrmm_outofplace", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY | HIP_UNSUPPORTED}},
+ {"cublasZtrmm_v2", {"hipblasZtrmm", "rocblas_ztrmm_outofplace", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY | HIP_UNSUPPORTED}},
// NRM2
{"cublasSnrm2_v2", {"hipblasSnrm2", "rocblas_snrm2", CONV_LIB_FUNC, API_BLAS, 5}},
@@ -953,6 +953,219 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP {
{"hipblasNrm2Ex", {HIP_4010, HIP_0, HIP_0 }},
{"hipblasRotEx", {HIP_4010, HIP_0, HIP_0 }},
{"hipblasScalEx", {HIP_4010, HIP_0, HIP_0 }},
+ {"rocblas_status_to_string", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_sscal", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_dscal", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_cscal", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_zscal", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_csscal", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_zdscal", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_scopy", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_dcopy", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_ccopy", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_zcopy", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_sdot", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_ddot", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_hdot", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_cdotu", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_zdotu", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_cdotc", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_zdotc", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_sswap", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_dswap", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_cswap", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_zswap", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_saxpy", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_daxpy", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_caxpy", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_zaxpy", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_sasum", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_dasum", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_scasum", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_dzasum", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_snrm2", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_dnrm2", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_scnrm2", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_dznrm2", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_isamax", {HIP_1064, HIP_0, HIP_0 }},
+ {"rocblas_idamax", {HIP_1064, HIP_0, HIP_0 }},
+ {"rocblas_icamax", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_izamax", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_isamin", {HIP_1064, HIP_0, HIP_0 }},
+ {"rocblas_idamin", {HIP_1064, HIP_0, HIP_0 }},
+ {"rocblas_icamin", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_izamin", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_srot", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_drot", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_crot", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_csrot", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_zrot", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_zdrot", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_srotg", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_drotg", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_crotg", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_zrotg", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_srotm", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_drotm", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_srotmg", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_drotmg", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_sgbmv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_dgbmv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_cgbmv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_zgbmv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_sgemv", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_dgemv", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_cgemv", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_zgemv", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_chbmv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_zhbmv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_chemv", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_zhemv", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_cher", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_zher", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_cher2", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_zher2", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_chpmv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_zhpmv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_chpr", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_zhpr", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_chpr2", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_zhpr2", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_strmv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_dtrmv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_ctrmv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_ztrmv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_stpmv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_dtpmv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_ctpmv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_ztpmv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_stbmv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_dtbmv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_ctbmv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_ztbmv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_stbsv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_dtbsv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_ctbsv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_ztbsv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_strsv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_dtrsv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_ctrsv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_ztrsv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_stpsv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_dtpsv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_ctpsv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_ztpsv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_ssymv", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_dsymv", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_csymv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_zsymv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_sspmv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_dspmv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_ssbmv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_dsbmv", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_sger", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_dger", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_cgeru", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_zgeru", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_cgerc", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_zgerc", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_sspr", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_dspr", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_sspr2", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_dspr2", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_ssyr", {HIP_1071, HIP_0, HIP_0 }},
+ {"rocblas_dsyr", {HIP_1071, HIP_0, HIP_0 }},
+ {"rocblas_csyr", {HIP_1071, HIP_0, HIP_0 }},
+ {"rocblas_zsyr", {HIP_1071, HIP_0, HIP_0 }},
+ {"rocblas_ssyr2", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_dsyr2", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_csyr2", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_zsyr2", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_chemm", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_zhemm", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_cherk", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_zherk", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_cher2k", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_zher2k", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_cherkx", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_zherkx", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_ssymm", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_dsymm", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_csymm", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_zsymm", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_ssyrk", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_dsyrk", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_csyrk", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_zsyrk", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_ssyr2k", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_dsyr2k", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_csyr2k", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_zsyr2k", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_ssyrkx", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_dsyrkx", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_csyrkx", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_zsyrkx", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_strmm_outofplace", {HIP_5000, HIP_0, HIP_0 }},
+ {"rocblas_dtrmm_outofplace", {HIP_5000, HIP_0, HIP_0 }},
+ {"rocblas_ctrmm_outofplace", {HIP_5000, HIP_0, HIP_0 }},
+ {"rocblas_ztrmm_outofplace", {HIP_5000, HIP_0, HIP_0 }},
+ {"rocblas_strsm", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_dtrsm", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_ctrsm", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_ztrsm", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_strsm_batched", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_dtrsm_batched", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_ctrsm_batched", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_ztrsm_batched", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_sgemm", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_dgemm", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_hgemm", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_cgemm", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_zgemm", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_sgemm_batched", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_dgemm_batched", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_hgemm_batched", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_cgemm_batched", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_zgemm_batched", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_sgemm_strided_batched", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_dgemm_strided_batched", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_hgemm_strided_batched", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_cgemm_strided_batched", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_zgemm_strided_batched", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_sdgmm", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_ddgmm", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_cdgmm", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_zdgmm", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_sgeam", {HIP_1064, HIP_0, HIP_0 }},
+ {"rocblas_dgeam", {HIP_1064, HIP_0, HIP_0 }},
+ {"rocblas_cgeam", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_zgeam", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_gemm_ex", {HIP_1082, HIP_0, HIP_0 }},
+ {"rocblas_gemm_batched_ex", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_gemm_strided_batched_ex", {HIP_1090, HIP_0, HIP_0 }},
+ {"rocblas_axpy_ex", {HIP_3090, HIP_0, HIP_0 }},
+ {"rocblas_dot_ex", {HIP_4010, HIP_0, HIP_0 }},
+ {"rocblas_dotc_ex", {HIP_4010, HIP_0, HIP_0 }},
+ {"rocblas_nrm2_ex", {HIP_4010, HIP_0, HIP_0 }},
+ {"rocblas_rot_ex", {HIP_4010, HIP_0, HIP_0 }},
+ {"rocblas_scal_ex", {HIP_4010, HIP_0, HIP_0 }},
+ {"rocblas_initialize", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_create_handle", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_destroy_handle", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_set_stream", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_get_stream", {HIP_1050, HIP_0, HIP_0 }},
+ {"rocblas_set_pointer_mode", {HIP_1060, HIP_0, HIP_0 }},
+ {"rocblas_get_pointer_mode", {HIP_1060, HIP_0, HIP_0 }},
+ {"rocblas_set_atomics_mode", {HIP_3080, HIP_0, HIP_0 }},
+ {"rocblas_get_atomics_mode", {HIP_3080, HIP_0, HIP_0 }},
+ {"rocblas_set_vector", {HIP_1060, HIP_0, HIP_0 }},
+ {"rocblas_get_vector", {HIP_1060, HIP_0, HIP_0 }},
+ {"rocblas_set_matrix", {HIP_1060, HIP_0, HIP_0 }},
+ {"rocblas_get_matrix", {HIP_1060, HIP_0, HIP_0 }},
+ {"rocblas_set_vector_async", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_get_vector_async", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_set_matrix_async", {HIP_3050, HIP_0, HIP_0 }},
+ {"rocblas_get_matrix_async", {HIP_3050, HIP_0, HIP_0 }},
};
const std::map CUDA_BLAS_API_SECTION_MAP {
diff --git a/src/CUDA2HIP_BLAS_API_types.cpp b/src/CUDA2HIP_BLAS_API_types.cpp
index 997183f1..cd94f7b6 100644
--- a/src/CUDA2HIP_BLAS_API_types.cpp
+++ b/src/CUDA2HIP_BLAS_API_types.cpp
@@ -138,42 +138,46 @@ const std::map CUDA_BLAS_TYPE_NAME_MAP {
{"CUBLAS_GEMM_ALGO15_TENSOR_OP", {"HIPBLAS_GEMM_ALGO15_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 115
// TODO: rename hipblasDatatype_t to hipDataType_t and move from hipBLAS to HIP
- {"cudaDataType_t", {"hipblasDatatype_t", "rocblas_datatype_", CONV_TYPE, API_RUNTIME, 3}},
- {"cudaDataType", {"hipblasDatatype_t", "rocblas_datatype", CONV_TYPE, API_RUNTIME, 3}},
- {"CUDA_R_16F", {"HIPBLAS_R_16F", "rocblas_datatype_f16_r", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 2 // 150
- {"CUDA_C_16F", {"HIPBLAS_C_16F", "rocblas_datatype_f16_c", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 6 // 153
- {"CUDA_R_32F", {"HIPBLAS_R_32F", "rocblas_datatype_f32_r", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 0 // 151
- {"CUDA_C_32F", {"HIPBLAS_C_32F", "rocblas_datatype_f32_c", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 4 // 154
- {"CUDA_R_64F", {"HIPBLAS_R_64F", "rocblas_datatype_f64_r", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 1 // 152
- {"CUDA_C_64F", {"HIPBLAS_C_64F", "rocblas_datatype_f64_c", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 5 // 155
- {"CUDA_R_8I", {"HIPBLAS_R_8I", "rocblas_datatype_i8_r", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 3 // 160
- {"CUDA_C_8I", {"HIPBLAS_C_8I", "rocblas_datatype_i8_c", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 7 // 164
- {"CUDA_R_8U", {"HIPBLAS_R_8U", "rocblas_datatype_u8_r", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 8 // 161
- {"CUDA_C_8U", {"HIPBLAS_C_8U", "rocblas_datatype_u8_c", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 9 // 165
- {"CUDA_R_32I", {"HIPBLAS_R_32I", "rocblas_datatype_i32_r", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 10 // 162
- {"CUDA_C_32I", {"HIPBLAS_C_32I", "rocblas_datatype_i32_c", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 11 // 166
- {"CUDA_R_32U", {"HIPBLAS_R_32U", "rocblas_datatype_u32_r", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 12 // 163
- {"CUDA_C_32U", {"HIPBLAS_C_32U", "rocblas_datatype_u32_c", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 13 // 167
- {"CUDA_R_16BF", {"HIPBLAS_R_16B", "rocblas_datatype_bf16_r", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 14 // 168
- {"CUDA_C_16BF", {"HIPBLAS_C_16B", "rocblas_datatype_bf16_c", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 15 // 169
- {"CUDA_R_4I", {"HIPBLAS_R_4I", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 16
- {"CUDA_C_4I", {"HIPBLAS_C_4I", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 17
- {"CUDA_R_4U", {"HIPBLAS_R_4U", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 18
- {"CUDA_C_4U", {"HIPBLAS_C_4U", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 19
- {"CUDA_R_16I", {"HIPBLAS_R_16I", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 20
- {"CUDA_C_16I", {"HIPBLAS_C_16I", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 21
- {"CUDA_R_16U", {"HIPBLAS_R_16U", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 22
- {"CUDA_C_16U", {"HIPBLAS_C_16U", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 23
- {"CUDA_R_64I", {"HIPBLAS_R_64I", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 24
- {"CUDA_C_64I", {"HIPBLAS_C_64I", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 25
- {"CUDA_R_64U", {"HIPBLAS_R_64U", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 26
- {"CUDA_C_64U", {"HIPBLAS_C_64U", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 27
+ {"cudaDataType_t", {"hipblasDatatype_t", "rocblas_datatype_", CONV_TYPE, API_BLAS, 3}},
+ {"cudaDataType", {"hipblasDatatype_t", "rocblas_datatype", CONV_TYPE, API_BLAS, 3}},
+ {"CUDA_R_16F", {"HIPBLAS_R_16F", "rocblas_datatype_f16_r", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 2 // 150
+ {"CUDA_C_16F", {"HIPBLAS_C_16F", "rocblas_datatype_f16_c", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 6 // 153
+ {"CUDA_R_32F", {"HIPBLAS_R_32F", "rocblas_datatype_f32_r", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 0 // 151
+ {"CUDA_C_32F", {"HIPBLAS_C_32F", "rocblas_datatype_f32_c", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 4 // 154
+ {"CUDA_R_64F", {"HIPBLAS_R_64F", "rocblas_datatype_f64_r", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 1 // 152
+ {"CUDA_C_64F", {"HIPBLAS_C_64F", "rocblas_datatype_f64_c", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 5 // 155
+ {"CUDA_R_8I", {"HIPBLAS_R_8I", "rocblas_datatype_i8_r", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 3 // 160
+ {"CUDA_C_8I", {"HIPBLAS_C_8I", "rocblas_datatype_i8_c", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 7 // 164
+ {"CUDA_R_8U", {"HIPBLAS_R_8U", "rocblas_datatype_u8_r", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 8 // 161
+ {"CUDA_C_8U", {"HIPBLAS_C_8U", "rocblas_datatype_u8_c", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 9 // 165
+ {"CUDA_R_32I", {"HIPBLAS_R_32I", "rocblas_datatype_i32_r", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 10 // 162
+ {"CUDA_C_32I", {"HIPBLAS_C_32I", "rocblas_datatype_i32_c", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 11 // 166
+ {"CUDA_R_32U", {"HIPBLAS_R_32U", "rocblas_datatype_u32_r", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 12 // 163
+ {"CUDA_C_32U", {"HIPBLAS_C_32U", "rocblas_datatype_u32_c", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 13 // 167
+ {"CUDA_R_16BF", {"HIPBLAS_R_16B", "rocblas_datatype_bf16_r", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 14 // 168
+ {"CUDA_C_16BF", {"HIPBLAS_C_16B", "rocblas_datatype_bf16_c", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 15 // 169
+ {"CUDA_R_4I", {"HIPBLAS_R_4I", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 16
+ {"CUDA_C_4I", {"HIPBLAS_C_4I", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 17
+ {"CUDA_R_4U", {"HIPBLAS_R_4U", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 18
+ {"CUDA_C_4U", {"HIPBLAS_C_4U", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 19
+ {"CUDA_R_16I", {"HIPBLAS_R_16I", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 20
+ {"CUDA_C_16I", {"HIPBLAS_C_16I", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 21
+ {"CUDA_R_16U", {"HIPBLAS_R_16U", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 22
+ {"CUDA_C_16U", {"HIPBLAS_C_16U", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 23
+ {"CUDA_R_64I", {"HIPBLAS_R_64I", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 24
+ {"CUDA_C_64I", {"HIPBLAS_C_64I", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 25
+ {"CUDA_R_64U", {"HIPBLAS_R_64U", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 26
+ {"CUDA_C_64U", {"HIPBLAS_C_64U", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 27
+ {"CUDA_R_8F_E4M3", {"HIPBLAS_R_8F_E4M3", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 28
+ {"CUDA_R_8F_E5M2", {"HIPBLAS_R_8F_E5M2", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 29
{"cublasHandle_t", {"hipblasHandle_t", "rocblas_handle", CONV_TYPE, API_BLAS, 2}},
// TODO: dereferencing: typedef struct cublasContext *cublasHandle_t;
{"cublasContext", {"hipblasContext", "_rocblas_handle", CONV_TYPE, API_BLAS, 2, HIP_UNSUPPORTED}},
- {"cublasComputeType_t", {"hipblasComputeType_t", "", CONV_TYPE, API_BLAS, 2, UNSUPPORTED}},
+ // NOTE: renamed UNSUPPORTED hipblasComputeType_t to the HIP supported hipblasDatatype_t (workaround)
+ // TODO: change the type to the correct one after fixing https://github.com/ROCmSoftwarePlatform/hipBLAS/issues/529
+ {"cublasComputeType_t", {"hipblasDatatype_t", "", CONV_TYPE, API_BLAS, 2}},
{"CUBLAS_COMPUTE_16F", {"HIPBLAS_COMPUTE_16F", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 64
{"CUBLAS_COMPUTE_16F_PEDANTIC", {"HIPBLAS_COMPUTE_16F_PEDANTIC", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 65
{"CUBLAS_COMPUTE_32F", {"HIPBLAS_COMPUTE_32F", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 68
@@ -288,6 +292,8 @@ const std::map CUDA_BLAS_TYPE_NAME_VER_MAP {
{"CUDA_C_64I", {CUDA_110, CUDA_0, CUDA_0}},
{"CUDA_R_64U", {CUDA_110, CUDA_0, CUDA_0}},
{"CUDA_C_64U", {CUDA_110, CUDA_0, CUDA_0}},
+ {"CUDA_R_8F_E4M3", {CUDA_118, CUDA_0, CUDA_0}},
+ {"CUDA_R_8F_E5M2", {CUDA_118, CUDA_0, CUDA_0}},
};
const std::map HIP_BLAS_TYPE_NAME_VER_MAP {
diff --git a/src/CUDA2HIP_DNN_API_functions.cpp b/src/CUDA2HIP_DNN_API_functions.cpp
index 3babf0da..d41f2dfc 100644
--- a/src/CUDA2HIP_DNN_API_functions.cpp
+++ b/src/CUDA2HIP_DNN_API_functions.cpp
@@ -27,6 +27,7 @@ const std::map CUDA_DNN_FUNCTION_MAP {
{"cudnnGetVersion", {"hipdnnGetVersion", "", CONV_LIB_FUNC, API_DNN, 2}},
{"cudnnGetCudartVersion", {"hipdnnGetCudartVersion", "", CONV_LIB_FUNC, API_DNN, 2, HIP_UNSUPPORTED}},
+ {"cudnnGetMaxDeviceVersion", {"hipdnnGetMaxDeviceVersion", "", CONV_LIB_FUNC, API_DNN, 2, HIP_UNSUPPORTED}},
{"cudnnQueryRuntimeError", {"hipdnnQueryRuntimeError", "", CONV_LIB_FUNC, API_DNN, 2, HIP_UNSUPPORTED}},
{"cudnnGetProperty", {"hipdnnGetProperty", "", CONV_LIB_FUNC, API_DNN, 2, HIP_UNSUPPORTED}},
{"cudnnGetErrorString", {"hipdnnGetErrorString", "", CONV_LIB_FUNC, API_DNN, 2}},
@@ -608,6 +609,7 @@ const std::map CUDA_DNN_FUNCTION_VER_MAP {
{"cudnnSetRNNDescriptor_v5", {CUDNN_705, CUDNN_765, CUDNN_801}},
{"cudnnSetActivationDescriptorSwishBeta", {CUDNN_820, CUDA_0, CUDA_0}},
{"cudnnGetActivationDescriptorSwishBeta", {CUDNN_820, CUDA_0, CUDA_0}},
+ {"cudnnGetMaxDeviceVersion", {CUDNN_860, CUDA_0, CUDA_0}},
};
const std::map HIP_DNN_FUNCTION_VER_MAP {
diff --git a/src/CUDA2HIP_DNN_API_types.cpp b/src/CUDA2HIP_DNN_API_types.cpp
index 98580c33..de977487 100644
--- a/src/CUDA2HIP_DNN_API_types.cpp
+++ b/src/CUDA2HIP_DNN_API_types.cpp
@@ -59,22 +59,22 @@ const std::map CUDA_DNN_TYPE_NAME_MAP {
{"CUDNN_OPS_TRAIN_PATCH", {"HIPDNN_OPS_TRAIN_PATCH", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
// cuDNN enums
- {"cudnnStatus_t", {"hipdnnStatus_t", "", CONV_TYPE, API_DNN, 1}},
- {"CUDNN_STATUS_SUCCESS", {"HIPDNN_STATUS_SUCCESS", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 0
- {"CUDNN_STATUS_NOT_INITIALIZED", {"HIPDNN_STATUS_NOT_INITIALIZED", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 1
- {"CUDNN_STATUS_ALLOC_FAILED", {"HIPDNN_STATUS_ALLOC_FAILED", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 2
- {"CUDNN_STATUS_BAD_PARAM", {"HIPDNN_STATUS_BAD_PARAM", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 3
- {"CUDNN_STATUS_INTERNAL_ERROR", {"HIPDNN_STATUS_INTERNAL_ERROR", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 4
- {"CUDNN_STATUS_INVALID_VALUE", {"HIPDNN_STATUS_INVALID_VALUE", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 5
- {"CUDNN_STATUS_ARCH_MISMATCH", {"HIPDNN_STATUS_ARCH_MISMATCH", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 6
- {"CUDNN_STATUS_MAPPING_ERROR", {"HIPDNN_STATUS_MAPPING_ERROR", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 7
- {"CUDNN_STATUS_EXECUTION_FAILED", {"HIPDNN_STATUS_EXECUTION_FAILED", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 8
- {"CUDNN_STATUS_NOT_SUPPORTED", {"HIPDNN_STATUS_NOT_SUPPORTED", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 9
- {"CUDNN_STATUS_LICENSE_ERROR", {"HIPDNN_STATUS_LICENSE_ERROR", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 10
- {"CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING", {"HIPDNN_STATUS_RUNTIME_PREREQUISITE_MISSING", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 11
- {"CUDNN_STATUS_RUNTIME_IN_PROGRESS", {"HIPDNN_STATUS_RUNTIME_IN_PROGRESS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 12
- {"CUDNN_STATUS_RUNTIME_FP_OVERFLOW", {"HIPDNN_STATUS_RUNTIME_FP_OVERFLOW", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 13
- {"CUDNN_STATUS_VERSION_MISMATCH", {"HIPDNN_STATUS_VERSION_MISMATCH", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 14
+ {"cudnnStatus_t", {"hipdnnStatus_t", "miopenStatus_t", CONV_TYPE, API_DNN, 1}},
+ {"CUDNN_STATUS_SUCCESS", {"HIPDNN_STATUS_SUCCESS", "miopenStatusSuccess", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 0
+ {"CUDNN_STATUS_NOT_INITIALIZED", {"HIPDNN_STATUS_NOT_INITIALIZED", "miopenStatusNotInitialized", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 1
+ {"CUDNN_STATUS_ALLOC_FAILED", {"HIPDNN_STATUS_ALLOC_FAILED", "miopenStatusAllocFailed", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 2 // 4
+ {"CUDNN_STATUS_BAD_PARAM", {"HIPDNN_STATUS_BAD_PARAM", "miopenStatusBadParm", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 3
+ {"CUDNN_STATUS_INTERNAL_ERROR", {"HIPDNN_STATUS_INTERNAL_ERROR", "miopenStatusInternalError", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 4 // 5
+ {"CUDNN_STATUS_INVALID_VALUE", {"HIPDNN_STATUS_INVALID_VALUE", "miopenStatusInvalidValue", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 5 // 2
+ {"CUDNN_STATUS_ARCH_MISMATCH", {"HIPDNN_STATUS_ARCH_MISMATCH", "", CONV_NUMERIC_LITERAL, API_DNN, 1, ROC_UNSUPPORTED}}, // 6
+ {"CUDNN_STATUS_MAPPING_ERROR", {"HIPDNN_STATUS_MAPPING_ERROR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, ROC_UNSUPPORTED}}, // 7
+ {"CUDNN_STATUS_EXECUTION_FAILED", {"HIPDNN_STATUS_EXECUTION_FAILED", "", CONV_NUMERIC_LITERAL, API_DNN, 1, ROC_UNSUPPORTED}}, // 8
+ {"CUDNN_STATUS_NOT_SUPPORTED", {"HIPDNN_STATUS_NOT_SUPPORTED", "miopenStatusUnsupportedOp", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 9 // 8
+ {"CUDNN_STATUS_LICENSE_ERROR", {"HIPDNN_STATUS_LICENSE_ERROR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, ROC_UNSUPPORTED}}, // 10
+ {"CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING", {"HIPDNN_STATUS_RUNTIME_PREREQUISITE_MISSING", "", CONV_NUMERIC_LITERAL, API_DNN, 1, ROC_UNSUPPORTED}}, // 11
+ {"CUDNN_STATUS_RUNTIME_IN_PROGRESS", {"HIPDNN_STATUS_RUNTIME_IN_PROGRESS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 12
+ {"CUDNN_STATUS_RUNTIME_FP_OVERFLOW", {"HIPDNN_STATUS_RUNTIME_FP_OVERFLOW", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 13
+ {"CUDNN_STATUS_VERSION_MISMATCH", {"HIPDNN_STATUS_VERSION_MISMATCH", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 14
{"cudnnRuntimeTag_t", {"hipdnnRuntimeTag_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}},
{"cudnnConvolutionMode_t", {"hipdnnConvolutionMode_t", "", CONV_TYPE, API_DNN, 1}},
{"CUDNN_CONVOLUTION", {"HIPDNN_CONVOLUTION", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 0
@@ -98,6 +98,9 @@ const std::map CUDA_DNN_TYPE_NAME_MAP {
{"CUDNN_DATA_INT8x32", {"HIPDNN_DATA_INT8x32", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 8
{"CUDNN_DATA_BFLOAT16", {"HIPDNN_DATA_BFLOAT16", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 9
{"CUDNN_DATA_INT64", {"HIPDNN_DATA_INT64", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 10
+ {"CUDNN_DATA_BOOLEAN", {"HIPDNN_DATA_BOOLEAN", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 11
+ {"CUDNN_DATA_FP8_E4M3", {"HIPDNN_DATA_FP8_E4M3", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 12
+ {"CUDNN_DATA_FP8_E5M2", {"HIPDNN_DATA_FP8_E5M2", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 13
{"cudnnErrQueryMode_t", {"hipdnnErrQueryMode_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}},
{"CUDNN_ERRQUERY_RAWCODE", {"HIPDNN_ERRQUERY_RAWCODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 0
{"CUDNN_ERRQUERY_NONBLOCKING", {"HIPDNN_ERRQUERY_NONBLOCKING", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1
@@ -370,6 +373,23 @@ const std::map CUDA_DNN_TYPE_NAME_MAP {
{"CUDNN_POINTWISE_MIN", {"HIPDNN_POINTWISE_MIN", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2
{"CUDNN_POINTWISE_MAX", {"HIPDNN_POINTWISE_MAX", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 3
{"CUDNN_POINTWISE_SQRT", {"HIPDNN_POINTWISE_SQRT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 4
+ {"CUDNN_POINTWISE_ADD_SQUARE", {"HIPDNN_POINTWISE_ADD_SQUARE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 5
+ {"CUDNN_POINTWISE_DIV", {"HIPDNN_POINTWISE_DIV", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 6
+ {"CUDNN_POINTWISE_MOD", {"HIPDNN_POINTWISE_MOD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 7
+ {"CUDNN_POINTWISE_POW", {"HIPDNN_POINTWISE_POW", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 8
+ {"CUDNN_POINTWISE_SUB", {"HIPDNN_POINTWISE_SUB", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 9
+ {"CUDNN_POINTWISE_ABS", {"HIPDNN_POINTWISE_ABS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 10
+ {"CUDNN_POINTWISE_CEIL", {"HIPDNN_POINTWISE_CEIL", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 11
+ {"CUDNN_POINTWISE_COS", {"HIPDNN_POINTWISE_COS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 12
+ {"CUDNN_POINTWISE_EXP", {"HIPDNN_POINTWISE_EXP", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 13
+ {"CUDNN_POINTWISE_FLOOR", {"HIPDNN_POINTWISE_FLOOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 14
+ {"CUDNN_POINTWISE_LOG", {"HIPDNN_POINTWISE_LOG", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 15
+ {"CUDNN_POINTWISE_NEG", {"HIPDNN_POINTWISE_NEG", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 16
+ {"CUDNN_POINTWISE_RSQRT", {"HIPDNN_POINTWISE_RSQRT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 17
+ {"CUDNN_POINTWISE_SIN", {"HIPDNN_POINTWISE_SIN", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 18
+ {"CUDNN_POINTWISE_TAN", {"HIPDNN_POINTWISE_TAN", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 19
+ {"CUDNN_POINTWISE_ERF", {"HIPDNN_POINTWISE_ERF", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 20
+ {"CUDNN_POINTWISE_IDENTITY", {"HIPDNN_POINTWISE_IDENTITY", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 21
{"CUDNN_POINTWISE_RELU_FWD", {"HIPDNN_POINTWISE_RELU_FWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 100
{"CUDNN_POINTWISE_TANH_FWD", {"HIPDNN_POINTWISE_TANH_FWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 101
{"CUDNN_POINTWISE_SIGMOID_FWD", {"HIPDNN_POINTWISE_SIGMOID_FWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 102
@@ -377,6 +397,7 @@ const std::map CUDA_DNN_TYPE_NAME_MAP {
{"CUDNN_POINTWISE_GELU_FWD", {"HIPDNN_POINTWISE_GELU_FWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 104
{"CUDNN_POINTWISE_SOFTPLUS_FWD", {"HIPDNN_POINTWISE_SOFTPLUS_FWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 105
{"CUDNN_POINTWISE_SWISH_FWD", {"HIPDNN_POINTWISE_SWISH_FWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 106
+ {"CUDNN_POINTWISE_GELU_APPROX_TANH_FWD", {"HIPDNN_POINTWISE_GELU_APPROX_TANH_FWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 107
{"CUDNN_POINTWISE_RELU_BWD", {"HIPDNN_POINTWISE_RELU_BWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 200
{"CUDNN_POINTWISE_TANH_BWD", {"HIPDNN_POINTWISE_TANH_BWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 201
{"CUDNN_POINTWISE_SIGMOID_BWD", {"HIPDNN_POINTWISE_SIGMOID_BWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 202
@@ -384,6 +405,18 @@ const std::map CUDA_DNN_TYPE_NAME_MAP {
{"CUDNN_POINTWISE_GELU_BWD", {"HIPDNN_POINTWISE_GELU_BWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 204
{"CUDNN_POINTWISE_SOFTPLUS_BWD", {"HIPDNN_POINTWISE_SOFTPLUS_BWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 205
{"CUDNN_POINTWISE_SWISH_BWD", {"HIPDNN_POINTWISE_SWISH_BWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 206
+ {"CUDNN_POINTWISE_GELU_APPROX_TANH_BWD", {"HIPDNN_POINTWISE_GELU_APPROX_TANH_BWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 207
+ {"CUDNN_POINTWISE_CMP_EQ", {"HIPDNN_POINTWISE_CMP_EQ", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 300
+ {"CUDNN_POINTWISE_CMP_NEQ", {"HIPDNN_POINTWISE_CMP_NEQ", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 301
+ {"CUDNN_POINTWISE_CMP_GT", {"HIPDNN_POINTWISE_CMP_GT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 302
+ {"CUDNN_POINTWISE_CMP_GE", {"HIPDNN_POINTWISE_CMP_GE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 303
+ {"CUDNN_POINTWISE_CMP_LT", {"HIPDNN_POINTWISE_CMP_LT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 304
+ {"CUDNN_POINTWISE_CMP_LE", {"HIPDNN_POINTWISE_CMP_LE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 305
+ {"CUDNN_POINTWISE_LOGICAL_AND", {"HIPDNN_POINTWISE_LOGICAL_AND", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 400
+ {"CUDNN_POINTWISE_LOGICAL_OR", {"HIPDNN_POINTWISE_LOGICAL_OR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 401
+ {"CUDNN_POINTWISE_LOGICAL_NOT", {"HIPDNN_POINTWISE_LOGICAL_NOT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 402
+ {"CUDNN_POINTWISE_GEN_INDEX", {"HIPDNN_POINTWISE_GEN_INDEX", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 501
+ {"CUDNN_POINTWISE_BINARY_SELECT", {"HIPDNN_POINTWISE_BINARY_SELECT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 601
{"cudnnGenStatsMode_t", {"hipdnnGenStatsMode_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}},
{"CUDNN_GENSTATS_SUM_SQSUM", {"HIPDNN_GENSTATS_SUM_SQSUM", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 0
{"cudnnBackendAttributeName_t", {"hipdnnBackendAttributeName_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}},
@@ -396,6 +429,7 @@ const std::map CUDA_DNN_TYPE_NAME_MAP {
{"CUDNN_ATTR_POINTWISE_ELU_ALPHA", {"HIPDNN_ATTR_POINTWISE_ELU_ALPHA", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 6
{"CUDNN_ATTR_POINTWISE_SOFTPLUS_BETA", {"HIPDNN_ATTR_POINTWISE_SOFTPLUS_BETA", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 7
{"CUDNN_ATTR_POINTWISE_SWISH_BETA", {"HIPDNN_ATTR_POINTWISE_SWISH_BETA", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 8
+ {"CUDNN_ATTR_POINTWISE_AXIS", {"HIPDNN_ATTR_POINTWISE_AXIS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 9
{"CUDNN_ATTR_CONVOLUTION_COMP_TYPE", {"HIPDNN_ATTR_CONVOLUTION_COMP_TYPE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 100
{"CUDNN_ATTR_CONVOLUTION_CONV_MODE", {"HIPDNN_ATTR_CONVOLUTION_CONV_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 101
{"CUDNN_ATTR_CONVOLUTION_DILATIONS", {"HIPDNN_ATTR_CONVOLUTION_DILATIONS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 102
@@ -414,6 +448,7 @@ const std::map CUDA_DNN_TYPE_NAME_MAP {
{"CUDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE", {"HIPDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 402
{"CUDNN_ATTR_EXECUTION_PLAN_COMPUTED_INTERMEDIATE_UIDS", {"HIPDNN_ATTR_EXECUTION_PLAN_COMPUTED_INTERMEDIATE_UIDS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 403
{"CUDNN_ATTR_EXECUTION_PLAN_RUN_ONLY_INTERMEDIATE_UIDS", {"HIPDNN_ATTR_EXECUTION_PLAN_RUN_ONLY_INTERMEDIATE_UIDS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 404
+ {"CUDNN_ATTR_EXECUTION_PLAN_JSON_REPRESENTATION", {"HIPDNN_ATTR_EXECUTION_PLAN_JSON_REPRESENTATION", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 405
{"CUDNN_ATTR_INTERMEDIATE_INFO_UNIQUE_ID", {"HIPDNN_ATTR_INTERMEDIATE_INFO_UNIQUE_ID", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 500
{"CUDNN_ATTR_INTERMEDIATE_INFO_SIZE", {"HIPDNN_ATTR_INTERMEDIATE_INFO_SIZE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 501
{"CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_DATA_UIDS", {"HIPDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_DATA_UIDS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 502
@@ -446,6 +481,7 @@ const std::map CUDA_DNN_TYPE_NAME_MAP {
{"CUDNN_ATTR_OPERATION_POINTWISE_ALPHA2", {"HIPDNN_ATTR_OPERATION_POINTWISE_ALPHA2", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 755
{"CUDNN_ATTR_OPERATION_POINTWISE_DXDESC", {"HIPDNN_ATTR_OPERATION_POINTWISE_DXDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 756
{"CUDNN_ATTR_OPERATION_POINTWISE_DYDESC", {"HIPDNN_ATTR_OPERATION_POINTWISE_DYDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 757
+ {"CUDNN_ATTR_OPERATION_POINTWISE_TDESC", {"HIPDNN_ATTR_OPERATION_POINTWISE_TDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 758
{"CUDNN_ATTR_OPERATION_GENSTATS_MODE", {"HIPDNN_ATTR_OPERATION_GENSTATS_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 770
{"CUDNN_ATTR_OPERATION_GENSTATS_MATH_PREC", {"HIPDNN_ATTR_OPERATION_GENSTATS_MATH_PREC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 771
{"CUDNN_ATTR_OPERATION_GENSTATS_XDESC", {"HIPDNN_ATTR_OPERATION_GENSTATS_XDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 772
@@ -480,6 +516,7 @@ const std::map CUDA_DNN_TYPE_NAME_MAP {
{"CUDNN_ATTR_TENSOR_UNIQUE_ID", {"HIPDNN_ATTR_TENSOR_UNIQUE_ID", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 906
{"CUDNN_ATTR_TENSOR_IS_VIRTUAL", {"HIPDNN_ATTR_TENSOR_IS_VIRTUAL", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 907
{"CUDNN_ATTR_TENSOR_IS_BY_VALUE", {"HIPDNN_ATTR_TENSOR_IS_BY_VALUE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 908
+ {"CUDNN_ATTR_TENSOR_REORDERING_MODE", {"HIPDNN_ATTR_TENSOR_REORDERING_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 909
{"CUDNN_ATTR_VARIANT_PACK_UNIQUE_IDS", {"HIPDNN_ATTR_VARIANT_PACK_UNIQUE_IDS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1000
{"CUDNN_ATTR_VARIANT_PACK_DATA_POINTERS", {"HIPDNN_ATTR_VARIANT_PACK_DATA_POINTERS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1001
{"CUDNN_ATTR_VARIANT_PACK_INTERMEDIATES", {"HIPDNN_ATTR_VARIANT_PACK_INTERMEDIATES", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1002
@@ -518,6 +555,62 @@ const std::map CUDA_DNN_TYPE_NAME_MAP {
{"CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_DY_SCALE_DESC", {"HIPDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_DY_SCALE_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1628
{"CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_X_SCALE_DESC", {"HIPDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_X_SCALE_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1629
{"CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_BIAS", {"HIPDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_BIAS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1630
+ {"CUDNN_ATTR_RESAMPLE_MODE", {"HIPDNN_ATTR_RESAMPLE_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1700
+ {"CUDNN_ATTR_RESAMPLE_COMP_TYPE", {"HIPDNN_ATTR_RESAMPLE_COMP_TYPE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1701
+ {"CUDNN_ATTR_RESAMPLE_SPATIAL_DIMS", {"HIPDNN_ATTR_RESAMPLE_SPATIAL_DIMS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1702
+ {"CUDNN_ATTR_RESAMPLE_POST_PADDINGS", {"HIPDNN_ATTR_RESAMPLE_POST_PADDINGS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1703
+ {"CUDNN_ATTR_RESAMPLE_PRE_PADDINGS", {"HIPDNN_ATTR_RESAMPLE_PRE_PADDINGS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1704
+ {"CUDNN_ATTR_RESAMPLE_STRIDES", {"HIPDNN_ATTR_RESAMPLE_STRIDES", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1705
+ {"CUDNN_ATTR_RESAMPLE_WINDOW_DIMS", {"HIPDNN_ATTR_RESAMPLE_WINDOW_DIMS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1706
+ {"CUDNN_ATTR_RESAMPLE_NAN_PROPAGATION", {"HIPDNN_ATTR_RESAMPLE_NAN_PROPAGATION", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1707
+ {"CUDNN_ATTR_RESAMPLE_PADDING_MODE", {"HIPDNN_ATTR_RESAMPLE_PADDING_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1708
+ {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_XDESC", {"HIPDNN_ATTR_OPERATION_RESAMPLE_FWD_XDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1710
+ {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_YDESC", {"HIPDNN_ATTR_OPERATION_RESAMPLE_FWD_YDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1711
+ {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_IDXDESC", {"HIPDNN_ATTR_OPERATION_RESAMPLE_FWD_IDXDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1712
+ {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_ALPHA", {"HIPDNN_ATTR_OPERATION_RESAMPLE_FWD_ALPHA", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1713
+ {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_BETA", {"HIPDNN_ATTR_OPERATION_RESAMPLE_FWD_BETA", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1714
+ {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_DESC", {"HIPDNN_ATTR_OPERATION_RESAMPLE_FWD_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1716
+ {"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DXDESC", {"HIPDNN_ATTR_OPERATION_RESAMPLE_BWD_DXDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1720
+ {"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DYDESC", {"HIPDNN_ATTR_OPERATION_RESAMPLE_BWD_DYDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1721
+ {"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_IDXDESC", {"HIPDNN_ATTR_OPERATION_RESAMPLE_BWD_IDXDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1722
+ {"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_ALPHA", {"HIPDNN_ATTR_OPERATION_RESAMPLE_BWD_ALPHA", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1723
+ {"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_BETA", {"HIPDNN_ATTR_OPERATION_RESAMPLE_BWD_BETA", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1724
+ {"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DESC", {"HIPDNN_ATTR_OPERATION_RESAMPLE_BWD_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1725
+ {"CUDNN_ATTR_OPERATION_CONCAT_AXIS", {"HIPDNN_ATTR_OPERATION_CONCAT_AXIS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1800
+ {"CUDNN_ATTR_OPERATION_CONCAT_INPUT_DESCS", {"HIPDNN_ATTR_OPERATION_CONCAT_INPUT_DESCS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1801
+ {"CUDNN_ATTR_OPERATION_CONCAT_INPLACE_INDEX", {"HIPDNN_ATTR_OPERATION_CONCAT_INPLACE_INDEX", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1802
+ {"CUDNN_ATTR_OPERATION_CONCAT_OUTPUT_DESC", {"HIPDNN_ATTR_OPERATION_CONCAT_OUTPUT_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1803
+ {"CUDNN_ATTR_OPERATION_SIGNAL_MODE", {"HIPDNN_ATTR_OPERATION_SIGNAL_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1900
+ {"CUDNN_ATTR_OPERATION_SIGNAL_FLAGDESC", {"HIPDNN_ATTR_OPERATION_SIGNAL_FLAGDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1901
+ {"CUDNN_ATTR_OPERATION_SIGNAL_VALUE", {"HIPDNN_ATTR_OPERATION_SIGNAL_VALUE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1902
+ {"CUDNN_ATTR_OPERATION_SIGNAL_XDESC", {"HIPDNN_ATTR_OPERATION_SIGNAL_XDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1903
+ {"CUDNN_ATTR_OPERATION_SIGNAL_YDESC", {"HIPDNN_ATTR_OPERATION_SIGNAL_YDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1904
+ {"CUDNN_ATTR_OPERATION_NORM_FWD_MODE", {"HIPDNN_ATTR_OPERATION_NORM_FWD_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2000
+ {"CUDNN_ATTR_OPERATION_NORM_FWD_PHASE", {"HIPDNN_ATTR_OPERATION_NORM_FWD_PHASE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2001
+ {"CUDNN_ATTR_OPERATION_NORM_FWD_XDESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_XDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2002
+ {"CUDNN_ATTR_OPERATION_NORM_FWD_MEAN_DESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_MEAN_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2003
+ {"CUDNN_ATTR_OPERATION_NORM_FWD_INV_VARIANCE_DESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_INV_VARIANCE_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2004
+ {"CUDNN_ATTR_OPERATION_NORM_FWD_SCALE_DESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_SCALE_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2005
+ {"CUDNN_ATTR_OPERATION_NORM_FWD_BIAS_DESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_BIAS_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2006
+ {"CUDNN_ATTR_OPERATION_NORM_FWD_EPSILON_DESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_EPSILON_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2007
+ {"CUDNN_ATTR_OPERATION_NORM_FWD_EXP_AVG_FACTOR_DESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_EXP_AVG_FACTOR_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2008
+ {"CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_MEAN_DESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_MEAN_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2009
+ {"CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_VAR_DESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_VAR_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2010
+ {"CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_MEAN_DESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_MEAN_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2011
+ {"CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_VAR_DESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_VAR_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2012
+ {"CUDNN_ATTR_OPERATION_NORM_FWD_YDESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_YDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2013
+ {"CUDNN_ATTR_OPERATION_NORM_FWD_PEER_STAT_DESCS", {"HIPDNN_ATTR_OPERATION_NORM_FWD_PEER_STAT_DESCS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2014
+ {"CUDNN_ATTR_OPERATION_NORM_BWD_MODE", {"HIPDNN_ATTR_OPERATION_NORM_BWD_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2100
+ {"CUDNN_ATTR_OPERATION_NORM_BWD_XDESC", {"HIPDNN_ATTR_OPERATION_NORM_BWD_XDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2101
+ {"CUDNN_ATTR_OPERATION_NORM_BWD_MEAN_DESC", {"HIPDNN_ATTR_OPERATION_NORM_BWD_MEAN_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2102
+ {"CUDNN_ATTR_OPERATION_NORM_BWD_INV_VARIANCE_DESC", {"HIPDNN_ATTR_OPERATION_NORM_BWD_INV_VARIANCE_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2103
+ {"CUDNN_ATTR_OPERATION_NORM_BWD_DYDESC", {"HIPDNN_ATTR_OPERATION_NORM_BWD_DYDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2104
+ {"CUDNN_ATTR_OPERATION_NORM_BWD_SCALE_DESC", {"HIPDNN_ATTR_OPERATION_NORM_BWD_SCALE_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2105
+ {"CUDNN_ATTR_OPERATION_NORM_BWD_EPSILON_DESC", {"HIPDNN_ATTR_OPERATION_NORM_BWD_EPSILON_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2106
+ {"CUDNN_ATTR_OPERATION_NORM_BWD_DSCALE_DESC", {"HIPDNN_ATTR_OPERATION_NORM_BWD_DSCALE_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2107
+ {"CUDNN_ATTR_OPERATION_NORM_BWD_DBIAS_DESC", {"HIPDNN_ATTR_OPERATION_NORM_BWD_DBIAS_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2108
+ {"CUDNN_ATTR_OPERATION_NORM_BWD_DXDESC", {"HIPDNN_ATTR_OPERATION_NORM_BWD_DXDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2109
+ {"CUDNN_ATTR_OPERATION_NORM_BWD_PEER_STAT_DESCS", {"HIPDNN_ATTR_OPERATION_NORM_BWD_PEER_STAT_DESCS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2110
{"cudnnBackendAttributeType_t", {"hipdnnBackendAttributeType_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}},
{"CUDNN_TYPE_HANDLE", {"HIPDNN_TYPE_HANDLE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
{"CUDNN_TYPE_DATA_TYPE", {"HIPDNN_TYPE_DATA_TYPE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
@@ -539,6 +632,15 @@ const std::map CUDA_DNN_TYPE_NAME_MAP {
{"CUDNN_TYPE_BN_FINALIZE_STATS_MODE", {"HIPDNN_TYPE_BN_FINALIZE_STATS_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
{"CUDNN_TYPE_REDUCTION_OPERATOR_TYPE", {"HIPDNN_TYPE_REDUCTION_OPERATOR_TYPE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
{"CUDNN_TYPE_BEHAVIOR_NOTE", {"HIPDNN_TYPE_BEHAVIOR_NOTE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_TYPE_TENSOR_REORDERING_MODE", {"HIPDNN_TYPE_TENSOR_REORDERING_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_TYPE_RESAMPLE_MODE", {"HIPDNN_TYPE_RESAMPLE_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_TYPE_PADDING_MODE", {"HIPDNN_TYPE_PADDING_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_TYPE_INT32", {"HIPDNN_TYPE_INT32", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_TYPE_CHAR", {"HIPDNN_TYPE_CHAR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_TYPE_SIGNAL_MODE", {"HIPDNN_TYPE_SIGNAL_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_TYPE_FRACTION", {"HIPDNN_TYPE_FRACTION", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_TYPE_NORM_MODE", {"HIPDNN_TYPE_NORM_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_TYPE_NORM_FWD_PHASE", {"HIPDNN_TYPE_NORM_FWD_PHASE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
{"cudnnBackendDescriptorType_t", {"hipdnnBackendDescriptorType_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}},
{"CUDNN_BACKEND_POINTWISE_DESCRIPTOR", {"HIPDNN_BACKEND_POINTWISE_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
{"CUDNN_BACKEND_CONVOLUTION_DESCRIPTOR", {"HIPDNN_BACKEND_CONVOLUTION_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
@@ -564,6 +666,13 @@ const std::map CUDA_DNN_TYPE_NAME_MAP {
{"CUDNN_BACKEND_REDUCTION_DESCRIPTOR", {"HIPDNN_BACKEND_REDUCTION_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
{"CUDNN_BACKEND_OPERATION_REDUCTION_DESCRIPTOR", {"HIPDNN_BACKEND_OPERATION_REDUCTION_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
{"CUDNN_BACKEND_OPERATION_BN_BWD_WEIGHTS_DESCRIPTOR", {"HIPDNN_BACKEND_OPERATION_BN_BWD_WEIGHTS_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_BACKEND_RESAMPLE_DESCRIPTOR", {"HIPDNN_BACKEND_RESAMPLE_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_BACKEND_OPERATION_RESAMPLE_FWD_DESCRIPTOR", {"HIPDNN_BACKEND_OPERATION_RESAMPLE_FWD_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_BACKEND_OPERATION_RESAMPLE_BWD_DESCRIPTOR", {"HIPDNN_BACKEND_OPERATION_RESAMPLE_BWD_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_BACKEND_OPERATION_CONCAT_DESCRIPTOR", {"HIPDNN_BACKEND_OPERATION_CONCAT_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_BACKEND_OPERATION_SIGNAL_DESCRIPTOR", {"HIPDNN_BACKEND_OPERATION_SIGNAL_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_BACKEND_OPERATION_NORM_FORWARD_DESCRIPTOR", {"HIPDNN_BACKEND_OPERATION_NORM_FORWARD_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_BACKEND_OPERATION_NORM_BACKWARD_DESCRIPTOR", {"HIPDNN_BACKEND_OPERATION_NORM_BACKWARD_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
{"cudnnBackendNumericalNote_t", {"hipdnnBackendNumericalNote_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}},
{"CUDNN_NUMERICAL_NOTE_TENSOR_CORE", {"HIPDNN_NUMERICAL_NOTE_TENSOR_CORE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
{"CUDNN_NUMERICAL_NOTE_DOWN_CONVERT_INPUTS", {"HIPDNN_NUMERICAL_NOTE_DOWN_CONVERT_INPUTS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
@@ -571,6 +680,9 @@ const std::map CUDA_DNN_TYPE_NAME_MAP {
{"CUDNN_NUMERICAL_NOTE_FFT", {"HIPDNN_NUMERICAL_NOTE_FFT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
{"CUDNN_NUMERICAL_NOTE_NONDETERMINISTIC", {"HIPDNN_NUMERICAL_NOTE_NONDETERMINISTIC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
{"CUDNN_NUMERICAL_NOTE_WINOGRAD", {"HIPDNN_NUMERICAL_NOTE_WINOGRAD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_4x4", {"HIPDNN_NUMERICAL_NOTE_WINOGRAD_TILE_4x4", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_6x6", {"HIPDNN_NUMERICAL_NOTE_WINOGRAD_TILE_6x6", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_13x13", {"HIPDNN_NUMERICAL_NOTE_WINOGRAD_TILE_13x13", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
{"CUDNN_NUMERICAL_NOTE_TYPE_COUNT", {"HIPDNN_NUMERICAL_NOTE_TYPE_COUNT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
{"cudnnBackendLayoutType_t", {"hipdnnBackendLayoutType_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}},
{"CUDNN_LAYOUT_TYPE_PREFERRED_NCHW", {"HIPDNN_LAYOUT_TYPE_PREFERRED_NCHW", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
@@ -604,10 +716,16 @@ const std::map CUDA_DNN_TYPE_NAME_MAP {
{"CUDNN_KNOB_TYPE_LDGC", {"HIPDNN_KNOB_TYPE_LDGC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
{"CUDNN_KNOB_TYPE_SPECFILT", {"HIPDNN_KNOB_TYPE_SPECFILT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
{"CUDNN_KNOB_TYPE_KERNEL_CFG", {"HIPDNN_KNOB_TYPE_KERNEL_CFG", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_KNOB_TYPE_WORKSPACE", {"HIPDNN_KNOB_TYPE_WORKSPACE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_KNOB_TYPE_TILE_CGA", {"HIPDNN_KNOB_TYPE_TILE_CGA", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_KNOB_TYPE_TILE_CGA_M", {"HIPDNN_KNOB_TYPE_TILE_CGA_M", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_KNOB_TYPE_TILE_CGA_N", {"HIPDNN_KNOB_TYPE_TILE_CGA_N", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
{"CUDNN_KNOB_TYPE_COUNTS", {"HIPDNN_KNOB_TYPE_COUNTS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
{"cudnnBackendHeurMode_t", {"hipdnnBackendHeurMode_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}},
{"CUDNN_HEUR_MODE_INSTANT", {"HIPDNN_HEUR_MODE_INSTANT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
{"CUDNN_HEUR_MODE_B", {"HIPDNN_HEUR_MODE_B", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_HEUR_MODE_FALLBACK", {"HIPDNN_HEUR_MODE_FALLBACK", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_HEUR_MODE_A", {"HIPDNN_HEUR_MODE_A", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
{"CUDNN_HEUR_MODES_COUNT", {"HIPDNN_HEUR_MODES_COUNT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
{"cudnnNormMode_t", {"hipdnnNormMode_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}},
{"CUDNN_NORM_PER_ACTIVATION", {"HIPDNN_NORM_PER_ACTIVATION", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
@@ -624,7 +742,34 @@ const std::map CUDA_DNN_TYPE_NAME_MAP {
{"CUDNN_BN_FINALIZE_STATISTICS_INFERENCE", {"HIPDNN_BN_FINALIZE_STATISTICS_INFERENCE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
{"cudnnBackendBehaviorNote_t", {"hipdnnBackendBehaviorNote_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}},
{"CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION", {"HIPDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 0
+ {"CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER", {"HIPDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1
+ {"CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER", {"HIPDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2
{"CUDNN_BEHAVIOR_NOTE_TYPE_COUNT", {"HIPDNN_BEHAVIOR_NOTE_TYPE_COUNT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"cudnnResampleMode_t", {"hipdnnResampleMode_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_RESAMPLE_NEAREST", {"HIPDNN_RESAMPLE_NEAREST", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_RESAMPLE_BILINEAR", {"HIPDNN_RESAMPLE_BILINEAR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_RESAMPLE_AVGPOOL", {"HIPDNN_RESAMPLE_AVGPOOL", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_RESAMPLE_AVGPOOL_INCLUDE_PADDING", {"HIPDNN_RESAMPLE_AVGPOOL_INCLUDE_PADDING", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_RESAMPLE_AVGPOOL_EXCLUDE_PADDING", {"HIPDNN_RESAMPLE_AVGPOOL_EXCLUDE_PADDING", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_RESAMPLE_MAXPOOL", {"HIPDNN_RESAMPLE_MAXPOOL", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"cudnnSignalMode_t", {"hipdnnSignalMode_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_SIGNAL_SET", {"HIPDNN_SIGNAL_SET", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_SIGNAL_WAIT", {"HIPDNN_SIGNAL_WAIT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"cudnnBackendTensorReordering_t", {"hipdnnBackendTensorReordering_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_TENSOR_REORDERING_NONE", {"HIPDNN_TENSOR_REORDERING_NONE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_TENSOR_REORDERING_INT8x32", {"HIPDNN_TENSOR_REORDERING_INT8x32", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"cudnnPaddingMode_t", {"hipdnnPaddingMode_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_ZERO_PAD", {"HIPDNN_ZERO_PAD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_NEG_INF_PAD", {"HIPDNN_NEG_INF_PAD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_EDGE_VAL_PAD", {"HIPDNN_EDGE_VAL_PAD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"cudnnBackendNormMode_t", {"hipdnnBackendNormMode_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_LAYER_NORM", {"HIPDNN_LAYER_NORM", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_INSTANCE_NORM", {"HIPDNN_INSTANCE_NORM", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_BATCH_NORM", {"HIPDNN_BATCH_NORM", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_GROUP_NORM", {"HIPDNN_GROUP_NORM", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"cudnnBackendNormFwdPhase_t", {"hipdnnBackendNormFwdPhase_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_NORM_FWD_INFERENCE", {"HIPDNN_NORM_FWD_INFERENCE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"CUDNN_NORM_FWD_TRAINING", {"HIPDNN_NORM_FWD_TRAINING", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}},
// cuDNN types
{"cudnnContext", {"hipdnnContext", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}},
@@ -687,6 +832,8 @@ const std::map CUDA_DNN_TYPE_NAME_MAP {
{"cudnnBackendDescriptor_t", {"hipdnnBackendDescriptor_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}},
{"libraryPropertyType", {"hipdnnLibraryPropertyType", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}},
{"libraryPropertyType_t", {"hipdnnLibraryPropertyType_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"cudnnFractionStruct", {"hipdnnFractionStruct", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}},
+ {"cudnnFraction_t", {"hipdnnFraction_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}},
};
const std::map CUDA_DNN_TYPE_NAME_VER_MAP {
@@ -1350,6 +1497,153 @@ const std::map CUDA_DNN_TYPE_NAME_VER_MAP {
{"cudnnBackendBehaviorNote_t", {CUDNN_820, CUDA_0, CUDA_0 }},
{"CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION", {CUDNN_820, CUDA_0, CUDA_0 }},
{"CUDNN_BEHAVIOR_NOTE_TYPE_COUNT", {CUDNN_820, CUDA_0, CUDA_0 }},
+ {"CUDNN_DATA_BOOLEAN", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_POINTWISE_ADD_SQUARE", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_POINTWISE_DIV", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_POINTWISE_MOD", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_POINTWISE_POW", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_POINTWISE_SUB", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_POINTWISE_ABS", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_POINTWISE_CEIL", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_POINTWISE_COS", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_POINTWISE_EXP", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_POINTWISE_FLOOR", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_POINTWISE_LOG", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_POINTWISE_NEG", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_POINTWISE_RSQRT", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_POINTWISE_SIN", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_POINTWISE_TAN", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_POINTWISE_ERF", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_POINTWISE_IDENTITY", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_POINTWISE_GELU_APPROX_TANH_FWD", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_POINTWISE_GELU_APPROX_TANH_BWD", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_POINTWISE_CMP_EQ", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_POINTWISE_CMP_NEQ", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_POINTWISE_CMP_GT", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_POINTWISE_CMP_GE", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_POINTWISE_CMP_LT", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_POINTWISE_CMP_LE", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_POINTWISE_LOGICAL_AND", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_POINTWISE_LOGICAL_OR", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_POINTWISE_LOGICAL_NOT", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_POINTWISE_GEN_INDEX", {CUDNN_840, CUDA_0, CUDA_0 }},
+ {"CUDNN_POINTWISE_BINARY_SELECT", {CUDNN_840, CUDA_0, CUDA_0 }},
+ {"cudnnFractionStruct", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"cudnnFraction_t", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"cudnnResampleMode_t", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_RESAMPLE_NEAREST", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_RESAMPLE_BILINEAR", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_RESAMPLE_AVGPOOL", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_RESAMPLE_MAXPOOL", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"cudnnSignalMode_t", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_SIGNAL_SET", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_SIGNAL_WAIT", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_POINTWISE_AXIS", {CUDNN_840, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_EXECUTION_PLAN_JSON_REPRESENTATION", {CUDNN_840, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_POINTWISE_TDESC", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_TENSOR_REORDERING_MODE", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_RESAMPLE_MODE", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_RESAMPLE_COMP_TYPE", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_RESAMPLE_SPATIAL_DIMS", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_RESAMPLE_POST_PADDINGS", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_RESAMPLE_PRE_PADDINGS", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_RESAMPLE_STRIDES", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_RESAMPLE_WINDOW_DIMS", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_RESAMPLE_NAN_PROPAGATION", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_RESAMPLE_PADDING_MODE", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_XDESC", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_YDESC", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_IDXDESC", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_ALPHA", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_BETA", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_DESC", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DXDESC", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DYDESC", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_IDXDESC", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_ALPHA", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_BETA", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DESC", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_CONCAT_AXIS", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_CONCAT_INPUT_DESCS", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_CONCAT_INPLACE_INDEX", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_CONCAT_OUTPUT_DESC", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_SIGNAL_MODE", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_SIGNAL_FLAGDESC", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_SIGNAL_VALUE", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_SIGNAL_XDESC", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_SIGNAL_YDESC", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_NORM_FWD_MODE", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_NORM_FWD_PHASE", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_NORM_FWD_XDESC", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_NORM_FWD_MEAN_DESC", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_NORM_FWD_INV_VARIANCE_DESC", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_NORM_FWD_SCALE_DESC", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_NORM_FWD_BIAS_DESC", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_NORM_FWD_EPSILON_DESC", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_NORM_FWD_EXP_AVG_FACTOR_DESC", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_MEAN_DESC", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_VAR_DESC", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_MEAN_DESC", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_VAR_DESC", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_NORM_FWD_YDESC", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_NORM_FWD_PEER_STAT_DESCS", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_NORM_BWD_MODE", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_NORM_BWD_XDESC", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_NORM_BWD_MEAN_DESC", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_NORM_BWD_INV_VARIANCE_DESC", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_NORM_BWD_DYDESC", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_NORM_BWD_SCALE_DESC", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_NORM_BWD_EPSILON_DESC", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_NORM_BWD_DSCALE_DESC", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_NORM_BWD_DBIAS_DESC", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_NORM_BWD_DXDESC", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_ATTR_OPERATION_NORM_BWD_PEER_STAT_DESCS", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_TYPE_TENSOR_REORDERING_MODE", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_TYPE_RESAMPLE_MODE", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_TYPE_PADDING_MODE", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_TYPE_INT32", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_TYPE_CHAR", {CUDNN_840, CUDA_0, CUDA_0 }},
+ {"CUDNN_TYPE_SIGNAL_MODE", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_TYPE_FRACTION", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_TYPE_NORM_MODE", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_TYPE_NORM_FWD_PHASE", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_BACKEND_RESAMPLE_DESCRIPTOR", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_BACKEND_OPERATION_RESAMPLE_FWD_DESCRIPTOR", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_BACKEND_OPERATION_RESAMPLE_BWD_DESCRIPTOR", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_BACKEND_OPERATION_CONCAT_DESCRIPTOR", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_BACKEND_OPERATION_SIGNAL_DESCRIPTOR", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_BACKEND_OPERATION_NORM_FORWARD_DESCRIPTOR", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_BACKEND_OPERATION_NORM_BACKWARD_DESCRIPTOR", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_4x4", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_6x6", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_13x13", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_KNOB_TYPE_WORKSPACE", {CUDNN_840, CUDA_0, CUDA_0 }},
+ {"CUDNN_HEUR_MODE_FALLBACK", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_HEUR_MODE_A", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"cudnnBackendTensorReordering_t", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_TENSOR_REORDERING_NONE", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_TENSOR_REORDERING_INT8x32", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"cudnnPaddingMode_t", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_ZERO_PAD", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_NEG_INF_PAD", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"CUDNN_EDGE_VAL_PAD", {CUDNN_830, CUDA_0, CUDA_0 }},
+ {"cudnnBackendNormMode_t", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_LAYER_NORM", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_INSTANCE_NORM", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_BATCH_NORM", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_GROUP_NORM", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"cudnnBackendNormFwdPhase_t", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_NORM_FWD_INFERENCE", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_NORM_FWD_TRAINING", {CUDNN_850, CUDA_0, CUDA_0 }},
+ {"CUDNN_RESAMPLE_AVGPOOL_INCLUDE_PADDING", {CUDNN_860, CUDA_0, CUDA_0 }},
+ {"CUDNN_RESAMPLE_AVGPOOL_EXCLUDE_PADDING", {CUDNN_860, CUDA_0, CUDA_0 }},
+ {"CUDNN_KNOB_TYPE_TILE_CGA", {CUDNN_860, CUDA_0, CUDA_0 }},
+ {"CUDNN_KNOB_TYPE_TILE_CGA_M", {CUDNN_860, CUDA_0, CUDA_0 }},
+ {"CUDNN_KNOB_TYPE_TILE_CGA_N", {CUDNN_860, CUDA_0, CUDA_0 }},
+ {"CUDNN_DATA_FP8_E4M3", {CUDNN_860, CUDA_0, CUDA_0 }},
+ {"CUDNN_DATA_FP8_E5M2", {CUDNN_860, CUDA_0, CUDA_0 }},
};
const std::map HIP_DNN_TYPE_NAME_VER_MAP {
diff --git a/src/CUDA2HIP_Device_functions.cpp b/src/CUDA2HIP_Device_functions.cpp
index b54a8780..07c2f797 100644
--- a/src/CUDA2HIP_Device_functions.cpp
+++ b/src/CUDA2HIP_Device_functions.cpp
@@ -25,1210 +25,1412 @@ THE SOFTWARE.
// Maps CUDA header names to HIP header names
const std::map CUDA_DEVICE_FUNCTION_MAP {
// math functions
- {"abs", {"abs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"labs", {"labs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"llabs", {"llabs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"fabs", {"fabs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"fabsf", {"fabsf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"min", {"min", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"fminf", {"fminf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"fmin", {"fmin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"max", {"max", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"fmaxf", {"fmaxf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"fmax", {"fmax", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"sin", {"sin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"cos", {"cos", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"sincos", {"sincos", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"sincosf", {"sincosf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"tan", {"tan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"sqrt", {"sqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"rsqrt", {"rsqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"rsqrtf", {"rsqrtf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"log2", {"log2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"exp2", {"exp2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"exp2f", {"exp2f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"exp10", {"exp10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"exp10f", {"exp10f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"expm1", {"expm1", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"expm1f", {"expm1f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"log2f", {"log2f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"log10", {"log10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"log", {"log", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"log1p", {"log1p", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"log1pf", {"log1pf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"floor", {"floor", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"exp", {"exp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"cosh", {"cosh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"sinh", {"sinh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"tanh", {"tanh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"acosh", {"acosh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"acoshf", {"acoshf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"asinh", {"asinh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"asinhf", {"asinhf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"atanh", {"atanh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"atanhf", {"atanhf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"ldexp", {"ldexp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"ldexpf", {"ldexpf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"logb", {"logb", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"logbf", {"logbf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"ilogb", {"ilogb", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"ilogbf", {"ilogbf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"scalbn", {"scalbn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"scalbnf", {"scalbnf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"scalbln", {"scalbln", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"scalblnf", {"scalblnf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"frexp", {"frexp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"frexpf", {"frexpf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"round", {"round", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"roundf", {"roundf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"lround", {"lround", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"lroundf", {"lroundf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"llround", {"llround", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"llroundf", {"llroundf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"rint", {"rint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"rintf", {"rintf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"lrint", {"lrint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"lrintf", {"lrintf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"llrint", {"llrint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"llrintf", {"llrintf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"nearbyint", {"nearbyint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"nearbyintf", {"nearbyintf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"ceil", {"ceil", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"trunc", {"trunc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"truncf", {"truncf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"fdim", {"fdim", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"fdimf", {"fdimf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"atan2", {"atan2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"atan", {"atan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"acos", {"acos", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"asin", {"asin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"hypot", {"hypot", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"rhypot", {"rhypot", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"hypotf", {"hypotf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"rhypotf", {"rhypotf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"norm3d", {"norm3d", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"rnorm3d", {"rnorm3d", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"norm4d", {"norm4d", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"rnorm4d", {"rnorm4d", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"norm", {"norm", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"rnorm", {"rnorm", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"rnormf", {"rnormf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"normf", {"normf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"norm3df", {"norm3df", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"rnorm3df", {"rnorm3df", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"norm4df", {"norm4df", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"rnorm4df", {"rnorm4df", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"cbrt", {"cbrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"cbrtf", {"cbrtf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"rcbrt", {"rcbrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"rcbrtf", {"rcbrtf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"sinpi", {"sinpi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"sinpif", {"sinpif", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"cospi", {"cospi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"cospif", {"cospif", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"sincospi", {"sincospi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"sincospif", {"sincospif", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"pow", {"pow", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"modf", {"modf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"fmod", {"fmod", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"remainder", {"remainder", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"remainderf", {"remainderf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"remquo", {"remquo", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"remquof", {"remquof", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"j0", {"j0", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"j0f", {"j0f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"j1", {"j1", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"j1f", {"j1f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"jn", {"jn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"jnf", {"jnf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"y0", {"y0", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"y0f", {"y0f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"y1", {"y1", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"y1f", {"y1f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"yn", {"yn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"ynf", {"ynf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"cyl_bessel_i0", {"cyl_bessel_i0", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"cyl_bessel_i0f", {"cyl_bessel_i0f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"cyl_bessel_i1", {"cyl_bessel_i1", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"cyl_bessel_i1f", {"cyl_bessel_i1f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"erf", {"erf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"erff", {"erff", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"erfinv", {"erfinv", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"erfinvf", {"erfinvf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"erfc", {"erfc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"erfcf", {"erfcf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"lgamma", {"lgamma", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"erfcinv", {"erfcinv", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"erfcinvf", {"erfcinvf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"normcdfinv", {"normcdfinv", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"normcdfinvf", {"normcdfinvf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"normcdf", {"normcdf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"normcdff", {"normcdff", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"erfcx", {"erfcx", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"erfcxf", {"erfcxf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"lgammaf", {"lgammaf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"tgamma", {"tgamma", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"tgammaf", {"tgammaf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"copysign", {"copysign", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"copysignf", {"copysignf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"nextafter", {"nextafter", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"nextafterf", {"nextafterf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"nan", {"nan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"nanf", {"nanf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"fma", {"fma", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"fmaf", {"fmaf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"acosf", {"acosf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"asinf", {"asinf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"atanf", {"atanf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"atan2f", {"atan2f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"cosf", {"cosf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"sinf", {"sinf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"tanf", {"tanf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"coshf", {"coshf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"sinhf", {"sinhf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"tanhf", {"tanhf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"expf", {"expf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"logf", {"logf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"log10f", {"log10f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"modff", {"modff", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"powf", {"powf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"sqrtf", {"sqrtf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"ceilf", {"ceilf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"floorf", {"floorf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"fmodf", {"fmodf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"signbit", {"signbit", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"isfinite", {"isfinite", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"isnan", {"isnan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"isinf", {"isinf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"umin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"llmin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"ullmin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"umax", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"llmax", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"ullmax", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__isinff", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__isnanf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__finite", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__finitef", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__signbit", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__isnan", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__isinf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__signbitf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__signbitl", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__finitel", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__isinfl", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__isnanl", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"_ldsign", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"_fdsign", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"_Pow_int", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"abs", {"abs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"labs", {"labs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"llabs", {"llabs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"fabs", {"fabs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"fabsf", {"fabsf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"min", {"min", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"fminf", {"fminf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"fmin", {"fmin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"max", {"max", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"fmaxf", {"fmaxf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"fmax", {"fmax", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"sin", {"sin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"cos", {"cos", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"sincos", {"sincos", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"sincosf", {"sincosf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"tan", {"tan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"sqrt", {"sqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"rsqrt", {"rsqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"rsqrtf", {"rsqrtf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"log2", {"log2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"exp2", {"exp2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"exp2f", {"exp2f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"exp10", {"exp10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"exp10f", {"exp10f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"expm1", {"expm1", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"expm1f", {"expm1f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"log2f", {"log2f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"log10", {"log10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"log", {"log", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"log1p", {"log1p", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"log1pf", {"log1pf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"floor", {"floor", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"exp", {"exp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"cosh", {"cosh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"sinh", {"sinh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"tanh", {"tanh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"acosh", {"acosh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"acoshf", {"acoshf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"asinh", {"asinh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"asinhf", {"asinhf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"atanh", {"atanh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"atanhf", {"atanhf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"ldexp", {"ldexp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"ldexpf", {"ldexpf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"logb", {"logb", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"logbf", {"logbf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"ilogb", {"ilogb", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"ilogbf", {"ilogbf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"scalbn", {"scalbn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"scalbnf", {"scalbnf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"scalbln", {"scalbln", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"scalblnf", {"scalblnf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"frexp", {"frexp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"frexpf", {"frexpf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"round", {"round", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"roundf", {"roundf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"lround", {"lround", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"lroundf", {"lroundf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"llround", {"llround", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"llroundf", {"llroundf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"rint", {"rint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"rintf", {"rintf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"lrint", {"lrint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"lrintf", {"lrintf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"llrint", {"llrint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"llrintf", {"llrintf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"nearbyint", {"nearbyint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"nearbyintf", {"nearbyintf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"ceil", {"ceil", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"trunc", {"trunc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"truncf", {"truncf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"fdim", {"fdim", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"fdimf", {"fdimf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"atan2", {"atan2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"atan", {"atan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"acos", {"acos", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"asin", {"asin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"hypot", {"hypot", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"rhypot", {"rhypot", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"hypotf", {"hypotf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"rhypotf", {"rhypotf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"norm3d", {"norm3d", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"rnorm3d", {"rnorm3d", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"norm4d", {"norm4d", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"rnorm4d", {"rnorm4d", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"norm", {"norm", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"rnorm", {"rnorm", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"rnormf", {"rnormf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"normf", {"normf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"norm3df", {"norm3df", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"rnorm3df", {"rnorm3df", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"norm4df", {"norm4df", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"rnorm4df", {"rnorm4df", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"cbrt", {"cbrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"cbrtf", {"cbrtf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"rcbrt", {"rcbrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"rcbrtf", {"rcbrtf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"sinpi", {"sinpi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"sinpif", {"sinpif", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"cospi", {"cospi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"cospif", {"cospif", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"sincospi", {"sincospi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"sincospif", {"sincospif", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"pow", {"pow", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"modf", {"modf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"fmod", {"fmod", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"remainder", {"remainder", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"remainderf", {"remainderf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"remquo", {"remquo", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"remquof", {"remquof", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"j0", {"j0", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"j0f", {"j0f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"j1", {"j1", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"j1f", {"j1f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"jn", {"jn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"jnf", {"jnf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"y0", {"y0", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"y0f", {"y0f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"y1", {"y1", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"y1f", {"y1f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"yn", {"yn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"ynf", {"ynf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"cyl_bessel_i0", {"cyl_bessel_i0", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"cyl_bessel_i0f", {"cyl_bessel_i0f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"cyl_bessel_i1", {"cyl_bessel_i1", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"cyl_bessel_i1f", {"cyl_bessel_i1f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"erf", {"erf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"erff", {"erff", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"erfinv", {"erfinv", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"erfinvf", {"erfinvf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"erfc", {"erfc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"erfcf", {"erfcf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"lgamma", {"lgamma", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"erfcinv", {"erfcinv", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"erfcinvf", {"erfcinvf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"normcdfinv", {"normcdfinv", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"normcdfinvf", {"normcdfinvf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"normcdf", {"normcdf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"normcdff", {"normcdff", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"erfcx", {"erfcx", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"erfcxf", {"erfcxf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"lgammaf", {"lgammaf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"tgamma", {"tgamma", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"tgammaf", {"tgammaf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"copysign", {"copysign", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"copysignf", {"copysignf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"nextafter", {"nextafter", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"nextafterf", {"nextafterf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"nan", {"nan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"nanf", {"nanf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"fma", {"fma", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"fmaf", {"fmaf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"acosf", {"acosf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"asinf", {"asinf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"atanf", {"atanf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"atan2f", {"atan2f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"cosf", {"cosf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"sinf", {"sinf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"tanf", {"tanf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"coshf", {"coshf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"sinhf", {"sinhf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"tanhf", {"tanhf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"expf", {"expf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"logf", {"logf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"log10f", {"log10f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"modff", {"modff", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"powf", {"powf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"sqrtf", {"sqrtf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"ceilf", {"ceilf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"floorf", {"floorf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"fmodf", {"fmodf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"signbit", {"signbit", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"isfinite", {"isfinite", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"isnan", {"isnan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"isinf", {"isinf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"umin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"llmin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"ullmin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"umax", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"llmax", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"ullmax", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__isinff", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__isnanf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__finite", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__finitef", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__signbit", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__isnan", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__isinf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__signbitf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__signbitl", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__finitel", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__isinfl", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__isnanl", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"_ldsign", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"_fdsign", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"_Pow_int", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
// static math functions declared in device-functions.h
- {"mulhi", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"mul64hi", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"float_as_int", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"int_as_float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"float_as_uint", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"uint_as_float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"saturate", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"mul24", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"umul24", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"float2int", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"int2float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"uint2float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"mulhi", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"mul64hi", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"float_as_int", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"int_as_float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"float_as_uint", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"uint_as_float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"saturate", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"mul24", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"umul24", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"float2int", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"int2float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"uint2float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
// device functions
- {"__mulhi", {"__mulhi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__umulhi", {"__umulhi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__mul64hi", {"__mul64hi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__umul64hi", {"__umul64hi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__int_as_float", {"__int_as_float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__float_as_int", {"__float_as_int", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__uint_as_float", {"__uint_as_float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__float_as_uint", {"__float_as_uint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__syncthreads", {"__syncthreads", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__syncthreads_count", {"__syncthreads_count", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__syncthreads_and", {"__syncthreads_and", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__syncthreads_or", {"__syncthreads_or", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__threadfence", {"__threadfence", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__threadfence_block", {"__threadfence_block", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__threadfence_system",{"__threadfence_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__saturatef", {"__saturatef", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__sad", {"__sad", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__usad", {"__usad", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__mul24", {"__mul24", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__umul24", {"__umul24", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"fdividef", {"fdividef", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__fdividef", {"__fdividef", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"fdivide", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__sinf", {"__sinf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__cosf", {"__cosf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__tanf", {"__tanf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__sincosf", {"__sincosf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__expf", {"__expf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__exp10f", {"__exp10f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__log2f", {"__log2f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__log10f", {"__log10f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__logf", {"__logf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__powf", {"__powf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__float2int_rn", {"__float2int_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__float2int_rz", {"__float2int_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__float2int_ru", {"__float2int_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__float2int_rd", {"__float2int_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__float2uint_rn", {"__float2uint_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__float2uint_rz", {"__float2uint_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__float2uint_ru", {"__float2uint_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__float2uint_rd", {"__float2uint_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__int2float_rn", {"__int2float_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__int2float_rz", {"__int2float_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__int2float_ru", {"__int2float_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__int2float_rd", {"__int2float_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__uint2float_rn", {"__uint2float_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__uint2float_rz", {"__uint2float_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__uint2float_ru", {"__uint2float_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__uint2float_rd", {"__uint2float_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__float2ll_rn", {"__float2ll_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__float2ll_rz", {"__float2ll_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__float2ll_ru", {"__float2ll_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__float2ll_rd", {"__float2ll_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__float2ull_rn", {"__float2ull_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__float2ull_rz", {"__float2ull_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__float2ull_ru", {"__float2ull_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__float2ull_rd", {"__float2ull_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ll2float_rn", {"__ll2float_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ll2float_rz", {"__ll2float_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ll2float_ru", {"__ll2float_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ll2float_rd", {"__ll2float_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ull2float_rn", {"__ull2float_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ull2float_rz", {"__ull2float_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ull2float_ru", {"__ull2float_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ull2float_rd", {"__ull2float_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__fadd_rn", {"__fadd_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__fadd_rz", {"__fadd_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__fadd_ru", {"__fadd_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__fadd_rd", {"__fadd_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__fsub_rn", {"__fsub_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__fsub_rz", {"__fsub_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__fsub_ru", {"__fsub_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__fsub_rd", {"__fsub_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__fmul_rn", {"__fmul_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__fmul_rz", {"__fmul_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__fmul_ru", {"__fmul_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__fmul_rd", {"__fmul_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__fmaf_rn", {"__fmaf_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__fmaf_rz", {"__fmaf_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__fmaf_ru", {"__fmaf_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__fmaf_rd", {"__fmaf_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__frcp_rn", {"__frcp_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__frcp_rz", {"__frcp_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__frcp_ru", {"__frcp_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__frcp_rd", {"__frcp_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__fsqrt_rn", {"__fsqrt_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__fsqrt_rz", {"__fsqrt_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__fsqrt_ru", {"__fsqrt_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__fsqrt_rd", {"__fsqrt_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__frsqrt_rn", {"__frsqrt_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__fdiv_rn", {"__fdiv_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__fdiv_rz", {"__fdiv_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__fdiv_ru", {"__fdiv_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__fdiv_rd", {"__fdiv_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__clz", {"__clz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ffs", {"__ffs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__popc", {"__popc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__brev", {"__brev", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__clzll", {"__clzll", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ffsll", {"__ffsll", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__popcll", {"__popcll", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__brevll", {"__brevll", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__byte_perm", {"__byte_perm", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hadd", {"__hadd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__rhadd", {"__rhadd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__uhadd", {"__uhadd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__urhadd", {"__urhadd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__prof_trigger", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__trap", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__brkpt", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__pm0", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__pm1", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__pm2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__pm3", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__mulhi", {"__mulhi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__umulhi", {"__umulhi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__mul64hi", {"__mul64hi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__umul64hi", {"__umul64hi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__int_as_float", {"__int_as_float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__float_as_int", {"__float_as_int", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__uint_as_float", {"__uint_as_float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__float_as_uint", {"__float_as_uint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__syncthreads", {"__syncthreads", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__syncthreads_count", {"__syncthreads_count", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__syncthreads_and", {"__syncthreads_and", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__syncthreads_or", {"__syncthreads_or", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__threadfence", {"__threadfence", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__threadfence_block", {"__threadfence_block", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__threadfence_system", {"__threadfence_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__saturatef", {"__saturatef", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__sad", {"__sad", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__usad", {"__usad", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__mul24", {"__mul24", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__umul24", {"__umul24", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"fdividef", {"fdividef", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__fdividef", {"__fdividef", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"fdivide", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__sinf", {"__sinf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__cosf", {"__cosf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__tanf", {"__tanf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__sincosf", {"__sincosf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__expf", {"__expf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__exp10f", {"__exp10f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__log2f", {"__log2f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__log10f", {"__log10f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__logf", {"__logf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__powf", {"__powf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__float2int_rn", {"__float2int_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__float2int_rz", {"__float2int_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__float2int_ru", {"__float2int_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__float2int_rd", {"__float2int_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__float2uint_rn", {"__float2uint_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__float2uint_rz", {"__float2uint_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__float2uint_ru", {"__float2uint_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__float2uint_rd", {"__float2uint_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__int2float_rn", {"__int2float_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__int2float_rz", {"__int2float_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__int2float_ru", {"__int2float_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__int2float_rd", {"__int2float_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__uint2float_rn", {"__uint2float_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__uint2float_rz", {"__uint2float_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__uint2float_ru", {"__uint2float_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__uint2float_rd", {"__uint2float_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__float2ll_rn", {"__float2ll_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__float2ll_rz", {"__float2ll_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__float2ll_ru", {"__float2ll_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__float2ll_rd", {"__float2ll_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__float2ull_rn", {"__float2ull_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__float2ull_rz", {"__float2ull_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__float2ull_ru", {"__float2ull_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__float2ull_rd", {"__float2ull_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ll2float_rn", {"__ll2float_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ll2float_rz", {"__ll2float_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ll2float_ru", {"__ll2float_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ll2float_rd", {"__ll2float_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ull2float_rn", {"__ull2float_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ull2float_rz", {"__ull2float_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ull2float_ru", {"__ull2float_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ull2float_rd", {"__ull2float_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__fadd_rn", {"__fadd_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__fadd_rz", {"__fadd_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__fadd_ru", {"__fadd_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__fadd_rd", {"__fadd_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__fsub_rn", {"__fsub_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__fsub_rz", {"__fsub_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__fsub_ru", {"__fsub_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__fsub_rd", {"__fsub_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__fmul_rn", {"__fmul_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__fmul_rz", {"__fmul_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__fmul_ru", {"__fmul_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__fmul_rd", {"__fmul_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__fmaf_rn", {"__fmaf_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__fmaf_rz", {"__fmaf_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__fmaf_ru", {"__fmaf_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__fmaf_rd", {"__fmaf_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__frcp_rn", {"__frcp_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__frcp_rz", {"__frcp_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__frcp_ru", {"__frcp_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__frcp_rd", {"__frcp_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__fsqrt_rn", {"__fsqrt_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__fsqrt_rz", {"__fsqrt_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__fsqrt_ru", {"__fsqrt_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__fsqrt_rd", {"__fsqrt_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__frsqrt_rn", {"__frsqrt_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__fdiv_rn", {"__fdiv_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__fdiv_rz", {"__fdiv_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__fdiv_ru", {"__fdiv_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__fdiv_rd", {"__fdiv_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__clz", {"__clz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ffs", {"__ffs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__popc", {"__popc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__brev", {"__brev", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__clzll", {"__clzll", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ffsll", {"__ffsll", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__popcll", {"__popcll", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__brevll", {"__brevll", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__byte_perm", {"__byte_perm", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hadd", {"__hadd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hadd_rn", {"__hadd_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__rhadd", {"__rhadd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__uhadd", {"__uhadd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__urhadd", {"__urhadd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__prof_trigger", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__trap", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__brkpt", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__pm0", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__pm1", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__pm2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__pm3", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
// device double functions
- {"__dadd_rz", {"__dadd_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__dadd_ru", {"__dadd_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__dadd_rd", {"__dadd_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__dadd_rn", {"__dadd_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ddiv_rz", {"__ddiv_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__ddiv_ru", {"__ddiv_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__ddiv_rd", {"__ddiv_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__ddiv_rn", {"__ddiv_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__dmul_rz", {"__dmul_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__dmul_ru", {"__dmul_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__dmul_rd", {"__dmul_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__dmul_rn", {"__dmul_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__drcp_rz", {"__drcp_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__drcp_ru", {"__drcp_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__drcp_rd", {"__drcp_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__drcp_rn", {"__drcp_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__dsqrt_rz", {"__dsqrt_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__dsqrt_ru", {"__dsqrt_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__dsqrt_rd", {"__dsqrt_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__dsqrt_rn", {"__dsqrt_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__dsub_rz", {"__dsub_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__dsub_ru", {"__dsub_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__dsub_rd", {"__dsub_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__dsub_rn", {"__dsub_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__fma_rz", {"__fma_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__fma_ru", {"__fma_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__fma_rd", {"__fma_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__fma_rn", {"__fma_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__double2float_rd", {"__double2float_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__double2float_rn", {"__double2float_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__double2float_ru", {"__double2float_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__double2float_rz", {"__double2float_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__double2hiint", {"__double2hiint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__double2loint", {"__double2loint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__double2int_rd", {"__double2int_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__double2int_rn", {"__double2int_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__double2int_ru", {"__double2int_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__double2int_rz", {"__double2int_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__double2ll_rd", {"__double2ll_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__double2ll_rn", {"__double2ll_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__double2ll_ru", {"__double2ll_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__double2ll_rz", {"__double2ll_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__double2uint_rd", {"__double2uint_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__double2uint_rn", {"__double2uint_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__double2uint_ru", {"__double2uint_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__double2uint_rz", {"__double2uint_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__double2ull_rd", {"__double2ull_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__double2ull_rn", {"__double2ull_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__double2ull_ru", {"__double2ull_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__double2ull_rz", {"__double2ull_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__double_as_longlong",{"__double_as_longlong", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hiloint2double", {"__hiloint2double", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__int2double_rn", {"__int2double_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ll2double_rd", {"__ll2double_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ll2double_rn", {"__ll2double_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ll2double_ru", {"__ll2double_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ll2double_rz", {"__ll2double_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__longlong_as_double",{"__longlong_as_double", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__uint2double_rn", {"__uint2double_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ull2double_rd", {"__ull2double_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ull2double_rn", {"__ull2double_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ull2double_ru", {"__ull2double_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ull2double_rz", {"__ull2double_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__dadd_rz", {"__dadd_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__dadd_ru", {"__dadd_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__dadd_rd", {"__dadd_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__dadd_rn", {"__dadd_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ddiv_rz", {"__ddiv_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__ddiv_ru", {"__ddiv_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__ddiv_rd", {"__ddiv_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__ddiv_rn", {"__ddiv_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__dmul_rz", {"__dmul_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__dmul_ru", {"__dmul_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__dmul_rd", {"__dmul_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__dmul_rn", {"__dmul_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__drcp_rz", {"__drcp_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__drcp_ru", {"__drcp_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__drcp_rd", {"__drcp_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__drcp_rn", {"__drcp_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__dsqrt_rz", {"__dsqrt_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__dsqrt_ru", {"__dsqrt_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__dsqrt_rd", {"__dsqrt_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__dsqrt_rn", {"__dsqrt_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__dsub_rz", {"__dsub_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__dsub_ru", {"__dsub_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__dsub_rd", {"__dsub_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__dsub_rn", {"__dsub_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__fma_rz", {"__fma_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__fma_ru", {"__fma_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__fma_rd", {"__fma_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__fma_rn", {"__fma_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__double2float_rd", {"__double2float_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__double2float_rn", {"__double2float_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__double2float_ru", {"__double2float_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__double2float_rz", {"__double2float_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__double2hiint", {"__double2hiint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__double2loint", {"__double2loint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__double2int_rd", {"__double2int_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__double2int_rn", {"__double2int_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__double2int_ru", {"__double2int_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__double2int_rz", {"__double2int_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__double2ll_rd", {"__double2ll_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__double2ll_rn", {"__double2ll_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__double2ll_ru", {"__double2ll_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__double2ll_rz", {"__double2ll_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__double2uint_rd", {"__double2uint_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__double2uint_rn", {"__double2uint_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__double2uint_ru", {"__double2uint_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__double2uint_rz", {"__double2uint_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__double2ull_rd", {"__double2ull_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__double2ull_rn", {"__double2ull_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__double2ull_ru", {"__double2ull_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__double2ull_rz", {"__double2ull_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__double_as_longlong", {"__double_as_longlong", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hiloint2double", {"__hiloint2double", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__int2double_rn", {"__int2double_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ll2double_rd", {"__ll2double_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ll2double_rn", {"__ll2double_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ll2double_ru", {"__ll2double_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ll2double_rz", {"__ll2double_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__longlong_as_double", {"__longlong_as_double", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__uint2double_rn", {"__uint2double_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ull2double_rd", {"__ull2double_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ull2double_rn", {"__ull2double_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ull2double_ru", {"__ull2double_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ull2double_rz", {"__ull2double_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
// SIMD functions
- {"__vabs2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vabsss2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vadd2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vaddss2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vaddus2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vavgs2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vavgu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vhaddu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vcmpeq2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vcmpges2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vcmpgeu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vcmpgts2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vcmpgtu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vcmples2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vcmplts2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vcmpltu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vcmpne2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vabsdiffu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vmaxs2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vmaxu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vmins2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vminu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vseteq2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vsetges2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vsetgeu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vsetgts2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vsetles2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vsetleu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vsetlts2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vsetltu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vsetne2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vsadu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vsub2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vsubss2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vsubus2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vneg2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vnegss2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vabsdiffs2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vsads2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vabs4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vabsss4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vadd4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vaddss4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vaddus4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vavgs4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vavgu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vhaddu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vcmpeq4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vcmpges4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vcmpgeu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vcmpgts4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vcmpgtu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vcmples4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vcmpleu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vcmplts4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vcmpltu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vcmpne4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vabsdiffu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vmaxs4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vmaxu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vmins4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vminu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vseteq4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vsetles4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vsetleu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vsetlts4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vsetltu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vsetges4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vsetgeu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vsetgts4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vsetgtu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vsetne4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vsadu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vsub4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vsubss4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vsubus4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vneg4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vnegss4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vabsdiffs4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__vsads4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vabs2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vabsss2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vadd2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vaddss2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vaddus2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vavgs2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vavgu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vhaddu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vcmpeq2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vcmpges2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vcmpgeu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vcmpgts2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vcmpgtu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vcmples2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vcmplts2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vcmpltu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vcmpne2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vabsdiffu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vmaxs2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vmaxu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vmins2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vminu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vseteq2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vsetges2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vsetgeu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vsetgts2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vsetles2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vsetleu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vsetlts2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vsetltu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vsetne2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vsadu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vsub2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vsubss2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vsubus2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vneg2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vnegss2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vabsdiffs2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vsads2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vabs4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vabsss4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vadd4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vaddss4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vaddus4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vavgs4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vavgu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vhaddu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vcmpeq4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vcmpges4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vcmpgeu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vcmpgts4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vcmpgtu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vcmples4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vcmpleu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vcmplts4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vcmpltu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vcmpne4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vabsdiffu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vmaxs4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vmaxu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vmins4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vminu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vseteq4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vsetles4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vsetleu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vsetlts4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vsetltu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vsetges4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vsetgeu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vsetgts4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vsetgtu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vsetne4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vsadu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vsub4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vsubss4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vsubus4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vneg4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vnegss4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vabsdiffs4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__vsads4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
// fp16 functions
- {"__float2half", {"__float2half", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__float2half_rn", {"__float2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__float2half_rz", {"__float2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__float2half_rd", {"__float2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__float2half_ru", {"__float2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__half2float", {"__half2float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__float2half2_rn", {"__float2half2_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__floats2half2_rn", {"__floats2half2_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__low2float", {"__low2float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__high2float", {"__high2float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__float22half2_rn", {"__float22half2_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__half22float2", {"__half22float2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__half2int_rn", {"__half2int_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__half2int_rz", {"__half2int_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__half2int_rd", {"__half2int_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__half2int_ru", {"__half2int_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__int2half_rn", {"__int2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__int2half_rz", {"__int2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__int2half_rd", {"__int2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__int2half_ru", {"__int2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__half2short_rn", {"__half2short_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__half2short_rz", {"__half2short_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__half2short_rd", {"__half2short_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__half2short_ru", {"__half2short_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__short2half_rn", {"__short2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__short2half_rz", {"__short2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__short2half_rd", {"__short2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__short2half_ru", {"__short2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__half2uint_rn", {"__half2uint_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__half2uint_rz", {"__half2uint_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__half2uint_rd", {"__half2uint_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__half2uint_ru", {"__half2uint_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__uint2half_rn", {"__uint2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__uint2half_rz", {"__uint2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__uint2half_rd", {"__uint2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__uint2half_ru", {"__uint2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__half2ushort_rn", {"__half2ushort_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__half2ushort_rz", {"__half2ushort_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__half2ushort_rd", {"__half2ushort_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__half2ushort_ru", {"__half2ushort_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ushort2half_rn", {"__ushort2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ushort2half_rz", {"__ushort2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ushort2half_rd", {"__ushort2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ushort2half_ru", {"__ushort2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__half2ull_rn", {"__half2ull_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__half2ull_rz", {"__half2ull_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__half2ull_rd", {"__half2ull_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__half2ull_ru", {"__half2ull_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ull2half_rn", {"__ull2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ull2half_rz", {"__ull2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ull2half_rd", {"__ull2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ull2half_ru", {"__ull2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__half2ll_rn", {"__half2ll_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__half2ll_rz", {"__half2ll_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__half2ll_rd", {"__half2ll_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__half2ll_ru", {"__half2ll_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ll2half_rn", {"__ll2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ll2half_rz", {"__ll2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ll2half_rd", {"__ll2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ll2half_ru", {"__ll2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"htrunc", {"htrunc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"hceil", {"hceil", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"hfloor", {"hfloor", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"hrint", {"hrint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"h2trunc", {"h2trunc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"h2ceil", {"h2ceil", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"h2floor", {"h2floor", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"h2rint", {"h2rint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__half2half2", {"__half2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__lowhigh2highlow", {"__lowhigh2highlow", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__lows2half2", {"__lows2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__highs2half2", {"__highs2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__high2half", {"__high2half", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__low2half", {"__low2half", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hisinf", {"__hisinf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__halves2half2", {"__halves2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__low2half2", {"__low2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__high2half2", {"__high2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__half_as_short", {"__half_as_short", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__half_as_ushort", {"__half_as_ushort", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__short_as_half", {"__short_as_half", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ushort_as_half", {"__ushort_as_half", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ldg", {"__ldg", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ldcg", {"__ldcg", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ldca", {"__ldca", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ldcs", {"__ldcs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__heq2", {"__heq2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hne2", {"__hne2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hle2", {"__hle2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hge2", {"__hge2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hlt2", {"__hlt2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hgt2", {"__hgt2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hequ2", {"__hequ2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hneu2", {"__hneu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hleu2", {"__hleu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hgeu2", {"__hgeu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hltu2", {"__hltu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hgtu2", {"__hgtu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hisnan2", {"__hisnan2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hadd2", {"__hadd2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hsub2", {"__hsub2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hmul2", {"__hmul2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__h2div", {"__h2div", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hadd2_sat", {"__hadd2_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hsub2_sat", {"__hsub2_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hmul2_sat", {"__hmul2_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hfma2", {"__hfma2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hfma2_sat", {"__hfma2_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hneg2", {"__hneg2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hsub", {"__hsub", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hmul", {"__hmul", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hdiv", {"__hdiv", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hadd_sat", {"__hadd_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hsub_sat", {"__hsub_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hmul_sat", {"__hmul_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hfma", {"__hfma", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hfma_sat", {"__hfma_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hneg", {"__hneg", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__habs2", {"__habs2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__habs", {"__habs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hbeq2", {"__hbeq2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hbne2", {"__hbne2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hble2", {"__hble2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hbge2", {"__hbge2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hblt2", {"__hblt2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hbgt2", {"__hbgt2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hbequ2", {"__hbequ2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hbneu2", {"__hbneu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hbleu2", {"__hbleu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hbgeu2", {"__hbgeu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hbltu2", {"__hbltu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hbgtu2", {"__hbgtu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__heq", {"__heq", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hne", {"__hne", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hle", {"__hle", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hge", {"__hge", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hlt", {"__hlt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hgt", {"__hgt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hequ", {"__hequ", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hneu", {"__hneu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hleu", {"__hleu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hgeu", {"__hgeu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hltu", {"__hltu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hgtu", {"__hgtu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__hisnan", {"__hisnan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"hsqrt", {"hsqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"hrsqrt", {"hrsqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"hrcp", {"hrcp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"hlog", {"hlog", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"hlog2", {"hlog2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"hlog10", {"hlog10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"hexp", {"hexp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"hexp2", {"hexp2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"hexp10", {"hexp10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"hcos", {"hcos", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"hsin", {"hsin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"h2sqrt", {"h2sqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"h2rsqrt", {"h2rsqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"h2rcp", {"h2rcp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"h2log", {"h2log", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"h2log2", {"h2log2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"h2log10", {"h2log10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"h2exp", {"h2exp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"h2exp2", {"h2exp2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"h2exp10", {"h2exp10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"h2cos", {"h2cos", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"h2sin", {"h2sin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__shfl_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__shfl", {"__shfl", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}},
- {"__shfl_up_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__shfl_up", {"__shfl_up", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}},
- {"__shfl_down_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__shfl_down", {"__shfl_down", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}},
- {"__shfl_xor_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
- {"__shfl_xor", {"__shfl_xor", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}},
- {"__funnelshift_l", {"__funnelshift_l", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__funnelshift_lc", {"__funnelshift_lc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__funnelshift_r", {"__funnelshift_r", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__funnelshift_rc", {"__funnelshift_rc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__float2half", {"__float2half", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__float2half_rn", {"__float2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__float2half_rz", {"__float2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__float2half_rd", {"__float2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__float2half_ru", {"__float2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__half2float", {"__half2float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__float2half2_rn", {"__float2half2_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__floats2half2_rn", {"__floats2half2_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__low2float", {"__low2float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__high2float", {"__high2float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__float22half2_rn", {"__float22half2_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__half22float2", {"__half22float2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__half2int_rn", {"__half2int_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__half2int_rz", {"__half2int_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__half2int_rd", {"__half2int_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__half2int_ru", {"__half2int_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__int2half_rn", {"__int2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__int2half_rz", {"__int2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__int2half_rd", {"__int2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__int2half_ru", {"__int2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__half2short_rn", {"__half2short_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__half2short_rz", {"__half2short_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__half2short_rd", {"__half2short_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__half2short_ru", {"__half2short_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__short2half_rn", {"__short2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__short2half_rz", {"__short2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__short2half_rd", {"__short2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__short2half_ru", {"__short2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__half2uint_rn", {"__half2uint_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__half2uint_rz", {"__half2uint_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__half2uint_rd", {"__half2uint_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__half2uint_ru", {"__half2uint_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__uint2half_rn", {"__uint2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__uint2half_rz", {"__uint2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__uint2half_rd", {"__uint2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__uint2half_ru", {"__uint2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__half2ushort_rn", {"__half2ushort_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__half2ushort_rz", {"__half2ushort_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__half2ushort_rd", {"__half2ushort_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__half2ushort_ru", {"__half2ushort_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ushort2half_rn", {"__ushort2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ushort2half_rz", {"__ushort2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ushort2half_rd", {"__ushort2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ushort2half_ru", {"__ushort2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__half2ull_rn", {"__half2ull_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__half2ull_rz", {"__half2ull_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__half2ull_rd", {"__half2ull_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__half2ull_ru", {"__half2ull_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ull2half_rn", {"__ull2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ull2half_rz", {"__ull2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ull2half_rd", {"__ull2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ull2half_ru", {"__ull2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__half2ll_rn", {"__half2ll_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__half2ll_rz", {"__half2ll_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__half2ll_rd", {"__half2ll_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__half2ll_ru", {"__half2ll_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ll2half_rn", {"__ll2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ll2half_rz", {"__ll2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ll2half_rd", {"__ll2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ll2half_ru", {"__ll2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"htrunc", {"htrunc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"hceil", {"hceil", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"hfloor", {"hfloor", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"hrint", {"hrint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"h2trunc", {"h2trunc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"h2ceil", {"h2ceil", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"h2floor", {"h2floor", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"h2rint", {"h2rint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__half2half2", {"__half2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__lowhigh2highlow", {"__lowhigh2highlow", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__lows2half2", {"__lows2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__highs2half2", {"__highs2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__high2half", {"__high2half", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__low2half", {"__low2half", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hisinf", {"__hisinf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__halves2half2", {"__halves2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__low2half2", {"__low2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__high2half2", {"__high2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__half_as_short", {"__half_as_short", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__half_as_ushort", {"__half_as_ushort", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__short_as_half", {"__short_as_half", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ushort_as_half", {"__ushort_as_half", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ldg", {"__ldg", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ldcg", {"__ldcg", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ldca", {"__ldca", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ldcs", {"__ldcs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ldlu", {"__ldlu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__ldcv", {"__ldcv", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__heq2", {"__heq2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hne2", {"__hne2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hle2", {"__hle2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hge2", {"__hge2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hlt2", {"__hlt2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hgt2", {"__hgt2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hequ2", {"__hequ2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hneu2", {"__hneu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hleu2", {"__hleu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hgeu2", {"__hgeu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hltu2", {"__hltu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hgtu2", {"__hgtu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hisnan2", {"__hisnan2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hadd2", {"__hadd2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hadd2_rn", {"__hadd2_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__hsub2", {"__hsub2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hsub2_rn", {"__hsub2_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__hmul2", {"__hmul2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hmul2_rn", {"__hmul2_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__h2div", {"__h2div", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hadd2_sat", {"__hadd2_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hsub2_sat", {"__hsub2_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hmul2_sat", {"__hmul2_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hfma2", {"__hfma2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hfma2_sat", {"__hfma2_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hfma2_relu", {"__hfma2_relu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__hneg2", {"__hneg2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hsub", {"__hsub", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hsub_rn", {"__hsub_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__hmul", {"__hmul", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hmul_rn", {"__hmul_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__hdiv", {"__hdiv", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hadd_sat", {"__hadd_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hsub_sat", {"__hsub_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hmul_sat", {"__hmul_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hfma", {"__hfma", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hfma_relu", {"__hfma_relu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__hfma_sat", {"__hfma_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hneg", {"__hneg", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__habs2", {"__habs2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__habs", {"__habs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hbeq2", {"__hbeq2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hbne2", {"__hbne2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hble2", {"__hble2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hbge2", {"__hbge2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hblt2", {"__hblt2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hbgt2", {"__hbgt2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hbequ2", {"__hbequ2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hbneu2", {"__hbneu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hbleu2", {"__hbleu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hbgeu2", {"__hbgeu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hbltu2", {"__hbltu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hbgtu2", {"__hbgtu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__heq", {"__heq", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hne", {"__hne", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hle", {"__hle", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hge", {"__hge", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hlt", {"__hlt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hgt", {"__hgt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hequ", {"__hequ", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hneu", {"__hneu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hleu", {"__hleu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hgeu", {"__hgeu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hltu", {"__hltu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hgtu", {"__hgtu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__hisnan", {"__hisnan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"hsqrt", {"hsqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"hrsqrt", {"hrsqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"hrcp", {"hrcp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"hlog", {"hlog", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"hlog2", {"hlog2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"hlog10", {"hlog10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"hexp", {"hexp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"hexp2", {"hexp2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"hexp10", {"hexp10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"hcos", {"hcos", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"hsin", {"hsin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"h2sqrt", {"h2sqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"h2rsqrt", {"h2rsqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"h2rcp", {"h2rcp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"h2log", {"h2log", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"h2log2", {"h2log2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"h2log10", {"h2log10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"h2exp", {"h2exp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"h2exp2", {"h2exp2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"h2exp10", {"h2exp10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"h2cos", {"h2cos", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"h2sin", {"h2sin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__shfl_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__shfl", {"__shfl", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}},
+ {"__shfl_up_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__shfl_up", {"__shfl_up", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}},
+ {"__shfl_down_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__shfl_down", {"__shfl_down", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}},
+ {"__shfl_xor_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__shfl_xor", {"__shfl_xor", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}},
+ {"__funnelshift_l", {"__funnelshift_l", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__funnelshift_lc", {"__funnelshift_lc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__funnelshift_r", {"__funnelshift_r", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__funnelshift_rc", {"__funnelshift_rc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__double2half", {"__double2half", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__hmax", {"__hmax", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__hmax_nan", {"__hmax_nan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__hmax2", {"__hmax2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__hmax2_nan", {"__hmax2_nan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__hmin", {"__hmin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__hmin_nan", {"__hmin_nan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__hmin2", {"__hmin2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__hmin2_nan", {"__hmin2_nan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__stwb", {"__stwb", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__stcg", {"__stcg", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__stcs", {"__stcs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__stwt", {"__stwt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__hcmadd", {"__hcmadd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ // bfp16 functions
+ {"__double2bfloat16", {"__double2bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__float2bfloat16", {"__float2bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__float2bfloat16_rn", {"__float2bfloat16_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__float2bfloat16_rz", {"__float2bfloat16_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__float2bfloat16_rd", {"__float2bfloat16_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__float2bfloat16_ru", {"__float2bfloat16_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__bfloat162float", {"__bfloat162float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__float2bfloat162_rn", {"__float2bfloat162_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__floats2bfloat162_rn", {"__floats2bfloat162_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__bfloat1622float2", {"__bfloat1622float2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__bfloat162int_rn", {"__bfloat162int_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__bfloat162int_rz", {"__bfloat162int_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__bfloat162int_rd", {"__bfloat162int_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__bfloat162int_ru", {"__bfloat162int_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__int2bfloat16_rn", {"__int2bfloat16_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__int2bfloat16_rz", {"__int2bfloat16_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__int2bfloat16_rd", {"__int2bfloat16_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__int2bfloat16_ru", {"__int2bfloat16_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__bfloat162short_rn", {"__bfloat162short_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__bfloat162short_rz", {"__bfloat162short_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__bfloat162short_rd", {"__bfloat162short_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__bfloat162short_ru", {"__bfloat162short_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__short2bfloat16_rn", {"__short2bfloat16_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__short2bfloat16_rz", {"__short2bfloat16_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__short2bfloat16_rd", {"__short2bfloat16_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__short2bfloat16_ru", {"__short2bfloat16_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__bfloat162uint_rn", {"__bfloat162uint_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__bfloat162uint_rz", {"__bfloat162uint_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__bfloat162uint_rd", {"__bfloat162uint_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__bfloat162uint_ru", {"__bfloat162uint_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__uint2bfloat16_rn", {"__uint2bfloat16_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__uint2bfloat16_rz", {"__uint2bfloat16_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__uint2bfloat16_rd", {"__uint2bfloat16_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__uint2bfloat16_ru", {"__uint2bfloat16_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__bfloat162ushort_rn", {"__bfloat162ushort_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__bfloat162ushort_rz", {"__bfloat162ushort_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__bfloat162ushort_rd", {"__bfloat162ushort_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__bfloat162ushort_ru", {"__bfloat162ushort_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__ushort2bfloat16_rn", {"__ushort2bfloat16_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__ushort2bfloat16_rz", {"__ushort2bfloat16_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__ushort2bfloat16_rd", {"__ushort2bfloat16_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__ushort2bfloat16_ru", {"__ushort2bfloat16_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__bfloat162ull_rn", {"__bfloat162ull_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__bfloat162ull_rz", {"__bfloat162ull_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__bfloat162ull_rd", {"__bfloat162ull_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__bfloat162ull_ru", {"__bfloat162ull_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__ull2bfloat16_rn", {"__ull2bfloat16_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__ull2bfloat16_rz", {"__ull2bfloat16_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__ull2bfloat16_rd", {"__ull2bfloat16_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__ull2bfloat16_ru", {"__ull2bfloat16_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__bfloat162ll_rn", {"__bfloat162ll_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__bfloat162ll_rz", {"__bfloat162ll_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__bfloat162ll_rd", {"__bfloat162ll_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__bfloat162ll_ru", {"__bfloat162ll_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__ll2bfloat16_rn", {"__ll2bfloat16_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__ll2bfloat16_rz", {"__ll2bfloat16_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__ll2bfloat16_rd", {"__ll2bfloat16_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__ll2bfloat16_ru", {"__ll2bfloat16_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__bfloat162bfloat162", {"__bfloat162bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__lows2bfloat162", {"__lows2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__highs2bfloat162", {"__highs2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__high2bfloat16", {"__high2bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__low2bfloat16", {"__low2bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__halves2bfloat162", {"__halves2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__low2bfloat162", {"__low2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__high2bfloat162", {"__high2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__bfloat16_as_short", {"__bfloat16_as_short", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__bfloat16_as_ushort", {"__bfloat16_as_ushort", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__short_as_bfloat16", {"__short_as_bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__ushort_as_bfloat16", {"__ushort_as_bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
// atomic functions
- {"atomicAdd", {"atomicAdd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"atomicAdd_system", {"atomicAdd_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"atomicSub", {"atomicSub", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"atomicSub_system", {"atomicSub_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"atomicExch", {"atomicExch", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"atomicExch_system", {"atomicExch_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"atomicMin", {"atomicMin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"atomicMin_system", {"atomicMin_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"atomicMax", {"atomicMax", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"atomicMax_system", {"atomicMax_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"atomicInc", {"atomicInc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"atomicDec", {"atomicDec", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"atomicAnd", {"atomicAnd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"atomicAnd_system", {"atomicAnd_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"atomicOr", {"atomicOr", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"atomicOr_system", {"atomicOr_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"atomicXor", {"atomicXor", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"atomicXor_system", {"atomicXor_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"atomicCAS", {"atomicCAS", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"atomicCAS_system", {"atomicCAS_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__all", {"__all", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__any", {"__any", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__ballot", {"__ballot", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"atomicAdd", {"atomicAdd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"atomicAdd_system", {"atomicAdd_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"atomicSub", {"atomicSub", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"atomicSub_system", {"atomicSub_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"atomicExch", {"atomicExch", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"atomicExch_system", {"atomicExch_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"atomicMin", {"atomicMin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"atomicMin_system", {"atomicMin_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"atomicMax", {"atomicMax", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"atomicMax_system", {"atomicMax_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"atomicInc", {"atomicInc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"atomicDec", {"atomicDec", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"atomicAnd", {"atomicAnd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"atomicAnd_system", {"atomicAnd_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"atomicOr", {"atomicOr", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"atomicOr_system", {"atomicOr_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"atomicXor", {"atomicXor", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"atomicXor_system", {"atomicXor_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"atomicCAS", {"atomicCAS", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"atomicCAS_system", {"atomicCAS_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__all", {"__all", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__any", {"__any", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__ballot", {"__ballot", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
// clock functions
- {"clock64", {"clock64", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"clock", {"clock", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"clock64", {"clock64", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"clock", {"clock", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
// common functions
- {"__assert_fail", {"__assert_fail", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
- {"__assertfail", {"__assertfail", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__assert_fail", {"__assert_fail", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ {"__assertfail", {"__assertfail", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
+ // fp8 functions
+ {"__nv_cvt_double_to_fp8", {"__hip_cvt_double_to_fp8", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__nv_cvt_double2_to_fp8x2", {"__hip_cvt_double2_to_fp8x2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__nv_cvt_float_to_fp8", {"__hip_cvt_float_to_fp8", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__nv_cvt_float2_to_fp8x2", {"__hip_cvt_float2_to_fp8x2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__nv_cvt_halfraw_to_fp8", {"__hip_cvt_halfraw_to_fp8", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__nv_cvt_halfraw2_to_fp8x2", {"__hip_cvt_halfraw2_to_fp8x2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__nv_cvt_bfloat16raw_to_fp8", {"__hip_cvt_bfloat16raw_to_fp8", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__nv_cvt_bfloat16raw2_to_fp8x2", {"__hip_cvt_bfloat16raw2_to_fp8x2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__nv_cvt_fp8_to_halfraw", {"__hip_cvt_fp8_to_halfraw", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
+ {"__nv_cvt_fp8x2_to_halfraw2", {"__hip_cvt_fp8x2_to_halfraw2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
};
const std::map CUDA_DEVICE_FUNCTION_VER_MAP {
- {"__shfl", {CUDA_75, CUDA_90, CUDA_0 }},
- {"__shfl_up", {CUDA_75, CUDA_90, CUDA_0 }},
- {"__shfl_down", {CUDA_75, CUDA_90, CUDA_0 }},
- {"__shfl_xor", {CUDA_75, CUDA_90, CUDA_0 }},
+ {"__shfl", {CUDA_75, CUDA_90, CUDA_0 }},
+ {"__shfl_up", {CUDA_75, CUDA_90, CUDA_0 }},
+ {"__shfl_down", {CUDA_75, CUDA_90, CUDA_0 }},
+ {"__shfl_xor", {CUDA_75, CUDA_90, CUDA_0 }},
+ {"__double2half", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__hmax", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__hmax2", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__hmin", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__hmin2", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__ldlu", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__ldcv", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__stwb", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__stcg", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__stcs", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__stwt", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__hmax_nan", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__hmin_nan", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__hmax2_nan", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__hmin2_nan", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__hfma_relu", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__hfma2_relu", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__double2bfloat16", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__float2bfloat16", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__float2bfloat16_rn", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__float2bfloat16_rz", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__float2bfloat16_rd", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__float2bfloat16_ru", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__bfloat162float", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__float2bfloat162_rn", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__floats2bfloat162_rn", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__bfloat1622float2", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__bfloat162int_rn", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__bfloat162int_rz", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__bfloat162int_rd", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__bfloat162int_ru", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__int2bfloat16_rn", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__int2bfloat16_rz", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__int2bfloat16_rd", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__int2bfloat16_ru", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__bfloat162short_rn", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__bfloat162short_rz", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__bfloat162short_rd", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__bfloat162short_ru", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__short2bfloat16_rn", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__short2bfloat16_rz", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__short2bfloat16_rd", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__short2bfloat16_ru", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__bfloat162uint_rn", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__bfloat162uint_rz", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__bfloat162uint_rd", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__bfloat162uint_ru", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__uint2bfloat16_rn", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__uint2bfloat16_rz", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__uint2bfloat16_rd", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__uint2bfloat16_ru", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__bfloat162ushort_rn", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__bfloat162ushort_rz", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__bfloat162ushort_rd", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__bfloat162ushort_ru", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__ushort2bfloat16_rn", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__ushort2bfloat16_rz", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__ushort2bfloat16_rd", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__ushort2bfloat16_ru", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__bfloat162ull_rn", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__bfloat162ull_rz", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__bfloat162ull_rd", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__bfloat162ull_ru", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__ull2bfloat16_rn", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__ull2bfloat16_rz", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__ull2bfloat16_rd", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__ull2bfloat16_ru", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__bfloat162ll_rn", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__bfloat162ll_rz", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__bfloat162ll_rd", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__bfloat162ll_ru", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__ll2bfloat16_rn", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__ll2bfloat16_rz", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__ll2bfloat16_rd", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__ll2bfloat16_ru", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__bfloat162bfloat162", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__lows2bfloat162", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__highs2bfloat162", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__high2bfloat16", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__low2bfloat16", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__halves2bfloat162", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__low2bfloat162", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__high2bfloat162", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__bfloat16_as_short", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__bfloat16_as_ushort", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__short_as_bfloat16", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__ushort_as_bfloat16", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__hcmadd", {CUDA_111, CUDA_0, CUDA_0 }},
+ {"__hadd2_rn", {CUDA_116, CUDA_0, CUDA_0 }},
+ {"__hsub2_rn", {CUDA_116, CUDA_0, CUDA_0 }},
+ {"__hmul2_rn", {CUDA_116, CUDA_0, CUDA_0 }},
+ {"__hadd_rn", {CUDA_116, CUDA_0, CUDA_0 }},
+ {"__hsub_rn", {CUDA_116, CUDA_0, CUDA_0 }},
+ {"__hmul_rn", {CUDA_116, CUDA_0, CUDA_0 }},
+ {"__nv_cvt_double_to_fp8", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"__nv_cvt_double2_to_fp8x2", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"__nv_cvt_float_to_fp8", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"__nv_cvt_float2_to_fp8x2", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"__nv_cvt_halfraw_to_fp8", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"__nv_cvt_halfraw2_to_fp8x2", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"__nv_cvt_bfloat16raw_to_fp8", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"__nv_cvt_bfloat16raw2_to_fp8x2", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"__nv_cvt_fp8_to_halfraw", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"__nv_cvt_fp8x2_to_halfraw2", {CUDA_118, CUDA_0, CUDA_0 }},
};
const std::map HIP_DEVICE_FUNCTION_VER_MAP {
- {"abs", {HIP_1060, HIP_0, HIP_0 }},
- {"labs", {HIP_1090, HIP_0, HIP_0 }},
- {"llabs", {HIP_1090, HIP_0, HIP_0 }},
- {"fabs", {HIP_1060, HIP_0, HIP_0 }},
- {"fabsf", {HIP_1060, HIP_0, HIP_0 }},
- {"min", {HIP_1060, HIP_0, HIP_0 }},
- {"fminf", {HIP_1060, HIP_0, HIP_0 }},
- {"fmin", {HIP_1060, HIP_0, HIP_0 }},
- {"max", {HIP_1060, HIP_0, HIP_0 }},
- {"fmaxf", {HIP_1060, HIP_0, HIP_0 }},
- {"fmax", {HIP_1060, HIP_0, HIP_0 }},
- {"sin", {HIP_1060, HIP_0, HIP_0 }},
- {"cos", {HIP_1060, HIP_0, HIP_0 }},
- {"sincos", {HIP_1060, HIP_0, HIP_0 }},
- {"sincosf", {HIP_1060, HIP_0, HIP_0 }},
- {"tan", {HIP_1060, HIP_0, HIP_0 }},
- {"sqrt", {HIP_1060, HIP_0, HIP_0 }},
- {"rsqrt", {HIP_1060, HIP_0, HIP_0 }},
- {"rsqrtf", {HIP_1060, HIP_0, HIP_0 }},
- {"log2", {HIP_1060, HIP_0, HIP_0 }},
- {"exp2", {HIP_1060, HIP_0, HIP_0 }},
- {"exp2f", {HIP_1060, HIP_0, HIP_0 }},
- {"exp10", {HIP_1060, HIP_0, HIP_0 }},
- {"exp10f", {HIP_1060, HIP_0, HIP_0 }},
- {"expm1", {HIP_1060, HIP_0, HIP_0 }},
- {"expm1f", {HIP_1060, HIP_0, HIP_0 }},
- {"log2f", {HIP_1060, HIP_0, HIP_0 }},
- {"log10", {HIP_1060, HIP_0, HIP_0 }},
- {"log", {HIP_1060, HIP_0, HIP_0 }},
- {"log1p", {HIP_1060, HIP_0, HIP_0 }},
- {"log1pf", {HIP_1060, HIP_0, HIP_0 }},
- {"floor", {HIP_1060, HIP_0, HIP_0 }},
- {"exp", {HIP_1060, HIP_0, HIP_0 }},
- {"cosh", {HIP_1060, HIP_0, HIP_0 }},
- {"sinh", {HIP_1060, HIP_0, HIP_0 }},
- {"tanh", {HIP_1060, HIP_0, HIP_0 }},
- {"acosh", {HIP_1060, HIP_0, HIP_0 }},
- {"acoshf", {HIP_1060, HIP_0, HIP_0 }},
- {"asinh", {HIP_1060, HIP_0, HIP_0 }},
- {"asinhf", {HIP_1060, HIP_0, HIP_0 }},
- {"atanh", {HIP_1060, HIP_0, HIP_0 }},
- {"atanhf", {HIP_1060, HIP_0, HIP_0 }},
- {"ldexp", {HIP_1060, HIP_0, HIP_0 }},
- {"ldexpf", {HIP_1060, HIP_0, HIP_0 }},
- {"logb", {HIP_1060, HIP_0, HIP_0 }},
- {"logbf", {HIP_1060, HIP_0, HIP_0 }},
- {"ilogb", {HIP_1060, HIP_0, HIP_0 }},
- {"ilogbf", {HIP_1060, HIP_0, HIP_0 }},
- {"scalbn", {HIP_1060, HIP_0, HIP_0 }},
- {"scalbnf", {HIP_1060, HIP_0, HIP_0 }},
- {"scalbln", {HIP_1060, HIP_0, HIP_0 }},
- {"scalblnf", {HIP_1060, HIP_0, HIP_0 }},
- {"frexp", {HIP_1060, HIP_0, HIP_0 }},
- {"frexpf", {HIP_1060, HIP_0, HIP_0 }},
- {"round", {HIP_1060, HIP_0, HIP_0 }},
- {"roundf", {HIP_1060, HIP_0, HIP_0 }},
- {"lround", {HIP_1060, HIP_0, HIP_0 }},
- {"lroundf", {HIP_1060, HIP_0, HIP_0 }},
- {"llround", {HIP_1060, HIP_0, HIP_0 }},
- {"llroundf", {HIP_1060, HIP_0, HIP_0 }},
- {"rint", {HIP_1060, HIP_0, HIP_0 }},
- {"rintf", {HIP_1060, HIP_0, HIP_0 }},
- {"lrint", {HIP_1060, HIP_0, HIP_0 }},
- {"lrintf", {HIP_1060, HIP_0, HIP_0 }},
- {"llrint", {HIP_1060, HIP_0, HIP_0 }},
- {"llrintf", {HIP_1060, HIP_0, HIP_0 }},
- {"nearbyint", {HIP_1060, HIP_0, HIP_0 }},
- {"nearbyintf", {HIP_1060, HIP_0, HIP_0 }},
- {"ceil", {HIP_1060, HIP_0, HIP_0 }},
- {"trunc", {HIP_1060, HIP_0, HIP_0 }},
- {"truncf", {HIP_1060, HIP_0, HIP_0 }},
- {"fdim", {HIP_1060, HIP_0, HIP_0 }},
- {"fdimf", {HIP_1060, HIP_0, HIP_0 }},
- {"atan2", {HIP_1060, HIP_0, HIP_0 }},
- {"atan", {HIP_1060, HIP_0, HIP_0 }},
- {"acos", {HIP_1060, HIP_0, HIP_0 }},
- {"asin", {HIP_1060, HIP_0, HIP_0 }},
- {"hypot", {HIP_1060, HIP_0, HIP_0 }},
- {"rhypot", {HIP_1060, HIP_0, HIP_0 }},
- {"hypotf", {HIP_1060, HIP_0, HIP_0 }},
- {"rhypotf", {HIP_1060, HIP_0, HIP_0 }},
- {"norm3d", {HIP_1060, HIP_0, HIP_0 }},
- {"rnorm3d", {HIP_1060, HIP_0, HIP_0 }},
- {"norm4d", {HIP_1060, HIP_0, HIP_0 }},
- {"rnorm4d", {HIP_1060, HIP_0, HIP_0 }},
- {"norm", {HIP_1060, HIP_0, HIP_0 }},
- {"rnorm", {HIP_1060, HIP_0, HIP_0 }},
- {"rnormf", {HIP_1060, HIP_0, HIP_0 }},
- {"normf", {HIP_1060, HIP_0, HIP_0 }},
- {"norm3df", {HIP_1060, HIP_0, HIP_0 }},
- {"rnorm3df", {HIP_1060, HIP_0, HIP_0 }},
- {"norm4df", {HIP_1060, HIP_0, HIP_0 }},
- {"rnorm4df", {HIP_1060, HIP_0, HIP_0 }},
- {"cbrt", {HIP_1060, HIP_0, HIP_0 }},
- {"cbrtf", {HIP_1060, HIP_0, HIP_0 }},
- {"rcbrt", {HIP_1060, HIP_0, HIP_0 }},
- {"rcbrtf", {HIP_1060, HIP_0, HIP_0 }},
- {"sinpi", {HIP_1060, HIP_0, HIP_0 }},
- {"sinpif", {HIP_1060, HIP_0, HIP_0 }},
- {"cospi", {HIP_1060, HIP_0, HIP_0 }},
- {"cospif", {HIP_1060, HIP_0, HIP_0 }},
- {"sincospi", {HIP_1060, HIP_0, HIP_0 }},
- {"sincospif", {HIP_1060, HIP_0, HIP_0 }},
- {"pow", {HIP_1060, HIP_0, HIP_0 }},
- {"modf", {HIP_1090, HIP_0, HIP_0 }},
- {"fmod", {HIP_1060, HIP_0, HIP_0 }},
- {"remainder", {HIP_1060, HIP_0, HIP_0 }},
- {"remainderf", {HIP_1060, HIP_0, HIP_0 }},
- {"remquo", {HIP_1090, HIP_0, HIP_0 }},
- {"remquof", {HIP_1060, HIP_0, HIP_0 }},
- {"j0", {HIP_1060, HIP_0, HIP_0 }},
- {"j0f", {HIP_1060, HIP_0, HIP_0 }},
- {"j1", {HIP_1060, HIP_0, HIP_0 }},
- {"j1f", {HIP_1060, HIP_0, HIP_0 }},
- {"jn", {HIP_1060, HIP_0, HIP_0 }},
- {"jnf", {HIP_1060, HIP_0, HIP_0 }},
- {"y0", {HIP_1060, HIP_0, HIP_0 }},
- {"y0f", {HIP_1060, HIP_0, HIP_0 }},
- {"y1", {HIP_1060, HIP_0, HIP_0 }},
- {"y1f", {HIP_1060, HIP_0, HIP_0 }},
- {"yn", {HIP_1060, HIP_0, HIP_0 }},
- {"ynf", {HIP_1060, HIP_0, HIP_0 }},
- {"cyl_bessel_i0", {HIP_1090, HIP_0, HIP_0 }},
- {"cyl_bessel_i0f", {HIP_1090, HIP_0, HIP_0 }},
- {"cyl_bessel_i1", {HIP_1090, HIP_0, HIP_0 }},
- {"cyl_bessel_i1f", {HIP_1090, HIP_0, HIP_0 }},
- {"erf", {HIP_1060, HIP_0, HIP_0 }},
- {"erff", {HIP_1060, HIP_0, HIP_0 }},
- {"erfinv", {HIP_1060, HIP_0, HIP_0 }},
- {"erfinvf", {HIP_1060, HIP_0, HIP_0 }},
- {"erfc", {HIP_1060, HIP_0, HIP_0 }},
- {"erfcf", {HIP_1060, HIP_0, HIP_0 }},
- {"lgamma", {HIP_1060, HIP_0, HIP_0 }},
- {"erfcinv", {HIP_1060, HIP_0, HIP_0 }},
- {"erfcinvf", {HIP_1060, HIP_0, HIP_0 }},
- {"normcdfinv", {HIP_1060, HIP_0, HIP_0 }},
- {"normcdfinvf", {HIP_1060, HIP_0, HIP_0 }},
- {"normcdf", {HIP_1060, HIP_0, HIP_0 }},
- {"normcdff", {HIP_1060, HIP_0, HIP_0 }},
- {"erfcx", {HIP_1060, HIP_0, HIP_0 }},
- {"erfcxf", {HIP_1060, HIP_0, HIP_0 }},
- {"lgammaf", {HIP_1060, HIP_0, HIP_0 }},
- {"tgamma", {HIP_1060, HIP_0, HIP_0 }},
- {"tgammaf", {HIP_1060, HIP_0, HIP_0 }},
- {"copysign", {HIP_1060, HIP_0, HIP_0 }},
- {"copysignf", {HIP_1060, HIP_0, HIP_0 }},
- {"nextafter", {HIP_1060, HIP_0, HIP_0 }},
- {"nextafterf", {HIP_1090, HIP_0, HIP_0 }},
- {"nan", {HIP_1060, HIP_0, HIP_0 }},
- {"nanf", {HIP_1060, HIP_0, HIP_0 }},
- {"fma", {HIP_1060, HIP_0, HIP_0 }},
- {"fmaf", {HIP_1060, HIP_0, HIP_0 }},
- {"acosf", {HIP_1060, HIP_0, HIP_0 }},
- {"asinf", {HIP_1060, HIP_0, HIP_0 }},
- {"atanf", {HIP_1060, HIP_0, HIP_0 }},
- {"atan2f", {HIP_1060, HIP_0, HIP_0 }},
- {"cosf", {HIP_1060, HIP_0, HIP_0 }},
- {"sinf", {HIP_1060, HIP_0, HIP_0 }},
- {"tanf", {HIP_1060, HIP_0, HIP_0 }},
- {"coshf", {HIP_1060, HIP_0, HIP_0 }},
- {"sinhf", {HIP_1060, HIP_0, HIP_0 }},
- {"tanhf", {HIP_1060, HIP_0, HIP_0 }},
- {"expf", {HIP_1060, HIP_0, HIP_0 }},
- {"logf", {HIP_1060, HIP_0, HIP_0 }},
- {"log10f", {HIP_1060, HIP_0, HIP_0 }},
- {"modff", {HIP_1090, HIP_0, HIP_0 }},
- {"powf", {HIP_1060, HIP_0, HIP_0 }},
- {"sqrtf", {HIP_1060, HIP_0, HIP_0 }},
- {"ceilf", {HIP_1060, HIP_0, HIP_0 }},
- {"floorf", {HIP_1060, HIP_0, HIP_0 }},
- {"fmodf", {HIP_1060, HIP_0, HIP_0 }},
- {"signbit", {HIP_1060, HIP_0, HIP_0 }},
- {"isfinite", {HIP_1060, HIP_0, HIP_0 }},
- {"isnan", {HIP_1060, HIP_0, HIP_0 }},
- {"isinf", {HIP_1060, HIP_0, HIP_0 }},
- {"__mulhi", {HIP_1060, HIP_0, HIP_0 }},
- {"__umulhi", {HIP_1060, HIP_0, HIP_0 }},
- {"__mul64hi", {HIP_1060, HIP_0, HIP_0 }},
- {"__umul64hi", {HIP_1060, HIP_0, HIP_0 }},
- {"__int_as_float", {HIP_1060, HIP_0, HIP_0 }},
- {"__float_as_int", {HIP_1060, HIP_0, HIP_0 }},
- {"__float_as_uint", {HIP_1060, HIP_0, HIP_0 }},
- {"__uint_as_float", {HIP_1060, HIP_0, HIP_0 }},
- {"__syncthreads", {HIP_1060, HIP_0, HIP_0 }},
- {"__syncthreads_count", {HIP_3070, HIP_0, HIP_0 }},
- {"__syncthreads_and", {HIP_3070, HIP_0, HIP_0 }},
- {"__syncthreads_or", {HIP_3070, HIP_0, HIP_0 }},
- {"__threadfence", {HIP_1060, HIP_0, HIP_0 }},
- {"__threadfence_block", {HIP_1060, HIP_0, HIP_0 }},
- {"__threadfence_system",{HIP_1060, HIP_0, HIP_0 }},
- {"__saturatef", {HIP_1060, HIP_0, HIP_0 }},
- {"__sad", {HIP_1060, HIP_0, HIP_0 }},
- {"__usad", {HIP_1060, HIP_0, HIP_0 }},
- {"__mul24", {HIP_1060, HIP_0, HIP_0 }},
- {"__umul24", {HIP_1060, HIP_0, HIP_0 }},
- {"fdividef", {HIP_1060, HIP_0, HIP_0 }},
- {"__fdividef", {HIP_1060, HIP_0, HIP_0 }},
- {"__sinf", {HIP_1060, HIP_0, HIP_0 }},
- {"__cosf", {HIP_1060, HIP_0, HIP_0 }},
- {"__tanf", {HIP_1060, HIP_0, HIP_0 }},
- {"__sincosf", {HIP_1060, HIP_0, HIP_0 }},
- {"__expf", {HIP_1060, HIP_0, HIP_0 }},
- {"__exp10f", {HIP_1060, HIP_0, HIP_0 }},
- {"__log2f", {HIP_1060, HIP_0, HIP_0 }},
- {"__log10f", {HIP_1060, HIP_0, HIP_0 }},
- {"__logf", {HIP_1060, HIP_0, HIP_0 }},
- {"__powf", {HIP_1060, HIP_0, HIP_0 }},
- {"__float2int_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__float2int_rz", {HIP_1060, HIP_0, HIP_0 }},
- {"__float2int_ru", {HIP_1060, HIP_0, HIP_0 }},
- {"__float2int_rd", {HIP_1060, HIP_0, HIP_0 }},
- {"__float2uint_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__float2uint_rz", {HIP_1060, HIP_0, HIP_0 }},
- {"__float2uint_ru", {HIP_1060, HIP_0, HIP_0 }},
- {"__float2uint_rd", {HIP_1060, HIP_0, HIP_0 }},
- {"__int2float_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__int2float_rz", {HIP_1060, HIP_0, HIP_0 }},
- {"__int2float_ru", {HIP_1060, HIP_0, HIP_0 }},
- {"__int2float_rd", {HIP_1060, HIP_0, HIP_0 }},
- {"__uint2float_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__uint2float_rz", {HIP_1060, HIP_0, HIP_0 }},
- {"__uint2float_ru", {HIP_1060, HIP_0, HIP_0 }},
- {"__uint2float_rd", {HIP_1060, HIP_0, HIP_0 }},
- {"__float2ll_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__float2ll_rz", {HIP_1060, HIP_0, HIP_0 }},
- {"__float2ll_ru", {HIP_1060, HIP_0, HIP_0 }},
- {"__float2ll_rd", {HIP_1060, HIP_0, HIP_0 }},
- {"__float2ull_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__float2ull_rz", {HIP_1060, HIP_0, HIP_0 }},
- {"__float2ull_ru", {HIP_1060, HIP_0, HIP_0 }},
- {"__float2ull_rd", {HIP_1060, HIP_0, HIP_0 }},
- {"__ll2float_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__ll2float_rz", {HIP_1060, HIP_0, HIP_0 }},
- {"__ll2float_ru", {HIP_1060, HIP_0, HIP_0 }},
- {"__ll2float_rd", {HIP_1060, HIP_0, HIP_0 }},
- {"__ull2float_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__ull2float_rz", {HIP_1060, HIP_0, HIP_0 }},
- {"__ull2float_ru", {HIP_1060, HIP_0, HIP_0 }},
- {"__ull2float_rd", {HIP_1060, HIP_0, HIP_0 }},
- {"__fadd_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__fsub_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__fmul_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__fmaf_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__frcp_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__fsqrt_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__frsqrt_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__fdiv_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__clz", {HIP_1060, HIP_0, HIP_0 }},
- {"__ffs", {HIP_1060, HIP_0, HIP_0 }},
- {"__popc", {HIP_1060, HIP_0, HIP_0 }},
- {"__brev", {HIP_1060, HIP_0, HIP_0 }},
- {"__clzll", {HIP_1060, HIP_0, HIP_0 }},
- {"__ffsll", {HIP_1060, HIP_0, HIP_0 }},
- {"__popcll", {HIP_1060, HIP_0, HIP_0 }},
- {"__brevll", {HIP_1060, HIP_0, HIP_0 }},
- {"__byte_perm", {HIP_1060, HIP_0, HIP_0 }},
- {"__hadd", {HIP_1060, HIP_0, HIP_0 }},
- {"__rhadd", {HIP_1060, HIP_0, HIP_0 }},
- {"__uhadd", {HIP_1060, HIP_0, HIP_0 }},
- {"__urhadd", {HIP_1060, HIP_0, HIP_0 }},
- {"__double2float_rd", {HIP_1060, HIP_0, HIP_0 }},
- {"__double2float_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__double2float_ru", {HIP_1060, HIP_0, HIP_0 }},
- {"__double2float_rz", {HIP_1060, HIP_0, HIP_0 }},
- {"__double2hiint", {HIP_1060, HIP_0, HIP_0 }},
- {"__double2loint", {HIP_1060, HIP_0, HIP_0 }},
- {"__double2int_rd", {HIP_1060, HIP_0, HIP_0 }},
- {"__double2int_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__double2int_ru", {HIP_1060, HIP_0, HIP_0 }},
- {"__double2int_rz", {HIP_1060, HIP_0, HIP_0 }},
- {"__double2ll_rd", {HIP_1060, HIP_0, HIP_0 }},
- {"__double2ll_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__double2ll_ru", {HIP_1060, HIP_0, HIP_0 }},
- {"__double2ll_rz", {HIP_1060, HIP_0, HIP_0 }},
- {"__double2uint_rd", {HIP_1060, HIP_0, HIP_0 }},
- {"__double2uint_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__double2uint_ru", {HIP_1060, HIP_0, HIP_0 }},
- {"__double2uint_rz", {HIP_1060, HIP_0, HIP_0 }},
- {"__double2ull_rd", {HIP_1060, HIP_0, HIP_0 }},
- {"__double2ull_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__double2ull_ru", {HIP_1060, HIP_0, HIP_0 }},
- {"__double2ull_rz", {HIP_1060, HIP_0, HIP_0 }},
- {"__double_as_longlong",{HIP_1060, HIP_0, HIP_0 }},
- {"__hiloint2double", {HIP_1060, HIP_0, HIP_0 }},
- {"__int2double_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__ll2double_rd", {HIP_1060, HIP_0, HIP_0 }},
- {"__ll2double_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__ll2double_ru", {HIP_1060, HIP_0, HIP_0 }},
- {"__ll2double_rz", {HIP_1060, HIP_0, HIP_0 }},
- {"__longlong_as_double",{HIP_1060, HIP_0, HIP_0 }},
- {"__uint2double_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__ull2double_rd", {HIP_1060, HIP_0, HIP_0 }},
- {"__ull2double_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__ull2double_ru", {HIP_1060, HIP_0, HIP_0 }},
- {"__ull2double_rz", {HIP_1060, HIP_0, HIP_0 }},
- {"__float2half", {HIP_1060, HIP_0, HIP_0 }},
- {"__float2half_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__float2half_rz", {HIP_1060, HIP_0, HIP_0 }},
- {"__float2half_rd", {HIP_1060, HIP_0, HIP_0 }},
- {"__float2half_ru", {HIP_1060, HIP_0, HIP_0 }},
- {"__half2float", {HIP_1060, HIP_0, HIP_0 }},
- {"__float2half2_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__floats2half2_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__low2float", {HIP_1060, HIP_0, HIP_0 }},
- {"__high2float", {HIP_1060, HIP_0, HIP_0 }},
- {"__float22half2_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__half22float2", {HIP_1060, HIP_0, HIP_0 }},
- {"__half2int_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__half2int_rz", {HIP_1060, HIP_0, HIP_0 }},
- {"__half2int_rd", {HIP_1060, HIP_0, HIP_0 }},
- {"__half2int_ru", {HIP_1060, HIP_0, HIP_0 }},
- {"__int2half_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__int2half_rz", {HIP_1060, HIP_0, HIP_0 }},
- {"__int2half_rd", {HIP_1060, HIP_0, HIP_0 }},
- {"__int2half_ru", {HIP_1060, HIP_0, HIP_0 }},
- {"__half2short_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__half2short_rz", {HIP_1060, HIP_0, HIP_0 }},
- {"__half2short_rd", {HIP_1060, HIP_0, HIP_0 }},
- {"__half2short_ru", {HIP_1060, HIP_0, HIP_0 }},
- {"__short2half_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__short2half_rz", {HIP_1060, HIP_0, HIP_0 }},
- {"__short2half_rd", {HIP_1060, HIP_0, HIP_0 }},
- {"__short2half_ru", {HIP_1060, HIP_0, HIP_0 }},
- {"__half2uint_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__half2uint_rz", {HIP_1060, HIP_0, HIP_0 }},
- {"__half2uint_rd", {HIP_1060, HIP_0, HIP_0 }},
- {"__half2uint_ru", {HIP_1060, HIP_0, HIP_0 }},
- {"__uint2half_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__uint2half_rz", {HIP_1060, HIP_0, HIP_0 }},
- {"__uint2half_rd", {HIP_1060, HIP_0, HIP_0 }},
- {"__uint2half_ru", {HIP_1060, HIP_0, HIP_0 }},
- {"__half2ushort_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__half2ushort_rz", {HIP_1060, HIP_0, HIP_0 }},
- {"__half2ushort_rd", {HIP_1060, HIP_0, HIP_0 }},
- {"__half2ushort_ru", {HIP_1060, HIP_0, HIP_0 }},
- {"__ushort2half_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__ushort2half_rz", {HIP_1060, HIP_0, HIP_0 }},
- {"__ushort2half_rd", {HIP_1060, HIP_0, HIP_0 }},
- {"__ushort2half_ru", {HIP_1060, HIP_0, HIP_0 }},
- {"__half2ull_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__half2ull_rz", {HIP_1060, HIP_0, HIP_0 }},
- {"__half2ull_rd", {HIP_1060, HIP_0, HIP_0 }},
- {"__half2ull_ru", {HIP_1060, HIP_0, HIP_0 }},
- {"__ull2half_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__ull2half_rz", {HIP_1060, HIP_0, HIP_0 }},
- {"__ull2half_rd", {HIP_1060, HIP_0, HIP_0 }},
- {"__ull2half_ru", {HIP_1060, HIP_0, HIP_0 }},
- {"__half2ll_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__half2ll_rz", {HIP_1060, HIP_0, HIP_0 }},
- {"__half2ll_rd", {HIP_1060, HIP_0, HIP_0 }},
- {"__half2ll_ru", {HIP_1060, HIP_0, HIP_0 }},
- {"__ll2half_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__ll2half_rz", {HIP_1060, HIP_0, HIP_0 }},
- {"__ll2half_rd", {HIP_1060, HIP_0, HIP_0 }},
- {"__ll2half_ru", {HIP_1060, HIP_0, HIP_0 }},
- {"htrunc", {HIP_1060, HIP_0, HIP_0 }},
- {"hceil", {HIP_1060, HIP_0, HIP_0 }},
- {"hfloor", {HIP_1060, HIP_0, HIP_0 }},
- {"hrint", {HIP_1060, HIP_0, HIP_0 }},
- {"h2trunc", {HIP_1060, HIP_0, HIP_0 }},
- {"h2ceil", {HIP_1060, HIP_0, HIP_0 }},
- {"h2floor", {HIP_1060, HIP_0, HIP_0 }},
- {"h2rint", {HIP_1090, HIP_0, HIP_0 }},
- {"__half2half2", {HIP_1090, HIP_0, HIP_0 }},
- {"__lowhigh2highlow", {HIP_1060, HIP_0, HIP_0 }},
- {"__lows2half2", {HIP_1060, HIP_0, HIP_0 }},
- {"__highs2half2", {HIP_1060, HIP_0, HIP_0 }},
- {"__high2half", {HIP_1060, HIP_0, HIP_0 }},
- {"__low2half", {HIP_1060, HIP_0, HIP_0 }},
- {"__hisinf", {HIP_1060, HIP_0, HIP_0 }},
- {"__halves2half2", {HIP_1060, HIP_0, HIP_0 }},
- {"__low2half2", {HIP_1060, HIP_0, HIP_0 }},
- {"__high2half2", {HIP_1060, HIP_0, HIP_0 }},
- {"__half_as_short", {HIP_1060, HIP_0, HIP_0 }},
- {"__half_as_ushort", {HIP_1060, HIP_0, HIP_0 }},
- {"__short_as_half", {HIP_1090, HIP_0, HIP_0 }},
- {"__ushort_as_half", {HIP_1060, HIP_0, HIP_0 }},
- {"__ldg", {HIP_1060, HIP_0, HIP_0 }},
- {"__ldcg", {HIP_1090, HIP_0, HIP_0 }},
- {"__ldca", {HIP_1090, HIP_0, HIP_0 }},
- {"__ldcs", {HIP_1090, HIP_0, HIP_0 }},
- {"__heq2", {HIP_1060, HIP_0, HIP_0 }},
- {"__hne2", {HIP_1060, HIP_0, HIP_0 }},
- {"__hle2", {HIP_1060, HIP_0, HIP_0 }},
- {"__hge2", {HIP_1060, HIP_0, HIP_0 }},
- {"__hlt2", {HIP_1060, HIP_0, HIP_0 }},
- {"__hgt2", {HIP_1060, HIP_0, HIP_0 }},
- {"__hequ2", {HIP_1090, HIP_0, HIP_0 }},
- {"__hneu2", {HIP_1090, HIP_0, HIP_0 }},
- {"__hleu2", {HIP_1090, HIP_0, HIP_0 }},
- {"__hgeu2", {HIP_1090, HIP_0, HIP_0 }},
- {"__hltu2", {HIP_1090, HIP_0, HIP_0 }},
- {"__hgtu2", {HIP_1090, HIP_0, HIP_0 }},
- {"__hisnan2", {HIP_1060, HIP_0, HIP_0 }},
- {"__hadd2", {HIP_1060, HIP_0, HIP_0 }},
- {"__hsub2", {HIP_1060, HIP_0, HIP_0 }},
- {"__hmul2", {HIP_1060, HIP_0, HIP_0 }},
- {"__h2div", {HIP_1090, HIP_0, HIP_0 }},
- {"__hadd2_sat", {HIP_1060, HIP_0, HIP_0 }},
- {"__hsub2_sat", {HIP_1060, HIP_0, HIP_0 }},
- {"__hmul2_sat", {HIP_1060, HIP_0, HIP_0 }},
- {"__hfma2", {HIP_1060, HIP_0, HIP_0 }},
- {"__hfma2_sat", {HIP_1060, HIP_0, HIP_0 }},
- {"__hneg2", {HIP_1060, HIP_0, HIP_0 }},
- {"__hsub", {HIP_1060, HIP_0, HIP_0 }},
- {"__hmul", {HIP_1060, HIP_0, HIP_0 }},
- {"__hdiv", {HIP_1090, HIP_0, HIP_0 }},
- {"__hadd_sat", {HIP_1060, HIP_0, HIP_0 }},
- {"__hsub_sat", {HIP_1060, HIP_0, HIP_0 }},
- {"__hmul_sat", {HIP_1060, HIP_0, HIP_0 }},
- {"__hfma", {HIP_1060, HIP_0, HIP_0 }},
- {"__hfma_sat", {HIP_1060, HIP_0, HIP_0 }},
- {"__hneg", {HIP_1060, HIP_0, HIP_0 }},
- {"__habs2", {HIP_3050, HIP_0, HIP_0 }},
- {"__habs", {HIP_3050, HIP_0, HIP_0 }},
- {"__hbeq2", {HIP_1060, HIP_0, HIP_0 }},
- {"__hbne2", {HIP_1060, HIP_0, HIP_0 }},
- {"__hble2", {HIP_1060, HIP_0, HIP_0 }},
- {"__hbge2", {HIP_1060, HIP_0, HIP_0 }},
- {"__hblt2", {HIP_1060, HIP_0, HIP_0 }},
- {"__hbgt2", {HIP_1060, HIP_0, HIP_0 }},
- {"__hbequ2", {HIP_1090, HIP_0, HIP_0 }},
- {"__hbneu2", {HIP_1090, HIP_0, HIP_0 }},
- {"__hbleu2", {HIP_1090, HIP_0, HIP_0 }},
- {"__hbgeu2", {HIP_1090, HIP_0, HIP_0 }},
- {"__hbltu2", {HIP_1090, HIP_0, HIP_0 }},
- {"__hbgtu2", {HIP_1090, HIP_0, HIP_0 }},
- {"__heq", {HIP_1060, HIP_0, HIP_0 }},
- {"__hne", {HIP_1060, HIP_0, HIP_0 }},
- {"__hle", {HIP_1060, HIP_0, HIP_0 }},
- {"__hge", {HIP_1060, HIP_0, HIP_0 }},
- {"__hlt", {HIP_1060, HIP_0, HIP_0 }},
- {"__hgt", {HIP_1060, HIP_0, HIP_0 }},
- {"__hequ", {HIP_1090, HIP_0, HIP_0 }},
- {"__hneu", {HIP_1090, HIP_0, HIP_0 }},
- {"__hleu", {HIP_1090, HIP_0, HIP_0 }},
- {"__hgeu", {HIP_1090, HIP_0, HIP_0 }},
- {"__hltu", {HIP_1090, HIP_0, HIP_0 }},
- {"__hgtu", {HIP_1090, HIP_0, HIP_0 }},
- {"__hisnan", {HIP_1060, HIP_0, HIP_0 }},
- {"hsqrt", {HIP_1060, HIP_0, HIP_0 }},
- {"hrsqrt", {HIP_1060, HIP_0, HIP_0 }},
- {"hrcp", {HIP_1090, HIP_0, HIP_0 }},
- {"hlog", {HIP_1060, HIP_0, HIP_0 }},
- {"hlog2", {HIP_1060, HIP_0, HIP_0 }},
- {"hlog10", {HIP_1060, HIP_0, HIP_0 }},
- {"hexp", {HIP_1060, HIP_0, HIP_0 }},
- {"hexp2", {HIP_1060, HIP_0, HIP_0 }},
- {"hexp10", {HIP_1060, HIP_0, HIP_0 }},
- {"hcos", {HIP_1060, HIP_0, HIP_0 }},
- {"hsin", {HIP_1060, HIP_0, HIP_0 }},
- {"h2sqrt", {HIP_1060, HIP_0, HIP_0 }},
- {"h2rsqrt", {HIP_1060, HIP_0, HIP_0 }},
- {"h2rcp", {HIP_1060, HIP_0, HIP_0 }},
- {"h2log", {HIP_1060, HIP_0, HIP_0 }},
- {"h2log2", {HIP_1060, HIP_0, HIP_0 }},
- {"h2log10", {HIP_1060, HIP_0, HIP_0 }},
- {"h2exp", {HIP_1060, HIP_0, HIP_0 }},
- {"h2exp2", {HIP_1060, HIP_0, HIP_0 }},
- {"h2exp10", {HIP_1060, HIP_0, HIP_0 }},
- {"h2cos", {HIP_1060, HIP_0, HIP_0 }},
- {"h2sin", {HIP_1060, HIP_0, HIP_0 }},
- {"__shfl", {HIP_1060, HIP_0, HIP_0 }},
- {"__shfl_up", {HIP_1060, HIP_0, HIP_0 }},
- {"__shfl_down", {HIP_1060, HIP_0, HIP_0 }},
- {"__shfl_xor", {HIP_1060, HIP_0, HIP_0 }},
- {"atomicAdd", {HIP_1060, HIP_0, HIP_0 }},
- {"atomicSub", {HIP_1060, HIP_0, HIP_0 }},
- {"atomicExch", {HIP_1060, HIP_0, HIP_0 }},
- {"atomicMin", {HIP_1060, HIP_0, HIP_0 }},
- {"atomicMax", {HIP_1060, HIP_0, HIP_0 }},
- {"atomicInc", {HIP_1060, HIP_0, HIP_0 }},
- {"atomicDec", {HIP_1060, HIP_0, HIP_0 }},
- {"atomicAnd", {HIP_1060, HIP_0, HIP_0 }},
- {"atomicOr", {HIP_1060, HIP_0, HIP_0 }},
- {"atomicXor", {HIP_1060, HIP_0, HIP_0 }},
- {"atomicCAS", {HIP_1060, HIP_0, HIP_0 }},
- {"__all", {HIP_1060, HIP_0, HIP_0 }},
- {"__any", {HIP_1060, HIP_0, HIP_0 }},
- {"__ballot", {HIP_1060, HIP_0, HIP_0 }},
- {"clock64", {HIP_1060, HIP_0, HIP_0 }},
- {"clock", {HIP_1060, HIP_0, HIP_0 }},
- {"__dadd_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__ddiv_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__dmul_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__drcp_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__dsqrt_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__dsub_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__fma_rn", {HIP_1060, HIP_0, HIP_0 }},
- {"__assert_fail", {HIP_1090, HIP_0, HIP_0 }},
- {"__assertfail", {HIP_1090, HIP_0, HIP_0 }},
- {"atomicCAS_system", {HIP_4030, HIP_0, HIP_0 }},
- {"atomicSub_system", {HIP_4030, HIP_0, HIP_0 }},
- {"atomicAdd_system", {HIP_4030, HIP_0, HIP_0 }},
- {"atomicExch_system", {HIP_4030, HIP_0, HIP_0 }},
- {"atomicMin_system", {HIP_4030, HIP_0, HIP_0 }},
- {"atomicMax_system", {HIP_4030, HIP_0, HIP_0 }},
- {"atomicAnd_system", {HIP_4030, HIP_0, HIP_0 }},
- {"atomicOr_system", {HIP_4030, HIP_0, HIP_0 }},
- {"atomicXor_system", {HIP_4030, HIP_0, HIP_0 }},
- {"__funnelshift_l", {HIP_4040, HIP_0, HIP_0 }},
- {"__funnelshift_lc", {HIP_4040, HIP_0, HIP_0 }},
- {"__funnelshift_r", {HIP_4040, HIP_0, HIP_0 }},
- {"__funnelshift_rc", {HIP_4040, HIP_0, HIP_0 }},
-};
-
-const std::map CUDA_DEVICE_TYPE_NAME_MAP {
-};
-
-const std::map CUDA_DEVICE_TYPE_NAME_VER_MAP {
-};
-
-const std::map HIP_DEVICE_TYPE_NAME_VER_MAP {
+ {"abs", {HIP_1060, HIP_0, HIP_0 }},
+ {"labs", {HIP_1090, HIP_0, HIP_0 }},
+ {"llabs", {HIP_1090, HIP_0, HIP_0 }},
+ {"fabs", {HIP_1060, HIP_0, HIP_0 }},
+ {"fabsf", {HIP_1060, HIP_0, HIP_0 }},
+ {"min", {HIP_1060, HIP_0, HIP_0 }},
+ {"fminf", {HIP_1060, HIP_0, HIP_0 }},
+ {"fmin", {HIP_1060, HIP_0, HIP_0 }},
+ {"max", {HIP_1060, HIP_0, HIP_0 }},
+ {"fmaxf", {HIP_1060, HIP_0, HIP_0 }},
+ {"fmax", {HIP_1060, HIP_0, HIP_0 }},
+ {"sin", {HIP_1060, HIP_0, HIP_0 }},
+ {"cos", {HIP_1060, HIP_0, HIP_0 }},
+ {"sincos", {HIP_1060, HIP_0, HIP_0 }},
+ {"sincosf", {HIP_1060, HIP_0, HIP_0 }},
+ {"tan", {HIP_1060, HIP_0, HIP_0 }},
+ {"sqrt", {HIP_1060, HIP_0, HIP_0 }},
+ {"rsqrt", {HIP_1060, HIP_0, HIP_0 }},
+ {"rsqrtf", {HIP_1060, HIP_0, HIP_0 }},
+ {"log2", {HIP_1060, HIP_0, HIP_0 }},
+ {"exp2", {HIP_1060, HIP_0, HIP_0 }},
+ {"exp2f", {HIP_1060, HIP_0, HIP_0 }},
+ {"exp10", {HIP_1060, HIP_0, HIP_0 }},
+ {"exp10f", {HIP_1060, HIP_0, HIP_0 }},
+ {"expm1", {HIP_1060, HIP_0, HIP_0 }},
+ {"expm1f", {HIP_1060, HIP_0, HIP_0 }},
+ {"log2f", {HIP_1060, HIP_0, HIP_0 }},
+ {"log10", {HIP_1060, HIP_0, HIP_0 }},
+ {"log", {HIP_1060, HIP_0, HIP_0 }},
+ {"log1p", {HIP_1060, HIP_0, HIP_0 }},
+ {"log1pf", {HIP_1060, HIP_0, HIP_0 }},
+ {"floor", {HIP_1060, HIP_0, HIP_0 }},
+ {"exp", {HIP_1060, HIP_0, HIP_0 }},
+ {"cosh", {HIP_1060, HIP_0, HIP_0 }},
+ {"sinh", {HIP_1060, HIP_0, HIP_0 }},
+ {"tanh", {HIP_1060, HIP_0, HIP_0 }},
+ {"acosh", {HIP_1060, HIP_0, HIP_0 }},
+ {"acoshf", {HIP_1060, HIP_0, HIP_0 }},
+ {"asinh", {HIP_1060, HIP_0, HIP_0 }},
+ {"asinhf", {HIP_1060, HIP_0, HIP_0 }},
+ {"atanh", {HIP_1060, HIP_0, HIP_0 }},
+ {"atanhf", {HIP_1060, HIP_0, HIP_0 }},
+ {"ldexp", {HIP_1060, HIP_0, HIP_0 }},
+ {"ldexpf", {HIP_1060, HIP_0, HIP_0 }},
+ {"logb", {HIP_1060, HIP_0, HIP_0 }},
+ {"logbf", {HIP_1060, HIP_0, HIP_0 }},
+ {"ilogb", {HIP_1060, HIP_0, HIP_0 }},
+ {"ilogbf", {HIP_1060, HIP_0, HIP_0 }},
+ {"scalbn", {HIP_1060, HIP_0, HIP_0 }},
+ {"scalbnf", {HIP_1060, HIP_0, HIP_0 }},
+ {"scalbln", {HIP_1060, HIP_0, HIP_0 }},
+ {"scalblnf", {HIP_1060, HIP_0, HIP_0 }},
+ {"frexp", {HIP_1060, HIP_0, HIP_0 }},
+ {"frexpf", {HIP_1060, HIP_0, HIP_0 }},
+ {"round", {HIP_1060, HIP_0, HIP_0 }},
+ {"roundf", {HIP_1060, HIP_0, HIP_0 }},
+ {"lround", {HIP_1060, HIP_0, HIP_0 }},
+ {"lroundf", {HIP_1060, HIP_0, HIP_0 }},
+ {"llround", {HIP_1060, HIP_0, HIP_0 }},
+ {"llroundf", {HIP_1060, HIP_0, HIP_0 }},
+ {"rint", {HIP_1060, HIP_0, HIP_0 }},
+ {"rintf", {HIP_1060, HIP_0, HIP_0 }},
+ {"lrint", {HIP_1060, HIP_0, HIP_0 }},
+ {"lrintf", {HIP_1060, HIP_0, HIP_0 }},
+ {"llrint", {HIP_1060, HIP_0, HIP_0 }},
+ {"llrintf", {HIP_1060, HIP_0, HIP_0 }},
+ {"nearbyint", {HIP_1060, HIP_0, HIP_0 }},
+ {"nearbyintf", {HIP_1060, HIP_0, HIP_0 }},
+ {"ceil", {HIP_1060, HIP_0, HIP_0 }},
+ {"trunc", {HIP_1060, HIP_0, HIP_0 }},
+ {"truncf", {HIP_1060, HIP_0, HIP_0 }},
+ {"fdim", {HIP_1060, HIP_0, HIP_0 }},
+ {"fdimf", {HIP_1060, HIP_0, HIP_0 }},
+ {"atan2", {HIP_1060, HIP_0, HIP_0 }},
+ {"atan", {HIP_1060, HIP_0, HIP_0 }},
+ {"acos", {HIP_1060, HIP_0, HIP_0 }},
+ {"asin", {HIP_1060, HIP_0, HIP_0 }},
+ {"hypot", {HIP_1060, HIP_0, HIP_0 }},
+ {"rhypot", {HIP_1060, HIP_0, HIP_0 }},
+ {"hypotf", {HIP_1060, HIP_0, HIP_0 }},
+ {"rhypotf", {HIP_1060, HIP_0, HIP_0 }},
+ {"norm3d", {HIP_1060, HIP_0, HIP_0 }},
+ {"rnorm3d", {HIP_1060, HIP_0, HIP_0 }},
+ {"norm4d", {HIP_1060, HIP_0, HIP_0 }},
+ {"rnorm4d", {HIP_1060, HIP_0, HIP_0 }},
+ {"norm", {HIP_1060, HIP_0, HIP_0 }},
+ {"rnorm", {HIP_1060, HIP_0, HIP_0 }},
+ {"rnormf", {HIP_1060, HIP_0, HIP_0 }},
+ {"normf", {HIP_1060, HIP_0, HIP_0 }},
+ {"norm3df", {HIP_1060, HIP_0, HIP_0 }},
+ {"rnorm3df", {HIP_1060, HIP_0, HIP_0 }},
+ {"norm4df", {HIP_1060, HIP_0, HIP_0 }},
+ {"rnorm4df", {HIP_1060, HIP_0, HIP_0 }},
+ {"cbrt", {HIP_1060, HIP_0, HIP_0 }},
+ {"cbrtf", {HIP_1060, HIP_0, HIP_0 }},
+ {"rcbrt", {HIP_1060, HIP_0, HIP_0 }},
+ {"rcbrtf", {HIP_1060, HIP_0, HIP_0 }},
+ {"sinpi", {HIP_1060, HIP_0, HIP_0 }},
+ {"sinpif", {HIP_1060, HIP_0, HIP_0 }},
+ {"cospi", {HIP_1060, HIP_0, HIP_0 }},
+ {"cospif", {HIP_1060, HIP_0, HIP_0 }},
+ {"sincospi", {HIP_1060, HIP_0, HIP_0 }},
+ {"sincospif", {HIP_1060, HIP_0, HIP_0 }},
+ {"pow", {HIP_1060, HIP_0, HIP_0 }},
+ {"modf", {HIP_1090, HIP_0, HIP_0 }},
+ {"fmod", {HIP_1060, HIP_0, HIP_0 }},
+ {"remainder", {HIP_1060, HIP_0, HIP_0 }},
+ {"remainderf", {HIP_1060, HIP_0, HIP_0 }},
+ {"remquo", {HIP_1090, HIP_0, HIP_0 }},
+ {"remquof", {HIP_1060, HIP_0, HIP_0 }},
+ {"j0", {HIP_1060, HIP_0, HIP_0 }},
+ {"j0f", {HIP_1060, HIP_0, HIP_0 }},
+ {"j1", {HIP_1060, HIP_0, HIP_0 }},
+ {"j1f", {HIP_1060, HIP_0, HIP_0 }},
+ {"jn", {HIP_1060, HIP_0, HIP_0 }},
+ {"jnf", {HIP_1060, HIP_0, HIP_0 }},
+ {"y0", {HIP_1060, HIP_0, HIP_0 }},
+ {"y0f", {HIP_1060, HIP_0, HIP_0 }},
+ {"y1", {HIP_1060, HIP_0, HIP_0 }},
+ {"y1f", {HIP_1060, HIP_0, HIP_0 }},
+ {"yn", {HIP_1060, HIP_0, HIP_0 }},
+ {"ynf", {HIP_1060, HIP_0, HIP_0 }},
+ {"cyl_bessel_i0", {HIP_1090, HIP_0, HIP_0 }},
+ {"cyl_bessel_i0f", {HIP_1090, HIP_0, HIP_0 }},
+ {"cyl_bessel_i1", {HIP_1090, HIP_0, HIP_0 }},
+ {"cyl_bessel_i1f", {HIP_1090, HIP_0, HIP_0 }},
+ {"erf", {HIP_1060, HIP_0, HIP_0 }},
+ {"erff", {HIP_1060, HIP_0, HIP_0 }},
+ {"erfinv", {HIP_1060, HIP_0, HIP_0 }},
+ {"erfinvf", {HIP_1060, HIP_0, HIP_0 }},
+ {"erfc", {HIP_1060, HIP_0, HIP_0 }},
+ {"erfcf", {HIP_1060, HIP_0, HIP_0 }},
+ {"lgamma", {HIP_1060, HIP_0, HIP_0 }},
+ {"erfcinv", {HIP_1060, HIP_0, HIP_0 }},
+ {"erfcinvf", {HIP_1060, HIP_0, HIP_0 }},
+ {"normcdfinv", {HIP_1060, HIP_0, HIP_0 }},
+ {"normcdfinvf", {HIP_1060, HIP_0, HIP_0 }},
+ {"normcdf", {HIP_1060, HIP_0, HIP_0 }},
+ {"normcdff", {HIP_1060, HIP_0, HIP_0 }},
+ {"erfcx", {HIP_1060, HIP_0, HIP_0 }},
+ {"erfcxf", {HIP_1060, HIP_0, HIP_0 }},
+ {"lgammaf", {HIP_1060, HIP_0, HIP_0 }},
+ {"tgamma", {HIP_1060, HIP_0, HIP_0 }},
+ {"tgammaf", {HIP_1060, HIP_0, HIP_0 }},
+ {"copysign", {HIP_1060, HIP_0, HIP_0 }},
+ {"copysignf", {HIP_1060, HIP_0, HIP_0 }},
+ {"nextafter", {HIP_1060, HIP_0, HIP_0 }},
+ {"nextafterf", {HIP_1090, HIP_0, HIP_0 }},
+ {"nan", {HIP_1060, HIP_0, HIP_0 }},
+ {"nanf", {HIP_1060, HIP_0, HIP_0 }},
+ {"fma", {HIP_1060, HIP_0, HIP_0 }},
+ {"fmaf", {HIP_1060, HIP_0, HIP_0 }},
+ {"acosf", {HIP_1060, HIP_0, HIP_0 }},
+ {"asinf", {HIP_1060, HIP_0, HIP_0 }},
+ {"atanf", {HIP_1060, HIP_0, HIP_0 }},
+ {"atan2f", {HIP_1060, HIP_0, HIP_0 }},
+ {"cosf", {HIP_1060, HIP_0, HIP_0 }},
+ {"sinf", {HIP_1060, HIP_0, HIP_0 }},
+ {"tanf", {HIP_1060, HIP_0, HIP_0 }},
+ {"coshf", {HIP_1060, HIP_0, HIP_0 }},
+ {"sinhf", {HIP_1060, HIP_0, HIP_0 }},
+ {"tanhf", {HIP_1060, HIP_0, HIP_0 }},
+ {"expf", {HIP_1060, HIP_0, HIP_0 }},
+ {"logf", {HIP_1060, HIP_0, HIP_0 }},
+ {"log10f", {HIP_1060, HIP_0, HIP_0 }},
+ {"modff", {HIP_1090, HIP_0, HIP_0 }},
+ {"powf", {HIP_1060, HIP_0, HIP_0 }},
+ {"sqrtf", {HIP_1060, HIP_0, HIP_0 }},
+ {"ceilf", {HIP_1060, HIP_0, HIP_0 }},
+ {"floorf", {HIP_1060, HIP_0, HIP_0 }},
+ {"fmodf", {HIP_1060, HIP_0, HIP_0 }},
+ {"signbit", {HIP_1060, HIP_0, HIP_0 }},
+ {"isfinite", {HIP_1060, HIP_0, HIP_0 }},
+ {"isnan", {HIP_1060, HIP_0, HIP_0 }},
+ {"isinf", {HIP_1060, HIP_0, HIP_0 }},
+ {"__mulhi", {HIP_1060, HIP_0, HIP_0 }},
+ {"__umulhi", {HIP_1060, HIP_0, HIP_0 }},
+ {"__mul64hi", {HIP_1060, HIP_0, HIP_0 }},
+ {"__umul64hi", {HIP_1060, HIP_0, HIP_0 }},
+ {"__int_as_float", {HIP_1060, HIP_0, HIP_0 }},
+ {"__float_as_int", {HIP_1060, HIP_0, HIP_0 }},
+ {"__float_as_uint", {HIP_1060, HIP_0, HIP_0 }},
+ {"__uint_as_float", {HIP_1060, HIP_0, HIP_0 }},
+ {"__syncthreads", {HIP_1060, HIP_0, HIP_0 }},
+ {"__syncthreads_count", {HIP_3070, HIP_0, HIP_0 }},
+ {"__syncthreads_and", {HIP_3070, HIP_0, HIP_0 }},
+ {"__syncthreads_or", {HIP_3070, HIP_0, HIP_0 }},
+ {"__threadfence", {HIP_1060, HIP_0, HIP_0 }},
+ {"__threadfence_block", {HIP_1060, HIP_0, HIP_0 }},
+ {"__threadfence_system", {HIP_1060, HIP_0, HIP_0 }},
+ {"__saturatef", {HIP_1060, HIP_0, HIP_0 }},
+ {"__sad", {HIP_1060, HIP_0, HIP_0 }},
+ {"__usad", {HIP_1060, HIP_0, HIP_0 }},
+ {"__mul24", {HIP_1060, HIP_0, HIP_0 }},
+ {"__umul24", {HIP_1060, HIP_0, HIP_0 }},
+ {"fdividef", {HIP_1060, HIP_0, HIP_0 }},
+ {"__fdividef", {HIP_1060, HIP_0, HIP_0 }},
+ {"__sinf", {HIP_1060, HIP_0, HIP_0 }},
+ {"__cosf", {HIP_1060, HIP_0, HIP_0 }},
+ {"__tanf", {HIP_1060, HIP_0, HIP_0 }},
+ {"__sincosf", {HIP_1060, HIP_0, HIP_0 }},
+ {"__expf", {HIP_1060, HIP_0, HIP_0 }},
+ {"__exp10f", {HIP_1060, HIP_0, HIP_0 }},
+ {"__log2f", {HIP_1060, HIP_0, HIP_0 }},
+ {"__log10f", {HIP_1060, HIP_0, HIP_0 }},
+ {"__logf", {HIP_1060, HIP_0, HIP_0 }},
+ {"__powf", {HIP_1060, HIP_0, HIP_0 }},
+ {"__float2int_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__float2int_rz", {HIP_1060, HIP_0, HIP_0 }},
+ {"__float2int_ru", {HIP_1060, HIP_0, HIP_0 }},
+ {"__float2int_rd", {HIP_1060, HIP_0, HIP_0 }},
+ {"__float2uint_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__float2uint_rz", {HIP_1060, HIP_0, HIP_0 }},
+ {"__float2uint_ru", {HIP_1060, HIP_0, HIP_0 }},
+ {"__float2uint_rd", {HIP_1060, HIP_0, HIP_0 }},
+ {"__int2float_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__int2float_rz", {HIP_1060, HIP_0, HIP_0 }},
+ {"__int2float_ru", {HIP_1060, HIP_0, HIP_0 }},
+ {"__int2float_rd", {HIP_1060, HIP_0, HIP_0 }},
+ {"__uint2float_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__uint2float_rz", {HIP_1060, HIP_0, HIP_0 }},
+ {"__uint2float_ru", {HIP_1060, HIP_0, HIP_0 }},
+ {"__uint2float_rd", {HIP_1060, HIP_0, HIP_0 }},
+ {"__float2ll_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__float2ll_rz", {HIP_1060, HIP_0, HIP_0 }},
+ {"__float2ll_ru", {HIP_1060, HIP_0, HIP_0 }},
+ {"__float2ll_rd", {HIP_1060, HIP_0, HIP_0 }},
+ {"__float2ull_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__float2ull_rz", {HIP_1060, HIP_0, HIP_0 }},
+ {"__float2ull_ru", {HIP_1060, HIP_0, HIP_0 }},
+ {"__float2ull_rd", {HIP_1060, HIP_0, HIP_0 }},
+ {"__ll2float_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__ll2float_rz", {HIP_1060, HIP_0, HIP_0 }},
+ {"__ll2float_ru", {HIP_1060, HIP_0, HIP_0 }},
+ {"__ll2float_rd", {HIP_1060, HIP_0, HIP_0 }},
+ {"__ull2float_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__ull2float_rz", {HIP_1060, HIP_0, HIP_0 }},
+ {"__ull2float_ru", {HIP_1060, HIP_0, HIP_0 }},
+ {"__ull2float_rd", {HIP_1060, HIP_0, HIP_0 }},
+ {"__fadd_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__fsub_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__fmul_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__fmaf_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__frcp_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__fsqrt_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__frsqrt_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__fdiv_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__clz", {HIP_1060, HIP_0, HIP_0 }},
+ {"__ffs", {HIP_1060, HIP_0, HIP_0 }},
+ {"__popc", {HIP_1060, HIP_0, HIP_0 }},
+ {"__brev", {HIP_1060, HIP_0, HIP_0 }},
+ {"__clzll", {HIP_1060, HIP_0, HIP_0 }},
+ {"__ffsll", {HIP_1060, HIP_0, HIP_0 }},
+ {"__popcll", {HIP_1060, HIP_0, HIP_0 }},
+ {"__brevll", {HIP_1060, HIP_0, HIP_0 }},
+ {"__byte_perm", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hadd", {HIP_1060, HIP_0, HIP_0 }},
+ {"__rhadd", {HIP_1060, HIP_0, HIP_0 }},
+ {"__uhadd", {HIP_1060, HIP_0, HIP_0 }},
+ {"__urhadd", {HIP_1060, HIP_0, HIP_0 }},
+ {"__double2float_rd", {HIP_1060, HIP_0, HIP_0 }},
+ {"__double2float_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__double2float_ru", {HIP_1060, HIP_0, HIP_0 }},
+ {"__double2float_rz", {HIP_1060, HIP_0, HIP_0 }},
+ {"__double2hiint", {HIP_1060, HIP_0, HIP_0 }},
+ {"__double2loint", {HIP_1060, HIP_0, HIP_0 }},
+ {"__double2int_rd", {HIP_1060, HIP_0, HIP_0 }},
+ {"__double2int_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__double2int_ru", {HIP_1060, HIP_0, HIP_0 }},
+ {"__double2int_rz", {HIP_1060, HIP_0, HIP_0 }},
+ {"__double2ll_rd", {HIP_1060, HIP_0, HIP_0 }},
+ {"__double2ll_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__double2ll_ru", {HIP_1060, HIP_0, HIP_0 }},
+ {"__double2ll_rz", {HIP_1060, HIP_0, HIP_0 }},
+ {"__double2uint_rd", {HIP_1060, HIP_0, HIP_0 }},
+ {"__double2uint_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__double2uint_ru", {HIP_1060, HIP_0, HIP_0 }},
+ {"__double2uint_rz", {HIP_1060, HIP_0, HIP_0 }},
+ {"__double2ull_rd", {HIP_1060, HIP_0, HIP_0 }},
+ {"__double2ull_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__double2ull_ru", {HIP_1060, HIP_0, HIP_0 }},
+ {"__double2ull_rz", {HIP_1060, HIP_0, HIP_0 }},
+ {"__double_as_longlong", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hiloint2double", {HIP_1060, HIP_0, HIP_0 }},
+ {"__int2double_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__ll2double_rd", {HIP_1060, HIP_0, HIP_0 }},
+ {"__ll2double_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__ll2double_ru", {HIP_1060, HIP_0, HIP_0 }},
+ {"__ll2double_rz", {HIP_1060, HIP_0, HIP_0 }},
+ {"__longlong_as_double", {HIP_1060, HIP_0, HIP_0 }},
+ {"__uint2double_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__ull2double_rd", {HIP_1060, HIP_0, HIP_0 }},
+ {"__ull2double_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__ull2double_ru", {HIP_1060, HIP_0, HIP_0 }},
+ {"__ull2double_rz", {HIP_1060, HIP_0, HIP_0 }},
+ {"__float2half", {HIP_1060, HIP_0, HIP_0 }},
+ {"__float2half_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__float2half_rz", {HIP_1060, HIP_0, HIP_0 }},
+ {"__float2half_rd", {HIP_1060, HIP_0, HIP_0 }},
+ {"__float2half_ru", {HIP_1060, HIP_0, HIP_0 }},
+ {"__half2float", {HIP_1060, HIP_0, HIP_0 }},
+ {"__float2half2_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__floats2half2_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__low2float", {HIP_1060, HIP_0, HIP_0 }},
+ {"__high2float", {HIP_1060, HIP_0, HIP_0 }},
+ {"__float22half2_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__half22float2", {HIP_1060, HIP_0, HIP_0 }},
+ {"__half2int_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__half2int_rz", {HIP_1060, HIP_0, HIP_0 }},
+ {"__half2int_rd", {HIP_1060, HIP_0, HIP_0 }},
+ {"__half2int_ru", {HIP_1060, HIP_0, HIP_0 }},
+ {"__int2half_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__int2half_rz", {HIP_1060, HIP_0, HIP_0 }},
+ {"__int2half_rd", {HIP_1060, HIP_0, HIP_0 }},
+ {"__int2half_ru", {HIP_1060, HIP_0, HIP_0 }},
+ {"__half2short_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__half2short_rz", {HIP_1060, HIP_0, HIP_0 }},
+ {"__half2short_rd", {HIP_1060, HIP_0, HIP_0 }},
+ {"__half2short_ru", {HIP_1060, HIP_0, HIP_0 }},
+ {"__short2half_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__short2half_rz", {HIP_1060, HIP_0, HIP_0 }},
+ {"__short2half_rd", {HIP_1060, HIP_0, HIP_0 }},
+ {"__short2half_ru", {HIP_1060, HIP_0, HIP_0 }},
+ {"__half2uint_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__half2uint_rz", {HIP_1060, HIP_0, HIP_0 }},
+ {"__half2uint_rd", {HIP_1060, HIP_0, HIP_0 }},
+ {"__half2uint_ru", {HIP_1060, HIP_0, HIP_0 }},
+ {"__uint2half_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__uint2half_rz", {HIP_1060, HIP_0, HIP_0 }},
+ {"__uint2half_rd", {HIP_1060, HIP_0, HIP_0 }},
+ {"__uint2half_ru", {HIP_1060, HIP_0, HIP_0 }},
+ {"__half2ushort_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__half2ushort_rz", {HIP_1060, HIP_0, HIP_0 }},
+ {"__half2ushort_rd", {HIP_1060, HIP_0, HIP_0 }},
+ {"__half2ushort_ru", {HIP_1060, HIP_0, HIP_0 }},
+ {"__ushort2half_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__ushort2half_rz", {HIP_1060, HIP_0, HIP_0 }},
+ {"__ushort2half_rd", {HIP_1060, HIP_0, HIP_0 }},
+ {"__ushort2half_ru", {HIP_1060, HIP_0, HIP_0 }},
+ {"__half2ull_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__half2ull_rz", {HIP_1060, HIP_0, HIP_0 }},
+ {"__half2ull_rd", {HIP_1060, HIP_0, HIP_0 }},
+ {"__half2ull_ru", {HIP_1060, HIP_0, HIP_0 }},
+ {"__ull2half_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__ull2half_rz", {HIP_1060, HIP_0, HIP_0 }},
+ {"__ull2half_rd", {HIP_1060, HIP_0, HIP_0 }},
+ {"__ull2half_ru", {HIP_1060, HIP_0, HIP_0 }},
+ {"__half2ll_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__half2ll_rz", {HIP_1060, HIP_0, HIP_0 }},
+ {"__half2ll_rd", {HIP_1060, HIP_0, HIP_0 }},
+ {"__half2ll_ru", {HIP_1060, HIP_0, HIP_0 }},
+ {"__ll2half_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__ll2half_rz", {HIP_1060, HIP_0, HIP_0 }},
+ {"__ll2half_rd", {HIP_1060, HIP_0, HIP_0 }},
+ {"__ll2half_ru", {HIP_1060, HIP_0, HIP_0 }},
+ {"htrunc", {HIP_1060, HIP_0, HIP_0 }},
+ {"hceil", {HIP_1060, HIP_0, HIP_0 }},
+ {"hfloor", {HIP_1060, HIP_0, HIP_0 }},
+ {"hrint", {HIP_1060, HIP_0, HIP_0 }},
+ {"h2trunc", {HIP_1060, HIP_0, HIP_0 }},
+ {"h2ceil", {HIP_1060, HIP_0, HIP_0 }},
+ {"h2floor", {HIP_1060, HIP_0, HIP_0 }},
+ {"h2rint", {HIP_1090, HIP_0, HIP_0 }},
+ {"__half2half2", {HIP_1090, HIP_0, HIP_0 }},
+ {"__lowhigh2highlow", {HIP_1060, HIP_0, HIP_0 }},
+ {"__lows2half2", {HIP_1060, HIP_0, HIP_0 }},
+ {"__highs2half2", {HIP_1060, HIP_0, HIP_0 }},
+ {"__high2half", {HIP_1060, HIP_0, HIP_0 }},
+ {"__low2half", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hisinf", {HIP_1060, HIP_0, HIP_0 }},
+ {"__halves2half2", {HIP_1060, HIP_0, HIP_0 }},
+ {"__low2half2", {HIP_1060, HIP_0, HIP_0 }},
+ {"__high2half2", {HIP_1060, HIP_0, HIP_0 }},
+ {"__half_as_short", {HIP_1060, HIP_0, HIP_0 }},
+ {"__half_as_ushort", {HIP_1060, HIP_0, HIP_0 }},
+ {"__short_as_half", {HIP_1090, HIP_0, HIP_0 }},
+ {"__ushort_as_half", {HIP_1060, HIP_0, HIP_0 }},
+ {"__ldg", {HIP_1060, HIP_0, HIP_0 }},
+ {"__ldcg", {HIP_1090, HIP_0, HIP_0 }},
+ {"__ldca", {HIP_1090, HIP_0, HIP_0 }},
+ {"__ldcs", {HIP_1090, HIP_0, HIP_0 }},
+ {"__heq2", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hne2", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hle2", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hge2", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hlt2", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hgt2", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hequ2", {HIP_1090, HIP_0, HIP_0 }},
+ {"__hneu2", {HIP_1090, HIP_0, HIP_0 }},
+ {"__hleu2", {HIP_1090, HIP_0, HIP_0 }},
+ {"__hgeu2", {HIP_1090, HIP_0, HIP_0 }},
+ {"__hltu2", {HIP_1090, HIP_0, HIP_0 }},
+ {"__hgtu2", {HIP_1090, HIP_0, HIP_0 }},
+ {"__hisnan2", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hadd2", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hsub2", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hmul2", {HIP_1060, HIP_0, HIP_0 }},
+ {"__h2div", {HIP_1090, HIP_0, HIP_0 }},
+ {"__hadd2_sat", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hsub2_sat", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hmul2_sat", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hfma2", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hfma2_sat", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hneg2", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hsub", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hmul", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hdiv", {HIP_1090, HIP_0, HIP_0 }},
+ {"__hadd_sat", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hsub_sat", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hmul_sat", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hfma", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hfma_sat", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hneg", {HIP_1060, HIP_0, HIP_0 }},
+ {"__habs2", {HIP_3050, HIP_0, HIP_0 }},
+ {"__habs", {HIP_3050, HIP_0, HIP_0 }},
+ {"__hbeq2", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hbne2", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hble2", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hbge2", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hblt2", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hbgt2", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hbequ2", {HIP_1090, HIP_0, HIP_0 }},
+ {"__hbneu2", {HIP_1090, HIP_0, HIP_0 }},
+ {"__hbleu2", {HIP_1090, HIP_0, HIP_0 }},
+ {"__hbgeu2", {HIP_1090, HIP_0, HIP_0 }},
+ {"__hbltu2", {HIP_1090, HIP_0, HIP_0 }},
+ {"__hbgtu2", {HIP_1090, HIP_0, HIP_0 }},
+ {"__heq", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hne", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hle", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hge", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hlt", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hgt", {HIP_1060, HIP_0, HIP_0 }},
+ {"__hequ", {HIP_1090, HIP_0, HIP_0 }},
+ {"__hneu", {HIP_1090, HIP_0, HIP_0 }},
+ {"__hleu", {HIP_1090, HIP_0, HIP_0 }},
+ {"__hgeu", {HIP_1090, HIP_0, HIP_0 }},
+ {"__hltu", {HIP_1090, HIP_0, HIP_0 }},
+ {"__hgtu", {HIP_1090, HIP_0, HIP_0 }},
+ {"__hisnan", {HIP_1060, HIP_0, HIP_0 }},
+ {"hsqrt", {HIP_1060, HIP_0, HIP_0 }},
+ {"hrsqrt", {HIP_1060, HIP_0, HIP_0 }},
+ {"hrcp", {HIP_1090, HIP_0, HIP_0 }},
+ {"hlog", {HIP_1060, HIP_0, HIP_0 }},
+ {"hlog2", {HIP_1060, HIP_0, HIP_0 }},
+ {"hlog10", {HIP_1060, HIP_0, HIP_0 }},
+ {"hexp", {HIP_1060, HIP_0, HIP_0 }},
+ {"hexp2", {HIP_1060, HIP_0, HIP_0 }},
+ {"hexp10", {HIP_1060, HIP_0, HIP_0 }},
+ {"hcos", {HIP_1060, HIP_0, HIP_0 }},
+ {"hsin", {HIP_1060, HIP_0, HIP_0 }},
+ {"h2sqrt", {HIP_1060, HIP_0, HIP_0 }},
+ {"h2rsqrt", {HIP_1060, HIP_0, HIP_0 }},
+ {"h2rcp", {HIP_1060, HIP_0, HIP_0 }},
+ {"h2log", {HIP_1060, HIP_0, HIP_0 }},
+ {"h2log2", {HIP_1060, HIP_0, HIP_0 }},
+ {"h2log10", {HIP_1060, HIP_0, HIP_0 }},
+ {"h2exp", {HIP_1060, HIP_0, HIP_0 }},
+ {"h2exp2", {HIP_1060, HIP_0, HIP_0 }},
+ {"h2exp10", {HIP_1060, HIP_0, HIP_0 }},
+ {"h2cos", {HIP_1060, HIP_0, HIP_0 }},
+ {"h2sin", {HIP_1060, HIP_0, HIP_0 }},
+ {"__shfl", {HIP_1060, HIP_0, HIP_0 }},
+ {"__shfl_up", {HIP_1060, HIP_0, HIP_0 }},
+ {"__shfl_down", {HIP_1060, HIP_0, HIP_0 }},
+ {"__shfl_xor", {HIP_1060, HIP_0, HIP_0 }},
+ {"atomicAdd", {HIP_1060, HIP_0, HIP_0 }},
+ {"atomicSub", {HIP_1060, HIP_0, HIP_0 }},
+ {"atomicExch", {HIP_1060, HIP_0, HIP_0 }},
+ {"atomicMin", {HIP_1060, HIP_0, HIP_0 }},
+ {"atomicMax", {HIP_1060, HIP_0, HIP_0 }},
+ {"atomicInc", {HIP_1060, HIP_0, HIP_0 }},
+ {"atomicDec", {HIP_1060, HIP_0, HIP_0 }},
+ {"atomicAnd", {HIP_1060, HIP_0, HIP_0 }},
+ {"atomicOr", {HIP_1060, HIP_0, HIP_0 }},
+ {"atomicXor", {HIP_1060, HIP_0, HIP_0 }},
+ {"atomicCAS", {HIP_1060, HIP_0, HIP_0 }},
+ {"__all", {HIP_1060, HIP_0, HIP_0 }},
+ {"__any", {HIP_1060, HIP_0, HIP_0 }},
+ {"__ballot", {HIP_1060, HIP_0, HIP_0 }},
+ {"clock64", {HIP_1060, HIP_0, HIP_0 }},
+ {"clock", {HIP_1060, HIP_0, HIP_0 }},
+ {"__dadd_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__ddiv_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__dmul_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__drcp_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__dsqrt_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__dsub_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__fma_rn", {HIP_1060, HIP_0, HIP_0 }},
+ {"__assert_fail", {HIP_1090, HIP_0, HIP_0 }},
+ {"__assertfail", {HIP_1090, HIP_0, HIP_0 }},
+ {"atomicCAS_system", {HIP_4030, HIP_0, HIP_0 }},
+ {"atomicSub_system", {HIP_4030, HIP_0, HIP_0 }},
+ {"atomicAdd_system", {HIP_4030, HIP_0, HIP_0 }},
+ {"atomicExch_system", {HIP_4030, HIP_0, HIP_0 }},
+ {"atomicMin_system", {HIP_4030, HIP_0, HIP_0 }},
+ {"atomicMax_system", {HIP_4030, HIP_0, HIP_0 }},
+ {"atomicAnd_system", {HIP_4030, HIP_0, HIP_0 }},
+ {"atomicOr_system", {HIP_4030, HIP_0, HIP_0 }},
+ {"atomicXor_system", {HIP_4030, HIP_0, HIP_0 }},
+ {"__funnelshift_l", {HIP_4040, HIP_0, HIP_0 }},
+ {"__funnelshift_lc", {HIP_4040, HIP_0, HIP_0 }},
+ {"__funnelshift_r", {HIP_4040, HIP_0, HIP_0 }},
+ {"__funnelshift_rc", {HIP_4040, HIP_0, HIP_0 }},
};
const std::map CUDA_DEVICE_FUNCTION_API_SECTION_MAP {
{1, "Device Functions"},
+ {2, "Device Types"}, // new section; 2 is the value carried by every CUDA_DEVICE_TYPE_NAME_MAP entry (presumably as its section index - confirm)
};
diff --git a/src/CUDA2HIP_Device_types.cpp b/src/CUDA2HIP_Device_types.cpp
new file mode 100644
index 00000000..f621707a
--- /dev/null
+++ b/src/CUDA2HIP_Device_types.cpp
@@ -0,0 +1,86 @@
+/*
+Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "CUDA2HIP.h"
+
+// Maps the names of CUDA Device/Host types (plus the fp8 constants tagged CONV_NUMERIC_LITERAL) to the corresponding HIP names
+const std::map CUDA_DEVICE_TYPE_NAME_MAP {
+ // float16 Precision Device types
+ {"__half", {"__half", "", CONV_DEVICE_TYPE, API_RUNTIME, 2}},
+ {"__half_raw", {"__half_raw", "", CONV_DEVICE_TYPE, API_RUNTIME, 2}},
+ {"__half2", {"__half2", "", CONV_DEVICE_TYPE, API_RUNTIME, 2}},
+ {"__half2_raw", {"__half2_raw", "", CONV_DEVICE_TYPE, API_RUNTIME, 2}},
+ // Bfloat16 Precision Device types (UNSUPPORTED presumably marks names without a HIP implementation yet - confirm)
+ {"__nv_bfloat16", {"__hip_bfloat16", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
+ {"nv_bfloat16", {"hip_bfloat16", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
+ {"__nv_bfloat16_raw", {"__hip_bfloat16_raw", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
+ {"__nv_bfloat162", {"__hip_bfloat162", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
+ {"nv_bfloat162", {"hip_bfloat162", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
+ {"__nv_bfloat162_raw", {"__hip_bfloat162_raw", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
+ // float8 Precision Device types and their constants (types: CONV_DEVICE_TYPE, __NV_* constants: CONV_NUMERIC_LITERAL)
+ {"__nv_fp8_storage_t", {"__hip_fp8_storage_t", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
+ {"__nv_fp8x2_storage_t", {"__hip_fp8x2_storage_t", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
+ {"__nv_fp8x4_storage_t", {"__hip_fp8x4_storage_t", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
+ {"__nv_fp8_e5m2", {"__hip_fp8_e5m2", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
+ {"__nv_fp8x2_e5m2", {"__hip_fp8x2_e5m2", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
+ {"__nv_fp8_e4m3", {"__hip_fp8_e4m3", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
+ {"__nv_fp8x2_e4m3", {"__hip_fp8x2_e4m3", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
+ {"__nv_fp8x4_e4m3", {"__hip_fp8x4_e4m3", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
+ {"__nv_saturation_t", {"__hip_saturation_t", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
+ {"__NV_NOSAT", {"__HIP_NOSAT", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 2, UNSUPPORTED}},
+ {"__NV_SATFINITE", {"__HIP_SATFINITE", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 2, UNSUPPORTED}},
+ {"__nv_fp8_interpretation_t", {"__hip_fp8_interpretation_t", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}},
+ {"__NV_E4M3", {"__HIP_E4M3", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 2, UNSUPPORTED}},
+ {"__NV_E5M2", {"__HIP_E5M2", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 2, UNSUPPORTED}},
+ {"__nv_fp8x4_e5m2", {"__hip_fp8x4_e5m2", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, // a type like __nv_fp8x4_e4m3, so it is categorized as a device type, not a numeric literal
+};
+
+const std::map CUDA_DEVICE_TYPE_NAME_VER_MAP { // CUDA version triple per CUDA name, same entry order as CUDA_DEVICE_TYPE_NAME_MAP (the __half* names have no entry); presumably {added, deprecated, removed} with CUDA_0 = not applicable - confirm against CUDA2HIP.h
+ {"__nv_bfloat16", {CUDA_110, CUDA_0, CUDA_0 }}, // bfloat16 names: all CUDA_110
+ {"nv_bfloat16", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__nv_bfloat16_raw", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__nv_bfloat162", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"nv_bfloat162", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__nv_bfloat162_raw", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"__nv_fp8_storage_t", {CUDA_118, CUDA_0, CUDA_0 }}, // fp8 types and constants: all CUDA_118
+ {"__nv_fp8x2_storage_t", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"__nv_fp8x4_storage_t", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"__nv_fp8_e5m2", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"__nv_fp8x2_e5m2", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"__nv_fp8_e4m3", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"__nv_fp8x2_e4m3", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"__nv_fp8x4_e4m3", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"__nv_saturation_t", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"__NV_NOSAT", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"__NV_SATFINITE", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"__nv_fp8_interpretation_t", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"__NV_E4M3", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"__NV_E5M2", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"__nv_fp8x4_e5m2", {CUDA_118, CUDA_0, CUDA_0 }},
+};
+
+const std::map HIP_DEVICE_TYPE_NAME_VER_MAP { // HIP version triple per HIP name; only the four __half* names are listed; presumably {added, deprecated, removed} with HIP_0 = not applicable - confirm against CUDA2HIP.h
+ {"__half", {HIP_1060, HIP_0, HIP_0 }},
+ {"__half2", {HIP_1060, HIP_0, HIP_0 }},
+ {"__half_raw", {HIP_1090, HIP_0, HIP_0 }}, // the _raw variants carry HIP_1090 rather than HIP_1060
+ {"__half2_raw", {HIP_1090, HIP_0, HIP_0 }},
+};
diff --git a/src/CUDA2HIP_Driver_API_functions.cpp b/src/CUDA2HIP_Driver_API_functions.cpp
index be0b0684..eff0c0c9 100644
--- a/src/CUDA2HIP_Driver_API_functions.cpp
+++ b/src/CUDA2HIP_Driver_API_functions.cpp
@@ -27,10 +27,10 @@ const std::map CUDA_DRIVER_FUNCTION_MAP {
// 2. Error Handling
// no analogue
// NOTE: cudaGetErrorName and cuGetErrorName have different signatures
- {"cuGetErrorName", {"hipDrvGetErrorName", "", CONV_ERROR, API_DRIVER, 2, HIP_UNSUPPORTED}},
+ {"cuGetErrorName", {"hipDrvGetErrorName", "", CONV_ERROR, API_DRIVER, 2, HIP_EXPERIMENTAL}},
// no analogue
// NOTE: cudaGetErrorString and cuGetErrorString have different signatures
- {"cuGetErrorString", {"hipDrvGetErrorString", "", CONV_ERROR, API_DRIVER, 2, HIP_UNSUPPORTED}},
+ {"cuGetErrorString", {"hipDrvGetErrorString", "", CONV_ERROR, API_DRIVER, 2, HIP_EXPERIMENTAL}},
// 3. Initialization
// no analogue
@@ -119,7 +119,7 @@ const std::map CUDA_DRIVER_FUNCTION_MAP {
{"cuCtxResetPersistingL2Cache", {"hipCtxResetPersistingL2Cache", "", CONV_CONTEXT, API_DRIVER, 8, HIP_UNSUPPORTED}},
{"cuCtxSetCurrent", {"hipCtxSetCurrent", "", CONV_CONTEXT, API_DRIVER, 8, HIP_DEPRECATED}},
// cudaDeviceSetLimit
- {"cuCtxSetLimit", {"hipDeviceSetLimit", "", CONV_CONTEXT, API_DRIVER, 8, HIP_EXPERIMENTAL}},
+ {"cuCtxSetLimit", {"hipDeviceSetLimit", "", CONV_CONTEXT, API_DRIVER, 8}},
// cudaDeviceSetSharedMemConfig
// TODO: rename to hipDeviceSetSharedMemConfig
{"cuCtxSetSharedMemConfig", {"hipCtxSetSharedMemConfig", "", CONV_CONTEXT, API_DRIVER, 8, HIP_DEPRECATED}},
@@ -136,14 +136,14 @@ const std::map CUDA_DRIVER_FUNCTION_MAP {
// 10. Module Management
// no analogues
- {"cuLinkAddData", {"hiprtcLinkAddData", "", CONV_MODULE, API_DRIVER, 10, HIP_EXPERIMENTAL}},
- {"cuLinkAddData_v2", {"hiprtcLinkAddData", "", CONV_MODULE, API_DRIVER, 10, HIP_EXPERIMENTAL}},
- {"cuLinkAddFile", {"hiprtcLinkAddFile", "", CONV_MODULE, API_DRIVER, 10, HIP_EXPERIMENTAL}},
- {"cuLinkAddFile_v2", {"hiprtcLinkAddFile", "", CONV_MODULE, API_DRIVER, 10, HIP_EXPERIMENTAL}},
- {"cuLinkComplete", {"hiprtcLinkComplete", "", CONV_MODULE, API_DRIVER, 10, HIP_EXPERIMENTAL}},
- {"cuLinkCreate", {"hiprtcLinkCreate", "", CONV_MODULE, API_DRIVER, 10, HIP_EXPERIMENTAL}},
- {"cuLinkCreate_v2", {"hiprtcLinkCreate", "", CONV_MODULE, API_DRIVER, 10, HIP_EXPERIMENTAL}},
- {"cuLinkDestroy", {"hiprtcLinkDestroy", "", CONV_MODULE, API_DRIVER, 10, HIP_EXPERIMENTAL}},
+ {"cuLinkAddData", {"hiprtcLinkAddData", "", CONV_MODULE, API_DRIVER, 10}},
+ {"cuLinkAddData_v2", {"hiprtcLinkAddData", "", CONV_MODULE, API_DRIVER, 10}},
+ {"cuLinkAddFile", {"hiprtcLinkAddFile", "", CONV_MODULE, API_DRIVER, 10}},
+ {"cuLinkAddFile_v2", {"hiprtcLinkAddFile", "", CONV_MODULE, API_DRIVER, 10}},
+ {"cuLinkComplete", {"hiprtcLinkComplete", "", CONV_MODULE, API_DRIVER, 10}},
+ {"cuLinkCreate", {"hiprtcLinkCreate", "", CONV_MODULE, API_DRIVER, 10}},
+ {"cuLinkCreate_v2", {"hiprtcLinkCreate", "", CONV_MODULE, API_DRIVER, 10}},
+ {"cuLinkDestroy", {"hiprtcLinkDestroy", "", CONV_MODULE, API_DRIVER, 10}},
{"cuModuleGetFunction", {"hipModuleGetFunction", "", CONV_MODULE, API_DRIVER, 10}},
{"cuModuleGetGlobal", {"hipModuleGetGlobal", "", CONV_MODULE, API_DRIVER, 10}},
{"cuModuleGetGlobal_v2", {"hipModuleGetGlobal", "", CONV_MODULE, API_DRIVER, 10}},
@@ -543,6 +543,9 @@ const std::map CUDA_DRIVER_FUNCTION_MAP {
// no analogue
// NOTE: Not equal to cudaLaunchKernel due to different signatures
{"cuLaunchKernel", {"hipModuleLaunchKernel", "", CONV_EXECUTION, API_DRIVER, 19}},
+ // no analogue
+ // NOTE: Not equal to cudaLaunchKernelExC due to different signatures
+ {"cuLaunchKernelEx", {"hipLaunchKernelEx", "", CONV_EXECUTION, API_DRIVER, 19, HIP_UNSUPPORTED}},
// 20. Execution Control [DEPRECATED]
// no analogue
@@ -684,7 +687,7 @@ const std::map CUDA_DRIVER_FUNCTION_MAP {
// cudaGraphExecEventWaitNodeSetEvent
{"cuGraphExecEventWaitNodeSetEvent", {"hipGraphExecEventWaitNodeSetEvent", "", CONV_GRAPH, API_DRIVER, 21}},
// cudaGraphUpload
- {"cuGraphUpload", {"hipGraphUpload", "", CONV_GRAPH, API_DRIVER, 21, HIP_EXPERIMENTAL}},
+ {"cuGraphUpload", {"hipGraphUpload", "", CONV_GRAPH, API_DRIVER, 21}},
// cudaGraphAddExternalSemaphoresSignalNode
{"cuGraphAddExternalSemaphoresSignalNode", {"hipGraphAddExternalSemaphoresSignalNode", "", CONV_GRAPH, API_DRIVER, 21, HIP_UNSUPPORTED}},
// cudaGraphExternalSemaphoresSignalNodeGetParams
@@ -702,15 +705,15 @@ const std::map CUDA_DRIVER_FUNCTION_MAP {
// cudaGraphExecExternalSemaphoresWaitNodeSetParams
{"cuGraphExecExternalSemaphoresWaitNodeSetParams", {"hipGraphExecExternalSemaphoresWaitNodeSetParams", "", CONV_GRAPH, API_DRIVER, 21, HIP_UNSUPPORTED}},
// cudaUserObjectCreate
- {"cuUserObjectCreate", {"hipUserObjectCreate", "", CONV_GRAPH, API_DRIVER, 21, HIP_EXPERIMENTAL}},
+ {"cuUserObjectCreate", {"hipUserObjectCreate", "", CONV_GRAPH, API_DRIVER, 21}},
// cudaUserObjectRetain
- {"cuUserObjectRetain", {"hipUserObjectRetain", "", CONV_GRAPH, API_DRIVER, 21, HIP_EXPERIMENTAL}},
+ {"cuUserObjectRetain", {"hipUserObjectRetain", "", CONV_GRAPH, API_DRIVER, 21}},
// cudaUserObjectRelease
- {"cuUserObjectRelease", {"hipUserObjectRelease", "", CONV_GRAPH, API_DRIVER, 21, HIP_EXPERIMENTAL}},
+ {"cuUserObjectRelease", {"hipUserObjectRelease", "", CONV_GRAPH, API_DRIVER, 21}},
// cudaGraphRetainUserObject
- {"cuGraphRetainUserObject", {"hipGraphRetainUserObject", "", CONV_GRAPH, API_DRIVER, 21, HIP_EXPERIMENTAL}},
+ {"cuGraphRetainUserObject", {"hipGraphRetainUserObject", "", CONV_GRAPH, API_DRIVER, 21}},
// cudaGraphReleaseUserObject
- {"cuGraphReleaseUserObject", {"hipGraphReleaseUserObject", "", CONV_GRAPH, API_DRIVER, 21, HIP_EXPERIMENTAL}},
+ {"cuGraphReleaseUserObject", {"hipGraphReleaseUserObject", "", CONV_GRAPH, API_DRIVER, 21}},
// cudaGraphAddMemAllocNode
{"cuGraphAddMemAllocNode", {"hipGraphAddMemAllocNode", "", CONV_GRAPH, API_DRIVER, 21, HIP_UNSUPPORTED}},
// cudaGraphMemAllocNodeGetParams
@@ -720,11 +723,11 @@ const std::map CUDA_DRIVER_FUNCTION_MAP {
// cudaGraphMemFreeNodeGetParams
{"cuGraphMemFreeNodeGetParams", {"hipGraphMemFreeNodeGetParams", "", CONV_GRAPH, API_DRIVER, 21, HIP_UNSUPPORTED}},
// cudaDeviceGraphMemTrim
- {"cuDeviceGraphMemTrim", {"hipDeviceGraphMemTrim", "", CONV_GRAPH, API_DRIVER, 21, HIP_EXPERIMENTAL}},
+ {"cuDeviceGraphMemTrim", {"hipDeviceGraphMemTrim", "", CONV_GRAPH, API_DRIVER, 21}},
// cudaDeviceGetGraphMemAttribute
- {"cuDeviceGetGraphMemAttribute", {"hipDeviceGetGraphMemAttribute", "", CONV_GRAPH, API_DRIVER, 21, HIP_EXPERIMENTAL}},
+ {"cuDeviceGetGraphMemAttribute", {"hipDeviceGetGraphMemAttribute", "", CONV_GRAPH, API_DRIVER, 21}},
// cudaDeviceSetGraphMemAttribute
- {"cuDeviceSetGraphMemAttribute", {"hipDeviceSetGraphMemAttribute", "", CONV_GRAPH, API_DRIVER, 21, HIP_EXPERIMENTAL}},
+ {"cuDeviceSetGraphMemAttribute", {"hipDeviceSetGraphMemAttribute", "", CONV_GRAPH, API_DRIVER, 21}},
// cudaGraphInstantiateWithFlags
{"cuGraphInstantiateWithFlags", {"hipGraphInstantiateWithFlags", "", CONV_GRAPH, API_DRIVER, 21}},
//
@@ -747,6 +750,10 @@ const std::map CUDA_DRIVER_FUNCTION_MAP {
{"cuOccupancyMaxPotentialBlockSize", {"hipModuleOccupancyMaxPotentialBlockSize", "", CONV_OCCUPANCY, API_DRIVER, 22}},
// cudaOccupancyMaxPotentialBlockSizeWithFlags
{"cuOccupancyMaxPotentialBlockSizeWithFlags", {"hipModuleOccupancyMaxPotentialBlockSizeWithFlags", "", CONV_OCCUPANCY, API_DRIVER, 22}},
+ // cudaOccupancyMaxPotentialClusterSize
+ {"cuOccupancyMaxPotentialClusterSize", {"hipOccupancyMaxPotentialClusterSize", "", CONV_OCCUPANCY, API_DRIVER, 22, HIP_UNSUPPORTED}},
+ // cudaOccupancyMaxActiveClusters
+ {"cuOccupancyMaxActiveClusters", {"hipOccupancyMaxActiveClusters", "", CONV_OCCUPANCY, API_DRIVER, 22, HIP_UNSUPPORTED}},
// 23. Texture Reference Management [DEPRECATED]
// no analogues
@@ -1282,6 +1289,9 @@ const std::map CUDA_DRIVER_FUNCTION_VER_MAP {
{"cuGraphBatchMemOpNodeGetParams", {CUDA_117, CUDA_0, CUDA_0 }},
{"cuGraphBatchMemOpNodeSetParams", {CUDA_117, CUDA_0, CUDA_0 }},
{"cuGraphExecBatchMemOpNodeSetParams", {CUDA_117, CUDA_0, CUDA_0 }},
+ {"cuLaunchKernelEx", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cuOccupancyMaxPotentialClusterSize", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cuOccupancyMaxActiveClusters", {CUDA_118, CUDA_0, CUDA_0 }},
};
const std::map HIP_DRIVER_FUNCTION_VER_MAP {
@@ -1410,11 +1420,13 @@ const std::map HIP_DRIVER_FUNCTION_VER_MAP {
{"hipMemRetainAllocationHandle", {HIP_5020, HIP_0, HIP_0 }},
{"hipMemSetAccess", {HIP_5020, HIP_0, HIP_0 }},
{"hipMemUnmap", {HIP_5020, HIP_0, HIP_0 }},
- {"hiprtcLinkCreate", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"hiprtcLinkAddFile", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"hiprtcLinkAddData", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"hiprtcLinkComplete", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"hiprtcLinkDestroy", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
+ {"hiprtcLinkCreate", {HIP_5030, HIP_0, HIP_0 }},
+ {"hiprtcLinkAddFile", {HIP_5030, HIP_0, HIP_0 }},
+ {"hiprtcLinkAddData", {HIP_5030, HIP_0, HIP_0 }},
+ {"hiprtcLinkComplete", {HIP_5030, HIP_0, HIP_0 }},
+ {"hiprtcLinkDestroy", {HIP_5030, HIP_0, HIP_0 }},
+ {"hipDrvGetErrorName", {HIP_5040, HIP_0, HIP_0, HIP_LATEST}},
+ {"hipDrvGetErrorString", {HIP_5040, HIP_0, HIP_0, HIP_LATEST}},
};
const std::map CUDA_DRIVER_API_SECTION_MAP {
diff --git a/src/CUDA2HIP_Driver_API_types.cpp b/src/CUDA2HIP_Driver_API_types.cpp
index 633a70b4..2af7b1b7 100644
--- a/src/CUDA2HIP_Driver_API_types.cpp
+++ b/src/CUDA2HIP_Driver_API_types.cpp
@@ -281,9 +281,9 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP {
{"CUmemPoolPtrExportData_v1", {"hipMemPoolPtrExportData", "", CONV_TYPE, API_DRIVER, 1}},
//
- {"CUuserObject_st", {"hipUserObject", "", CONV_TYPE, API_DRIVER, 1, HIP_EXPERIMENTAL}},
+ {"CUuserObject_st", {"hipUserObject", "", CONV_TYPE, API_DRIVER, 1}},
// cudaUserObject_t
- {"CUuserObject", {"hipUserObject_t", "", CONV_TYPE, API_DRIVER, 1, HIP_EXPERIMENTAL}},
+ {"CUuserObject", {"hipUserObject_t", "", CONV_TYPE, API_DRIVER, 1}},
//
{"CUexecAffinitySmCount_st", {"hipExecAffinitySmCount", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}},
@@ -312,6 +312,16 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP {
{"CUDA_BATCH_MEM_OP_NODE_PARAMS_st", {"HIP_BATCH_MEM_OP_NODE_PARAMS", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}},
{"CUDA_BATCH_MEM_OP_NODE_PARAMS", {"HIP_BATCH_MEM_OP_NODE_PARAMS", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}},
+ // cudaLaunchAttribute_st
+ {"CUlaunchAttribute_st", {"hipLaunchAttribute", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}},
+ // cudaLaunchAttribute
+ {"CUlaunchAttribute", {"hipLaunchAttribute", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}},
+
+ // cudaLaunchConfig_st
+ {"CUlaunchConfig_st", {"hipLaunchConfig", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}},
+ // cudaLaunchConfig_t
+ {"CUlaunchConfig", {"hipLaunchConfig", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}},
+
// 2. Unions
{"CUstreamBatchMemOpParams", {"hipStreamBatchMemOpParams", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}},
@@ -319,9 +329,12 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP {
{"CUstreamBatchMemOpParams_union", {"hipStreamBatchMemOpParams", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}},
// cudaKernelNodeAttrValue
+ // NOTE: Starting from CUDA 11.8, CUkernelNodeAttrValue is no longer a separate union but an alias of CUlaunchAttributeValue:
+ // typedef CUlaunchAttributeValue CUkernelNodeAttrValue_v1;
+ // typedef CUkernelNodeAttrValue_v1 CUkernelNodeAttrValue;
{"CUkernelNodeAttrValue", {"hipKernelNodeAttrValue", "", CONV_TYPE, API_DRIVER, 1}},
{"CUkernelNodeAttrValue_v1", {"hipKernelNodeAttrValue", "", CONV_TYPE, API_DRIVER, 1}},
- {"CUkernelNodeAttrValue_union", {"hipKernelNodeAttrValue", "", CONV_TYPE, API_DRIVER, 1}},
+ {"CUkernelNodeAttrValue_union", {"hipKernelNodeAttrValue", "", CONV_TYPE, API_DRIVER, 1, CUDA_REMOVED}},
// cudaStreamAttrValue
{"CUstreamAttrValue", {"hipStreamAttrValue", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}},
@@ -332,6 +345,10 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP {
{"CUlinkState_st", {"ihiprtcLinkState", "", CONV_TYPE, API_DRIVER, 1}},
{"CUlinkState", {"hiprtcLinkState", "", CONV_TYPE, API_DRIVER, 1}},
+ // cudaLaunchAttributeValue
+ {"CUlaunchAttributeValue", {"hipLaunchAttributeValue", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}},
+ {"CUlaunchAttributeValue_union", {"hipLaunchAttributeValue", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}},
+
// 3. Enums
// TODO: HIPaddress_mode_enum and all its values should be hipTextureAddressMode as long as they are equal.
{"CUaddress_mode", {"HIPaddress_mode", "", CONV_TYPE, API_DRIVER, 1}},
@@ -697,7 +714,7 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP {
// no analogue
{"CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED", {"hipDeviceAttributeVirtualAddressManagementSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED | CUDA_DEPRECATED}}, // 102
// no analogue
- {"CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED", {"hipDeviceAttributeVirtualMemoryManagementSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, // 102
+ {"CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED", {"hipDeviceAttributeVirtualMemoryManagementSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 102
// no analogue
{"CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED", {"hipDeviceAttributeHandleTypePosixFileDescriptorSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 103
// no analogue
@@ -733,6 +750,8 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP {
{"CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING", {"hipDeviceAttributeGpuDirectRdmaWritesOrdering", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 118
// cudaDevAttrMemoryPoolSupportedHandleTypes
{"CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES", {"hipDeviceAttributeMempoolSupportedHandleTypes", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 119
+ // cudaDevAttrClusterLaunch
+ {"CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH", {"hipDeviceAttributeClusterLaunch", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 120
// cudaDevAttrDeferredMappingCudaArraySupported
{"CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED", {"hipDeviceAttributeDeferredMappingCudaArraySupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 121
//
@@ -1038,8 +1057,20 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP {
{"CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES", {"HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 8
// cudaFuncAttributePreferredSharedMemoryCarveout
{"CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT", {"HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 9
+ // cudaFuncAttributeClusterDimMustBeSet
+ {"CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET", {"HIP_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 10
+ // cudaFuncAttributeRequiredClusterWidth
+ {"CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH", {"HIP_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 11
+ // cudaFuncAttributeRequiredClusterHeight
+ {"CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT", {"HIP_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 12
+ // cudaFuncAttributeRequiredClusterDepth
+ {"CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH", {"HIP_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 13
+ // cudaFuncAttributeNonPortableClusterSizeAllowed
+ {"CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED", {"HIP_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 14
+ // cudaFuncAttributeClusterSchedulingPolicyPreference
+ {"CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", {"HIP_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 15
// cudaFuncAttributeMax
- {"CU_FUNC_ATTRIBUTE_MAX", {"HIP_FUNC_ATTRIBUTE_MAX", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 10
+ {"CU_FUNC_ATTRIBUTE_MAX", {"HIP_FUNC_ATTRIBUTE_MAX", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 16
// cudaGraphicsMapFlags
{"CUgraphicsMapResourceFlags", {"hipGraphicsMapFlags", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}},
@@ -1088,9 +1119,9 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP {
// cudaGraphNodeTypeEventRecord = 0x07
{"CU_GRAPH_NODE_TYPE_EVENT_RECORD", {"hipGraphNodeTypeEventRecord", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 7
// cudaGraphNodeTypeExtSemaphoreSignal
- {"CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL", {"hipGraphNodeTypeExtSemaphoreSignal", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, // 8
+ {"CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL", {"hipGraphNodeTypeExtSemaphoreSignal", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 8
// cudaGraphNodeTypeExtSemaphoreWait
- {"CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT", {"hipGraphNodeTypeExtSemaphoreWait", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, // 9
+ {"CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT", {"hipGraphNodeTypeExtSemaphoreWait", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 9
// cudaGraphNodeTypeMemAlloc
{"CU_GRAPH_NODE_TYPE_MEM_ALLOC", {"hipGraphNodeTypeMemAlloc", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 10
// cudaGraphNodeTypeMemFree
@@ -1153,23 +1184,23 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP {
{"CUjit_option", {"hipJitOption", "", CONV_TYPE, API_DRIVER, 1}},
{"CUjit_option_enum", {"hipJitOption", "", CONV_TYPE, API_DRIVER, 1}},
// CUjit_option enum values
- {"CU_JIT_MAX_REGISTERS", {"hipJitOptionMaxRegisters", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 0
- {"CU_JIT_THREADS_PER_BLOCK", {"hipJitOptionThreadsPerBlock", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
- {"CU_JIT_WALL_TIME", {"hipJitOptionWallTime", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
- {"CU_JIT_INFO_LOG_BUFFER", {"hipJitOptionInfoLogBuffer", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
- {"CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES", {"hipJitOptionInfoLogBufferSizeBytes", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
- {"CU_JIT_ERROR_LOG_BUFFER", {"hipJitOptionErrorLogBuffer", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
- {"CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES", {"hipJitOptionErrorLogBufferSizeBytes", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
- {"CU_JIT_OPTIMIZATION_LEVEL", {"hipJitOptionOptimizationLevel", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
- {"CU_JIT_TARGET_FROM_CUCONTEXT", {"hipJitOptionTargetFromContext", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
- {"CU_JIT_TARGET", {"hipJitOptionTarget", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
- {"CU_JIT_FALLBACK_STRATEGY", {"hipJitOptionFallbackStrategy", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
- {"CU_JIT_GENERATE_DEBUG_INFO", {"hipJitOptionGenerateDebugInfo", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
- {"CU_JIT_LOG_VERBOSE", {"hipJitOptionLogVerbose", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
- {"CU_JIT_GENERATE_LINE_INFO", {"hipJitOptionGenerateLineInfo", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
- {"CU_JIT_CACHE_MODE", {"hipJitOptionCacheMode", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
- {"CU_JIT_NEW_SM3X_OPT", {"hipJitOptionSm3xOpt", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
- {"CU_JIT_FAST_COMPILE", {"hipJitOptionFastCompile", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
+ {"CU_JIT_MAX_REGISTERS", {"HIPRTC_JIT_MAX_REGISTERS", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 0
+ {"CU_JIT_THREADS_PER_BLOCK", {"HIPRTC_JIT_THREADS_PER_BLOCK", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
+ {"CU_JIT_WALL_TIME", {"HIPRTC_JIT_WALL_TIME", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
+ {"CU_JIT_INFO_LOG_BUFFER", {"HIPRTC_JIT_INFO_LOG_BUFFER", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
+ {"CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES", {"HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
+ {"CU_JIT_ERROR_LOG_BUFFER", {"HIPRTC_JIT_ERROR_LOG_BUFFER", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
+ {"CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES", {"HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
+ {"CU_JIT_OPTIMIZATION_LEVEL", {"HIPRTC_JIT_OPTIMIZATION_LEVEL", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
+ {"CU_JIT_TARGET_FROM_CUCONTEXT", {"HIPRTC_JIT_TARGET_FROM_HIPCONTEXT", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
+ {"CU_JIT_TARGET", {"HIPRTC_JIT_TARGET", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
+ {"CU_JIT_FALLBACK_STRATEGY", {"HIPRTC_JIT_FALLBACK_STRATEGY", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
+ {"CU_JIT_GENERATE_DEBUG_INFO", {"HIPRTC_JIT_GENERATE_DEBUG_INFO", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
+ {"CU_JIT_LOG_VERBOSE", {"HIPRTC_JIT_LOG_VERBOSE", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
+ {"CU_JIT_GENERATE_LINE_INFO", {"HIPRTC_JIT_GENERATE_LINE_INFO", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
+ {"CU_JIT_CACHE_MODE", {"HIPRTC_JIT_CACHE_MODE", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
+ {"CU_JIT_NEW_SM3X_OPT", {"HIPRTC_JIT_NEW_SM3X_OPT", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
+ {"CU_JIT_FAST_COMPILE", {"HIPRTC_JIT_FAST_COMPILE", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
{"CU_JIT_GLOBAL_SYMBOL_NAMES", {"hipJitGlobalSymbolNames", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}},
{"CU_JIT_GLOBAL_SYMBOL_ADDRESSES", {"hipJitGlobalSymbolAddresses", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}},
{"CU_JIT_GLOBAL_SYMBOL_COUNT", {"hipJitGlobalSymbolCount", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}},
@@ -1183,7 +1214,7 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP {
{"CU_JIT_REFERENCED_VARIABLE_NAMES", {"hipJitReferencedVariableNames", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}},
{"CU_JIT_REFERENCED_VARIABLE_COUNT", {"hipJitReferencedVariableCount", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}},
{"CU_JIT_OPTIMIZE_UNUSED_DEVICE_VARIABLES", {"hipJitOptimizeUnusedDeviceVariables", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}},
- {"CU_JIT_NUM_OPTIONS", {"hipJitOptionNumOptions", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
+ {"CU_JIT_NUM_OPTIONS", {"HIPRTC_JIT_NUM_OPTIONS", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
// no analogue
{"CUjit_target", {"hipJitTarget", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}},
@@ -1217,25 +1248,27 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP {
{"CU_TARGET_COMPUTE_80", {"hipJitTargetCompute80", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 80
{"CU_TARGET_COMPUTE_86", {"hipJitTargetCompute86", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 86
{"CU_TARGET_COMPUTE_87", {"hipJitTargetCompute87", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 87
+ {"CU_TARGET_COMPUTE_89", {"hipJitTargetCompute89", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 89
+ {"CU_TARGET_COMPUTE_90", {"hipJitTargetCompute90", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 90
// no analogue
- {"CUjitInputType", {"hiprtcJITInputType", "", CONV_TYPE, API_DRIVER, 1, HIP_EXPERIMENTAL}},
- {"CUjitInputType_enum", {"hiprtcJITInputType", "", CONV_TYPE, API_DRIVER, 1, HIP_EXPERIMENTAL}},
+ {"CUjitInputType", {"hiprtcJITInputType", "", CONV_TYPE, API_DRIVER, 1}},
+ {"CUjitInputType_enum", {"hiprtcJITInputType", "", CONV_TYPE, API_DRIVER, 1}},
// CUjitInputType enum values
- {"CU_JIT_INPUT_CUBIN", {"HIPRTC_JIT_INPUT_CUBIN", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, // 0
- {"CU_JIT_INPUT_PTX", {"HIPRTC_JIT_INPUT_PTX", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}},
- {"CU_JIT_INPUT_FATBINARY", {"HIPRTC_JIT_INPUT_FATBINARY", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}},
- {"CU_JIT_INPUT_OBJECT", {"HIPRTC_JIT_INPUT_OBJECT", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}},
- {"CU_JIT_INPUT_LIBRARY", {"HIPRTC_JIT_INPUT_LIBRARY", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}},
- {"CU_JIT_INPUT_NVVM", {"HIPRTC_JIT_INPUT_NVVM", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}},
- {"CU_JIT_NUM_INPUT_TYPES", {"HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}},
+ {"CU_JIT_INPUT_CUBIN", {"HIPRTC_JIT_INPUT_CUBIN", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 0
+ {"CU_JIT_INPUT_PTX", {"HIPRTC_JIT_INPUT_PTX", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
+ {"CU_JIT_INPUT_FATBINARY", {"HIPRTC_JIT_INPUT_FATBINARY", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
+ {"CU_JIT_INPUT_OBJECT", {"HIPRTC_JIT_INPUT_OBJECT", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
+ {"CU_JIT_INPUT_LIBRARY", {"HIPRTC_JIT_INPUT_LIBRARY", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
+ {"CU_JIT_INPUT_NVVM", {"HIPRTC_JIT_INPUT_NVVM", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
+ {"CU_JIT_NUM_INPUT_TYPES", {"HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
// cudaLimit
{"CUlimit", {"hipLimit_t", "", CONV_TYPE, API_DRIVER, 1}},
{"CUlimit_enum", {"hipLimit_t", "", CONV_TYPE, API_DRIVER, 1}},
// CUlimit enum values
// cudaLimitStackSize
- {"CU_LIMIT_STACK_SIZE", {"hipLimitStackSize", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, // 0x00
+ {"CU_LIMIT_STACK_SIZE", {"hipLimitStackSize", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 0x00
// cudaLimitPrintfFifoSize
{"CU_LIMIT_PRINTF_FIFO_SIZE", {"hipLimitPrintfFifoSize", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 0x01
// cudaLimitMallocHeapSize
@@ -1584,6 +1617,8 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP {
{"CUDA_ERROR_MPS_MAX_CLIENTS_REACHED", {"hipErrorMpsMaxClientsReached", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 808
// cudaErrorMpsMaxConnectionsReached
{"CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED", {"hipErrorMpsMaxConnectionsReached", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 809
+ // cudaErrorMpsClientTerminated
+ {"CUDA_ERROR_MPS_CLIENT_TERMINATED", {"hipErrorMpsClientTerminated", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 810
// cudaErrorStreamCaptureUnsupported
{"CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED", {"hipErrorStreamCaptureUnsupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 900
// cudaErrorStreamCaptureInvalidated
@@ -1608,6 +1643,8 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP {
{"CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE", {"hipErrorGraphExecUpdateFailure", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 910
// cudaErrorExternalDevice
{"CUDA_ERROR_EXTERNAL_DEVICE", {"hipErrorExternalDevice", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 911
+ // cudaErrorInvalidClusterSize
+ {"CUDA_ERROR_INVALID_CLUSTER_SIZE", {"hipErrorInvalidClusterSize", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 912
// cudaErrorUnknown
{"CUDA_ERROR_UNKNOWN", {"hipErrorUnknown", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 999
@@ -1871,7 +1908,7 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP {
// cudaKernelNodeAttrID
{"CUkernelNodeAttrID", {"hipKernelNodeAttrID", "", CONV_TYPE, API_DRIVER, 1}},
- {"CUkernelNodeAttrID_enum", {"hipKernelNodeAttrID", "", CONV_TYPE, API_DRIVER, 1}},
+ {"CUkernelNodeAttrID_enum", {"hipKernelNodeAttrID", "", CONV_TYPE, API_DRIVER, 1, CUDA_REMOVED}},
// CUkernelNodeAttrID_enum enum values
// cudaKernelNodeAttributeAccessPolicyWindow
{"CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW", {"hipKernelNodeAttributeAccessPolicyWindow", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 1
@@ -2046,18 +2083,18 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP {
{"CU_GRAPH_DEBUG_DOT_FLAGS_BATCH_MEM_OP_NODE_PARAMS", {"hipGraphDebugDotFlagsBatchMemOpNodeParams", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 1<<13
// cudaUserObjectFlags
- {"CUuserObject_flags", {"hipUserObjectFlags", "", CONV_TYPE, API_DRIVER, 1, HIP_EXPERIMENTAL}},
- {"CUuserObject_flags_enum", {"hipUserObjectFlags", "", CONV_TYPE, API_DRIVER, 1, HIP_EXPERIMENTAL}},
+ {"CUuserObject_flags", {"hipUserObjectFlags", "", CONV_TYPE, API_DRIVER, 1}},
+ {"CUuserObject_flags_enum", {"hipUserObjectFlags", "", CONV_TYPE, API_DRIVER, 1}},
// CUuserObject_flags enum values
// cudaUserObjectNoDestructorSync
- {"CU_USER_OBJECT_NO_DESTRUCTOR_SYNC", {"hipUserObjectNoDestructorSync", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, // 1
+ {"CU_USER_OBJECT_NO_DESTRUCTOR_SYNC", {"hipUserObjectNoDestructorSync", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 1
// cudaUserObjectRetainFlags
- {"CUuserObjectRetain_flags", {"hipUserObjectRetainFlags", "", CONV_TYPE, API_DRIVER, 1, HIP_EXPERIMENTAL}},
- {"CUuserObjectRetain_flags_enum", {"hipUserObjectRetainFlags", "", CONV_TYPE, API_DRIVER, 1, HIP_EXPERIMENTAL}},
+ {"CUuserObjectRetain_flags", {"hipUserObjectRetainFlags", "", CONV_TYPE, API_DRIVER, 1}},
+ {"CUuserObjectRetain_flags_enum", {"hipUserObjectRetainFlags", "", CONV_TYPE, API_DRIVER, 1}},
// CUuserObjectRetain_flags enum values
// cudaGraphUserObjectMove
- {"CU_GRAPH_USER_OBJECT_MOVE", {"hipGraphUserObjectMove", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, // 1
+ {"CU_GRAPH_USER_OBJECT_MOVE", {"hipGraphUserObjectMove", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 1
// no analogue
{"CUexecAffinityType", {"hipExecAffinityType", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}},
@@ -2067,17 +2104,17 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP {
{"CU_EXEC_AFFINITY_TYPE_MAX", {"hipExecAffinityTypeMax", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, //
// cudaGraphMemAttributeType
- {"CUgraphMem_attribute", {"hipGraphMemAttributeType", "", CONV_TYPE, API_DRIVER, 1, HIP_EXPERIMENTAL}},
- {"CUgraphMem_attribute_enum", {"hipGraphMemAttributeType", "", CONV_TYPE, API_DRIVER, 1, HIP_EXPERIMENTAL}},
+ {"CUgraphMem_attribute", {"hipGraphMemAttributeType", "", CONV_TYPE, API_DRIVER, 1}},
+ {"CUgraphMem_attribute_enum", {"hipGraphMemAttributeType", "", CONV_TYPE, API_DRIVER, 1}},
// CUgraphMem_attribute enum values
// cudaGraphMemAttrUsedMemCurrent
- {"CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT", {"hipGraphMemAttrUsedMemCurrent", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}},
+ {"CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT", {"hipGraphMemAttrUsedMemCurrent", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
// cudaGraphMemAttrUsedMemHigh
- {"CU_GRAPH_MEM_ATTR_USED_MEM_HIGH", {"hipGraphMemAttrUsedMemHigh", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}},
+ {"CU_GRAPH_MEM_ATTR_USED_MEM_HIGH", {"hipGraphMemAttrUsedMemHigh", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
// cudaGraphMemAttrReservedMemCurrent
- {"CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT", {"hipGraphMemAttrReservedMemCurrent", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}},
+ {"CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT", {"hipGraphMemAttrReservedMemCurrent", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
// cudaGraphMemAttrReservedMemHigh
- {"CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH", {"hipGraphMemAttrReservedMemHigh", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}},
+ {"CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH", {"hipGraphMemAttrReservedMemHigh", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}},
// cudaGraphInstantiateFlags
{"CUgraphInstantiate_flags", {"hipGraphInstantiateFlags", "", CONV_TYPE, API_DRIVER, 1}},
@@ -2109,6 +2146,79 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP {
{"CU_MODULE_EAGER_LOADING", {"HIP_MODULE_EAGER_LOADING", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}},
{"CU_MODULE_LAZY_LOADING", {"HIP_MODULE_LAZY_LOADING", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}},
+ //
+ {"CUevent_sched_flags", {"hipEventSchedFlags", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}},
+ {"CUevent_sched_flags_enum", {"hipEventSchedFlags", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}},
+ // CUevent_sched_flags enum values
+ //
+ {"CU_EVENT_SCHED_AUTO", {"HIP_EVENT_SCHED_AUTO", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}},
+ //
+ {"CU_EVENT_SCHED_SPIN", {"HIP_EVENT_SCHED_SPIN", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}},
+ //
+ {"CU_EVENT_SCHED_YIELD", {"HIP_EVENT_SCHED_YIELD", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}},
+ //
+ {"CU_EVENT_SCHED_BLOCKING_SYNC", {"HIP_EVENT_SCHED_BLOCKING_SYNC", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}},
+
+ //
+ {"cl_event_flags", {"hipClEventFlags", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}},
+ {"cl_event_flags_enum", {"hipClEventFlags", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}},
+ // cl_event_flags enum values
+ //
+ {"NVCL_EVENT_SCHED_AUTO", {"HIP_CL_EVENT_SCHED_AUTO", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}},
+ //
+ {"NVCL_EVENT_SCHED_SPIN", {"HIP_CL_EVENT_SCHED_SPIN", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}},
+ //
+ {"NVCL_EVENT_SCHED_YIELD", {"HIP_CL_EVENT_SCHED_YIELD", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}},
+ //
+ {"NVCL_EVENT_SCHED_BLOCKING_SYNC", {"HIP_CL_EVENT_SCHED_BLOCKING_SYNC", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}},
+
+ //
+ {"cl_context_flags", {"hipClContextFlags", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}},
+ {"cl_context_flags_enum", {"hipClContextFlags", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}},
+ // cl_context_flags enum values
+ //
+ {"NVCL_CTX_SCHED_AUTO", {"HIP_CL_CTX_SCHED_AUTO", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}},
+ //
+ {"NVCL_CTX_SCHED_SPIN", {"HIP_CL_CTX_SCHED_SPIN", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}},
+ //
+ {"NVCL_CTX_SCHED_YIELD", {"HIP_CL_CTX_SCHED_YIELD", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}},
+ //
+ {"NVCL_CTX_SCHED_BLOCKING_SYNC", {"HIP_CL_CTX_SCHED_BLOCKING_SYNC", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}},
+
+ // cudaClusterSchedulingPolicy
+ {"CUclusterSchedulingPolicy", {"hipClusterSchedulingPolicy", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}},
+ {"CUclusterSchedulingPolicy_enum", {"hipClusterSchedulingPolicy", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}},
+ // CUclusterSchedulingPolicy enum values
+ // cudaClusterSchedulingPolicyDefault
+ {"CU_CLUSTER_SCHEDULING_POLICY_DEFAULT", {"hipClusterSchedulingPolicyDefault", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}},
+ // cudaClusterSchedulingPolicySpread
+ {"CU_CLUSTER_SCHEDULING_POLICY_SPREAD", {"hipClusterSchedulingPolicySpread", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}},
+ // cudaClusterSchedulingPolicyLoadBalancing
+ {"CU_CLUSTER_SCHEDULING_POLICY_LOAD_BALANCING", {"hipClusterSchedulingPolicyLoadBalancing", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}},
+
+ // cudaLaunchAttributeID
+ {"CUlaunchAttributeID", {"hipLaunchAttributeID", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}},
+ {"CUlaunchAttributeID_enum", {"hipLaunchAttributeID", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}},
+ // CUlaunchAttributeID enum values
+ // cudaLaunchAttributeIgnore
+ {"CU_LAUNCH_ATTRIBUTE_IGNORE", {"hipLaunchAttributeIgnore", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}},
+ // cudaLaunchAttributeAccessPolicyWindow
+ {"CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW", {"hipLaunchAttributeAccessPolicyWindow", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}},
+ // cudaLaunchAttributeCooperative
+ {"CU_LAUNCH_ATTRIBUTE_COOPERATIVE", {"hipLaunchAttributeCooperative", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}},
+ // cudaLaunchAttributeSynchronizationPolicy
+ {"CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY", {"hipLaunchAttributeSynchronizationPolicy", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}},
+ // cudaLaunchAttributeClusterDimension
+ {"CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION", {"hipLaunchAttributeClusterDimension", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}},
+ // cudaLaunchAttributeClusterSchedulingPolicyPreference
+ {"CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", {"hipLaunchAttributeClusterSchedulingPolicyPreference", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}},
+ // cudaLaunchAttributeProgrammaticStreamSerialization
+ {"CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION", {"hipLaunchAttributeProgrammaticStreamSerialization", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}},
+ // cudaLaunchAttributeProgrammaticEvent
+ {"CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT", {"hipLaunchAttributeProgrammaticEvent", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}},
+ // cudaLaunchAttributePriority
+ {"CU_LAUNCH_ATTRIBUTE_PRIORITY", {"hipLaunchAttributePriority", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}},
+
// 4. Typedefs
// no analogue
@@ -2227,6 +2337,10 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP {
{"CU_LAUNCH_PARAM_BUFFER_POINTER_AS_INT", {"HIP_LAUNCH_PARAM_BUFFER_POINTER_AS_INT", "", CONV_DEFINE, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 0x01
//
{"CU_LAUNCH_PARAM_BUFFER_SIZE_AS_INT", {"HIP_LAUNCH_PARAM_BUFFER_SIZE_AS_INT", "", CONV_DEFINE, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 0x02
+ // cudaKernelNodeAttributeClusterDimension
+ {"CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_DIMENSION", {"hipKernelNodeAttributeClusterDimension", "", CONV_DEFINE, API_DRIVER, 1, HIP_UNSUPPORTED}}, // CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION
+ // cudaKernelNodeAttributeClusterSchedulingPolicyPreference
+ {"CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", {"hipKernelNodeAttributeClusterSchedulingPolicyPreference", "", CONV_DEFINE, API_DRIVER, 1, HIP_UNSUPPORTED}}, // CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE
};
const std::map CUDA_DRIVER_TYPE_NAME_VER_MAP {
@@ -2274,7 +2388,7 @@ const std::map CUDA_DRIVER_TYPE_NAME_VER_MAP {
{"CUstreamBatchMemOpParams", {CUDA_80, CUDA_0, CUDA_0 }},
{"CUstreamBatchMemOpParams_union", {CUDA_80, CUDA_0, CUDA_0 }},
{"CUkernelNodeAttrValue", {CUDA_110, CUDA_0, CUDA_0 }},
- {"CUkernelNodeAttrValue_union", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"CUkernelNodeAttrValue_union", {CUDA_110, CUDA_0, CUDA_118}},
{"CUstreamAttrValue", {CUDA_110, CUDA_0, CUDA_0 }},
{"CUstreamAttrValue_union", {CUDA_110, CUDA_0, CUDA_0 }},
{"CU_COMPUTEMODE_EXCLUSIVE", {CUDA_0, CUDA_0, CUDA_80 }},
@@ -2557,7 +2671,7 @@ const std::map CUDA_DRIVER_TYPE_NAME_VER_MAP {
{"CU_SYNC_POLICY_YIELD", {CUDA_110, CUDA_0, CUDA_0 }},
{"CU_SYNC_POLICY_BLOCKING_SYNC", {CUDA_110, CUDA_0, CUDA_0 }},
{"CUkernelNodeAttrID", {CUDA_110, CUDA_0, CUDA_0 }},
- {"CUkernelNodeAttrID_enum", {CUDA_110, CUDA_0, CUDA_0 }},
+ {"CUkernelNodeAttrID_enum", {CUDA_110, CUDA_0, CUDA_118}},
{"CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW", {CUDA_110, CUDA_0, CUDA_0 }},
{"CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE", {CUDA_110, CUDA_0, CUDA_0 }},
{"CUstreamAttrID", {CUDA_110, CUDA_0, CUDA_0 }},
@@ -2863,6 +2977,59 @@ const std::map CUDA_DRIVER_TYPE_NAME_VER_MAP {
{"CUmoduleLoadingMode_enum", {CUDA_117, CUDA_0, CUDA_0 }},
{"CU_MODULE_EAGER_LOADING", {CUDA_117, CUDA_0, CUDA_0 }},
{"CU_MODULE_LAZY_LOADING", {CUDA_117, CUDA_0, CUDA_0 }},
+ {"CUevent_sched_flags", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CUevent_sched_flags_enum", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CU_EVENT_SCHED_AUTO", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CU_EVENT_SCHED_SPIN", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CU_EVENT_SCHED_YIELD", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CU_EVENT_SCHED_BLOCKING_SYNC", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cl_event_flags", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cl_event_flags_enum", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"NVCL_EVENT_SCHED_AUTO", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"NVCL_EVENT_SCHED_SPIN", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"NVCL_EVENT_SCHED_YIELD", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"NVCL_EVENT_SCHED_BLOCKING_SYNC", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cl_context_flags", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cl_context_flags_enum", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"NVCL_CTX_SCHED_AUTO", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"NVCL_CTX_SCHED_SPIN", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"NVCL_CTX_SCHED_YIELD", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"NVCL_CTX_SCHED_BLOCKING_SYNC", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CU_TARGET_COMPUTE_89", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CU_TARGET_COMPUTE_90", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CUclusterSchedulingPolicy", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CUclusterSchedulingPolicy_enum", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CU_CLUSTER_SCHEDULING_POLICY_DEFAULT", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CU_CLUSTER_SCHEDULING_POLICY_SPREAD", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CU_CLUSTER_SCHEDULING_POLICY_LOAD_BALANCING", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CUlaunchAttributeID", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CUlaunchAttributeID_enum", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CU_LAUNCH_ATTRIBUTE_IGNORE", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CU_LAUNCH_ATTRIBUTE_COOPERATIVE", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CU_LAUNCH_ATTRIBUTE_PRIORITY", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CUlaunchAttributeValue", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CUlaunchAttributeValue_union", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CUlaunchAttribute", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CUlaunchAttribute_st", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CUlaunchConfig", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CUlaunchConfig_st", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_DIMENSION", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CUDA_ERROR_MPS_CLIENT_TERMINATED", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"CUDA_ERROR_INVALID_CLUSTER_SIZE", {CUDA_118, CUDA_0, CUDA_0 }},
};
const std::map HIP_DRIVER_TYPE_NAME_VER_MAP {
@@ -2941,24 +3108,24 @@ const std::map HIP_DRIVER_TYPE_NAME_VER_MAP {
{"hipMemRangeAttributeAccessedBy", {HIP_3070, HIP_0, HIP_0 }},
{"hipMemRangeAttributeLastPrefetchLocation", {HIP_3070, HIP_0, HIP_0 }},
{"hipJitOption", {HIP_1060, HIP_0, HIP_0 }},
- {"hipJitOptionMaxRegisters", {HIP_1060, HIP_0, HIP_0 }},
- {"hipJitOptionThreadsPerBlock", {HIP_1060, HIP_0, HIP_0 }},
- {"hipJitOptionWallTime", {HIP_1060, HIP_0, HIP_0 }},
- {"hipJitOptionInfoLogBuffer", {HIP_1060, HIP_0, HIP_0 }},
- {"hipJitOptionInfoLogBufferSizeBytes", {HIP_1060, HIP_0, HIP_0 }},
- {"hipJitOptionErrorLogBuffer", {HIP_1060, HIP_0, HIP_0 }},
- {"hipJitOptionErrorLogBufferSizeBytes", {HIP_1060, HIP_0, HIP_0 }},
- {"hipJitOptionOptimizationLevel", {HIP_1060, HIP_0, HIP_0 }},
- {"hipJitOptionTargetFromContext", {HIP_1060, HIP_0, HIP_0 }},
- {"hipJitOptionTarget", {HIP_1060, HIP_0, HIP_0 }},
- {"hipJitOptionFallbackStrategy", {HIP_1060, HIP_0, HIP_0 }},
- {"hipJitOptionGenerateDebugInfo", {HIP_1060, HIP_0, HIP_0 }},
- {"hipJitOptionLogVerbose", {HIP_1060, HIP_0, HIP_0 }},
- {"hipJitOptionGenerateLineInfo", {HIP_1060, HIP_0, HIP_0 }},
- {"hipJitOptionCacheMode", {HIP_1060, HIP_0, HIP_0 }},
- {"hipJitOptionSm3xOpt", {HIP_1060, HIP_0, HIP_0 }},
- {"hipJitOptionFastCompile", {HIP_1060, HIP_0, HIP_0 }},
- {"hipJitOptionNumOptions", {HIP_1060, HIP_0, HIP_0 }},
+ {"HIPRTC_JIT_MAX_REGISTERS", {HIP_1060, HIP_0, HIP_0 }},
+ {"HIPRTC_JIT_THREADS_PER_BLOCK", {HIP_1060, HIP_0, HIP_0 }},
+ {"HIPRTC_JIT_WALL_TIME", {HIP_1060, HIP_0, HIP_0 }},
+ {"HIPRTC_JIT_INFO_LOG_BUFFER", {HIP_1060, HIP_0, HIP_0 }},
+ {"HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES", {HIP_1060, HIP_0, HIP_0 }},
+ {"HIPRTC_JIT_ERROR_LOG_BUFFER", {HIP_1060, HIP_0, HIP_0 }},
+ {"HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES", {HIP_1060, HIP_0, HIP_0 }},
+ {"HIPRTC_JIT_OPTIMIZATION_LEVEL", {HIP_1060, HIP_0, HIP_0 }},
+ {"HIPRTC_JIT_TARGET_FROM_HIPCONTEXT", {HIP_1060, HIP_0, HIP_0 }},
+ {"HIPRTC_JIT_TARGET", {HIP_1060, HIP_0, HIP_0 }},
+ {"HIPRTC_JIT_FALLBACK_STRATEGY", {HIP_1060, HIP_0, HIP_0 }},
+ {"HIPRTC_JIT_GENERATE_DEBUG_INFO", {HIP_1060, HIP_0, HIP_0 }},
+ {"HIPRTC_JIT_LOG_VERBOSE", {HIP_1060, HIP_0, HIP_0 }},
+ {"HIPRTC_JIT_GENERATE_LINE_INFO", {HIP_1060, HIP_0, HIP_0 }},
+ {"HIPRTC_JIT_CACHE_MODE", {HIP_1060, HIP_0, HIP_0 }},
+ {"HIPRTC_JIT_NEW_SM3X_OPT", {HIP_1060, HIP_0, HIP_0 }},
+ {"HIPRTC_JIT_FAST_COMPILE", {HIP_1060, HIP_0, HIP_0 }},
+ {"HIPRTC_JIT_NUM_OPTIONS", {HIP_1060, HIP_0, HIP_0 }},
{"hipFuncCache_t", {HIP_1060, HIP_0, HIP_0 }},
{"hipFuncCachePreferNone", {HIP_1060, HIP_0, HIP_0 }},
{"hipFuncCachePreferShared", {HIP_1060, HIP_0, HIP_0 }},
@@ -3288,17 +3455,17 @@ const std::map HIP_DRIVER_TYPE_NAME_VER_MAP {
{"hipArraySparseSubresourceType", {HIP_5020, HIP_0, HIP_0 }},
{"hipArraySparseSubresourceTypeSparseLevel", {HIP_5020, HIP_0, HIP_0 }},
{"hipArraySparseSubresourceTypeMiptail", {HIP_5020, HIP_0, HIP_0 }},
- {"hipDeviceAttributeVirtualMemoryManagementSupported", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"hipUserObject", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"hipUserObject_t", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"hiprtcJITInputType", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"HIPRTC_JIT_INPUT_CUBIN", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"HIPRTC_JIT_INPUT_PTX", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"HIPRTC_JIT_INPUT_FATBINARY", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"HIPRTC_JIT_INPUT_OBJECT", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"HIPRTC_JIT_INPUT_LIBRARY", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"HIPRTC_JIT_INPUT_NVVM", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"ihiprtcLinkState", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"hiprtcLinkState", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
+ {"hipDeviceAttributeVirtualMemoryManagementSupported", {HIP_5030, HIP_0, HIP_0 }},
+ {"hipUserObject", {HIP_5030, HIP_0, HIP_0 }},
+ {"hipUserObject_t", {HIP_5030, HIP_0, HIP_0 }},
+ {"hiprtcJITInputType", {HIP_5030, HIP_0, HIP_0 }},
+ {"HIPRTC_JIT_INPUT_CUBIN", {HIP_5030, HIP_0, HIP_0 }},
+ {"HIPRTC_JIT_INPUT_PTX", {HIP_5030, HIP_0, HIP_0 }},
+ {"HIPRTC_JIT_INPUT_FATBINARY", {HIP_5030, HIP_0, HIP_0 }},
+ {"HIPRTC_JIT_INPUT_OBJECT", {HIP_5030, HIP_0, HIP_0 }},
+ {"HIPRTC_JIT_INPUT_LIBRARY", {HIP_5030, HIP_0, HIP_0 }},
+ {"HIPRTC_JIT_INPUT_NVVM", {HIP_5030, HIP_0, HIP_0 }},
+ {"HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES", {HIP_5030, HIP_0, HIP_0 }},
+ {"ihiprtcLinkState", {HIP_5030, HIP_0, HIP_0 }},
+ {"hiprtcLinkState", {HIP_5030, HIP_0, HIP_0 }},
};
diff --git a/src/CUDA2HIP_FFT_API_functions.cpp b/src/CUDA2HIP_FFT_API_functions.cpp
index a4db4976..01f9a2e2 100644
--- a/src/CUDA2HIP_FFT_API_functions.cpp
+++ b/src/CUDA2HIP_FFT_API_functions.cpp
@@ -84,6 +84,7 @@ const std::map CUDA_FFT_FUNCTION_MAP {
{"cufftXtExec", {"hipfftXtExec", "", CONV_LIB_FUNC, API_FFT, 2, HIP_UNSUPPORTED}},
{"cufftXtExecDescriptor", {"hipfftXtExecDescriptor", "", CONV_LIB_FUNC, API_FFT, 2, HIP_UNSUPPORTED}},
{"cufftXtSetWorkAreaPolicy", {"hipfftXtSetWorkAreaPolicy", "", CONV_LIB_FUNC, API_FFT, 2, HIP_UNSUPPORTED}},
+ {"cufftXtSetDistribution", {"hipfftXtSetDistribution", "", CONV_LIB_FUNC, API_FFT, 2, HIP_UNSUPPORTED}},
};
const std::map CUDA_FFT_FUNCTION_VER_MAP {
@@ -93,7 +94,9 @@ const std::map CUDA_FFT_FUNCTION_VER_MAP {
{"cufftXtMakePlanMany", {CUDA_80, CUDA_0, CUDA_0}},
{"cufftXtGetSizeMany", {CUDA_80, CUDA_0, CUDA_0}},
{"cufftXtExec", {CUDA_80, CUDA_0, CUDA_0}},
+ {"cufftXtExecDescriptor", {CUDA_80, CUDA_0, CUDA_0}},
{"cufftXtSetWorkAreaPolicy", {CUDA_92, CUDA_0, CUDA_0}},
+ {"cufftXtSetDistribution", {CUDA_118, CUDA_0, CUDA_0}},
};
const std::map HIP_FFT_FUNCTION_VER_MAP {
diff --git a/src/CUDA2HIP_FFT_API_types.cpp b/src/CUDA2HIP_FFT_API_types.cpp
index 9b003869..8831f73f 100644
--- a/src/CUDA2HIP_FFT_API_types.cpp
+++ b/src/CUDA2HIP_FFT_API_types.cpp
@@ -26,155 +26,163 @@ THE SOFTWARE.
const std::map CUDA_FFT_TYPE_NAME_MAP {
// cuFFT defines
- {"CUFFT_VER_MAJOR", {"HIPRAND_VER_MAJOR", "", CONV_DEFINE, API_FFT, 1, HIP_UNSUPPORTED}},
- {"CUFFT_VER_MINOR", {"HIPRAND_VER_MINOR", "", CONV_DEFINE, API_FFT, 1, HIP_UNSUPPORTED}},
- {"CUFFT_VER_PATCH", {"HIPRAND_VER_PATCH", "", CONV_DEFINE, API_FFT, 1, HIP_UNSUPPORTED}},
- {"CUFFT_VER_BUILD", {"HIPRAND_VER_BUILD", "", CONV_DEFINE, API_FFT, 1, HIP_UNSUPPORTED}},
- {"CUFFT_VERSION", {"HIPRAND_VERSION", "", CONV_DEFINE, API_FFT, 1, HIP_UNSUPPORTED}},
- {"CUFFT_FORWARD", {"HIPFFT_FORWARD", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // -1
- {"CUFFT_INVERSE", {"HIPFFT_BACKWARD", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 1
- {"CUFFT_COMPATIBILITY_DEFAULT", {"HIPFFT_COMPATIBILITY_DEFAULT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // CUFFT_COMPATIBILITY_FFTW_PADDING
- {"MAX_CUFFT_ERROR", {"HIPFFT_MAX_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x11
+ {"CUFFT_VER_MAJOR", {"HIPRAND_VER_MAJOR", "", CONV_DEFINE, API_FFT, 1, UNSUPPORTED}},
+ {"CUFFT_VER_MINOR", {"HIPRAND_VER_MINOR", "", CONV_DEFINE, API_FFT, 1, UNSUPPORTED}},
+ {"CUFFT_VER_PATCH", {"HIPRAND_VER_PATCH", "", CONV_DEFINE, API_FFT, 1, UNSUPPORTED}},
+ {"CUFFT_VER_BUILD", {"HIPRAND_VER_BUILD", "", CONV_DEFINE, API_FFT, 1, UNSUPPORTED}},
+ {"CUFFT_VERSION", {"HIPRAND_VERSION", "", CONV_DEFINE, API_FFT, 1, UNSUPPORTED}},
+ {"CUFFT_FORWARD", {"HIPFFT_FORWARD", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // -1
+ {"CUFFT_INVERSE", {"HIPFFT_BACKWARD", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 1
+ {"CUFFT_COMPATIBILITY_DEFAULT", {"HIPFFT_COMPATIBILITY_DEFAULT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // CUFFT_COMPATIBILITY_FFTW_PADDING
+ {"MAX_CUFFT_ERROR", {"HIPFFT_MAX_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x11
// cuFFT enums
- {"cufftResult_t", {"hipfftResult_t", "", CONV_TYPE, API_FFT, 1}},
- {"cufftResult", {"hipfftResult", "", CONV_TYPE, API_FFT, 1}},
- {"CUFFT_SUCCESS", {"HIPFFT_SUCCESS", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x0 0
- {"CUFFT_INVALID_PLAN", {"HIPFFT_INVALID_PLAN", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x1 1
- {"CUFFT_ALLOC_FAILED", {"HIPFFT_ALLOC_FAILED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x2 2
- {"CUFFT_INVALID_TYPE", {"HIPFFT_INVALID_TYPE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x3 3
- {"CUFFT_INVALID_VALUE", {"HIPFFT_INVALID_VALUE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x4 4
- {"CUFFT_INTERNAL_ERROR", {"HIPFFT_INTERNAL_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x5 5
- {"CUFFT_EXEC_FAILED", {"HIPFFT_EXEC_FAILED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x6 6
- {"CUFFT_SETUP_FAILED", {"HIPFFT_SETUP_FAILED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x7 7
- {"CUFFT_INVALID_SIZE", {"HIPFFT_INVALID_SIZE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x8 8
- {"CUFFT_UNALIGNED_DATA", {"HIPFFT_UNALIGNED_DATA", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x9 9
- {"CUFFT_INCOMPLETE_PARAMETER_LIST", {"HIPFFT_INCOMPLETE_PARAMETER_LIST", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xA 10
- {"CUFFT_INVALID_DEVICE", {"HIPFFT_INVALID_DEVICE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xB 11
- {"CUFFT_PARSE_ERROR", {"HIPFFT_PARSE_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xC 12
- {"CUFFT_NO_WORKSPACE", {"HIPFFT_NO_WORKSPACE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xD 13
- {"CUFFT_NOT_IMPLEMENTED", {"HIPFFT_NOT_IMPLEMENTED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xE 14
- {"CUFFT_LICENSE_ERROR", {"HIPFFT_LICENSE_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}},
- {"CUFFT_NOT_SUPPORTED", {"HIPFFT_NOT_SUPPORTED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x10 16
-
- {"cufftType_t", {"hipfftType_t", "", CONV_TYPE, API_FFT, 1}},
- {"cufftType", {"hipfftType", "", CONV_TYPE, API_FFT, 1}},
- {"CUFFT_R2C", {"HIPFFT_R2C", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x2a
- {"CUFFT_C2R", {"HIPFFT_C2R", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x2c
- {"CUFFT_C2C", {"HIPFFT_C2C", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x29
- {"CUFFT_D2Z", {"HIPFFT_D2Z", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x6a
- {"CUFFT_Z2D", {"HIPFFT_Z2D", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x6c
- {"CUFFT_Z2Z", {"HIPFFT_Z2Z", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x69
-
- {"cufftCompatibility_t", {"hipfftCompatibility_t", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}},
- {"cufftCompatibility", {"hipfftCompatibility", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}},
- {"CUFFT_COMPATIBILITY_FFTW_PADDING", {"HIPFFT_COMPATIBILITY_FFTW_PADDING", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x01
-
- {"cufftXtSubFormat_t", {"hipfftXtSubFormat_t", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}},
- {"cufftXtSubFormat", {"hipfftXtSubFormat", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}},
- {"CUFFT_XT_FORMAT_INPUT", {"HIPFFT_XT_FORMAT_INPUT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x00
- {"CUFFT_XT_FORMAT_OUTPUT", {"HIPFFT_XT_FORMAT_OUTPUT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x01
- {"CUFFT_XT_FORMAT_INPLACE", {"HIPFFT_XT_FORMAT_INPLACE", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x02
- {"CUFFT_XT_FORMAT_INPLACE_SHUFFLED", {"HIPFFT_XT_FORMAT_INPLACE_SHUFFLED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x03
- {"CUFFT_XT_FORMAT_1D_INPUT_SHUFFLED",{"HIPFFT_XT_FORMAT_1D_INPUT_SHUFFLED","", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x04
- {"CUFFT_FORMAT_UNDEFINED", {"HIPFFT_FORMAT_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x05
-
- {"cufftXtCopyType_t", {"hipfftXtCopyType_t", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}},
- {"cufftXtCopyType", {"hipfftXtCopyType", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}},
- {"CUFFT_COPY_HOST_TO_DEVICE", {"HIPFFT_COPY_HOST_TO_DEVICE", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x00
- {"CUFFT_COPY_DEVICE_TO_HOST", {"HIPFFT_COPY_DEVICE_TO_HOST", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x01
- {"CUFFT_COPY_DEVICE_TO_DEVICE", {"HIPFFT_COPY_DEVICE_TO_DEVICE", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x02
- {"CUFFT_COPY_UNDEFINED", {"HIPFFT_COPY_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x03
-
- {"cufftXtQueryType_t", {"hipfftXtQueryType_t", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}},
- {"cufftXtQueryType", {"hipfftXtQueryType", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}},
- {"CUFFT_QUERY_1D_FACTORS", {"HIPFFT_QUERY_1D_FACTORS", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x00
- {"CUFFT_QUERY_UNDEFINED", {"HIPFFT_QUERY_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x01
-
- {"cufftXtWorkAreaPolicy_t", {"hipfftXtWorkAreaPolicy_t", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}},
- {"cufftXtWorkAreaPolicy", {"hipfftXtWorkAreaPolicy", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}},
- {"CUFFT_WORKAREA_MINIMAL", {"HIPFFT_WORKAREA_MINIMAL", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0
- {"CUFFT_WORKAREA_USER", {"HIPFFT_WORKAREA_USER", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 1
- {"CUFFT_WORKAREA_PERFORMANCE", {"HIPFFT_WORKAREA_PERFORMANCE", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 2
-
- {"cufftXtCallbackType_t", {"hipfftXtCallbackType_t", "", CONV_TYPE, API_FFT, 1}},
- {"cufftXtCallbackType", {"hipfftXtCallbackType", "", CONV_TYPE, API_FFT, 1}},
- {"CUFFT_CB_LD_COMPLEX", {"HIPFFT_CB_LD_COMPLEX", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x0
- {"CUFFT_CB_LD_COMPLEX_DOUBLE", {"HIPFFT_CB_LD_COMPLEX_DOUBLE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x1
- {"CUFFT_CB_LD_REAL", {"HIPFFT_CB_LD_REAL", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x2
- {"CUFFT_CB_LD_REAL_DOUBLE", {"HIPFFT_CB_LD_REAL_DOUBLE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x3
- {"CUFFT_CB_ST_COMPLEX", {"HIPFFT_CB_ST_COMPLEX", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x4
- {"CUFFT_CB_ST_COMPLEX_DOUBLE", {"HIPFFT_CB_ST_COMPLEX_DOUBLE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x5
- {"CUFFT_CB_ST_REAL", {"HIPFFT_CB_ST_REAL", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x6
- {"CUFFT_CB_ST_REAL_DOUBLE", {"HIPFFT_CB_ST_REAL_DOUBLE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x7
- {"CUFFT_CB_UNDEFINED", {"HIPFFT_CB_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x7
+ {"cufftResult_t", {"hipfftResult_t", "", CONV_TYPE, API_FFT, 1}},
+ {"cufftResult", {"hipfftResult", "", CONV_TYPE, API_FFT, 1}},
+ {"CUFFT_SUCCESS", {"HIPFFT_SUCCESS", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x0 0
+ {"CUFFT_INVALID_PLAN", {"HIPFFT_INVALID_PLAN", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x1 1
+ {"CUFFT_ALLOC_FAILED", {"HIPFFT_ALLOC_FAILED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x2 2
+ {"CUFFT_INVALID_TYPE", {"HIPFFT_INVALID_TYPE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x3 3
+ {"CUFFT_INVALID_VALUE", {"HIPFFT_INVALID_VALUE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x4 4
+ {"CUFFT_INTERNAL_ERROR", {"HIPFFT_INTERNAL_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x5 5
+ {"CUFFT_EXEC_FAILED", {"HIPFFT_EXEC_FAILED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x6 6
+ {"CUFFT_SETUP_FAILED", {"HIPFFT_SETUP_FAILED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x7 7
+ {"CUFFT_INVALID_SIZE", {"HIPFFT_INVALID_SIZE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x8 8
+ {"CUFFT_UNALIGNED_DATA", {"HIPFFT_UNALIGNED_DATA", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x9 9
+ {"CUFFT_INCOMPLETE_PARAMETER_LIST", {"HIPFFT_INCOMPLETE_PARAMETER_LIST", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xA 10
+ {"CUFFT_INVALID_DEVICE", {"HIPFFT_INVALID_DEVICE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xB 11
+ {"CUFFT_PARSE_ERROR", {"HIPFFT_PARSE_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xC 12
+ {"CUFFT_NO_WORKSPACE", {"HIPFFT_NO_WORKSPACE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xD 13
+ {"CUFFT_NOT_IMPLEMENTED", {"HIPFFT_NOT_IMPLEMENTED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xE 14
+ {"CUFFT_LICENSE_ERROR", {"HIPFFT_LICENSE_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0xF 15
+ {"CUFFT_NOT_SUPPORTED", {"HIPFFT_NOT_SUPPORTED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x10 16
+
+ {"cufftType_t", {"hipfftType_t", "", CONV_TYPE, API_FFT, 1}},
+ {"cufftType", {"hipfftType", "", CONV_TYPE, API_FFT, 1}},
+ {"CUFFT_R2C", {"HIPFFT_R2C", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x2a
+ {"CUFFT_C2R", {"HIPFFT_C2R", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x2c
+ {"CUFFT_C2C", {"HIPFFT_C2C", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x29
+ {"CUFFT_D2Z", {"HIPFFT_D2Z", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x6a
+ {"CUFFT_Z2D", {"HIPFFT_Z2D", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x6c
+ {"CUFFT_Z2Z", {"HIPFFT_Z2Z", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x69
+
+ {"cufftCompatibility_t", {"hipfftCompatibility_t", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}},
+ {"cufftCompatibility", {"hipfftCompatibility", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}},
+ {"CUFFT_COMPATIBILITY_FFTW_PADDING", {"HIPFFT_COMPATIBILITY_FFTW_PADDING", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x01
+
+ {"cufftXtSubFormat_t", {"hipfftXtSubFormat_t", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}},
+ {"cufftXtSubFormat", {"hipfftXtSubFormat", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}},
+ {"CUFFT_XT_FORMAT_INPUT", {"HIPFFT_XT_FORMAT_INPUT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x00
+ {"CUFFT_XT_FORMAT_OUTPUT", {"HIPFFT_XT_FORMAT_OUTPUT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x01
+ {"CUFFT_XT_FORMAT_INPLACE", {"HIPFFT_XT_FORMAT_INPLACE", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x02
+ {"CUFFT_XT_FORMAT_INPLACE_SHUFFLED", {"HIPFFT_XT_FORMAT_INPLACE_SHUFFLED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x03
+ {"CUFFT_XT_FORMAT_1D_INPUT_SHUFFLED", {"HIPFFT_XT_FORMAT_1D_INPUT_SHUFFLED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x04
+ {"CUFFT_XT_FORMAT_DISTRIBUTED_INPUT", {"HIPFFT_XT_FORMAT_DISTRIBUTED_INPUT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x05
+ {"CUFFT_XT_FORMAT_DISTRIBUTED_OUTPUT", {"HIPFFT_XT_FORMAT_DISTRIBUTED_OUTPUT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x06
+ {"CUFFT_FORMAT_UNDEFINED", {"HIPFFT_FORMAT_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x07
+
+ {"cufftXtCopyType_t", {"hipfftXtCopyType_t", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}},
+ {"cufftXtCopyType", {"hipfftXtCopyType", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}},
+ {"CUFFT_COPY_HOST_TO_DEVICE", {"HIPFFT_COPY_HOST_TO_DEVICE", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x00
+ {"CUFFT_COPY_DEVICE_TO_HOST", {"HIPFFT_COPY_DEVICE_TO_HOST", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x01
+ {"CUFFT_COPY_DEVICE_TO_DEVICE", {"HIPFFT_COPY_DEVICE_TO_DEVICE", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x02
+ {"CUFFT_COPY_UNDEFINED", {"HIPFFT_COPY_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x03
+
+ {"cufftXtQueryType_t", {"hipfftXtQueryType_t", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}},
+ {"cufftXtQueryType", {"hipfftXtQueryType", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}},
+ {"CUFFT_QUERY_1D_FACTORS", {"HIPFFT_QUERY_1D_FACTORS", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x00
+ {"CUFFT_QUERY_UNDEFINED", {"HIPFFT_QUERY_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x01
+
+ {"cufftXtWorkAreaPolicy_t", {"hipfftXtWorkAreaPolicy_t", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}},
+ {"cufftXtWorkAreaPolicy", {"hipfftXtWorkAreaPolicy", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}},
+ {"CUFFT_WORKAREA_MINIMAL", {"HIPFFT_WORKAREA_MINIMAL", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0
+ {"CUFFT_WORKAREA_USER", {"HIPFFT_WORKAREA_USER", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 1
+ {"CUFFT_WORKAREA_PERFORMANCE", {"HIPFFT_WORKAREA_PERFORMANCE", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 2
+
+ {"cufftXtCallbackType_t", {"hipfftXtCallbackType_t", "", CONV_TYPE, API_FFT, 1}},
+ {"cufftXtCallbackType", {"hipfftXtCallbackType", "", CONV_TYPE, API_FFT, 1}},
+ {"CUFFT_CB_LD_COMPLEX", {"HIPFFT_CB_LD_COMPLEX", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x0
+ {"CUFFT_CB_LD_COMPLEX_DOUBLE", {"HIPFFT_CB_LD_COMPLEX_DOUBLE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x1
+ {"CUFFT_CB_LD_REAL", {"HIPFFT_CB_LD_REAL", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x2
+ {"CUFFT_CB_LD_REAL_DOUBLE", {"HIPFFT_CB_LD_REAL_DOUBLE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x3
+ {"CUFFT_CB_ST_COMPLEX", {"HIPFFT_CB_ST_COMPLEX", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x4
+ {"CUFFT_CB_ST_COMPLEX_DOUBLE", {"HIPFFT_CB_ST_COMPLEX_DOUBLE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x5
+ {"CUFFT_CB_ST_REAL", {"HIPFFT_CB_ST_REAL", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x6
+ {"CUFFT_CB_ST_REAL_DOUBLE", {"HIPFFT_CB_ST_REAL_DOUBLE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x7
+ {"CUFFT_CB_UNDEFINED", {"HIPFFT_CB_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x8
// cuFFT types
- {"cufftReal", {"hipfftReal", "", CONV_TYPE, API_FFT, 1}},
- {"cufftDoubleReal", {"hipfftDoubleReal", "", CONV_TYPE, API_FFT, 1}},
- {"cufftComplex", {"hipfftComplex", "", CONV_TYPE, API_FFT, 1}},
- {"cufftDoubleComplex", {"hipfftDoubleComplex", "", CONV_TYPE, API_FFT, 1}},
- {"cufftHandle", {"hipfftHandle", "", CONV_TYPE, API_FFT, 1}},
- {"cufftXt1dFactors_t", {"hipfftXt1dFactors_t", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}},
- {"cufftXt1dFactors", {"hipfftXt1dFactors", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}},
+ {"cufftReal", {"hipfftReal", "", CONV_TYPE, API_FFT, 1}},
+ {"cufftDoubleReal", {"hipfftDoubleReal", "", CONV_TYPE, API_FFT, 1}},
+ {"cufftComplex", {"hipfftComplex", "", CONV_TYPE, API_FFT, 1}},
+ {"cufftDoubleComplex", {"hipfftDoubleComplex", "", CONV_TYPE, API_FFT, 1}},
+ {"cufftHandle", {"hipfftHandle", "", CONV_TYPE, API_FFT, 1}},
+ {"cufftXt1dFactors_t", {"hipfftXt1dFactors_t", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}},
+ {"cufftXt1dFactors", {"hipfftXt1dFactors", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}},
+ {"cufftBox3d_t", {"hipfftBox3d_t", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}},
+ {"cufftBox3d", {"hipfftBox3d", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}},
};
const std::map CUDA_FFT_TYPE_NAME_VER_MAP {
- {"CUFFT_VER_MAJOR", {CUDA_102, CUDA_0, CUDA_0}},
- {"CUFFT_VER_MINOR", {CUDA_102, CUDA_0, CUDA_0}},
- {"CUFFT_VER_PATCH", {CUDA_102, CUDA_0, CUDA_0}},
- {"CUFFT_VER_BUILD", {CUDA_102, CUDA_0, CUDA_0}},
- {"CUFFT_VERSION", {CUDA_102, CUDA_0, CUDA_0}},
- {"CUFFT_NOT_SUPPORTED", {CUDA_80, CUDA_0, CUDA_0}},
- {"cufftXtWorkAreaPolicy_t", {CUDA_92, CUDA_0, CUDA_0}},
- {"cufftXtWorkAreaPolicy", {CUDA_92, CUDA_0, CUDA_0}},
- {"CUFFT_WORKAREA_MINIMAL", {CUDA_92, CUDA_0, CUDA_0}},
- {"CUFFT_WORKAREA_USER", {CUDA_92, CUDA_0, CUDA_0}},
+ {"CUFFT_VER_MAJOR", {CUDA_102, CUDA_0, CUDA_0}},
+ {"CUFFT_VER_MINOR", {CUDA_102, CUDA_0, CUDA_0}},
+ {"CUFFT_VER_PATCH", {CUDA_102, CUDA_0, CUDA_0}},
+ {"CUFFT_VER_BUILD", {CUDA_102, CUDA_0, CUDA_0}},
+ {"CUFFT_VERSION", {CUDA_102, CUDA_0, CUDA_0}},
+ {"CUFFT_NOT_SUPPORTED", {CUDA_80, CUDA_0, CUDA_0}},
+ {"cufftXtWorkAreaPolicy_t", {CUDA_92, CUDA_0, CUDA_0}},
+ {"cufftXtWorkAreaPolicy", {CUDA_92, CUDA_0, CUDA_0}},
+ {"CUFFT_WORKAREA_MINIMAL", {CUDA_92, CUDA_0, CUDA_0}},
+ {"CUFFT_WORKAREA_USER", {CUDA_92, CUDA_0, CUDA_0}},
+ {"CUFFT_XT_FORMAT_DISTRIBUTED_INPUT", {CUDA_118, CUDA_0, CUDA_0}},
+ {"CUFFT_XT_FORMAT_DISTRIBUTED_OUTPUT", {CUDA_118, CUDA_0, CUDA_0}},
+ {"cufftBox3d_t", {CUDA_118, CUDA_0, CUDA_0}},
+ {"cufftBox3d", {CUDA_118, CUDA_0, CUDA_0}},
};
const std::map HIP_FFT_TYPE_NAME_VER_MAP {
- {"HIPFFT_FORWARD", {HIP_1070, HIP_0, HIP_0 }},
- {"HIPFFT_BACKWARD", {HIP_1070, HIP_0, HIP_0 }},
- {"hipfftResult_t", {HIP_1070, HIP_0, HIP_0 }},
- {"hipfftResult", {HIP_1070, HIP_0, HIP_0 }},
- {"HIPFFT_SUCCESS", {HIP_1070, HIP_0, HIP_0 }},
- {"HIPFFT_INVALID_PLAN", {HIP_1070, HIP_0, HIP_0 }},
- {"HIPFFT_ALLOC_FAILED", {HIP_1070, HIP_0, HIP_0 }},
- {"HIPFFT_INVALID_TYPE", {HIP_1070, HIP_0, HIP_0 }},
- {"HIPFFT_INVALID_VALUE", {HIP_1070, HIP_0, HIP_0 }},
- {"HIPFFT_INTERNAL_ERROR", {HIP_1070, HIP_0, HIP_0 }},
- {"HIPFFT_EXEC_FAILED", {HIP_1070, HIP_0, HIP_0 }},
- {"HIPFFT_SETUP_FAILED", {HIP_1070, HIP_0, HIP_0 }},
- {"HIPFFT_INVALID_SIZE", {HIP_1070, HIP_0, HIP_0 }},
- {"HIPFFT_UNALIGNED_DATA", {HIP_1070, HIP_0, HIP_0 }},
- {"HIPFFT_INCOMPLETE_PARAMETER_LIST", {HIP_1070, HIP_0, HIP_0 }},
- {"HIPFFT_INVALID_DEVICE", {HIP_1070, HIP_0, HIP_0 }},
- {"HIPFFT_PARSE_ERROR", {HIP_1070, HIP_0, HIP_0 }},
- {"HIPFFT_NO_WORKSPACE", {HIP_1070, HIP_0, HIP_0 }},
- {"HIPFFT_NOT_IMPLEMENTED", {HIP_1070, HIP_0, HIP_0 }},
- {"HIPFFT_NOT_SUPPORTED", {HIP_1070, HIP_0, HIP_0 }},
- {"hipfftType_t", {HIP_1070, HIP_0, HIP_0 }},
- {"hipfftType", {HIP_1070, HIP_0, HIP_0 }},
- {"HIPFFT_R2C", {HIP_1070, HIP_0, HIP_0 }},
- {"HIPFFT_C2R", {HIP_1070, HIP_0, HIP_0 }},
- {"HIPFFT_C2C", {HIP_1070, HIP_0, HIP_0 }},
- {"HIPFFT_D2Z", {HIP_1070, HIP_0, HIP_0 }},
- {"HIPFFT_Z2D", {HIP_1070, HIP_0, HIP_0 }},
- {"HIPFFT_Z2Z", {HIP_1070, HIP_0, HIP_0 }},
- {"hipfftReal", {HIP_1070, HIP_0, HIP_0 }},
- {"hipfftDoubleReal", {HIP_1070, HIP_0, HIP_0 }},
- {"hipfftComplex", {HIP_1070, HIP_0, HIP_0 }},
- {"hipfftDoubleComplex", {HIP_1070, HIP_0, HIP_0 }},
- {"hipfftHandle", {HIP_1070, HIP_0, HIP_0 }},
- {"hipfftXtCallbackType_t", {HIP_4030, HIP_0, HIP_0 }},
- {"hipfftXtCallbackType", {HIP_4030, HIP_0, HIP_0 }},
- {"HIPFFT_CB_LD_COMPLEX", {HIP_4030, HIP_0, HIP_0 }},
- {"HIPFFT_CB_LD_COMPLEX_DOUBLE", {HIP_4030, HIP_0, HIP_0 }},
- {"HIPFFT_CB_LD_REAL", {HIP_4030, HIP_0, HIP_0 }},
- {"HIPFFT_CB_LD_REAL_DOUBLE", {HIP_4030, HIP_0, HIP_0 }},
- {"HIPFFT_CB_ST_COMPLEX", {HIP_4030, HIP_0, HIP_0 }},
- {"HIPFFT_CB_ST_COMPLEX_DOUBLE", {HIP_4030, HIP_0, HIP_0 }},
- {"HIPFFT_CB_ST_REAL", {HIP_4030, HIP_0, HIP_0 }},
- {"HIPFFT_CB_ST_REAL_DOUBLE", {HIP_4030, HIP_0, HIP_0 }},
- {"HIPFFT_CB_UNDEFINED", {HIP_4030, HIP_0, HIP_0 }},
+ {"HIPFFT_FORWARD", {HIP_1070, HIP_0, HIP_0 }},
+ {"HIPFFT_BACKWARD", {HIP_1070, HIP_0, HIP_0 }},
+ {"hipfftResult_t", {HIP_1070, HIP_0, HIP_0 }},
+ {"hipfftResult", {HIP_1070, HIP_0, HIP_0 }},
+ {"HIPFFT_SUCCESS", {HIP_1070, HIP_0, HIP_0 }},
+ {"HIPFFT_INVALID_PLAN", {HIP_1070, HIP_0, HIP_0 }},
+ {"HIPFFT_ALLOC_FAILED", {HIP_1070, HIP_0, HIP_0 }},
+ {"HIPFFT_INVALID_TYPE", {HIP_1070, HIP_0, HIP_0 }},
+ {"HIPFFT_INVALID_VALUE", {HIP_1070, HIP_0, HIP_0 }},
+ {"HIPFFT_INTERNAL_ERROR", {HIP_1070, HIP_0, HIP_0 }},
+ {"HIPFFT_EXEC_FAILED", {HIP_1070, HIP_0, HIP_0 }},
+ {"HIPFFT_SETUP_FAILED", {HIP_1070, HIP_0, HIP_0 }},
+ {"HIPFFT_INVALID_SIZE", {HIP_1070, HIP_0, HIP_0 }},
+ {"HIPFFT_UNALIGNED_DATA", {HIP_1070, HIP_0, HIP_0 }},
+ {"HIPFFT_INCOMPLETE_PARAMETER_LIST", {HIP_1070, HIP_0, HIP_0 }},
+ {"HIPFFT_INVALID_DEVICE", {HIP_1070, HIP_0, HIP_0 }},
+ {"HIPFFT_PARSE_ERROR", {HIP_1070, HIP_0, HIP_0 }},
+ {"HIPFFT_NO_WORKSPACE", {HIP_1070, HIP_0, HIP_0 }},
+ {"HIPFFT_NOT_IMPLEMENTED", {HIP_1070, HIP_0, HIP_0 }},
+ {"HIPFFT_NOT_SUPPORTED", {HIP_1070, HIP_0, HIP_0 }},
+ {"hipfftType_t", {HIP_1070, HIP_0, HIP_0 }},
+ {"hipfftType", {HIP_1070, HIP_0, HIP_0 }},
+ {"HIPFFT_R2C", {HIP_1070, HIP_0, HIP_0 }},
+ {"HIPFFT_C2R", {HIP_1070, HIP_0, HIP_0 }},
+ {"HIPFFT_C2C", {HIP_1070, HIP_0, HIP_0 }},
+ {"HIPFFT_D2Z", {HIP_1070, HIP_0, HIP_0 }},
+ {"HIPFFT_Z2D", {HIP_1070, HIP_0, HIP_0 }},
+ {"HIPFFT_Z2Z", {HIP_1070, HIP_0, HIP_0 }},
+ {"hipfftReal", {HIP_1070, HIP_0, HIP_0 }},
+ {"hipfftDoubleReal", {HIP_1070, HIP_0, HIP_0 }},
+ {"hipfftComplex", {HIP_1070, HIP_0, HIP_0 }},
+ {"hipfftDoubleComplex", {HIP_1070, HIP_0, HIP_0 }},
+ {"hipfftHandle", {HIP_1070, HIP_0, HIP_0 }},
+ {"hipfftXtCallbackType_t", {HIP_4030, HIP_0, HIP_0 }},
+ {"hipfftXtCallbackType", {HIP_4030, HIP_0, HIP_0 }},
+ {"HIPFFT_CB_LD_COMPLEX", {HIP_4030, HIP_0, HIP_0 }},
+ {"HIPFFT_CB_LD_COMPLEX_DOUBLE", {HIP_4030, HIP_0, HIP_0 }},
+ {"HIPFFT_CB_LD_REAL", {HIP_4030, HIP_0, HIP_0 }},
+ {"HIPFFT_CB_LD_REAL_DOUBLE", {HIP_4030, HIP_0, HIP_0 }},
+ {"HIPFFT_CB_ST_COMPLEX", {HIP_4030, HIP_0, HIP_0 }},
+ {"HIPFFT_CB_ST_COMPLEX_DOUBLE", {HIP_4030, HIP_0, HIP_0 }},
+ {"HIPFFT_CB_ST_REAL", {HIP_4030, HIP_0, HIP_0 }},
+ {"HIPFFT_CB_ST_REAL_DOUBLE", {HIP_4030, HIP_0, HIP_0 }},
+ {"HIPFFT_CB_UNDEFINED", {HIP_4030, HIP_0, HIP_0 }},
};
diff --git a/src/CUDA2HIP_RTC_API_functions.cpp b/src/CUDA2HIP_RTC_API_functions.cpp
index 2763485b..263c1811 100644
--- a/src/CUDA2HIP_RTC_API_functions.cpp
+++ b/src/CUDA2HIP_RTC_API_functions.cpp
@@ -33,8 +33,8 @@ const std::map CUDA_RTC_FUNCTION_MAP {
{"nvrtcCompileProgram", {"hiprtcCompileProgram", "", CONV_LIB_FUNC, API_RTC, 2}},
{"nvrtcGetPTXSize", {"hiprtcGetCodeSize", "", CONV_LIB_FUNC, API_RTC, 2}},
{"nvrtcGetPTX", {"hiprtcGetCode", "", CONV_LIB_FUNC, API_RTC, 2}},
- {"nvrtcGetCUBINSize", {"hiprtcGetBitcodeSize", "", CONV_LIB_FUNC, API_RTC, 2, HIP_EXPERIMENTAL}},
- {"nvrtcGetCUBIN", {"hiprtcGetBitcode", "", CONV_LIB_FUNC, API_RTC, 2, HIP_EXPERIMENTAL}},
+ {"nvrtcGetCUBINSize", {"hiprtcGetBitcodeSize", "", CONV_LIB_FUNC, API_RTC, 2}},
+ {"nvrtcGetCUBIN", {"hiprtcGetBitcode", "", CONV_LIB_FUNC, API_RTC, 2}},
{"nvrtcGetNVVMSize", {"hiprtcGetNVVMSize", "", CONV_LIB_FUNC, API_RTC, 2, HIP_UNSUPPORTED}},
{"nvrtcGetNVVM", {"hiprtcGetNVVM", "", CONV_LIB_FUNC, API_RTC, 2, HIP_UNSUPPORTED}},
{"nvrtcGetProgramLogSize", {"hiprtcGetProgramLogSize", "", CONV_LIB_FUNC, API_RTC, 2}},
@@ -66,8 +66,8 @@ const std::map HIP_RTC_FUNCTION_VER_MAP {
{"hiprtcGetProgramLog", {HIP_2060, HIP_0, HIP_0 }},
{"hiprtcAddNameExpression", {HIP_2060, HIP_0, HIP_0 }},
{"hiprtcGetLoweredName", {HIP_2060, HIP_0, HIP_0 }},
- {"hiprtcGetBitcode", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"hiprtcGetBitcodeSize", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
+ {"hiprtcGetBitcode", {HIP_5030, HIP_0, HIP_0 }},
+ {"hiprtcGetBitcodeSize", {HIP_5030, HIP_0, HIP_0 }},
};
const std::map CUDA_RTC_API_SECTION_MAP {
diff --git a/src/CUDA2HIP_Runtime_API_functions.cpp b/src/CUDA2HIP_Runtime_API_functions.cpp
index 4ead60ca..4a9ef995 100644
--- a/src/CUDA2HIP_Runtime_API_functions.cpp
+++ b/src/CUDA2HIP_Runtime_API_functions.cpp
@@ -52,7 +52,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP {
// no analogue
{"cudaDeviceSetCacheConfig", {"hipDeviceSetCacheConfig", "", CONV_DEVICE, API_RUNTIME, 1}},
// cuCtxSetLimit
- {"cudaDeviceSetLimit", {"hipDeviceSetLimit", "", CONV_DEVICE, API_RUNTIME, 1, HIP_EXPERIMENTAL}},
+ {"cudaDeviceSetLimit", {"hipDeviceSetLimit", "", CONV_DEVICE, API_RUNTIME, 1}},
// cuCtxSetSharedMemConfig
{"cudaDeviceSetSharedMemConfig", {"hipDeviceSetSharedMemConfig", "", CONV_DEVICE, API_RUNTIME, 1}},
// cuCtxSynchronize
@@ -229,6 +229,9 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP {
{"cudaSetDoubleForDevice", {"hipSetDoubleForDevice", "", CONV_EXECUTION, API_RUNTIME, 7, HIP_UNSUPPORTED | CUDA_DEPRECATED}},
// no analogue
{"cudaSetDoubleForHost", {"hipSetDoubleForHost", "", CONV_EXECUTION, API_RUNTIME, 7, HIP_UNSUPPORTED | CUDA_DEPRECATED}},
+ // no analogue
+ // NOTE: Not equal to cuLaunchKernelEx due to different signatures
+ {"cudaLaunchKernelExC", {"hipLaunchKernelExC", "", CONV_EXECUTION, API_RUNTIME, 7, HIP_UNSUPPORTED}},
// 8. Occupancy
// cuOccupancyAvailableDynamicSMemPerBlock
@@ -245,6 +248,10 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP {
{"cudaOccupancyMaxPotentialBlockSizeVariableSMem", {"hipOccupancyMaxPotentialBlockSizeVariableSMem", "", CONV_OCCUPANCY, API_RUNTIME, 8, HIP_UNSUPPORTED}},
// no analogue
{"cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags", {"hipOccupancyMaxPotentialBlockSizeVariableSMemWithFlags", "", CONV_OCCUPANCY, API_RUNTIME, 8, HIP_UNSUPPORTED}},
+ // cuOccupancyMaxPotentialClusterSize
+ {"cudaOccupancyMaxPotentialClusterSize", {"hipOccupancyMaxPotentialClusterSize", "", CONV_OCCUPANCY, API_RUNTIME, 8, HIP_UNSUPPORTED}},
+ // cuOccupancyMaxActiveClusters
+ {"cudaOccupancyMaxActiveClusters", {"hipOccupancyMaxActiveClusters", "", CONV_OCCUPANCY, API_RUNTIME, 8, HIP_UNSUPPORTED}},
// 9. Memory Management
// no analogue
@@ -642,6 +649,10 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP {
// no analogue
// NOTE: Not equal to cuTexObjectGetTextureDesc due to different signatures
{"cudaGetTextureObjectTextureDesc", {"hipGetTextureObjectTextureDesc", "", CONV_TEXTURE, API_RUNTIME, 27}},
+ //
+ {"cudaCreateTextureObject_v2", {"hipCreateTextureObject_v2", "", CONV_TEXTURE, API_RUNTIME, 27, HIP_UNSUPPORTED}},
+ //
+ {"cudaGetTextureObjectTextureDesc_v2", {"hipGetTextureObjectTextureDesc_v2", "", CONV_TEXTURE, API_RUNTIME, 27, HIP_UNSUPPORTED}},
// 28. Surface Object Management
// no analogue
@@ -789,7 +800,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP {
// cuGraphExecEventWaitNodeSetEvent
{"cudaGraphExecEventWaitNodeSetEvent", {"hipGraphExecEventWaitNodeSetEvent", "", CONV_GRAPH, API_RUNTIME, 30}},
// cuGraphUpload
- {"cudaGraphUpload", {"hipGraphUpload", "", CONV_GRAPH, API_RUNTIME, 30, HIP_EXPERIMENTAL}},
+ {"cudaGraphUpload", {"hipGraphUpload", "", CONV_GRAPH, API_RUNTIME, 30}},
// cuGraphAddExternalSemaphoresSignalNode
{"cudaGraphAddExternalSemaphoresSignalNode", {"hipGraphAddExternalSemaphoresSignalNode", "", CONV_GRAPH, API_RUNTIME, 30, HIP_UNSUPPORTED}},
// cuGraphExternalSemaphoresSignalNodeGetParams
@@ -807,15 +818,15 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP {
// cuGraphExecExternalSemaphoresWaitNodeSetParams
{"cudaGraphExecExternalSemaphoresWaitNodeSetParams", {"hipGraphExecExternalSemaphoresWaitNodeSetParams", "", CONV_GRAPH, API_RUNTIME, 30, HIP_UNSUPPORTED}},
// cuUserObjectCreate
- {"cudaUserObjectCreate", {"hipUserObjectCreate", "", CONV_GRAPH, API_RUNTIME, 30, HIP_EXPERIMENTAL}},
+ {"cudaUserObjectCreate", {"hipUserObjectCreate", "", CONV_GRAPH, API_RUNTIME, 30}},
// cuUserObjectRetain
- {"cudaUserObjectRetain", {"hipUserObjectRetain", "", CONV_GRAPH, API_RUNTIME, 30, HIP_EXPERIMENTAL}},
+ {"cudaUserObjectRetain", {"hipUserObjectRetain", "", CONV_GRAPH, API_RUNTIME, 30}},
// cuUserObjectRelease
- {"cudaUserObjectRelease", {"hipUserObjectRelease", "", CONV_GRAPH, API_RUNTIME, 30, HIP_EXPERIMENTAL}},
+ {"cudaUserObjectRelease", {"hipUserObjectRelease", "", CONV_GRAPH, API_RUNTIME, 30}},
// cuGraphRetainUserObject
- {"cudaGraphRetainUserObject", {"hipGraphRetainUserObject", "", CONV_GRAPH, API_RUNTIME, 30, HIP_EXPERIMENTAL}},
+ {"cudaGraphRetainUserObject", {"hipGraphRetainUserObject", "", CONV_GRAPH, API_RUNTIME, 30}},
// cuGraphReleaseUserObject
- {"cudaGraphReleaseUserObject", {"hipGraphReleaseUserObject", "", CONV_GRAPH, API_RUNTIME, 30, HIP_EXPERIMENTAL}},
+ {"cudaGraphReleaseUserObject", {"hipGraphReleaseUserObject", "", CONV_GRAPH, API_RUNTIME, 30}},
// cuGraphAddMemAllocNode
{"cudaGraphAddMemAllocNode", {"hipGraphAddMemAllocNode", "", CONV_GRAPH, API_RUNTIME, 30, HIP_UNSUPPORTED}},
// cuGraphMemAllocNodeGetParams
@@ -825,11 +836,11 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP {
// cuGraphMemFreeNodeGetParams
{"cudaGraphMemFreeNodeGetParams", {"hipGraphMemFreeNodeGetParams", "", CONV_GRAPH, API_RUNTIME, 30, HIP_UNSUPPORTED}},
// cuDeviceGraphMemTrim
- {"cudaDeviceGraphMemTrim", {"hipDeviceGraphMemTrim", "", CONV_GRAPH, API_RUNTIME, 30, HIP_EXPERIMENTAL}},
+ {"cudaDeviceGraphMemTrim", {"hipDeviceGraphMemTrim", "", CONV_GRAPH, API_RUNTIME, 30}},
// cuDeviceGetGraphMemAttribute
- {"cudaDeviceGetGraphMemAttribute", {"hipDeviceGetGraphMemAttribute", "", CONV_GRAPH, API_RUNTIME, 30, HIP_EXPERIMENTAL}},
+ {"cudaDeviceGetGraphMemAttribute", {"hipDeviceGetGraphMemAttribute", "", CONV_GRAPH, API_RUNTIME, 30}},
// cuDeviceSetGraphMemAttribute
- {"cudaDeviceSetGraphMemAttribute", {"hipDeviceSetGraphMemAttribute", "", CONV_GRAPH, API_RUNTIME, 30, HIP_EXPERIMENTAL}},
+ {"cudaDeviceSetGraphMemAttribute", {"hipDeviceSetGraphMemAttribute", "", CONV_GRAPH, API_RUNTIME, 30}},
// cuGraphInstantiateWithFlags
{"cudaGraphInstantiateWithFlags", {"hipGraphInstantiateWithFlags", "", CONV_GRAPH, API_RUNTIME, 30}},
// cuGraphNodeSetEnabled
@@ -1073,6 +1084,11 @@ const std::map CUDA_RUNTIME_FUNCTION_VER_MAP {
{"cudaGraphInstantiateWithFlags", {CUDA_114, CUDA_0, CUDA_0 }},
{"cudaArrayGetMemoryRequirements", {CUDA_116, CUDA_0, CUDA_0 }},
{"cudaGraphNodeSetEnabled", {CUDA_116, CUDA_0, CUDA_0 }},
+ {"cudaLaunchKernelExC", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cudaOccupancyMaxPotentialClusterSize", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cudaOccupancyMaxActiveClusters", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cudaCreateTextureObject_v2", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cudaGetTextureObjectTextureDesc_v2", {CUDA_118, CUDA_0, CUDA_0 }},
};
const std::map HIP_RUNTIME_FUNCTION_VER_MAP {
@@ -1304,16 +1320,16 @@ const std::map HIP_RUNTIME_FUNCTION_VER_MAP {
{"hipThreadExchangeStreamCaptureMode", {HIP_5020, HIP_0, HIP_0 }},
{"hipGraphKernelNodeSetAttribute", {HIP_5020, HIP_0, HIP_0 }},
{"hipGraphKernelNodeGetAttribute", {HIP_5020, HIP_0, HIP_0 }},
- {"hipDeviceSetLimit", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"hipGraphUpload", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"hipDeviceGetGraphMemAttribute", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"hipDeviceSetGraphMemAttribute", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"hipDeviceGraphMemTrim", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"hipUserObjectCreate", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"hipUserObjectRelease", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"hipUserObjectRetain", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"hipGraphRetainUserObject", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"hipGraphReleaseUserObject", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
+ {"hipDeviceSetLimit", {HIP_5030, HIP_0, HIP_0 }},
+ {"hipGraphUpload", {HIP_5030, HIP_0, HIP_0 }},
+ {"hipDeviceGetGraphMemAttribute", {HIP_5030, HIP_0, HIP_0 }},
+ {"hipDeviceSetGraphMemAttribute", {HIP_5030, HIP_0, HIP_0 }},
+ {"hipDeviceGraphMemTrim", {HIP_5030, HIP_0, HIP_0 }},
+ {"hipUserObjectCreate", {HIP_5030, HIP_0, HIP_0 }},
+ {"hipUserObjectRelease", {HIP_5030, HIP_0, HIP_0 }},
+ {"hipUserObjectRetain", {HIP_5030, HIP_0, HIP_0 }},
+ {"hipGraphRetainUserObject", {HIP_5030, HIP_0, HIP_0 }},
+ {"hipGraphReleaseUserObject", {HIP_5030, HIP_0, HIP_0 }},
};
const std::map CUDA_RUNTIME_API_SECTION_MAP {
diff --git a/src/CUDA2HIP_Runtime_API_types.cpp b/src/CUDA2HIP_Runtime_API_types.cpp
index 9cc43d89..ff3c4986 100644
--- a/src/CUDA2HIP_Runtime_API_types.cpp
+++ b/src/CUDA2HIP_Runtime_API_types.cpp
@@ -225,6 +225,19 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP {
// CUkernelNodeAttrValue
{"cudaKernelNodeAttrValue", {"hipKernelNodeAttrValue", "", CONV_TYPE, API_RUNTIME, 36}},
+ // CUlaunchAttributeValue
+ {"cudaLaunchAttributeValue", {"hipLaunchAttributeValue", "", CONV_TYPE, API_RUNTIME, 36, HIP_UNSUPPORTED}},
+
+ // CUlaunchAttribute_st
+ {"cudaLaunchAttribute_st", {"hipLaunchAttribute", "", CONV_TYPE, API_RUNTIME, 36, HIP_UNSUPPORTED}},
+ // CUlaunchAttribute
+ {"cudaLaunchAttribute", {"hipLaunchAttribute", "", CONV_TYPE, API_RUNTIME, 36, HIP_UNSUPPORTED}},
+
+ // CUlaunchConfig_st
+ {"cudaLaunchConfig_st", {"hipLaunchConfig", "", CONV_TYPE, API_RUNTIME, 36, HIP_UNSUPPORTED}},
+ // CUlaunchConfig
+ {"cudaLaunchConfig_t", {"hipLaunchConfig", "", CONV_TYPE, API_RUNTIME, 36, HIP_UNSUPPORTED}},
+
// 3. Enums
// no analogue
@@ -535,7 +548,7 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP {
// CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED
{"cudaDevAttrMaxTimelineSemaphoreInteropSupported", {"hipDeviceAttributeMaxTimelineSemaphoreInteropSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED | CUDA_DEPRECATED}}, // 114
// CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED
- {"cudaDevAttrTimelineSemaphoreInteropSupported", {"hipDevAttrTimelineSemaphoreInteropSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 114
+ {"cudaDevAttrTimelineSemaphoreInteropSupported", {"hipDeviceAttributeTimelineSemaphoreInteropSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 114
// CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED
{"cudaDevAttrMemoryPoolsSupported", {"hipDeviceAttributeMemoryPoolsSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 115
// CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED
@@ -546,6 +559,8 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP {
{"cudaDevAttrGPUDirectRDMAWritesOrdering", {"hipDeviceAttributeGpuDirectRdmaWritesOrdering", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 118
// CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES
{"cudaDevAttrMemoryPoolSupportedHandleTypes", {"hipDeviceAttributeMempoolSupportedHandleTypes", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 119
+ // CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH
+ {"cudaDevAttrClusterLaunch", {"hipDeviceAttributeClusterLaunch", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 120
// CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED
{"cudaDevAttrDeferredMappingCudaArraySupported", {"hipDeviceAttributeDeferredMappingCudaArraySupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 121
// CU_DEVICE_ATTRIBUTE_MAX
@@ -963,6 +978,8 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP {
{"cudaErrorMpsMaxClientsReached", {"hipErrorMpsMaxClientsReached", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 808
// CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED
{"cudaErrorMpsMaxConnectionsReached", {"hipErrorMpsMaxConnectionsReached", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 809
+ // CUDA_ERROR_MPS_CLIENT_TERMINATED
+ {"cudaErrorMpsClientTerminated", {"hipErrorMpsClientTerminated", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 810
// CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED
{"cudaErrorStreamCaptureUnsupported", {"hipErrorStreamCaptureUnsupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 900
// CUDA_ERROR_STREAM_CAPTURE_INVALIDATED
@@ -987,6 +1004,8 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP {
{"cudaErrorGraphExecUpdateFailure", {"hipErrorGraphExecUpdateFailure", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 910
// CUDA_ERROR_EXTERNAL_DEVICE
{"cudaErrorExternalDevice", {"hipErrorExternalDevice", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 911
+ // CUDA_ERROR_INVALID_CLUSTER_SIZE
+ {"cudaErrorInvalidClusterSize", {"hipErrorInvalidClusterSize", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 912
// CUDA_ERROR_UNKNOWN
{"cudaErrorUnknown", {"hipErrorUnknown", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 999
// Deprecated since CUDA 4.1
@@ -1044,8 +1063,20 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP {
{"cudaFuncAttributeMaxDynamicSharedMemorySize", {"hipFuncAttributeMaxDynamicSharedMemorySize", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 8
// CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT
{"cudaFuncAttributePreferredSharedMemoryCarveout", {"hipFuncAttributePreferredSharedMemoryCarveout", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 9
+ // CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET
+ {"cudaFuncAttributeClusterDimMustBeSet", {"hipFuncAttributeClusterDimMustBeSet", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 10
+ // CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH
+ {"cudaFuncAttributeRequiredClusterWidth", {"hipFuncAttributeRequiredClusterWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 11
+ // CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT
+ {"cudaFuncAttributeRequiredClusterHeight", {"hipFuncAttributeRequiredClusterHeight", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 12
+ // CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH
+ {"cudaFuncAttributeRequiredClusterDepth", {"hipFuncAttributeRequiredClusterDepth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 13
+ // CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED
+ {"cudaFuncAttributeNonPortableClusterSizeAllowed", {"hipFuncAttributeNonPortableClusterSizeAllowed", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 14
+ // CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE
+ {"cudaFuncAttributeClusterSchedulingPolicyPreference", {"hipFuncAttributeClusterSchedulingPolicyPreference", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 15
// CU_FUNC_ATTRIBUTE_MAX
- {"cudaFuncAttributeMax", {"hipFuncAttributeMax", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 10
+ {"cudaFuncAttributeMax", {"hipFuncAttributeMax", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 16
// CUfunc_cache
{"cudaFuncCache", {"hipFuncCache_t", "", CONV_TYPE, API_RUNTIME, 36}},
@@ -1119,9 +1150,9 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP {
// CU_GRAPH_NODE_TYPE_EVENT_RECORD = 7
{"cudaGraphNodeTypeEventRecord", {"hipGraphNodeTypeEventRecord", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 0x07
// CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL = 8
- {"cudaGraphNodeTypeExtSemaphoreSignal", {"hipGraphNodeTypeExtSemaphoreSignal", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_EXPERIMENTAL}}, // 0x08
+ {"cudaGraphNodeTypeExtSemaphoreSignal", {"hipGraphNodeTypeExtSemaphoreSignal", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 0x08
// CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT = 9
- {"cudaGraphNodeTypeExtSemaphoreWait", {"hipGraphNodeTypeExtSemaphoreWait", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_EXPERIMENTAL}}, // 0x09
+ {"cudaGraphNodeTypeExtSemaphoreWait", {"hipGraphNodeTypeExtSemaphoreWait", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 0x09
// CU_GRAPH_NODE_TYPE_MEM_ALLOC = 10
{"cudaGraphNodeTypeMemAlloc", {"hipGraphNodeTypeMemAlloc", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 0x0a
// CU_GRAPH_NODE_TYPE_MEM_FREE = 11
@@ -1155,7 +1186,7 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP {
{"cudaLimit", {"hipLimit_t", "", CONV_TYPE, API_RUNTIME, 36}},
// cudaLimit enum values
// CU_LIMIT_STACK_SIZE
- {"cudaLimitStackSize", {"hipLimitStackSize", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_EXPERIMENTAL}}, // 0x00
+ {"cudaLimitStackSize", {"hipLimitStackSize", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 0x00
// CU_LIMIT_PRINTF_FIFO_SIZE
{"cudaLimitPrintfFifoSize", {"hipLimitPrintfFifoSize", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 0x01
// CU_LIMIT_MALLOC_HEAP_SIZE
@@ -1201,7 +1232,7 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP {
{"cudaMemoryTypeUnregistered", {"hipMemoryTypeUnregistered", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 0
{"cudaMemoryTypeHost", {"hipMemoryTypeHost", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 1
{"cudaMemoryTypeDevice", {"hipMemoryTypeDevice", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 2
- {"cudaMemoryTypeManaged", {"hipMemoryTypeManaged", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_EXPERIMENTAL}}, // 3
+ {"cudaMemoryTypeManaged", {"hipMemoryTypeManaged", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 3
// CUmem_range_attribute
{"cudaMemRangeAttribute", {"hipMemRangeAttribute", "", CONV_TYPE, API_RUNTIME, 36}},
@@ -1517,7 +1548,7 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP {
// CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE
{"cudaKernelNodeAttributeCooperative", {"hipKernelNodeAttributeCooperative", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 2
// CU_KERNEL_NODE_ATTRIBUTE_PRIORITY
- {"cudaKernelNodeAttributePriority", {"hipKernelNodeAttributePriority", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 8
+ {"cudaKernelNodeAttributePriority", {"hipKernelNodeAttributePriority", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 8
// CUmemPool_attribute
{"cudaMemPoolAttr", {"hipMemPoolAttr", "", CONV_TYPE, API_RUNTIME, 36}},
@@ -1588,16 +1619,16 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP {
{"cudaStreamSetCaptureDependencies", {"hipStreamSetCaptureDependencies", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 0x1
// CUuserObject_flags
- {"cudaUserObjectFlags", {"hipUserObjectFlags", "", CONV_TYPE, API_RUNTIME, 36, HIP_EXPERIMENTAL}},
+ {"cudaUserObjectFlags", {"hipUserObjectFlags", "", CONV_TYPE, API_RUNTIME, 36}},
// cudaUserObjectFlags enum values
// CU_USER_OBJECT_NO_DESTRUCTOR_SYNC
- {"cudaUserObjectNoDestructorSync", {"hipUserObjectNoDestructorSync", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_EXPERIMENTAL}}, // 0x1
+ {"cudaUserObjectNoDestructorSync", {"hipUserObjectNoDestructorSync", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 0x1
// CUuserObjectRetain_flags
- {"cudaUserObjectRetainFlags", {"hipUserObjectRetainFlags", "", CONV_TYPE, API_RUNTIME, 36, HIP_EXPERIMENTAL}},
+ {"cudaUserObjectRetainFlags", {"hipUserObjectRetainFlags", "", CONV_TYPE, API_RUNTIME, 36}},
// cudaUserObjectRetainFlags enum values
// CU_GRAPH_USER_OBJECT_MOVE
- {"cudaGraphUserObjectMove", {"hipGraphUserObjectMove", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_EXPERIMENTAL}}, // 0x1
+ {"cudaGraphUserObjectMove", {"hipGraphUserObjectMove", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 0x1
// CUflushGPUDirectRDMAWritesOptions
{"cudaFlushGPUDirectRDMAWritesOptions", {"hipFlushGPUDirectRDMAWritesOptions", "", CONV_TYPE, API_RUNTIME, 36, HIP_UNSUPPORTED}},
@@ -1666,16 +1697,16 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP {
{"cudaGraphDebugDotFlagsHandles", {"hipGraphDebugDotFlagsHandles", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 1<<10
// CUgraphMem_attribute
- {"cudaGraphMemAttributeType", {"hipGraphMemAttributeType", "", CONV_TYPE, API_RUNTIME, 36, HIP_EXPERIMENTAL}},
+ {"cudaGraphMemAttributeType", {"hipGraphMemAttributeType", "", CONV_TYPE, API_RUNTIME, 36}},
// cudaGraphMemAttributeType enum values
// CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT
- {"cudaGraphMemAttrUsedMemCurrent", {"hipGraphMemAttrUsedMemCurrent", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_EXPERIMENTAL}},
+ {"cudaGraphMemAttrUsedMemCurrent", {"hipGraphMemAttrUsedMemCurrent", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}},
// CU_GRAPH_MEM_ATTR_USED_MEM_HIGH
- {"cudaGraphMemAttrUsedMemHigh", {"hipGraphMemAttrUsedMemHigh", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_EXPERIMENTAL}},
+ {"cudaGraphMemAttrUsedMemHigh", {"hipGraphMemAttrUsedMemHigh", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}},
// CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT
- {"cudaGraphMemAttrReservedMemCurrent", {"hipGraphMemAttrReservedMemCurrent", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_EXPERIMENTAL}},
+ {"cudaGraphMemAttrReservedMemCurrent", {"hipGraphMemAttrReservedMemCurrent", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}},
// CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH
- {"cudaGraphMemAttrReservedMemHigh", {"hipGraphMemAttrReservedMemHigh", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_EXPERIMENTAL}},
+ {"cudaGraphMemAttrReservedMemHigh", {"hipGraphMemAttrReservedMemHigh", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}},
// CUgraphInstantiate_flags
{"cudaGraphInstantiateFlags", {"hipGraphInstantiateFlags", "", CONV_TYPE, API_RUNTIME, 36}},
@@ -1685,6 +1716,38 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP {
// CUDA_GRAPH_INSTANTIATE_FLAG_USE_NODE_PRIORITY
{"cudaGraphInstantiateFlagUseNodePriority", {"hipGraphInstantiateFlagUseNodePriority", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}},
+ // CUclusterSchedulingPolicy
+ {"cudaClusterSchedulingPolicy", {"hipClusterSchedulingPolicy", "", CONV_TYPE, API_RUNTIME, 36, HIP_UNSUPPORTED}},
+ // cudaClusterSchedulingPolicy enum values
+ // CU_CLUSTER_SCHEDULING_POLICY_DEFAULT
+ {"cudaClusterSchedulingPolicyDefault", {"hipClusterSchedulingPolicyDefault", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}},
+ // CU_CLUSTER_SCHEDULING_POLICY_SPREAD
+ {"cudaClusterSchedulingPolicySpread", {"hipClusterSchedulingPolicySpread", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}},
+ // CU_CLUSTER_SCHEDULING_POLICY_LOAD_BALANCING
+ {"cudaClusterSchedulingPolicyLoadBalancing", {"hipClusterSchedulingPolicyLoadBalancing", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}},
+
+ // CUlaunchAttributeID
+ {"cudaLaunchAttributeID", {"hipLaunchAttributeID", "", CONV_TYPE, API_RUNTIME, 36, HIP_UNSUPPORTED}},
+ // cudaLaunchAttributeID enum values
+ // CU_LAUNCH_ATTRIBUTE_IGNORE
+ {"cudaLaunchAttributeIgnore", {"hipLaunchAttributeIgnore", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}},
+ // CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW
+ {"cudaLaunchAttributeAccessPolicyWindow", {"hipLaunchAttributeAccessPolicyWindow", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}},
+ // CU_LAUNCH_ATTRIBUTE_COOPERATIVE
+ {"cudaLaunchAttributeCooperative", {"hipLaunchAttributeCooperative", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}},
+ // CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY
+ {"cudaLaunchAttributeSynchronizationPolicy", {"hipLaunchAttributeSynchronizationPolicy", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}},
+ // CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION
+ {"cudaLaunchAttributeClusterDimension", {"hipLaunchAttributeClusterDimension", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}},
+ // CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE
+ {"cudaLaunchAttributeClusterSchedulingPolicyPreference", {"hipLaunchAttributeClusterSchedulingPolicyPreference", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}},
+ // CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION
+ {"cudaLaunchAttributeProgrammaticStreamSerialization", {"hipLaunchAttributeProgrammaticStreamSerialization", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}},
+ // CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT
+ {"cudaLaunchAttributeProgrammaticEvent", {"hipLaunchAttributeProgrammaticEvent", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}},
+ // CU_LAUNCH_ATTRIBUTE_PRIORITY
+ {"cudaLaunchAttributePriority", {"hipLaunchAttributePriority", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}},
+
// 4. Typedefs
// CUhostFn
@@ -1706,7 +1769,7 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP {
{"cudaMemPool_t", {"hipMemPool_t", "", CONV_TYPE, API_RUNTIME, 36}},
// CUuserObject
- {"cudaUserObject_t", {"hipUserObject_t", "", CONV_TYPE, API_RUNTIME, 36, HIP_EXPERIMENTAL}},
+ {"cudaUserObject_t", {"hipUserObject_t", "", CONV_TYPE, API_RUNTIME, 36}},
// 5. Defines
@@ -1840,6 +1903,10 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP {
{"cudaStreamPerThread", {"hipStreamPerThread", "", CONV_DEFINE, API_RUNTIME, 36}}, // ((cudaStream_t)0x2)
// CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL
{"cudaArraySparsePropertiesSingleMipTail", {"hipArraySparsePropertiesSingleMipTail", "", CONV_DEFINE, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 0x1
+ // CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_DIMENSION
+ {"cudaKernelNodeAttributeClusterDimension", {"hipKernelNodeAttributeClusterDimension", "", CONV_DEFINE, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // cudaLaunchAttributeClusterDimension
+ // CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE
+ {"cudaKernelNodeAttributeClusterSchedulingPolicyPreference", {"hipKernelNodeAttributeClusterSchedulingPolicyPreference", "", CONV_DEFINE, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // cudaLaunchAttributeClusterSchedulingPolicyPreference
};
const std::map CUDA_RUNTIME_TYPE_NAME_VER_MAP {
@@ -2272,6 +2339,36 @@ const std::map CUDA_RUNTIME_TYPE_NAME_VER_MAP
{"cudaGraphExecUpdateErrorAttributesChanged", {CUDA_116, CUDA_0, CUDA_0 }},
{"cudaKernelNodeAttributePriority", {CUDA_117, CUDA_0, CUDA_0 }},
{"cudaGraphInstantiateFlagUseNodePriority", {CUDA_117, CUDA_0, CUDA_0 }},
+ {"cudaErrorMpsClientTerminated", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cudaErrorInvalidClusterSize", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cudaClusterSchedulingPolicy", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cudaClusterSchedulingPolicyDefault", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cudaClusterSchedulingPolicySpread", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cudaClusterSchedulingPolicyLoadBalancing", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cudaFuncAttributeClusterDimMustBeSet", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cudaFuncAttributeRequiredClusterWidth", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cudaFuncAttributeRequiredClusterHeight", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cudaFuncAttributeRequiredClusterDepth", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cudaFuncAttributeNonPortableClusterSizeAllowed", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cudaFuncAttributeClusterSchedulingPolicyPreference", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cudaDevAttrClusterLaunch", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cudaLaunchAttributeID", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cudaLaunchAttributeIgnore", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cudaLaunchAttributeAccessPolicyWindow", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cudaLaunchAttributeCooperative", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cudaLaunchAttributeSynchronizationPolicy", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cudaLaunchAttributeClusterDimension", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cudaLaunchAttributeClusterSchedulingPolicyPreference", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cudaLaunchAttributeProgrammaticStreamSerialization", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cudaLaunchAttributeProgrammaticEvent", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cudaLaunchAttributePriority", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cudaLaunchAttributeValue", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cudaLaunchAttribute_st", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cudaLaunchAttribute", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cudaLaunchConfig_st", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cudaLaunchConfig_t", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cudaKernelNodeAttributeClusterDimension", {CUDA_118, CUDA_0, CUDA_0 }},
+ {"cudaKernelNodeAttributeClusterSchedulingPolicyPreference", {CUDA_118, CUDA_0, CUDA_0 }},
};
const std::map HIP_RUNTIME_TYPE_NAME_VER_MAP {
@@ -2477,17 +2574,17 @@ const std::map HIP_RUNTIME_TYPE_NAME_VER_MAP {
{"hipMemPoolPtrExportData", {HIP_5020, HIP_0, HIP_0 }},
{"hipGraphInstantiateFlags", {HIP_5020, HIP_0, HIP_0 }},
{"hipGraphInstantiateFlagAutoFreeOnLaunch", {HIP_5020, HIP_0, HIP_0 }},
- {"hipMemoryTypeManaged", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"hipLimitStackSize", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"hipGraphNodeTypeExtSemaphoreSignal", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"hipGraphNodeTypeExtSemaphoreWait", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"hipGraphMemAttributeType", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"hipGraphMemAttrUsedMemCurrent", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"hipGraphMemAttrUsedMemHigh", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"hipGraphMemAttrReservedMemCurrent", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"hipGraphMemAttrReservedMemHigh", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"hipUserObjectFlags", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"hipUserObjectNoDestructorSync", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"hipUserObjectRetainFlags", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
- {"hipGraphUserObjectMove", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}},
+ {"hipMemoryTypeManaged", {HIP_5030, HIP_0, HIP_0 }},
+ {"hipLimitStackSize", {HIP_5030, HIP_0, HIP_0 }},
+ {"hipGraphNodeTypeExtSemaphoreSignal", {HIP_5030, HIP_0, HIP_0 }},
+ {"hipGraphNodeTypeExtSemaphoreWait", {HIP_5030, HIP_0, HIP_0 }},
+ {"hipGraphMemAttributeType", {HIP_5030, HIP_0, HIP_0 }},
+ {"hipGraphMemAttrUsedMemCurrent", {HIP_5030, HIP_0, HIP_0 }},
+ {"hipGraphMemAttrUsedMemHigh", {HIP_5030, HIP_0, HIP_0 }},
+ {"hipGraphMemAttrReservedMemCurrent", {HIP_5030, HIP_0, HIP_0 }},
+ {"hipGraphMemAttrReservedMemHigh", {HIP_5030, HIP_0, HIP_0 }},
+ {"hipUserObjectFlags", {HIP_5030, HIP_0, HIP_0 }},
+ {"hipUserObjectNoDestructorSync", {HIP_5030, HIP_0, HIP_0 }},
+ {"hipUserObjectRetainFlags", {HIP_5030, HIP_0, HIP_0 }},
+ {"hipGraphUserObjectMove", {HIP_5030, HIP_0, HIP_0 }},
};
diff --git a/src/Statistics.cpp b/src/Statistics.cpp
index ed6aea21..7925cbde 100644
--- a/src/Statistics.cpp
+++ b/src/Statistics.cpp
@@ -62,6 +62,7 @@ const char *counterNames[NUM_CONV_TYPES] = {
"library", // CONV_LIB_FUNC
"device_library", // CONV_LIB_DEVICE_FUNC
"device_function", // CONV_DEVICE_FUNC
+ "device_type", // CONV_DEVICE_TYPE
"include", // CONV_INCLUDE
"include_cuda_main_header", // CONV_INCLUDE_CUDA_MAIN_H
"include_cuda_main_header_v2", // CONV_INCLUDE_CUDA_MAIN_V2_H
@@ -348,7 +349,7 @@ void Statistics::setActive(const std::string &name) {
}
bool Statistics::isToRoc(const hipCounter &counter) {
- return TranslateToRoc && counter.apiType == API_BLAS;
+ return TranslateToRoc && (counter.apiType == API_BLAS || counter.apiType == API_DNN);
}
bool Statistics::isHipExperimental(const hipCounter& counter) {
@@ -442,6 +443,7 @@ std::string Statistics::getCudaVersion(const cudaVersions& ver) {
case CUDA_115: return "11.5";
case CUDA_116: return "11.6";
case CUDA_117: return "11.7";
+ case CUDA_118: return "11.8";
case CUDNN_10: return "1.0.0";
case CUDNN_20: return "2.0.0";
case CUDNN_30: return "3.0.0";
@@ -475,6 +477,10 @@ std::string Statistics::getCudaVersion(const cudaVersions& ver) {
case CUDNN_810: return "8.1.0";
case CUDNN_811: return "8.1.1";
case CUDNN_820: return "8.2.0";
+ case CUDNN_830: return "8.3.0";
+ case CUDNN_840: return "8.4.0";
+ case CUDNN_850: return "8.5.0";
+ case CUDNN_860: return "8.6.0";
}
return "";
}
@@ -488,6 +494,7 @@ std::string Statistics::getHipVersion(const hipVersions& ver) {
case HIP_1052: return "1.5.2";
case HIP_1060: return "1.6.0";
case HIP_1061: return "1.6.1";
+ case HIP_1064: return "1.6.4";
case HIP_1070: return "1.7.0";
case HIP_1071: return "1.7.1";
case HIP_1080: return "1.8.0";
@@ -538,6 +545,7 @@ std::string Statistics::getHipVersion(const hipVersions& ver) {
case HIP_5011: return "5.1.1";
case HIP_5020: return "5.2.0";
case HIP_5030: return "5.3.0";
+ case HIP_5040: return "5.4.0";
}
return "";
}
diff --git a/src/Statistics.h b/src/Statistics.h
index 02b4da7d..79dd6a78 100644
--- a/src/Statistics.h
+++ b/src/Statistics.h
@@ -119,6 +119,7 @@ enum ConvTypes {
CONV_LIB_FUNC,
CONV_LIB_DEVICE_FUNC,
CONV_DEVICE_FUNC,
+ CONV_DEVICE_TYPE,
CONV_INCLUDE,
CONV_INCLUDE_CUDA_MAIN_H,
CONV_INCLUDE_CUDA_MAIN_V2_H,
@@ -198,6 +199,7 @@ enum cudaVersions {
CUDA_115 = 11050,
CUDA_116 = 11060,
CUDA_117 = 11070,
+ CUDA_118 = 11080,
CUDNN_10 = 100,
CUDNN_20 = 200,
CUDNN_30 = 300,
@@ -231,6 +233,10 @@ enum cudaVersions {
CUDNN_810 = 810,
CUDNN_811 = 811,
CUDNN_820 = 820,
+ CUDNN_830 = 830,
+ CUDNN_840 = 840,
+ CUDNN_850 = 850,
+ CUDNN_860 = 860,
};
enum hipVersions {
@@ -240,6 +246,7 @@ enum hipVersions {
HIP_1052 = 1052,
HIP_1060 = 1060,
HIP_1061 = 1061,
+ HIP_1064 = 1064,
HIP_1070 = 1070,
HIP_1071 = 1071,
HIP_1080 = 1080,
@@ -290,7 +297,8 @@ enum hipVersions {
HIP_5011 = 5011,
HIP_5020 = 5020,
HIP_5030 = 5030,
- HIP_LATEST = HIP_5030,
+ HIP_5040 = 5040,
+ HIP_LATEST = HIP_5040,
};
struct cudaAPIversions {
diff --git a/tests/unit_tests/synthetic/driver_enums.cu b/tests/unit_tests/synthetic/driver_enums.cu
index b97398c3..8368e648 100644
--- a/tests/unit_tests/synthetic/driver_enums.cu
+++ b/tests/unit_tests/synthetic/driver_enums.cu
@@ -313,21 +313,21 @@ int main() {
// CHECK: hipJitOption jit_option;
// CHECK-NEXT: hipJitOption jit_option_enum;
- // CHECK-NEXT: hipJitOption JIT_MAX_REGISTERS = hipJitOptionMaxRegisters;
- // CHECK-NEXT: hipJitOption JIT_THREADS_PER_BLOCK = hipJitOptionThreadsPerBlock;
- // CHECK-NEXT: hipJitOption JIT_WALL_TIME = hipJitOptionWallTime;
- // CHECK-NEXT: hipJitOption JIT_INFO_LOG_BUFFER = hipJitOptionInfoLogBuffer;
- // CHECK-NEXT: hipJitOption JIT_INFO_LOG_BUFFER_SIZE_BYTES = hipJitOptionInfoLogBufferSizeBytes;
- // CHECK-NEXT: hipJitOption JIT_ERROR_LOG_BUFFER = hipJitOptionErrorLogBuffer;
- // CHECK-NEXT: hipJitOption JIT_ERROR_LOG_BUFFER_SIZE_BYTES = hipJitOptionErrorLogBufferSizeBytes;
- // CHECK-NEXT: hipJitOption JIT_OPTIMIZATION_LEVEL = hipJitOptionOptimizationLevel;
- // CHECK-NEXT: hipJitOption JIT_TARGET_FROM_CUCONTEXT = hipJitOptionTargetFromContext;
- // CHECK-NEXT: hipJitOption JIT_TARGET = hipJitOptionTarget;
- // CHECK-NEXT: hipJitOption JIT_FALLBACK_STRATEGY = hipJitOptionFallbackStrategy;
- // CHECK-NEXT: hipJitOption JIT_GENERATE_DEBUG_INFO = hipJitOptionGenerateDebugInfo;
- // CHECK-NEXT: hipJitOption JIT_LOG_VERBOSE = hipJitOptionLogVerbose;
- // CHECK-NEXT: hipJitOption JIT_GENERATE_LINE_INFO = hipJitOptionGenerateLineInfo;
- // CHECK-NEXT: hipJitOption JIT_CACHE_MODE = hipJitOptionCacheMode;
+ // CHECK-NEXT: hipJitOption JIT_MAX_REGISTERS = HIPRTC_JIT_MAX_REGISTERS;
+ // CHECK-NEXT: hipJitOption JIT_THREADS_PER_BLOCK = HIPRTC_JIT_THREADS_PER_BLOCK;
+ // CHECK-NEXT: hipJitOption JIT_WALL_TIME = HIPRTC_JIT_WALL_TIME;
+ // CHECK-NEXT: hipJitOption JIT_INFO_LOG_BUFFER = HIPRTC_JIT_INFO_LOG_BUFFER;
+ // CHECK-NEXT: hipJitOption JIT_INFO_LOG_BUFFER_SIZE_BYTES = HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES;
+ // CHECK-NEXT: hipJitOption JIT_ERROR_LOG_BUFFER = HIPRTC_JIT_ERROR_LOG_BUFFER;
+ // CHECK-NEXT: hipJitOption JIT_ERROR_LOG_BUFFER_SIZE_BYTES = HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES;
+ // CHECK-NEXT: hipJitOption JIT_OPTIMIZATION_LEVEL = HIPRTC_JIT_OPTIMIZATION_LEVEL;
+ // CHECK-NEXT: hipJitOption JIT_TARGET_FROM_CUCONTEXT = HIPRTC_JIT_TARGET_FROM_HIPCONTEXT;
+ // CHECK-NEXT: hipJitOption JIT_TARGET = HIPRTC_JIT_TARGET;
+ // CHECK-NEXT: hipJitOption JIT_FALLBACK_STRATEGY = HIPRTC_JIT_FALLBACK_STRATEGY;
+ // CHECK-NEXT: hipJitOption JIT_GENERATE_DEBUG_INFO = HIPRTC_JIT_GENERATE_DEBUG_INFO;
+ // CHECK-NEXT: hipJitOption JIT_LOG_VERBOSE = HIPRTC_JIT_LOG_VERBOSE;
+ // CHECK-NEXT: hipJitOption JIT_GENERATE_LINE_INFO = HIPRTC_JIT_GENERATE_LINE_INFO;
+ // CHECK-NEXT: hipJitOption JIT_CACHE_MODE = HIPRTC_JIT_CACHE_MODE;
CUjit_option jit_option;
CUjit_option_enum jit_option_enum;
CUjit_option JIT_MAX_REGISTERS = CU_JIT_MAX_REGISTERS;
@@ -346,7 +346,7 @@ int main() {
CUjit_option JIT_GENERATE_LINE_INFO = CU_JIT_GENERATE_LINE_INFO;
CUjit_option JIT_CACHE_MODE = CU_JIT_CACHE_MODE;
- // CHECK: hipJitOption JIT_NUM_OPTIONS = hipJitOptionNumOptions;
+ // CHECK: hipJitOption JIT_NUM_OPTIONS = HIPRTC_JIT_NUM_OPTIONS;
CUjit_option JIT_NUM_OPTIONS = CU_JIT_NUM_OPTIONS;
// CHECK: hipLimit_t limit;
@@ -642,8 +642,8 @@ int main() {
CUdevice_P2PAttribute DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED = CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED;
CUdevice_P2PAttribute DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED = CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED;
- // CHECK: hipJitOption JIT_NEW_SM3X_OPT = hipJitOptionSm3xOpt;
- // CHECK-NEXT: hipJitOption JIT_FAST_COMPILE = hipJitOptionFastCompile;
+ // CHECK: hipJitOption JIT_NEW_SM3X_OPT = HIPRTC_JIT_NEW_SM3X_OPT;
+ // CHECK-NEXT: hipJitOption JIT_FAST_COMPILE = HIPRTC_JIT_FAST_COMPILE;
CUjit_option JIT_NEW_SM3X_OPT = CU_JIT_NEW_SM3X_OPT;
CUjit_option JIT_FAST_COMPILE = CU_JIT_FAST_COMPILE;
@@ -913,15 +913,6 @@ int main() {
// CHECK: hipDeviceAttribute_t DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR = hipDeviceAttributeMaxBlocksPerMultiprocessor;
CUdevice_attribute DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR = CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR;
- // CHECK: hipKernelNodeAttrID kernelNodeAttrID;
- // CHECK-NEXT: hipKernelNodeAttrID kernelNodeAttrID_enum;
- // CHECK-NEXT: hipKernelNodeAttrID KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW = hipKernelNodeAttributeAccessPolicyWindow;
- // CHECK-NEXT: hipKernelNodeAttrID KERNEL_NODE_ATTRIBUTE_COOPERATIVE = hipKernelNodeAttributeCooperative;
- CUkernelNodeAttrID kernelNodeAttrID;
- CUkernelNodeAttrID_enum kernelNodeAttrID_enum;
- CUkernelNodeAttrID KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW = CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW;
- CUkernelNodeAttrID KERNEL_NODE_ATTRIBUTE_COOPERATIVE = CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE;
-
// CHECK: hipAccessProperty accessProperty;
// CHECK-NEXT: hipAccessProperty accessProperty_enum;
// CHECK-NEXT: hipAccessProperty ACCESS_PROPERTY_NORMAL = hipAccessPropertyNormal;
@@ -937,7 +928,19 @@ int main() {
CUpointer_attribute POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE = CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE;
#endif
+#if CUDA_VERSION >= 11000 && CUDA_VERSION < 11080
+ // CHECK: hipKernelNodeAttrID kernelNodeAttrID_enum;
+ CUkernelNodeAttrID_enum kernelNodeAttrID_enum;
+#endif
+
#if CUDA_VERSION >= 11010
+ // CHECK: hipKernelNodeAttrID kernelNodeAttrID;
+ // CHECK-NEXT: hipKernelNodeAttrID KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW = hipKernelNodeAttributeAccessPolicyWindow;
+ // CHECK-NEXT: hipKernelNodeAttrID KERNEL_NODE_ATTRIBUTE_COOPERATIVE = hipKernelNodeAttributeCooperative;
+ CUkernelNodeAttrID kernelNodeAttrID;
+ CUkernelNodeAttrID KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW = CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW;
+ CUkernelNodeAttrID KERNEL_NODE_ATTRIBUTE_COOPERATIVE = CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE;
+
// CHECK: hipGraphNodeType GRAPH_NODE_TYPE_WAIT_EVENT = hipGraphNodeTypeWaitEvent;
// CHECK-NEXT: hipGraphNodeType GRAPH_NODE_TYPE_EVENT_RECORD = hipGraphNodeTypeEventRecord;
CUgraphNodeType GRAPH_NODE_TYPE_WAIT_EVENT = CU_GRAPH_NODE_TYPE_WAIT_EVENT;
diff --git a/tests/unit_tests/synthetic/driver_functions.cu b/tests/unit_tests/synthetic/driver_functions.cu
index 736e7bdc..d77eee21 100644
--- a/tests/unit_tests/synthetic/driver_functions.cu
+++ b/tests/unit_tests/synthetic/driver_functions.cu
@@ -527,6 +527,16 @@ int main() {
// CHECK: result = hipDevicePrimaryCtxSetFlags(device, flags);
result = cuDevicePrimaryCtxSetFlags_v2(device, flags);
+ // CUDA: CUresult CUDAAPI cuMemRetainAllocationHandle(CUmemGenericAllocationHandle *handle, void *addr);
+ // HIP: hipError_t hipMemRetainAllocationHandle(hipMemGenericAllocationHandle_t* handle, void* addr);
+ // CHECK: result = hipMemRetainAllocationHandle(&memGenericAllocationHandle_t, image);
+ result = cuMemRetainAllocationHandle(&memGenericAllocationHandle_t, image);
+
+ // CHECK: result = hipGraphInstantiate(&graphExec, graph, &graphNode, nullptr, bytes);
+ result = cuGraphInstantiate_v2(&graphExec, graph, &graphNode, nullptr, bytes);
+#endif
+
+#if CUDA_VERSION >= 11000
// CHECK: hipKernelNodeAttrID kernelNodeAttrID;
CUkernelNodeAttrID kernelNodeAttrID;
// CHECK: hipKernelNodeAttrValue kernelNodeAttrValue;
@@ -542,16 +552,6 @@ int main() {
// CHECK: result = hipGraphKernelNodeGetAttribute(graphNode, kernelNodeAttrID, &kernelNodeAttrValue);
result = cuGraphKernelNodeGetAttribute(graphNode, kernelNodeAttrID, &kernelNodeAttrValue);
- // CUDA: CUresult CUDAAPI cuMemRetainAllocationHandle(CUmemGenericAllocationHandle *handle, void *addr);
- // HIP: hipError_t hipMemRetainAllocationHandle(hipMemGenericAllocationHandle_t* handle, void* addr);
- // CHECK: result = hipMemRetainAllocationHandle(&memGenericAllocationHandle_t, image);
- result = cuMemRetainAllocationHandle(&memGenericAllocationHandle_t, image);
-
- // CHECK: result = hipGraphInstantiate(&graphExec, graph, &graphNode, nullptr, bytes);
- result = cuGraphInstantiate_v2(&graphExec, graph, &graphNode, nullptr, bytes);
-#endif
-
-#if CUDA_VERSION >= 11010
// CUDA: CUresult CUDAAPI cuGraphExecChildGraphNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, CUgraph childGraph);
// HIP: hipError_t hipGraphExecChildGraphNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, hipGraph_t childGraph);
// CHECK: result = hipGraphExecChildGraphNodeSetParams(graphExec, graphNode, graph);
diff --git a/tests/unit_tests/synthetic/driver_unions.cu b/tests/unit_tests/synthetic/driver_unions.cu
index 1d9bddc4..0c375f96 100644
--- a/tests/unit_tests/synthetic/driver_unions.cu
+++ b/tests/unit_tests/synthetic/driver_unions.cu
@@ -9,8 +9,11 @@ int main() {
#if CUDA_VERSION >= 11000
// CHECK: hipKernelNodeAttrValue kernelNodeAttrValue;
- // CHECK-NEXT: hipKernelNodeAttrValue kernelNodeAttrValue_union;
CUkernelNodeAttrValue kernelNodeAttrValue;
+#endif
+
+#if CUDA_VERSION >= 11000 && CUDA_VERSION < 11080
+ // CHECK: hipKernelNodeAttrValue kernelNodeAttrValue_union;
CUkernelNodeAttrValue_union kernelNodeAttrValue_union;
#endif
diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu
index 61643cdd..0a24616d 100644
--- a/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu
+++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu
@@ -144,6 +144,7 @@ int main() {
blasStatus = cublasGetPointerMode_v2(blasHandle, &blasPointerMode);
int n = 0;
+ int nrhs = 0;
int m = 0;
int num = 0;
int lda = 0;
@@ -156,6 +157,8 @@ int main() {
int kl = 0;
int ku = 0;
int batchCount = 0;
+ int P = 0;
+ int info = 0;
void* image = nullptr;
void* image_2 = nullptr;
void* deviceptr = nullptr;
@@ -221,6 +224,7 @@ int main() {
float** fAarray = 0;
float** fBarray = 0;
float** fCarray = 0;
+ float** fTauarray = 0;
double da = 0;
double dA = 0;
@@ -240,6 +244,11 @@ int main() {
double** dAarray = 0;
double** dBarray = 0;
double** dCarray = 0;
+ double** dTauarray = 0;
+
+ void** voidAarray = nullptr;
+ void** voidBarray = nullptr;
+ void** voidCarray = nullptr;
// CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSnrm2_v2(cublasHandle_t handle, int n, const float* x, int incx, float* result);
// HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSnrm2(hipblasHandle_t handle, int n, const float* x, int incx, float* result);
@@ -263,16 +272,20 @@ int main() {
// CHECK: hipComplex** complexAarray = 0;
// CHECK-NEXT: hipComplex** complexBarray = 0;
// CHECK-NEXT: hipComplex** complexCarray = 0;
+ // CHECK-NEXT: hipComplex** complexTauarray = 0;
cuComplex** complexAarray = 0;
cuComplex** complexBarray = 0;
cuComplex** complexCarray = 0;
+ cuComplex** complexTauarray = 0;
// CHECK: hipDoubleComplex** dcomplexAarray = 0;
// CHECK-NEXT: hipDoubleComplex** dcomplexBarray = 0;
// CHECK-NEXT: hipDoubleComplex** dcomplexCarray = 0;
+ // CHECK-NEXT: hipDoubleComplex** dcomplexTauarray = 0;
cuDoubleComplex** dcomplexAarray = 0;
cuDoubleComplex** dcomplexBarray = 0;
cuDoubleComplex** dcomplexCarray = 0;
+ cuDoubleComplex** dcomplexTauarray = 0;
// CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasScnrm2_v2(cublasHandle_t handle, int n, const cuComplex* x, int incx, float* result);
// HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasScnrm2(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, float* result);
@@ -1192,6 +1205,330 @@ int main() {
// CHECK: blasStatus = hipblasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount);
blasStatus = cublasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount);
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyrk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const float* alpha, const float* A, int lda, const float* beta, float* C, int ldc);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSsyrk(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* AP, int lda, const float* beta, float* CP, int ldc);
+ // CHECK: blasStatus = hipblasSsyrk(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fb, &fC, ldc);
+ // CHECK-NEXT: blasStatus = hipblasSsyrk(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fb, &fC, ldc);
+ blasStatus = cublasSsyrk(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fb, &fC, ldc);
+ blasStatus = cublasSsyrk_v2(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fb, &fC, ldc);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyrk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const double* alpha, const double* A, int lda, const double* beta, double* C, int ldc);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDsyrk(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* AP, int lda, const double* beta, double* CP, int ldc);
+ // CHECK: blasStatus = hipblasDsyrk(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &db, &dC, ldc);
+ // CHECK-NEXT: blasStatus = hipblasDsyrk(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &db, &dC, ldc);
+ blasStatus = cublasDsyrk(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &db, &dC, ldc);
+ blasStatus = cublasDsyrk_v2(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &db, &dC, ldc);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyrk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* beta, cuComplex* C, int ldc);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCsyrk(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* AP, int lda, const hipblasComplex* beta, hipblasComplex* CP, int ldc);
+ // CHECK: blasStatus = hipblasCsyrk(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, &complexC, ldc);
+ // CHECK-NEXT: blasStatus = hipblasCsyrk(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, &complexC, ldc);
+ blasStatus = cublasCsyrk(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, &complexC, ldc);
+ blasStatus = cublasCsyrk_v2(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, &complexC, ldc);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyrk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZsyrk(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int lda, const hipblasDoubleComplex* beta, hipblasDoubleComplex* CP, int ldc);
+ // CHECK: blasStatus = hipblasZsyrk(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, &dcomplexC, ldc);
+ // CHECK-NEXT: blasStatus = hipblasZsyrk(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, &dcomplexC, ldc);
+ blasStatus = cublasZsyrk(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, &dcomplexC, ldc);
+ blasStatus = cublasZsyrk_v2(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, &dcomplexC, ldc);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCherk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const float* alpha, const cuComplex* A, int lda, const float* beta, cuComplex* C, int ldc);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCherk(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const hipblasComplex* AP, int lda, const float* beta, hipblasComplex* CP, int ldc);
+ // CHECK: blasStatus = hipblasCherk(blasHandle, blasFillMode, transa, n, k, &fa, &complexA, lda, &fb, &complexC, ldc);
+ // CHECK-NEXT: blasStatus = hipblasCherk(blasHandle, blasFillMode, transa, n, k, &fa, &complexA, lda, &fb, &complexC, ldc);
+ blasStatus = cublasCherk(blasHandle, blasFillMode, transa, n, k, &fa, &complexA, lda, &fb, &complexC, ldc);
+ blasStatus = cublasCherk_v2(blasHandle, blasFillMode, transa, n, k, &fa, &complexA, lda, &fb, &complexC, ldc);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZherk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const double* alpha, const cuDoubleComplex* A, int lda, const double* beta, cuDoubleComplex* C, int ldc);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZherk(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const hipblasDoubleComplex* AP, int lda, const double* beta, hipblasDoubleComplex* CP, int ldc);
+ // CHECK: blasStatus = hipblasZherk(blasHandle, blasFillMode, transa, n, k, &da, &dcomplexA, lda, &db, &dcomplexC, ldc);
+ // CHECK-NEXT: blasStatus = hipblasZherk(blasHandle, blasFillMode, transa, n, k, &da, &dcomplexA, lda, &db, &dcomplexC, ldc);
+ blasStatus = cublasZherk(blasHandle, blasFillMode, transa, n, k, &da, &dcomplexA, lda, &db, &dcomplexC, ldc);
+ blasStatus = cublasZherk_v2(blasHandle, blasFillMode, transa, n, k, &da, &dcomplexA, lda, &db, &dcomplexC, ldc);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyr2k_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const float* alpha, const float* A, int lda, const float* B, int ldb, const float* beta, float* C, int ldc);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSsyr2k(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* AP, int lda, const float* BP, int ldb, const float* beta, float* CP, int ldc);
+ // CHECK: blasStatus = hipblasSsyr2k(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc);
+ // CHECK-NEXT: blasStatus = hipblasSsyr2k(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc);
+ blasStatus = cublasSsyr2k(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc);
+ blasStatus = cublasSsyr2k_v2(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyr2k_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const double* alpha, const double* A, int lda, const double* B, int ldb, const double* beta, double* C, int ldc);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDsyr2k(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* AP, int lda, const double* BP, int ldb, const double* beta, double* CP, int ldc);
+ // CHECK: blasStatus = hipblasDsyr2k(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc);
+ // CHECK-NEXT: blasStatus = hipblasDsyr2k(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc);
+ blasStatus = cublasDsyr2k(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc);
+ blasStatus = cublasDsyr2k_v2(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyr2k_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, const cuComplex* beta, cuComplex* C, int ldc);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCsyr2k(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* AP, int lda, const hipblasComplex* BP, int ldb, const hipblasComplex* beta, hipblasComplex* CP, int ldc);
+ // CHECK: blasStatus = hipblasCsyr2k(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc);
+ // CHECK-NEXT: blasStatus = hipblasCsyr2k(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc);
+ blasStatus = cublasCsyr2k(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc);
+ blasStatus = cublasCsyr2k_v2(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyr2k_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZsyr2k(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int lda, const hipblasDoubleComplex* BP, int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* CP, int ldc);
+ // CHECK: blasStatus = hipblasZsyr2k(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc);
+ // CHECK-NEXT: blasStatus = hipblasZsyr2k(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc);
+ blasStatus = cublasZsyr2k(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc);
+ blasStatus = cublasZsyr2k_v2(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyrkx(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const float* alpha, const float* A, int lda, const float* B, int ldb, const float* beta, float* C, int ldc);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSsyrkx(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* AP, int lda, const float* BP, int ldb, const float* beta, float* CP, int ldc);
+ // CHECK: blasStatus = hipblasSsyrkx(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc);
+ blasStatus = cublasSsyrkx(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyrkx(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const double* alpha, const double* A, int lda, const double* B, int ldb, const double* beta, double* C, int ldc);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDsyrkx(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* AP, int lda, const double* BP, int ldb, const double* beta, double* CP, int ldc);
+ // CHECK: blasStatus = hipblasDsyrkx(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc);
+ blasStatus = cublasDsyrkx(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyrkx(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, const cuComplex* beta, cuComplex* C, int ldc);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCsyrkx(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* AP, int lda, const hipblasComplex* BP, int ldb, const hipblasComplex* beta, hipblasComplex* CP, int ldc);
+ // CHECK: blasStatus = hipblasCsyrkx(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc);
+ blasStatus = cublasCsyrkx(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyrkx(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZsyrkx(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int lda, const hipblasDoubleComplex* BP, int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* CP, int ldc);
+ // CHECK: blasStatus = hipblasZsyrkx(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc);
+ blasStatus = cublasZsyrkx(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCher2k_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, const float* beta, cuComplex* C, int ldc);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCher2k(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* AP, int lda, const hipblasComplex* BP, int ldb, const float* beta, hipblasComplex* CP, int ldc);
+ // CHECK: blasStatus = hipblasCher2k(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexB, ldb, &fb, &complexC, ldc);
+ // CHECK-NEXT: blasStatus = hipblasCher2k(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexB, ldb, &fb, &complexC, ldc);
+ blasStatus = cublasCher2k(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexB, ldb, &fb, &complexC, ldc);
+ blasStatus = cublasCher2k_v2(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexB, ldb, &fb, &complexC, ldc);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZher2k_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, const double* beta, cuDoubleComplex* C, int ldc);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZher2k(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int lda, const hipblasDoubleComplex* BP, int ldb, const double* beta, hipblasDoubleComplex* CP, int ldc);
+ // CHECK: blasStatus = hipblasZher2k(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &db, &dcomplexC, ldc);
+ // CHECK-NEXT: blasStatus = hipblasZher2k(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &db, &dcomplexC, ldc);
+ blasStatus = cublasZher2k(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &db, &dcomplexC, ldc);
+ blasStatus = cublasZher2k_v2(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &db, &dcomplexC, ldc);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCherkx(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, const float* beta, cuComplex* C, int ldc);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCherkx(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* AP, int lda, const hipblasComplex* BP, int ldb, const float* beta, hipblasComplex* CP, int ldc);
+ // CHECK: blasStatus = hipblasCherkx(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexB, ldb, &fb, &complexC, ldc);
+ blasStatus = cublasCherkx(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexB, ldb, &fb, &complexC, ldc);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZherkx(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, const double* beta, cuDoubleComplex* C, int ldc);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZherkx(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int lda, const hipblasDoubleComplex* BP, int ldb, const double* beta, hipblasDoubleComplex* CP, int ldc);
+ // CHECK: blasStatus = hipblasZherkx(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &db, &dcomplexC, ldc);
+ blasStatus = cublasZherkx(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &db, &dcomplexC, ldc);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsymm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, int n, const float* alpha, const float* A, int lda, const float* B, int ldb, const float* beta, float* C, int ldc);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSsymm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const float* alpha, const float* AP, int lda, const float* BP, int ldb, const float* beta, float* CP, int ldc);
+ // CHECK: blasStatus = hipblasSsymm(blasHandle, blasSideMode, blasFillMode, m, n, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc);
+ // CHECK-NEXT: blasStatus = hipblasSsymm(blasHandle, blasSideMode, blasFillMode, m, n, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc);
+ blasStatus = cublasSsymm(blasHandle, blasSideMode, blasFillMode, m, n, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc);
+ blasStatus = cublasSsymm_v2(blasHandle, blasSideMode, blasFillMode, m, n, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsymm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, int n, const double* alpha, const double* A, int lda, const double* B, int ldb, const double* beta, double* C, int ldc);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDsymm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const double* alpha, const double* AP, int lda, const double* BP, int ldb, const double* beta, double* CP, int ldc);
+ // CHECK: blasStatus = hipblasDsymm(blasHandle, blasSideMode, blasFillMode, m, n, &da, &dA, lda, &dB, ldb, &db, &dC, ldc);
+ // CHECK-NEXT: blasStatus = hipblasDsymm(blasHandle, blasSideMode, blasFillMode, m, n, &da, &dA, lda, &dB, ldb, &db, &dC, ldc);
+ blasStatus = cublasDsymm(blasHandle, blasSideMode, blasFillMode, m, n, &da, &dA, lda, &dB, ldb, &db, &dC, ldc);
+ blasStatus = cublasDsymm_v2(blasHandle, blasSideMode, blasFillMode, m, n, &da, &dA, lda, &dB, ldb, &db, &dC, ldc);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsymm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, int n, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, const cuComplex* beta, cuComplex* C, int ldc);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCsymm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasComplex* alpha, const hipblasComplex* AP, int lda, const hipblasComplex* BP, int ldb, const hipblasComplex* beta, hipblasComplex* CP, int ldc);
+ // CHECK: blasStatus = hipblasCsymm(blasHandle, blasSideMode, blasFillMode, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc);
+ // CHECK-NEXT: blasStatus = hipblasCsymm(blasHandle, blasSideMode, blasFillMode, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc);
+ blasStatus = cublasCsymm(blasHandle, blasSideMode, blasFillMode, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc);
+ blasStatus = cublasCsymm_v2(blasHandle, blasSideMode, blasFillMode, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsymm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZsymm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int lda, const hipblasDoubleComplex* BP, int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* CP, int ldc);
+ // CHECK: blasStatus = hipblasZsymm(blasHandle, blasSideMode, blasFillMode, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc);
+ // CHECK-NEXT: blasStatus = hipblasZsymm(blasHandle, blasSideMode, blasFillMode, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc);
+ blasStatus = cublasZsymm(blasHandle, blasSideMode, blasFillMode, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc);
+ blasStatus = cublasZsymm_v2(blasHandle, blasSideMode, blasFillMode, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChemm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, int n, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, const cuComplex* beta, cuComplex* C, int ldc);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasChemm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasComplex* alpha, const hipblasComplex* AP, int lda, const hipblasComplex* BP, int ldb, const hipblasComplex* beta, hipblasComplex* CP, int ldc);
+ // CHECK: blasStatus = hipblasChemm(blasHandle, blasSideMode, blasFillMode, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc);
+ // CHECK-NEXT: blasStatus = hipblasChemm(blasHandle, blasSideMode, blasFillMode, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc);
+ blasStatus = cublasChemm(blasHandle, blasSideMode, blasFillMode, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc);
+ blasStatus = cublasChemm_v2(blasHandle, blasSideMode, blasFillMode, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhemm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZhemm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int lda, const hipblasDoubleComplex* BP, int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* CP, int ldc);
+ // CHECK: blasStatus = hipblasZhemm(blasHandle, blasSideMode, blasFillMode, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc);
+ // CHECK-NEXT: blasStatus = hipblasZhemm(blasHandle, blasSideMode, blasFillMode, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc);
+ blasStatus = cublasZhemm(blasHandle, blasSideMode, blasFillMode, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc);
+ blasStatus = cublasZhemm_v2(blasHandle, blasSideMode, blasFillMode, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const float* alpha, const float* A, int lda, float* B, int ldb);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasStrsm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float* alpha, float* AP, int lda, float* BP, int ldb);
+ // CHECK: blasStatus = hipblasStrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, &fA, lda, &fB, ldb);
+ // CHECK-NEXT: blasStatus = hipblasStrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, &fA, lda, &fB, ldb);
+ blasStatus = cublasStrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, &fA, lda, &fB, ldb);
+ blasStatus = cublasStrsm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, &fA, lda, &fB, ldb);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const double* alpha, const double* A, int lda, double* B, int ldb);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDtrsm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double* alpha, double* AP, int lda, double* BP, int ldb);
+ // CHECK: blasStatus = hipblasDtrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb);
+ // CHECK-NEXT: blasStatus = hipblasDtrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb);
+ blasStatus = cublasDtrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb);
+ blasStatus = cublasDtrsm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuComplex* alpha, const cuComplex* A, int lda, cuComplex* B, int ldb);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCtrsm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, hipblasComplex* AP, int lda, hipblasComplex* BP, int ldb);
+ // CHECK: blasStatus = hipblasCtrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb);
+ // CHECK-NEXT: blasStatus = hipblasCtrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb);
+ blasStatus = cublasCtrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb);
+ blasStatus = cublasCtrsm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, cuDoubleComplex* B, int ldb);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZtrsm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* AP, int lda, hipblasDoubleComplex* BP, int ldb);
+ // CHECK: blasStatus = hipblasZtrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb);
+ // CHECK-NEXT: blasStatus = hipblasZtrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb);
+ blasStatus = cublasZtrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb);
+ blasStatus = cublasZtrsm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgeam(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, const float* alpha, const float* A, int lda, const float* beta, const float* B, int ldb, float* C, int ldc);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgeam(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const float* alpha, const float* AP, int lda, const float* beta, const float* BP, int ldb, float* CP, int ldc);
+ // CHECK: blasStatus = hipblasSgeam(blasHandle, transa, transb, m, n, &fa, &fA, lda, &fb, &fB, ldb, &fC, ldc);
+ blasStatus = cublasSgeam(blasHandle, transa, transb, m, n, &fa, &fA, lda, &fb, &fB, ldb, &fC, ldc);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgeam(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, const double* alpha, const double* A, int lda, const double* beta, const double* B, int ldb, double* C, int ldc);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgeam(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const double* alpha, const double* AP, int lda, const double* beta, const double* BP, int ldb, double* CP, int ldc);
+ // CHECK: blasStatus = hipblasDgeam(blasHandle, transa, transb, m, n, &da, &dA, lda, &db, &dB, ldb, &dC, ldc);
+ blasStatus = cublasDgeam(blasHandle, transa, transb, m, n, &da, &dA, lda, &db, &dB, ldb, &dC, ldc);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgeam(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* beta, const cuComplex* B, int ldb, cuComplex* C, int ldc);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgeam(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const hipblasComplex* alpha, const hipblasComplex* AP, int lda, const hipblasComplex* beta, const hipblasComplex* BP, int ldb, hipblasComplex* CP, int ldc);
+ // CHECK: blasStatus = hipblasCgeam(blasHandle, transa, transb, m, n, &complexa, &complexA, lda, &complexb, &complexB, ldb, &complexC, ldc);
+ blasStatus = cublasCgeam(blasHandle, transa, transb, m, n, &complexa, &complexA, lda, &complexb, &complexB, ldb, &complexC, ldc);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgeam(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* beta, const cuDoubleComplex* B, int ldb, cuDoubleComplex* C, int ldc);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgeam(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int lda, const hipblasDoubleComplex* beta, const hipblasDoubleComplex* BP, int ldb, hipblasDoubleComplex* CP, int ldc);
+ // CHECK: blasStatus = hipblasZgeam(blasHandle, transa, transb, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexb, &dcomplexB, ldb, &dcomplexC, ldc);
+ blasStatus = cublasZgeam(blasHandle, transa, transb, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexb, &dcomplexB, ldb, &dcomplexC, ldc);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgetrfBatched(cublasHandle_t handle, int n, float* const A[], int lda, int* P, int* info, int batchSize);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgetrfBatched(hipblasHandle_t handle, const int n, float* const A[], const int lda, int* ipiv, int* info, const int batchCount);
+ // CHECK: blasStatus = hipblasSgetrfBatched(blasHandle, n, fAarray, lda, &P, &info, batchCount);
+ blasStatus = cublasSgetrfBatched(blasHandle, n, fAarray, lda, &P, &info, batchCount);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgetrfBatched(cublasHandle_t handle, int n, double* const A[], int lda, int* P, int* info, int batchSize);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgetrfBatched(hipblasHandle_t handle, const int n, double* const A[], const int lda, int* ipiv, int* info, const int batchCount);
+ // CHECK: blasStatus = hipblasDgetrfBatched(blasHandle, n, dAarray, lda, &P, &info, batchCount);
+ blasStatus = cublasDgetrfBatched(blasHandle, n, dAarray, lda, &P, &info, batchCount);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgetrfBatched(cublasHandle_t handle, int n, cuComplex* const A[], int lda, int* P, int* info, int batchSize);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgetrfBatched(hipblasHandle_t handle, const int n, hipblasComplex* const A[], const int lda, int* ipiv, int* info, const int batchCount);
+ // CHECK: blasStatus = hipblasCgetrfBatched(blasHandle, n, complexAarray, lda, &P, &info, batchCount);
+ blasStatus = cublasCgetrfBatched(blasHandle, n, complexAarray, lda, &P, &info, batchCount);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgetrfBatched(cublasHandle_t handle, int n, cuDoubleComplex* const A[], int lda, int* P, int* info, int batchSize);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgetrfBatched(hipblasHandle_t handle, const int n, hipblasDoubleComplex* const A[], const int lda, int* ipiv, int* info, const int batchCount);
+ // CHECK: blasStatus = hipblasZgetrfBatched(blasHandle, n, dcomplexAarray, lda, &P, &info, batchCount);
+ blasStatus = cublasZgetrfBatched(blasHandle, n, dcomplexAarray, lda, &P, &info, batchCount);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgetriBatched(cublasHandle_t handle, int n, const float* const A[], int lda, const int* P, float* const C[], int ldc, int* info, int batchSize);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgetriBatched(hipblasHandle_t handle, const int n, float* const A[], const int lda, int* ipiv, float* const C[], const int ldc, int* info, const int batchCount);
+ // CHECK: blasStatus = hipblasSgetriBatched(blasHandle, n, fAarray, lda, &P, fCarray, ldc, &info, batchCount);
+ blasStatus = cublasSgetriBatched(blasHandle, n, fAarray, lda, &P, fCarray, ldc, &info, batchCount);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgetriBatched(cublasHandle_t handle, int n, const double* const A[], int lda, const int* P, double* const C[], int ldc, int* info, int batchSize);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgetriBatched(hipblasHandle_t handle, const int n, double* const A[], const int lda, int* ipiv, double* const C[], const int ldc, int* info, const int batchCount);
+ // CHECK: blasStatus = hipblasDgetriBatched(blasHandle, n, dAarray, lda, &P, dCarray, ldc, &info, batchCount);
+ blasStatus = cublasDgetriBatched(blasHandle, n, dAarray, lda, &P, dCarray, ldc, &info, batchCount);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgetriBatched(cublasHandle_t handle, int n, const cuComplex* const A[], int lda, const int* P, cuComplex* const C[], int ldc, int* info, int batchSize);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgetriBatched(hipblasHandle_t handle, const int n, hipblasComplex* const A[], const int lda, int* ipiv, hipblasComplex* const C[], const int ldc, int* info, const int batchCount);
+ // CHECK: blasStatus = hipblasCgetriBatched(blasHandle, n, complexAarray, lda, &P, complexCarray, ldc, &info, batchCount);
+ blasStatus = cublasCgetriBatched(blasHandle, n, complexAarray, lda, &P, complexCarray, ldc, &info, batchCount);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgetriBatched(cublasHandle_t handle, int n, const cuDoubleComplex* const A[], int lda, const int* P, cuDoubleComplex* const C[], int ldc, int* info, int batchSize);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgetriBatched(hipblasHandle_t handle, const int n, hipblasDoubleComplex* const A[], const int lda, int* ipiv, hipblasDoubleComplex* const C[], const int ldc, int* info, const int batchCount);
+ // CHECK: blasStatus = hipblasZgetriBatched(blasHandle, n, dcomplexAarray, lda, &P, dcomplexCarray, ldc, &info, batchCount);
+ blasStatus = cublasZgetriBatched(blasHandle, n, dcomplexAarray, lda, &P, dcomplexCarray, ldc, &info, batchCount);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const float* const Aarray[], int lda, const int* devIpiv, float* const Barray[], int ldb, int* info, int batchSize);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, float* const A[], const int lda, const int* ipiv, float* const B[], const int ldb, int* info, const int batchCount);
+ // CHECK: blasStatus = hipblasSgetrsBatched(blasHandle, transa, n, nrhs, fAarray, lda, &P, fBarray, ldb, &info, batchCount);
+ blasStatus = cublasSgetrsBatched(blasHandle, transa, n, nrhs, fAarray, lda, &P, fBarray, ldb, &info, batchCount);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const double* const Aarray[], int lda, const int* devIpiv, double* const Barray[], int ldb, int* info, int batchSize);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, double* const A[], const int lda, const int* ipiv, double* const B[], const int ldb, int* info, const int batchCount);
+ // CHECK: blasStatus = hipblasDgetrsBatched(blasHandle, transa, n, nrhs, dAarray, lda, &P, dBarray, ldb, &info, batchCount);
+ blasStatus = cublasDgetrsBatched(blasHandle, transa, n, nrhs, dAarray, lda, &P, dBarray, ldb, &info, batchCount);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const cuComplex* const Aarray[], int lda, const int* devIpiv, cuComplex* const Barray[], int ldb, int* info, int batchSize);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasComplex* const A[], const int lda, const int* ipiv, hipblasComplex* const B[], const int ldb, int* info, const int batchCount);
+ // CHECK: blasStatus = hipblasCgetrsBatched(blasHandle, transa, n, nrhs, complexAarray, lda, &P, complexBarray, ldb, &info, batchCount);
+ blasStatus = cublasCgetrsBatched(blasHandle, transa, n, nrhs, complexAarray, lda, &P, complexBarray, ldb, &info, batchCount);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const cuDoubleComplex* const Aarray[], int lda, const int* devIpiv, cuDoubleComplex* const Barray[], int ldb, int* info, int batchSize);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasDoubleComplex* const A[], const int lda, const int* ipiv, hipblasDoubleComplex* const B[], const int ldb, int* info, const int batchCount);
+ // CHECK: blasStatus = hipblasZgetrsBatched(blasHandle, transa, n, nrhs, dcomplexAarray, lda, &P, dcomplexBarray, ldb, &info, batchCount);
+ blasStatus = cublasZgetrsBatched(blasHandle, transa, n, nrhs, dcomplexAarray, lda, &P, dcomplexBarray, ldb, &info, batchCount);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const float* alpha, const float* const A[], int lda, float* const B[], int ldb, int batchCount);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasStrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float* alpha, float* const AP[], int lda, float* BP[], int ldb, int batchCount);
+ // CHECK: blasStatus = hipblasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray, lda, fBarray, ldb, batchCount);
+ blasStatus = cublasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray, lda, fBarray, ldb, batchCount);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const double* alpha, const double* const A[], int lda, double* const B[], int ldb, int batchCount);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDtrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double* alpha, double* const AP[], int lda, double* BP[], int ldb, int batchCount);
+ // CHECK: blasStatus = hipblasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray, lda, dBarray, ldb, batchCount);
+ blasStatus = cublasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray, lda, dBarray, ldb, batchCount);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuComplex* alpha, const cuComplex* const A[], int lda, cuComplex* const B[], int ldb, int batchCount);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCtrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, hipblasComplex* const AP[], int lda, hipblasComplex* BP[], int ldb, int batchCount);
+ // CHECK: blasStatus = hipblasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray, lda, complexBarray, ldb, batchCount);
+ blasStatus = cublasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray, lda, complexBarray, ldb, batchCount);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* const A[], int lda, cuDoubleComplex* const B[], int ldb, int batchCount);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZtrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* const AP[], int lda, hipblasDoubleComplex* BP[], int ldb, int batchCount);
+ // CHECK: blasStatus = hipblasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, batchCount);
+ blasStatus = cublasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, batchCount);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgeqrfBatched(cublasHandle_t handle, int m, int n, float* const Aarray[], int lda, float* const TauArray[], int* info, int batchSize);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgeqrfBatched(hipblasHandle_t handle, const int m, const int n, float* const A[], const int lda, float* const ipiv[], int* info, const int batchCount);
+ // CHECK: blasStatus = hipblasSgeqrfBatched(blasHandle, m, n, fAarray, lda, fTauarray, &info, batchCount);
+ blasStatus = cublasSgeqrfBatched(blasHandle, m, n, fAarray, lda, fTauarray, &info, batchCount);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgeqrfBatched(cublasHandle_t handle, int m, int n, double* const Aarray[], int lda, double* const TauArray[], int* info, int batchSize);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgeqrfBatched(hipblasHandle_t handle, const int m, const int n, double* const A[], const int lda, double* const ipiv[], int* info, const int batchCount);
+ // CHECK: blasStatus = hipblasDgeqrfBatched(blasHandle, m, n, dAarray, lda, dTauarray, &info, batchCount);
+ blasStatus = cublasDgeqrfBatched(blasHandle, m, n, dAarray, lda, dTauarray, &info, batchCount);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgeqrfBatched(cublasHandle_t handle, int m, int n, cuComplex* const Aarray[], int lda, cuComplex* const TauArray[], int* info, int batchSize);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgeqrfBatched(hipblasHandle_t handle, const int m, const int n, hipblasComplex* const A[], const int lda, hipblasComplex* const ipiv[], int* info, const int batchCount);
+ // CHECK: blasStatus = hipblasCgeqrfBatched(blasHandle, m, n, complexAarray, lda, complexTauarray, &info, batchCount);
+ blasStatus = cublasCgeqrfBatched(blasHandle, m, n, complexAarray, lda, complexTauarray, &info, batchCount);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgeqrfBatched(cublasHandle_t handle, int m, int n, cuDoubleComplex* const Aarray[], int lda, cuDoubleComplex* const TauArray[], int* info, int batchSize);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgeqrfBatched(hipblasHandle_t handle, const int m, const int n, hipblasDoubleComplex* const A[], const int lda, hipblasDoubleComplex* const ipiv[], int* info, const int batchCount);
+ // CHECK: blasStatus = hipblasZgeqrfBatched(blasHandle, m, n, dcomplexAarray, lda, dcomplexTauarray, &info, batchCount);
+ blasStatus = cublasZgeqrfBatched(blasHandle, m, n, dcomplexAarray, lda, dcomplexTauarray, &info, batchCount);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSdgmm(cublasHandle_t handle, cublasSideMode_t mode, int m, int n, const float* A, int lda, const float* x, int incx, float* C, int ldc);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSdgmm(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const float* AP, int lda, const float* x, int incx, float* CP, int ldc);
+ // CHECK: blasStatus = hipblasSdgmm(blasHandle, blasSideMode, m, n, &fa, lda, &fx, incx, &fC, ldc);
+ blasStatus = cublasSdgmm(blasHandle, blasSideMode, m, n, &fa, lda, &fx, incx, &fC, ldc);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDdgmm(cublasHandle_t handle, cublasSideMode_t mode, int m, int n, const double* A, int lda, const double* x, int incx, double* C, int ldc);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDdgmm(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const double* AP, int lda, const double* x, int incx, double* CP, int ldc);
+ // CHECK: blasStatus = hipblasDdgmm(blasHandle, blasSideMode, m, n, &da, lda, &dx, incx, &dC, ldc);
+ blasStatus = cublasDdgmm(blasHandle, blasSideMode, m, n, &da, lda, &dx, incx, &dC, ldc);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCdgmm(cublasHandle_t handle, cublasSideMode_t mode, int m, int n, const cuComplex* A, int lda, const cuComplex* x, int incx, cuComplex* C, int ldc);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCdgmm(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const hipblasComplex* AP, int lda, const hipblasComplex* x, int incx, hipblasComplex* CP, int ldc);
+ // CHECK: blasStatus = hipblasCdgmm(blasHandle, blasSideMode, m, n, &complexa, lda, &complexx, incx, &complexC, ldc);
+ blasStatus = cublasCdgmm(blasHandle, blasSideMode, m, n, &complexa, lda, &complexx, incx, &complexC, ldc);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdgmm(cublasHandle_t handle, cublasSideMode_t mode, int m, int n, const cuDoubleComplex* A, int lda, const cuDoubleComplex* x, int incx, cuDoubleComplex* C, int ldc);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZdgmm(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const hipblasDoubleComplex* AP, int lda, const hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* CP, int ldc);
+ // CHECK: blasStatus = hipblasZdgmm(blasHandle, blasSideMode, m, n, &dcomplexa, lda, &dcomplexx, incx, &dcomplexC, ldc);
+ blasStatus = cublasZdgmm(blasHandle, blasSideMode, m, n, &dcomplexa, lda, &dcomplexx, incx, &dcomplexC, ldc);
+
long long int strideA = 0;
long long int strideB = 0;
long long int strideC = 0;
@@ -1264,6 +1601,51 @@ int main() {
// HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgemmStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int lda, long long strideA, const hipblasDoubleComplex* BP, int ldb, long long strideB, const hipblasDoubleComplex* beta, hipblasDoubleComplex* CP, int ldc, long long strideC, int batchCount);
// CHECK: blasStatus = hipblasZgemmStridedBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, strideA, &dcomplexB, ldb, strideB, &dcomplexb, &dcomplexC, ldc, strideC, batchCount);
blasStatus = cublasZgemmStridedBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, strideA, &dcomplexB, ldb, strideB, &dcomplexb, &dcomplexC, ldc, strideC, batchCount);
+
+ void* aptr = nullptr;
+ void* Aptr = nullptr;
+ void* bptr = nullptr;
+ void* Bptr = nullptr;
+ void* cptr = nullptr;
+ void* Cptr = nullptr;
+ void* xptr = nullptr;
+ void* yptr = nullptr;
+ void* sptr = nullptr;
+
+ // CHECK: hipblasDatatype_t Atype;
+ // CHECK-NEXT: hipblasDatatype_t Btype;
+ // CHECK-NEXT: hipblasDatatype_t Ctype;
+ // CHECK-NEXT: hipblasDatatype_t Xtype;
+ // CHECK-NEXT: hipblasDatatype_t Ytype;
+ // CHECK-NEXT: hipblasDatatype_t CStype;
+ // CHECK-NEXT: hipblasDatatype_t Executiontype;
+ cudaDataType Atype;
+ cudaDataType Btype;
+ cudaDataType Ctype;
+ cudaDataType Xtype;
+ cudaDataType Ytype;
+ cudaDataType CStype;
+ cudaDataType Executiontype;
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasScalEx(cublasHandle_t handle, int n, const void* alpha, cudaDataType alphaType, void* x, cudaDataType xType, int incx, cudaDataType executionType);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasScalEx(hipblasHandle_t handle, int n, const void* alpha, hipblasDatatype_t alphaType, void* x, hipblasDatatype_t xType, int incx, hipblasDatatype_t executionType);
+ // CHECK: blasStatus = hipblasScalEx(blasHandle, n, aptr, Atype, xptr, Xtype, incx, Executiontype);
+ blasStatus = cublasScalEx(blasHandle, n, aptr, Atype, xptr, Xtype, incx, Executiontype);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasAxpyEx(cublasHandle_t handle, int n, const void* alpha, cudaDataType alphaType, const void* x, cudaDataType xType, int incx, void* y, cudaDataType yType, int incy, cudaDataType executiontype);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasAxpyEx(hipblasHandle_t handle, int n, const void* alpha, hipblasDatatype_t alphaType, const void* x, hipblasDatatype_t xType, int incx, void* y, hipblasDatatype_t yType, int incy, hipblasDatatype_t executionType);
+ // CHECK: blasStatus = hipblasAxpyEx(blasHandle, n, aptr, Atype, xptr, Xtype, incx, yptr, Ytype, incy, Executiontype);
+ blasStatus = cublasAxpyEx(blasHandle, n, aptr, Atype, xptr, Xtype, incx, yptr, Ytype, incy, Executiontype);
+#endif
+
+#if CUDA_VERSION >= 8000 && CUDA_VERSION < 11000
+ // CHECK: hipblasDatatype_t computeType;
+ cudaDataType computeType;
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, const void* B, cudaDataType Btype, int ldb, const void* beta, void* C, cudaDataType Ctype, int ldc, cudaDataType computeType, cublasGemmAlgo_t algo);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmEx(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A, hipblasDatatype_t aType, int lda, const void* B, hipblasDatatype_t bType, int ldb, const void* beta, void* C, hipblasDatatype_t cType, int ldc, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo);
+ // CHECK: blasStatus = hipblasGemmEx(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, Bptr, Btype, ldb, bptr, Cptr, Ctype, ldc, computeType, blasGemmAlgo);
+ blasStatus = cublasGemmEx(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, Bptr, Btype, ldb, bptr, Cptr, Ctype, ldc, computeType, blasGemmAlgo);
#endif
#if CUDA_VERSION >= 9000
@@ -1271,12 +1653,29 @@ int main() {
cublasGemmAlgo_t BLAS_GEMM_DEFAULT = CUBLAS_GEMM_DEFAULT;
#endif
+#if CUDA_VERSION >= 9010 && CUDA_VERSION < 11000
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* const Aarray[], cudaDataType Atype, int lda, const void* const Barray[], cudaDataType Btype, int ldb, const void* beta, void* const Carray[], cudaDataType Ctype, int ldc, int batchCount, cudaDataType computeType, cublasGemmAlgo_t algo);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmBatchedEx(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A[], hipblasDatatype_t aType, int lda, const void* B[], hipblasDatatype_t bType, int ldb, const void* beta, void* C[], hipblasDatatype_t cType, int ldc, int batchCount, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo);
+ // CHECK: blasStatus = hipblasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo);
+ blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, long long int strideA, const void* B, cudaDataType Btype, int ldb, long long int strideB, const void* beta, void* C, cudaDataType Ctype, int ldc, long long int strideC, int batchCount, cudaDataType computeType, cublasGemmAlgo_t algo);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmStridedBatchedEx(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A, hipblasDatatype_t aType, int lda, hipblasStride strideA, const void* B, hipblasDatatype_t bType, int ldb, hipblasStride strideB, const void* beta, void* C, hipblasDatatype_t cType, int ldc, hipblasStride strideC, int batchCount, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo);
+ // CHECK: blasStatus = hipblasGemmStridedBatchedEx(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, strideA, Bptr, Btype, ldb, strideB, bptr, Cptr, Ctype, ldc, strideC, batchCount, computeType, blasGemmAlgo);
+ blasStatus = cublasGemmStridedBatchedEx(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, strideA, Bptr, Btype, ldb, strideB, bptr, Cptr, Ctype, ldc, strideC, batchCount, computeType, blasGemmAlgo);
+#endif
+
#if CUDA_VERSION >= 10010
// CHECK: hipblasOperation_t BLAS_OP_HERMITAN = HIPBLAS_OP_C;
cublasOperation_t BLAS_OP_HERMITAN = CUBLAS_OP_HERMITAN;
// CHECK: hipblasFillMode_t BLAS_FILL_MODE_FULL = HIPBLAS_FILL_MODE_FULL;
cublasFillMode_t BLAS_FILL_MODE_FULL = CUBLAS_FILL_MODE_FULL;
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasRotEx(cublasHandle_t handle, int n, void* x, cudaDataType xType, int incx, void* y, cudaDataType yType, int incy, const void* c, const void* s, cudaDataType csType, cudaDataType executiontype);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasRotEx(hipblasHandle_t handle, int n, void* x, hipblasDatatype_t xType, int incx, void* y, hipblasDatatype_t yType, int incy, const void* c, const void* s, hipblasDatatype_t csType, hipblasDatatype_t executionType);
+ // CHECK: blasStatus = hipblasRotEx(blasHandle, n, xptr, Xtype, incx, yptr, Ytype, incy, cptr, sptr, CStype, Executiontype);
+ blasStatus = cublasRotEx(blasHandle, n, xptr, Xtype, incx, yptr, Ytype, incy, cptr, sptr, CStype, Executiontype);
#endif
#if CUDA_VERSION >= 11000
@@ -1284,6 +1683,26 @@ int main() {
// CHECK-NEXT: hipblasDatatype_t C_16BF = HIPBLAS_C_16B;
cublasDataType_t R_16BF = CUDA_R_16BF;
cublasDataType_t C_16BF = CUDA_C_16BF;
+
+ // NOTE: WORKAROUND: cublasComputeType_t is not actually supported by hipBLAS
+ // TODO: Fix it after fixing https://github.com/ROCmSoftwarePlatform/hipBLAS/issues/529
+ // CHECK: hipblasDatatype_t blasComputeType;
+ cublasComputeType_t blasComputeType;
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, const void* B, cudaDataType Btype, int ldb, const void* beta, void* C, cudaDataType Ctype, int ldc, cublasComputeType_t computeType, cublasGemmAlgo_t algo);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmEx(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A, hipblasDatatype_t aType, int lda, const void* B, hipblasDatatype_t bType, int ldb, const void* beta, void* C, hipblasDatatype_t cType, int ldc, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo);
+ // CHECK: blasStatus = hipblasGemmEx(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, Bptr, Btype, ldb, bptr, Cptr, Ctype, ldc, blasComputeType, blasGemmAlgo);
+ blasStatus = cublasGemmEx(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, Bptr, Btype, ldb, bptr, Cptr, Ctype, ldc, blasComputeType, blasGemmAlgo);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* const Aarray[], cudaDataType Atype, int lda, const void* const Barray[], cudaDataType Btype, int ldb, const void* beta, void* const Carray[], cudaDataType Ctype, int ldc, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmBatchedEx(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A[], hipblasDatatype_t aType, int lda, const void* B[], hipblasDatatype_t bType, int ldb, const void* beta, void* C[], hipblasDatatype_t cType, int ldc, int batchCount, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo);
+ // CHECK: blasStatus = hipblasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, blasComputeType, blasGemmAlgo);
+ blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, blasComputeType, blasGemmAlgo);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, long long int strideA, const void* B, cudaDataType Btype, int ldb, long long int strideB, const void* beta, void* C, cudaDataType Ctype, int ldc, long long int strideC, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmStridedBatchedEx(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A, hipblasDatatype_t aType, int lda, hipblasStride strideA, const void* B, hipblasDatatype_t bType, int ldb, hipblasStride strideB, const void* beta, void* C, hipblasDatatype_t cType, int ldc, hipblasStride strideC, int batchCount, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo);
+ // CHECK: blasStatus = hipblasGemmStridedBatchedEx(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, strideA, Bptr, Btype, ldb, strideB, bptr, Cptr, Ctype, ldc, strideC, batchCount, blasComputeType, blasGemmAlgo);
+ blasStatus = cublasGemmStridedBatchedEx(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, strideA, Bptr, Btype, ldb, strideB, bptr, Cptr, Ctype, ldc, strideC, batchCount, blasComputeType, blasGemmAlgo);
#endif
return 0;
diff --git a/tests/unit_tests/synthetic/libraries/cudnn2miopen.cu b/tests/unit_tests/synthetic/libraries/cudnn2miopen.cu
new file mode 100644
index 00000000..d7a9ebb3
--- /dev/null
+++ b/tests/unit_tests/synthetic/libraries/cudnn2miopen.cu
@@ -0,0 +1,30 @@
+// RUN: %run_test hipify "%s" "%t" %hipify_args 3 --skip-excluded-preprocessor-conditional-blocks --experimental -roc %clang_args -D__CUDA_API_VERSION_INTERNAL
+
+// CHECK: #include <hip/hip_runtime.h>
+#include <cuda_runtime.h>
+#include <stdio.h>
+// CHECK: #include "miopen/miopen.h"
+#include "cudnn.h"
+
+int main() {
+ printf("15. cuDNN API to MIOpen API synthetic test\n");
+
+ // CHECK: miopenStatus_t dnnStatus_t;
+ // CHECK-NEXT: miopenStatus_t STATUS_SUCCESS = miopenStatusSuccess;
+ // CHECK-NEXT: miopenStatus_t STATUS_NOT_INITIALIZED = miopenStatusNotInitialized;
+ // CHECK-NEXT: miopenStatus_t STATUS_ALLOC_FAILED = miopenStatusAllocFailed;
+ // CHECK-NEXT: miopenStatus_t STATUS_BAD_PARAM = miopenStatusBadParm;
+ // CHECK-NEXT: miopenStatus_t STATUS_INTERNAL_ERROR = miopenStatusInternalError;
+ // CHECK-NEXT: miopenStatus_t STATUS_INVALID_VALUE = miopenStatusInvalidValue;
+ // CHECK-NEXT: miopenStatus_t STATUS_NOT_SUPPORTED = miopenStatusUnsupportedOp;
+ cudnnStatus_t dnnStatus_t;
+ cudnnStatus_t STATUS_SUCCESS = CUDNN_STATUS_SUCCESS;
+ cudnnStatus_t STATUS_NOT_INITIALIZED = CUDNN_STATUS_NOT_INITIALIZED;
+ cudnnStatus_t STATUS_ALLOC_FAILED = CUDNN_STATUS_ALLOC_FAILED;
+ cudnnStatus_t STATUS_BAD_PARAM = CUDNN_STATUS_BAD_PARAM;
+ cudnnStatus_t STATUS_INTERNAL_ERROR = CUDNN_STATUS_INTERNAL_ERROR;
+ cudnnStatus_t STATUS_INVALID_VALUE = CUDNN_STATUS_INVALID_VALUE;
+ cudnnStatus_t STATUS_NOT_SUPPORTED = CUDNN_STATUS_NOT_SUPPORTED;
+
+ return 0;
+}