diff --git a/CMakeLists.txt b/CMakeLists.txt index a8d36f32..b04de886 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,12 +71,26 @@ if(LLVM_PACKAGE_VERSION VERSION_EQUAL "15.0.0" OR LLVM_PACKAGE_VERSION VERSION_G target_link_libraries(hipify-clang PRIVATE LLVMWindowsDriver clangSupport) endif() +if(LLVM_PACKAGE_VERSION VERSION_EQUAL "16.0.0" OR LLVM_PACKAGE_VERSION VERSION_GREATER "16.0.0") + if(MSVC) + set(STD "/std:c++17") + else() + set(STD "-std=c++17") + endif() +else() + if(MSVC) + set(STD "/std:c++14") + else() + set(STD "-std=c++14") + endif() +endif() + if(MSVC) target_link_libraries(hipify-clang PRIVATE version) - target_compile_options(hipify-clang PRIVATE /std:c++14 /Od /GR- /EHs- /EHc-) + target_compile_options(hipify-clang PRIVATE ${STD} /Od /GR- /EHs- /EHc-) set(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} /SUBSYSTEM:WINDOWS") else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -pthread -fno-rtti -fvisibility-inlines-hidden") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${STD} -pthread -fno-rtti -fvisibility-inlines-hidden") endif() # Address Sanitize Flag diff --git a/README.md b/README.md index ede663db..7e7f8871 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ After applying all the matchers, the output HIP source is produced. `hipify-clang` requires: -1. [**LLVM+CLANG**](http://releases.llvm.org) of at least version [4.0.0](http://releases.llvm.org/download.html#4.0.0); the latest stable and recommended release: [**14.0.6**](https://github.com/llvm/llvm-project/releases/tag/llvmorg-14.0.6). +1. [**LLVM+CLANG**](http://releases.llvm.org) of at least version [4.0.0](http://releases.llvm.org/download.html#4.0.0); the latest stable and recommended release: [**15.0.0**](https://github.com/llvm/llvm-project/releases/tag/llvmorg-15.0.0). 2. 
[**CUDA**](https://developer.nvidia.com/cuda-downloads) of at least version [8.0](https://developer.nvidia.com/cuda-80-ga2-download-archive), the latest supported version is [**11.7.1**](https://developer.nvidia.com/cuda-downloads). @@ -170,7 +170,8 @@ After applying all the matchers, the output HIP source is produced. + 14.0.5, - 14.0.6 + 14.0.6, + 15.0.0 11.7.1 LATEST STABLE CONFIG @@ -186,7 +187,7 @@ After applying all the matchers, the output HIP source is produced. In most cases, you can get a suitable version of `LLVM+CLANG` with your package manager. Failing that or having multiple versions of `LLVM`, you can [download a release archive](http://releases.llvm.org/), build or install it, and set -[CMAKE_PREFIX_PATH](https://cmake.org/cmake/help/v3.5/variable/CMAKE_PREFIX_PATH.html) so `cmake` can find it; for instance: `-DCMAKE_PREFIX_PATH=d:\LLVM\14.0.6\dist` +[CMAKE_PREFIX_PATH](https://cmake.org/cmake/help/v3.5/variable/CMAKE_PREFIX_PATH.html) so `cmake` can find it; for instance: `-DCMAKE_PREFIX_PATH=d:\LLVM\15.0.0\dist` ### hipify-clang: usage @@ -284,7 +285,7 @@ Run `Visual Studio 16 2019`, open the generated `LLVM.sln`, build all, and build **LLVM >= 10.0.0:** -1. download [`LLVM project`](https://github.com/llvm/llvm-project/releases/tag/llvmorg-14.0.6) sources; +1. download [`LLVM project`](https://github.com/llvm/llvm-project/releases/tag/llvmorg-15.0.0) sources; 2. 
build [`LLVM project`](http://llvm.org/docs/CMake.html): ```bash @@ -349,21 +350,21 @@ Run `Visual Studio 17 2022`, open the generated `LLVM.sln`, build all, build pro * Install `lit` into `python`: - - ***Linux***: `python /usr/llvm/14.0.6/llvm-project/llvm/utils/lit/setup.py install` + - ***Linux***: `python /usr/llvm/15.0.0/llvm-project/llvm/utils/lit/setup.py install` - - ***Windows***: `python d:/LLVM/14.0.6/llvm-project/llvm/utils/lit/setup.py install` + - ***Windows***: `python d:/LLVM/15.0.0/llvm-project/llvm/utils/lit/setup.py install` * Starting with LLVM 6.0.1 path to `llvm-lit` python script should be specified by the `LLVM_EXTERNAL_LIT` option: - - ***Linux***: `-DLLVM_EXTERNAL_LIT=/usr/llvm/14.0.6/build/bin/llvm-lit` + - ***Linux***: `-DLLVM_EXTERNAL_LIT=/usr/llvm/15.0.0/build/bin/llvm-lit` - - ***Windows***: `-DLLVM_EXTERNAL_LIT=d:/LLVM/14.0.6/build/Release/bin/llvm-lit.py` + - ***Windows***: `-DLLVM_EXTERNAL_LIT=d:/LLVM/15.0.0/build/Release/bin/llvm-lit.py` * `FileCheck`: - - ***Linux***: copy from `/usr/llvm/14.0.6/build/bin/` to `CMAKE_INSTALL_PREFIX/dist/bin` + - ***Linux***: copy from `/usr/llvm/15.0.0/build/bin/` to `CMAKE_INSTALL_PREFIX/dist/bin` - - ***Windows***: copy from `d:/LLVM/14.0.6/build/Release/bin` to `CMAKE_INSTALL_PREFIX/dist/bin` + - ***Windows***: copy from `d:/LLVM/15.0.0/build/Release/bin` to `CMAKE_INSTALL_PREFIX/dist/bin` - Or specify the path to `FileCheck` in `CMAKE_INSTALL_PREFIX` option @@ -385,7 +386,7 @@ Ubuntu 14: LLVM 4.0.0 - 7.1.0, CUDA 7.0 - 9.0, cuDNN 5.0.5 - 7.6.5 Ubuntu 16-18: LLVM 8.0.0 - 14.0.6, CUDA 8.0 - 10.2, cuDNN 5.1.10 - 8.0.5 -Ubuntu 20-21: LLVM 9.0.0 - 14.0.6, CUDA 8.0 - 11.7.1, cuDNN 5.1.10 - 8.4.1 +Ubuntu 20-21: LLVM 9.0.0 - 15.0.0, CUDA 8.0 - 11.7.1, cuDNN 5.1.10 - 8.4.1 Minimum build system requirements for the above configurations: @@ -402,11 +403,11 @@ cmake -DHIPIFY_CLANG_TESTS=1 \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=../dist \ - -DCMAKE_PREFIX_PATH=/usr/llvm/14.0.6/dist \ + 
-DCMAKE_PREFIX_PATH=/usr/llvm/15.0.0/dist \ -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda \ -DCUDA_DNN_ROOT_DIR=/usr/local/cuda \ -DCUDA_CUB_ROOT_DIR=/usr/CUB \ - -DLLVM_EXTERNAL_LIT=/usr/llvm/14.0.6/build/bin/llvm-lit \ + -DLLVM_EXTERNAL_LIT=/usr/llvm/15.0.0/build/bin/llvm-lit \ ../hipify ``` *A corresponding successful output:* @@ -424,14 +425,14 @@ cmake -- Detecting CXX compile features -- Detecting CXX compile features - done -- Found ZLIB: /usr/lib/x86_64-linux-gnu/libz.so (found version "1.2.11") --- Found LLVM 14.0.6: --- - CMake module path: /usr/llvm/14.0.6/dist/lib/cmake/llvm --- - Include path : /usr/llvm/14.0.6/dist/include --- - Binary path : /usr/llvm/14.0.6/dist/bin +-- Found LLVM 15.0.0: +-- - CMake module path: /usr/llvm/15.0.0/dist/lib/cmake/llvm +-- - Include path : /usr/llvm/15.0.0/dist/include +-- - Binary path : /usr/llvm/15.0.0/dist/bin -- Linker detection: GNU ld -- Found PythonInterp: /usr/bin/python (found suitable version "3.9.7", minimum required is "2.7") -- Found lit: /usr/local/bin/lit --- Found FileCheck: /usr/llvm/14.0.6/dist/bin/FileCheck +-- Found FileCheck: /usr/llvm/15.0.0/dist/bin/FileCheck -- Looking for pthread.h -- Looking for pthread.h - found -- Performing Test CMAKE_HAVE_LIBC_PTHREAD @@ -450,7 +451,7 @@ make test-hipify Running HIPify regression tests ======================================== CUDA 11.7 - will be used for testing -LLVM 14.0.6 - will be used for testing +LLVM 15.0.0 - will be used for testing x86_64 - Platform architecture Linux 5.13.0-21-generic - Platform OS 64 - hipify-clang binary bitness @@ -564,7 +565,8 @@ Testing Time: 6.22s | 11.0.1 - 11.1.0 | 7.0 - 11.2.2 | 7.6.5 - 8.0.5 | 2017.15.9.31, 2019.16.8.4 | 3.19.3 | 3.9.2 | | 12.0.0 - 13.0.1 | 7.0 - 11.5.1 | 7.6.5 - 8.3.2 | 2017.15.9.43, 2019.16.11.9 | 3.22.2 | 3.10.2 | | 14.0.0 - 14.0.6 | 7.0 - 11.7.1 | 8.0.5 - 8.4.1 | 2017.15.9.49, 2019.16.11.17, 2022.17.2.6 | 3.24.0 | 3.10.6 | -| 16.0.0git | 7.0 - 11.7.1 | 8.0.5 - 8.4.1 | 2017.15.9.50, 2019.16.11.18, 
2022.17.3.0 | 3.24.0 | 3.10.6 | +| 15.0.0 | 7.0 - 11.7.1 | 8.0.5 - 8.4.1 | 2017.15.9.50, 2019.16.11.18, 2022.17.3.3 | 3.24.1 | 3.10.6 | +| 16.0.0git | 7.0 - 11.7.1 | 8.0.5 - 8.4.1 | 2017.15.9.50, 2019.16.11.18, 2022.17.3.3 | 3.24.1 | 3.10.6 | *Building with testing support by `Visual Studio 17 2022` on `Windows 10`:* @@ -576,23 +578,23 @@ cmake -DHIPIFY_CLANG_TESTS=1 \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=../dist \ - -DCMAKE_PREFIX_PATH=d:/LLVM/14.0.6/dist \ + -DCMAKE_PREFIX_PATH=d:/LLVM/15.0.0/dist \ -DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.7" \ -DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v11.7" \ -DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-11.7-windows-x64-v8.4.1 \ -DCUDA_CUB_ROOT_DIR=d:/GIT/cub \ - -DLLVM_EXTERNAL_LIT=d:/LLVM/14.0.6/build/Release/bin/llvm-lit.py \ + -DLLVM_EXTERNAL_LIT=d:/LLVM/15.0.0/build/Release/bin/llvm-lit.py \ ../hipify ``` *A corresponding successful output:* ```shell --- Found LLVM 14.0.6: --- - CMake module path: d:/LLVM/14.0.6/dist/lib/cmake/llvm --- - Include path : d:/LLVM/14.0.6/dist/include --- - Binary path : d:/LLVM/14.0.6/dist/bin +-- Found LLVM 15.0.0: +-- - CMake module path: d:/LLVM/15.0.0/dist/lib/cmake/llvm +-- - Include path : d:/LLVM/15.0.0/dist/include +-- - Binary path : d:/LLVM/15.0.0/dist/bin -- Found PythonInterp: c:/Program Files/Python39/python.exe (found suitable version "3.9.5", minimum required is "3.6") -- Found lit: c:/Program Files/Python39/Scripts/lit.exe --- Found FileCheck: d:/LLVM/14.0.6/dist/bin/FileCheck.exe +-- Found FileCheck: d:/LLVM/15.0.0/dist/bin/FileCheck.exe -- Found CUDA: c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.7 (found version "11.7") -- Configuring done -- Generating done diff --git a/bin/hipify-perl b/bin/hipify-perl index 22781216..4dfd064d 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -715,6 +715,8 @@ my %removed_funcs = ( ); my %experimental_funcs = ( + "nvrtcGetCUBINSize" => "5.3.0", + 
"nvrtcGetCUBIN" => "5.3.0", "cudaUserObject_t" => "5.3.0", "cudaUserObjectRetainFlags" => "5.3.0", "cudaUserObjectRetain" => "5.3.0", @@ -742,11 +744,17 @@ my %experimental_funcs = ( "cuUserObjectRetain" => "5.3.0", "cuUserObjectRelease" => "5.3.0", "cuUserObjectCreate" => "5.3.0", + "cuLinkDestroy" => "5.3.0", + "cuLinkCreate_v2" => "5.3.0", + "cuLinkCreate" => "5.3.0", + "cuLinkComplete" => "5.3.0", + "cuLinkAddFile_v2" => "5.3.0", + "cuLinkAddFile" => "5.3.0", + "cuLinkAddData_v2" => "5.3.0", + "cuLinkAddData" => "5.3.0", "cuGraphUpload" => "5.3.0", "cuGraphRetainUserObject" => "5.3.0", "cuGraphReleaseUserObject" => "5.3.0", - "cuGetErrorString" => "5.3.0", - "cuGetErrorName" => "5.3.0", "cuDeviceSetGraphMemAttribute" => "5.3.0", "cuDeviceGraphMemTrim" => "5.3.0", "cuDeviceGetGraphMemAttribute" => "5.3.0", @@ -757,10 +765,19 @@ my %experimental_funcs = ( "CUuserObjectRetain_flags_enum" => "5.3.0", "CUuserObjectRetain_flags" => "5.3.0", "CUuserObject" => "5.3.0", + "CUjitInputType_enum" => "5.3.0", + "CUjitInputType" => "5.3.0", "CUgraphMem_attribute_enum" => "5.3.0", "CUgraphMem_attribute" => "5.3.0", "CU_USER_OBJECT_NO_DESTRUCTOR_SYNC" => "5.3.0", "CU_LIMIT_STACK_SIZE" => "5.3.0", + "CU_JIT_NUM_INPUT_TYPES" => "5.3.0", + "CU_JIT_INPUT_PTX" => "5.3.0", + "CU_JIT_INPUT_OBJECT" => "5.3.0", + "CU_JIT_INPUT_NVVM" => "5.3.0", + "CU_JIT_INPUT_LIBRARY" => "5.3.0", + "CU_JIT_INPUT_FATBINARY" => "5.3.0", + "CU_JIT_INPUT_CUBIN" => "5.3.0", "CU_GRAPH_USER_OBJECT_MOVE" => "5.3.0", "CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT" => "5.3.0", "CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL" => "5.3.0", @@ -906,10 +923,16 @@ sub subst { } sub experimentalSubstitutions { - subst("cuGetErrorName", "hipDrvGetErrorName", "error"); - subst("cuGetErrorString", "hipDrvGetErrorString", "error"); subst("cudaDeviceSetLimit", "hipDeviceSetLimit", "device"); subst("cuCtxSetLimit", "hipDeviceSetLimit", "context"); + subst("cuLinkAddData", "hiprtcLinkAddData", "module"); + subst("cuLinkAddData_v2", 
"hiprtcLinkAddData", "module"); + subst("cuLinkAddFile", "hiprtcLinkAddFile", "module"); + subst("cuLinkAddFile_v2", "hiprtcLinkAddFile", "module"); + subst("cuLinkComplete", "hiprtcLinkComplete", "module"); + subst("cuLinkCreate", "hiprtcLinkCreate", "module"); + subst("cuLinkCreate_v2", "hiprtcLinkCreate", "module"); + subst("cuLinkDestroy", "hiprtcLinkDestroy", "module"); subst("cuDeviceGetGraphMemAttribute", "hipDeviceGetGraphMemAttribute", "graph"); subst("cuDeviceGraphMemTrim", "hipDeviceGraphMemTrim", "graph"); subst("cuDeviceSetGraphMemAttribute", "hipDeviceSetGraphMemAttribute", "graph"); @@ -928,8 +951,12 @@ sub experimentalSubstitutions { subst("cudaUserObjectCreate", "hipUserObjectCreate", "graph"); subst("cudaUserObjectRelease", "hipUserObjectRelease", "graph"); subst("cudaUserObjectRetain", "hipUserObjectRetain", "graph"); + subst("nvrtcGetCUBIN", "hiprtcGetBitcode", "library"); + subst("nvrtcGetCUBINSize", "hiprtcGetBitcodeSize", "library"); subst("CUgraphMem_attribute", "hipGraphMemAttributeType", "type"); subst("CUgraphMem_attribute_enum", "hipGraphMemAttributeType", "type"); + subst("CUjitInputType", "hiprtcJITInputType", "type"); + subst("CUjitInputType_enum", "hiprtcJITInputType", "type"); subst("CUuserObject", "hipUserObject_t", "type"); subst("CUuserObjectRetain_flags", "hipUserObjectRetainFlags", "type"); subst("CUuserObjectRetain_flags_enum", "hipUserObjectRetainFlags", "type"); @@ -948,6 +975,13 @@ sub experimentalSubstitutions { subst("CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL", "hipGraphNodeTypeExtSemaphoreSignal", "numeric_literal"); subst("CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT", "hipGraphNodeTypeExtSemaphoreWait", "numeric_literal"); subst("CU_GRAPH_USER_OBJECT_MOVE", "hipGraphUserObjectMove", "numeric_literal"); + subst("CU_JIT_INPUT_CUBIN", "HIPRTC_JIT_INPUT_CUBIN", "numeric_literal"); + subst("CU_JIT_INPUT_FATBINARY", "HIPRTC_JIT_INPUT_FATBINARY", "numeric_literal"); + subst("CU_JIT_INPUT_LIBRARY", "HIPRTC_JIT_INPUT_LIBRARY", 
"numeric_literal"); + subst("CU_JIT_INPUT_NVVM", "HIPRTC_JIT_INPUT_NVVM", "numeric_literal"); + subst("CU_JIT_INPUT_OBJECT", "HIPRTC_JIT_INPUT_OBJECT", "numeric_literal"); + subst("CU_JIT_INPUT_PTX", "HIPRTC_JIT_INPUT_PTX", "numeric_literal"); + subst("CU_JIT_NUM_INPUT_TYPES", "HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES", "numeric_literal"); subst("CU_LIMIT_STACK_SIZE", "hipLimitStackSize", "numeric_literal"); subst("CU_USER_OBJECT_NO_DESTRUCTOR_SYNC", "hipUserObjectNoDestructorSync", "numeric_literal"); subst("cudaGraphMemAttrReservedMemCurrent", "hipGraphMemAttrReservedMemCurrent", "numeric_literal"); @@ -3202,6 +3236,8 @@ sub simpleSubstitutions { subst("CUkernelNodeAttrValue_v1", "hipKernelNodeAttrValue", "type"); subst("CUlimit", "hipLimit_t", "type"); subst("CUlimit_enum", "hipLimit_t", "type"); + subst("CUlinkState", "hiprtcLinkState", "type"); + subst("CUlinkState_st", "ihiprtcLinkState", "type"); subst("CUmemAccessDesc", "hipMemAccessDesc", "type"); subst("CUmemAccessDesc_st", "hipMemAccessDesc", "type"); subst("CUmemAccessDesc_v1", "hipMemAccessDesc", "type"); @@ -5485,8 +5521,6 @@ sub warnUnsupportedFunctions { "nvrtcGetNumSupportedArchs", "nvrtcGetNVVMSize", "nvrtcGetNVVM", - "nvrtcGetCUBINSize", - "nvrtcGetCUBIN", "memoryBarrier", "libraryPropertyType_t", "libraryPropertyType", @@ -6432,14 +6466,6 @@ sub warnUnsupportedFunctions { "cuMemcpy3DPeer", "cuMemcpy", "cuMemGetHandleForAddressRange", - "cuLinkDestroy", - "cuLinkCreate_v2", - "cuLinkCreate", - "cuLinkComplete", - "cuLinkAddFile_v2", - "cuLinkAddFile", - "cuLinkAddData_v2", - "cuLinkAddData", "cuLaunchGridAsync", "cuLaunchGrid", "cuLaunchCooperativeKernelMultiDevice", @@ -6480,6 +6506,8 @@ sub warnUnsupportedFunctions { "cuGraphAddExternalSemaphoresSignalNode", "cuGraphAddBatchMemOpNode", "cuGetProcAddress", + "cuGetErrorString", + "cuGetErrorName", "cuGLUnregisterBufferObject", "cuGLUnmapBufferObjectAsync", "cuGLUnmapBufferObject", @@ -6616,8 +6644,6 @@ sub warnUnsupportedFunctions { "CUjit_fallback", 
"CUjit_cacheMode_enum", "CUjit_cacheMode", - "CUjitInputType_enum", - "CUjitInputType", "CUipcMem_flags_enum", "CUipcMem_flags", "CUgraphicsMapResourceFlags_enum", @@ -6760,14 +6786,7 @@ sub warnUnsupportedFunctions { "CU_JIT_PREC_SQRT", "CU_JIT_PREC_DIV", "CU_JIT_OPTIMIZE_UNUSED_DEVICE_VARIABLES", - "CU_JIT_NUM_INPUT_TYPES", "CU_JIT_LTO", - "CU_JIT_INPUT_PTX", - "CU_JIT_INPUT_OBJECT", - "CU_JIT_INPUT_NVVM", - "CU_JIT_INPUT_LIBRARY", - "CU_JIT_INPUT_FATBINARY", - "CU_JIT_INPUT_CUBIN", "CU_JIT_GLOBAL_SYMBOL_NAMES", "CU_JIT_GLOBAL_SYMBOL_COUNT", "CU_JIT_GLOBAL_SYMBOL_ADDRESSES", diff --git a/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md b/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md index a32c2bb0..55ebfb57 100644 --- a/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md +++ b/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md @@ -621,17 +621,17 @@ |`CU_JIT_GLOBAL_SYMBOL_NAMES`| | | | | | | | | |`CU_JIT_INFO_LOG_BUFFER`| | | |`hipJitOptionInfoLogBuffer`|1.6.0| | | | |`CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES`| | | |`hipJitOptionInfoLogBufferSizeBytes`|1.6.0| | | | -|`CU_JIT_INPUT_CUBIN`| | | | | | | | | -|`CU_JIT_INPUT_FATBINARY`| | | | | | | | | -|`CU_JIT_INPUT_LIBRARY`| | | | | | | | | -|`CU_JIT_INPUT_NVVM`|11.4| | | | | | | | -|`CU_JIT_INPUT_OBJECT`| | | | | | | | | -|`CU_JIT_INPUT_PTX`| | | | | | | | | +|`CU_JIT_INPUT_CUBIN`| | | |`HIPRTC_JIT_INPUT_CUBIN`|5.3.0| | |5.3.0| +|`CU_JIT_INPUT_FATBINARY`| | | |`HIPRTC_JIT_INPUT_FATBINARY`|5.3.0| | |5.3.0| +|`CU_JIT_INPUT_LIBRARY`| | | |`HIPRTC_JIT_INPUT_LIBRARY`|5.3.0| | |5.3.0| +|`CU_JIT_INPUT_NVVM`|11.4| | |`HIPRTC_JIT_INPUT_NVVM`|5.3.0| | |5.3.0| +|`CU_JIT_INPUT_OBJECT`| | | |`HIPRTC_JIT_INPUT_OBJECT`|5.3.0| | |5.3.0| +|`CU_JIT_INPUT_PTX`| | | |`HIPRTC_JIT_INPUT_PTX`|5.3.0| | |5.3.0| |`CU_JIT_LOG_VERBOSE`| | | |`hipJitOptionLogVerbose`|1.6.0| | | | |`CU_JIT_LTO`|11.4| | | | | | | | |`CU_JIT_MAX_REGISTERS`| | | |`hipJitOptionMaxRegisters`|1.6.0| | | | |`CU_JIT_NEW_SM3X_OPT`| | | 
|`hipJitOptionSm3xOpt`|1.6.0| | | | -|`CU_JIT_NUM_INPUT_TYPES`| | | | | | | | | +|`CU_JIT_NUM_INPUT_TYPES`| | | |`HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES`|5.3.0| | |5.3.0| |`CU_JIT_NUM_OPTIONS`| | | |`hipJitOptionNumOptions`|1.6.0| | | | |`CU_JIT_OPTIMIZATION_LEVEL`| | | |`hipJitOptionOptimizationLevel`|1.6.0| | | | |`CU_JIT_OPTIMIZE_UNUSED_DEVICE_VARIABLES`|11.7| | | | | | | | @@ -989,8 +989,8 @@ |`CUipcMemHandle_v1`|11.3| | |`hipIpcMemHandle_t`|1.6.0| | | | |`CUipcMem_flags`| | | | | | | | | |`CUipcMem_flags_enum`| | | | | | | | | -|`CUjitInputType`| | | | | | | | | -|`CUjitInputType_enum`| | | | | | | | | +|`CUjitInputType`| | | |`hiprtcJITInputType`|5.3.0| | |5.3.0| +|`CUjitInputType_enum`| | | |`hiprtcJITInputType`|5.3.0| | |5.3.0| |`CUjit_cacheMode`| | | | | | | | | |`CUjit_cacheMode_enum`| | | | | | | | | |`CUjit_fallback`| | | | | | | | | @@ -1006,6 +1006,8 @@ |`CUkernelNodeAttrValue_v1`|11.3| | |`hipKernelNodeAttrValue`|5.2.0| | | | |`CUlimit`| | | |`hipLimit_t`|1.6.0| | | | |`CUlimit_enum`| | | |`hipLimit_t`|1.6.0| | | | +|`CUlinkState`| | | |`hiprtcLinkState`|5.3.0| | |5.3.0| +|`CUlinkState_st`| | | |`ihiprtcLinkState`|5.3.0| | |5.3.0| |`CUmemAccessDesc`|10.2| | |`hipMemAccessDesc`|5.2.0| | | | |`CUmemAccessDesc_st`|10.2| | |`hipMemAccessDesc`|5.2.0| | | | |`CUmemAccessDesc_v1`|11.3| | |`hipMemAccessDesc`|5.2.0| | | | @@ -1127,8 +1129,8 @@ |**CUDA**|**A**|**D**|**R**|**HIP**|**A**|**D**|**R**|**E**| |:--|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:| -|`cuGetErrorName`| | | |`hipDrvGetErrorName`|5.3.0| | |5.3.0| -|`cuGetErrorString`| | | |`hipDrvGetErrorString`|5.3.0| | |5.3.0| +|`cuGetErrorName`| | | | | | | | | +|`cuGetErrorString`| | | | | | | | | ## **3. 
Initialization** @@ -1222,14 +1224,14 @@ |**CUDA**|**A**|**D**|**R**|**HIP**|**A**|**D**|**R**|**E**| |:--|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:| -|`cuLinkAddData`| | | | | | | | | -|`cuLinkAddData_v2`| | | | | | | | | -|`cuLinkAddFile`| | | | | | | | | -|`cuLinkAddFile_v2`| | | | | | | | | -|`cuLinkComplete`| | | | | | | | | -|`cuLinkCreate`| | | | | | | | | -|`cuLinkCreate_v2`| | | | | | | | | -|`cuLinkDestroy`| | | | | | | | | +|`cuLinkAddData`| | | |`hiprtcLinkAddData`|5.3.0| | |5.3.0| +|`cuLinkAddData_v2`| | | |`hiprtcLinkAddData`|5.3.0| | |5.3.0| +|`cuLinkAddFile`| | | |`hiprtcLinkAddFile`|5.3.0| | |5.3.0| +|`cuLinkAddFile_v2`| | | |`hiprtcLinkAddFile`|5.3.0| | |5.3.0| +|`cuLinkComplete`| | | |`hiprtcLinkComplete`|5.3.0| | |5.3.0| +|`cuLinkCreate`| | | |`hiprtcLinkCreate`|5.3.0| | |5.3.0| +|`cuLinkCreate_v2`| | | |`hiprtcLinkCreate`|5.3.0| | |5.3.0| +|`cuLinkDestroy`| | | |`hiprtcLinkDestroy`|5.3.0| | |5.3.0| |`cuModuleGetFunction`| | | |`hipModuleGetFunction`|1.6.0| | | | |`cuModuleGetGlobal`| | | |`hipModuleGetGlobal`|1.6.0| | | | |`cuModuleGetGlobal_v2`| | | |`hipModuleGetGlobal`|1.6.0| | | | diff --git a/doc/markdown/CUDA_RTC_API_supported_by_HIP.md b/doc/markdown/CUDA_RTC_API_supported_by_HIP.md index e1f0bf5d..22d82bba 100644 --- a/doc/markdown/CUDA_RTC_API_supported_by_HIP.md +++ b/doc/markdown/CUDA_RTC_API_supported_by_HIP.md @@ -27,8 +27,8 @@ |`nvrtcCompileProgram`| | | |`hiprtcCompileProgram`|2.6.0| | | | |`nvrtcCreateProgram`| | | |`hiprtcCreateProgram`|2.6.0| | | | |`nvrtcDestroyProgram`| | | |`hiprtcDestroyProgram`|2.6.0| | | | -|`nvrtcGetCUBIN`|11.1| | | | | | | | -|`nvrtcGetCUBINSize`|11.1| | | | | | | | +|`nvrtcGetCUBIN`|11.1| | |`hiprtcGetBitcode`|5.3.0| | |5.3.0| +|`nvrtcGetCUBINSize`|11.1| | |`hiprtcGetBitcodeSize`|5.3.0| | |5.3.0| |`nvrtcGetErrorString`| | | |`hiprtcGetErrorString`|2.6.0| | | | |`nvrtcGetLoweredName`|8.0| | |`hiprtcGetLoweredName`|2.6.0| | | | |`nvrtcGetNVVM`|11.4| | | | | | | | diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp 
b/src/CUDA2HIP_BLAS_API_functions.cpp index 0f6f5ae9..b36ae477 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -104,173 +104,173 @@ const std::map CUDA_BLAS_FUNCTION_MAP { {"cublasZdscal", {"hipblasZdscal", "rocblas_zdscal", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, // AXPY - {"cublasSaxpy", {"hipblasSaxpy", "rocblas_saxpy", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasDaxpy", {"hipblasDaxpy", "rocblas_daxpy", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasCaxpy", {"hipblasCaxpy", "rocblas_caxpy", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasZaxpy", {"hipblasZaxpy", "rocblas_zaxpy", CONV_LIB_FUNC, API_BLAS, 5}}, + {"cublasSaxpy", {"hipblasSaxpy", "rocblas_saxpy", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDaxpy", {"hipblasDaxpy", "rocblas_daxpy", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasCaxpy", {"hipblasCaxpy", "rocblas_caxpy", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZaxpy", {"hipblasZaxpy", "rocblas_zaxpy", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, // COPY - {"cublasScopy", {"hipblasScopy", "rocblas_scopy", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasDcopy", {"hipblasDcopy", "rocblas_dcopy", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasCcopy", {"hipblasCcopy", "rocblas_ccopy", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasZcopy", {"hipblasZcopy", "rocblas_zcopy", CONV_LIB_FUNC, API_BLAS, 5}}, + {"cublasScopy", {"hipblasScopy", "rocblas_scopy", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDcopy", {"hipblasDcopy", "rocblas_dcopy", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasCcopy", {"hipblasCcopy", "rocblas_ccopy", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZcopy", {"hipblasZcopy", "rocblas_zcopy", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, // SWAP - {"cublasSswap", {"hipblasSswap", "rocblas_sswap", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasDswap", {"hipblasDswap", "rocblas_dswap", CONV_LIB_FUNC, 
API_BLAS, 5}}, - {"cublasCswap", {"hipblasCswap", "rocblas_cswap", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasZswap", {"hipblasZswap", "rocblas_zswap", CONV_LIB_FUNC, API_BLAS, 5}}, + {"cublasSswap", {"hipblasSswap", "rocblas_sswap", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDswap", {"hipblasDswap", "rocblas_dswap", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasCswap", {"hipblasCswap", "rocblas_cswap", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZswap", {"hipblasZswap", "rocblas_zswap", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, // AMAX - {"cublasIsamax", {"hipblasIsamax", "rocblas_isamax", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasIdamax", {"hipblasIdamax", "rocblas_idamax", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasIcamax", {"hipblasIcamax", "rocblas_icamax", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasIzamax", {"hipblasIzamax", "rocblas_izamax", CONV_LIB_FUNC, API_BLAS, 5}}, + {"cublasIsamax", {"hipblasIsamax", "rocblas_isamax", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasIdamax", {"hipblasIdamax", "rocblas_idamax", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasIcamax", {"hipblasIcamax", "rocblas_icamax", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasIzamax", {"hipblasIzamax", "rocblas_izamax", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, // AMIN - {"cublasIsamin", {"hipblasIsamin", "rocblas_isamin", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasIdamin", {"hipblasIdamin", "rocblas_idamin", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasIcamin", {"hipblasIcamin", "rocblas_icamin", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasIzamin", {"hipblasIzamin", "rocblas_izamin", CONV_LIB_FUNC, API_BLAS, 5}}, + {"cublasIsamin", {"hipblasIsamin", "rocblas_isamin", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasIdamin", {"hipblasIdamin", "rocblas_idamin", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasIcamin", {"hipblasIcamin", "rocblas_icamin", 
CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasIzamin", {"hipblasIzamin", "rocblas_izamin", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, // ASUM - {"cublasSasum", {"hipblasSasum", "rocblas_sasum", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasDasum", {"hipblasDasum", "rocblas_dasum", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasScasum", {"hipblasScasum", "rocblas_scasum", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasDzasum", {"hipblasDzasum", "rocblas_dzasum", CONV_LIB_FUNC, API_BLAS, 5}}, + {"cublasSasum", {"hipblasSasum", "rocblas_sasum", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDasum", {"hipblasDasum", "rocblas_dasum", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasScasum", {"hipblasScasum", "rocblas_scasum", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDzasum", {"hipblasDzasum", "rocblas_dzasum", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, // ROT - {"cublasSrot", {"hipblasSrot", "rocblas_srot", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasDrot", {"hipblasDrot", "rocblas_drot", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasCrot", {"hipblasCrot", "rocblas_crot", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasCsrot", {"hipblasCsrot", "rocblas_csrot", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasZrot", {"hipblasZrot", "rocblas_zrot", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasZdrot", {"hipblasZdrot", "rocblas_zdrot", CONV_LIB_FUNC, API_BLAS, 5}}, + {"cublasSrot", {"hipblasSrot", "rocblas_srot", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDrot", {"hipblasDrot", "rocblas_drot", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasCrot", {"hipblasCrot", "rocblas_crot", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasCsrot", {"hipblasCsrot", "rocblas_csrot", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZrot", {"hipblasZrot", "rocblas_zrot", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZdrot", {"hipblasZdrot", "rocblas_zdrot", CONV_LIB_FUNC, 
API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, // ROTG - {"cublasSrotg", {"hipblasSrotg", "rocblas_srotg", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasDrotg", {"hipblasDrotg", "rocblas_drotg", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasCrotg", {"hipblasCrotg", "rocblas_crotg", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasZrotg", {"hipblasZrotg", "rocblas_zrotg", CONV_LIB_FUNC, API_BLAS, 5}}, + {"cublasSrotg", {"hipblasSrotg", "rocblas_srotg", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDrotg", {"hipblasDrotg", "rocblas_drotg", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasCrotg", {"hipblasCrotg", "rocblas_crotg", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZrotg", {"hipblasZrotg", "rocblas_zrotg", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, // ROTM - {"cublasSrotm", {"hipblasSrotm", "rocblas_srotm", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasDrotm", {"hipblasDrotm", "rocblas_drotm", CONV_LIB_FUNC, API_BLAS, 5}}, + {"cublasSrotm", {"hipblasSrotm", "rocblas_srotm", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDrotm", {"hipblasDrotm", "rocblas_drotm", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, // ROTMG - {"cublasSrotmg", {"hipblasSrotmg", "rocblas_srotmg", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasDrotmg", {"hipblasDrotmg", "rocblas_drotmg", CONV_LIB_FUNC, API_BLAS, 5}}, + {"cublasSrotmg", {"hipblasSrotmg", "rocblas_srotmg", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDrotmg", {"hipblasDrotmg", "rocblas_drotmg", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, // GEMV - {"cublasSgemv", {"hipblasSgemv", "rocblas_sgemv", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasDgemv", {"hipblasDgemv", "rocblas_dgemv", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasCgemv", {"hipblasCgemv", "rocblas_cgemv", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasZgemv", {"hipblasZgemv", "rocblas_zgemv", CONV_LIB_FUNC, API_BLAS, 6}}, + {"cublasSgemv", {"hipblasSgemv", "rocblas_sgemv", CONV_LIB_FUNC, API_BLAS, 6, 
HIP_SUPPORTED_V2_ONLY}}, + {"cublasDgemv", {"hipblasDgemv", "rocblas_dgemv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasCgemv", {"hipblasCgemv", "rocblas_cgemv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZgemv", {"hipblasZgemv", "rocblas_zgemv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, // GBMV - {"cublasSgbmv", {"hipblasSgbmv", "rocblas_sgbmv", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasDgbmv", {"hipblasDgbmv", "rocblas_dgbmv", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasCgbmv", {"hipblasCgbmv", "rocblas_cgbmv", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasZgbmv", {"hipblasZgbmv", "rocblas_zgbmv", CONV_LIB_FUNC, API_BLAS, 6}}, + {"cublasSgbmv", {"hipblasSgbmv", "rocblas_sgbmv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDgbmv", {"hipblasDgbmv", "rocblas_dgbmv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasCgbmv", {"hipblasCgbmv", "rocblas_cgbmv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZgbmv", {"hipblasZgbmv", "rocblas_zgbmv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, // TRMV - {"cublasStrmv", {"hipblasStrmv", "rocblas_strmv", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasDtrmv", {"hipblasDtrmv", "rocblas_dtrmv", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasCtrmv", {"hipblasCtrmv", "rocblas_ctrmv", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasZtrmv", {"hipblasZtrmv", "rocblas_ztrmv", CONV_LIB_FUNC, API_BLAS, 6}}, + {"cublasStrmv", {"hipblasStrmv", "rocblas_strmv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDtrmv", {"hipblasDtrmv", "rocblas_dtrmv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasCtrmv", {"hipblasCtrmv", "rocblas_ctrmv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZtrmv", {"hipblasZtrmv", "rocblas_ztrmv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, // TBMV - {"cublasStbmv", {"hipblasStbmv", "rocblas_stbmv", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasDtbmv", {"hipblasDtbmv", "rocblas_dtbmv", 
CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasCtbmv", {"hipblasCtbmv", "rocblas_ctbmv", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasZtbmv", {"hipblasZtbmv", "rocblas_ztbmv", CONV_LIB_FUNC, API_BLAS, 6}}, + {"cublasStbmv", {"hipblasStbmv", "rocblas_stbmv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDtbmv", {"hipblasDtbmv", "rocblas_dtbmv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasCtbmv", {"hipblasCtbmv", "rocblas_ctbmv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZtbmv", {"hipblasZtbmv", "rocblas_ztbmv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, // TPMV - {"cublasStpmv", {"hipblasStpmv", "rocblas_stpmv", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasDtpmv", {"hipblasDtpmv", "rocblas_dtpmv", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasCtpmv", {"hipblasCtpmv", "rocblas_ctpmv", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasZtpmv", {"hipblasZtpmv", "rocblas_ztpmv", CONV_LIB_FUNC, API_BLAS, 6}}, + {"cublasStpmv", {"hipblasStpmv", "rocblas_stpmv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDtpmv", {"hipblasDtpmv", "rocblas_dtpmv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasCtpmv", {"hipblasCtpmv", "rocblas_ctpmv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZtpmv", {"hipblasZtpmv", "rocblas_ztpmv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, // TRSV - {"cublasStrsv", {"hipblasStrsv", "rocblas_strsv", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasDtrsv", {"hipblasDtrsv", "rocblas_dtrsv", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasCtrsv", {"hipblasCtrsv", "rocblas_ctrsv", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasZtrsv", {"hipblasZtrsv", "rocblas_ztrsv", CONV_LIB_FUNC, API_BLAS, 6}}, + {"cublasStrsv", {"hipblasStrsv", "rocblas_strsv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDtrsv", {"hipblasDtrsv", "rocblas_dtrsv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasCtrsv", {"hipblasCtrsv", "rocblas_ctrsv", CONV_LIB_FUNC, API_BLAS, 6, 
HIP_SUPPORTED_V2_ONLY}}, + {"cublasZtrsv", {"hipblasZtrsv", "rocblas_ztrsv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, // TPSV - {"cublasStpsv", {"hipblasStpsv", "rocblas_stpsv", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasDtpsv", {"hipblasDtpsv", "rocblas_dtpsv", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasCtpsv", {"hipblasCtpsv", "rocblas_ctpsv", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasZtpsv", {"hipblasZtpsv", "rocblas_ztpsv", CONV_LIB_FUNC, API_BLAS, 6}}, + {"cublasStpsv", {"hipblasStpsv", "rocblas_stpsv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDtpsv", {"hipblasDtpsv", "rocblas_dtpsv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasCtpsv", {"hipblasCtpsv", "rocblas_ctpsv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZtpsv", {"hipblasZtpsv", "rocblas_ztpsv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, // TBSV - {"cublasStbsv", {"hipblasStbsv", "rocblas_stbsv", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasDtbsv", {"hipblasDtbsv", "rocblas_dtbsv", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasCtbsv", {"hipblasCtbsv", "rocblas_ctbsv", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasZtbsv", {"hipblasZtbsv", "rocblas_ztbsv", CONV_LIB_FUNC, API_BLAS, 6}}, + {"cublasStbsv", {"hipblasStbsv", "rocblas_stbsv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDtbsv", {"hipblasDtbsv", "rocblas_dtbsv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasCtbsv", {"hipblasCtbsv", "rocblas_ctbsv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZtbsv", {"hipblasZtbsv", "rocblas_ztbsv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, // SYMV/HEMV - {"cublasSsymv", {"hipblasSsymv", "rocblas_ssymv", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasDsymv", {"hipblasDsymv", "rocblas_dsymv", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasCsymv", {"hipblasCsymv", "rocblas_csymv", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasZsymv", {"hipblasZsymv", "rocblas_zsymv", CONV_LIB_FUNC, API_BLAS, 6}}, - 
{"cublasChemv", {"hipblasChemv", "rocblas_chemv", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasZhemv", {"hipblasZhemv", "rocblas_zhemv", CONV_LIB_FUNC, API_BLAS, 6}}, + {"cublasSsymv", {"hipblasSsymv", "rocblas_ssymv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDsymv", {"hipblasDsymv", "rocblas_dsymv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasCsymv", {"hipblasCsymv", "rocblas_csymv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZsymv", {"hipblasZsymv", "rocblas_zsymv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasChemv", {"hipblasChemv", "rocblas_chemv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZhemv", {"hipblasZhemv", "rocblas_zhemv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, // SBMV/HBMV - {"cublasSsbmv", {"hipblasSsbmv", "rocblas_ssbmv", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasDsbmv", {"hipblasDsbmv", "rocblas_dsbmv", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasChbmv", {"hipblasChbmv", "rocblas_chbmv", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasZhbmv", {"hipblasZhbmv", "rocblas_zhbmv", CONV_LIB_FUNC, API_BLAS, 6}}, + {"cublasSsbmv", {"hipblasSsbmv", "rocblas_ssbmv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDsbmv", {"hipblasDsbmv", "rocblas_dsbmv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasChbmv", {"hipblasChbmv", "rocblas_chbmv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZhbmv", {"hipblasZhbmv", "rocblas_zhbmv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, // SPMV/HPMV - {"cublasSspmv", {"hipblasSspmv", "rocblas_sspmv", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasDspmv", {"hipblasDspmv", "rocblas_dspmv", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasChpmv", {"hipblasChpmv", "rocblas_chpmv", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasZhpmv", {"hipblasZhpmv", "rocblas_zhpmv", CONV_LIB_FUNC, API_BLAS, 6}}, + {"cublasSspmv", {"hipblasSspmv", "rocblas_sspmv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, 
+ {"cublasDspmv", {"hipblasDspmv", "rocblas_dspmv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasChpmv", {"hipblasChpmv", "rocblas_chpmv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZhpmv", {"hipblasZhpmv", "rocblas_zhpmv", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, // GER - {"cublasSger", {"hipblasSger", "rocblas_sger", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasDger", {"hipblasDger", "rocblas_dger", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasCgeru", {"hipblasCgeru", "rocblas_cgeru", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasCgerc", {"hipblasCgerc", "rocblas_cgerc", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasZgeru", {"hipblasZgeru", "rocblas_zgeru", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasZgerc", {"hipblasZgerc", "rocblas_zgerc", CONV_LIB_FUNC, API_BLAS, 6}}, + {"cublasSger", {"hipblasSger", "rocblas_sger", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDger", {"hipblasDger", "rocblas_dger", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasCgeru", {"hipblasCgeru", "rocblas_cgeru", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasCgerc", {"hipblasCgerc", "rocblas_cgerc", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZgeru", {"hipblasZgeru", "rocblas_zgeru", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZgerc", {"hipblasZgerc", "rocblas_zgerc", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, // SYR/HER - {"cublasSsyr", {"hipblasSsyr", "rocblas_ssyr", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasDsyr", {"hipblasDsyr", "rocblas_dsyr", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasCsyr", {"hipblasCsyr", "rocblas_csyr", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasZsyr", {"hipblasZsyr", "rocblas_zsyr", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasCher", {"hipblasCher", "rocblas_cher", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasZher", {"hipblasZher", "rocblas_zher", CONV_LIB_FUNC, API_BLAS, 6}}, + {"cublasSsyr", {"hipblasSsyr", "rocblas_ssyr", CONV_LIB_FUNC, API_BLAS, 6, 
HIP_SUPPORTED_V2_ONLY}}, + {"cublasDsyr", {"hipblasDsyr", "rocblas_dsyr", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasCsyr", {"hipblasCsyr", "rocblas_csyr", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZsyr", {"hipblasZsyr", "rocblas_zsyr", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasCher", {"hipblasCher", "rocblas_cher", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZher", {"hipblasZher", "rocblas_zher", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, // SPR/HPR - {"cublasSspr", {"hipblasSspr", "rocblas_sspr", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasDspr", {"hipblasDspr", "rocblas_dspr", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasChpr", {"hipblasChpr", "rocblas_chpr", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasZhpr", {"hipblasZhpr", "rocblas_zhpr", CONV_LIB_FUNC, API_BLAS, 6}}, + {"cublasSspr", {"hipblasSspr", "rocblas_sspr", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDspr", {"hipblasDspr", "rocblas_dspr", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasChpr", {"hipblasChpr", "rocblas_chpr", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZhpr", {"hipblasZhpr", "rocblas_zhpr", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, // SYR2/HER2 - {"cublasSsyr2", {"hipblasSsyr2", "rocblas_ssyr2", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasDsyr2", {"hipblasDsyr2", "rocblas_dsyr2", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasCsyr2", {"hipblasCsyr2", "rocblas_csyr2", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasZsyr2", {"hipblasZsyr2", "rocblas_zsyr2", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasCher2", {"hipblasCher2", "rocblas_cher2", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasZher2", {"hipblasZher2", "rocblas_zher2", CONV_LIB_FUNC, API_BLAS, 6}}, + {"cublasSsyr2", {"hipblasSsyr2", "rocblas_ssyr2", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDsyr2", {"hipblasDsyr2", "rocblas_dsyr2", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + 
{"cublasCsyr2", {"hipblasCsyr2", "rocblas_csyr2", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZsyr2", {"hipblasZsyr2", "rocblas_zsyr2", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasCher2", {"hipblasCher2", "rocblas_cher2", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZher2", {"hipblasZher2", "rocblas_zher2", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, // SPR2/HPR2 - {"cublasSspr2", {"hipblasSspr2", "rocblas_sspr2", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasDspr2", {"hipblasDspr2", "rocblas_dspr2", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasChpr2", {"hipblasChpr2", "rocblas_chpr2", CONV_LIB_FUNC, API_BLAS, 6}}, - {"cublasZhpr2", {"hipblasZhpr2", "rocblas_zhpr2", CONV_LIB_FUNC, API_BLAS, 6}}, + {"cublasSspr2", {"hipblasSspr2", "rocblas_sspr2", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDspr2", {"hipblasDspr2", "rocblas_dspr2", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasChpr2", {"hipblasChpr2", "rocblas_chpr2", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZhpr2", {"hipblasZhpr2", "rocblas_zhpr2", CONV_LIB_FUNC, API_BLAS, 6, HIP_SUPPORTED_V2_ONLY}}, // Blas3 (v1) Routines // GEMM - {"cublasSgemm", {"hipblasSgemm", "rocblas_sgemm", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasDgemm", {"hipblasDgemm", "rocblas_dgemm", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasCgemm", {"hipblasCgemm", "rocblas_cgemm", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasZgemm", {"hipblasZgemm", "rocblas_zgemm", CONV_LIB_FUNC, API_BLAS, 7}}, + {"cublasSgemm", {"hipblasSgemm", "rocblas_sgemm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDgemm", {"hipblasDgemm", "rocblas_dgemm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, + {"cublasCgemm", {"hipblasCgemm", "rocblas_cgemm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZgemm", {"hipblasZgemm", "rocblas_zgemm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, {"cublasHgemm", {"hipblasHgemm", 
"rocblas_hgemm", CONV_LIB_FUNC, API_BLAS, 7}}, // BATCH GEMM diff --git a/src/CUDA2HIP_Driver_API_functions.cpp b/src/CUDA2HIP_Driver_API_functions.cpp index 222c7266..be0b0684 100644 --- a/src/CUDA2HIP_Driver_API_functions.cpp +++ b/src/CUDA2HIP_Driver_API_functions.cpp @@ -27,10 +27,10 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { // 2. Error Handling // no analogue // NOTE: cudaGetErrorName and cuGetErrorName have different signatures - {"cuGetErrorName", {"hipDrvGetErrorName", "", CONV_ERROR, API_DRIVER, 2, HIP_EXPERIMENTAL}}, + {"cuGetErrorName", {"hipDrvGetErrorName", "", CONV_ERROR, API_DRIVER, 2, HIP_UNSUPPORTED}}, // no analogue // NOTE: cudaGetErrorString and cuGetErrorString have different signatures - {"cuGetErrorString", {"hipDrvGetErrorString", "", CONV_ERROR, API_DRIVER, 2, HIP_EXPERIMENTAL}}, + {"cuGetErrorString", {"hipDrvGetErrorString", "", CONV_ERROR, API_DRIVER, 2, HIP_UNSUPPORTED}}, // 3. Initialization // no analogue @@ -136,14 +136,14 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { // 10. 
Module Management // no analogues - {"cuLinkAddData", {"hipLinkAddData", "", CONV_MODULE, API_DRIVER, 10, HIP_UNSUPPORTED}}, - {"cuLinkAddData_v2", {"hipLinkAddData", "", CONV_MODULE, API_DRIVER, 10, HIP_UNSUPPORTED}}, - {"cuLinkAddFile", {"hipLinkAddFile", "", CONV_MODULE, API_DRIVER, 10, HIP_UNSUPPORTED}}, - {"cuLinkAddFile_v2", {"hipLinkAddFile", "", CONV_MODULE, API_DRIVER, 10, HIP_UNSUPPORTED}}, - {"cuLinkComplete", {"hipLinkComplete", "", CONV_MODULE, API_DRIVER, 10, HIP_UNSUPPORTED}}, - {"cuLinkCreate", {"hipLinkCreate", "", CONV_MODULE, API_DRIVER, 10, HIP_UNSUPPORTED}}, - {"cuLinkCreate_v2", {"hipLinkCreate", "", CONV_MODULE, API_DRIVER, 10, HIP_UNSUPPORTED}}, - {"cuLinkDestroy", {"hipLinkDestroy", "", CONV_MODULE, API_DRIVER, 10, HIP_UNSUPPORTED}}, + {"cuLinkAddData", {"hiprtcLinkAddData", "", CONV_MODULE, API_DRIVER, 10, HIP_EXPERIMENTAL}}, + {"cuLinkAddData_v2", {"hiprtcLinkAddData", "", CONV_MODULE, API_DRIVER, 10, HIP_EXPERIMENTAL}}, + {"cuLinkAddFile", {"hiprtcLinkAddFile", "", CONV_MODULE, API_DRIVER, 10, HIP_EXPERIMENTAL}}, + {"cuLinkAddFile_v2", {"hiprtcLinkAddFile", "", CONV_MODULE, API_DRIVER, 10, HIP_EXPERIMENTAL}}, + {"cuLinkComplete", {"hiprtcLinkComplete", "", CONV_MODULE, API_DRIVER, 10, HIP_EXPERIMENTAL}}, + {"cuLinkCreate", {"hiprtcLinkCreate", "", CONV_MODULE, API_DRIVER, 10, HIP_EXPERIMENTAL}}, + {"cuLinkCreate_v2", {"hiprtcLinkCreate", "", CONV_MODULE, API_DRIVER, 10, HIP_EXPERIMENTAL}}, + {"cuLinkDestroy", {"hiprtcLinkDestroy", "", CONV_MODULE, API_DRIVER, 10, HIP_EXPERIMENTAL}}, {"cuModuleGetFunction", {"hipModuleGetFunction", "", CONV_MODULE, API_DRIVER, 10}}, {"cuModuleGetGlobal", {"hipModuleGetGlobal", "", CONV_MODULE, API_DRIVER, 10}}, {"cuModuleGetGlobal_v2", {"hipModuleGetGlobal", "", CONV_MODULE, API_DRIVER, 10}}, @@ -1410,8 +1410,11 @@ const std::map HIP_DRIVER_FUNCTION_VER_MAP { {"hipMemRetainAllocationHandle", {HIP_5020, HIP_0, HIP_0 }}, {"hipMemSetAccess", {HIP_5020, HIP_0, HIP_0 }}, {"hipMemUnmap", {HIP_5020, HIP_0, HIP_0 
}}, - {"hipDrvGetErrorName", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipDrvGetErrorString", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, + {"hiprtcLinkCreate", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, + {"hiprtcLinkAddFile", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, + {"hiprtcLinkAddData", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, + {"hiprtcLinkComplete", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, + {"hiprtcLinkDestroy", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, }; const std::map CUDA_DRIVER_API_SECTION_MAP { diff --git a/src/CUDA2HIP_Driver_API_types.cpp b/src/CUDA2HIP_Driver_API_types.cpp index 370980ef..633a70b4 100644 --- a/src/CUDA2HIP_Driver_API_types.cpp +++ b/src/CUDA2HIP_Driver_API_types.cpp @@ -328,6 +328,10 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CUstreamAttrValue_v1", {"hipStreamAttrValue", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, {"CUstreamAttrValue_union", {"hipStreamAttrValue", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // no analogue + {"CUlinkState_st", {"ihiprtcLinkState", "", CONV_TYPE, API_DRIVER, 1}}, + {"CUlinkState", {"hiprtcLinkState", "", CONV_TYPE, API_DRIVER, 1}}, + // 3. Enums // TODO: HIPaddress_mode_enum and all its values should be hipTextureAddressMode as long as they are equal. 
{"CUaddress_mode", {"HIPaddress_mode", "", CONV_TYPE, API_DRIVER, 1}}, @@ -1215,16 +1219,16 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CU_TARGET_COMPUTE_87", {"hipJitTargetCompute87", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 87 // no analogue - {"CUjitInputType", {"hipJitInputType", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, - {"CUjitInputType_enum", {"hipJitInputType", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + {"CUjitInputType", {"hiprtcJITInputType", "", CONV_TYPE, API_DRIVER, 1, HIP_EXPERIMENTAL}}, + {"CUjitInputType_enum", {"hiprtcJITInputType", "", CONV_TYPE, API_DRIVER, 1, HIP_EXPERIMENTAL}}, // CUjitInputType enum values - {"CU_JIT_INPUT_CUBIN", {"hipJitInputTypeBin", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 0 - {"CU_JIT_INPUT_PTX", {"hipJitInputTypePtx", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, - {"CU_JIT_INPUT_FATBINARY", {"hipJitInputTypeFatBinary", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, - {"CU_JIT_INPUT_OBJECT", {"hipJitInputTypeObject", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, - {"CU_JIT_INPUT_LIBRARY", {"hipJitInputTypeLibrary", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, - {"CU_JIT_INPUT_NVVM", {"hipJitInputTypeNvvm", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, - {"CU_JIT_NUM_INPUT_TYPES", {"hipJitInputTypeNumInputTypes", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + {"CU_JIT_INPUT_CUBIN", {"HIPRTC_JIT_INPUT_CUBIN", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, // 0 + {"CU_JIT_INPUT_PTX", {"HIPRTC_JIT_INPUT_PTX", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, + {"CU_JIT_INPUT_FATBINARY", {"HIPRTC_JIT_INPUT_FATBINARY", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, + {"CU_JIT_INPUT_OBJECT", {"HIPRTC_JIT_INPUT_OBJECT", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, + {"CU_JIT_INPUT_LIBRARY", {"HIPRTC_JIT_INPUT_LIBRARY", "", 
CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, + {"CU_JIT_INPUT_NVVM", {"HIPRTC_JIT_INPUT_NVVM", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, + {"CU_JIT_NUM_INPUT_TYPES", {"HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, // cudaLimit {"CUlimit", {"hipLimit_t", "", CONV_TYPE, API_DRIVER, 1}}, @@ -3287,4 +3291,14 @@ const std::map HIP_DRIVER_TYPE_NAME_VER_MAP { {"hipDeviceAttributeVirtualMemoryManagementSupported", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, {"hipUserObject", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, {"hipUserObject_t", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, + {"hiprtcJITInputType", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, + {"HIPRTC_JIT_INPUT_CUBIN", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, + {"HIPRTC_JIT_INPUT_PTX", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, + {"HIPRTC_JIT_INPUT_FATBINARY", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, + {"HIPRTC_JIT_INPUT_OBJECT", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, + {"HIPRTC_JIT_INPUT_LIBRARY", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, + {"HIPRTC_JIT_INPUT_NVVM", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, + {"HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, + {"ihiprtcLinkState", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, + {"hiprtcLinkState", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, }; diff --git a/src/CUDA2HIP_RTC_API_functions.cpp b/src/CUDA2HIP_RTC_API_functions.cpp index 98be9840..2763485b 100644 --- a/src/CUDA2HIP_RTC_API_functions.cpp +++ b/src/CUDA2HIP_RTC_API_functions.cpp @@ -33,8 +33,8 @@ const std::map CUDA_RTC_FUNCTION_MAP { {"nvrtcCompileProgram", {"hiprtcCompileProgram", "", CONV_LIB_FUNC, API_RTC, 2}}, {"nvrtcGetPTXSize", {"hiprtcGetCodeSize", "", CONV_LIB_FUNC, API_RTC, 2}}, {"nvrtcGetPTX", {"hiprtcGetCode", "", CONV_LIB_FUNC, API_RTC, 2}}, - {"nvrtcGetCUBINSize", {"hiprtcGetCUBINSize", "", CONV_LIB_FUNC, API_RTC, 2, HIP_UNSUPPORTED}}, - {"nvrtcGetCUBIN", {"hiprtcGetCUBIN", "", CONV_LIB_FUNC, API_RTC, 2, HIP_UNSUPPORTED}}, + 
{"nvrtcGetCUBINSize", {"hiprtcGetBitcodeSize", "", CONV_LIB_FUNC, API_RTC, 2, HIP_EXPERIMENTAL}}, + {"nvrtcGetCUBIN", {"hiprtcGetBitcode", "", CONV_LIB_FUNC, API_RTC, 2, HIP_EXPERIMENTAL}}, {"nvrtcGetNVVMSize", {"hiprtcGetNVVMSize", "", CONV_LIB_FUNC, API_RTC, 2, HIP_UNSUPPORTED}}, {"nvrtcGetNVVM", {"hiprtcGetNVVM", "", CONV_LIB_FUNC, API_RTC, 2, HIP_UNSUPPORTED}}, {"nvrtcGetProgramLogSize", {"hiprtcGetProgramLogSize", "", CONV_LIB_FUNC, API_RTC, 2}}, @@ -66,6 +66,8 @@ const std::map HIP_RTC_FUNCTION_VER_MAP { {"hiprtcGetProgramLog", {HIP_2060, HIP_0, HIP_0 }}, {"hiprtcAddNameExpression", {HIP_2060, HIP_0, HIP_0 }}, {"hiprtcGetLoweredName", {HIP_2060, HIP_0, HIP_0 }}, + {"hiprtcGetBitcode", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, + {"hiprtcGetBitcodeSize", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, }; const std::map CUDA_RTC_API_SECTION_MAP { diff --git a/tests/unit_tests/synthetic/driver_functions.cu b/tests/unit_tests/synthetic/driver_functions.cu index 70f4d83b..736e7bdc 100644 --- a/tests/unit_tests/synthetic/driver_functions.cu +++ b/tests/unit_tests/synthetic/driver_functions.cu @@ -1652,15 +1652,5 @@ int main() { CUresult result_2; const char* ret = NULL; - // CUDA: CUresult CUDAAPI cuGetErrorName(CUresult error, const char **pStr); - // HIP: hipError_t hipDrvGetErrorName(hipError_t hipError, const char** errorString); - // CHECK: result = hipDrvGetErrorName(result_2, &ret); - result = cuGetErrorName(result_2, &ret); - - // CUDA: CUresult CUDAAPI cuGetErrorString(CUresult error, const char **pStr); - // HIP: hipError_t hipDrvGetErrorString(hipError_t hipError, const char** errorString); - // CHECK: result = hipDrvGetErrorString(result_2, &ret); - result = cuGetErrorString(result_2, &ret); - return 0; } diff --git a/tests/unit_tests/synthetic/driver_structs.cu b/tests/unit_tests/synthetic/driver_structs.cu index 57f3f7e6..56379750 100644 --- a/tests/unit_tests/synthetic/driver_structs.cu +++ b/tests/unit_tests/synthetic/driver_structs.cu @@ -104,6 +104,11 @@ int 
main() { // CHECK: hipUUID_t uuid_st; CUuuid_st uuid_st; + // CHECK: ihiprtcLinkState* linkState_ptr; + // CHECK-NEXT: hiprtcLinkState linkState; + CUlinkState_st* linkState_ptr; + CUlinkState linkState; + #if CUDA_VERSION >= 10000 // CHECK: hipExternalMemoryBufferDesc_st ext_mem_buff_st; // CHECK-NEXT: hipExternalMemoryBufferDesc ext_mem_buff; diff --git a/tests/unit_tests/synthetic/driver_typedefs.cu b/tests/unit_tests/synthetic/driver_typedefs.cu index 1b3b22c2..e26244e3 100644 --- a/tests/unit_tests/synthetic/driver_typedefs.cu +++ b/tests/unit_tests/synthetic/driver_typedefs.cu @@ -49,9 +49,7 @@ int main() { // CHECK: hipTextureObject_t texObject_v1; CUtexObject_v1 texObject_v1; -#endif -#if CUDA_VERSION >= 11040 // CHECK: hipMemGenericAllocationHandle_t memGenericAllocationHandle_v1; CUmemGenericAllocationHandle_v1 memGenericAllocationHandle_v1; #endif diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu index c4a38256..61643cdd 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu @@ -144,9 +144,18 @@ int main() { blasStatus = cublasGetPointerMode_v2(blasHandle, &blasPointerMode); int n = 0; + int m = 0; int num = 0; + int lda = 0; + int ldb = 0; + int ldc = 0; + int res = 0; int incx = 0; int incy = 0; + int k = 0; + int kl = 0; + int ku = 0; + int batchCount = 0; void* image = nullptr; void* image_2 = nullptr; void* deviceptr = nullptr; @@ -194,14 +203,44 @@ int main() { // CHECK: blasStatus = hipblasGetMatrixAsync(rows, cols, num, image, incx, image_2, incy, stream); blasStatus = cublasGetMatrixAsync(rows, cols, num, image, incx, image_2, incy, stream); + float fa = 0; + float fA = 0; + float fb = 0; + float fB = 0; float fx = 0; + float fx1 = 0; float fy = 0; + float fy1 = 0; + float fc = 0; + float fC = 0; + float fs = 0; + float fd1 = 0; + float fd2 = 0; float fresult = 0; + float** fAarray = 0; + float** 
fBarray = 0; + float** fCarray = 0; + + double da = 0; + double dA = 0; + double db = 0; + double dB = 0; double dx = 0; + double dx1 = 0; double dy = 0; + double dy1 = 0; + double dc = 0; + double dC = 0; + double ds = 0; + double dd1 = 0; + double dd2 = 0; double dresult = 0; + double** dAarray = 0; + double** dBarray = 0; + double** dCarray = 0; + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSnrm2_v2(cublasHandle_t handle, int n, const float* x, int incx, float* result); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSnrm2(hipblasHandle_t handle, int n, const float* x, int incx, float* result); // CHECK: blasStatus = hipblasSnrm2(blasHandle, n, &fx, incx, &fresult); @@ -216,10 +255,24 @@ int main() { blasStatus = cublasDnrm2(blasHandle, n, &dx, incx, &dresult); blasStatus = cublasDnrm2_v2(blasHandle, n, &dx, incx, &dresult); - // CHECK: hipComplex complex, complex_2, complex_res; - cuComplex complex, complex_2, complex_res; - // CHECK: hipDoubleComplex dcomplex, dcomplex_2, dcomplex_res; - cuDoubleComplex dcomplex, dcomplex_2, dcomplex_res; + // CHECK: hipComplex complex, complexa, complexA, complexB, complexC, complexx, complexy, complexs, complexb; + cuComplex complex, complexa, complexA, complexB, complexC, complexx, complexy, complexs, complexb; + // CHECK: hipDoubleComplex dcomplex, dcomplexa, dcomplexA, dcomplexB, dcomplexC, dcomplexx, dcomplexy, dcomplexs, dcomplexb; + cuDoubleComplex dcomplex, dcomplexa, dcomplexA, dcomplexB, dcomplexC, dcomplexx, dcomplexy, dcomplexs, dcomplexb; + + // CHECK: hipComplex** complexAarray = 0; + // CHECK-NEXT: hipComplex** complexBarray = 0; + // CHECK-NEXT: hipComplex** complexCarray = 0; + cuComplex** complexAarray = 0; + cuComplex** complexBarray = 0; + cuComplex** complexCarray = 0; + + // CHECK: hipDoubleComplex** dcomplexAarray = 0; + // CHECK-NEXT: hipDoubleComplex** dcomplexBarray = 0; + // CHECK-NEXT: hipDoubleComplex** dcomplexCarray = 0; + cuDoubleComplex** dcomplexAarray = 0; + cuDoubleComplex** 
dcomplexBarray = 0; + cuDoubleComplex** dcomplexCarray = 0; // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasScnrm2_v2(cublasHandle_t handle, int n, const cuComplex* x, int incx, float* result); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasScnrm2(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, float* result); @@ -251,31 +304,31 @@ int main() { // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCdotu_v2(cublasHandle_t handle, int n, const cuComplex* x, int incx, const cuComplex* y, int incy, cuComplex* result); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCdotu(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* result); - // CHECK: blasStatus = hipblasCdotu(blasHandle, n, &complex, incx, &complex_2, incy, &complex_res); - // CHECK-NEXT: blasStatus = hipblasCdotu(blasHandle, n, &complex, incx, &complex_2, incy, &complex_res); - blasStatus = cublasCdotu(blasHandle, n, &complex, incx, &complex_2, incy, &complex_res); - blasStatus = cublasCdotu_v2(blasHandle, n, &complex, incx, &complex_2, incy, &complex_res); + // CHECK: blasStatus = hipblasCdotu(blasHandle, n, &complexx, incx, &complexy, incy, &complex); + // CHECK-NEXT: blasStatus = hipblasCdotu(blasHandle, n, &complexx, incx, &complexy, incy, &complex); + blasStatus = cublasCdotu(blasHandle, n, &complexx, incx, &complexy, incy, &complex); + blasStatus = cublasCdotu_v2(blasHandle, n, &complexx, incx, &complexy, incy, &complex); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCdotc_v2(cublasHandle_t handle, int n, const cuComplex* x, int incx, const cuComplex* y, int incy, cuComplex* result); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCdotc(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* result); - // CHECK: blasStatus = hipblasCdotc(blasHandle, n, &complex, incx, &complex_2, incy, &complex_res); - // CHECK-NEXT: blasStatus = hipblasCdotc(blasHandle, 
n, &complex, incx, &complex_2, incy, &complex_res); - blasStatus = cublasCdotc(blasHandle, n, &complex, incx, &complex_2, incy, &complex_res); - blasStatus = cublasCdotc_v2(blasHandle, n, &complex, incx, &complex_2, incy, &complex_res); + // CHECK: blasStatus = hipblasCdotc(blasHandle, n, &complexx, incx, &complexy, incy, &complex); + // CHECK-NEXT: blasStatus = hipblasCdotc(blasHandle, n, &complexx, incx, &complexy, incy, &complex); + blasStatus = cublasCdotc(blasHandle, n, &complexx, incx, &complexy, incy, &complex); + blasStatus = cublasCdotc_v2(blasHandle, n, &complexx, incx, &complexy, incy, &complex); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdotu_v2(cublasHandle_t handle, int n, const cuDoubleComplex* x, int incx, const cuDoubleComplex* y, int incy, cuDoubleComplex* result); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZdotu(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* result); - // CHECK: blasStatus = hipblasZdotu(blasHandle, n, &dcomplex, incx, &dcomplex_2, incy, &dcomplex_res); - // CHECK-NEXT: blasStatus = hipblasZdotu(blasHandle, n, &dcomplex, incx, &dcomplex_2, incy, &dcomplex_res); - blasStatus = cublasZdotu(blasHandle, n, &dcomplex, incx, &dcomplex_2, incy, &dcomplex_res); - blasStatus = cublasZdotu_v2(blasHandle, n, &dcomplex, incx, &dcomplex_2, incy, &dcomplex_res); + // CHECK: blasStatus = hipblasZdotu(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy, &dcomplex); + // CHECK-NEXT: blasStatus = hipblasZdotu(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy, &dcomplex); + blasStatus = cublasZdotu(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy, &dcomplex); + blasStatus = cublasZdotu_v2(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy, &dcomplex); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdotc_v2(cublasHandle_t handle, int n, const cuDoubleComplex* x, int incx, const cuDoubleComplex* y, int incy, cuDoubleComplex* result); // HIP: 
HIPBLAS_EXPORT hipblasStatus_t hipblasZdotc(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* result); - // CHECK: blasStatus = hipblasZdotc(blasHandle, n, &dcomplex, incx, &dcomplex_2, incy, &dcomplex_res); - // CHECK-NEXT: blasStatus = hipblasZdotc(blasHandle, n, &dcomplex, incx, &dcomplex_2, incy, &dcomplex_res); - blasStatus = cublasZdotc(blasHandle, n, &dcomplex, incx, &dcomplex_2, incy, &dcomplex_res); - blasStatus = cublasZdotc_v2(blasHandle, n, &dcomplex, incx, &dcomplex_2, incy, &dcomplex_res); + // CHECK: blasStatus = hipblasZdotc(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy, &dcomplex); + // CHECK-NEXT: blasStatus = hipblasZdotc(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy, &dcomplex); + blasStatus = cublasZdotc(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy, &dcomplex); + blasStatus = cublasZdotc_v2(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy, &dcomplex); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSscal_v2(cublasHandle_t handle, int n, const float* alpha, float* x, int incx); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSscal(hipblasHandle_t handle, int n, const float* alpha, float* x, int incx); @@ -293,31 +346,855 @@ int main() { // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCscal_v2(cublasHandle_t handle, int n, const cuComplex* alpha, cuComplex* x, int incx); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCscal(hipblasHandle_t handle, int n, const hipblasComplex* alpha, hipblasComplex* x, int incx); - // CHECK: blasStatus = hipblasCscal(blasHandle, n, &complex, &complex_2, incx); - // CHECK-NEXT: blasStatus = hipblasCscal(blasHandle, n, &complex, &complex_2, incx); - blasStatus = cublasCscal(blasHandle, n, &complex, &complex_2, incx); - blasStatus = cublasCscal_v2(blasHandle, n, &complex, &complex_2, incx); + // CHECK: blasStatus = hipblasCscal(blasHandle, n, &complexa, &complexx, incx); + // CHECK-NEXT: blasStatus = 
hipblasCscal(blasHandle, n, &complexa, &complexx, incx); + blasStatus = cublasCscal(blasHandle, n, &complexa, &complexx, incx); + blasStatus = cublasCscal_v2(blasHandle, n, &complexa, &complexx, incx); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsscal_v2(cublasHandle_t handle, int n, const float* alpha, cuComplex* x, int incx); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCsscal(hipblasHandle_t handle, int n, const float* alpha, hipblasComplex* x, int incx); - // CHECK: blasStatus = hipblasCsscal(blasHandle, n, &fx, &complex, incx); - // CHECK-NEXT: blasStatus = hipblasCsscal(blasHandle, n, &fx, &complex, incx); - blasStatus = cublasCsscal(blasHandle, n, &fx, &complex, incx); - blasStatus = cublasCsscal_v2(blasHandle, n, &fx, &complex, incx); + // CHECK: blasStatus = hipblasCsscal(blasHandle, n, &fx, &complexx, incx); + // CHECK-NEXT: blasStatus = hipblasCsscal(blasHandle, n, &fx, &complexx, incx); + blasStatus = cublasCsscal(blasHandle, n, &fx, &complexx, incx); + blasStatus = cublasCsscal_v2(blasHandle, n, &fx, &complexx, incx); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZscal_v2(cublasHandle_t handle, int n, const cuDoubleComplex* alpha, cuDoubleComplex* x, int incx); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZscal(hipblasHandle_t handle, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* x, int incx); - // CHECK: blasStatus = hipblasZscal(blasHandle, n, &dcomplex, &dcomplex_2, incx); - // CHECK-NEXT: blasStatus = hipblasZscal(blasHandle, n, &dcomplex, &dcomplex_2, incx); - blasStatus = cublasZscal(blasHandle, n, &dcomplex, &dcomplex_2, incx); - blasStatus = cublasZscal_v2(blasHandle, n, &dcomplex, &dcomplex_2, incx); + // CHECK: blasStatus = hipblasZscal(blasHandle, n, &dcomplexa, &dcomplexx, incx); + // CHECK-NEXT: blasStatus = hipblasZscal(blasHandle, n, &dcomplexa, &dcomplexx, incx); + blasStatus = cublasZscal(blasHandle, n, &dcomplexa, &dcomplexx, incx); + blasStatus = cublasZscal_v2(blasHandle, n, &dcomplexa, 
&dcomplexx, incx); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdscal_v2(cublasHandle_t handle, int n, const double* alpha, cuDoubleComplex* x, int incx); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZdscal(hipblasHandle_t handle, int n, const double* alpha, hipblasDoubleComplex* x, int incx); - // CHECK: blasStatus = hipblasZdscal(blasHandle, n, &dx, &dcomplex, incx); - // CHECK-NEXT: blasStatus = hipblasZdscal(blasHandle, n, &dx, &dcomplex, incx); - blasStatus = cublasZdscal(blasHandle, n, &dx, &dcomplex, incx); - blasStatus = cublasZdscal_v2(blasHandle, n, &dx, &dcomplex, incx); + // CHECK: blasStatus = hipblasZdscal(blasHandle, n, &dx, &dcomplexx, incx); + // CHECK-NEXT: blasStatus = hipblasZdscal(blasHandle, n, &dx, &dcomplexx, incx); + blasStatus = cublasZdscal(blasHandle, n, &dx, &dcomplexx, incx); + blasStatus = cublasZdscal_v2(blasHandle, n, &dx, &dcomplexx, incx); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSaxpy_v2(cublasHandle_t handle, int n, const float* alpha, const float* x, int incx, float* y, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSaxpy(hipblasHandle_t handle, int n, const float* alpha, const float* x, int incx, float* y, int incy); + // CHECK: blasStatus = hipblasSaxpy(blasHandle, n, &fa, &fx, incx, &fy, incy); + // CHECK-NEXT: blasStatus = hipblasSaxpy(blasHandle, n, &fa, &fx, incx, &fy, incy); + blasStatus = cublasSaxpy(blasHandle, n, &fa, &fx, incx, &fy, incy); + blasStatus = cublasSaxpy_v2(blasHandle, n, &fa, &fx, incx, &fy, incy); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDaxpy_v2(cublasHandle_t handle, int n, const double* alpha, const double* x, int incx, double* y, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDaxpy(hipblasHandle_t handle, int n, const double* alpha, const double* x, int incx, double* y, int incy); + // CHECK: blasStatus = hipblasDaxpy(blasHandle, n, &da, &dx, incx, &dy, incy); + // CHECK-NEXT: blasStatus = hipblasDaxpy(blasHandle, n, &da, &dx, incx, 
&dy, incy); + blasStatus = cublasDaxpy(blasHandle, n, &da, &dx, incx, &dy, incy); + blasStatus = cublasDaxpy_v2(blasHandle, n, &da, &dx, incx, &dy, incy); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCaxpy_v2(cublasHandle_t handle, int n, const cuComplex* alpha, const cuComplex* x, int incx, cuComplex* y, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCaxpy(hipblasHandle_t handle, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasComplex* y, int incy); + // CHECK: blasStatus = hipblasCaxpy(blasHandle, n, &complexa, &complexx, incx, &complexy, incy); + // CHECK-NEXT: blasStatus = hipblasCaxpy(blasHandle, n, &complexa, &complexx, incx, &complexy, incy); + blasStatus = cublasCaxpy(blasHandle, n, &complexa, &complexx, incx, &complexy, incy); + blasStatus = cublasCaxpy_v2(blasHandle, n, &complexa, &complexx, incx, &complexy, incy); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZaxpy_v2(cublasHandle_t handle, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int incx, cuDoubleComplex* y, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZaxpy(hipblasHandle_t handle, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* y, int incy); + // CHECK: blasStatus = hipblasZaxpy(blasHandle, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy); + // CHECK-NEXT: blasStatus = hipblasZaxpy(blasHandle, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy); + blasStatus = cublasZaxpy(blasHandle, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy); + blasStatus = cublasZaxpy_v2(blasHandle, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasScopy_v2(cublasHandle_t handle, int n, const float* x, int incx, float* y, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasScopy(hipblasHandle_t handle, int n, const float* x, int incx, float* y, int incy); + // CHECK: blasStatus = hipblasScopy(blasHandle, 
n, &fx, incx, &fy, incy); + // CHECK-NEXT: blasStatus = hipblasScopy(blasHandle, n, &fx, incx, &fy, incy); + blasStatus = cublasScopy(blasHandle, n, &fx, incx, &fy, incy); + blasStatus = cublasScopy_v2(blasHandle, n, &fx, incx, &fy, incy); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDcopy_v2(cublasHandle_t handle, int n, const double* x, int incx, double* y, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDcopy(hipblasHandle_t handle, int n, const double* x, int incx, double* y, int incy); + // CHECK: blasStatus = hipblasDcopy(blasHandle, n, &dx, incx, &dy, incy); + // CHECK-NEXT: blasStatus = hipblasDcopy(blasHandle, n, &dx, incx, &dy, incy); + blasStatus = cublasDcopy(blasHandle, n, &dx, incx, &dy, incy); + blasStatus = cublasDcopy_v2(blasHandle, n, &dx, incx, &dy, incy); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCcopy_v2(cublasHandle_t handle, int n, const cuComplex* x, int incx, cuComplex* y, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCcopy(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, hipblasComplex* y, int incy); + // CHECK: blasStatus = hipblasCcopy(blasHandle, n, &complexx, incx, &complexy, incy); + // CHECK-NEXT: blasStatus = hipblasCcopy(blasHandle, n, &complexx, incx, &complexy, incy); + blasStatus = cublasCcopy(blasHandle, n, &complexx, incx, &complexy, incy); + blasStatus = cublasCcopy_v2(blasHandle, n, &complexx, incx, &complexy, incy); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZcopy_v2(cublasHandle_t handle, int n, const cuDoubleComplex* x, int incx, cuDoubleComplex* y, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZcopy(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* y, int incy); + // CHECK: blasStatus = hipblasZcopy(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy); + // CHECK-NEXT: blasStatus = hipblasZcopy(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy); + blasStatus = cublasZcopy(blasHandle, n, 
&dcomplexx, incx, &dcomplexy, incy); + blasStatus = cublasZcopy_v2(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSswap_v2(cublasHandle_t handle, int n, float* x, int incx, float* y, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSswap(hipblasHandle_t handle, int n, float* x, int incx, float* y, int incy); + // CHECK: blasStatus = hipblasSswap(blasHandle, n, &fx, incx, &fy, incy); + // CHECK-NEXT: blasStatus = hipblasSswap(blasHandle, n, &fx, incx, &fy, incy); + blasStatus = cublasSswap(blasHandle, n, &fx, incx, &fy, incy); + blasStatus = cublasSswap_v2(blasHandle, n, &fx, incx, &fy, incy); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDswap_v2(cublasHandle_t handle, int n, double* x, int incx, double* y, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDswap(hipblasHandle_t handle, int n, double* x, int incx, double* y, int incy); + // CHECK: blasStatus = hipblasDswap(blasHandle, n, &dx, incx, &dy, incy); + // CHECK-NEXT: blasStatus = hipblasDswap(blasHandle, n, &dx, incx, &dy, incy); + blasStatus = cublasDswap(blasHandle, n, &dx, incx, &dy, incy); + blasStatus = cublasDswap_v2(blasHandle, n, &dx, incx, &dy, incy); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCswap_v2(cublasHandle_t handle, int n, cuComplex* x, int incx, cuComplex* y, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCswap(hipblasHandle_t handle, int n, hipblasComplex* x, int incx, hipblasComplex* y, int incy); + // CHECK: blasStatus = hipblasCswap(blasHandle, n, &complexx, incx, &complexy, incy); + // CHECK-NEXT: blasStatus = hipblasCswap(blasHandle, n, &complexx, incx, &complexy, incy); + blasStatus = cublasCswap(blasHandle, n, &complexx, incx, &complexy, incy); + blasStatus = cublasCswap_v2(blasHandle, n, &complexx, incx, &complexy, incy); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZswap_v2(cublasHandle_t handle, int n, cuDoubleComplex* x, int incx, cuDoubleComplex* 
y, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZswap(hipblasHandle_t handle, int n, hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* y, int incy); + // CHECK: blasStatus = hipblasZswap(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy); + // CHECK-NEXT: blasStatus = hipblasZswap(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy); + blasStatus = cublasZswap(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy); + blasStatus = cublasZswap_v2(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasIsamax_v2(cublasHandle_t handle, int n, const float* x, int incx, int* result); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasIsamax(hipblasHandle_t handle, int n, const float* x, int incx, int* result); + // CHECK: blasStatus = hipblasIsamax(blasHandle, n, &fx, incx, &res); + // CHECK-NEXT: blasStatus = hipblasIsamax(blasHandle, n, &fx, incx, &res); + blasStatus = cublasIsamax(blasHandle, n, &fx, incx, &res); + blasStatus = cublasIsamax_v2(blasHandle, n, &fx, incx, &res); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasIdamax_v2(cublasHandle_t handle, int n, const double* x, int incx, int* result); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasIdamax(hipblasHandle_t handle, int n, const double* x, int incx, int* result); + // CHECK: blasStatus = hipblasIdamax(blasHandle, n, &dx, incx, &res); + // CHECK-NEXT: blasStatus = hipblasIdamax(blasHandle, n, &dx, incx, &res); + blasStatus = cublasIdamax(blasHandle, n, &dx, incx, &res); + blasStatus = cublasIdamax_v2(blasHandle, n, &dx, incx, &res); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasIcamax_v2(cublasHandle_t handle, int n, const cuComplex* x, int incx, int* result); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasIcamax(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, int* result); + // CHECK: blasStatus = hipblasIcamax(blasHandle, n, &complexx, incx, &res); + // CHECK-NEXT: blasStatus = 
hipblasIcamax(blasHandle, n, &complexx, incx, &res); + blasStatus = cublasIcamax(blasHandle, n, &complexx, incx, &res); + blasStatus = cublasIcamax_v2(blasHandle, n, &complexx, incx, &res); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasIzamax_v2(cublasHandle_t handle, int n, const cuDoubleComplex* x, int incx, int* result); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasIzamax(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, int* result); + // CHECK: blasStatus = hipblasIzamax(blasHandle, n, &dcomplexx, incx, &res); + // CHECK-NEXT: blasStatus = hipblasIzamax(blasHandle, n, &dcomplexx, incx, &res); + blasStatus = cublasIzamax(blasHandle, n, &dcomplexx, incx, &res); + blasStatus = cublasIzamax_v2(blasHandle, n, &dcomplexx, incx, &res); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasIsamin_v2(cublasHandle_t handle, int n, const float* x, int incx, int* result); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasIsamin(hipblasHandle_t handle, int n, const float* x, int incx, int* result); + // CHECK: blasStatus = hipblasIsamin(blasHandle, n, &fx, incx, &res); + // CHECK-NEXT: blasStatus = hipblasIsamin(blasHandle, n, &fx, incx, &res); + blasStatus = cublasIsamin(blasHandle, n, &fx, incx, &res); + blasStatus = cublasIsamin_v2(blasHandle, n, &fx, incx, &res); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasIdamin_v2(cublasHandle_t handle, int n, const double* x, int incx, int* result); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasIdamin(hipblasHandle_t handle, int n, const double* x, int incx, int* result); + // CHECK: blasStatus = hipblasIdamin(blasHandle, n, &dx, incx, &res); + // CHECK-NEXT: blasStatus = hipblasIdamin(blasHandle, n, &dx, incx, &res); + blasStatus = cublasIdamin(blasHandle, n, &dx, incx, &res); + blasStatus = cublasIdamin_v2(blasHandle, n, &dx, incx, &res); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasIcamin_v2(cublasHandle_t handle, int n, const cuComplex* x, int incx, int* result); + 
// HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasIcamin(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, int* result); + // CHECK: blasStatus = hipblasIcamin(blasHandle, n, &complexx, incx, &res); + // CHECK-NEXT: blasStatus = hipblasIcamin(blasHandle, n, &complexx, incx, &res); + blasStatus = cublasIcamin(blasHandle, n, &complexx, incx, &res); + blasStatus = cublasIcamin_v2(blasHandle, n, &complexx, incx, &res); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasIzamin_v2(cublasHandle_t handle, int n, const cuDoubleComplex* x, int incx, int* result); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasIzamin(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, int* result); + // CHECK: blasStatus = hipblasIzamin(blasHandle, n, &dcomplexx, incx, &res); + // CHECK-NEXT: blasStatus = hipblasIzamin(blasHandle, n, &dcomplexx, incx, &res); + blasStatus = cublasIzamin(blasHandle, n, &dcomplexx, incx, &res); + blasStatus = cublasIzamin_v2(blasHandle, n, &dcomplexx, incx, &res); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSasum_v2(cublasHandle_t handle, int n, const float* x, int incx, float* result); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSasum(hipblasHandle_t handle, int n, const float* x, int incx, float* result); + // CHECK: blasStatus = hipblasSasum(blasHandle, n, &fx, incx, &fresult); + // CHECK-NEXT: blasStatus = hipblasSasum(blasHandle, n, &fx, incx, &fresult); + blasStatus = cublasSasum(blasHandle, n, &fx, incx, &fresult); + blasStatus = cublasSasum_v2(blasHandle, n, &fx, incx, &fresult); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDasum_v2(cublasHandle_t handle, int n, const double* x, int incx, double* result); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDasum(hipblasHandle_t handle, int n, const double* x, int incx, double* result); + // CHECK: blasStatus = hipblasDasum(blasHandle, n, &dx, incx, &dresult); + // CHECK-NEXT: blasStatus = hipblasDasum(blasHandle, n, &dx, incx, &dresult); + 
blasStatus = cublasDasum(blasHandle, n, &dx, incx, &dresult); + blasStatus = cublasDasum_v2(blasHandle, n, &dx, incx, &dresult); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasScasum_v2(cublasHandle_t handle, int n, const cuComplex* x, int incx, float* result); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasScasum(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, float* result); + // CHECK: blasStatus = hipblasScasum(blasHandle, n, &complexx, incx, &fresult); + // CHECK-NEXT: blasStatus = hipblasScasum(blasHandle, n, &complexx, incx, &fresult); + blasStatus = cublasScasum(blasHandle, n, &complexx, incx, &fresult); + blasStatus = cublasScasum_v2(blasHandle, n, &complexx, incx, &fresult); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDzasum_v2(cublasHandle_t handle, int n, const cuDoubleComplex* x, int incx, double* result); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDzasum(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, double* result); + // CHECK: blasStatus = hipblasDzasum(blasHandle, n, &dcomplexx, incx, &dresult); + // CHECK-NEXT: blasStatus = hipblasDzasum(blasHandle, n, &dcomplexx, incx, &dresult); + blasStatus = cublasDzasum(blasHandle, n, &dcomplexx, incx, &dresult); + blasStatus = cublasDzasum_v2(blasHandle, n, &dcomplexx, incx, &dresult); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSrot_v2(cublasHandle_t handle, int n, float* x, int incx, float* y, int incy, const float* c, const float* s); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSrot(hipblasHandle_t handle, int n, float* x, int incx, float* y, int incy, const float* c, const float* s); + // CHECK: blasStatus = hipblasSrot(blasHandle, n, &fx, incx, &fy, incy, &fc, &fs); + // CHECK-NEXT: blasStatus = hipblasSrot(blasHandle, n, &fx, incx, &fy, incy, &fc, &fs); + blasStatus = cublasSrot(blasHandle, n, &fx, incx, &fy, incy, &fc, &fs); + blasStatus = cublasSrot_v2(blasHandle, n, &fx, incx, &fy, incy, &fc, &fs); + + // CUDA: 
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDrot_v2(cublasHandle_t handle, int n, double* x, int incx, double* y, int incy, const double* c, const double* s); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDrot(hipblasHandle_t handle, int n, double* x, int incx, double* y, int incy, const double* c, const double* s); + // CHECK: blasStatus = hipblasDrot(blasHandle, n, &dx, incx, &dy, incy, &dc, &ds); + // CHECK-NEXT: blasStatus = hipblasDrot(blasHandle, n, &dx, incx, &dy, incy, &dc, &ds); + blasStatus = cublasDrot(blasHandle, n, &dx, incx, &dy, incy, &dc, &ds); + blasStatus = cublasDrot_v2(blasHandle, n, &dx, incx, &dy, incy, &dc, &ds); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCrot_v2(cublasHandle_t handle, int n, cuComplex* x, int incx, cuComplex* y, int incy, const float* c, const cuComplex* s); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCrot(hipblasHandle_t handle, int n, hipblasComplex* x, int incx, hipblasComplex* y, int incy, const float* c, const hipblasComplex* s); + // CHECK: blasStatus = hipblasCrot(blasHandle, n, &complexx, incx, &complexy, incy, &fc, &complexs); + // CHECK-NEXT: blasStatus = hipblasCrot(blasHandle, n, &complexx, incx, &complexy, incy, &fc, &complexs); + blasStatus = cublasCrot(blasHandle, n, &complexx, incx, &complexy, incy, &fc, &complexs); + blasStatus = cublasCrot_v2(blasHandle, n, &complexx, incx, &complexy, incy, &fc, &complexs); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsrot_v2(cublasHandle_t handle, int n, cuComplex* x, int incx, cuComplex* y, int incy, const float* c, const float* s); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCsrot(hipblasHandle_t handle, int n, hipblasComplex* x, int incx, hipblasComplex* y, int incy, const float* c, const float* s); + // CHECK: blasStatus = hipblasCsrot(blasHandle, n, &complexx, incx, &complexy, incy, &fc, &fs); + // CHECK-NEXT: blasStatus = hipblasCsrot(blasHandle, n, &complexx, incx, &complexy, incy, &fc, &fs); + blasStatus =
cublasCsrot(blasHandle, n, &complexx, incx, &complexy, incy, &fc, &fs); + blasStatus = cublasCsrot_v2(blasHandle, n, &complexx, incx, &complexy, incy, &fc, &fs); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZrot_v2(cublasHandle_t handle, int n, cuDoubleComplex* x, int incx, cuDoubleComplex* y, int incy, const double* c, const cuDoubleComplex* s); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZrot(hipblasHandle_t handle, int n, hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* y, int incy, const double* c, const hipblasDoubleComplex* s); + // CHECK: blasStatus = hipblasZrot(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy, &dc, &dcomplexs); + // CHECK-NEXT: blasStatus = hipblasZrot(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy, &dc, &dcomplexs); + blasStatus = cublasZrot(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy, &dc, &dcomplexs); + blasStatus = cublasZrot_v2(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy, &dc, &dcomplexs); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdrot_v2(cublasHandle_t handle, int n, cuDoubleComplex* x, int incx, cuDoubleComplex* y, int incy, const double* c, const double* s); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZdrot(hipblasHandle_t handle, int n, hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* y, int incy, const double* c, const double* s); + // CHECK: blasStatus = hipblasZdrot(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy, &dc, &ds); + // CHECK-NEXT: blasStatus = hipblasZdrot(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy, &dc, &ds); + blasStatus = cublasZdrot(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy, &dc, &ds); + blasStatus = cublasZdrot_v2(blasHandle, n, &dcomplexx, incx, &dcomplexy, incy, &dc, &ds); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSrotg_v2(cublasHandle_t handle, float* a, float* b, float* c, float* s); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSrotg(hipblasHandle_t handle, float* a, float* b, float* c, float* s); + // 
CHECK: blasStatus = hipblasSrotg(blasHandle, &fa, &fb, &fc, &fs); + // CHECK-NEXT: blasStatus = hipblasSrotg(blasHandle, &fa, &fb, &fc, &fs); + blasStatus = cublasSrotg(blasHandle, &fa, &fb, &fc, &fs); + blasStatus = cublasSrotg_v2(blasHandle, &fa, &fb, &fc, &fs); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDrotg_v2(cublasHandle_t handle, double* a, double* b, double* c, double* s); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDrotg(hipblasHandle_t handle, double* a, double* b, double* c, double* s); + // CHECK: blasStatus = hipblasDrotg(blasHandle, &da, &db, &dc, &ds); + // CHECK-NEXT: blasStatus = hipblasDrotg(blasHandle, &da, &db, &dc, &ds); + blasStatus = cublasDrotg(blasHandle, &da, &db, &dc, &ds); + blasStatus = cublasDrotg_v2(blasHandle, &da, &db, &dc, &ds); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCrotg_v2(cublasHandle_t handle, cuComplex* a, cuComplex* b, float* c, cuComplex* s); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCrotg(hipblasHandle_t handle, hipblasComplex* a, hipblasComplex* b, float* c, hipblasComplex* s); + // CHECK: blasStatus = hipblasCrotg(blasHandle, &complexa, &complexb, &fc, &complexs); + // CHECK-NEXT: blasStatus = hipblasCrotg(blasHandle, &complexa, &complexb, &fc, &complexs); + blasStatus = cublasCrotg(blasHandle, &complexa, &complexb, &fc, &complexs); + blasStatus = cublasCrotg_v2(blasHandle, &complexa, &complexb, &fc, &complexs); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZrotg_v2(cublasHandle_t handle, cuDoubleComplex* a, cuDoubleComplex* b, double* c, cuDoubleComplex* s); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZrotg(hipblasHandle_t handle, hipblasDoubleComplex* a, hipblasDoubleComplex* b, double* c, hipblasDoubleComplex* s); + // CHECK: blasStatus = hipblasZrotg(blasHandle, &dcomplexa, &dcomplexb, &dc, &dcomplexs); + // CHECK-NEXT: blasStatus = hipblasZrotg(blasHandle, &dcomplexa, &dcomplexb, &dc, &dcomplexs); + blasStatus = cublasZrotg(blasHandle, &dcomplexa, &dcomplexb, 
&dc, &dcomplexs); + blasStatus = cublasZrotg_v2(blasHandle, &dcomplexa, &dcomplexb, &dc, &dcomplexs); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSrotm_v2(cublasHandle_t handle, int n, float* x, int incx, float* y, int incy, const float* param); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSrotm(hipblasHandle_t handle, int n, float* x, int incx, float* y, int incy, const float* param); + // CHECK: blasStatus = hipblasSrotm(blasHandle, n, &fx, incx, &fy, incy, &fresult); + // CHECK-NEXT: blasStatus = hipblasSrotm(blasHandle, n, &fx, incx, &fy, incy, &fresult); + blasStatus = cublasSrotm(blasHandle, n, &fx, incx, &fy, incy, &fresult); + blasStatus = cublasSrotm_v2(blasHandle, n, &fx, incx, &fy, incy, &fresult); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDrotm_v2(cublasHandle_t handle, int n, double* x, int incx, double* y, int incy, const double* param); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDrotm(hipblasHandle_t handle, int n, double* x, int incx, double* y, int incy, const double* param); + // CHECK: blasStatus = hipblasDrotm(blasHandle, n, &dx, incx, &dy, incy, &dresult); + // CHECK-NEXT: blasStatus = hipblasDrotm(blasHandle, n, &dx, incx, &dy, incy, &dresult); + blasStatus = cublasDrotm(blasHandle, n, &dx, incx, &dy, incy, &dresult); + blasStatus = cublasDrotm_v2(blasHandle, n, &dx, incx, &dy, incy, &dresult); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSrotmg_v2(cublasHandle_t handle, float* d1, float* d2, float* x1, const float* y1, float* param); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSrotmg(hipblasHandle_t handle, float* d1, float* d2, float* x1, const float* y1, float* param); + // CHECK: blasStatus = hipblasSrotmg(blasHandle, &fd1, &fd2, &fx1, &fy1, &fresult); + // CHECK-NEXT: blasStatus = hipblasSrotmg(blasHandle, &fd1, &fd2, &fx1, &fy1, &fresult); + blasStatus = cublasSrotmg(blasHandle, &fd1, &fd2, &fx1, &fy1, &fresult); + blasStatus = cublasSrotmg_v2(blasHandle, &fd1, &fd2, &fx1, &fy1, 
&fresult); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDrotmg_v2(cublasHandle_t handle, double* d1, double* d2, double* x1, const double* y1, double* param); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDrotmg(hipblasHandle_t handle, double* d1, double* d2, double* x1, const double* y1, double* param); + // CHECK: blasStatus = hipblasDrotmg(blasHandle, &dd1, &dd2, &dx1, &dy1, &dresult); + // CHECK-NEXT: blasStatus = hipblasDrotmg(blasHandle, &dd1, &dd2, &dx1, &dy1, &dresult); + blasStatus = cublasDrotmg(blasHandle, &dd1, &dd2, &dx1, &dy1, &dresult); + blasStatus = cublasDrotmg_v2(blasHandle, &dd1, &dd2, &dx1, &dy1, &dresult); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, const float* alpha, const float* A, int lda, const float* x, int incx, const float* beta, float* y, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgemv(hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, const float* alpha, const float* AP, int lda, const float* x, int incx, const float* beta, float* y, int incy); + // CHECK: blasStatus = hipblasSgemv(blasHandle, blasOperation, m, n, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); + // CHECK-NEXT: blasStatus = hipblasSgemv(blasHandle, blasOperation, m, n, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); + blasStatus = cublasSgemv(blasHandle, blasOperation, m, n, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); + blasStatus = cublasSgemv_v2(blasHandle, blasOperation, m, n, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, const double* alpha, const double* A, int lda, const double* x, int incx, const double* beta, double* y, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgemv(hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, const double* alpha, const double* AP, int lda, const double* x, int incx, 
const double* beta, double* y, int incy); + // CHECK: blasStatus = hipblasDgemv(blasHandle, blasOperation, m, n, &da, &dA, lda, &dx, incx, &db, &dy, incy); + // CHECK-NEXT: blasStatus = hipblasDgemv(blasHandle, blasOperation, m, n, &da, &dA, lda, &dx, incx, &db, &dy, incy); + blasStatus = cublasDgemv(blasHandle, blasOperation, m, n, &da, &dA, lda, &dx, incx, &db, &dy, incy); + blasStatus = cublasDgemv_v2(blasHandle, blasOperation, m, n, &da, &dA, lda, &dx, incx, &db, &dy, incy); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* x, int incx, const cuComplex* beta, cuComplex* y, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgemv(hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, const hipblasComplex* alpha, const hipblasComplex* AP, int lda, const hipblasComplex* x, int incx, const hipblasComplex* beta, hipblasComplex* y, int incy); + // CHECK: blasStatus = hipblasCgemv(blasHandle, blasOperation, m, n, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); + // CHECK-NEXT: blasStatus = hipblasCgemv(blasHandle, blasOperation, m, n, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); + blasStatus = cublasCgemv(blasHandle, blasOperation, m, n, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); + blasStatus = cublasCgemv_v2(blasHandle, blasOperation, m, n, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* x, int incx, const cuDoubleComplex* beta, cuDoubleComplex* y, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgemv(hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, const 
hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int lda, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy); + // CHECK: blasStatus = hipblasZgemv(blasHandle, blasOperation, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); + // CHECK-NEXT: blasStatus = hipblasZgemv(blasHandle, blasOperation, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); + blasStatus = cublasZgemv(blasHandle, blasOperation, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); + blasStatus = cublasZgemv_v2(blasHandle, blasOperation, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgbmv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, int kl, int ku, const float* alpha, const float* A, int lda, const float* x, int incx, const float* beta, float* y, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgbmv(hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, int kl, int ku, const float* alpha, const float* AP, int lda, const float* x, int incx, const float* beta, float* y, int incy); + // CHECK: blasStatus = hipblasSgbmv(blasHandle, blasOperation, m, n, kl, ku, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); + // CHECK-NEXT: blasStatus = hipblasSgbmv(blasHandle, blasOperation, m, n, kl, ku, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); + blasStatus = cublasSgbmv(blasHandle, blasOperation, m, n, kl, ku, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); + blasStatus = cublasSgbmv_v2(blasHandle, blasOperation, m, n, kl, ku, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgbmv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, int kl, int ku, const double* alpha, const double* A, int lda, const double* x, int incx, const double* beta, double* y, int incy); + 
// HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgbmv(hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, int kl, int ku, const double* alpha, const double* AP, int lda, const double* x, int incx, const double* beta, double* y, int incy); + // CHECK: blasStatus = hipblasDgbmv(blasHandle, blasOperation, m, n, kl, ku, &da, &dA, lda, &dx, incx, &db, &dy, incy); + // CHECK-NEXT: blasStatus = hipblasDgbmv(blasHandle, blasOperation, m, n, kl, ku, &da, &dA, lda, &dx, incx, &db, &dy, incy); + blasStatus = cublasDgbmv(blasHandle, blasOperation, m, n, kl, ku, &da, &dA, lda, &dx, incx, &db, &dy, incy); + blasStatus = cublasDgbmv_v2(blasHandle, blasOperation, m, n, kl, ku, &da, &dA, lda, &dx, incx, &db, &dy, incy); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgbmv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, int kl, int ku, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* x, int incx, const cuComplex* beta, cuComplex* y, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgbmv(hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, int kl, int ku, const hipblasComplex* alpha, const hipblasComplex* AP, int lda, const hipblasComplex* x, int incx, const hipblasComplex* beta, hipblasComplex* y, int incy); + // CHECK: blasStatus = hipblasCgbmv(blasHandle, blasOperation, m, n, kl, ku, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); + // CHECK-NEXT: blasStatus = hipblasCgbmv(blasHandle, blasOperation, m, n, kl, ku, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); + blasStatus = cublasCgbmv(blasHandle, blasOperation, m, n, kl, ku, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); + blasStatus = cublasCgbmv_v2(blasHandle, blasOperation, m, n, kl, ku, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgbmv_v2(cublasHandle_t handle, cublasOperation_t trans, 
int m, int n, int kl, int ku, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* x, int incx, const cuDoubleComplex* beta, cuDoubleComplex* y, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgbmv(hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, int kl, int ku, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int lda, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy); + // CHECK: blasStatus = hipblasZgbmv(blasHandle, blasOperation, m, n, kl, ku, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); + // CHECK-NEXT: blasStatus = hipblasZgbmv(blasHandle, blasOperation, m, n, kl, ku, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); + blasStatus = cublasZgbmv(blasHandle, blasOperation, m, n, kl, ku, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); + blasStatus = cublasZgbmv_v2(blasHandle, blasOperation, m, n, kl, ku, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const float* A, int lda, float* x, int incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasStrmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* AP, int lda, float* x, int incx); + // CHECK: blasStatus = hipblasStrmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, lda, &fx, incx); + // CHECK-NEXT: blasStatus = hipblasStrmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, lda, &fx, incx); + blasStatus = cublasStrmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, lda, &fx, incx); + blasStatus = cublasStrmv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, lda, &fx, 
incx); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const double* A, int lda, double* x, int incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDtrmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* AP, int lda, double* x, int incx); + // CHECK: blasStatus = hipblasDtrmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, lda, &dx, incx); + // CHECK-NEXT: blasStatus = hipblasDtrmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, lda, &dx, incx); + blasStatus = cublasDtrmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, lda, &dx, incx); + blasStatus = cublasDtrmv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, lda, &dx, incx); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const cuComplex* A, int lda, cuComplex* x, int incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCtrmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* AP, int lda, hipblasComplex* x, int incx); + // CHECK: blasStatus = hipblasCtrmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, lda, &complexx, incx); + // CHECK-NEXT: blasStatus = hipblasCtrmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, lda, &complexx, incx); + blasStatus = cublasCtrmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, lda, &complexx, incx); + blasStatus = cublasCtrmv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, lda, &complexx, incx); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t 
diag, int n, const cuDoubleComplex* A, int lda, cuDoubleComplex* x, int incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZtrmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* AP, int lda, hipblasDoubleComplex* x, int incx); + // CHECK: blasStatus = hipblasZtrmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, lda, &dcomplexx, incx); + // CHECK-NEXT: blasStatus = hipblasZtrmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, lda, &dcomplexx, incx); + blasStatus = cublasZtrmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, lda, &dcomplexx, incx); + blasStatus = cublasZtrmv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, lda, &dcomplexx, incx); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, int k, const float* A, int lda, float* x, int incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasStbmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const float* AP, int lda, float* x, int incx); + // CHECK: blasStatus = hipblasStbmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &fA, lda, &fx, incx); + // CHECK-NEXT: blasStatus = hipblasStbmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &fA, lda, &fx, incx); + blasStatus = cublasStbmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &fA, lda, &fx, incx); + blasStatus = cublasStbmv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &fA, lda, &fx, incx); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, int k, const double* A, int lda, double* x, int incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t 
hipblasDtbmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const double* AP, int lda, double* x, int incx); + // CHECK: blasStatus = hipblasDtbmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &dA, lda, &dx, incx); + // CHECK-NEXT: blasStatus = hipblasDtbmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &dA, lda, &dx, incx); + blasStatus = cublasDtbmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &dA, lda, &dx, incx); + blasStatus = cublasDtbmv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &dA, lda, &dx, incx); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, int k, const cuComplex* A, int lda, cuComplex* x, int incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCtbmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasComplex* AP, int lda, hipblasComplex* x, int incx); + // CHECK: blasStatus = hipblasCtbmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &complexA, lda, &complexx, incx); + // CHECK-NEXT: blasStatus = hipblasCtbmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &complexA, lda, &complexx, incx); + blasStatus = cublasCtbmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &complexA, lda, &complexx, incx); + blasStatus = cublasCtbmv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &complexA, lda, &complexx, incx); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, int k, const cuDoubleComplex* A, int lda, cuDoubleComplex* x, int incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZtbmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, 
hipblasDiagType_t diag, int m, int k, const hipblasDoubleComplex* AP, int lda, hipblasDoubleComplex* x, int incx); + // CHECK: blasStatus = hipblasZtbmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &dcomplexA, lda, &dcomplexx, incx); + // CHECK-NEXT: blasStatus = hipblasZtbmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &dcomplexA, lda, &dcomplexx, incx); + blasStatus = cublasZtbmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &dcomplexA, lda, &dcomplexx, incx); + blasStatus = cublasZtbmv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &dcomplexA, lda, &dcomplexx, incx); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const float* AP, float* x, int incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasStpmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* AP, float* x, int incx); + // CHECK: blasStatus = hipblasStpmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, &fx, incx); + // CHECK-NEXT: blasStatus = hipblasStpmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, &fx, incx); + blasStatus = cublasStpmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, &fx, incx); + blasStatus = cublasStpmv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, &fx, incx); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const double* AP, double* x, int incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDtpmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* AP, double* x, int incx); + // CHECK: blasStatus = hipblasDtpmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, &dx, 
incx); + // CHECK-NEXT: blasStatus = hipblasDtpmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, &dx, incx); + blasStatus = cublasDtpmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, &dx, incx); + blasStatus = cublasDtpmv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, &dx, incx); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const cuComplex* AP, cuComplex* x, int incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCtpmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* AP, hipblasComplex* x, int incx); + // CHECK: blasStatus = hipblasCtpmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, &complexx, incx); + // CHECK-NEXT: blasStatus = hipblasCtpmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, &complexx, incx); + blasStatus = cublasCtpmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, &complexx, incx); + blasStatus = cublasCtpmv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, &complexx, incx); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const cuDoubleComplex* AP, cuDoubleComplex* x, int incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZtpmv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* AP, hipblasDoubleComplex* x, int incx); + // CHECK: blasStatus = hipblasZtpmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, &dcomplexx, incx); + // CHECK-NEXT: blasStatus = hipblasZtpmv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, &dcomplexx, incx); + blasStatus = cublasZtpmv(blasHandle, 
blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, &dcomplexx, incx); + blasStatus = cublasZtpmv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, &dcomplexx, incx); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const float* A, int lda, float* x, int incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasStrsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* AP, int lda, float* x, int incx); + // CHECK: blasStatus = hipblasStrsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, lda, &fx, incx); + // CHECK-NEXT: blasStatus = hipblasStrsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, lda, &fx, incx); + blasStatus = cublasStrsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, lda, &fx, incx); + blasStatus = cublasStrsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, lda, &fx, incx); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const double* A, int lda, double* x, int incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDtrsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* AP, int lda, double* x, int incx); + // CHECK: blasStatus = hipblasDtrsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, lda, &dx, incx); + // CHECK-NEXT: blasStatus = hipblasDtrsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, lda, &dx, incx); + blasStatus = cublasDtrsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, lda, &dx, incx); + blasStatus = cublasDtrsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, lda, &dx, incx); + + // CUDA: CUBLASAPI cublasStatus_t 
CUBLASWINAPI cublasCtrsv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const cuComplex* A, int lda, cuComplex* x, int incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCtrsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* AP, int lda, hipblasComplex* x, int incx); + // CHECK: blasStatus = hipblasCtrsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, lda, &complexx, incx); + // CHECK-NEXT: blasStatus = hipblasCtrsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, lda, &complexx, incx); + blasStatus = cublasCtrsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, lda, &complexx, incx); + blasStatus = cublasCtrsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, lda, &complexx, incx); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const cuDoubleComplex* A, int lda, cuDoubleComplex* x, int incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZtrsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* AP, int lda, hipblasDoubleComplex* x, int incx); + // CHECK: blasStatus = hipblasZtrsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, lda, &dcomplexx, incx); + // CHECK-NEXT: blasStatus = hipblasZtrsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, lda, &dcomplexx, incx); + blasStatus = cublasZtrsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, lda, &dcomplexx, incx); + blasStatus = cublasZtrsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, lda, &dcomplexx, incx); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStpsv_v2(cublasHandle_t handle, 
cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const float* AP, float* x, int incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasStpsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float* AP, float* x, int incx); + // CHECK: blasStatus = hipblasStpsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, &fx, incx); + // CHECK-NEXT: blasStatus = hipblasStpsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, &fx, incx); + blasStatus = cublasStpsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, &fx, incx); + blasStatus = cublasStpsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &fA, &fx, incx); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtpsv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const double* AP, double* x, int incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDtpsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double* AP, double* x, int incx); + // CHECK: blasStatus = hipblasDtpsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, &dx, incx); + // CHECK-NEXT: blasStatus = hipblasDtpsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, &dx, incx); + blasStatus = cublasDtpsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, &dx, incx); + blasStatus = cublasDtpsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dA, &dx, incx); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtpsv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const cuComplex* AP, cuComplex* x, int incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCtpsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* AP, 
hipblasComplex* x, int incx); + // CHECK: blasStatus = hipblasCtpsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, &complexx, incx); + // CHECK-NEXT: blasStatus = hipblasCtpsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, &complexx, incx); + blasStatus = cublasCtpsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, &complexx, incx); + blasStatus = cublasCtpsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &complexA, &complexx, incx); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtpsv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, const cuDoubleComplex* AP, cuDoubleComplex* x, int incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZtpsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* AP, hipblasDoubleComplex* x, int incx); + // CHECK: blasStatus = hipblasZtpsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, &dcomplexx, incx); + // CHECK-NEXT: blasStatus = hipblasZtpsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, &dcomplexx, incx); + blasStatus = cublasZtpsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, &dcomplexx, incx); + blasStatus = cublasZtpsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, &dcomplexA, &dcomplexx, incx); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStbsv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, int k, const float* A, int lda, float* x, int incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasStbsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const float* AP, int lda, float* x, int incx); + // CHECK: blasStatus = hipblasStbsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, 
&fA, lda, &fx, incx); + // CHECK-NEXT: blasStatus = hipblasStbsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &fA, lda, &fx, incx); + blasStatus = cublasStbsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &fA, lda, &fx, incx); + blasStatus = cublasStbsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &fA, lda, &fx, incx); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtbsv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, int k, const double* A, int lda, double* x, int incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDtbsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const double* AP, int lda, double* x, int incx); + // CHECK: blasStatus = hipblasDtbsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &dA, lda, &dx, incx); + // CHECK-NEXT: blasStatus = hipblasDtbsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &dA, lda, &dx, incx); + blasStatus = cublasDtbsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &dA, lda, &dx, incx); + blasStatus = cublasDtbsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &dA, lda, &dx, incx); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtbsv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, int k, const cuComplex* A, int lda, cuComplex* x, int incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCtbsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const hipblasComplex* AP, int lda, hipblasComplex* x, int incx); + // CHECK: blasStatus = hipblasCtbsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &complexA, lda, &complexx, incx); + // CHECK-NEXT: blasStatus = hipblasCtbsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &complexA, lda, 
&complexx, incx); + blasStatus = cublasCtbsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &complexA, lda, &complexx, incx); + blasStatus = cublasCtbsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &complexA, lda, &complexx, incx); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtbsv_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, int k, const cuDoubleComplex* A, int lda, cuDoubleComplex* x, int incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZtbsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const hipblasDoubleComplex* AP, int lda, hipblasDoubleComplex* x, int incx); + // CHECK: blasStatus = hipblasZtbsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &dcomplexA, lda, &dcomplexx, incx); + // CHECK-NEXT: blasStatus = hipblasZtbsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &dcomplexA, lda, &dcomplexx, incx); + blasStatus = cublasZtbsv(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &dcomplexA, lda, &dcomplexx, incx); + blasStatus = cublasZtbsv_v2(blasHandle, blasFillMode, blasOperation, blasDiagType, n, k, &dcomplexA, lda, &dcomplexx, incx); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const float* alpha, const float* A, int lda, const float* x, int incx, const float* beta, float* y, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSsymv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* AP, int lda, const float* x, int incx, const float* beta, float* y, int incy); + // CHECK: blasStatus = hipblasSsymv(blasHandle, blasFillMode, n, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); + // CHECK-NEXT: blasStatus = hipblasSsymv(blasHandle, blasFillMode, n, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); + blasStatus = cublasSsymv(blasHandle, 
blasFillMode, n, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); + blasStatus = cublasSsymv_v2(blasHandle, blasFillMode, n, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const double* alpha, const double* A, int lda, const double* x, int incx, const double* beta, double* y, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDsymv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* AP, int lda, const double* x, int incx, const double* beta, double* y, int incy); + // CHECK: blasStatus = hipblasDsymv(blasHandle, blasFillMode, n, &da, &dA, lda, &dx, incx, &db, &dy, incy); + // CHECK-NEXT: blasStatus = hipblasDsymv(blasHandle, blasFillMode, n, &da, &dA, lda, &dx, incx, &db, &dy, incy); + blasStatus = cublasDsymv(blasHandle, blasFillMode, n, &da, &dA, lda, &dx, incx, &db, &dy, incy); + blasStatus = cublasDsymv_v2(blasHandle, blasFillMode, n, &da, &dA, lda, &dx, incx, &db, &dy, incy); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* x, int incx, const cuComplex* beta, cuComplex* y, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCsymv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* AP, int lda, const hipblasComplex* x, int incx, const hipblasComplex* beta, hipblasComplex* y, int incy); + // CHECK: blasStatus = hipblasCsymv(blasHandle, blasFillMode, n, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); + // CHECK-NEXT: blasStatus = hipblasCsymv(blasHandle, blasFillMode, n, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); + blasStatus = cublasCsymv(blasHandle, blasFillMode, n, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); + blasStatus = 
cublasCsymv_v2(blasHandle, blasFillMode, n, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* x, int incx, const cuDoubleComplex* beta, cuDoubleComplex* y, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZsymv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int lda, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy); + // CHECK: blasStatus = hipblasZsymv(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); + // CHECK-NEXT: blasStatus = hipblasZsymv(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); + blasStatus = cublasZsymv(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); + blasStatus = cublasZsymv_v2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChemv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* x, int incx, const cuComplex* beta, cuComplex* y, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasChemv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* AP, int lda, const hipblasComplex* x, int incx, const hipblasComplex* beta, hipblasComplex* y, int incy); + // CHECK: blasStatus = hipblasChemv(blasHandle, blasFillMode, n, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); + // CHECK-NEXT: blasStatus = hipblasChemv(blasHandle, blasFillMode, n, &complexa, 
&complexA, lda, &complexx, incx, &complexb, &complexy, incy); + blasStatus = cublasChemv(blasHandle, blasFillMode, n, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); + blasStatus = cublasChemv_v2(blasHandle, blasFillMode, n, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhemv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* x, int incx, const cuDoubleComplex* beta, cuDoubleComplex* y, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZhemv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int lda, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy); + // CHECK: blasStatus = hipblasZhemv(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); + // CHECK-NEXT: blasStatus = hipblasZhemv(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); + blasStatus = cublasZhemv(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); + blasStatus = cublasZhemv_v2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, int k, const float* alpha, const float* A, int lda, const float* x, int incx, const float* beta, float* y, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSsbmv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const float* alpha, const float* AP, int lda, const float* x, int incx, const float* beta, float* y, int incy); + // CHECK: blasStatus = hipblasSsbmv(blasHandle, blasFillMode, n, k, 
&fa, &fA, lda, &fx, incx, &fb, &fy, incy); + // CHECK-NEXT: blasStatus = hipblasSsbmv(blasHandle, blasFillMode, n, k, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); + blasStatus = cublasSsbmv(blasHandle, blasFillMode, n, k, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); + blasStatus = cublasSsbmv_v2(blasHandle, blasFillMode, n, k, &fa, &fA, lda, &fx, incx, &fb, &fy, incy); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, int k, const double* alpha, const double* A, int lda, const double* x, int incx, const double* beta, double* y, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDsbmv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const double* alpha, const double* AP, int lda, const double* x, int incx, const double* beta, double* y, int incy); + // CHECK: blasStatus = hipblasDsbmv(blasHandle, blasFillMode, n, k, &da, &dA, lda, &dx, incx, &db, &dy, incy); + // CHECK-NEXT: blasStatus = hipblasDsbmv(blasHandle, blasFillMode, n, k, &da, &dA, lda, &dx, incx, &db, &dy, incy); + blasStatus = cublasDsbmv(blasHandle, blasFillMode, n, k, &da, &dA, lda, &dx, incx, &db, &dy, incy); + blasStatus = cublasDsbmv_v2(blasHandle, blasFillMode, n, k, &da, &dA, lda, &dx, incx, &db, &dy, incy); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, int k, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* x, int incx, const cuComplex* beta, cuComplex* y, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasChbmv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const hipblasComplex* alpha, const hipblasComplex* AP, int lda, const hipblasComplex* x, int incx, const hipblasComplex* beta, hipblasComplex* y, int incy); + // CHECK: blasStatus = hipblasChbmv(blasHandle, blasFillMode, n, k, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); + // CHECK-NEXT: blasStatus = 
hipblasChbmv(blasHandle, blasFillMode, n, k, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); + blasStatus = cublasChbmv(blasHandle, blasFillMode, n, k, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); + blasStatus = cublasChbmv_v2(blasHandle, blasFillMode, n, k, &complexa, &complexA, lda, &complexx, incx, &complexb, &complexy, incy); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* x, int incx, const cuDoubleComplex* beta, cuDoubleComplex* y, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZhbmv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int lda, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy); + // CHECK: blasStatus = hipblasZhbmv(blasHandle, blasFillMode, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); + // CHECK-NEXT: blasStatus = hipblasZhbmv(blasHandle, blasFillMode, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); + blasStatus = cublasZhbmv(blasHandle, blasFillMode, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); + blasStatus = cublasZhbmv_v2(blasHandle, blasFillMode, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSspmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const float* alpha, const float* AP, const float* x, int incx, const float* beta, float* y, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSspmv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* AP, const float* x, int incx, const float* beta, float* y, int incy); + // CHECK: 
blasStatus = hipblasSspmv(blasHandle, blasFillMode, n, &fa, &fA, &fx, incx, &fb, &fy, incy); + // CHECK-NEXT: blasStatus = hipblasSspmv(blasHandle, blasFillMode, n, &fa, &fA, &fx, incx, &fb, &fy, incy); + blasStatus = cublasSspmv(blasHandle, blasFillMode, n, &fa, &fA, &fx, incx, &fb, &fy, incy); + blasStatus = cublasSspmv_v2(blasHandle, blasFillMode, n, &fa, &fA, &fx, incx, &fb, &fy, incy); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDspmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const double* alpha, const double* AP, const double* x, int incx, const double* beta, double* y, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDspmv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* AP, const double* x, int incx, const double* beta, double* y, int incy); + // CHECK: blasStatus = hipblasDspmv(blasHandle, blasFillMode, n, &da, &dA, &dx, incx, &db, &dy, incy); + // CHECK-NEXT: blasStatus = hipblasDspmv(blasHandle, blasFillMode, n, &da, &dA, &dx, incx, &db, &dy, incy); + blasStatus = cublasDspmv(blasHandle, blasFillMode, n, &da, &dA, &dx, incx, &db, &dy, incy); + blasStatus = cublasDspmv_v2(blasHandle, blasFillMode, n, &da, &dA, &dx, incx, &db, &dy, incy); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuComplex* alpha, const cuComplex* AP, const cuComplex* x, int incx, const cuComplex* beta, cuComplex* y, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasChpmv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* AP, const hipblasComplex* x, int incx, const hipblasComplex* beta, hipblasComplex* y, int incy); + // CHECK: blasStatus = hipblasChpmv(blasHandle, blasFillMode, n, &complexa, &complexA, &complexx, incx, &complexb, &complexy, incy); + // CHECK-NEXT: blasStatus = hipblasChpmv(blasHandle, blasFillMode, n, &complexa, &complexA, &complexx, incx, &complexb, 
&complexy, incy); + blasStatus = cublasChpmv(blasHandle, blasFillMode, n, &complexa, &complexA, &complexx, incx, &complexb, &complexy, incy); + blasStatus = cublasChpmv_v2(blasHandle, blasFillMode, n, &complexa, &complexA, &complexx, incx, &complexb, &complexy, incy); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* AP, const cuDoubleComplex* x, int incx, const cuDoubleComplex* beta, cuDoubleComplex* y, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZhpmv(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy); + // CHECK: blasStatus = hipblasZhpmv(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexA, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); + // CHECK-NEXT: blasStatus = hipblasZhpmv(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexA, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); + blasStatus = cublasZhpmv(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexA, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); + blasStatus = cublasZhpmv_v2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexA, &dcomplexx, incx, &dcomplexb, &dcomplexy, incy); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSger_v2(cublasHandle_t handle, int m, int n, const float* alpha, const float* x, int incx, const float* y, int incy, float* A, int lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSger(hipblasHandle_t handle, int m, int n, const float* alpha, const float* x, int incx, const float* y, int incy, float* AP, int lda); + // CHECK: blasStatus = hipblasSger(blasHandle, m, n, &fa, &fx, incx, &fy, incy, &fA, lda); + // CHECK-NEXT: blasStatus = hipblasSger(blasHandle, m, n, &fa, &fx, incx, &fy, incy, &fA, lda); + blasStatus = cublasSger(blasHandle, m, n, &fa, &fx, 
incx, &fy, incy, &fA, lda); + blasStatus = cublasSger_v2(blasHandle, m, n, &fa, &fx, incx, &fy, incy, &fA, lda); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDger_v2(cublasHandle_t handle, int m, int n, const double* alpha, const double* x, int incx, const double* y, int incy, double* A, int lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDger(hipblasHandle_t handle, int m, int n, const double* alpha, const double* x, int incx, const double* y, int incy, double* AP, int lda); + // CHECK: blasStatus = hipblasDger(blasHandle, m, n, &da, &dx, incx, &dy, incy, &dA, lda); + // CHECK-NEXT: blasStatus = hipblasDger(blasHandle, m, n, &da, &dx, incx, &dy, incy, &dA, lda); + blasStatus = cublasDger(blasHandle, m, n, &da, &dx, incx, &dy, incy, &dA, lda); + blasStatus = cublasDger_v2(blasHandle, m, n, &da, &dx, incx, &dy, incy, &dA, lda); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgeru_v2(cublasHandle_t handle, int m, int n, const cuComplex* alpha, const cuComplex* x, int incx, const cuComplex* y, int incy, cuComplex* A, int lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgeru(hipblasHandle_t handle, int m, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* AP, int lda); + // CHECK: blasStatus = hipblasCgeru(blasHandle, m, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); + // CHECK-NEXT: blasStatus = hipblasCgeru(blasHandle, m, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); + blasStatus = cublasCgeru(blasHandle, m, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); + blasStatus = cublasCgeru_v2(blasHandle, m, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgerc_v2(cublasHandle_t handle, int m, int n, const cuComplex* alpha, const cuComplex* x, int incx, const cuComplex* y, int incy, cuComplex* A, int lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t 
hipblasCgerc(hipblasHandle_t handle, int m, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* AP, int lda); + // CHECK: blasStatus = hipblasCgerc(blasHandle, m, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); + // CHECK-NEXT: blasStatus = hipblasCgerc(blasHandle, m, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); + blasStatus = cublasCgerc(blasHandle, m, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); + blasStatus = cublasCgerc_v2(blasHandle, m, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgeru_v2(cublasHandle_t handle, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int incx, const cuDoubleComplex* y, int incy, cuDoubleComplex* A, int lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgeru(hipblasHandle_t handle, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* AP, int lda); + // CHECK: blasStatus = hipblasZgeru(blasHandle, m, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); + // CHECK-NEXT: blasStatus = hipblasZgeru(blasHandle, m, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); + blasStatus = cublasZgeru(blasHandle, m, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); + blasStatus = cublasZgeru_v2(blasHandle, m, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgerc_v2(cublasHandle_t handle, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int incx, const cuDoubleComplex* y, int incy, cuDoubleComplex* A, int lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgerc(hipblasHandle_t handle, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, 
const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* AP, int lda); + // CHECK: blasStatus = hipblasZgerc(blasHandle, m, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); + // CHECK-NEXT: blasStatus = hipblasZgerc(blasHandle, m, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); + blasStatus = cublasZgerc(blasHandle, m, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); + blasStatus = cublasZgerc_v2(blasHandle, m, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, float* A, int lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSsyr(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, float* AP, int lda); + // CHECK: blasStatus = hipblasSsyr(blasHandle, blasFillMode, n, &fa, &fx, incx, &fA, lda); + // CHECK-NEXT: blasStatus = hipblasSsyr(blasHandle, blasFillMode, n, &fa, &fx, incx, &fA, lda); + blasStatus = cublasSsyr(blasHandle, blasFillMode, n, &fa, &fx, incx, &fA, lda); + blasStatus = cublasSsyr_v2(blasHandle, blasFillMode, n, &fa, &fx, incx, &fA, lda); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, double* A, int lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDsyr(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, double* AP, int lda); + // CHECK: blasStatus = hipblasDsyr(blasHandle, blasFillMode, n, &da, &dx, incx, &dA, lda); + // CHECK-NEXT: blasStatus = hipblasDsyr(blasHandle, blasFillMode, n, &da, &dx, incx, &dA, lda); + blasStatus = cublasDsyr(blasHandle, blasFillMode, n, &da, &dx, incx, &dA, lda); + blasStatus = cublasDsyr_v2(blasHandle, blasFillMode, n, &da, &dx, incx, &dA, lda); + + // CUDA: 
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuComplex* alpha, const cuComplex* x, int incx, cuComplex* A, int lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCsyr(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasComplex* AP, int lda); + // CHECK: blasStatus = hipblasCsyr(blasHandle, blasFillMode, n, &complexa, &complexx, incx, &complexA, lda); + // CHECK-NEXT: blasStatus = hipblasCsyr(blasHandle, blasFillMode, n, &complexa, &complexx, incx, &complexA, lda); + blasStatus = cublasCsyr(blasHandle, blasFillMode, n, &complexa, &complexx, incx, &complexA, lda); + blasStatus = cublasCsyr_v2(blasHandle, blasFillMode, n, &complexa, &complexx, incx, &complexA, lda); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int incx, cuDoubleComplex* A, int lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZsyr(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* AP, int lda); + // CHECK: blasStatus = hipblasZsyr(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexx, incx, &dcomplexA, lda); + // CHECK-NEXT: blasStatus = hipblasZsyr(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexx, incx, &dcomplexA, lda); + blasStatus = cublasZsyr(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexx, incx, &dcomplexA, lda); + blasStatus = cublasZsyr_v2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexx, incx, &dcomplexA, lda); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCher_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const float* alpha, const cuComplex* x, int incx, cuComplex* A, int lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCher(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const 
float* alpha, const hipblasComplex* x, int incx, hipblasComplex* AP, int lda); + // CHECK: blasStatus = hipblasCher(blasHandle, blasFillMode, n, &fa, &complexx, incx, &complexA, lda); + // CHECK-NEXT: blasStatus = hipblasCher(blasHandle, blasFillMode, n, &fa, &complexx, incx, &complexA, lda); + blasStatus = cublasCher(blasHandle, blasFillMode, n, &fa, &complexx, incx, &complexA, lda); + blasStatus = cublasCher_v2(blasHandle, blasFillMode, n, &fa, &complexx, incx, &complexA, lda); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZher_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const double* alpha, const cuDoubleComplex* x, int incx, cuDoubleComplex* A, int lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZher(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* AP, int lda); + // CHECK: blasStatus = hipblasZher(blasHandle, blasFillMode, n, &da, &dcomplexx, incx, &dcomplexA, lda); + // CHECK-NEXT: blasStatus = hipblasZher(blasHandle, blasFillMode, n, &da, &dcomplexx, incx, &dcomplexA, lda); + blasStatus = cublasZher(blasHandle, blasFillMode, n, &da, &dcomplexx, incx, &dcomplexA, lda); + blasStatus = cublasZher_v2(blasHandle, blasFillMode, n, &da, &dcomplexx, incx, &dcomplexA, lda); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSspr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, float* AP); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSspr(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, float* AP); + // CHECK: blasStatus = hipblasSspr(blasHandle, blasFillMode, n, &fa, &fx, incx, &fA); + // CHECK-NEXT: blasStatus = hipblasSspr(blasHandle, blasFillMode, n, &fa, &fx, incx, &fA); + blasStatus = cublasSspr(blasHandle, blasFillMode, n, &fa, &fx, incx, &fA); + blasStatus = cublasSspr_v2(blasHandle, blasFillMode, n, &fa, &fx, incx, &fA); + + // 
CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDspr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, double* AP); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDspr(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, double* AP); + // CHECK: blasStatus = hipblasDspr(blasHandle, blasFillMode, n, &da, &dx, incx, &dA); + // CHECK-NEXT: blasStatus = hipblasDspr(blasHandle, blasFillMode, n, &da, &dx, incx, &dA); + blasStatus = cublasDspr(blasHandle, blasFillMode, n, &da, &dx, incx, &dA); + blasStatus = cublasDspr_v2(blasHandle, blasFillMode, n, &da, &dx, incx, &dA); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChpr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const float* alpha, const cuComplex* x, int incx, cuComplex* AP); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasChpr(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const hipblasComplex* x, int incx, hipblasComplex* AP); + // CHECK: blasStatus = hipblasChpr(blasHandle, blasFillMode, n, &fa, &complexx, incx, &complexA); + // CHECK-NEXT: blasStatus = hipblasChpr(blasHandle, blasFillMode, n, &fa, &complexx, incx, &complexA); + blasStatus = cublasChpr(blasHandle, blasFillMode, n, &fa, &complexx, incx, &complexA); + blasStatus = cublasChpr_v2(blasHandle, blasFillMode, n, &fa, &complexx, incx, &complexA); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhpr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const double* alpha, const cuDoubleComplex* x, int incx, cuDoubleComplex* AP); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZhpr(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* AP); + // CHECK: blasStatus = hipblasZhpr(blasHandle, blasFillMode, n, &da, &dcomplexx, incx, &dcomplexA); + // CHECK-NEXT: blasStatus = hipblasZhpr(blasHandle, blasFillMode, n, 
&da, &dcomplexx, incx, &dcomplexA); + blasStatus = cublasZhpr(blasHandle, blasFillMode, n, &da, &dcomplexx, incx, &dcomplexA); + blasStatus = cublasZhpr_v2(blasHandle, blasFillMode, n, &da, &dcomplexx, incx, &dcomplexA); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, const float* y, int incy, float* A, int lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSsyr2(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, const float* y, int incy, float* AP, int lda); + // CHECK: blasStatus = hipblasSsyr2(blasHandle, blasFillMode, n, &fa, &fx, incx, &fy, incy, &fA, lda); + // CHECK-NEXT: blasStatus = hipblasSsyr2(blasHandle, blasFillMode, n, &fa, &fx, incx, &fy, incy, &fA, lda); + blasStatus = cublasSsyr2(blasHandle, blasFillMode, n, &fa, &fx, incx, &fy, incy, &fA, lda); + blasStatus = cublasSsyr2_v2(blasHandle, blasFillMode, n, &fa, &fx, incx, &fy, incy, &fA, lda); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, const double* y, int incy, double* A, int lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDsyr2(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, const double* y, int incy, double* AP, int lda); + // CHECK: blasStatus = hipblasDsyr2(blasHandle, blasFillMode, n, &da, &dx, incx, &dy, incy, &dA, lda); + // CHECK-NEXT: blasStatus = hipblasDsyr2(blasHandle, blasFillMode, n, &da, &dx, incx, &dy, incy, &dA, lda); + blasStatus = cublasDsyr2(blasHandle, blasFillMode, n, &da, &dx, incx, &dy, incy, &dA, lda); + blasStatus = cublasDsyr2_v2(blasHandle, blasFillMode, n, &da, &dx, incx, &dy, incy, &dA, lda); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const 
cuComplex* alpha, const cuComplex* x, int incx, const cuComplex* y, int incy, cuComplex* A, int lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCsyr2(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* AP, int lda); + // CHECK: blasStatus = hipblasCsyr2(blasHandle, blasFillMode, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); + // CHECK-NEXT: blasStatus = hipblasCsyr2(blasHandle, blasFillMode, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); + blasStatus = cublasCsyr2(blasHandle, blasFillMode, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); + blasStatus = cublasCsyr2_v2(blasHandle, blasFillMode, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int incx, const cuDoubleComplex* y, int incy, cuDoubleComplex* A, int lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZsyr2(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* AP, int lda); + // CHECK: blasStatus = hipblasZsyr2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); + // CHECK-NEXT: blasStatus = hipblasZsyr2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); + blasStatus = cublasZsyr2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); + blasStatus = cublasZsyr2_v2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCher2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const 
cuComplex* alpha, const cuComplex* x, int incx, const cuComplex* y, int incy, cuComplex* A, int lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCher2(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* AP, int lda); + // CHECK: blasStatus = hipblasCher2(blasHandle, blasFillMode, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); + // CHECK-NEXT: blasStatus = hipblasCher2(blasHandle, blasFillMode, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); + blasStatus = cublasCher2(blasHandle, blasFillMode, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); + blasStatus = cublasCher2_v2(blasHandle, blasFillMode, n, &complexa, &complexx, incx, &complexy, incy, &complexA, lda); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZher2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int incx, const cuDoubleComplex* y, int incy, cuDoubleComplex* A, int lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZher2(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* AP, int lda); + // CHECK: blasStatus = hipblasZher2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); + // CHECK-NEXT: blasStatus = hipblasZher2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); + blasStatus = cublasZher2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); + blasStatus = cublasZher2_v2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA, lda); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSspr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const 
float* alpha, const float* x, int incx, const float* y, int incy, float* AP); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSspr2(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const float* x, int incx, const float* y, int incy, float* AP); + // CHECK: blasStatus = hipblasSspr2(blasHandle, blasFillMode, n, &fa, &fx, incx, &fy, incy, &fA); + // CHECK-NEXT: blasStatus = hipblasSspr2(blasHandle, blasFillMode, n, &fa, &fx, incx, &fy, incy, &fA); + blasStatus = cublasSspr2(blasHandle, blasFillMode, n, &fa, &fx, incx, &fy, incy, &fA); + blasStatus = cublasSspr2_v2(blasHandle, blasFillMode, n, &fa, &fx, incx, &fy, incy, &fA); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDspr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, const double* y, int incy, double* AP); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDspr2(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const double* x, int incx, const double* y, int incy, double* AP); + // CHECK: blasStatus = hipblasDspr2(blasHandle, blasFillMode, n, &da, &dx, incx, &dy, incy, &dA); + // CHECK-NEXT: blasStatus = hipblasDspr2(blasHandle, blasFillMode, n, &da, &dx, incx, &dy, incy, &dA); + blasStatus = cublasDspr2(blasHandle, blasFillMode, n, &da, &dx, incx, &dy, incy, &dA); + blasStatus = cublasDspr2_v2(blasHandle, blasFillMode, n, &da, &dx, incx, &dy, incy, &dA); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChpr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuComplex* alpha, const cuComplex* x, int incx, const cuComplex* y, int incy, cuComplex* AP); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasChpr2(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* AP); + // CHECK: blasStatus = hipblasChpr2(blasHandle, blasFillMode, n, &complexa, &complexx, incx, 
&complexy, incy, &complexA); + // CHECK-NEXT: blasStatus = hipblasChpr2(blasHandle, blasFillMode, n, &complexa, &complexx, incx, &complexy, incy, &complexA); + blasStatus = cublasChpr2(blasHandle, blasFillMode, n, &complexa, &complexx, incx, &complexy, incy, &complexA); + blasStatus = cublasChpr2_v2(blasHandle, blasFillMode, n, &complexa, &complexx, incx, &complexy, incy, &complexA); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhpr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int incx, const cuDoubleComplex* y, int incy, cuDoubleComplex* AP); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZhpr2(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* AP); + // CHECK: blasStatus = hipblasZhpr2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA); + // CHECK-NEXT: blasStatus = hipblasZhpr2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA); + blasStatus = cublasZhpr2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA); + blasStatus = cublasZhpr2_v2(blasHandle, blasFillMode, n, &dcomplexa, &dcomplexx, incx, &dcomplexy, incy, &dcomplexA); + + cublasOperation_t transa, transb; + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemm_v2(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const float* alpha, const float* A, int lda, const float* B, int ldb, const float* beta, float* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgemm(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const float* alpha, const float* AP, int lda, const float* BP, int ldb, const float* beta, float* CP, int ldc); + // CHECK: blasStatus = 
hipblasSgemm(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); + // CHECK-NEXT: blasStatus = hipblasSgemm(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); + blasStatus = cublasSgemm(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); + blasStatus = cublasSgemm_v2(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemm_v2(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const double* alpha, const double* A, int lda, const double* B, int ldb, const double* beta, double* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgemm(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const double* alpha, const double* AP, int lda, const double* BP, int ldb, const double* beta, double* CP, int ldc); + // CHECK: blasStatus = hipblasDgemm(blasHandle, transa, transb, m, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); + // CHECK-NEXT: blasStatus = hipblasDgemm(blasHandle, transa, transb, m, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); + blasStatus = cublasDgemm(blasHandle, transa, transb, m, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); + blasStatus = cublasDgemm_v2(blasHandle, transa, transb, m, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemm_v2(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, const cuComplex* beta, cuComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgemm(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasComplex* alpha, const hipblasComplex* AP, int lda, const hipblasComplex* BP, int ldb, const hipblasComplex* beta, 
hipblasComplex* CP, int ldc); + // CHECK: blasStatus = hipblasCgemm(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + // CHECK-NEXT: blasStatus = hipblasCgemm(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + blasStatus = cublasCgemm(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + blasStatus = cublasCgemm_v2(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemm_v2(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgemm(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int lda, const hipblasDoubleComplex* BP, int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* CP, int ldc); + // CHECK: blasStatus = hipblasZgemm(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + // CHECK-NEXT: blasStatus = hipblasZgemm(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + blasStatus = cublasZgemm(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + blasStatus = cublasZgemm_v2(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int 
m, int n, int k, const float* alpha, const float* const Aarray[], int lda, const float* const Barray[], int ldb, const float* beta, float* const Carray[], int ldc, int batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const float* alpha, const float* const AP[], int lda, const float* const BP[], int ldb, const float* beta, float* const CP[], int ldc, int batchCount); + // CHECK: blasStatus = hipblasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray, lda, fBarray, ldb, &fb, fCarray, ldc, batchCount); + blasStatus = cublasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray, lda, fBarray, ldb, &fb, fCarray, ldc, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const double* alpha, const double* const Aarray[], int lda, const double* const Barray[], int ldb, const double* beta, double* const Carray[], int ldc, int batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const double* alpha, const double* const AP[], int lda, const double* const BP[], int ldb, const double* beta, double* const CP[], int ldc, int batchCount); + // CHECK: blasStatus = hipblasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray, lda, dBarray, ldb, &db, dCarray, ldc, batchCount); + blasStatus = cublasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray, lda, dBarray, ldb, &db, dCarray, ldc, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuComplex* alpha, const cuComplex* const Aarray[], int lda, const cuComplex* const Barray[], int ldb, const 
cuComplex* beta, cuComplex* const Carray[], int ldc, int batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const AP[], int lda, const hipblasComplex* const BP[], int ldb, const hipblasComplex* beta, hipblasComplex* const CP[], int ldc, int batchCount); + // CHECK: blasStatus = hipblasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray, lda, complexBarray, ldb, &complexb, complexCarray, ldc, batchCount); + blasStatus = cublasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray, lda, complexBarray, ldb, &complexb, complexCarray, ldc, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* const Aarray[], int lda, const cuDoubleComplex* const Barray[], int ldb, const cuDoubleComplex* beta, cuDoubleComplex* const Carray[], int ldc, int batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const AP[], int lda, const hipblasDoubleComplex* const BP[], int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const CP[], int ldc, int batchCount); + // CHECK: blasStatus = hipblasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); + blasStatus = cublasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); + + long long int strideA = 0; + long long int strideB = 0; + long long int strideC = 0; #if CUDA_VERSION >= 
8000 // CHECK: hipblasDatatype_t DataType; @@ -367,6 +1244,26 @@ int main() { // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasNrm2Ex(hipblasHandle_t handle, int n, const void* x, hipblasDatatype_t xType, int incx, void* result, hipblasDatatype_t resultType, hipblasDatatype_t executionType); // CHECK: blasStatus = hipblasNrm2Ex(blasHandle, n, image, DataType, incx, image_2, DataType_2, DataType_3); blasStatus = cublasNrm2Ex(blasHandle, n, image, DataType, incx, image_2, DataType_2, DataType_3); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmStridedBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const float* alpha, const float* A, int lda, long long int strideA, const float* B, int ldb, long long int strideB, const float* beta, float* C, int ldc, long long int strideC, int batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgemmStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const float* alpha, const float* AP, int lda, long long strideA, const float* BP, int ldb, long long strideB, const float* beta, float* CP, int ldc, long long strideC, int batchCount); + // CHECK: blasStatus = hipblasSgemmStridedBatched(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, strideA, &fB, ldb, strideB, &fb, &fC, ldc, strideC, batchCount); + blasStatus = cublasSgemmStridedBatched(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, strideA, &fB, ldb, strideB, &fb, &fC, ldc, strideC, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemmStridedBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const double* alpha, const double* A, int lda, long long int strideA, const double* B, int ldb, long long int strideB, const double* beta, double* C, int ldc, long long int strideC, int batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgemmStridedBatched(hipblasHandle_t 
handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const double* alpha, const double* AP, int lda, long long strideA, const double* BP, int ldb, long long strideB, const double* beta, double* CP, int ldc, long long strideC, int batchCount); + // CHECK: blasStatus = hipblasDgemmStridedBatched(blasHandle, transa, transb, m, n, k, &da, &dA, lda, strideA, &dB, ldb, strideB, &db, &dC, ldc, strideC, batchCount); + blasStatus = cublasDgemmStridedBatched(blasHandle, transa, transb, m, n, k, &da, &dA, lda, strideA, &dB, ldb, strideB, &db, &dC, ldc, strideC, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemmStridedBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuComplex* alpha, const cuComplex* A, int lda, long long int strideA, const cuComplex* B, int ldb, long long int strideB, const cuComplex* beta, cuComplex* C, int ldc, long long int strideC, int batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgemmStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasComplex* alpha, const hipblasComplex* AP, int lda, long long strideA, const hipblasComplex* BP, int ldb, long long strideB, const hipblasComplex* beta, hipblasComplex* CP, int ldc, long long strideC, int batchCount); + // CHECK: blasStatus = hipblasCgemmStridedBatched(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, strideA, &complexB, ldb, strideB, &complexb, &complexC, ldc, strideC, batchCount); + blasStatus = cublasCgemmStridedBatched(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, strideA, &complexB, ldb, strideB, &complexb, &complexC, ldc, strideC, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemmStridedBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, 
int lda, long long int strideA, const cuDoubleComplex* B, int ldb, long long int strideB, const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc, long long int strideC, int batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgemmStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int lda, long long strideA, const hipblasDoubleComplex* BP, int ldb, long long strideB, const hipblasDoubleComplex* beta, hipblasDoubleComplex* CP, int ldc, long long strideC, int batchCount); + // CHECK: blasStatus = hipblasZgemmStridedBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, strideA, &dcomplexB, ldb, strideB, &dcomplexb, &dcomplexC, ldc, strideC, batchCount); + blasStatus = cublasZgemmStridedBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, strideA, &dcomplexB, ldb, strideB, &dcomplexb, &dcomplexC, ldc, strideC, batchCount); #endif #if CUDA_VERSION >= 9000