diff --git a/CMakeLists.txt b/CMakeLists.txt
index 66274d34..a8d36f32 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -68,7 +68,7 @@ if(LLVM_PACKAGE_VERSION VERSION_GREATER "9.0.1")
endif()
if(LLVM_PACKAGE_VERSION VERSION_EQUAL "15.0.0" OR LLVM_PACKAGE_VERSION VERSION_GREATER "15.0.0")
- target_link_libraries(hipify-clang PRIVATE LLVMWindowsDriver)
+ target_link_libraries(hipify-clang PRIVATE LLVMWindowsDriver clangSupport)
endif()
if(MSVC)
@@ -86,12 +86,6 @@ else()
set(addr_var )
endif()
-# [ToDo] Remove SWDEV_331863 related guards from CMakeLists.txt and HipifyAction.cpp when the blocker SWDEV_331863 is overcome
-option (SWDEV_331863 "Enables SWDEV-331863 blocker workaround" OFF)
-if(SWDEV_331863)
- add_definitions(-DSWDEV_331863)
-endif()
-
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_CFLAGS} ${addr_var}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CFLAGS} -DHIPIFY_CLANG_RES=\\\"${LLVM_LIBRARY_DIRS}/clang/${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}\\\" ${addr_var}")
diff --git a/README.md b/README.md
index c43bbd8c..e9e983c4 100644
--- a/README.md
+++ b/README.md
@@ -45,7 +45,7 @@ After applying all the matchers, the output HIP source is produced.
1. [**LLVM+CLANG**](http://releases.llvm.org) of at least version [4.0.0](http://releases.llvm.org/download.html#4.0.0); the latest stable and recommended release: [**14.0.6**](https://github.com/llvm/llvm-project/releases/tag/llvmorg-14.0.6).
-2. [**CUDA**](https://developer.nvidia.com/cuda-downloads) of at least version [8.0](https://developer.nvidia.com/cuda-80-ga2-download-archive), the latest supported version is [**11.7.0**](https://developer.nvidia.com/cuda-downloads).
+2. [**CUDA**](https://developer.nvidia.com/cuda-downloads) of at least version [8.0](https://developer.nvidia.com/cuda-80-ga2-download-archive), the latest supported version is [**11.7.1**](https://developer.nvidia.com/cuda-downloads).
@@ -160,7 +160,7 @@ After applying all the matchers, the output HIP source is produced.
14.0.2,
14.0.3,
14.0.4
- 11.7.0 |
+ 11.7.1 |
works only with the patch due to the clang's bug 54609
patch for 14.0.0**
patch for 14.0.1**
@@ -171,7 +171,7 @@ After applying all the matchers, the output HIP source is produced.
|
14.0.5,
14.0.6 |
- 11.7.0 |
+ 11.7.1 |
LATEST STABLE CONFIG |
@@ -385,7 +385,7 @@ Ubuntu 14: LLVM 4.0.0 - 7.1.0, CUDA 7.0 - 9.0, cuDNN 5.0.5 - 7.6.5
Ubuntu 16-18: LLVM 8.0.0 - 14.0.6, CUDA 8.0 - 10.2, cuDNN 5.1.10 - 8.0.5
-Ubuntu 20-21: LLVM 9.0.0 - 14.0.6, CUDA 8.0 - 11.7.0, cuDNN 5.1.10 - 8.4.1
+Ubuntu 20-21: LLVM 9.0.0 - 14.0.6, CUDA 8.0 - 11.7.1, cuDNN 5.1.10 - 8.4.1
Minimum build system requirements for the above configurations:
@@ -560,8 +560,8 @@ Testing Time: 6.03s
| 10.0.0 - 11.0.0 | 8.0 - 11.1 | 7.6.5 - 8.0.5 | 2017.15.9.30, 2019.16.8.3 | 3.19.2 | 3.9.1 |
| 11.0.1 - 11.1.0 | 8.0 - 11.2.2 | 7.6.5 - 8.0.5 | 2017.15.9.31, 2019.16.8.4 | 3.19.3 | 3.9.2 |
| 12.0.0 - 13.0.1 | 8.0 - 11.5.1 | 7.6.5 - 8.3.2 | 2017.15.9.43, 2019.16.11.9 | 3.22.2 | 3.10.2 |
-| 14.0.0 - 14.0.6 | 8.0 - 11.7.0 | 8.0.5 - 8.4.1 | 2017.15.9.49, 2019.16.11.17, 2022.17.2.6 | 3.23.2 | 3.10.5 |
-| 15.0.0git | 8.0 - 11.7.0 | 8.0.5 - 8.4.1 | 2017.15.9.49, 2019.16.11.17, 2022.17.2.6 | 3.23.2 | 3.10.5 |
+| 14.0.0 - 14.0.6 | 8.0 - 11.7.1 | 8.0.5 - 8.4.1 | 2017.15.9.49, 2019.16.11.17, 2022.17.2.6 | 3.24.0 | 3.10.6 |
+| 15.0.0git | 8.0 - 11.7.1 | 8.0.5 - 8.4.1 | 2017.15.9.49, 2019.16.11.17, 2022.17.2.6 | 3.24.0 | 3.10.6 |
*Building with testing support by `Visual Studio 17 2022` on `Windows 10`:*
diff --git a/bin/hipify-perl b/bin/hipify-perl
index 45e1e372..07914304 100755
--- a/bin/hipify-perl
+++ b/bin/hipify-perl
@@ -967,7 +967,7 @@ push(@exclude_filelist, split(',', $exclude_files));
%exclude_dirhash = map { $_ => 1 } @exclude_dirlist;
%exclude_filehash = map { $_ => 1 } @exclude_filelist;
-@statNames = ("error", "init", "version", "device", "context", "module", "memory", "virtual_memory", "stream_ordered_memory", "addressing", "stream", "event", "external_resource_interop", "stream_memory", "execution", "graph", "occupancy", "texture", "surface", "peer", "graphics", "interactions", "profiler", "openGL", "D3D9", "D3D10", "D3D11", "VDPAU", "EGL", "thread", "complex", "library", "device_library", "device_function", "include", "include_cuda_main_header", "type", "literal", "numeric_literal", "define", "extern_shared", "kernel_launch");
+@statNames = ("error", "init", "version", "device", "context", "module", "memory", "virtual_memory", "stream_ordered_memory", "addressing", "stream", "event", "external_resource_interop", "stream_memory", "execution", "graph", "occupancy", "texture", "surface", "peer", "graphics", "interactions", "profiler", "openGL", "D3D9", "D3D10", "D3D11", "VDPAU", "EGL", "thread", "complex", "library", "device_library", "device_function", "include", "include_cuda_main_header", "include_cuda_main_header_v2", "type", "literal", "numeric_literal", "define", "extern_shared", "kernel_launch");
sub totalStats {
my %count = %{shift()};
@@ -1599,7 +1599,6 @@ sub rocSubstitutions {
subst("cublasZtrsv", "rocblas_ztrsv", "library");
subst("cublasZtrsv_v2", "rocblas_ztrsv", "library");
subst("cublas.h", "rocblas.h", "include_cuda_main_header");
- subst("cublas_v2.h", "rocblas.h", "include_cuda_main_header");
subst("cublasAtomicsMode_t", "rocblas_atomics_mode", "type");
subst("cublasContext", "_rocblas_handle", "type");
subst("cublasDataType_t", "rocblas_datatype", "type");
@@ -1935,6 +1934,7 @@ sub simpleSubstitutions {
subst("cuGraphAddHostNode", "hipGraphAddHostNode", "graph");
subst("cuGraphAddKernelNode", "hipGraphAddKernelNode", "graph");
subst("cuGraphChildGraphNodeGetGraph", "hipGraphChildGraphNodeGetGraph", "graph");
+ subst("cuGraphClone", "hipGraphClone", "graph");
subst("cuGraphCreate", "hipGraphCreate", "graph");
subst("cuGraphDestroy", "hipGraphDestroy", "graph");
subst("cuGraphDestroyNode", "hipGraphDestroyNode", "graph");
@@ -1982,6 +1982,7 @@ sub simpleSubstitutions {
subst("cudaGraphAddMemcpyNodeToSymbol", "hipGraphAddMemcpyNodeToSymbol", "graph");
subst("cudaGraphAddMemsetNode", "hipGraphAddMemsetNode", "graph");
subst("cudaGraphChildGraphNodeGetGraph", "hipGraphChildGraphNodeGetGraph", "graph");
+ subst("cudaGraphClone", "hipGraphClone", "graph");
subst("cudaGraphCreate", "hipGraphCreate", "graph");
subst("cudaGraphDestroy", "hipGraphDestroy", "graph");
subst("cudaGraphDestroyNode", "hipGraphDestroyNode", "graph");
@@ -2075,6 +2076,7 @@ sub simpleSubstitutions {
subst("cudaGetTextureAlignmentOffset", "hipGetTextureAlignmentOffset", "texture");
subst("cudaGetTextureObjectResourceDesc", "hipGetTextureObjectResourceDesc", "texture");
subst("cudaGetTextureObjectResourceViewDesc", "hipGetTextureObjectResourceViewDesc", "texture");
+ subst("cudaGetTextureObjectTextureDesc", "hipGetTextureObjectTextureDesc", "texture");
subst("cudaGetTextureReference", "hipGetTextureReference", "texture");
subst("cudaUnbindTexture", "hipUnbindTexture", "texture");
subst("cudaCreateSurfaceObject", "hipCreateSurfaceObject", "surface");
@@ -3238,8 +3240,9 @@ sub simpleSubstitutions {
subst("caffe2\/operators\/spatial_batch_norm_op.h", "caffe2\/operators\/hip\/spatial_batch_norm_op_miopen.hip", "include");
subst("channel_descriptor.h", "hip\/channel_descriptor.h", "include");
subst("cooperative_groups.h", "hip\/hip_cooperative_groups.h", "include");
+ subst("cublas_api.h", "hipblas.h", "include");
subst("cuda_fp16.h", "hip\/hip_fp16.h", "include");
- subst("cuda_profiler_api.h", "hip\/hip_profile.h", "include");
+ subst("cuda_profiler_api.h", "hip\/hip_runtime_api.h", "include");
subst("cuda_runtime_api.h", "hip\/hip_runtime_api.h", "include");
subst("cuda_texture_types.h", "hip\/hip_texture_types.h", "include");
subst("cufftXt.h", "hipfftXt.h", "include");
@@ -3266,7 +3269,6 @@ sub simpleSubstitutions {
subst("cuComplex.h", "hip\/hip_complex.h", "include_cuda_main_header");
subst("cub\/cub.cuh", "hipcub\/hipcub.hpp", "include_cuda_main_header");
subst("cublas.h", "hipblas.h", "include_cuda_main_header");
- subst("cublas_v2.h", "hipblas.h", "include_cuda_main_header");
subst("cuda.h", "hip\/hip_runtime.h", "include_cuda_main_header");
subst("cuda_runtime.h", "hip\/hip_runtime.h", "include_cuda_main_header");
subst("cudnn.h", "hipDNN.h", "include_cuda_main_header");
@@ -4644,6 +4646,7 @@ sub simpleSubstitutions {
subst("REGISTER_CUDA_OPERATOR", "REGISTER_HIP_OPERATOR", "define");
subst("REGISTER_CUDA_OPERATOR_CREATOR", "REGISTER_HIP_OPERATOR_CREATOR", "define");
subst("_CubLog", "_HipcubLog", "define");
+ subst("__CUB_ALIGN_BYTES", "__HIPCUB_ALIGN_BYTES", "define");
subst("__CUDACC__", "__HIPCC__", "define");
subst("cudaArrayCubemap", "hipArrayCubemap", "define");
subst("cudaArrayDefault", "hipArrayDefault", "define");
@@ -6132,7 +6135,6 @@ sub warnUnsupportedFunctions {
"cudaGraphDebugDotFlagsExtSemasSignalNodeParams",
"cudaGraphDebugDotFlagsEventNodeParams",
"cudaGraphDebugDotFlags",
- "cudaGraphClone",
"cudaGraphAddMemFreeNode",
"cudaGraphAddMemAllocNode",
"cudaGraphAddExternalSemaphoresWaitNode",
@@ -6573,7 +6575,6 @@ sub warnUnsupportedFunctions {
"cuGraphExecExternalSemaphoresSignalNodeSetParams",
"cuGraphExecBatchMemOpNodeSetParams",
"cuGraphDebugDotPrint",
- "cuGraphClone",
"cuGraphBatchMemOpNodeSetParams",
"cuGraphBatchMemOpNodeGetParams",
"cuGraphAddMemsetNode",
@@ -6678,10 +6679,14 @@ sub warnUnsupportedFunctions {
"bsrilu02Info",
"bsric02Info",
"__curand_umul",
+ "__CUB_LP64__",
+ "_CUB_ASM_PTR_SIZE_",
+ "_CUB_ASM_PTR_",
"PATCH_LEVEL",
"MINOR_VERSION",
"MAX_CUFFT_ERROR",
"MAJOR_VERSION",
+ "CubVector",
"CubDebugExit",
"CUuserObject_st",
"CUuserObject_flags_enum",
@@ -7688,14 +7693,22 @@ sub warnUnsupportedFunctions {
"CUDA_ARRAY3D_2DARRAY",
"CUB_USE_COOPERATIVE_GROUPS",
"CUB_SUBSCRIPTION_FACTOR",
+ "CUB_STATIC_ASSERT",
"CUB_SMEM_BANKS",
"CUB_RUNTIME_ENABLED",
+ "CUB_ROUND_UP_NEAREST",
+ "CUB_ROUND_DOWN_NEAREST",
+ "CUB_QUOTIENT_FLOOR",
+ "CUB_QUOTIENT_CEILING",
"CUB_PTX_SUBSCRIPTION_FACTOR",
"CUB_PTX_SMEM_BANKS",
"CUB_PTX_PREFER_CONFLICT_OVER_PADDING",
"CUB_PTX_LOG_WARP_THREADS",
"CUB_PTX_LOG_SMEM_BANKS",
+ "CUB_PREVENT_MACRO_SUBSTITUTION",
"CUB_PREFER_CONFLICT_OVER_PADDING",
+ "CUB_MSVC_VERSION_FULL",
+ "CUB_MSVC_VERSION",
"CUB_MAX_DEVICES",
"CUB_LOG_WARP_THREADS",
"CUB_LOG_SMEM_BANKS",
@@ -7703,7 +7716,35 @@ sub warnUnsupportedFunctions {
"CUB_IS_DEVICE_CODE",
"CUB_INCLUDE_HOST_CODE",
"CUB_INCLUDE_DEVICE_CODE",
- "CUB_CPP_DIALECT"
+ "CUB_IGNORE_DEPRECATED_DIALECT",
+ "CUB_IGNORE_DEPRECATED_CPP_DIALECT",
+ "CUB_IGNORE_DEPRECATED_CPP_11",
+ "CUB_IGNORE_DEPRECATED_COMPILER",
+ "CUB_IGNORE_DEPRECATED_API",
+ "CUB_HOST_COMPILER_UNKNOWN",
+ "CUB_HOST_COMPILER_MSVC",
+ "CUB_HOST_COMPILER_GCC",
+ "CUB_HOST_COMPILER_CLANG",
+ "CUB_HOST_COMPILER",
+ "CUB_DEVICE_COMPILER_UNKNOWN",
+ "CUB_DEVICE_COMPILER_NVCC",
+ "CUB_DEVICE_COMPILER_MSVC",
+ "CUB_DEVICE_COMPILER_GCC",
+ "CUB_DEVICE_COMPILER_CLANG",
+ "CUB_DEVICE_COMPILER",
+ "CUB_DEPRECATED",
+ "CUB_DEFINE_VECTOR_TYPE",
+ "CUB_DEFINE_DETECT_NESTED_TYPE",
+ "CUB_CPP_DIALECT",
+ "CUB_CPLUSPLUS",
+ "CUB_COMP_DEPR_IMPL1",
+ "CUB_COMP_DEPR_IMPL0",
+ "CUB_COMP_DEPR_IMPL",
+ "CUB_COMPILER_DEPRECATION_SOFT",
+ "CUB_COMPILER_DEPRECATION",
+ "CUB_CAT_",
+ "CUB_CAT",
+ "CUB_ALIGN"
)
{
my $mt = m/($func)/g;
@@ -8153,7 +8194,7 @@ while (@ARGV) {
transformHostFunctions();
# TODO: would like to move this code outside loop but it uses $_ which contains the whole file
unless ($no_output) {
- my $apiCalls = $ft{'error'} + $ft{'init'} + $ft{'version'} + $ft{'device'} + $ft{'context'} + $ft{'module'} + $ft{'memory'} + $ft{'virtual_memory'} + $ft{'stream_ordered_memory'} + $ft{'addressing'} + $ft{'stream'} + $ft{'event'} + $ft{'external_resource_interop'} + $ft{'stream_memory'} + $ft{'execution'} + $ft{'graph'} + $ft{'occupancy'} + $ft{'texture'} + $ft{'surface'} + $ft{'peer'} + $ft{'graphics'} + $ft{'interactions'} + $ft{'profiler'} + $ft{'openGL'} + $ft{'D3D9'} + $ft{'D3D10'} + $ft{'D3D11'} + $ft{'VDPAU'} + $ft{'EGL'} + $ft{'thread'} + $ft{'complex'} + $ft{'library'} + $ft{'device_library'} + $ft{'include'} + $ft{'include_cuda_main_header'} + $ft{'type'} + $ft{'literal'} + $ft{'numeric_literal'} + $ft{'define'};
+ my $apiCalls = $ft{'error'} + $ft{'init'} + $ft{'version'} + $ft{'device'} + $ft{'context'} + $ft{'module'} + $ft{'memory'} + $ft{'virtual_memory'} + $ft{'stream_ordered_memory'} + $ft{'addressing'} + $ft{'stream'} + $ft{'event'} + $ft{'external_resource_interop'} + $ft{'stream_memory'} + $ft{'execution'} + $ft{'graph'} + $ft{'occupancy'} + $ft{'texture'} + $ft{'surface'} + $ft{'peer'} + $ft{'graphics'} + $ft{'interactions'} + $ft{'profiler'} + $ft{'openGL'} + $ft{'D3D9'} + $ft{'D3D10'} + $ft{'D3D11'} + $ft{'VDPAU'} + $ft{'EGL'} + $ft{'thread'} + $ft{'complex'} + $ft{'library'} + $ft{'device_library'} + $ft{'include'} + $ft{'include_cuda_main_header'} + $ft{'include_cuda_main_header_v2'} + $ft{'type'} + $ft{'literal'} + $ft{'numeric_literal'} + $ft{'define'};
my $kernStuff = $hasDeviceCode + $ft{'kernel_launch'} + $ft{'device_function'};
my $totalCalls = $apiCalls + $kernStuff;
$is_dos = m/\r\n$/;
diff --git a/doc/markdown/CUBLAS_API_supported_by_HIP.md b/doc/markdown/CUBLAS_API_supported_by_HIP.md
index c487dc3a..238db117 100644
--- a/doc/markdown/CUBLAS_API_supported_by_HIP.md
+++ b/doc/markdown/CUBLAS_API_supported_by_HIP.md
@@ -63,7 +63,7 @@
|`CUBLAS_GEMM_ALGO8_TENSOR_OP`|9.2| | | | | | | |
|`CUBLAS_GEMM_ALGO9`|9.0| | | | | | | |
|`CUBLAS_GEMM_ALGO9_TENSOR_OP`|9.2| | | | | | | |
-|`CUBLAS_GEMM_DEFAULT`|8.0| | |`HIPBLAS_GEMM_DEFAULT`|1.8.2| | | |
+|`CUBLAS_GEMM_DEFAULT`|9.0| | |`HIPBLAS_GEMM_DEFAULT`|1.8.2| | | |
|`CUBLAS_GEMM_DEFAULT_TENSOR_OP`|9.0| | | | | | | |
|`CUBLAS_GEMM_DFALT`|8.0| | |`HIPBLAS_GEMM_DEFAULT`|1.8.2| | | |
|`CUBLAS_GEMM_DFALT_TENSOR_OP`|9.0| | | | | | | |
diff --git a/doc/markdown/CUB_API_supported_by_HIP.md b/doc/markdown/CUB_API_supported_by_HIP.md
index 51c19d90..c0015286 100644
--- a/doc/markdown/CUB_API_supported_by_HIP.md
+++ b/doc/markdown/CUB_API_supported_by_HIP.md
@@ -4,19 +4,50 @@
|**CUDA**|**A**|**D**|**R**|**HIP**|**A**|**D**|**R**|**E**|
|:--|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|
+|`CUB_ALIGN`| | | | | | | | |
+|`CUB_CAT`| | | | | | | | |
+|`CUB_CAT_`| | | | | | | | |
+|`CUB_COMPILER_DEPRECATION`| | | | | | | | |
+|`CUB_COMPILER_DEPRECATION_SOFT`| | | | | | | | |
+|`CUB_COMP_DEPR_IMPL`| | | | | | | | |
+|`CUB_COMP_DEPR_IMPL0`| | | | | | | | |
+|`CUB_COMP_DEPR_IMPL1`| | | | | | | | |
+|`CUB_CPLUSPLUS`| | | | | | | | |
|`CUB_CPP_DIALECT`| | | | | | | | |
+|`CUB_DEFINE_DETECT_NESTED_TYPE`| | | | | | | | |
+|`CUB_DEFINE_VECTOR_TYPE`| | | | | | | | |
+|`CUB_DEPRECATED`| | | | | | | | |
+|`CUB_DEVICE_COMPILER`| | | | | | | | |
+|`CUB_DEVICE_COMPILER_CLANG`| | | | | | | | |
+|`CUB_DEVICE_COMPILER_GCC`| | | | | | | | |
+|`CUB_DEVICE_COMPILER_MSVC`| | | | | | | | |
+|`CUB_DEVICE_COMPILER_NVCC`| | | | | | | | |
+|`CUB_DEVICE_COMPILER_UNKNOWN`| | | | | | | | |
+|`CUB_HOST_COMPILER`| | | | | | | | |
+|`CUB_HOST_COMPILER_CLANG`| | | | | | | | |
+|`CUB_HOST_COMPILER_GCC`| | | | | | | | |
+|`CUB_HOST_COMPILER_MSVC`| | | | | | | | |
+|`CUB_HOST_COMPILER_UNKNOWN`| | | | | | | | |
+|`CUB_IGNORE_DEPRECATED_API`| | | | | | | | |
+|`CUB_IGNORE_DEPRECATED_COMPILER`| | | | | | | | |
+|`CUB_IGNORE_DEPRECATED_CPP_11`| | | | | | | | |
+|`CUB_IGNORE_DEPRECATED_CPP_DIALECT`| | | | | | | | |
+|`CUB_IGNORE_DEPRECATED_DIALECT`| | | | | | | | |
|`CUB_INCLUDE_DEVICE_CODE`| | | | | | | | |
|`CUB_INCLUDE_HOST_CODE`| | | | | | | | |
|`CUB_IS_DEVICE_CODE`| | | | | | | | |
|`CUB_IS_HOST_CODE`| | | | | | | | |
|`CUB_LOG_SMEM_BANKS`| | | | | | | | |
|`CUB_LOG_WARP_THREADS`| | | | | | | | |
-|`CUB_MAX`| | | |`CUB_MAX`| | | | |
+|`CUB_MAX`| | | |`CUB_MAX`|4.5.0| | | |
|`CUB_MAX_DEVICES`| | | | | | | | |
-|`CUB_MIN`| | | |`CUB_MIN`|2.5.0| | | |
+|`CUB_MIN`| | | |`CUB_MIN`|4.5.0| | | |
+|`CUB_MSVC_VERSION`| | | | | | | | |
+|`CUB_MSVC_VERSION_FULL`| | | | | | | | |
|`CUB_NAMESPACE_BEGIN`| | | |`BEGIN_HIPCUB_NAMESPACE`|2.5.0| | | |
|`CUB_NAMESPACE_END`| | | |`END_HIPCUB_NAMESPACE`|2.5.0| | | |
|`CUB_PREFER_CONFLICT_OVER_PADDING`| | | | | | | | |
+|`CUB_PREVENT_MACRO_SUBSTITUTION`| | | | | | | | |
|`CUB_PTX_ARCH`| | | |`HIPCUB_ARCH`|2.5.0| | | |
|`CUB_PTX_LOG_SMEM_BANKS`| | | | | | | | |
|`CUB_PTX_LOG_WARP_THREADS`| | | | | | | | |
@@ -24,15 +55,25 @@
|`CUB_PTX_SMEM_BANKS`| | | | | | | | |
|`CUB_PTX_SUBSCRIPTION_FACTOR`| | | | | | | | |
|`CUB_PTX_WARP_THREADS`| | | |`HIPCUB_WARP_THREADS`|2.5.0| | | |
+|`CUB_QUOTIENT_CEILING`| | | | | | | | |
+|`CUB_QUOTIENT_FLOOR`| | | | | | | | |
+|`CUB_ROUND_DOWN_NEAREST`| | | | | | | | |
+|`CUB_ROUND_UP_NEAREST`| | | | | | | | |
|`CUB_RUNTIME_ENABLED`| | | | | | | | |
|`CUB_RUNTIME_FUNCTION`| | | |`HIPCUB_RUNTIME_FUNCTION`|2.5.0| | | |
|`CUB_SMEM_BANKS`| | | | | | | | |
+|`CUB_STATIC_ASSERT`| | | | | | | | |
|`CUB_STDERR`| | | |`HIPCUB_STDERR`|2.5.0| | | |
|`CUB_SUBSCRIPTION_FACTOR`| | | | | | | | |
|`CUB_USE_COOPERATIVE_GROUPS`| | | | | | | | |
|`CubDebug`| | | |`HipcubDebug`|2.5.0| | | |
|`CubDebugExit`| | | | | | | | |
+|`CubVector`| | | | | | | | |
+|`_CUB_ASM_PTR_`| | | | | | | | |
+|`_CUB_ASM_PTR_SIZE_`| | | | | | | | |
|`_CubLog`| | | |`_HipcubLog`|2.5.0| | | |
+|`__CUB_ALIGN_BYTES`| | | |`__HIPCUB_ALIGN_BYTES`|4.5.0| | | |
+|`__CUB_LP64__`| | | | | | | | |
\*A - Added; D - Deprecated; R - Removed; E - Experimental
\ No newline at end of file
diff --git a/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md b/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md
index 976c1df1..1ed82e98 100644
--- a/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md
+++ b/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md
@@ -1525,7 +1525,7 @@
|`cuGraphBatchMemOpNodeGetParams`|11.7| | | | | | | |
|`cuGraphBatchMemOpNodeSetParams`|11.7| | | | | | | |
|`cuGraphChildGraphNodeGetGraph`|10.0| | |`hipGraphChildGraphNodeGetGraph`|5.0.0| | | |
-|`cuGraphClone`|10.0| | | | | | | |
+|`cuGraphClone`|10.0| | |`hipGraphClone`|5.0.0| | | |
|`cuGraphCreate`|10.0| | |`hipGraphCreate`|4.3.0| | | |
|`cuGraphDebugDotPrint`|11.3| | | | | | | |
|`cuGraphDestroy`|10.0| | |`hipGraphDestroy`|4.3.0| | | |
diff --git a/doc/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md b/doc/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md
index ac961553..4878393c 100644
--- a/doc/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md
+++ b/doc/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md
@@ -392,26 +392,26 @@
|**CUDA**|**A**|**D**|**R**|**HIP**|**A**|**D**|**R**|**E**|
|:--|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|
-|`cuTexObjectGetTextureDesc`|9.0| | |`hipGetTextureObjectTextureDesc`|1.7.0| | | |
|`cudaCreateTextureObject`| | | |`hipCreateTextureObject`|1.7.0| | | |
|`cudaDestroyTextureObject`| | | |`hipDestroyTextureObject`|1.7.0| | | |
|`cudaGetTextureObjectResourceDesc`| | | |`hipGetTextureObjectResourceDesc`|1.7.0| | | |
|`cudaGetTextureObjectResourceViewDesc`| | | |`hipGetTextureObjectResourceViewDesc`|1.7.0| | | |
+|`cudaGetTextureObjectTextureDesc`| | | |`hipGetTextureObjectTextureDesc`|1.7.0| | | |
## **28. Surface Object Management**
|**CUDA**|**A**|**D**|**R**|**HIP**|**A**|**D**|**R**|**E**|
|:--|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|
-|`cudaCreateSurfaceObject`|9.0| | |`hipCreateSurfaceObject`|1.9.0| | | |
-|`cudaDestroySurfaceObject`|9.0| | |`hipDestroySurfaceObject`|1.9.0| | | |
-|`cudaGetSurfaceObjectResourceDesc`|9.0| | | | | | | |
+|`cudaCreateSurfaceObject`| | | |`hipCreateSurfaceObject`|1.9.0| | | |
+|`cudaDestroySurfaceObject`| | | |`hipDestroySurfaceObject`|1.9.0| | | |
+|`cudaGetSurfaceObjectResourceDesc`| | | | | | | | |
## **29. Version Management**
|**CUDA**|**A**|**D**|**R**|**HIP**|**A**|**D**|**R**|**E**|
|:--|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|
-|`cudaDriverGetVersion`|9.0| | |`hipDriverGetVersion`|1.6.0| | | |
-|`cudaRuntimeGetVersion`|9.0| | |`hipRuntimeGetVersion`|1.6.0| | | |
+|`cudaDriverGetVersion`| | | |`hipDriverGetVersion`|1.6.0| | | |
+|`cudaRuntimeGetVersion`| | | |`hipRuntimeGetVersion`|1.6.0| | | |
## **30. Graph Management**
@@ -437,7 +437,7 @@
|`cudaGraphAddMemcpyNodeToSymbol`|11.1| | |`hipGraphAddMemcpyNodeToSymbol`|5.0.0| | | |
|`cudaGraphAddMemsetNode`|10.0| | |`hipGraphAddMemsetNode`|4.3.0| | | |
|`cudaGraphChildGraphNodeGetGraph`|10.0| | |`hipGraphChildGraphNodeGetGraph`|5.0.0| | | |
-|`cudaGraphClone`|10.0| | | | | | | |
+|`cudaGraphClone`|10.0| | |`hipGraphClone`|5.0.0| | | |
|`cudaGraphCreate`|10.0| | |`hipGraphCreate`|4.3.0| | | |
|`cudaGraphDebugDotPrint`|11.3| | | | | | | |
|`cudaGraphDestroy`|10.0| | |`hipGraphDestroy`|4.3.0| | | |
diff --git a/src/CUDA2HIP.cpp b/src/CUDA2HIP.cpp
index b3f33a1e..1711966c 100644
--- a/src/CUDA2HIP.cpp
+++ b/src/CUDA2HIP.cpp
@@ -35,13 +35,14 @@ const std::map CUDA_INCLUDE_MAP {
{"cuda_texture_types.h", {"hip/hip_texture_types.h", "", CONV_INCLUDE, API_RUNTIME, 0}},
{"texture_fetch_functions.h", {"", "", CONV_INCLUDE, API_RUNTIME, 0}},
{"vector_types.h", {"hip/hip_vector_types.h", "", CONV_INCLUDE, API_RUNTIME, 0}},
- {"cuda_profiler_api.h", {"hip/hip_profile.h", "", CONV_INCLUDE, API_RUNTIME, 0}},
+ {"cuda_profiler_api.h", {"hip/hip_runtime_api.h", "", CONV_INCLUDE, API_RUNTIME, 0}},
{"cooperative_groups.h", {"hip/hip_cooperative_groups.h", "", CONV_INCLUDE, API_RUNTIME, 0}},
// cuComplex includes
{"cuComplex.h", {"hip/hip_complex.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_COMPLEX, 0}},
// cuBLAS includes
{"cublas.h", {"hipblas.h", "rocblas.h", CONV_INCLUDE_CUDA_MAIN_H, API_BLAS, 0}},
- {"cublas_v2.h", {"hipblas.h", "rocblas.h", CONV_INCLUDE_CUDA_MAIN_H, API_BLAS, 0}},
+ {"cublas_v2.h", {"hipblas.h", "rocblas.h", CONV_INCLUDE_CUDA_MAIN_V2_H, API_BLAS, 0}},
+ {"cublas_api.h", {"hipblas.h", "rocblas.h", CONV_INCLUDE, API_BLAS, 0}},
// cuRAND includes
{"curand.h", {"hiprand.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_RAND, 0}},
{"curand_kernel.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}},
diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp
index fecd2c8c..0f6f5ae9 100644
--- a/src/CUDA2HIP_BLAS_API_functions.cpp
+++ b/src/CUDA2HIP_BLAS_API_functions.cpp
@@ -78,27 +78,30 @@ const std::map CUDA_BLAS_FUNCTION_MAP {
{"cublasGetCudartVersion", {"hipblasGetCudartVersion", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}},
// NRM2
- {"cublasSnrm2", {"hipblasSnrm2", "rocblas_snrm2", CONV_LIB_FUNC, API_BLAS, 5}},
- {"cublasDnrm2", {"hipblasDnrm2", "rocblas_dnrm2", CONV_LIB_FUNC, API_BLAS, 5}},
- {"cublasScnrm2", {"hipblasScnrm2", "rocblas_scnrm2", CONV_LIB_FUNC, API_BLAS, 5}},
- {"cublasDznrm2", {"hipblasDznrm2", "rocblas_dznrm2", CONV_LIB_FUNC, API_BLAS, 5}},
+ // NRM2 functions' signatures differ from _v2 ones, hipblas and rocblas NRM2 functions have mapping to NRM2_v2 functions only
+ {"cublasSnrm2", {"hipblasSnrm2", "rocblas_snrm2", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}},
+ {"cublasDnrm2", {"hipblasDnrm2", "rocblas_dnrm2", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}},
+ {"cublasScnrm2", {"hipblasScnrm2", "rocblas_scnrm2", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}},
+ {"cublasDznrm2", {"hipblasDznrm2", "rocblas_dznrm2", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}},
{"cublasNrm2Ex", {"hipblasNrm2Ex", "rocblas_nrm2_ex", CONV_LIB_FUNC, API_BLAS, 5}},
// DOT
- {"cublasSdot", {"hipblasSdot", "rocblas_sdot", CONV_LIB_FUNC, API_BLAS, 5}},
- {"cublasDdot", {"hipblasDdot", "rocblas_ddot", CONV_LIB_FUNC, API_BLAS, 5}},
- {"cublasCdotu", {"hipblasCdotu", "rocblas_cdotu", CONV_LIB_FUNC, API_BLAS, 5}},
- {"cublasCdotc", {"hipblasCdotc", "rocblas_cdotc", CONV_LIB_FUNC, API_BLAS, 5}},
- {"cublasZdotu", {"hipblasZdotu", "rocblas_zdotu", CONV_LIB_FUNC, API_BLAS, 5}},
- {"cublasZdotc", {"hipblasZdotc", "rocblas_zdotc", CONV_LIB_FUNC, API_BLAS, 5}},
+ // DOT functions' signatures differ from _v2 ones, hipblas and rocblas DOT functions have mapping to DOT_v2 functions only
+ {"cublasSdot", {"hipblasSdot", "rocblas_sdot", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}},
+ {"cublasDdot", {"hipblasDdot", "rocblas_ddot", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}},
+ {"cublasCdotu", {"hipblasCdotu", "rocblas_cdotu", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}},
+ {"cublasCdotc", {"hipblasCdotc", "rocblas_cdotc", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}},
+ {"cublasZdotu", {"hipblasZdotu", "rocblas_zdotu", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}},
+ {"cublasZdotc", {"hipblasZdotc", "rocblas_zdotc", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}},
// SCAL
- {"cublasSscal", {"hipblasSscal", "rocblas_sscal", CONV_LIB_FUNC, API_BLAS, 5}},
- {"cublasDscal", {"hipblasDscal", "rocblas_dscal", CONV_LIB_FUNC, API_BLAS, 5}},
- {"cublasCscal", {"hipblasCscal", "rocblas_cscal", CONV_LIB_FUNC, API_BLAS, 5}},
- {"cublasCsscal", {"hipblasCsscal", "rocblas_csscal", CONV_LIB_FUNC, API_BLAS, 5}},
- {"cublasZscal", {"hipblasZscal", "rocblas_zscal", CONV_LIB_FUNC, API_BLAS, 5}},
- {"cublasZdscal", {"hipblasZdscal", "rocblas_zdscal", CONV_LIB_FUNC, API_BLAS, 5}},
+ // SCAL functions' signatures differ from _v2 ones, hipblas and rocblas SCAL functions have mapping to SCAL_v2 functions only
+ {"cublasSscal", {"hipblasSscal", "rocblas_sscal", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}},
+ {"cublasDscal", {"hipblasDscal", "rocblas_dscal", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}},
+ {"cublasCscal", {"hipblasCscal", "rocblas_cscal", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}},
+ {"cublasCsscal", {"hipblasCsscal", "rocblas_csscal", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}},
+ {"cublasZscal", {"hipblasZscal", "rocblas_zscal", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}},
+ {"cublasZdscal", {"hipblasZdscal", "rocblas_zdscal", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}},
// AXPY
{"cublasSaxpy", {"hipblasSaxpy", "rocblas_saxpy", CONV_LIB_FUNC, API_BLAS, 5}},
diff --git a/src/CUDA2HIP_BLAS_API_types.cpp b/src/CUDA2HIP_BLAS_API_types.cpp
index 3cd4caf1..997183f1 100644
--- a/src/CUDA2HIP_BLAS_API_types.cpp
+++ b/src/CUDA2HIP_BLAS_API_types.cpp
@@ -37,7 +37,7 @@ const std::map CUDA_BLAS_TYPE_NAME_MAP {
{"CUBLAS_OP_T", {"HIPBLAS_OP_T", "rocblas_operation_transpose", CONV_NUMERIC_LITERAL, API_BLAS, 2}},
{"CUBLAS_OP_C", {"HIPBLAS_OP_C", "rocblas_operation_conjugate_transpose", CONV_NUMERIC_LITERAL, API_BLAS, 2}},
{"CUBLAS_OP_HERMITAN", {"HIPBLAS_OP_C", "rocblas_operation_conjugate_transpose", CONV_NUMERIC_LITERAL, API_BLAS, 2}},
- {"CUBLAS_OP_CONJG", {"HIPBLAS_OP_CONJG", "rocblas_operation_conjugate", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}},
+ {"CUBLAS_OP_CONJG", {"HIPBLAS_OP_CONJG", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}},
// Blas statuses
{"cublasStatus", {"hipblasStatus_t", "rocblas_status", CONV_TYPE, API_BLAS, 2}},
@@ -205,7 +205,7 @@ const std::map CUDA_BLAS_TYPE_NAME_VER_MAP {
{"CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION", {CUDA_110, CUDA_0, CUDA_0}},
{"cublasGemmAlgo_t", {CUDA_80, CUDA_0, CUDA_0}},
{"CUBLAS_GEMM_DFALT", {CUDA_80, CUDA_0, CUDA_0}},
- {"CUBLAS_GEMM_DEFAULT", {CUDA_80, CUDA_0, CUDA_0}},
+ {"CUBLAS_GEMM_DEFAULT", {CUDA_90, CUDA_0, CUDA_0}},
{"CUBLAS_GEMM_ALGO0", {CUDA_80, CUDA_0, CUDA_0}},
{"CUBLAS_GEMM_ALGO1", {CUDA_80, CUDA_0, CUDA_0}},
{"CUBLAS_GEMM_ALGO2", {CUDA_80, CUDA_0, CUDA_0}},
diff --git a/src/CUDA2HIP_CUB_API_types.cpp b/src/CUDA2HIP_CUB_API_types.cpp
index ef5f406f..e3650eba 100644
--- a/src/CUDA2HIP_CUB_API_types.cpp
+++ b/src/CUDA2HIP_CUB_API_types.cpp
@@ -29,6 +29,9 @@ const std::map CUDA_CUB_NAMESPACE_MAP {
// Maps the names of CUDA CUB API types to the corresponding HIP types
const std::map CUDA_CUB_TYPE_NAME_MAP {
+ // 1. Structs
+ {"CubVector", {"HipcubVector", "", CONV_TYPE, API_CUB, 1, HIP_UNSUPPORTED}},
+
// 5. Defines
{"CUB_STDERR", {"HIPCUB_STDERR", "", CONV_DEFINE, API_CUB, 1}},
{"CubDebug", {"HipcubDebug", "", CONV_DEFINE, API_CUB, 1}},
@@ -59,6 +62,48 @@ const std::map CUDA_CUB_TYPE_NAME_MAP {
{"CUB_PTX_PREFER_CONFLICT_OVER_PADDING", {"HIPCUB_PREFER_CONFLICT_OVER_PADDING", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
{"CUB_MAX", {"CUB_MAX", "", CONV_DEFINE, API_CUB, 1}},
{"CUB_MIN", {"CUB_MIN", "", CONV_DEFINE, API_CUB, 1}},
+ {"__CUB_ALIGN_BYTES", {"__HIPCUB_ALIGN_BYTES", "", CONV_DEFINE, API_CUB, 1}},
+ {"CUB_DEFINE_VECTOR_TYPE", {"HIPCUB_DEFINE_VECTOR_TYPE", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_DEFINE_DETECT_NESTED_TYPE", {"HIPCUB_DEFINE_DETECT_NESTED_TYPE", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"__CUB_LP64__", {"__HIPCUB_LP64__", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"_CUB_ASM_PTR_", {"_HIPCUB_ASM_PTR_", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"_CUB_ASM_PTR_SIZE_", {"_HIPCUB_ASM_PTR_SIZE_", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_USE_COOPERATIVE_GROUPS", {"HIPCUB_USE_COOPERATIVE_GROUPS", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_ALIGN", {"HIPCUB_ALIGN", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_PREVENT_MACRO_SUBSTITUTION", {"HIPCUB_PREVENT_MACRO_SUBSTITUTION", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_QUOTIENT_FLOOR", {"HIPCUB_QUOTIENT_FLOOR", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_QUOTIENT_CEILING", {"HIPCUB_QUOTIENT_CEILING", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_ROUND_UP_NEAREST", {"HIPCUB_ROUND_UP_NEAREST", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_ROUND_DOWN_NEAREST", {"HIPCUB_ROUND_DOWN_NEAREST", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_STATIC_ASSERT", {"HIPCUB_STATIC_ASSERT", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_CAT", {"HIPCUB_CAT", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_CAT_", {"HIPCUB_CAT_", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_IGNORE_DEPRECATED_API", {"HIPCUB_IGNORE_DEPRECATED_API", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_DEPRECATED", {"HIPCUB_DEPRECATED", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_HOST_COMPILER", {"HIPCUB_HOST_COMPILER", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_HOST_COMPILER_MSVC", {"HIPCUB_HOST_COMPILER_MSVC", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_HOST_COMPILER_CLANG", {"HIPCUB_HOST_COMPILER_CLANG", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_HOST_COMPILER_GCC", {"HIPCUB_HOST_COMPILER_GCC", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_HOST_COMPILER_UNKNOWN", {"HIPCUB_HOST_COMPILER_UNKNOWN", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_DEVICE_COMPILER", {"HIPCUB_DEVICE_COMPILER", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_DEVICE_COMPILER_MSVC", {"HIPCUB_DEVICE_COMPILER_MSVC", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_DEVICE_COMPILER_CLANG", {"HIPCUB_DEVICE_COMPILER_CLANG", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_DEVICE_COMPILER_GCC", {"HIPCUB_DEVICE_COMPILER_GCC", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_DEVICE_COMPILER_NVCC", {"HIPCUB_DEVICE_COMPILER_NVCC", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_DEVICE_COMPILER_UNKNOWN", {"HIPCUB_DEVICE_COMPILER_UNKNOWN", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_IGNORE_DEPRECATED_DIALECT", {"HIPCUB_IGNORE_DEPRECATED_DIALECT", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_IGNORE_DEPRECATED_CPP_DIALECT", {"HIPCUB_IGNORE_DEPRECATED_CPP_DIALECT", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_IGNORE_DEPRECATED_CPP_11", {"HIPCUB_IGNORE_DEPRECATED_CPP_11", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_IGNORE_DEPRECATED_CPP_11", {"HIPCUB_IGNORE_DEPRECATED_CPP_11", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_IGNORE_DEPRECATED_COMPILER", {"HIPCUB_IGNORE_DEPRECATED_COMPILER", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_MSVC_VERSION", {"HIPCUB_MSVC_VERSION", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_MSVC_VERSION_FULL", {"HIPCUB_MSVC_VERSION_FULL", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_CPLUSPLUS", {"HIPCUB_CPLUSPLUS", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_COMP_DEPR_IMPL", {"HIPCUB_COMP_DEPR_IMPL", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_COMP_DEPR_IMPL0", {"HIPCUB_COMP_DEPR_IMPL0", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_COMP_DEPR_IMPL1", {"HIPCUB_COMP_DEPR_IMPL1", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_COMPILER_DEPRECATION", {"HIPCUB_COMPILER_DEPRECATION", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
+ {"CUB_COMPILER_DEPRECATION_SOFT", {"HIPCUB_COMPILER_DEPRECATION_SOFT", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}},
};
const std::map CUDA_CUB_TYPE_NAME_VER_MAP {
@@ -73,7 +118,9 @@ const std::map HIP_CUB_TYPE_NAME_VER_MAP {
{"HIPCUB_ARCH", {HIP_2050, HIP_0, HIP_0 }},
{"BEGIN_HIPCUB_NAMESPACE", {HIP_2050, HIP_0, HIP_0 }},
{"END_HIPCUB_NAMESPACE", {HIP_2050, HIP_0, HIP_0 }},
- {"CUB_MIN", {HIP_2050, HIP_0, HIP_0 }},
+ {"CUB_MAX", {HIP_4050, HIP_0, HIP_0 }},
+ {"CUB_MIN", {HIP_4050, HIP_0, HIP_0 }},
+ {"__HIPCUB_ALIGN_BYTES", {HIP_4050, HIP_0, HIP_0 }},
};
const std::map CUDA_CUB_API_SECTION_MAP {
diff --git a/src/CUDA2HIP_Driver_API_functions.cpp b/src/CUDA2HIP_Driver_API_functions.cpp
index a3f4d2b3..81016ddc 100644
--- a/src/CUDA2HIP_Driver_API_functions.cpp
+++ b/src/CUDA2HIP_Driver_API_functions.cpp
@@ -591,7 +591,7 @@ const std::map CUDA_DRIVER_FUNCTION_MAP {
// cudaGraphChildGraphNodeGetGraph
{"cuGraphChildGraphNodeGetGraph", {"hipGraphChildGraphNodeGetGraph", "", CONV_GRAPH, API_DRIVER, 21}},
// cudaGraphClone
- {"cuGraphClone", {"hipGraphClone", "", CONV_GRAPH, API_DRIVER, 21, HIP_UNSUPPORTED}},
+ {"cuGraphClone", {"hipGraphClone", "", CONV_GRAPH, API_DRIVER, 21}},
// cudaGraphCreate
{"cuGraphCreate", {"hipGraphCreate", "", CONV_GRAPH, API_DRIVER, 21}},
// cudaGraphDebugDotPrint
diff --git a/src/CUDA2HIP_Runtime_API_functions.cpp b/src/CUDA2HIP_Runtime_API_functions.cpp
index 3c525c73..85f60896 100644
--- a/src/CUDA2HIP_Runtime_API_functions.cpp
+++ b/src/CUDA2HIP_Runtime_API_functions.cpp
@@ -640,8 +640,8 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP {
// cuTexObjectGetResourceViewDesc
{"cudaGetTextureObjectResourceViewDesc", {"hipGetTextureObjectResourceViewDesc", "", CONV_TEXTURE, API_RUNTIME, 27}},
// no analogue
- // NOTE: Not equal to cudaGetTextureObjectTextureDesc due to different signatures
- {"cuTexObjectGetTextureDesc", {"hipGetTextureObjectTextureDesc", "", CONV_TEXTURE, API_RUNTIME, 27}},
+ // NOTE: Not equal to cuTexObjectGetTextureDesc due to different signatures
+ {"cudaGetTextureObjectTextureDesc", {"hipGetTextureObjectTextureDesc", "", CONV_TEXTURE, API_RUNTIME, 27}},
// 28. Surface Object Management
// no analogue
@@ -683,7 +683,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP {
// cuGraphChildGraphNodeGetGraph
{"cudaGraphChildGraphNodeGetGraph", {"hipGraphChildGraphNodeGetGraph", "", CONV_GRAPH, API_RUNTIME, 30}},
// cuGraphClone
- {"cudaGraphClone", {"hipGraphClone", "", CONV_GRAPH, API_RUNTIME, 30, HIP_UNSUPPORTED}},
+ {"cudaGraphClone", {"hipGraphClone", "", CONV_GRAPH, API_RUNTIME, 30}},
// cuGraphCreate
{"cudaGraphCreate", {"hipGraphCreate", "", CONV_GRAPH, API_RUNTIME, 30}},
// cuGraphDebugDotPrint
@@ -833,7 +833,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP {
// cuGraphInstantiateWithFlags
{"cudaGraphInstantiateWithFlags", {"hipGraphInstantiateWithFlags", "", CONV_GRAPH, API_RUNTIME, 30}},
// cuGraphNodeSetEnabled
- {"cudaGraphNodeSetEnabled", {"hipGraphNodeSetEnabled", "", CONV_GRAPH, API_RUNTIME, 30, HIP_UNSUPPORTED}},
+ {"cudaGraphNodeSetEnabled", {"hipGraphNodeSetEnabled", "", CONV_GRAPH, API_RUNTIME, 30, HIP_UNSUPPORTED}},
// 31. Driver Entry Point Access
// cuGetProcAddress
@@ -963,12 +963,6 @@ const std::map CUDA_RUNTIME_FUNCTION_VER_MAP {
{"cudaUnbindTexture", {CUDA_0, CUDA_110, CUDA_0 }},
{"cudaBindSurfaceToArray", {CUDA_0, CUDA_110, CUDA_0 }},
{"cudaGetSurfaceReference", {CUDA_0, CUDA_110, CUDA_0 }},
- {"cuTexObjectGetTextureDesc", {CUDA_90, CUDA_0, CUDA_0 }},
- {"cudaCreateSurfaceObject", {CUDA_90, CUDA_0, CUDA_0 }},
- {"cudaDestroySurfaceObject", {CUDA_90, CUDA_0, CUDA_0 }},
- {"cudaGetSurfaceObjectResourceDesc", {CUDA_90, CUDA_0, CUDA_0 }},
- {"cudaDriverGetVersion", {CUDA_90, CUDA_0, CUDA_0 }},
- {"cudaRuntimeGetVersion", {CUDA_90, CUDA_0, CUDA_0 }},
{"cudaGraphAddChildGraphNode", {CUDA_100, CUDA_0, CUDA_0 }},
{"cudaGraphAddDependencies", {CUDA_100, CUDA_0, CUDA_0 }},
{"cudaGraphAddEmptyNode", {CUDA_100, CUDA_0, CUDA_0 }},
@@ -1288,6 +1282,7 @@ const std::map HIP_RUNTIME_FUNCTION_VER_MAP {
{"hipGraphEventWaitNodeGetEvent", {HIP_5000, HIP_0, HIP_0 }},
{"hipGraphEventWaitNodeSetEvent", {HIP_5000, HIP_0, HIP_0 }},
{"hipGraphExecEventWaitNodeSetEvent", {HIP_5000, HIP_0, HIP_0 }},
+ {"hipGraphClone", {HIP_5000, HIP_0, HIP_0 }},
{"hipDeviceGetDefaultMemPool", {HIP_5020, HIP_0, HIP_0, HIP_LATEST}},
{"hipDeviceSetMemPool", {HIP_5020, HIP_0, HIP_0, HIP_LATEST}},
{"hipDeviceGetMemPool", {HIP_5020, HIP_0, HIP_0, HIP_LATEST}},
diff --git a/src/HipifyAction.cpp b/src/HipifyAction.cpp
index fb58ac87..ae02a4dd 100644
--- a/src/HipifyAction.cpp
+++ b/src/HipifyAction.cpp
@@ -173,27 +173,38 @@ void HipifyAction::FindAndReplace(StringRef name,
}
Statistics::current().incrementCounter(found->second, name.str());
clang::DiagnosticsEngine &DE = getCompilerInstance().getDiagnostics();
- // Warn the user about deprecated idenrifier.
+ // Warn about the deprecated identifier in CUDA but hipify it.
if (Statistics::isDeprecated(found->second)) {
- DE.Report(sl, DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "CUDA identifier is deprecated."));
+ const auto ID = DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "'%0' is deprecated in CUDA.");
+ DE.Report(sl, ID) << found->first;
}
- // Warn the user about unsupported experimental identifier.
+ // Warn about the unsupported experimental identifier.
if (Statistics::isHipExperimental(found->second) &&!Experimental) {
std::string sWarn;
Statistics::isToRoc(found->second) ? sWarn = sROC : sWarn = sHIP;
sWarn = "" + sWarn;
- const auto ID = DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "CUDA identifier is experimental in %0. To hipify it, use the '--experimental' option.");
- DE.Report(sl, ID) << sWarn;
+ const auto ID = DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "'%0' is experimental in '%1'; to hipify it, use the '--experimental' option.");
+ DE.Report(sl, ID) << found->first << sWarn;
return;
}
-
- // Warn the user about unsupported identifier.
+ // Warn about the identifier which is supported only for _v2 version of it
+ // [NOTE]: Currently, only cuBlas is tracked for versioning and only for _v2;
+ // cublas_v2.h has to be included in the source cuda file for hipification.
+ if (Statistics::isHipSupportedV2Only(found->second) && found->second.apiType == API_BLAS && !insertedBLASHeader_V2) {
+ std::string sWarn;
+ Statistics::isToRoc(found->second) ? sWarn = sROC : sWarn = sHIP;
+ sWarn = "" + sWarn;
+ const auto ID = DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "Only '%0_v2' version of '%0' is supported in '%1'; to hipify it, include 'cublas_v2.h' in the source.");
+ DE.Report(sl, ID) << found->first << sWarn;
+ return;
+ }
+ // Warn about the unsupported identifier.
if (Statistics::isUnsupported(found->second)) {
std::string sWarn;
Statistics::isToRoc(found->second) ? sWarn = sROC : sWarn = sHIP;
sWarn = "" + sWarn;
- const auto ID = DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "CUDA identifier is unsupported in %0.");
- DE.Report(sl, ID) << sWarn;
+ const auto ID = DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "'%0' is unsupported in '%1'.");
+ DE.Report(sl, ID) << found->first << sWarn;
return;
}
if (!bReplace) {
@@ -288,6 +299,18 @@ bool HipifyAction::Exclude(const hipCounter &hipToken) {
default:
return false;
}
+ return false;
+ case CONV_INCLUDE_CUDA_MAIN_V2_H:
+ switch (hipToken.apiType) {
+ case API_BLAS:
+ if (insertedBLASHeader_V2) return true;
+ insertedBLASHeader_V2 = true;
+ if (insertedBLASHeader) return true;
+ return false;
+ default:
+ return false;
+ }
+ return false;
case CONV_INCLUDE:
if (hipToken.hipName.empty()) return true;
switch (hipToken.apiType) {
@@ -300,6 +323,7 @@ bool HipifyAction::Exclude(const hipCounter &hipToken) {
default:
return false;
}
+ return false;
default:
return false;
}
@@ -325,7 +349,10 @@ void HipifyAction::InclusionDirective(clang::SourceLocation hash_loc,
clang::SourceLocation sl = filename_range.getBegin();
if (Statistics::isUnsupported(found->second)) {
clang::DiagnosticsEngine &DE = getCompilerInstance().getDiagnostics();
- DE.Report(sl, DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "Unsupported CUDA header."));
+ std::string sWarn;
+ Statistics::isToRoc(found->second) ? sWarn = sROC : sWarn = sHIP;
+ const auto ID = DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "'%0' is unsupported header in '%1'.");
+ DE.Report(sl, ID) << found->first << sWarn;
return;
}
clang::StringRef newInclude;
@@ -543,8 +570,8 @@ bool HipifyAction::cudaHostFuncCall(const mat::MatchFinder::MatchResult &Result)
switch (c.second.castWarn) {
case cw_DataLoss: {
clang::DiagnosticsEngine& DE = getCompilerInstance().getDiagnostics();
- const auto ID = DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "Possible data loss in %0 argument.");
- DE.Report(fullSL, ID) << argNum+1;
+ const auto ID = DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "Possible data loss in %0 argument of '%1'.");
+ DE.Report(fullSL, ID) << argNum+1 << sName;
break;
}
case cw_None:
@@ -698,10 +725,10 @@ class PPCallbackProxy : public clang::PPCallbacks {
public:
explicit PPCallbackProxy(HipifyAction &action): hipifyAction(action) {}
- // [ToDo] Remove SWDEV_331863 related guards from CMakeLists.txt and HipifyAction.cpp when the blocker SWDEV_331863 is overcome
+
void InclusionDirective(clang::SourceLocation hash_loc, const clang::Token &include_token,
StringRef file_name, bool is_angled, clang::CharSourceRange filename_range,
-#if (LLVM_VERSION_MAJOR < 15) || (LLVM_VERSION_MAJOR == 15 && SWDEV_331863)
+#if LLVM_VERSION_MAJOR < 15
const clang::FileEntry *file,
#else
Optional file,
@@ -712,7 +739,7 @@ class PPCallbackProxy : public clang::PPCallbacks {
, clang::SrcMgr::CharacteristicKind FileType
#endif
) override {
-#if (LLVM_VERSION_MAJOR < 15) || (LLVM_VERSION_MAJOR == 15 && SWDEV_331863)
+#if LLVM_VERSION_MAJOR < 15
auto f = file;
#else
auto f = &file->getFileEntry();
@@ -737,6 +764,10 @@ bool HipifyAction::BeginInvocation(clang::CompilerInstance &CI) {
void HipifyAction::ExecuteAction() {
clang::Preprocessor &PP = getCompilerInstance().getPreprocessor();
+ // Register yourself as the preprocessor callback, by proxy.
+ PP.addPPCallbacks(std::unique_ptr(new PPCallbackProxy(*this)));
+ // Now we're done futzing with the lexer, have the subclass proceeed with Sema and AST matching.
+ clang::ASTFrontendAction::ExecuteAction();
auto &SM = getCompilerInstance().getSourceManager();
// Start lexing the specified input file.
llcompat::Memory_Buffer FromFile = llcompat::getMemoryBuffer(SM);
@@ -751,10 +782,6 @@ void HipifyAction::ExecuteAction() {
RewriteToken(RawTok);
RawLex.LexFromRawLexer(RawTok);
}
- // Register yourself as the preprocessor callback, by proxy.
- PP.addPPCallbacks(std::unique_ptr(new PPCallbackProxy(*this)));
- // Now we're done futzing with the lexer, have the subclass proceeed with Sema and AST matching.
- clang::ASTFrontendAction::ExecuteAction();
}
void HipifyAction::run(const mat::MatchFinder::MatchResult &Result) {
diff --git a/src/HipifyAction.h b/src/HipifyAction.h
index 02813f54..7fbf7134 100644
--- a/src/HipifyAction.h
+++ b/src/HipifyAction.h
@@ -49,6 +49,7 @@ class HipifyAction : public clang::ASTFrontendAction,
// This approach means we do the best it's possible to do w.r.t preserving the user's include order.
bool insertedRuntimeHeader = false;
bool insertedBLASHeader = false;
+ bool insertedBLASHeader_V2 = false;
bool insertedRANDHeader = false;
bool insertedRAND_kernelHeader = false;
bool insertedDNNHeader = false;
diff --git a/src/Statistics.cpp b/src/Statistics.cpp
index 6afa60ba..ade47baf 100644
--- a/src/Statistics.cpp
+++ b/src/Statistics.cpp
@@ -64,6 +64,7 @@ const char *counterNames[NUM_CONV_TYPES] = {
"device_function", // CONV_DEVICE_FUNC
"include", // CONV_INCLUDE
"include_cuda_main_header", // CONV_INCLUDE_CUDA_MAIN_H
+ "include_cuda_main_header_v2", // CONV_INCLUDE_CUDA_MAIN_V2_H
"type", // CONV_TYPE
"literal", // CONV_LITERAL
"numeric_literal", // CONV_NUMERIC_LITERAL
@@ -108,6 +109,7 @@ const char *counterTypes[NUM_CONV_TYPES] = {
"CONV_LIB_DEVICE_FUNC",
"CONV_INCLUDE",
"CONV_INCLUDE_CUDA_MAIN_H",
+ "CONV_INCLUDE_CUDA_MAIN_V2_H",
"CONV_TYPE",
"CONV_LITERAL",
"CONV_NUMERIC_LITERAL",
@@ -399,6 +401,10 @@ bool Statistics::isRemoved(const hipCounter &counter) {
HIP_REMOVED == (counter.supportDegree & HIP_REMOVED));
}
+bool Statistics::isHipSupportedV2Only(const hipCounter& counter) {
+ return HIP_SUPPORTED_V2_ONLY == (counter.supportDegree & HIP_SUPPORTED_V2_ONLY);
+}
+
std::string Statistics::getCudaVersion(const cudaVersions& ver) {
switch (ver) {
case CUDA_0:
diff --git a/src/Statistics.h b/src/Statistics.h
index 259bc0bf..66b0c3a4 100644
--- a/src/Statistics.h
+++ b/src/Statistics.h
@@ -121,6 +121,7 @@ enum ConvTypes {
CONV_DEVICE_FUNC,
CONV_INCLUDE,
CONV_INCLUDE_CUDA_MAIN_H,
+ CONV_INCLUDE_CUDA_MAIN_V2_H,
CONV_TYPE,
CONV_LITERAL,
CONV_NUMERIC_LITERAL,
@@ -158,7 +159,8 @@ enum SupportDegree {
CUDA_REMOVED = 0x40,
HIP_REMOVED = 0x80,
REMOVED = 0x100,
- HIP_EXPERIMENTAL = 0x200
+ HIP_EXPERIMENTAL = 0x200,
+ HIP_SUPPORTED_V2_ONLY = 0x400
};
enum cudaVersions {
@@ -411,6 +413,8 @@ class Statistics {
static bool isHipRemoved(const hipCounter& counter);
// Check whether the counter is REMOVED or not.
static bool isRemoved(const hipCounter& counter);
+ // Check whether the counter is HIP_SUPPORTED_V2_ONLY or not.
+ static bool isHipSupportedV2Only(const hipCounter& counter);
// Get string CUDA version.
static std::string getCudaVersion(const cudaVersions &ver);
// Get string HIP version.
diff --git a/tests/unit_tests/headers/headers_test_07.cu b/tests/unit_tests/headers/headers_test_07.cu
index 1effc189..c125522c 100644
--- a/tests/unit_tests/headers/headers_test_07.cu
+++ b/tests/unit_tests/headers/headers_test_07.cu
@@ -1,8 +1,11 @@
// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
+// CHECK: #include
// CHECK: #include "hipblas.h"
// CHECK-NOT: #include "cublas.h"
+// CHECK-NOT: #include "cublas_v2.h"
// CHECK: #include
-#include "cublas_v2.h"
#include "cublas.h"
+#include "cublas_v2.h"
+// CHECK-NOT: #include "hipblas.h"
#include
diff --git a/tests/unit_tests/headers/headers_test_08.cu b/tests/unit_tests/headers/headers_test_08.cu
index aca7f194..84677d57 100644
--- a/tests/unit_tests/headers/headers_test_08.cu
+++ b/tests/unit_tests/headers/headers_test_08.cu
@@ -1,14 +1,18 @@
// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
// CHECK: #include
-// CHECK-NOT: #include
// CHECK: #include
// CHECK: #include "hipblas.h"
// CHECK-NOT: #include "cublas.h"
+// CHECK-NOT: #include "cublas_v2.h"
// CHECK: #include
+// CHECK-NOT: #include
#include
+// CHECK-NOT: #include
#include
+// CHECK-NOT: #include
#include
-#include "cublas_v2.h"
#include "cublas.h"
+#include "cublas_v2.h"
+// CHECK-NOT: #include "hipblas.h"
#include
diff --git a/tests/unit_tests/headers/headers_test_09.cu b/tests/unit_tests/headers/headers_test_09.cu
index 37e718b5..067fe179 100644
--- a/tests/unit_tests/headers/headers_test_09.cu
+++ b/tests/unit_tests/headers/headers_test_09.cu
@@ -19,6 +19,7 @@
// CHECK: #include "hipblas.h"
// CHECK-NOT: #include "cublas.h"
+// CHECK-NOT: #include "cublas_v2.h"
// CHECK: #include
@@ -51,10 +52,12 @@
// CHECK: #include "hipsparse.h"
#include
+// CHECK-NOT: #include
#include
#include
+// CHECK-NOT: #include
#include "cuda_runtime_api.h"
#include "channel_descriptor.h"
@@ -67,8 +70,9 @@
#include
-#include "cublas_v2.h"
#include "cublas.h"
+#include "cublas_v2.h"
+// CHECK-NOT: #include "hipblas.h"
#include
diff --git a/tests/unit_tests/libraries/cuBLAS/cublas_0_based_indexing.cu b/tests/unit_tests/libraries/cuBLAS/cublas_0_based_indexing.cu
index 69812c98..4d8cadf7 100644
--- a/tests/unit_tests/libraries/cuBLAS/cublas_0_based_indexing.cu
+++ b/tests/unit_tests/libraries/cuBLAS/cublas_0_based_indexing.cu
@@ -5,16 +5,25 @@
#include
#include
// CHECK: #include "hipblas.h"
+// CHECK-NOT: #include "cublas_v2.h"
#include "cublas.h"
+#include "cublas_v2.h"
+// CHECK-NOT: #include "hipblas.h"
#define M 6
#define N 5
#define IDX2C(i,j,ld) (((j)*(ld))+(i))
static __inline__ void modify(float *m, int ldm, int n, int p, int q, float
alpha, float beta) {
- // CHECK: hipblasSscal(n - p, alpha, &m[IDX2C(p, q, ldm)], ldm);
- // CHECK: hipblasSscal(ldm - p, beta, &m[IDX2C(p, q, ldm)], 1);
- cublasSscal(n - p, alpha, &m[IDX2C(p, q, ldm)], ldm);
- cublasSscal(ldm - p, beta, &m[IDX2C(p, q, ldm)], 1);
+ // CHECK: hipblasHandle_t blasHandle;
+ cublasHandle_t blasHandle;
+ // CHECK: hipblasStatus_t blasStatus = hipblasCreate(&blasHandle);
+ cublasStatus blasStatus = cublasCreate(&blasHandle);
+ // CHECK: hipblasSscal(blasHandle, n - p, &alpha, &m[IDX2C(p, q, ldm)], ldm);
+ cublasSscal(blasHandle, n - p, &alpha, &m[IDX2C(p, q, ldm)], ldm);
+ // CHECK: hipblasSscal(blasHandle, ldm - p, &beta, &m[IDX2C(p, q, ldm)], 1);
+ cublasSscal(blasHandle, ldm - p, &beta, &m[IDX2C(p, q, ldm)], 1);
+ // CHECK: hipblasDestroy(blasHandle);
+ cublasDestroy(blasHandle);
}
int main(void) {
int i, j;
diff --git a/tests/unit_tests/libraries/cuBLAS/cublas_v1.cu b/tests/unit_tests/libraries/cuBLAS/cublas_v1.cu
new file mode 100644
index 00000000..74a3645a
--- /dev/null
+++ b/tests/unit_tests/libraries/cuBLAS/cublas_v1.cu
@@ -0,0 +1,83 @@
+// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
+
+// CHECK: #include
+#include
+#include
+#include
+// CHECK: #include "hipblas.h"
+#include "cublas.h"
+#define M 6
+#define N 5
+#define IDX2C(i,j,ld) (((j)*(ld))+(i))
+static __inline__ void modify(float* m, int ldm, int n, int p, int q, float
+ alpha, float beta) {
+ // CHECK-NOT: hipblasSscal(n - p, alpha, &m[IDX2C(p, q, ldm)], ldm);
+ // CHECK-NOT: hipblasSscal(ldm - p, beta, &m[IDX2C(p, q, ldm)], 1);
+ // CHECK: cublasSscal(n - p, alpha, &m[IDX2C(p, q, ldm)], ldm);
+ // CHECK: cublasSscal(ldm - p, beta, &m[IDX2C(p, q, ldm)], 1);
+ cublasSscal(n - p, alpha, &m[IDX2C(p, q, ldm)], ldm);
+ cublasSscal(ldm - p, beta, &m[IDX2C(p, q, ldm)], 1);
+}
+int main(void) {
+ int i, j;
+ // CHECK: hipblasStatus_t stat;
+ cublasStatus stat;
+ float* devPtrA;
+ float* a = 0;
+ a = (float*)malloc(M * N * sizeof(*a));
+ if (!a) {
+ printf("host memory allocation failed");
+ return EXIT_FAILURE;
+ }
+ for (j = 0; j < N; j++) {
+ for (i = 0; i < M; i++) {
+ a[IDX2C(i, j, M)] = (float)(i * M + j + 1);
+ }
+ }
+ // cublasInit is not supported yet
+ cublasInit();
+ // cublasAlloc is not supported yet
+ stat = cublasAlloc(M * N, sizeof(*a), (void**)&devPtrA);
+ // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) {
+ if (stat != CUBLAS_STATUS_SUCCESS) {
+ printf("device memory allocation failed");
+ // cublasShutdown is not supported yet
+ cublasShutdown();
+ return EXIT_FAILURE;
+ }
+ // CHECK: stat = hipblasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M);
+ stat = cublasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M);
+ // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) {
+ if (stat != CUBLAS_STATUS_SUCCESS) {
+ printf("data download failed");
+ // cublasFree is not supported yet
+ cublasFree(devPtrA);
+ // cublasShutdown is not supported yet
+ cublasShutdown();
+ return EXIT_FAILURE;
+ }
+ modify(devPtrA, M, N, 1, 2, 16.0f, 12.0f);
+ // CHECK: stat = hipblasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M);
+ stat = cublasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M);
+ // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) {
+ if (stat != CUBLAS_STATUS_SUCCESS) {
+ printf("data upload failed");
+ // cublasFree is not supported yet
+ cublasFree(devPtrA);
+ // cublasShutdown is not supported yet
+ cublasShutdown();
+ return EXIT_FAILURE;
+ }
+ // cublasFree is not supported yet
+ cublasFree(devPtrA);
+ // cublasShutdown is not supported yet
+ cublasShutdown();
+ for (j = 0; j < N; j++) {
+ for (i = 0; i < M; i++) {
+ printf("%7.0f", a[IDX2C(i, j, M)]);
+ }
+ printf("\n");
+ }
+ free(a);
+ return EXIT_SUCCESS;
+}
diff --git a/tests/unit_tests/libraries/cuBLAS/rocBLAS/cublas_0_based_indexing_rocblas.cu b/tests/unit_tests/libraries/cuBLAS/rocBLAS/cublas_0_based_indexing_rocblas.cu
index 2c014cb0..a47561bd 100644
--- a/tests/unit_tests/libraries/cuBLAS/rocBLAS/cublas_0_based_indexing_rocblas.cu
+++ b/tests/unit_tests/libraries/cuBLAS/rocBLAS/cublas_0_based_indexing_rocblas.cu
@@ -5,16 +5,25 @@
#include
#include
// CHECK: #include "rocblas.h"
+// CHECK-NOT: #include "cublas_v2.h"
#include "cublas.h"
+#include "cublas_v2.h"
+// CHECK-NOT: #include "rocblas.h"
#define M 6
#define N 5
#define IDX2C(i,j,ld) (((j)*(ld))+(i))
static __inline__ void modify(float *m, int ldm, int n, int p, int q, float
alpha, float beta) {
- // CHECK: rocblas_sscal(n - p, alpha, &m[IDX2C(p, q, ldm)], ldm);
- // CHECK: rocblas_sscal(ldm - p, beta, &m[IDX2C(p, q, ldm)], 1);
- cublasSscal(n - p, alpha, &m[IDX2C(p, q, ldm)], ldm);
- cublasSscal(ldm - p, beta, &m[IDX2C(p, q, ldm)], 1);
+ // CHECK: rocblas_handle blasHandle;
+ cublasHandle_t blasHandle;
+ // CHECK: rocblas_status blasStatus = rocblas_create_handle(&blasHandle);
+ cublasStatus blasStatus = cublasCreate(&blasHandle);
+ // CHECK: rocblas_sscal(blasHandle, n - p, &alpha, &m[IDX2C(p, q, ldm)], ldm);
+ // CHECK: rocblas_sscal(blasHandle, ldm - p, &beta, &m[IDX2C(p, q, ldm)], 1);
+ cublasSscal(blasHandle, n - p, &alpha, &m[IDX2C(p, q, ldm)], ldm);
+ cublasSscal(blasHandle, ldm - p, &beta, &m[IDX2C(p, q, ldm)], 1);
+ // CHECK: rocblas_destroy_handle(blasHandle);
+ cublasDestroy(blasHandle);
}
int main(void) {
int i, j;
diff --git a/tests/unit_tests/samples/2_Cookbook/2_Profiler/Profiler.cpp b/tests/unit_tests/samples/2_Cookbook/2_Profiler/Profiler.cpp
index 7b3e4649..0fc567d4 100644
--- a/tests/unit_tests/samples/2_Cookbook/2_Profiler/Profiler.cpp
+++ b/tests/unit_tests/samples/2_Cookbook/2_Profiler/Profiler.cpp
@@ -30,7 +30,7 @@ THE SOFTWARE.
// CHECK: #include
#include
-// CHECK: #include
+// CHECK: #include
#include
#define WIDTH 1024
diff --git a/tests/unit_tests/synthetic/driver_functions.cu b/tests/unit_tests/synthetic/driver_functions.cu
index 7a2b6b5c..7c557999 100644
--- a/tests/unit_tests/synthetic/driver_functions.cu
+++ b/tests/unit_tests/synthetic/driver_functions.cu
@@ -1031,6 +1031,11 @@ int main() {
// HIP: hipError_t hipGraphChildGraphNodeGetGraph(hipGraphNode_t node, hipGraph_t* pGraph);
// CHECK: result = hipGraphChildGraphNodeGetGraph(graphNode, &graph);
result = cuGraphChildGraphNodeGetGraph(graphNode, &graph);
+
+ // CUDA: CUresult CUDAAPI cuGraphClone(CUgraph *phGraphClone, CUgraph originalGraph);
+ // HIP: hipError_t hipGraphClone(hipGraph_t* pGraphClone, hipGraph_t originalGraph);
+ // CHECK: result = hipGraphClone(&graph, graph2);
+ result = cuGraphClone(&graph, graph2);
#endif
#if CUDA_VERSION > 10000
@@ -1536,12 +1541,10 @@ int main() {
// CHECK: result = hipTexObjectGetResourceViewDesc(&res_view_descr, texObject);
result = cuTexObjectGetResourceViewDesc(&res_view_descr, texObject);
-#if CUDA_VERSION >= 9000
// CUDA: CUresult CUDAAPI cuTexObjectGetTextureDesc(CUDA_TEXTURE_DESC *pTexDesc, CUtexObject texObject);
// HIP: hipError_t hipTexObjectGetTextureDesc(HIP_TEXTURE_DESC* pTexDesc, hipTextureObject_t texObject);
// CHECK: result = hipTexObjectGetTextureDesc(&tex_descr, texObject);
result = cuTexObjectGetTextureDesc(&tex_descr, texObject);
-#endif
// CUDA: CUresult CUDAAPI cuCtxEnablePeerAccess(CUcontext peerContext, unsigned int Flags);
// HIP: DEPRECATED(DEPRECATED_MSG) hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags);
diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu
new file mode 100644
index 00000000..b8bdfd48
--- /dev/null
+++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu
@@ -0,0 +1,399 @@
+// RUN: %run_test hipify "%s" "%t" %hipify_args 2 --skip-excluded-preprocessor-conditional-blocks --experimental %clang_args -D__CUDA_API_VERSION_INTERNAL
+
+// CHECK: #include
+#include
+// CHECK: #include "hipblas.h"
+// CHECK-NOT: #include "cublas_v2.h"
+#include "cublas.h"
+#include "cublas_v2.h"
+// CHECK-NOT: #include "hipblas.h"
+
+int main() {
+ printf("14. cuBLAS API to hipBLAS API synthetic test\n");
+
+ // CHECK: hipblasOperation_t blasOperation;
+ // CHECK-NEXT: hipblasOperation_t BLAS_OP_N = HIPBLAS_OP_N;
+ // CHECK-NEXT: hipblasOperation_t BLAS_OP_T = HIPBLAS_OP_T;
+ // CHECK-NEXT: hipblasOperation_t BLAS_OP_C = HIPBLAS_OP_C;
+ cublasOperation_t blasOperation;
+ cublasOperation_t BLAS_OP_N = CUBLAS_OP_N;
+ cublasOperation_t BLAS_OP_T = CUBLAS_OP_T;
+ cublasOperation_t BLAS_OP_C = CUBLAS_OP_C;
+
+#if CUDA_VERSION >= 10010
+ // CHECK: hipblasOperation_t BLAS_OP_HERMITAN = HIPBLAS_OP_C;
+ cublasOperation_t BLAS_OP_HERMITAN = CUBLAS_OP_HERMITAN;
+#endif
+
+ // CHECK: hipblasStatus_t blasStatus;
+ // CHECK-NEXT: hipblasStatus_t blasStatus_t;
+ // CHECK-NEXT: hipblasStatus_t BLAS_STATUS_SUCCESS = HIPBLAS_STATUS_SUCCESS;
+ // CHECK-NEXT: hipblasStatus_t BLAS_STATUS_NOT_INITIALIZED = HIPBLAS_STATUS_NOT_INITIALIZED;
+ // CHECK-NEXT: hipblasStatus_t BLAS_STATUS_ALLOC_FAILED = HIPBLAS_STATUS_ALLOC_FAILED;
+ // CHECK-NEXT: hipblasStatus_t BLAS_STATUS_INVALID_VALUE = HIPBLAS_STATUS_INVALID_VALUE;
+ // CHECK-NEXT: hipblasStatus_t BLAS_STATUS_MAPPING_ERROR = HIPBLAS_STATUS_MAPPING_ERROR;
+ // CHECK-NEXT: hipblasStatus_t BLAS_STATUS_EXECUTION_FAILED = HIPBLAS_STATUS_EXECUTION_FAILED;
+ // CHECK-NEXT: hipblasStatus_t BLAS_STATUS_INTERNAL_ERROR = HIPBLAS_STATUS_INTERNAL_ERROR;
+ // CHECK-NEXT: hipblasStatus_t BLAS_STATUS_NOT_SUPPORTED = HIPBLAS_STATUS_NOT_SUPPORTED;
+ // CHECK-NEXT: hipblasStatus_t BLAS_STATUS_ARCH_MISMATCH = HIPBLAS_STATUS_ARCH_MISMATCH;
+ cublasStatus blasStatus;
+ cublasStatus_t blasStatus_t;
+ cublasStatus_t BLAS_STATUS_SUCCESS = CUBLAS_STATUS_SUCCESS;
+ cublasStatus_t BLAS_STATUS_NOT_INITIALIZED = CUBLAS_STATUS_NOT_INITIALIZED;
+ cublasStatus_t BLAS_STATUS_ALLOC_FAILED = CUBLAS_STATUS_ALLOC_FAILED;
+ cublasStatus_t BLAS_STATUS_INVALID_VALUE = CUBLAS_STATUS_INVALID_VALUE;
+ cublasStatus_t BLAS_STATUS_MAPPING_ERROR = CUBLAS_STATUS_MAPPING_ERROR;
+ cublasStatus_t BLAS_STATUS_EXECUTION_FAILED = CUBLAS_STATUS_EXECUTION_FAILED;
+ cublasStatus_t BLAS_STATUS_INTERNAL_ERROR = CUBLAS_STATUS_INTERNAL_ERROR;
+ cublasStatus_t BLAS_STATUS_NOT_SUPPORTED = CUBLAS_STATUS_NOT_SUPPORTED;
+ cublasStatus_t BLAS_STATUS_ARCH_MISMATCH = CUBLAS_STATUS_ARCH_MISMATCH;
+
+ // CHECK: hipblasFillMode_t blasFillMode;
+ // CHECK-NEXT: hipblasFillMode_t BLAS_FILL_MODE_LOWER = HIPBLAS_FILL_MODE_LOWER;
+ // CHECK-NEXT: hipblasFillMode_t BLAS_FILL_MODE_UPPER = HIPBLAS_FILL_MODE_UPPER;
+ cublasFillMode_t blasFillMode;
+ cublasFillMode_t BLAS_FILL_MODE_LOWER = CUBLAS_FILL_MODE_LOWER;
+ cublasFillMode_t BLAS_FILL_MODE_UPPER = CUBLAS_FILL_MODE_UPPER;
+
+#if CUDA_VERSION >= 10010
+ // CHECK: hipblasFillMode_t BLAS_FILL_MODE_FULL = HIPBLAS_FILL_MODE_FULL;
+ cublasFillMode_t BLAS_FILL_MODE_FULL = CUBLAS_FILL_MODE_FULL;
+#endif
+
+ // CHECK: hipblasDiagType_t blasDiagType;
+ // CHECK-NEXT: hipblasDiagType_t BLAS_DIAG_NON_UNIT = HIPBLAS_DIAG_NON_UNIT;
+ // CHECK-NEXT: hipblasDiagType_t BLAS_DIAG_UNIT = HIPBLAS_DIAG_UNIT;
+ cublasDiagType_t blasDiagType;
+ cublasDiagType_t BLAS_DIAG_NON_UNIT = CUBLAS_DIAG_NON_UNIT;
+ cublasDiagType_t BLAS_DIAG_UNIT = CUBLAS_DIAG_UNIT;
+
+ // CHECK: hipblasSideMode_t blasSideMode;
+ // CHECK-NEXT: hipblasSideMode_t BLAS_SIDE_LEFT = HIPBLAS_SIDE_LEFT;
+ // CHECK-NEXT: hipblasSideMode_t BLAS_SIDE_RIGHT = HIPBLAS_SIDE_RIGHT;
+ cublasSideMode_t blasSideMode;
+ cublasSideMode_t BLAS_SIDE_LEFT = CUBLAS_SIDE_LEFT;
+ cublasSideMode_t BLAS_SIDE_RIGHT = CUBLAS_SIDE_RIGHT;
+
+ // CHECK: hipblasPointerMode_t blasPointerMode;
+ // CHECK-NEXT: hipblasPointerMode_t BLAS_POINTER_MODE_HOST = HIPBLAS_POINTER_MODE_HOST;
+ // CHECK-NEXT: hipblasPointerMode_t BLAS_POINTER_MODE_DEVICE = HIPBLAS_POINTER_MODE_DEVICE;
+ cublasPointerMode_t blasPointerMode;
+ cublasPointerMode_t BLAS_POINTER_MODE_HOST = CUBLAS_POINTER_MODE_HOST;
+ cublasPointerMode_t BLAS_POINTER_MODE_DEVICE = CUBLAS_POINTER_MODE_DEVICE;
+
+ // CHECK: hipblasAtomicsMode_t blasAtomicsMode;
+ // CHECK-NEXT: hipblasAtomicsMode_t BLAS_ATOMICS_NOT_ALLOWED = HIPBLAS_ATOMICS_NOT_ALLOWED;
+ // CHECK-NEXT: hipblasAtomicsMode_t BLAS_ATOMICS_ALLOWED = HIPBLAS_ATOMICS_ALLOWED;
+ cublasAtomicsMode_t blasAtomicsMode;
+ cublasAtomicsMode_t BLAS_ATOMICS_NOT_ALLOWED = CUBLAS_ATOMICS_NOT_ALLOWED;
+ cublasAtomicsMode_t BLAS_ATOMICS_ALLOWED = CUBLAS_ATOMICS_ALLOWED;
+
+#if CUDA_VERSION >= 8000
+ // CHECK: hipblasDatatype_t DataType;
+ // CHECK-NEXT: hipblasDatatype_t DataType_t;
+ // CHECK-NEXT: hipblasDatatype_t blasDataType;
+ // CHECK-NEXT: hipblasDatatype_t R_16F = HIPBLAS_R_16F;
+ // CHECK-NEXT: hipblasDatatype_t C_16F = HIPBLAS_C_16F;
+ // CHECK-NEXT: hipblasDatatype_t R_32F = HIPBLAS_R_32F;
+ // CHECK-NEXT: hipblasDatatype_t C_32F = HIPBLAS_C_32F;
+ // CHECK-NEXT: hipblasDatatype_t R_64F = HIPBLAS_R_64F;
+ // CHECK-NEXT: hipblasDatatype_t C_64F = HIPBLAS_C_64F;
+ // CHECK-NEXT: hipblasDatatype_t R_8I = HIPBLAS_R_8I;
+ // CHECK-NEXT: hipblasDatatype_t C_8I = HIPBLAS_C_8I;
+ // CHECK-NEXT: hipblasDatatype_t R_8U = HIPBLAS_R_8U;
+ // CHECK-NEXT: hipblasDatatype_t C_8U = HIPBLAS_C_8U;
+ // CHECK-NEXT: hipblasDatatype_t R_32I = HIPBLAS_R_32I;
+ // CHECK-NEXT: hipblasDatatype_t C_32I = HIPBLAS_C_32I;
+ // CHECK-NEXT: hipblasDatatype_t R_32U = HIPBLAS_R_32U;
+ // CHECK-NEXT: hipblasDatatype_t C_32U = HIPBLAS_C_32U;
+ cudaDataType DataType;
+ cudaDataType_t DataType_t;
+ cublasDataType_t blasDataType;
+ cublasDataType_t R_16F = CUDA_R_16F;
+ cublasDataType_t C_16F = CUDA_C_16F;
+ cublasDataType_t R_32F = CUDA_R_32F;
+ cublasDataType_t C_32F = CUDA_C_32F;
+ cublasDataType_t R_64F = CUDA_R_64F;
+ cublasDataType_t C_64F = CUDA_C_64F;
+ cublasDataType_t R_8I = CUDA_R_8I;
+ cublasDataType_t C_8I = CUDA_C_8I;
+ cublasDataType_t R_8U = CUDA_R_8U;
+ cublasDataType_t C_8U = CUDA_C_8U;
+ cublasDataType_t R_32I = CUDA_R_32I;
+ cublasDataType_t C_32I = CUDA_C_32I;
+ cublasDataType_t R_32U = CUDA_R_32U;
+ cublasDataType_t C_32U = CUDA_C_32U;
+#endif
+
+#if CUDA_VERSION >= 11000
+ // CHECK: hipblasDatatype_t R_16BF = HIPBLAS_R_16B;
+ // CHECK-NEXT: hipblasDatatype_t C_16BF = HIPBLAS_C_16B;
+ cublasDataType_t R_16BF = CUDA_R_16BF;
+ cublasDataType_t C_16BF = CUDA_C_16BF;
+#endif
+
+#if CUDA_VERSION >= 8000
+ // CHECK: hipblasGemmAlgo_t blasGemmAlgo;
+ // CHECK-NEXT: hipblasGemmAlgo_t BLAS_GEMM_DFALT = HIPBLAS_GEMM_DEFAULT;
+ cublasGemmAlgo_t blasGemmAlgo;
+ cublasGemmAlgo_t BLAS_GEMM_DFALT = CUBLAS_GEMM_DFALT;
+#endif
+
+#if CUDA_VERSION >= 9000
+ // CHECK: hipblasGemmAlgo_t BLAS_GEMM_DEFAULT = HIPBLAS_GEMM_DEFAULT;
+ cublasGemmAlgo_t BLAS_GEMM_DEFAULT = CUBLAS_GEMM_DEFAULT;
+#endif
+
+ // CHECK: hipblasHandle_t blasHandle;
+ cublasHandle_t blasHandle;
+
+// Functions
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGetAtomicsMode(cublasHandle_t handle, cublasAtomicsMode_t* mode);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGetAtomicsMode(hipblasHandle_t handle, hipblasAtomicsMode_t* atomics_mode);
+ // CHECK: blasStatus = hipblasGetAtomicsMode(blasHandle, &blasAtomicsMode);
+ blasStatus = cublasGetAtomicsMode(blasHandle, &blasAtomicsMode);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSetAtomicsMode(cublasHandle_t handle, cublasAtomicsMode_t mode);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSetAtomicsMode(hipblasHandle_t handle, hipblasAtomicsMode_t atomics_mode);
+ // CHECK: blasStatus = hipblasSetAtomicsMode(blasHandle, blasAtomicsMode);
+ blasStatus = cublasSetAtomicsMode(blasHandle, blasAtomicsMode);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCreate_v2(cublasHandle_t* handle);
+ // CUDA: #define cublasCreate cublasCreate_v2
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCreate(hipblasHandle_t* handle);
+ // CHECK: blasStatus = hipblasCreate(&blasHandle);
+ // CHECK-NEXT: blasStatus = hipblasCreate(&blasHandle);
+ blasStatus = cublasCreate(&blasHandle);
+ blasStatus = cublasCreate_v2(&blasHandle);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDestroy_v2(cublasHandle_t handle);
+ // CUDA: #define cublasDestroy cublasDestroy_v2
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDestroy(hipblasHandle_t handle);
+ // CHECK: blasStatus = hipblasDestroy(blasHandle);
+ // CHECK-NEXT: blasStatus = hipblasDestroy(blasHandle);
+ blasStatus = cublasDestroy(blasHandle);
+ blasStatus = cublasDestroy_v2(blasHandle);
+
+ // CHECK: hipStream_t stream;
+ cudaStream_t stream;
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSetStream_v2(cublasHandle_t handle, cudaStream_t streamId);
+ // CUDA: #define cublasSetStream cublasSetStream_v2
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSetStream(hipblasHandle_t handle, hipStream_t streamId);
+ // CHECK: blasStatus = hipblasSetStream(blasHandle, stream);
+ // CHECK-NEXT: blasStatus = hipblasSetStream(blasHandle, stream);
+ blasStatus = cublasSetStream(blasHandle, stream);
+ blasStatus = cublasSetStream_v2(blasHandle, stream);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGetStream_v2(cublasHandle_t handle, cudaStream_t* streamId);
+ // CUDA: #define cublasGetStream cublasGetStream_v2
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGetStream(hipblasHandle_t handle, hipStream_t* streamId);
+ // CHECK: blasStatus = hipblasGetStream(blasHandle, &stream);
+ // CHECK-NEXT: blasStatus = hipblasGetStream(blasHandle, &stream);
+ blasStatus = cublasGetStream(blasHandle, &stream);
+ blasStatus = cublasGetStream_v2(blasHandle, &stream);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSetPointerMode_v2(cublasHandle_t handle, cublasPointerMode_t mode);
+ // CUDA: #define cublasSetPointerMode cublasSetPointerMode_v2
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSetPointerMode(hipblasHandle_t handle, hipblasPointerMode_t mode);
+ // CHECK: blasStatus = hipblasSetPointerMode(blasHandle, blasPointerMode);
+ // CHECK-NEXT: blasStatus = hipblasSetPointerMode(blasHandle, blasPointerMode);
+ blasStatus = cublasSetPointerMode(blasHandle, blasPointerMode);
+ blasStatus = cublasSetPointerMode_v2(blasHandle, blasPointerMode);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGetPointerMode_v2(cublasHandle_t handle, cublasPointerMode_t* mode);
+ // CUDA: #define cublasGetPointerMode cublasGetPointerMode_v2
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGetPointerMode(hipblasHandle_t handle, hipblasPointerMode_t* mode);
+ // CHECK: blasStatus = hipblasGetPointerMode(blasHandle, &blasPointerMode);
+ // CHECK-NEXT: blasStatus = hipblasGetPointerMode(blasHandle, &blasPointerMode);
+ blasStatus = cublasGetPointerMode(blasHandle, &blasPointerMode);
+ blasStatus = cublasGetPointerMode_v2(blasHandle, &blasPointerMode);
+
+ int n = 0;
+ int num = 0;
+ int incx = 0;
+ int incy = 0;
+ void* image = nullptr;
+ void* image_2 = nullptr;
+ void* deviceptr = nullptr;
+
+ // CUDA: cublasStatus_t CUBLASWINAPI cublasSetVector(int n, int elemSize, const void* x, int incx, void* devicePtr, int incy);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSetVector(int n, int elemSize, const void* x, int incx, void* y, int incy);
+ // CHECK: blasStatus = hipblasSetVector(n, num, image, incx, image_2, incy);
+ blasStatus = cublasSetVector(n, num, image, incx, image_2, incy);
+
+ // CUDA: cublasStatus_t CUBLASWINAPI cublasGetVector(int n, int elemSize, const void* x, int incx, void* y, int incy);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGetVector(int n, int elemSize, const void* x, int incx, void* y, int incy);
+ // CHECK: blasStatus = hipblasGetVector(n, num, image, incx, image_2, incy);
+ blasStatus = cublasGetVector(n, num, image, incx, image_2, incy);
+
+ // CUDA: cublasStatus_t CUBLASWINAPI cublasSetVectorAsync(int n, int elemSize, const void* hostPtr, int incx, void* devicePtr, int incy, cudaStream_t stream);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSetVectorAsync(int n, int elemSize, const void* x, int incx, void* y, int incy, hipStream_t stream);
+ // CHECK: blasStatus = hipblasSetVectorAsync(n, num, image, incx, image_2, incy, stream);
+ blasStatus = cublasSetVectorAsync(n, num, image, incx, image_2, incy, stream);
+
+ // CUDA: cublasStatus_t CUBLASWINAPI cublasGetVectorAsync(int n, int elemSize, const void* devicePtr, int incx, void* hostPtr, int incy, cudaStream_t stream);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGetVectorAsync(int n, int elemSize, const void* x, int incx, void* y, int incy, hipStream_t stream);
+ // CHECK: blasStatus = hipblasGetVectorAsync(n, num, image, incx, image_2, incy, stream);
+ blasStatus = cublasGetVectorAsync(n, num, image, incx, image_2, incy, stream);
+
+ int rows = 0;
+ int cols = 0;
+
+ // CUDA: cublasStatus_t CUBLASWINAPI cublasSetMatrix(int rows, int cols, int elemSize, const void* A, int lda, void* B, int ldb);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSetMatrix(int rows, int cols, int elemSize, const void* AP, int lda, void* BP, int ldb);
+ // CHECK: blasStatus = hipblasSetMatrix(rows, cols, num, image, incx, image_2, incy);
+ blasStatus = cublasSetMatrix(rows, cols, num, image, incx, image_2, incy);
+
+ // CUDA: cublasStatus_t CUBLASWINAPI cublasGetMatrix(int rows, int cols, int elemSize, const void* A, int lda, void* B, int ldb);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGetMatrix(int rows, int cols, int elemSize, const void* AP, int lda, void* BP, int ldb);
+ // CHECK: blasStatus = hipblasGetMatrix(rows, cols, num, image, incx, image_2, incy);
+ blasStatus = cublasGetMatrix(rows, cols, num, image, incx, image_2, incy);
+
+ // CUDA: cublasStatus_t CUBLASWINAPI cublasSetMatrixAsync(int rows, int cols, int elemSize, const void* A, int lda, void* B, int ldb, cudaStream_t stream);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSetMatrixAsync(int rows, int cols, int elemSize, const void* AP, int lda, void* BP, int ldb, hipStream_t stream);
+ // CHECK: blasStatus = hipblasSetMatrixAsync(rows, cols, num, image, incx, image_2, incy, stream);
+ blasStatus = cublasSetMatrixAsync(rows, cols, num, image, incx, image_2, incy, stream);
+
+ // CUDA: cublasStatus_t CUBLASWINAPI cublasGetMatrixAsync(int rows, int cols, int elemSize, const void* A, int lda, void* B, int ldb, cudaStream_t stream);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGetMatrixAsync(int rows, int cols, int elemSize, const void* AP, int lda, void* BP, int ldb, hipStream_t stream);
+ // CHECK: blasStatus = hipblasGetMatrixAsync(rows, cols, num, image, incx, image_2, incy, stream);
+ blasStatus = cublasGetMatrixAsync(rows, cols, num, image, incx, image_2, incy, stream);
+
+ cudaDataType DataType_2, DataType_3;
+
+ float fx = 0;
+ float fy = 0;
+ float fresult = 0;
+
+ double dx = 0;
+ double dy = 0;
+ double dresult = 0;
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSnrm2_v2(cublasHandle_t handle, int n, const float* x, int incx, float* result);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSnrm2(hipblasHandle_t handle, int n, const float* x, int incx, float* result);
+ // CHECK: blasStatus = hipblasSnrm2(blasHandle, n, &fx, incx, &fresult);
+ // CHECK-NEXT: blasStatus = hipblasSnrm2(blasHandle, n, &fx, incx, &fresult);
+ blasStatus = cublasSnrm2(blasHandle, n, &fx, incx, &fresult);
+ blasStatus = cublasSnrm2_v2(blasHandle, n, &fx, incx, &fresult);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDnrm2_v2(cublasHandle_t handle, int n, const double* x, int incx, double* result);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDnrm2(hipblasHandle_t handle, int n, const double* x, int incx, double* result);
+ // CHECK: blasStatus = hipblasDnrm2(blasHandle, n, &dx, incx, &dresult);
+ // CHECK-NEXT: blasStatus = hipblasDnrm2(blasHandle, n, &dx, incx, &dresult);
+ blasStatus = cublasDnrm2(blasHandle, n, &dx, incx, &dresult);
+ blasStatus = cublasDnrm2_v2(blasHandle, n, &dx, incx, &dresult);
+
+ // CHECK: hipComplex complex, complex_2, complex_res;
+ cuComplex complex, complex_2, complex_res;
+ // CHECK: hipDoubleComplex dcomplex, dcomplex_2, dcomplex_res;
+ cuDoubleComplex dcomplex, dcomplex_2, dcomplex_res;
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasScnrm2_v2(cublasHandle_t handle, int n, const cuComplex* x, int incx, float* result);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasScnrm2(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, float* result);
+ // CHECK: blasStatus = hipblasScnrm2(blasHandle, n, &complex, incx, &fresult);
+ // CHECK-NEXT: blasStatus = hipblasScnrm2(blasHandle, n, &complex, incx, &fresult);
+ blasStatus = cublasScnrm2(blasHandle, n, &complex, incx, &fresult);
+ blasStatus = cublasScnrm2_v2(blasHandle, n, &complex, incx, &fresult);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDznrm2_v2(cublasHandle_t handle, int n, const cuDoubleComplex* x, int incx, double* result);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDznrm2(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, double* result);
+ // CHECK: blasStatus = hipblasDznrm2(blasHandle, n, &dcomplex, incx, &dresult);
+ // CHECK-NEXT: blasStatus = hipblasDznrm2(blasHandle, n, &dcomplex, incx, &dresult);
+ blasStatus = cublasDznrm2(blasHandle, n, &dcomplex, incx, &dresult);
+ blasStatus = cublasDznrm2_v2(blasHandle, n, &dcomplex, incx, &dresult);
+
+#if CUDA_VERSION >= 8000
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasNrm2Ex(cublasHandle_t handle, int n, const void* x, cudaDataType xType, int incx, void* result, cudaDataType resultType, cudaDataType executionType);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasNrm2Ex(hipblasHandle_t handle, int n, const void* x, hipblasDatatype_t xType, int incx, void* result, hipblasDatatype_t resultType, hipblasDatatype_t executionType);
+ // CHECK: blasStatus = hipblasNrm2Ex(blasHandle, n, image, DataType, incx, image_2, DataType_2, DataType_3);
+ blasStatus = cublasNrm2Ex(blasHandle, n, image, DataType, incx, image_2, DataType_2, DataType_3);
+#endif
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSdot_v2(cublasHandle_t handle, int n, const float* x, int incx, const float* y, int incy, float* result);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSdot(hipblasHandle_t handle, int n, const float* x, int incx, const float* y, int incy, float* result);
+ // CHECK: blasStatus = hipblasSdot(blasHandle, n, &fx, incx, &fy, incy, &fresult);
+ // CHECK-NEXT: blasStatus = hipblasSdot(blasHandle, n, &fx, incx, &fy, incy, &fresult);
+ blasStatus = cublasSdot(blasHandle, n, &fx, incx, &fy, incy, &fresult);
+ blasStatus = cublasSdot_v2(blasHandle, n, &fx, incx, &fy, incy, &fresult);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDdot_v2(cublasHandle_t handle, int n, const double* x, int incx, const double* y, int incy, double* result);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDdot(hipblasHandle_t handle, int n, const double* x, int incx, const double* y, int incy, double* result);
+ // CHECK: blasStatus = hipblasDdot(blasHandle, n, &dx, incx, &dy, incy, &dresult);
+ // CHECK-NEXT: blasStatus = hipblasDdot(blasHandle, n, &dx, incx, &dy, incy, &dresult);
+ blasStatus = cublasDdot(blasHandle, n, &dx, incx, &dy, incy, &dresult);
+ blasStatus = cublasDdot_v2(blasHandle, n, &dx, incx, &dy, incy, &dresult);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCdotu_v2(cublasHandle_t handle, int n, const cuComplex* x, int incx, const cuComplex* y, int incy, cuComplex* result);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCdotu(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* result);
+ // CHECK: blasStatus = hipblasCdotu(blasHandle, n, &complex, incx, &complex_2, incy, &complex_res);
+ // CHECK-NEXT: blasStatus = hipblasCdotu(blasHandle, n, &complex, incx, &complex_2, incy, &complex_res);
+ blasStatus = cublasCdotu(blasHandle, n, &complex, incx, &complex_2, incy, &complex_res);
+ blasStatus = cublasCdotu_v2(blasHandle, n, &complex, incx, &complex_2, incy, &complex_res);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCdotc_v2(cublasHandle_t handle, int n, const cuComplex* x, int incx, const cuComplex* y, int incy, cuComplex* result);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCdotc(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* result);
+ // CHECK: blasStatus = hipblasCdotc(blasHandle, n, &complex, incx, &complex_2, incy, &complex_res);
+ // CHECK-NEXT: blasStatus = hipblasCdotc(blasHandle, n, &complex, incx, &complex_2, incy, &complex_res);
+ blasStatus = cublasCdotc(blasHandle, n, &complex, incx, &complex_2, incy, &complex_res);
+ blasStatus = cublasCdotc_v2(blasHandle, n, &complex, incx, &complex_2, incy, &complex_res);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdotu_v2(cublasHandle_t handle, int n, const cuDoubleComplex* x, int incx, const cuDoubleComplex* y, int incy, cuDoubleComplex* result);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZdotu(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* result);
+ // CHECK: blasStatus = hipblasZdotu(blasHandle, n, &dcomplex, incx, &dcomplex_2, incy, &dcomplex_res);
+ // CHECK-NEXT: blasStatus = hipblasZdotu(blasHandle, n, &dcomplex, incx, &dcomplex_2, incy, &dcomplex_res);
+ blasStatus = cublasZdotu(blasHandle, n, &dcomplex, incx, &dcomplex_2, incy, &dcomplex_res);
+ blasStatus = cublasZdotu_v2(blasHandle, n, &dcomplex, incx, &dcomplex_2, incy, &dcomplex_res);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdotc_v2(cublasHandle_t handle, int n, const cuDoubleComplex* x, int incx, const cuDoubleComplex* y, int incy, cuDoubleComplex* result);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZdotc(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* result);
+ // CHECK: blasStatus = hipblasZdotc(blasHandle, n, &dcomplex, incx, &dcomplex_2, incy, &dcomplex_res);
+ // CHECK-NEXT: blasStatus = hipblasZdotc(blasHandle, n, &dcomplex, incx, &dcomplex_2, incy, &dcomplex_res);
+ blasStatus = cublasZdotc(blasHandle, n, &dcomplex, incx, &dcomplex_2, incy, &dcomplex_res);
+ blasStatus = cublasZdotc_v2(blasHandle, n, &dcomplex, incx, &dcomplex_2, incy, &dcomplex_res);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSscal_v2(cublasHandle_t handle, int n, const float* alpha, float* x, int incx);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSscal(hipblasHandle_t handle, int n, const float* alpha, float* x, int incx);
+ // CHECK: blasStatus = hipblasSscal(blasHandle, n, &fy, &fx, incx);
+ // CHECK-NEXT: blasStatus = hipblasSscal(blasHandle, n, &fy, &fx, incx);
+ blasStatus = cublasSscal(blasHandle, n, &fy, &fx, incx);
+ blasStatus = cublasSscal_v2(blasHandle, n, &fy, &fx, incx);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDscal_v2(cublasHandle_t handle, int n, const double* alpha, double* x, int incx);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDscal(hipblasHandle_t handle, int n, const double* alpha, double* x, int incx);
+ // CHECK: blasStatus = hipblasDscal(blasHandle, n, &dx, &dy, incx);
+ // CHECK-NEXT: blasStatus = hipblasDscal(blasHandle, n, &dx, &dy, incx);
+ blasStatus = cublasDscal(blasHandle, n, &dx, &dy, incx);
+ blasStatus = cublasDscal_v2(blasHandle, n, &dx, &dy, incx);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCscal_v2(cublasHandle_t handle, int n, const cuComplex* alpha, cuComplex* x, int incx);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCscal(hipblasHandle_t handle, int n, const hipblasComplex* alpha, hipblasComplex* x, int incx);
+ // CHECK: blasStatus = hipblasCscal(blasHandle, n, &complex, &complex_2, incx);
+ // CHECK-NEXT: blasStatus = hipblasCscal(blasHandle, n, &complex, &complex_2, incx);
+ blasStatus = cublasCscal(blasHandle, n, &complex, &complex_2, incx);
+ blasStatus = cublasCscal_v2(blasHandle, n, &complex, &complex_2, incx);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsscal_v2(cublasHandle_t handle, int n, const float* alpha, cuComplex* x, int incx);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCsscal(hipblasHandle_t handle, int n, const float* alpha, hipblasComplex* x, int incx);
+ // CHECK: blasStatus = hipblasCsscal(blasHandle, n, &fx, &complex, incx);
+ // CHECK-NEXT: blasStatus = hipblasCsscal(blasHandle, n, &fx, &complex, incx);
+ blasStatus = cublasCsscal(blasHandle, n, &fx, &complex, incx);
+ blasStatus = cublasCsscal_v2(blasHandle, n, &fx, &complex, incx);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZscal_v2(cublasHandle_t handle, int n, const cuDoubleComplex* alpha, cuDoubleComplex* x, int incx);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZscal(hipblasHandle_t handle, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* x, int incx);
+ // CHECK: blasStatus = hipblasZscal(blasHandle, n, &dcomplex, &dcomplex_2, incx);
+ // CHECK-NEXT: blasStatus = hipblasZscal(blasHandle, n, &dcomplex, &dcomplex_2, incx);
+ blasStatus = cublasZscal(blasHandle, n, &dcomplex, &dcomplex_2, incx);
+ blasStatus = cublasZscal_v2(blasHandle, n, &dcomplex, &dcomplex_2, incx);
+
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdscal_v2(cublasHandle_t handle, int n, const double* alpha, cuDoubleComplex* x, int incx);
+ // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZdscal(hipblasHandle_t handle, int n, const double* alpha, hipblasDoubleComplex* x, int incx);
+ // CHECK: blasStatus = hipblasZdscal(blasHandle, n, &dx, &dcomplex, incx);
+ // CHECK-NEXT: blasStatus = hipblasZdscal(blasHandle, n, &dx, &dcomplex, incx);
+ blasStatus = cublasZdscal(blasHandle, n, &dx, &dcomplex, incx);
+ blasStatus = cublasZdscal_v2(blasHandle, n, &dx, &dcomplex, incx);
+
+ return 0;
+}
diff --git a/tests/unit_tests/synthetic/runtime_functions.cu b/tests/unit_tests/synthetic/runtime_functions.cu
index 29648536..4e1a3578 100644
--- a/tests/unit_tests/synthetic/runtime_functions.cu
+++ b/tests/unit_tests/synthetic/runtime_functions.cu
@@ -8,7 +8,10 @@
#include "windows.h"
#include
#endif
+
#include "cuda_gl_interop.h"
+// CHECK: #include "hip/hip_runtime_api.h"
+#include "cuda_profiler_api.h"
int main() {
printf("12. CUDA Runtime API Functions synthetic test\n");
@@ -35,6 +38,8 @@ int main() {
void* deviceptr_2 = nullptr;
void* image = nullptr;
void* func = nullptr;
+ void* src = nullptr;
+ void* dst = nullptr;
char* ch = nullptr;
const char* const_ch = nullptr;
dim3 gridDim;
@@ -42,6 +47,7 @@ int main() {
GLuint gl_uint = 0;
GLenum gl_enum = 0;
struct textureReference* texref = nullptr;
+ std::string name = "str";
#if defined(_WIN32)
unsigned long long ull = 0;
@@ -57,6 +63,17 @@ int main() {
cudaError_t Error_t;
cudaStream_t stream;
+ // CHECK: hipEvent_t Event_t;
+ // CHECK-Next: hipEvent_t Event_2;
+ cudaEvent_t Event_t;
+ cudaEvent_t Event_2;
+
+ // CHECK: hipMemcpy3DParms Memcpy3DParms;
+ cudaMemcpy3DParms Memcpy3DParms;
+
+ // CHECK: hipMemcpyKind MemcpyKind;
+ cudaMemcpyKind MemcpyKind;
+
#if CUDA_VERSION >= 8000
// CHECK: hipDeviceP2PAttr DeviceP2PAttr;
cudaDeviceP2PAttr DeviceP2PAttr;
@@ -133,8 +150,8 @@ int main() {
// CHECK: result = hipStreamBeginCapture(stream, StreamCaptureMode);
result = cudaStreamBeginCapture(stream, StreamCaptureMode);
- // CHECK: hipGraph_t Graph_t;
- cudaGraph_t Graph_t;
+ // CHECK: hipGraph_t Graph_t, Graph_t_2;
+ cudaGraph_t Graph_t, Graph_t_2;
// CUDA: extern __host__ cudaError_t CUDARTAPI cudaStreamEndCapture(cudaStream_t stream, cudaGraph_t *pGraph);
// HIP: hipError_t hipStreamEndCapture(hipStream_t stream, hipGraph_t* pGraph);
@@ -209,6 +226,121 @@ int main() {
// HIP: hipError_t hipLaunchHostFunc(hipStream_t stream, hipHostFn_t fn, void* userData);
// CHECK: result = hipLaunchHostFunc(stream, hostFn, image);
result = cudaLaunchHostFunc(stream, hostFn, image);
+
+ // CHECK: hipGraphNode_t graphNode, graphNode_2;
+ cudaGraphNode_t graphNode, graphNode_2;
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphAddChildGraphNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, cudaGraph_t childGraph);
+ // HIP: hipError_t hipGraphAddChildGraphNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, const hipGraphNode_t* pDependencies, size_t numDependencies, hipGraph_t childGraph);
+ // CHECK: result = hipGraphAddChildGraphNode(&graphNode, Graph_t, &graphNode_2, bytes, Graph_t_2);
+ result = cudaGraphAddChildGraphNode(&graphNode, Graph_t, &graphNode_2, bytes, Graph_t_2);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphAddDependencies(cudaGraph_t graph, const cudaGraphNode_t *from, const cudaGraphNode_t *to, size_t numDependencies);
+ // HIP: hipError_t hipGraphAddDependencies(hipGraph_t graph, const hipGraphNode_t* from, const hipGraphNode_t* to, size_t numDependencies);
+ // CHECK: result = hipGraphAddDependencies(Graph_t, &graphNode, &graphNode_2, bytes);
+ result = cudaGraphAddDependencies(Graph_t, &graphNode, &graphNode_2, bytes);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphAddEmptyNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies);
+ // HIP: hipError_t hipGraphAddEmptyNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, const hipGraphNode_t* pDependencies, size_t numDependencies);
+ // CHECK: result = hipGraphAddEmptyNode(&graphNode, Graph_t, &graphNode_2, bytes);
+ result = cudaGraphAddEmptyNode(&graphNode, Graph_t, &graphNode_2, bytes);
+
+ // CHECK: hipHostNodeParams HostNodeParams;
+ cudaHostNodeParams HostNodeParams;
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphAddHostNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, const struct cudaHostNodeParams *pNodeParams);
+ // HIP: hipError_t hipGraphAddHostNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, const hipGraphNode_t* pDependencies, size_t numDependencies, const hipHostNodeParams* pNodeParams);
+ // CHECK: result = hipGraphAddHostNode(&graphNode, Graph_t, &graphNode_2, bytes, &HostNodeParams);
+ result = cudaGraphAddHostNode(&graphNode, Graph_t, &graphNode_2, bytes, &HostNodeParams);
+
+ // CHECK: hipKernelNodeParams KernelNodeParams;
+ cudaKernelNodeParams KernelNodeParams;
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphAddKernelNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, const struct cudaKernelNodeParams *pNodeParams);
+ // HIP: hipError_t hipGraphAddKernelNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, const hipGraphNode_t* pDependencies, size_t numDependencies, const hipKernelNodeParams* pNodeParams);
+ // CHECK: result = hipGraphAddKernelNode(&graphNode, Graph_t, &graphNode_2, bytes, &KernelNodeParams);
+ result = cudaGraphAddKernelNode(&graphNode, Graph_t, &graphNode_2, bytes, &KernelNodeParams);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, const struct cudaMemcpy3DParms *pCopyParams);
+ // HIP: hipError_t hipGraphAddMemcpyNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, const hipGraphNode_t* pDependencies, size_t numDependencies, const hipMemcpy3DParms* pCopyParams);
+ // CHECK: result = hipGraphAddMemcpyNode(&graphNode, Graph_t, &graphNode_2, bytes, &Memcpy3DParms);
+ result = cudaGraphAddMemcpyNode(&graphNode, Graph_t, &graphNode_2, bytes, &Memcpy3DParms);
+
+ // CHECK: hipMemsetParams MemsetParams;
+ cudaMemsetParams MemsetParams;
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemsetNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, const struct cudaMemsetParams *pMemsetParams);
+ // HIP: hipError_t hipGraphAddMemsetNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, const hipGraphNode_t* pDependencies, size_t numDependencies, const hipMemsetParams* pMemsetParams);
+ // CHECK: result = hipGraphAddMemsetNode(&graphNode, Graph_t, &graphNode_2, bytes, &MemsetParams);
+ result = cudaGraphAddMemsetNode(&graphNode, Graph_t, &graphNode_2, bytes, &MemsetParams);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphChildGraphNodeGetGraph(cudaGraphNode_t node, cudaGraph_t *pGraph);
+ // HIP: hipError_t hipGraphChildGraphNodeGetGraph(hipGraphNode_t node, hipGraph_t* pGraph);
+ // CHECK: result = hipGraphChildGraphNodeGetGraph(graphNode, &Graph_t);
+ result = cudaGraphChildGraphNodeGetGraph(graphNode, &Graph_t);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphClone(cudaGraph_t *pGraphClone, cudaGraph_t originalGraph);
+ // HIP: hipError_t hipGraphClone(hipGraph_t* pGraphClone, hipGraph_t originalGraph);
+ // CHECK: result = hipGraphClone(&Graph_t, Graph_t_2);
+ result = cudaGraphClone(&Graph_t, Graph_t_2);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphCreate(cudaGraph_t *pGraph, unsigned int flags);
+ // HIP: hipError_t hipGraphCreate(hipGraph_t* pGraph, unsigned int flags);
+ // CHECK: result = hipGraphCreate(&Graph_t, flags);
+ result = cudaGraphCreate(&Graph_t, flags);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphDestroy(cudaGraph_t graph);
+ // HIP: hipError_t hipGraphDestroy(hipGraph_t graph);
+ // CHECK: result = hipGraphDestroy(Graph_t);
+ result = cudaGraphDestroy(Graph_t);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphDestroyNode(cudaGraphNode_t node);
+ // HIP: hipError_t hipGraphDestroyNode(hipGraphNode_t node);
+ // CHECK: result = hipGraphDestroyNode(graphNode);
+ result = cudaGraphDestroyNode(graphNode);
+
+ // CHECK: hipGraphExec_t GraphExec_t;
+ cudaGraphExec_t GraphExec_t;
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphExecDestroy(cudaGraphExec_t graphExec);
+ // HIP: hipError_t hipGraphExecDestroy(hipGraphExec_t graphExec);
+ // CHECK: result = hipGraphExecDestroy(GraphExec_t);
+ result = cudaGraphExecDestroy(GraphExec_t);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphGetEdges(cudaGraph_t graph, cudaGraphNode_t *from, cudaGraphNode_t *to, size_t *numEdges);
+ // HIP: hipError_t hipGraphGetEdges(hipGraph_t graph, hipGraphNode_t* from, hipGraphNode_t* to, size_t* numEdges);
+ // CHECK: result = hipGraphGetEdges(Graph_t, &graphNode, &graphNode_2, &bytes);
+ result = cudaGraphGetEdges(Graph_t, &graphNode, &graphNode_2, &bytes);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphGetNodes(cudaGraph_t graph, cudaGraphNode_t *nodes, size_t *numNodes);
+ // HIP: hipError_t hipGraphGetNodes(hipGraph_t graph, hipGraphNode_t* nodes, size_t* numNodes);
+ // CHECK: result = hipGraphGetNodes(Graph_t, &graphNode, &bytes);
+ result = cudaGraphGetNodes(Graph_t, &graphNode, &bytes);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphGetRootNodes(cudaGraph_t graph, cudaGraphNode_t *pRootNodes, size_t *pNumRootNodes);
+ // HIP: hipError_t hipGraphGetRootNodes(hipGraph_t graph, hipGraphNode_t* pRootNodes, size_t* pNumRootNodes);
+ // CHECK: result = hipGraphGetRootNodes(Graph_t, &graphNode, &bytes);
+ result = cudaGraphGetRootNodes(Graph_t, &graphNode, &bytes);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeGetParams(cudaGraphNode_t node, struct cudaHostNodeParams *pNodeParams);
+ // HIP: hipError_t hipGraphHostNodeGetParams(hipGraphNode_t node, hipHostNodeParams* pNodeParams);
+ // CHECK: result = hipGraphHostNodeGetParams(graphNode, &HostNodeParams);
+ result = cudaGraphHostNodeGetParams(graphNode, &HostNodeParams);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeSetParams(cudaGraphNode_t node, const struct cudaHostNodeParams *pNodeParams);
+ // HIP: hipError_t hipGraphHostNodeSetParams(hipGraphNode_t node, const hipHostNodeParams* pNodeParams);
+ // CHECK: result = hipGraphHostNodeSetParams(graphNode, &HostNodeParams);
+ result = cudaGraphHostNodeSetParams(graphNode, &HostNodeParams);
+
+ char* name_ = const_cast(name.c_str());
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphInstantiate(cudaGraphExec_t *pGraphExec, cudaGraph_t graph, cudaGraphNode_t *pErrorNode, char *pLogBuffer, size_t bufferSize);
+ // HIP: hipError_t hipGraphInstantiate(hipGraphExec_t* pGraphExec, hipGraph_t graph, hipGraphNode_t* pErrorNode, char* pLogBuffer, size_t bufferSize);
+ // CHECK: result = hipGraphInstantiate(&GraphExec_t, Graph_t, &graphNode, name_, bytes);
+ result = cudaGraphInstantiate(&GraphExec_t, Graph_t, &graphNode, name_, bytes);
+
+ // CHECK: hipGraphNodeType GraphNodeType;
+ cudaGraphNodeType GraphNodeType;
#endif
#if CUDA_VERSION >= 10010
@@ -231,13 +363,16 @@ int main() {
result = cudaThreadExchangeStreamCaptureMode(&streamCaptureMode);
#endif
+#if CUDA_VERSION >= 10020
+ // CHECK: hipGraphExecUpdateResult GraphExecUpdateResult;
+ cudaGraphExecUpdateResult GraphExecUpdateResult;
+#endif
+
#if CUDA_VERSION >= 11000
// CHECK: hipKernelNodeAttrID kernelNodeAttrID;
cudaKernelNodeAttrID kernelNodeAttrID;
// CHECK: hipKernelNodeAttrValue kernelNodeAttrValue;
cudaKernelNodeAttrValue kernelNodeAttrValue;
- // CHECK: hipGraphNode_t graphNode;
- cudaGraphNode_t graphNode;
// CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeSetAttribute(cudaGraphNode_t hNode, enum cudaKernelNodeAttrID attr, const union cudaKernelNodeAttrValue* value);
// HIP: hipError_t hipGraphKernelNodeSetAttribute(hipGraphNode_t hNode, hipKernelNodeAttrID attr, const hipKernelNodeAttrValue* value);
@@ -248,6 +383,183 @@ int main() {
// HIP: hipError_t hipGraphKernelNodeGetAttribute(hipGraphNode_t hNode, hipKernelNodeAttrID attr, hipKernelNodeAttrValue* value);
// CHECK: result = hipGraphKernelNodeGetAttribute(graphNode, kernelNodeAttrID, &kernelNodeAttrValue);
result = cudaGraphKernelNodeGetAttribute(graphNode, kernelNodeAttrID, &kernelNodeAttrValue);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphExecKernelNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const struct cudaKernelNodeParams *pNodeParams);
+ // HIP: hipError_t hipGraphExecKernelNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, const hipKernelNodeParams* pNodeParams);
+ // CHECK: result = hipGraphExecKernelNodeSetParams(GraphExec_t, graphNode, &KernelNodeParams);
+ result = cudaGraphExecKernelNodeSetParams(GraphExec_t, graphNode, &KernelNodeParams);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemcpyNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const struct cudaMemcpy3DParms *pNodeParams);
+ // HIP: hipError_t hipGraphExecMemcpyNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, hipMemcpy3DParms* pNodeParams);
+ // CHECK: result = hipGraphExecMemcpyNodeSetParams(GraphExec_t, graphNode, &Memcpy3DParms);
+ result = cudaGraphExecMemcpyNodeSetParams(GraphExec_t, graphNode, &Memcpy3DParms);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemsetNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const struct cudaMemsetParams *pNodeParams);
+ // HIP: hipError_t hipGraphExecMemsetNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, const hipMemsetParams* pNodeParams);
+ // CHECK: result = hipGraphExecMemsetNodeSetParams(GraphExec_t, graphNode, &MemsetParams);
+ result = cudaGraphExecMemsetNodeSetParams(GraphExec_t, graphNode, &MemsetParams);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphExecHostNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const struct cudaHostNodeParams *pNodeParams);
+ // HIP: hipError_t hipError_t hipGraphExecHostNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, const hipHostNodeParams* pNodeParams);
+ // CHECK: result = hipGraphExecHostNodeSetParams(GraphExec_t, graphNode, &HostNodeParams);
+ result = cudaGraphExecHostNodeSetParams(GraphExec_t, graphNode, &HostNodeParams);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphExecUpdate(cudaGraphExec_t hGraphExec, cudaGraph_t hGraph, cudaGraphNode_t *hErrorNode_out, enum cudaGraphExecUpdateResult *updateResult_out);
+ // HIP: hipError_t hipGraphExecUpdate(hipGraphExec_t hGraphExec, hipGraph_t hGraph, hipGraphNode_t* hErrorNode_out, hipGraphExecUpdateResult* updateResult_out);
+ // CHECK: result = hipGraphExecUpdate(GraphExec_t, Graph_t, &graphNode, &GraphExecUpdateResult);
+ result = cudaGraphExecUpdate(GraphExec_t, Graph_t, &graphNode, &GraphExecUpdateResult);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeGetParams(cudaGraphNode_t node, struct cudaKernelNodeParams *pNodeParams);
+ // HIP: hipError_t hipGraphKernelNodeGetParams(hipGraphNode_t node, hipKernelNodeParams* pNodeParams);
+ // CHECK: result = hipGraphKernelNodeGetParams(graphNode, &KernelNodeParams);
+ result = cudaGraphKernelNodeGetParams(graphNode, &KernelNodeParams);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeSetParams(cudaGraphNode_t node, const struct cudaKernelNodeParams *pNodeParams);
+ // HIP: hipError_t hipGraphKernelNodeSetParams(hipGraphNode_t node, const hipKernelNodeParams* pNodeParams);
+ // CHECK: result = hipGraphKernelNodeSetParams(graphNode, &KernelNodeParams);
+ result = cudaGraphKernelNodeSetParams(graphNode, &KernelNodeParams);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphLaunch(cudaGraphExec_t graphExec, cudaStream_t stream);
+ // HIP: hipError_t hipGraphLaunch(hipGraphExec_t graphExec, hipStream_t stream);
+ // CHECK: result = hipGraphLaunch(GraphExec_t, stream);
+ result = cudaGraphLaunch(GraphExec_t, stream);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeGetParams(cudaGraphNode_t node, struct cudaMemcpy3DParms *pNodeParams);
+ // HIP: hipError_t hipGraphMemcpyNodeGetParams(hipGraphNode_t node, hipMemcpy3DParms* pNodeParams);
+ // CHECK: result = hipGraphMemcpyNodeGetParams(graphNode, &Memcpy3DParms);
+ result = cudaGraphMemcpyNodeGetParams(graphNode, &Memcpy3DParms);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParams(cudaGraphNode_t node, const struct cudaMemcpy3DParms *pNodeParams);
+ // HIP: hipError_t hipGraphMemcpyNodeSetParams(hipGraphNode_t node, const hipMemcpy3DParms* pNodeParams);
+ // CHECK: result = hipGraphMemcpyNodeSetParams(graphNode, &Memcpy3DParms);
+ result = cudaGraphMemcpyNodeSetParams(graphNode, &Memcpy3DParms);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeGetParams(cudaGraphNode_t node, struct cudaMemsetParams *pNodeParams);
+ // HIP: hipError_t hipGraphMemsetNodeGetParams(hipGraphNode_t node, hipMemsetParams* pNodeParams);
+ // CHECK: result = hipGraphMemsetNodeGetParams(graphNode, &MemsetParams);
+ result = cudaGraphMemsetNodeGetParams(graphNode, &MemsetParams);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeSetParams(cudaGraphNode_t node, const struct cudaMemsetParams *pNodeParams);
+ // HIP: hipError_t hipGraphMemsetNodeSetParams(hipGraphNode_t node, const hipMemsetParams* pNodeParams);
+ // CHECK: result = hipGraphMemsetNodeSetParams(graphNode, &MemsetParams);
+ result = cudaGraphMemsetNodeSetParams(graphNode, &MemsetParams);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphNodeFindInClone(cudaGraphNode_t *pNode, cudaGraphNode_t originalNode, cudaGraph_t clonedGraph);
+ // HIP: hipError_t hipError_t hipGraphNodeFindInClone(hipGraphNode_t* pNode, hipGraphNode_t originalNode, hipGraph_t clonedGraph);
+ // CHECK: result = hipGraphNodeFindInClone(&graphNode, graphNode_2, Graph_t);
+ result = cudaGraphNodeFindInClone(&graphNode, graphNode_2, Graph_t);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependencies(cudaGraphNode_t node, cudaGraphNode_t *pDependencies, size_t *pNumDependencies);
+ // HIP: hipError_t hipGraphNodeGetDependencies(hipGraphNode_t node, hipGraphNode_t* pDependencies, size_t* pNumDependencies);
+ // CHECK: result = hipGraphNodeGetDependencies(graphNode, &graphNode_2, &bytes);
+ result = cudaGraphNodeGetDependencies(graphNode, &graphNode_2, &bytes);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependentNodes(cudaGraphNode_t node, cudaGraphNode_t *pDependentNodes, size_t *pNumDependentNodes);
+ // HIP: hipError_t hipGraphNodeGetDependentNodes(hipGraphNode_t node, hipGraphNode_t* pDependentNodes, size_t* pNumDependentNodes);
+ // CHECK: result = hipGraphNodeGetDependentNodes(graphNode, &graphNode_2, &bytes);
+ result = cudaGraphNodeGetDependentNodes(graphNode, &graphNode_2, &bytes);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetType(cudaGraphNode_t node, enum cudaGraphNodeType *pType);
+ // HIP: hipError_t hipGraphNodeGetType(hipGraphNode_t node, hipGraphNodeType* pType);
+ // CHECK: result = hipGraphNodeGetType(graphNode, &GraphNodeType);
+ result = cudaGraphNodeGetType(graphNode, &GraphNodeType);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphRemoveDependencies(cudaGraph_t graph, const cudaGraphNode_t *from, const cudaGraphNode_t *to, size_t numDependencies);
+ // HIP: hipError_t hipGraphRemoveDependencies(hipGraph_t graph, const hipGraphNode_t* from, const hipGraphNode_t* to, size_t numDependencies);
+ // CHECK: result = hipGraphRemoveDependencies(Graph_t, &graphNode, &graphNode, bytes);
+ result = cudaGraphRemoveDependencies(Graph_t, &graphNode, &graphNode, bytes);
+#endif
+
+#if CUDA_VERSION >= 11010
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNodeToSymbol(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const void* symbol, const void* src, size_t count, size_t offset, enum cudaMemcpyKind kind);
+ // HIP: hipError_t hipGraphAddMemcpyNodeToSymbol(hipGraphNode_t* pGraphNode, hipGraph_t graph, const hipGraphNode_t* pDependencies, size_t numDependencies, const void* symbol, const void* src, size_t count, size_t offset, hipMemcpyKind kind);
+ // CHECK: result = hipGraphAddMemcpyNodeToSymbol(&graphNode, Graph_t, &graphNode_2, width, HIP_SYMBOL(image), src, bytes, wOffset, MemcpyKind);
+ result = cudaGraphAddMemcpyNodeToSymbol(&graphNode, Graph_t, &graphNode_2, width, image, src, bytes, wOffset, MemcpyKind);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNodeFromSymbol(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, void* dst, const void* symbol, size_t count, size_t offset, enum cudaMemcpyKind kind);
+ // HIP: hipError_t hipGraphAddMemcpyNodeFromSymbol(hipGraphNode_t* pGraphNode, hipGraph_t graph, const hipGraphNode_t* pDependencies, size_t numDependencies, void* dst, const void* symbol, size_t count, size_t offset, hipMemcpyKind kind);
+ // CHECK: result = hipGraphAddMemcpyNodeFromSymbol(&graphNode, Graph_t, &graphNode_2, width, dst, HIP_SYMBOL(image), bytes, wOffset, MemcpyKind);
+ result = cudaGraphAddMemcpyNodeFromSymbol(&graphNode, Graph_t, &graphNode_2, width, dst, image, bytes, wOffset, MemcpyKind);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNode1D(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, void* dst, const void* src, size_t count, enum cudaMemcpyKind kind);
+ // HIP: hipError_t hipGraphAddMemcpyNode1D(hipGraphNode_t* pGraphNode, hipGraph_t graph, const hipGraphNode_t* pDependencies, size_t numDependencies, void* dst, const void* src, size_t count, hipMemcpyKind kind);
+ // CHECK: result = hipGraphAddMemcpyNode1D(&graphNode, Graph_t, &graphNode_2, width, dst, src, bytes, MemcpyKind);
+ result = cudaGraphAddMemcpyNode1D(&graphNode, Graph_t, &graphNode_2, width, dst, src, bytes, MemcpyKind);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParamsToSymbol(cudaGraphNode_t node, const void* symbol, const void* src, size_t count, size_t offset, enum cudaMemcpyKind kind);
+ // HIP: hipError_t hipGraphMemcpyNodeSetParamsToSymbol(hipGraphNode_t node, const void* symbol, const void* src, size_t count, size_t offset, hipMemcpyKind kind);
+ // CHECK: result = hipGraphMemcpyNodeSetParamsToSymbol(graphNode, HIP_SYMBOL(image), src, bytes, wOffset, MemcpyKind);
+ result = cudaGraphMemcpyNodeSetParamsToSymbol(graphNode, image, src, bytes, wOffset, MemcpyKind);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParamsFromSymbol(cudaGraphNode_t node, void* dst, const void* symbol, size_t count, size_t offset, enum cudaMemcpyKind kind);
+ // HIP: hipError_t hipGraphMemcpyNodeSetParamsFromSymbol(hipGraphNode_t node, void* dst, const void* symbol, size_t count, size_t offset, hipMemcpyKind kind);
+ // CHECK: result = hipGraphMemcpyNodeSetParamsFromSymbol(graphNode, dst, HIP_SYMBOL(image), bytes, wOffset, MemcpyKind);
+ result = cudaGraphMemcpyNodeSetParamsFromSymbol(graphNode, dst, image, bytes, wOffset, MemcpyKind);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParams1D(cudaGraphNode_t node, void* dst, const void* src, size_t count, enum cudaMemcpyKind kind);
+ // HIP: hipError_t hipGraphMemcpyNodeSetParams1D(hipGraphNode_t node, void* dst, const void* src, size_t count, hipMemcpyKind kind);
+ // CHECK: result = hipGraphMemcpyNodeSetParams1D(graphNode, dst, src, bytes, MemcpyKind);
+ result = cudaGraphMemcpyNodeSetParams1D(graphNode, dst, src, bytes, MemcpyKind);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphAddEventRecordNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, cudaEvent_t event);
+ // HIP: hipError_t hipGraphAddEventRecordNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, const hipGraphNode_t* pDependencies, size_t numDependencies, hipEvent_t event);
+ // CHECK: result = hipGraphAddEventRecordNode(&graphNode, Graph_t, &graphNode_2, bytes, Event_t);
+ result = cudaGraphAddEventRecordNode(&graphNode, Graph_t, &graphNode_2, bytes, Event_t);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphEventRecordNodeGetEvent(cudaGraphNode_t node, cudaEvent_t *event_out);
+ // HIP: hipError_t hipGraphEventRecordNodeGetEvent(hipGraphNode_t node, hipEvent_t* event_out);
+ // CHECK: result = hipGraphEventRecordNodeGetEvent(graphNode, &Event_t);
+ result = cudaGraphEventRecordNodeGetEvent(graphNode, &Event_t);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphEventRecordNodeSetEvent(cudaGraphNode_t node, cudaEvent_t event);
+ // HIP: hipError_t hipGraphEventRecordNodeSetEvent(hipGraphNode_t node, hipEvent_t event);
+ // CHECK: result = hipGraphEventRecordNodeSetEvent(graphNode, Event_t);
+ result = cudaGraphEventRecordNodeSetEvent(graphNode, Event_t);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphAddEventWaitNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, cudaEvent_t event);
+ // HIP: hipError_t hipGraphAddEventWaitNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, const hipGraphNode_t* pDependencies, size_t numDependencies, hipEvent_t event);
+ // CHECK: result = hipGraphAddEventWaitNode(&graphNode, Graph_t, &graphNode_2, bytes, Event_t);
+ result = cudaGraphAddEventWaitNode(&graphNode, Graph_t, &graphNode_2, bytes, Event_t);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphEventWaitNodeGetEvent(cudaGraphNode_t node, cudaEvent_t *event_out);
+ // HIP: hipError_t hipGraphEventWaitNodeGetEvent(hipGraphNode_t node, hipEvent_t* event_out);
+ // CHECK: result = hipGraphEventWaitNodeGetEvent(graphNode, &Event_t);
+ result = cudaGraphEventWaitNodeGetEvent(graphNode, &Event_t);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphEventWaitNodeSetEvent(cudaGraphNode_t node, cudaEvent_t event);
+ // HIP: hipError_t hipGraphEventWaitNodeSetEvent(hipGraphNode_t node, hipEvent_t event);
+ // CHECK: result = hipGraphEventWaitNodeSetEvent(graphNode, Event_t);
+ result = cudaGraphEventWaitNodeSetEvent(graphNode, Event_t);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemcpyNodeSetParamsToSymbol(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const void* symbol, const void* src, size_t count, size_t offset, enum cudaMemcpyKind kind);
+ // HIP: hipError_t hipGraphExecMemcpyNodeSetParamsToSymbol(hipGraphExec_t hGraphExec, hipGraphNode_t node, const void* symbol, const void* src, size_t count, size_t offset, hipMemcpyKind kind);
+ // CHECK: result = hipGraphExecMemcpyNodeSetParamsToSymbol(GraphExec_t, graphNode, HIP_SYMBOL(image), src, bytes, wOffset, MemcpyKind);
+ result = cudaGraphExecMemcpyNodeSetParamsToSymbol(GraphExec_t, graphNode, image, src, bytes, wOffset, MemcpyKind);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemcpyNodeSetParamsFromSymbol(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, void* dst, const void* symbol, size_t count, size_t offset, enum cudaMemcpyKind kind);
+ // HIP: hipError_t hipGraphExecMemcpyNodeSetParamsFromSymbol(hipGraphExec_t hGraphExec, hipGraphNode_t node, void* dst, const void* symbol, size_t count, size_t offset, hipMemcpyKind kind);
+ // CHECK: result = hipGraphExecMemcpyNodeSetParamsFromSymbol(GraphExec_t, graphNode, dst, HIP_SYMBOL(image), bytes, wOffset, MemcpyKind);
+ result = cudaGraphExecMemcpyNodeSetParamsFromSymbol(GraphExec_t, graphNode, dst, image, bytes, wOffset, MemcpyKind);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemcpyNodeSetParams1D(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, void* dst, const void* src, size_t count, enum cudaMemcpyKind kind);
+ // HIP: hipError_t hipGraphExecMemcpyNodeSetParams1D(hipGraphExec_t hGraphExec, hipGraphNode_t node, void* dst, const void* src, size_t count, hipMemcpyKind kind);
+ // CHECK: result = hipGraphExecMemcpyNodeSetParams1D(GraphExec_t, graphNode, dst, src, bytes, MemcpyKind);
+ result = cudaGraphExecMemcpyNodeSetParams1D(GraphExec_t, graphNode, dst, src, bytes, MemcpyKind);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphExecChildGraphNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, cudaGraph_t childGraph);
+ // HIP: hipError_t hipGraphExecChildGraphNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, hipGraph_t childGraph);
+ // CHECK: result = hipGraphExecChildGraphNodeSetParams(GraphExec_t, graphNode, Graph_t);
+ result = cudaGraphExecChildGraphNodeSetParams(GraphExec_t, graphNode, Graph_t);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphExecEventRecordNodeSetEvent(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, cudaEvent_t event);
+ // HIP: hipError_t hipGraphExecEventRecordNodeSetEvent(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, hipEvent_t event);
+ // CHECK: result = hipGraphExecEventRecordNodeSetEvent(GraphExec_t, graphNode, Event_t);
+ result = cudaGraphExecEventRecordNodeSetEvent(GraphExec_t, graphNode, Event_t);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphExecEventWaitNodeSetEvent(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, cudaEvent_t event);
+ // HIP: hipError_t hipGraphExecEventWaitNodeSetEvent(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, hipEvent_t event);
+ // CHECK: result = hipGraphExecEventWaitNodeSetEvent(GraphExec_t, graphNode, Event_t);
+ result = cudaGraphExecEventWaitNodeSetEvent(GraphExec_t, graphNode, Event_t);
#endif
#if CUDA_VERSION >= 11020
@@ -355,6 +667,13 @@ int main() {
result = cudaMemPoolImportPointer(&deviceptr, memPool_t, &memPoolPtrExportData);
#endif
+#if CUDA_VERSION >= 11040
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphInstantiateWithFlags(cudaGraphExec_t *pGraphExec, cudaGraph_t graph, unsigned long long flags);
+ // HIP: hipError_t hipGraphInstantiateWithFlags(hipGraphExec_t* pGraphExec, hipGraph_t graph, unsigned long long flags);
+ // CHECK: result = hipGraphInstantiateWithFlags(&GraphExec_t, Graph_t, ull);
+ result = cudaGraphInstantiateWithFlags(&GraphExec_t, Graph_t, ull);
+#endif
+
// CHECK: hipDeviceProp_t DeviceProp;
cudaDeviceProp DeviceProp;
@@ -458,11 +777,6 @@ int main() {
// CHECK: hipIpcEventHandle_t IpcEventHandle_t;
cudaIpcEventHandle_t IpcEventHandle_t;
- // CHECK: hipEvent_t Event_t;
- // CHECK-Next: hipEvent_t Event_2;
- cudaEvent_t Event_t;
- cudaEvent_t Event_2;
-
// CUDA: extern __host__ cudaError_t CUDARTAPI cudaIpcGetEventHandle(cudaIpcEventHandle_t *handle, cudaEvent_t event);
// HIP: hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event);
// CHECK: result = hipIpcGetEventHandle(&IpcEventHandle_t, Event_t);
@@ -795,9 +1109,6 @@ int main() {
// CHECK: result = hipMallocPitch(&deviceptr, &bytes, width, height);
result = cudaMallocPitch(&deviceptr, &bytes, width, height);
- // CHECK: hipMemcpyKind MemcpyKind;
- cudaMemcpyKind MemcpyKind;
-
// CUDA: extern __host__ cudaError_t CUDARTAPI cudaMemcpy(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind);
// HIP: hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind);
// CHECK: result = hipMemcpy(deviceptr, deviceptr_2, bytes, MemcpyKind);
@@ -833,9 +1144,6 @@ int main() {
// CHECK: result = hipMemcpy2DToArrayAsync(Array_t, wOffset, hOffset, deviceptr_2, pitch, width, height, MemcpyKind, stream);
result = cudaMemcpy2DToArrayAsync(Array_t, wOffset, hOffset, deviceptr_2, pitch, width, height, MemcpyKind, stream);
- // CHECK: hipMemcpy3DParms Memcpy3DParms;
- cudaMemcpy3DParms Memcpy3DParms;
-
// CUDA: extern __host__ cudaError_t CUDARTAPI cudaMemcpy3D(const struct cudaMemcpy3DParms *p);
// HIP: hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p);
// CHECK: result = hipMemcpy3D(&Memcpy3DParms);
@@ -1058,5 +1366,92 @@ int main() {
// CHECK: result = hipUnbindTexture(texref);
result = cudaUnbindTexture(texref);
+ // CHECK: hipTextureObject_t TextureObject_t;
+ cudaTextureObject_t TextureObject_t;
+
+ // CHECK: hipResourceDesc ResourceDesc;
+ cudaResourceDesc ResourceDesc;
+
+ // CHECK: hipTextureDesc TextureDesc;
+ cudaTextureDesc TextureDesc;
+
+ // CHECK: hipResourceViewDesc ResourceViewDesc;
+ cudaResourceViewDesc ResourceViewDesc;
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaCreateTextureObject(cudaTextureObject_t *pTexObject, const struct cudaResourceDesc *pResDesc, const struct cudaTextureDesc *pTexDesc, const struct cudaResourceViewDesc *pResViewDesc);
+ // HIP: hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResourceDesc* pResDesc, const hipTextureDesc* pTexDesc, const struct hipResourceViewDesc* pResViewDesc);
+ // CHECK: result = hipCreateTextureObject(&TextureObject_t, &ResourceDesc, &TextureDesc, &ResourceViewDesc);
+ result = cudaCreateTextureObject(&TextureObject_t, &ResourceDesc, &TextureDesc, &ResourceViewDesc);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaDestroyTextureObject(cudaTextureObject_t texObject);
+ // HIP: hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject);
+ // CHECK: result = hipDestroyTextureObject(TextureObject_t);
+ result = cudaDestroyTextureObject(TextureObject_t);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceDesc(struct cudaResourceDesc *pResDesc, cudaTextureObject_t texObject);
+ // HIP: hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc, hipTextureObject_t textureObject);
+ // CHECK: result = hipGetTextureObjectResourceDesc(&ResourceDesc, TextureObject_t);
+ result = cudaGetTextureObjectResourceDesc(&ResourceDesc, TextureObject_t);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceViewDesc(struct cudaResourceViewDesc *pResViewDesc, cudaTextureObject_t texObject);
+ // HIP: hipError_t hipGetTextureObjectResourceViewDesc(struct hipResourceViewDesc* pResViewDesc, hipTextureObject_t textureObject);
+ // CHECK: result = hipGetTextureObjectResourceViewDesc(&ResourceViewDesc, TextureObject_t);
+ result = cudaGetTextureObjectResourceViewDesc(&ResourceViewDesc, TextureObject_t);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectTextureDesc(struct cudaTextureDesc *pTexDesc, cudaTextureObject_t texObject);
+ // HIP: hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc, hipTextureObject_t textureObject);
+ // CHECK: result = hipGetTextureObjectTextureDesc(&TextureDesc, TextureObject_t);
+ result = cudaGetTextureObjectTextureDesc(&TextureDesc, TextureObject_t);
+
+ // CHECK: hipSurfaceObject_t SurfaceObject_t;
+ cudaSurfaceObject_t SurfaceObject_t;
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaCreateSurfaceObject(cudaSurfaceObject_t *pSurfObject, const struct cudaResourceDesc *pResDesc);
+ // HIP: hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject, const hipResourceDesc* pResDesc);
+ // CHECK: result = hipCreateSurfaceObject(&SurfaceObject_t, &ResourceDesc);
+ result = cudaCreateSurfaceObject(&SurfaceObject_t, &ResourceDesc);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaDestroySurfaceObject(cudaSurfaceObject_t surfObject);
+ // HIP: hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject);
+ // CHECK: result = hipDestroySurfaceObject(SurfaceObject_t);
+ result = cudaDestroySurfaceObject(SurfaceObject_t);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaDriverGetVersion(int *driverVersion);
+ // HIP: hipError_t hipDriverGetVersion(int* driverVersion);
+ // CHECK: result = hipDriverGetVersion(&intVal);
+ result = cudaDriverGetVersion(&intVal);
+
+ // CUDA: extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaRuntimeGetVersion(int *runtimeVersion);
+ // HIP: hipError_t hipRuntimeGetVersion(int* runtimeVersion);
+ // CHECK: result = hipRuntimeGetVersion(&intVal);
+ result = cudaRuntimeGetVersion(&intVal);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaProfilerStart(void);
+ // HIP: hipError_t hipProfilerStart();
+ // CHECK: result = hipProfilerStart();
+ result = cudaProfilerStart();
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaProfilerStop(void);
+ // HIP: hipError_t hipProfilerStop();
+ // CHECK: result = hipProfilerStop();
+ result = cudaProfilerStop();
+
+#if CUDA_VERSION <= 10000
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem __dv(0), cudaStream_t stream __dv(0));
+ // HIP: hipError_t hipConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem __dparm(0), hipStream_t stream __dparm(0));
+ // CHECK: result = hipConfigureCall(gridDim, blockDim, bytes, stream);
+ result = cudaConfigureCall(gridDim, blockDim, bytes, stream);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaLaunch(const void *func);
+ // HIP: hipError_t hipLaunchByPtr(const void* func);
+ // CHECK: result = hipLaunchByPtr(deviceptr);
+ result = cudaLaunch(deviceptr);
+
+ // CUDA: extern __host__ cudaError_t CUDARTAPI cudaSetupArgument(const void *arg, size_t size, size_t offset);
+ // HIP: hipError_t hipSetupArgument(const void* arg, size_t size, size_t offset);
+ // CHECK: result = hipSetupArgument(deviceptr, bytes, wOffset);
+ result = cudaSetupArgument(deviceptr, bytes, wOffset);
+#endif
+
return 0;
}