diff --git a/CHANGELOG.md b/CHANGELOG.md index e96d3260..7aa4a9b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,18 @@ Documentation for HIPIFY is available at [https://rocmdocs.amd.com/projects/HIPIFY/en/latest/](https://rocmdocs.amd.com/projects/HIPIFY/en/latest/). +## HIPIFY for ROCm 6.2.2 + +### Additions + +* cuDNN 9.3.0 support + +### Fixes + +* Removed some post HIP 6.2 APIs from support +* Added hipification support for HIP functions `hipSetValidDevices`, `hipMemcpy2DArrayToArray`, `hipMemcpyAtoA`, `hipMemcpyAtoD`, `hipMemcpyAtoA`, `hipMemcpyAtoHAsync`, and `hipMemcpyHtoAAsync` +* Fixed an issue with `Skipped some replacements` when hipification didn't occur at all + ## HIPIFY for ROCm 6.2.1 ### Additions diff --git a/bin/hipify-perl b/bin/hipify-perl index 4f7a2051..ac1393cf 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1418,19 +1418,18 @@ my %experimental_funcs = ( "curandOrdering" => "6.2.0", "cudaStreamLegacy" => "6.2.0", "cudaStreamBeginCaptureToGraph" => "6.2.0", + "cudaSetValidDevices" => "6.2.0", + "cudaMemcpy2DArrayToArray" => "6.2.0", "cudaLaunchAttributeValue" => "6.2.0", "cudaLaunchAttributePriority" => "6.2.0", "cudaLaunchAttributeID" => "6.2.0", "cudaLaunchAttributeCooperative" => "6.2.0", "cudaLaunchAttributeAccessPolicyWindow" => "6.2.0", "cudaKernelNodeAttributePriority" => "6.2.0", - "cudaGraphNodeSetParams" => "6.2.0", "cudaGraphKernelNodePortProgrammatic" => "6.2.0", "cudaGraphKernelNodePortLaunchCompletion" => "6.2.0", "cudaGraphKernelNodePortDefault" => "6.2.0", "cudaGraphInstantiateWithParams" => "6.2.0", - "cudaGraphExecNodeSetParams" => "6.2.0", - "cudaGraphExecGetFlags" => "6.2.0", "cudaGraphEdgeData_st" => "6.2.0", "cudaGraphEdgeData" => "6.2.0", "cudaGraphDependencyType_enum" => "6.2.0", @@ -1603,15 +1602,18 @@ my %experimental_funcs = ( "cublasCgbmv_64" => "6.2.0", "cublasAxpyEx_64" => "6.2.0", "cuStreamBeginCaptureToGraph" => "6.2.0", - "cuGraphNodeSetParams" => "6.2.0", - "cuGraphMemcpyNodeSetParams" => "6.2.0", - "cuGraphMemcpyNodeGetParams" => "6.2.0", + "cuMemcpyHtoAAsync_v2" => "6.2.0", + "cuMemcpyHtoAAsync" => "6.2.0", + "cuMemcpyDtoA_v2" => "6.2.0", + "cuMemcpyDtoA" => "6.2.0", + "cuMemcpyAtoHAsync_v2" => "6.2.0", + "cuMemcpyAtoHAsync" => "6.2.0", + "cuMemcpyAtoD_v2" => "6.2.0", + "cuMemcpyAtoD" => "6.2.0", + "cuMemcpyAtoA_v2" => "6.2.0", + "cuMemcpyAtoA" => "6.2.0", "cuGraphInstantiateWithParams" => "6.2.0", - "cuGraphExecNodeSetParams" => "6.2.0", - "cuGraphExecMemcpyNodeSetParams" => "6.2.0", - "cuGraphExecGetFlags" => "6.2.0", "cuGraphAddNode" => "6.2.0", - "cuGraphAddMemFreeNode" => "6.2.0", "cuGetProcAddress" => "6.2.0", "CUlaunchAttributeValue_union" => "6.2.0", "CUlaunchAttributeValue" => "6.2.0", @@ -1783,22 +1785,24 @@ sub subst { } sub experimentalSubstitutions { + subst("cudaSetValidDevices", "hipSetValidDevices", "device"); + subst("cuMemcpyAtoA", "hipMemcpyAtoA", "memory"); + subst("cuMemcpyAtoA_v2", "hipMemcpyAtoA", "memory"); + subst("cuMemcpyAtoD", "hipMemcpyAtoD", "memory"); + subst("cuMemcpyAtoD_v2", "hipMemcpyAtoD", "memory"); + subst("cuMemcpyAtoHAsync", "hipMemcpyAtoHAsync", "memory"); + subst("cuMemcpyAtoHAsync_v2", "hipMemcpyAtoHAsync", "memory"); + subst("cuMemcpyDtoA", "hipMemcpyDtoA", "memory"); + subst("cuMemcpyDtoA_v2", "hipMemcpyDtoA", "memory"); + subst("cuMemcpyHtoAAsync", "hipMemcpyHtoAAsync", "memory"); + subst("cuMemcpyHtoAAsync_v2", "hipMemcpyHtoAAsync", "memory"); + subst("cudaMemcpy2DArrayToArray", "hipMemcpy2DArrayToArray", "memory"); subst("cuStreamBeginCaptureToGraph", "hipStreamBeginCaptureToGraph", "stream"); subst("cudaStreamBeginCaptureToGraph", "hipStreamBeginCaptureToGraph", "stream"); - subst("cuGraphAddMemFreeNode", "hipDrvGraphAddMemFreeNode", "graph"); subst("cuGraphAddNode", "hipGraphAddNode", "graph"); - subst("cuGraphExecGetFlags", "hipGraphExecGetFlags", "graph"); - subst("cuGraphExecMemcpyNodeSetParams", "hipDrvGraphExecMemcpyNodeSetParams", "graph"); - subst("cuGraphExecNodeSetParams", "hipGraphExecNodeSetParams", "graph"); subst("cuGraphInstantiateWithParams", "hipGraphInstantiateWithParams", "graph"); - subst("cuGraphMemcpyNodeGetParams", "hipDrvGraphMemcpyNodeGetParams", "graph"); - subst("cuGraphMemcpyNodeSetParams", "hipDrvGraphMemcpyNodeSetParams", "graph"); - subst("cuGraphNodeSetParams", "hipGraphNodeSetParams", "graph"); subst("cudaGraphAddNode", "hipGraphAddNode", "graph"); - subst("cudaGraphExecGetFlags", "hipGraphExecGetFlags", "graph"); - subst("cudaGraphExecNodeSetParams", "hipGraphExecNodeSetParams", "graph"); subst("cudaGraphInstantiateWithParams", "hipGraphInstantiateWithParams", "graph"); - subst("cudaGraphNodeSetParams", "hipGraphNodeSetParams", "graph"); subst("cuGetProcAddress", "hipGetProcAddress", "driver_entry_point"); subst("cudaGetDriverEntryPoint", "hipGetProcAddress", "driver_entry_point"); subst("cudaGetFuncBySymbol", "hipGetFuncBySymbol", "driver_interact"); @@ -5621,6 +5625,7 @@ sub simpleSubstitutions { subst("curand_precalc.h", "hiprand\/hiprand_kernel.h", "include"); subst("curand_uniform.h", "hiprand\/hiprand_kernel.h", "include"); subst("device_functions.h", "hip\/device_functions.h", "include"); + subst("device_launch_parameters.h", "", "include"); subst("driver_types.h", "hip\/driver_types.h", "include"); subst("library_types.h", "hip\/library_types.h", "include"); subst("math_constants.h", "hip\/hip_math_constants.h", "include"); @@ -9465,7 +9470,6 @@ sub warnUnsupportedFunctions { "cudaSharedmemCarveoutMaxL1", "cudaSharedmemCarveoutDefault", "cudaSharedCarveout", - "cudaSetValidDevices", "cudaSetDoubleForHost", "cudaSetDoubleForDevice", "cudaProfilerInitialize", @@ -9484,7 +9488,6 @@ sub warnUnsupportedFunctions { "cudaMemcpy3DPeerParms", "cudaMemcpy3DPeerAsync", "cudaMemcpy3DPeer", - "cudaMemcpy2DArrayToArray", "cudaMemRangeAttributePreferredLocationType", "cudaMemRangeAttributePreferredLocationId", "cudaMemRangeAttributeLastPrefetchLocationType", @@ -9558,6 +9561,7 @@ sub warnUnsupportedFunctions { "cudaGraphicsCubeFaceNegativeX", "cudaGraphicsCubeFace", "cudaGraphRemoveDependencies_v2", + "cudaGraphNodeSetParams", "cudaGraphNodeGetDependentNodes_v2", "cudaGraphNodeGetDependencies_v2", "cudaGraphKernelNodeUpdate", @@ -9570,6 +9574,8 @@ sub warnUnsupportedFunctions { "cudaGraphExecUpdateResultInfo_st", "cudaGraphExecUpdateResultInfo", "cudaGraphExecUpdateErrorAttributesChanged", + "cudaGraphExecNodeSetParams", + "cudaGraphExecGetFlags", "cudaGraphDeviceNode_t", "cudaGraphDebugDotFlagsConditionalNodeParams", "cudaGraphConditionalNodeType", @@ -10004,16 +10010,6 @@ sub warnUnsupportedFunctions { "cuMemsetD2D16", "cuMemcpyPeerAsync", "cuMemcpyPeer", - "cuMemcpyHtoAAsync_v2", - "cuMemcpyHtoAAsync", - "cuMemcpyDtoA_v2", - "cuMemcpyDtoA", - "cuMemcpyAtoHAsync_v2", - "cuMemcpyAtoHAsync", - "cuMemcpyAtoD_v2", - "cuMemcpyAtoD", - "cuMemcpyAtoA_v2", - "cuMemcpyAtoA", "cuMemcpyAsync", "cuMemcpy3DPeerAsync", "cuMemcpy3DPeer", @@ -10059,11 +10055,18 @@ sub warnUnsupportedFunctions { "cuGraphicsD3D11RegisterResource", "cuGraphicsD3D10RegisterResource", "cuGraphRemoveDependencies_v2", + "cuGraphNodeSetParams", "cuGraphNodeGetDependentNodes_v2", "cuGraphNodeGetDependencies_v2", + "cuGraphMemcpyNodeSetParams", + "cuGraphMemcpyNodeGetParams", "cuGraphGetEdges_v2", + "cuGraphExecNodeSetParams", + "cuGraphExecMemcpyNodeSetParams", + "cuGraphExecGetFlags", "cuGraphConditionalHandleCreate", "cuGraphAddNode_v2", + "cuGraphAddMemFreeNode", "cuGraphAddDependencies_v2", "cuGLUnregisterBufferObject", "cuGLUnmapBufferObjectAsync", diff --git a/docs/hipify-clang.rst b/docs/hipify-clang.rst index 789dd0b7..b15b5b2b 100644 --- a/docs/hipify-clang.rst +++ b/docs/hipify-clang.rst @@ -545,7 +545,7 @@ LLVM >= 10.0.0 .. code-block:: shell - -DCUDA_DNN_ROOT_DIR=D:/CUDA/cuDNN/9.2.1 + -DCUDA_DNN_ROOT_DIR=D:/CUDA/cuDNN/9.3.0 5. [Optional] Install `CUB 1.9.8 `_ for ``CUDA < 11.0`` only; for ``CUDA >= 11.0``, the CUB shipped with CUDA will be used for testing. @@ -637,8 +637,8 @@ On Linux, the following configurations are tested: * Ubuntu 14: LLVM 4.0.0 - 7.1.0, CUDA 7.0 - 9.0, cuDNN 5.0.5 - 7.6.5 * Ubuntu 16-19: LLVM 8.0.0 - 14.0.6, CUDA 7.0 - 10.2, cuDNN 5.1.10 - 8.0.5 -* Ubuntu 20-21: LLVM 9.0.0 - 18.1.8, CUDA 7.0 - 12.3.2, cuDNN 5.1.10 - 9.2.1 -* Ubuntu 22-23: LLVM 13.0.0 - 18.1.8, CUDA 7.0 - 12.3.2, cuDNN 8.0.5 - 9.2.1 +* Ubuntu 20-21: LLVM 9.0.0 - 18.1.8, CUDA 7.0 - 12.3.2, cuDNN 5.1.10 - 9.3.0 +* Ubuntu 22-23: LLVM 13.0.0 - 18.1.8, CUDA 7.0 - 12.3.2, cuDNN 8.0.5 - 9.3.0 Minimum build system requirements for the above configurations: @@ -646,7 +646,7 @@ Minimum build system requirements for the above configurations: Recommended build system requirements: -* CMake 3.30.0, GNU C/C++ 13.2, Python 3.12.4. +* CMake 3.30.2, GNU C/C++ 13.2, Python 3.12.5. Here's how to build ``hipify-clang`` with testing support on ``Ubuntu 23.10.01``: @@ -658,7 +658,7 @@ Here's how to build ``hipify-clang`` with testing support on ``Ubuntu 23.10.01`` -DCMAKE_INSTALL_PREFIX=../dist \ -DCMAKE_PREFIX_PATH=/usr/llvm/18.1.8/dist \ -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.3.2 \ - -DCUDA_DNN_ROOT_DIR=/usr/local/cudnn-9.2.1 \ + -DCUDA_DNN_ROOT_DIR=/usr/local/cudnn-9.3.0 \ -DLLVM_EXTERNAL_LIT=/usr/llvm/18.1.8/build/bin/llvm-lit \ ../hipify @@ -690,13 +690,13 @@ The corresponding successful output is: -- - Binary path : /usr/llvm/18.1.8/dist/bin -- Linker detection: GNU ld -- ---- The below configuring for hipify-clang testing only ---- - -- Found Python: /usr/bin/python3.12 (found version "3.12.4") found components: Interpreter + -- Found Python: /usr/bin/python3.12 (found version "3.12.5") found components: Interpreter -- Found lit: /usr/local/bin/lit -- Found FileCheck: /GIT/LLVM/trunk/dist/FileCheck -- Initial CUDA to configure: -- - CUDA Toolkit path : /usr/local/cuda-12.3.2 -- - CUDA Samples path : - -- - cuDNN path : /usr/local/cudnn-9.2.1 + -- - cuDNN path : /usr/local/cudnn-9.3.0 -- - CUB path : -- Found CUDAToolkit: /usr/local/cuda-12.3.2/targets/x86_64-linux/include (found version "12.3.107") -- Performing Test CMAKE_HAVE_LIBC_PTHREAD @@ -705,7 +705,7 @@ The corresponding successful output is: -- Found CUDA config: -- - CUDA Toolkit path : /usr/local/cuda-12.3.2 -- - CUDA Samples path : OFF - -- - cuDNN path : /usr/local/cudnn-9.2.1 + -- - cuDNN path : /usr/local/cudnn-9.3.0 -- - CUB path : /usr/local/cuda-12.3.2/include/cub -- Configuring done (0.5s) -- Generating done (0.0s) @@ -726,7 +726,7 @@ The corresponding successful output is: x86_64 - Platform architecture Linux 6.5.0-15-generic - Platform OS 64 - hipify-clang binary bitness - 64 - python 3.12.4 binary bitness + 64 - python 3.12.5 binary bitness =============================================================== -- Testing: 106 tests, 12 threads -- Testing Time: 6.91s @@ -818,16 +818,16 @@ Tested configurations: - ``3.11.4`` * - ``17.0.1`` :sup:`6` - ``18.1.8`` :sup:`7` - ``7.0 - 12.3.2`` - - ``8.0.5 - 9.2.1`` - - ``2019.16.11.37, 2022.17.10.4`` + - ``8.0.5 - 9.3.0`` + - ``2019.16.11.38, 2022.17.10.5`` - ``3.30.0`` - - ``3.12.4`` + - ``3.12.5`` * - ``19.0.0git`` - ``7.0 - 12.5.1`` - - ``8.0.5 - 9.2.1`` - - ``2019.16.11.37, 2022.17.10.4`` + - ``8.0.5 - 9.3.0`` + - ``2019.16.11.38, 2022.17.10.5`` - ``3.30.0`` - - ``3.12.4`` + - ``3.12.5`` :sup:`5` LLVM 14.x.x is the latest major release supporting Visual Studio 2017. @@ -855,7 +855,7 @@ Building with testing support using ``Visual Studio 17 2022`` on ``Windows 11``: -DCMAKE_PREFIX_PATH=D:/LLVM/18.1.8/dist \ -DCUDA_TOOLKIT_ROOT_DIR="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.3" \ -DCUDA_SDK_ROOT_DIR="C:/ProgramData/NVIDIA Corporation/CUDA Samples/v12.3" \ - -DCUDA_DNN_ROOT_DIR=D:/CUDA/cuDNN/9.2.1 \ + -DCUDA_DNN_ROOT_DIR=D:/CUDA/cuDNN/9.3.0 \ -DLLVM_EXTERNAL_LIT=D:/LLVM/18.1.8/build/Release/bin/llvm-lit.py \ ../hipify @@ -886,19 +886,19 @@ The corresponding successful output is: -- - LLVM Include path : D:/LLVM/18.1.8/dist/include -- - Binary path : D:/LLVM/18.1.8/dist/bin -- ---- The below configuring for hipify-clang testing only ---- - -- Found Python: C:/Users/TT/AppData/Local/Programs/Python/Python312/python.exe (found version "3.12.4") found components: Interpreter + -- Found Python: C:/Users/TT/AppData/Local/Programs/Python/Python312/python.exe (found version "3.12.5") found components: Interpreter -- Found lit: C:/Users/TT/AppData/Local/Programs/Python/Python312/Scripts/lit.exe -- Found FileCheck: D:/LLVM/18.1.8/dist/bin/FileCheck.exe -- Initial CUDA to configure: -- - CUDA Toolkit path : C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.3 -- - CUDA Samples path : C:/ProgramData/NVIDIA Corporation/CUDA Samples/v12.3 - -- - cuDNN path : D:/CUDA/cuDNN/9.2.1 + -- - cuDNN path : D:/CUDA/cuDNN/9.3.0 -- - CUB path : -- Found CUDAToolkit: C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.3/include (found version "12.3.107") -- Found CUDA config: -- - CUDA Toolkit path : C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.3 -- - CUDA Samples path : C:/ProgramData/NVIDIA Corporation/CUDA Samples/v12.3 - -- - cuDNN path : D:/CUDA/cuDNN/9.2.1 + -- - cuDNN path : D:/CUDA/cuDNN/9.3.0 -- - CUB path : C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.3/include/cub -- Configuring done (1.4s) -- Generating done (0.1s) diff --git a/docs/sphinx/requirements.in b/docs/sphinx/requirements.in index 4cda4737..9ad6c5d8 100644 --- a/docs/sphinx/requirements.in +++ b/docs/sphinx/requirements.in @@ -1 +1 @@ -rocm-docs-core==1.6.1 +rocm-docs-core==1.6.2 diff --git a/docs/sphinx/requirements.txt b/docs/sphinx/requirements.txt index ae44135c..5eecf832 100644 --- a/docs/sphinx/requirements.txt +++ b/docs/sphinx/requirements.txt @@ -92,7 +92,7 @@ requests==2.32.2 # via # pygithub # sphinx -rocm-docs-core==1.6.1 +rocm-docs-core==1.6.2 # via -r requirements.in smmap==5.0.1 # via gitdb diff --git a/docs/tables/CUDA_Driver_API_functions_supported_by_HIP.md b/docs/tables/CUDA_Driver_API_functions_supported_by_HIP.md index f32ad22f..2371b63c 100644 --- a/docs/tables/CUDA_Driver_API_functions_supported_by_HIP.md +++ b/docs/tables/CUDA_Driver_API_functions_supported_by_HIP.md @@ -1658,16 +1658,16 @@ |`cuMemcpy3DPeerAsync`| | | | | | | | | | | |`cuMemcpy3D_v2`| | | | |`hipDrvMemcpy3D`|3.5.0| | | | | |`cuMemcpyAsync`| | | | | | | | | | | -|`cuMemcpyAtoA`| | | | | | | | | | | -|`cuMemcpyAtoA_v2`| | | | | | | | | | | -|`cuMemcpyAtoD`| | | | | | | | | | | -|`cuMemcpyAtoD_v2`| | | | | | | | | | | +|`cuMemcpyAtoA`| | | | |`hipMemcpyAtoA`|6.2.0| | | |6.2.0| +|`cuMemcpyAtoA_v2`| | | | |`hipMemcpyAtoA`|6.2.0| | | |6.2.0| +|`cuMemcpyAtoD`| | | | |`hipMemcpyAtoD`|6.2.0| | | |6.2.0| +|`cuMemcpyAtoD_v2`| | | | |`hipMemcpyAtoD`|6.2.0| | | |6.2.0| |`cuMemcpyAtoH`| | | | |`hipMemcpyAtoH`|1.9.0| | | | | -|`cuMemcpyAtoHAsync`| | | | | | | | | | | -|`cuMemcpyAtoHAsync_v2`| | | | | | | | | | | +|`cuMemcpyAtoHAsync`| | | | |`hipMemcpyAtoHAsync`|6.2.0| | | |6.2.0| +|`cuMemcpyAtoHAsync_v2`| | | | |`hipMemcpyAtoHAsync`|6.2.0| | | |6.2.0| |`cuMemcpyAtoH_v2`| | | | |`hipMemcpyAtoH`|1.9.0| | | | | -|`cuMemcpyDtoA`| | | | | | | | | | | -|`cuMemcpyDtoA_v2`| | | | | | | | | | | +|`cuMemcpyDtoA`| | | | |`hipMemcpyDtoA`|6.2.0| | | |6.2.0| +|`cuMemcpyDtoA_v2`| | | | |`hipMemcpyDtoA`|6.2.0| | | |6.2.0| |`cuMemcpyDtoD`| | | | |`hipMemcpyDtoD`|1.6.0| | | | | |`cuMemcpyDtoDAsync`| | | | |`hipMemcpyDtoDAsync`|1.6.0| | | | | |`cuMemcpyDtoDAsync_v2`| | | | |`hipMemcpyDtoDAsync`|1.6.0| | | | | @@ -1677,8 +1677,8 @@ |`cuMemcpyDtoHAsync_v2`| | | | |`hipMemcpyDtoHAsync`|1.6.0| | | | | |`cuMemcpyDtoH_v2`| | | | |`hipMemcpyDtoH`|1.6.0| | | | | |`cuMemcpyHtoA`| | | | |`hipMemcpyHtoA`|1.9.0| | | | | -|`cuMemcpyHtoAAsync`| | | | | | | | | | | -|`cuMemcpyHtoAAsync_v2`| | | | | | | | | | | +|`cuMemcpyHtoAAsync`| | | | |`hipMemcpyHtoAAsync`|6.2.0| | | |6.2.0| +|`cuMemcpyHtoAAsync_v2`| | | | |`hipMemcpyHtoAAsync`|6.2.0| | | |6.2.0| |`cuMemcpyHtoA_v2`| | | | |`hipMemcpyHtoA`|1.9.0| | | | | |`cuMemcpyHtoD`| | | | |`hipMemcpyHtoD`|1.6.0| | | | | |`cuMemcpyHtoDAsync`| | | | |`hipMemcpyHtoDAsync`|1.6.0| | | | | @@ -1900,7 +1900,7 @@ |`cuGraphAddHostNode`|10.0| | | |`hipGraphAddHostNode`|5.0.0| | | | | |`cuGraphAddKernelNode`|10.0| | | |`hipGraphAddKernelNode`|4.3.0| | | | | |`cuGraphAddMemAllocNode`|11.4| | | |`hipGraphAddMemAllocNode`|5.5.0| | | | | -|`cuGraphAddMemFreeNode`|11.4| | | |`hipDrvGraphAddMemFreeNode`|6.2.0| | | |6.2.0| +|`cuGraphAddMemFreeNode`|11.4| | | | | | | | | | |`cuGraphAddMemcpyNode`|10.0| | | |`hipDrvGraphAddMemcpyNode`|6.0.0| | | | | |`cuGraphAddMemsetNode`|10.0| | | |`hipDrvGraphAddMemsetNode`|6.1.0| | | | | |`cuGraphAddNode`|12.2| | | |`hipGraphAddNode`|6.2.0| | | |6.2.0| @@ -1925,11 +1925,11 @@ |`cuGraphExecEventWaitNodeSetEvent`|11.1| | | |`hipGraphExecEventWaitNodeSetEvent`|5.0.0| | | | | |`cuGraphExecExternalSemaphoresSignalNodeSetParams`|11.2| | | |`hipGraphExecExternalSemaphoresSignalNodeSetParams`|5.7.0| | | | | |`cuGraphExecExternalSemaphoresWaitNodeSetParams`|11.2| | | |`hipGraphExecExternalSemaphoresWaitNodeSetParams`|5.7.0| | | | | -|`cuGraphExecGetFlags`|12.0| | | |`hipGraphExecGetFlags`|6.2.0| | | |6.2.0| +|`cuGraphExecGetFlags`|12.0| | | | | | | | | | |`cuGraphExecHostNodeSetParams`|10.2| | | |`hipGraphExecHostNodeSetParams`|5.0.0| | | | | |`cuGraphExecKernelNodeSetParams`|10.1| | | |`hipGraphExecKernelNodeSetParams`|4.5.0| | | | | -|`cuGraphExecMemcpyNodeSetParams`|10.2| | | |`hipDrvGraphExecMemcpyNodeSetParams`|6.2.0| | | |6.2.0| -|`cuGraphExecNodeSetParams`|12.2| | | |`hipGraphExecNodeSetParams`|6.2.0| | | |6.2.0| +|`cuGraphExecMemcpyNodeSetParams`|10.2| | | | | | | | | | +|`cuGraphExecNodeSetParams`|12.2| | | | | | | | | | |`cuGraphExecUpdate`|10.2| | | |`hipGraphExecUpdate`|5.0.0| | | | | |`cuGraphExternalSemaphoresSignalNodeGetParams`|11.2| | | |`hipGraphExternalSemaphoresSignalNodeGetParams`|5.7.0| | | | | |`cuGraphExternalSemaphoresSignalNodeSetParams`|11.2| | | |`hipGraphExternalSemaphoresSignalNodeSetParams`|5.7.0| | | | | @@ -1953,8 +1953,8 @@ |`cuGraphLaunch`|10.0| | | |`hipGraphLaunch`|4.3.0| | | | | |`cuGraphMemAllocNodeGetParams`|11.4| | | |`hipGraphMemAllocNodeGetParams`|5.5.0| | | | | |`cuGraphMemFreeNodeGetParams`|11.4| | | |`hipGraphMemFreeNodeGetParams`|5.5.0| | | | | -|`cuGraphMemcpyNodeGetParams`|10.0| | | |`hipDrvGraphMemcpyNodeGetParams`|6.2.0| | | |6.2.0| -|`cuGraphMemcpyNodeSetParams`|10.0| | | |`hipDrvGraphMemcpyNodeSetParams`|6.2.0| | | |6.2.0| +|`cuGraphMemcpyNodeGetParams`|10.0| | | | | | | | | | +|`cuGraphMemcpyNodeSetParams`|10.0| | | | | | | | | | |`cuGraphMemsetNodeGetParams`|10.0| | | |`hipGraphMemsetNodeGetParams`|4.5.0| | | | | |`cuGraphMemsetNodeSetParams`|10.0| | | |`hipGraphMemsetNodeSetParams`|4.5.0| | | | | |`cuGraphNodeFindInClone`|10.0| | | |`hipGraphNodeFindInClone`|5.0.0| | | | | @@ -1965,7 +1965,7 @@ |`cuGraphNodeGetEnabled`|11.6| | | |`hipGraphNodeGetEnabled`|5.5.0| | | | | |`cuGraphNodeGetType`|10.0| | | |`hipGraphNodeGetType`|5.0.0| | | | | |`cuGraphNodeSetEnabled`|11.6| | | |`hipGraphNodeSetEnabled`|5.5.0| | | | | -|`cuGraphNodeSetParams`|12.2| | | |`hipGraphNodeSetParams`|6.2.0| | | |6.2.0| +|`cuGraphNodeSetParams`|12.2| | | | | | | | | | |`cuGraphReleaseUserObject`|11.3| | | |`hipGraphReleaseUserObject`|5.3.0| | | | | |`cuGraphRemoveDependencies`|10.0| | | |`hipGraphRemoveDependencies`|5.0.0| | | | | |`cuGraphRemoveDependencies_v2`|12.3| | | | | | | | | | diff --git a/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md b/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md index 83f32650..0e33eb01 100644 --- a/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md +++ b/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md @@ -34,7 +34,7 @@ |`cudaIpcOpenMemHandle`| | | | |`hipIpcOpenMemHandle`|1.6.0| | | | | |`cudaSetDevice`| | | | |`hipSetDevice`|1.6.0| | | | | |`cudaSetDeviceFlags`| | | | |`hipSetDeviceFlags`|1.6.0| | | | | -|`cudaSetValidDevices`| | | | | | | | | | | +|`cudaSetValidDevices`| | | | |`hipSetValidDevices`|6.2.0| | | |6.2.0| ## **2. Device Management [DEPRECATED]** @@ -197,7 +197,7 @@ |`cudaMemRangeGetAttributes`|8.0| | | |`hipMemRangeGetAttributes`|3.7.0| | | | | |`cudaMemcpy`| | | | |`hipMemcpy`|1.5.0| | | | | |`cudaMemcpy2D`| | | | |`hipMemcpy2D`|1.6.0| | | | | -|`cudaMemcpy2DArrayToArray`| | | | | | | | | | | +|`cudaMemcpy2DArrayToArray`| | | | |`hipMemcpy2DArrayToArray`|6.2.0| | | |6.2.0| |`cudaMemcpy2DAsync`| | | | |`hipMemcpy2DAsync`|1.6.0| | | | | |`cudaMemcpy2DFromArray`| | | | |`hipMemcpy2DFromArray`|3.0.0| | | | | |`cudaMemcpy2DFromArrayAsync`| | | | |`hipMemcpy2DFromArrayAsync`|3.0.0| | | | | @@ -464,7 +464,7 @@ |`cudaGraphExecEventWaitNodeSetEvent`|11.1| | | |`hipGraphExecEventWaitNodeSetEvent`|5.0.0| | | | | |`cudaGraphExecExternalSemaphoresSignalNodeSetParams`|11.2| | | |`hipGraphExecExternalSemaphoresSignalNodeSetParams`|5.7.0| | | | | |`cudaGraphExecExternalSemaphoresWaitNodeSetParams`|11.2| | | |`hipGraphExecExternalSemaphoresWaitNodeSetParams`|5.7.0| | | | | -|`cudaGraphExecGetFlags`|12.0| | | |`hipGraphExecGetFlags`|6.2.0| | | |6.2.0| +|`cudaGraphExecGetFlags`|12.0| | | | | | | | | | |`cudaGraphExecHostNodeSetParams`|11.0| | | |`hipGraphExecHostNodeSetParams`|5.0.0| | | | | |`cudaGraphExecKernelNodeSetParams`|11.0| | | |`hipGraphExecKernelNodeSetParams`|4.5.0| | | | | |`cudaGraphExecMemcpyNodeSetParams`|11.0| | | |`hipGraphExecMemcpyNodeSetParams`|5.0.0| | | | | @@ -472,7 +472,7 @@ |`cudaGraphExecMemcpyNodeSetParamsFromSymbol`|11.1| | | |`hipGraphExecMemcpyNodeSetParamsFromSymbol`|5.0.0| | | | | |`cudaGraphExecMemcpyNodeSetParamsToSymbol`|11.1| | | |`hipGraphExecMemcpyNodeSetParamsToSymbol`|5.0.0| | | | | |`cudaGraphExecMemsetNodeSetParams`|11.0| | | |`hipGraphExecMemsetNodeSetParams`|5.0.0| | | | | -|`cudaGraphExecNodeSetParams`|12.2| | | |`hipGraphExecNodeSetParams`|6.2.0| | | |6.2.0| +|`cudaGraphExecNodeSetParams`|12.2| | | | | | | | | | |`cudaGraphExecUpdate`|11.0| | | |`hipGraphExecUpdate`|5.0.0| | | | | |`cudaGraphExternalSemaphoresSignalNodeGetParams`|11.2| | | |`hipGraphExternalSemaphoresSignalNodeGetParams`|5.7.0| | | | | |`cudaGraphExternalSemaphoresSignalNodeSetParams`|11.2| | | |`hipGraphExternalSemaphoresSignalNodeSetParams`|5.7.0| | | | | @@ -510,7 +510,7 @@ |`cudaGraphNodeGetEnabled`|11.6| | | |`hipGraphNodeGetEnabled`|5.5.0| | | | | |`cudaGraphNodeGetType`|11.0| | | |`hipGraphNodeGetType`|5.0.0| | | | | |`cudaGraphNodeSetEnabled`|11.6| | | |`hipGraphNodeSetEnabled`|5.5.0| | | | | -|`cudaGraphNodeSetParams`|12.2| | | |`hipGraphNodeSetParams`|6.2.0| | | |6.2.0| +|`cudaGraphNodeSetParams`|12.2| | | | | | | | | | |`cudaGraphReleaseUserObject`|11.3| | | |`hipGraphReleaseUserObject`|5.3.0| | | | | |`cudaGraphRemoveDependencies`|11.0| | | |`hipGraphRemoveDependencies`|5.0.0| | | | | |`cudaGraphRemoveDependencies_v2`|12.3| | | | | | | | | | diff --git a/docs/tables/CUDNN_API_supported_by_HIP.md b/docs/tables/CUDNN_API_supported_by_HIP.md index 0f86fb2d..feb58d01 100644 --- a/docs/tables/CUDNN_API_supported_by_HIP.md +++ b/docs/tables/CUDNN_API_supported_by_HIP.md @@ -886,7 +886,7 @@ |`cudnnBackendExecute`|8.0.1| | | | | | | | | | |`cudnnBackendFinalize`|8.0.1| | | | | | | | | | |`cudnnBackendGetAttribute`|8.0.1| | | | | | | | | | -|`cudnnBackendInitialize`|8.0.1| | | | | | | | | | +|`cudnnBackendInitialize`|8.0.1|9.3.0| | | | | | | | | |`cudnnBackendSetAttribute`|8.0.1| | | | | | | | | | |`cudnnBatchNormalizationBackward`|4.0.0|9.0.0| | |`hipdnnBatchNormalizationBackward`| | | | | | |`cudnnBatchNormalizationBackwardEx`|7.4.1|9.0.0| | | | | | | | | diff --git a/docs/tables/CUDNN_API_supported_by_HIP_and_MIOPEN.md b/docs/tables/CUDNN_API_supported_by_HIP_and_MIOPEN.md index a803ea4c..b8c4ddd9 100644 --- a/docs/tables/CUDNN_API_supported_by_HIP_and_MIOPEN.md +++ b/docs/tables/CUDNN_API_supported_by_HIP_and_MIOPEN.md @@ -886,7 +886,7 @@ |`cudnnBackendExecute`|8.0.1| | | | | | | | | | | | | | | | |`cudnnBackendFinalize`|8.0.1| | | | | | | | | | | | | | | | |`cudnnBackendGetAttribute`|8.0.1| | | | | | | | | | | | | | | | -|`cudnnBackendInitialize`|8.0.1| | | | | | | | | | | | | | | | +|`cudnnBackendInitialize`|8.0.1|9.3.0| | | | | | | | | | | | | | | |`cudnnBackendSetAttribute`|8.0.1| | | | | | | | | | | | | | | | |`cudnnBatchNormalizationBackward`|4.0.0|9.0.0| | |`hipdnnBatchNormalizationBackward`| | | | | |`miopenBatchNormalizationBackward`| | | | | | |`cudnnBatchNormalizationBackwardEx`|7.4.1|9.0.0| | | | | | | | | | | | | | | diff --git a/docs/tables/CUDNN_API_supported_by_MIOPEN.md b/docs/tables/CUDNN_API_supported_by_MIOPEN.md index 26a01db0..0e77a3de 100644 --- a/docs/tables/CUDNN_API_supported_by_MIOPEN.md +++ b/docs/tables/CUDNN_API_supported_by_MIOPEN.md @@ -886,7 +886,7 @@ |`cudnnBackendExecute`|8.0.1| | | | | | | | | | |`cudnnBackendFinalize`|8.0.1| | | | | | | | | | |`cudnnBackendGetAttribute`|8.0.1| | | | | | | | | | -|`cudnnBackendInitialize`|8.0.1| | | | | | | | | | +|`cudnnBackendInitialize`|8.0.1|9.3.0| | | | | | | | | |`cudnnBackendSetAttribute`|8.0.1| | | | | | | | | | |`cudnnBatchNormalizationBackward`|4.0.0|9.0.0| | |`miopenBatchNormalizationBackward`| | | | | | |`cudnnBatchNormalizationBackwardEx`|7.4.1|9.0.0| | | | | | | | | diff --git a/src/CUDA2HIP.cpp b/src/CUDA2HIP.cpp index 3aae6ba3..11d7395b 100644 --- a/src/CUDA2HIP.cpp +++ b/src/CUDA2HIP.cpp @@ -27,6 +27,7 @@ const std::map CUDA_INCLUDE_MAP { // CUDA includes {"cuda.h", {"hip/hip_runtime.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_DRIVER, 0}}, {"cuda_runtime.h", {"hip/hip_runtime.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_RUNTIME, 0}}, + {"device_launch_parameters.h", {"", "", CONV_INCLUDE, API_RUNTIME, 0}}, {"cuda_runtime_api.h", {"hip/hip_runtime_api.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, {"channel_descriptor.h", {"hip/channel_descriptor.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, {"device_functions.h", {"hip/device_functions.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, diff --git a/src/CUDA2HIP_DNN_API_functions.cpp b/src/CUDA2HIP_DNN_API_functions.cpp index ceba36bd..4baf2e0b 100644 --- a/src/CUDA2HIP_DNN_API_functions.cpp +++ b/src/CUDA2HIP_DNN_API_functions.cpp @@ -432,7 +432,7 @@ const std::map CUDA_DNN_FUNCTION_VER_MAP { {"cudnnAdvTrainVersionCheck", {CUDNN_801, CUDA_0, CUDNN_900}}, {"cudnnBackendCreateDescriptor", {CUDNN_801, CUDA_0, CUDA_0 }}, {"cudnnBackendDestroyDescriptor", {CUDNN_801, CUDA_0, CUDA_0 }}, - {"cudnnBackendInitialize", {CUDNN_801, CUDA_0, CUDA_0 }}, + {"cudnnBackendInitialize", {CUDNN_801, CUDNN_930, CUDA_0 }}, {"cudnnBackendFinalize", {CUDNN_801, CUDA_0, CUDA_0 }}, {"cudnnBackendSetAttribute", {CUDNN_801, CUDA_0, CUDA_0 }}, {"cudnnBackendGetAttribute", {CUDNN_801, CUDA_0, CUDA_0 }}, diff --git a/src/CUDA2HIP_Driver_API_functions.cpp b/src/CUDA2HIP_Driver_API_functions.cpp index 99ae10fa..3c42d319 100644 --- a/src/CUDA2HIP_Driver_API_functions.cpp +++ b/src/CUDA2HIP_Driver_API_functions.cpp @@ -264,20 +264,20 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { {"cuMemcpyAsync", {"hipMemcpyAsync_", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY, HIP_UNSUPPORTED}}, // no analogue // NOTE: Not equal to cudaMemcpyArrayToArray due to different signatures - {"cuMemcpyAtoA", {"hipMemcpyAtoA", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY, HIP_UNSUPPORTED}}, - {"cuMemcpyAtoA_v2", {"hipMemcpyAtoA", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY, HIP_UNSUPPORTED}}, + {"cuMemcpyAtoA", {"hipMemcpyAtoA", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY, HIP_EXPERIMENTAL}}, + {"cuMemcpyAtoA_v2", {"hipMemcpyAtoA", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY, HIP_EXPERIMENTAL}}, // no analogue - {"cuMemcpyAtoD", {"hipMemcpyAtoD", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY, HIP_UNSUPPORTED}}, - {"cuMemcpyAtoD_v2", {"hipMemcpyAtoD", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY, HIP_UNSUPPORTED}}, + {"cuMemcpyAtoD", {"hipMemcpyAtoD", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY, HIP_EXPERIMENTAL}}, + {"cuMemcpyAtoD_v2", {"hipMemcpyAtoD", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY, HIP_EXPERIMENTAL}}, // no analogue {"cuMemcpyAtoH", {"hipMemcpyAtoH", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY}}, {"cuMemcpyAtoH_v2", {"hipMemcpyAtoH", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY}}, // no analogue - {"cuMemcpyAtoHAsync", {"hipMemcpyAtoHAsync", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY, HIP_UNSUPPORTED}}, - {"cuMemcpyAtoHAsync_v2", {"hipMemcpyAtoHAsync", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY, HIP_UNSUPPORTED}}, + {"cuMemcpyAtoHAsync", {"hipMemcpyAtoHAsync", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY, HIP_EXPERIMENTAL}}, + {"cuMemcpyAtoHAsync_v2", {"hipMemcpyAtoHAsync", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY, HIP_EXPERIMENTAL}}, // no analogue - {"cuMemcpyDtoA", {"hipMemcpyDtoA", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY, HIP_UNSUPPORTED}}, - {"cuMemcpyDtoA_v2", {"hipMemcpyDtoA", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY, HIP_UNSUPPORTED}}, + {"cuMemcpyDtoA", {"hipMemcpyDtoA", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY, HIP_EXPERIMENTAL}}, + {"cuMemcpyDtoA_v2", {"hipMemcpyDtoA", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY, HIP_EXPERIMENTAL}}, // no analogue {"cuMemcpyDtoD", {"hipMemcpyDtoD", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY}}, {"cuMemcpyDtoD_v2", {"hipMemcpyDtoD", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY}}, @@ -294,8 +294,8 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { {"cuMemcpyHtoA", {"hipMemcpyHtoA", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY}}, {"cuMemcpyHtoA_v2", {"hipMemcpyHtoA", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY}}, // no analogue - {"cuMemcpyHtoAAsync", {"hipMemcpyHtoAAsync", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY, HIP_UNSUPPORTED}}, - {"cuMemcpyHtoAAsync_v2", {"hipMemcpyHtoAAsync", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY, HIP_UNSUPPORTED}}, + {"cuMemcpyHtoAAsync", {"hipMemcpyHtoAAsync", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY, HIP_EXPERIMENTAL}}, + {"cuMemcpyHtoAAsync_v2", {"hipMemcpyHtoAAsync", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY, HIP_EXPERIMENTAL}}, // no analogue {"cuMemcpyHtoD", {"hipMemcpyHtoD", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY}}, {"cuMemcpyHtoD_v2", {"hipMemcpyHtoD", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY}}, @@ -710,9 +710,9 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { // cudaGraphLaunch {"cuGraphLaunch", {"hipGraphLaunch", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH}}, // NOTE: cudaGraphMemcpyNodeGetParams has a different signature - {"cuGraphMemcpyNodeGetParams", {"hipDrvGraphMemcpyNodeGetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_EXPERIMENTAL}}, + {"cuGraphMemcpyNodeGetParams", {"hipDrvGraphMemcpyNodeGetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}}, // NOTE: cudaGraphMemcpyNodeSetParams has a different signature - {"cuGraphMemcpyNodeSetParams", {"hipDrvGraphMemcpyNodeSetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_EXPERIMENTAL}}, + {"cuGraphMemcpyNodeSetParams", {"hipDrvGraphMemcpyNodeSetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}}, // cudaGraphMemsetNodeGetParams {"cuGraphMemsetNodeGetParams", {"hipGraphMemsetNodeGetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH}}, // cudaGraphMemsetNodeSetParams @@ -738,7 +738,7 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { // cudaGraphRemoveDependencies_v2 {"cuGraphRemoveDependencies_v2", {"hipGraphRemoveDependencies_v2", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}}, // no analogue - {"cuGraphExecMemcpyNodeSetParams", {"hipDrvGraphExecMemcpyNodeSetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_EXPERIMENTAL}}, + {"cuGraphExecMemcpyNodeSetParams", {"hipDrvGraphExecMemcpyNodeSetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}}, // cudaGraphExecHostNodeSetParams {"cuGraphExecHostNodeSetParams", {"hipGraphExecHostNodeSetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH}}, // TODO: take into account the new signature since 12.0 @@ -795,7 +795,7 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { // cudaGraphMemAllocNodeGetParams {"cuGraphMemAllocNodeGetParams", {"hipGraphMemAllocNodeGetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH}}, // no analogue - {"cuGraphAddMemFreeNode", {"hipDrvGraphAddMemFreeNode", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_EXPERIMENTAL}}, + {"cuGraphAddMemFreeNode", {"hipDrvGraphAddMemFreeNode", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}}, // cudaGraphMemFreeNodeGetParams {"cuGraphMemFreeNodeGetParams", {"hipGraphMemFreeNodeGetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH}}, // cudaDeviceGraphMemTrim @@ -817,15 +817,15 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { // cudaGraphInstantiateWithParams {"cuGraphInstantiateWithParams", {"hipGraphInstantiateWithParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_EXPERIMENTAL}}, // cudaGraphExecGetFlags - {"cuGraphExecGetFlags", {"hipGraphExecGetFlags", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_EXPERIMENTAL}}, + {"cuGraphExecGetFlags", {"hipGraphExecGetFlags", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}}, // cudaGraphAddNode {"cuGraphAddNode", {"hipGraphAddNode", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_EXPERIMENTAL}}, // cudaGraphAddNode_v2 {"cuGraphAddNode_v2", {"hipGraphAddNode_v2", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}}, // cudaGraphNodeSetParams - {"cuGraphNodeSetParams", {"hipGraphNodeSetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_EXPERIMENTAL}}, + {"cuGraphNodeSetParams", {"hipGraphNodeSetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}}, // cudaGraphExecNodeSetParams - {"cuGraphExecNodeSetParams", {"hipGraphExecNodeSetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_EXPERIMENTAL}}, + {"cuGraphExecNodeSetParams", {"hipGraphExecNodeSetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}}, // cudaGraphConditionalHandleCreate {"cuGraphConditionalHandleCreate", {"hipGraphConditionalHandleCreate", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}}, @@ -1651,12 +1651,13 @@ const std::map HIP_DRIVER_FUNCTION_VER_MAP { {"hipArrayGetDescriptor", {HIP_5060, HIP_0, HIP_0 }}, {"hipArray3DGetDescriptor", {HIP_5060, HIP_0, HIP_0 }}, {"hipDrvGraphAddMemcpyNode", {HIP_6000, HIP_0, HIP_0 }}, - {"hipDrvGraphMemcpyNodeGetParams", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, - {"hipDrvGraphMemcpyNodeSetParams", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, {"hipDrvGraphAddMemsetNode", {HIP_6010, HIP_0, HIP_0 }}, - {"hipDrvGraphAddMemFreeNode", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, - {"hipDrvGraphExecMemcpyNodeSetParams", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, {"hipTexRefGetBorderColor", {HIP_6010, HIP_6010, HIP_0 }}, + {"hipMemcpyAtoD", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipMemcpyDtoA", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipMemcpyAtoA", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipMemcpyAtoHAsync", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipMemcpyHtoAAsync", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, }; const std::map CUDA_DRIVER_FUNCTION_CHANGED_VER_MAP { diff --git a/src/CUDA2HIP_Runtime_API_functions.cpp b/src/CUDA2HIP_Runtime_API_functions.cpp index d5eb758c..74cbfbc1 100644 --- a/src/CUDA2HIP_Runtime_API_functions.cpp +++ b/src/CUDA2HIP_Runtime_API_functions.cpp @@ -80,7 +80,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // cuCtxGetFlags {"cudaSetDeviceFlags", {"hipSetDeviceFlags", "", CONV_DEVICE, API_RUNTIME, SEC::DEVICE}}, // no analogue - {"cudaSetValidDevices", {"hipSetValidDevices", "", CONV_DEVICE, API_RUNTIME, SEC::DEVICE, HIP_UNSUPPORTED}}, + {"cudaSetValidDevices", {"hipSetValidDevices", "", CONV_DEVICE, API_RUNTIME, SEC::DEVICE, HIP_EXPERIMENTAL}}, // cuDeviceGetTexture1DLinearMaxWidth {"cudaDeviceGetTexture1DLinearMaxWidth", {"hipDeviceGetTexture1DLinearMaxWidth", "", CONV_DEVICE, API_RUNTIME, SEC::DEVICE, HIP_UNSUPPORTED}}, // cuDeviceGetDefaultMemPool @@ -333,7 +333,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // NOTE: Not equal to cuMemcpy2D due to different signatures {"cudaMemcpy2D", {"hipMemcpy2D", "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY}}, // no analogue - {"cudaMemcpy2DArrayToArray", {"hipMemcpy2DArrayToArray", "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY, HIP_UNSUPPORTED}}, + {"cudaMemcpy2DArrayToArray", {"hipMemcpy2DArrayToArray", "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY, HIP_EXPERIMENTAL}}, // no analogue // NOTE: Not equal to cuMemcpy2DAsync due to different signatures {"cudaMemcpy2DAsync", {"hipMemcpy2DAsync", "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY}}, @@ -863,15 +863,15 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // cuGraphInstantiateWithParams {"cudaGraphInstantiateWithParams", {"hipGraphInstantiateWithParams", "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH, HIP_EXPERIMENTAL}}, // cuGraphExecGetFlags - {"cudaGraphExecGetFlags", {"hipGraphExecGetFlags", "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH, HIP_EXPERIMENTAL}}, + {"cudaGraphExecGetFlags", {"hipGraphExecGetFlags", "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH, HIP_UNSUPPORTED}}, // cuGraphAddNode {"cudaGraphAddNode", {"hipGraphAddNode", "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH, HIP_EXPERIMENTAL}}, // cuGraphAddNode_v2 {"cudaGraphAddNode_v2", {"hipGraphAddNode_v2", "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH, HIP_UNSUPPORTED}}, // cuGraphNodeSetParams - {"cudaGraphNodeSetParams", {"hipGraphNodeSetParams", "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH, HIP_EXPERIMENTAL}}, + {"cudaGraphNodeSetParams", {"hipGraphNodeSetParams", "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH, HIP_UNSUPPORTED}}, // cuGraphExecNodeSetParams - {"cudaGraphExecNodeSetParams", {"hipGraphExecNodeSetParams", "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH, HIP_EXPERIMENTAL}}, + {"cudaGraphExecNodeSetParams", {"hipGraphExecNodeSetParams", "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH, HIP_UNSUPPORTED}}, // cuGraphConditionalHandleCreate {"cudaGraphConditionalHandleCreate", {"hipGraphConditionalHandleCreate", "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH, HIP_UNSUPPORTED}}, @@ -1429,12 +1429,11 @@ const std::map HIP_RUNTIME_FUNCTION_VER_MAP { {"hipGraphExecExternalSemaphoresWaitNodeSetParams", {HIP_5070, HIP_0, HIP_0 }}, {"hipGraphInstantiateWithParams", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, {"hipGraphAddNode", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, - {"hipGraphExecGetFlags", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, - {"hipGraphNodeSetParams", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, - {"hipGraphExecNodeSetParams", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, {"hipGetProcAddress", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, {"hipGetFuncBySymbol", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, {"hipStreamBeginCaptureToGraph", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipSetValidDevices", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipMemcpy2DArrayToArray", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, }; const std::map CUDA_RUNTIME_FUNCTION_CHANGED_VER_MAP { diff --git a/src/HipifyAction.cpp b/src/HipifyAction.cpp index 889ac654..9258bca9 100644 --- a/src/HipifyAction.cpp +++ b/src/HipifyAction.cpp @@ -354,7 +354,7 @@ std::map> FuncArgCasts { { { { - {2, {e_add_const_argument, cw_None, "hipHostMallocDefault"}} + {3, {e_add_const_argument, cw_None, "hipHostMallocDefault"}} } } } @@ -2660,6 +2660,7 @@ bool HipifyAction::cudaHostFuncCall(const mat::MatchFinder::MatchResult &Result) auto it = FuncArgCasts.find(sName); if (it == FuncArgCasts.end()) return false; auto castStructs = it->second; + auto &SM = *Result.SourceManager; for (auto cc : castStructs) { if (cc.isToMIOpen != TranslateToMIOpen || cc.isToRoc != TranslateToRoc) continue; clang::LangOptions DefaultLangOptions; @@ -2668,17 +2669,17 @@ bool HipifyAction::cudaHostFuncCall(const mat::MatchFinder::MatchResult &Result) unsigned int argNum = c.first; clang::SmallString<40> XStr; llvm::raw_svector_ostream OS(XStr); - auto &SM = *Result.SourceManager; clang::SourceRange sr, replacementRange; clang::SourceLocation s, e; if (argNum < call->getNumArgs()) { sr = call->getArg(argNum)->getSourceRange(); replacementRange = getWriteRange(SM, { sr.getBegin(), sr.getEnd() }); - s = replacementRange.getBegin(); - e = replacementRange.getEnd(); } else { s = e = call->getEndLoc(); + replacementRange = getWriteRange(SM, { s, e }); } + s = replacementRange.getBegin(); + e = replacementRange.getEnd(); switch (c.second.castType) { case e_remove_argument: { @@ -2695,6 +2696,8 @@ bool HipifyAction::cudaHostFuncCall(const mat::MatchFinder::MatchResult &Result) s = prevComma->getLocation(); } } + replacementRange = getWriteRange(SM, { s, e }); + e = replacementRange.getEnd(); length = SM.getCharacterData(e) - SM.getCharacterData(s); break; } @@ -2727,6 +2730,8 @@ bool HipifyAction::cudaHostFuncCall(const mat::MatchFinder::MatchResult &Result) e = call->getArg(argNum + c.second.numberToMoveOrCopy)->getBeginLoc(); else e = call->getEndLoc(); + replacementRange = getWriteRange(SM, { s, e }); + e = replacementRange.getEnd(); length = SM.getCharacterData(e) - SM.getCharacterData(s); break; } diff --git a/src/Statistics.cpp b/src/Statistics.cpp index b79cdfbc..0902fb7e 100644 --- a/src/Statistics.cpp +++ b/src/Statistics.cpp @@ -541,6 +541,7 @@ std::string Statistics::getCudaVersion(const cudaVersions &ver) { case CUDNN_900: return "9.0.0"; case CUDNN_910: return "9.1.0"; case CUDNN_920: return "9.2.0"; + case CUDNN_930: return "9.3.0"; } return ""; } diff --git a/src/Statistics.h b/src/Statistics.h index b24cc48e..a775bda2 100644 --- a/src/Statistics.h +++ b/src/Statistics.h @@ -295,7 +295,8 @@ enum cudaVersions { CUDNN_900 = 900, CUDNN_910 = 910, CUDNN_920 = 920, - CUDNN_LATEST = CUDNN_920, + CUDNN_930 = 930, + CUDNN_LATEST = CUDNN_930, }; enum hipVersions { diff --git a/tests/unit_tests/synthetic/driver_functions.cu b/tests/unit_tests/synthetic/driver_functions.cu index 5da5ba83..3e2ee455 100644 --- a/tests/unit_tests/synthetic/driver_functions.cu +++ b/tests/unit_tests/synthetic/driver_functions.cu @@ -64,6 +64,7 @@ int main() { // CHECK-NEXT: hipTexRef texref; // CHECK-NEXT: hipJitOption jit_option; // CHECK-NEXT: hipArray_t array_; + // CHECK-NEXT: hipArray_t array_dst; // CHECK-NEXT: HIP_ARRAY3D_DESCRIPTOR ARRAY3D_DESCRIPTOR; // CHECK-NEXT: HIP_ARRAY_DESCRIPTOR ARRAY_DESCRIPTOR; // CHECK-NEXT: hipIpcEventHandle_t ipcEventHandle; @@ -93,6 +94,7 @@ int main() { CUtexref texref; CUjit_option jit_option; CUarray array_; + CUarray array_dst; CUDA_ARRAY3D_DESCRIPTOR ARRAY3D_DESCRIPTOR; CUDA_ARRAY_DESCRIPTOR ARRAY_DESCRIPTOR; CUipcEventHandle ipcEventHandle; @@ -436,6 +438,7 @@ int main() { void* dsthost = nullptr; size_t offset = 0; + size_t offset_dst = 0; // CUDA: CUresult CUDAAPI cuMemcpyAtoH(void *dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount); // HIP: hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, size_t count); // CHECK: result = hipMemcpyAtoH(dsthost, array_, offset, bytes); @@ -990,6 +993,41 @@ int main() { // CHECK: result = hipTexRefGetArray(&array_, texref); result = cuTexRefGetArray(&array_, texref); + // CUDA: CUresult CUDAAPI cuMemcpyAtoA_v2(CUarray dstArray, size_t dstOffset, CUarray srcArray, size_t srcOffset, size_t ByteCount); + // HIP: hipError_t hipMemcpyAtoA(hipArray_t dstArray, size_t dstOffset, hipArray_t srcArray, size_t srcOffset, size_t ByteCount); + // CHECK: result = hipMemcpyAtoA(array_dst, offset_dst, array_, offset, bytes); + // CHECK-NEXT: result = hipMemcpyAtoA(array_dst, offset_dst, array_, offset, bytes); + result = cuMemcpyAtoA(array_dst, offset_dst, array_, offset, bytes); + result = cuMemcpyAtoA_v2(array_dst, offset_dst, array_, offset, bytes); + + // CUDA: CUresult CUDAAPI cuMemcpyAtoD_v2(CUdeviceptr dstDevice, CUarray srcArray, size_t srcOffset, size_t ByteCount); + // HIP: hipError_t hipMemcpyAtoD(hipDeviceptr_t dstDevice, hipArray_t srcArray, size_t srcOffset, size_t ByteCount); + // CHECK: result = hipMemcpyAtoD(deviceptr, array_, offset, bytes); + // CHECK-NEXT: result = hipMemcpyAtoD(deviceptr, array_, offset, bytes); + result = cuMemcpyAtoD(deviceptr, array_, offset, bytes); + result = cuMemcpyAtoD_v2(deviceptr, array_, offset, bytes); + + // CUDA: CUresult CUDAAPI cuMemcpyDtoA_v2(CUarray dstArray, size_t dstOffset, CUdeviceptr srcDevice, size_t ByteCount); + // HIP: hipError_t hipMemcpyDtoA(hipArray_t dstArray, size_t dstOffset, hipDeviceptr_t srcDevice, size_t ByteCount); + // CHECK: result = hipMemcpyDtoA(array_, offset, deviceptr, bytes); + // CHECK-NEXT: result = hipMemcpyDtoA(array_, offset, deviceptr, bytes); + result = cuMemcpyDtoA(array_, offset, deviceptr, bytes); + result = cuMemcpyDtoA_v2(array_, offset, deviceptr, bytes); + + // CUDA: CUresult CUDAAPI cuMemcpyAtoHAsync_v2(void *dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount, CUstream hStream); + // HIP: hipError_t hipMemcpyAtoHAsync(void* dstHost, hipArray_t srcArray, size_t srcOffset, size_t ByteCount, hipStream_t stream); + // CHECK: result = hipMemcpyAtoHAsync(dsthost, array_, offset, bytes, stream); + // CHECK-NEXT: result = hipMemcpyAtoHAsync(dsthost, array_, offset, bytes, stream); + result = cuMemcpyAtoHAsync(dsthost, array_, offset, bytes, stream); + result = cuMemcpyAtoHAsync_v2(dsthost, array_, offset, bytes, stream); + + // CUDA: CUresult CUDAAPI cuMemcpyHtoAAsync_v2(CUarray dstArray, size_t dstOffset, const void *srcHost, size_t ByteCount, CUstream hStream); + // HIP: hipError_t hipMemcpyHtoAAsync(hipArray_t dstArray, size_t dstOffset, const void* srcHost, size_t ByteCount, hipStream_t stream); + // CHECK: result = hipMemcpyHtoAAsync(array_, offset, dsthost, bytes, stream); + // CHECK-NEXT: result = hipMemcpyHtoAAsync(array_, offset, dsthost, bytes, stream); + result = cuMemcpyHtoAAsync(array_, offset, dsthost, bytes, stream); + result = cuMemcpyHtoAAsync_v2(array_, offset, dsthost, bytes, stream); + #if CUDA_VERSION >= 8000 // CHECK: hipMemRangeAttribute MemoryRangeAttribute; // CHECK-NEXT: hipMemoryAdvise MemoryAdvise; @@ -1214,16 +1252,6 @@ int main() { // CHECK: result = hipGraphLaunch(graphExec, stream); result = cuGraphLaunch(graphExec, stream); - // CUDA: CUresult CUDAAPI cuGraphMemcpyNodeGetParams(CUgraphNode hNode, CUDA_MEMCPY3D *nodeParams); - // HIP: hipError_t hipDrvGraphMemcpyNodeGetParams(hipGraphNode_t node, hipMemcpy3DParms* pNodeParams); - // CHECK: result = hipDrvGraphMemcpyNodeGetParams(graphNode, &MEMCPY3D); - result = cuGraphMemcpyNodeGetParams(graphNode, &MEMCPY3D); - - // CUDA: CUresult CUDAAPI cuGraphMemcpyNodeSetParams(CUgraphNode hNode, const CUDA_MEMCPY3D *nodeParams); - // HIP: hipError_t hipDrvGraphMemcpyNodeSetParams(hipGraphNode_t node, const hipMemcpy3DParms* pNodeParams); - // CHECK: result = hipDrvGraphMemcpyNodeSetParams(graphNode, &MEMCPY3D); - result = cuGraphMemcpyNodeSetParams(graphNode, &MEMCPY3D); - // CUDA: CUresult CUDAAPI cuGraphMemsetNodeGetParams(CUgraphNode hNode, CUDA_MEMSET_NODE_PARAMS *nodeParams); // HIP: hipError_t hipGraphMemsetNodeGetParams(hipGraphNode_t node, hipMemsetParams* pNodeParams); // CHECK: result = hipGraphMemsetNodeGetParams(graphNode, &MEMSET_NODE_PARAMS); @@ -1435,11 +1463,6 @@ int main() { // HIP: hipError_t hipMemUnmap(void* ptr, size_t size); // CHECK: result = hipMemUnmap(deviceptr, bytes); result = cuMemUnmap(deviceptr, bytes); - - // CUDA: CUresult CUDAAPI cuGraphExecMemcpyNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_MEMCPY3D *copyParams, CUcontext ctx); - // HIP: hipError_t hipDrvGraphExecMemcpyNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, const HIP_MEMCPY3D* copyParams, hipCtx_t ctx); - // CHECK: result = hipDrvGraphExecMemcpyNodeSetParams(graphExec, graphNode, &MEMCPY3D, context); - result = cuGraphExecMemcpyNodeSetParams(graphExec, graphNode, &MEMCPY3D, context); #endif #if CUDA_VERSION >= 10020 && CUDA_VERSION < 12000 @@ -1777,11 +1800,6 @@ int main() { // CHECK: result = hipGraphMemAllocNodeGetParams(graphNode, &MEM_ALLOC_NODE_PARAMS); result = cuGraphMemAllocNodeGetParams(graphNode, &MEM_ALLOC_NODE_PARAMS); - // CUDA: CUresult CUDAAPI cuGraphAddMemFreeNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, CUdeviceptr dptr); - // HIP: hipError_t hipDrvGraphAddMemFreeNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, const hipGraphNode_t* pDependencies, size_t numDependencies, void* dev_ptr); - // CHECK: result = hipDrvGraphAddMemFreeNode(&graphNode, graph, &graphNode2, bytes, deviceptr); - result = cuGraphAddMemFreeNode(&graphNode, graph, &graphNode2, bytes, deviceptr); - // CUDA: CUresult CUDAAPI cuGraphMemFreeNodeGetParams(CUgraphNode hNode, CUdeviceptr *dptr_out); // HIP: hipError_t hipGraphMemFreeNodeGetParams(hipGraphNode_t node, void* dev_ptr); // CHECK: result = hipGraphMemFreeNodeGetParams(graphNode, &deviceptr); @@ -1868,11 +1886,6 @@ int main() { // HIP: hipError_t hipGraphInstantiateWithParams(hipGraphExec_t* pGraphExec, hipGraph_t graph, hipGraphInstantiateParams *instantiateParams); // CHECK: result = hipGraphInstantiateWithParams(&graphExec, graph, &GRAPH_INSTANTIATE_PARAMS); result = cuGraphInstantiateWithParams(&graphExec, graph, &GRAPH_INSTANTIATE_PARAMS); - - // CUDA: CUresult CUDAAPI cuGraphExecGetFlags(CUgraphExec hGraphExec, cuuint64_t *flags); - // HIP: hipError_t hipGraphExecGetFlags(hipGraphExec_t graphExec, unsigned long long* flags); - // CHECK: result = hipGraphExecGetFlags(graphExec, &ull); - result = cuGraphExecGetFlags(graphExec, &ull); #endif #if CUDA_VERSION >= 12020 @@ -1883,16 +1896,6 @@ int main() { // HIP: hipError_t hipGraphAddNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, const hipGraphNode_t *pDependencies, size_t numDependencies, hipGraphNodeParams *nodeParams); // CHECK: result = hipGraphAddNode(&graphNode, graph, &graphNode2, bytes, &graphNodeParams); result = cuGraphAddNode(&graphNode, graph, &graphNode2, bytes, &graphNodeParams); - - // CUDA: CUresult CUDAAPI cuGraphNodeSetParams(CUgraphNode hNode, CUgraphNodeParams *nodeParams); - // HIP: hipError_t hipGraphNodeSetParams(hipGraphNode_t node, hipGraphNodeParams *nodeParams); - // CHECK: result = hipGraphNodeSetParams(graphNode, &graphNodeParams); - result = cuGraphNodeSetParams(graphNode, &graphNodeParams); - - // CUDA: CUresult CUDAAPI cuGraphExecNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, CUgraphNodeParams *nodeParams); - // HIP: hipError_t hipGraphExecNodeSetParams(hipGraphExec_t graphExec, hipGraphNode_t node, hipGraphNodeParams* nodeParams); - // CHECK: result = hipGraphExecNodeSetParams(graphExec, graphNode, &graphNodeParams); - result = cuGraphExecNodeSetParams(graphExec, graphNode, &graphNodeParams); #endif #if CUDA_VERSION >= 12030 diff --git a/tests/unit_tests/synthetic/runtime_functions.cu b/tests/unit_tests/synthetic/runtime_functions.cu index 80f743fd..1422edd6 100644 --- a/tests/unit_tests/synthetic/runtime_functions.cu +++ b/tests/unit_tests/synthetic/runtime_functions.cu @@ -25,7 +25,9 @@ int main() { size_t width = 0; size_t height = 0; size_t wOffset = 0; + size_t wOffset_src = 0; size_t hOffset = 0; + size_t hOffset_src = 0; size_t pitch = 0; size_t pitch_2 = 0; int device = 0; @@ -825,6 +827,16 @@ int main() { // CUDA: template static __inline__ __host__ CUDART_DEVICE cudaError_t cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags(int* minGridSize, int* blockSize, T func, UnaryFunction blockSizeToDynamicSMemSize, int blockSizeLimit = 0, unsigned int flags = 0); // HIP: template static hipError_t __host__ inline hipOccupancyMaxPotentialBlockSizeVariableSMemWithFlags(int* min_grid_size, int* block_size, T func, UnaryFunction block_size_to_dynamic_smem_size, int block_size_limit = 0, unsigned int flags = 0); + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaSetValidDevices(int *device_arr, int len); + // HIP: hipError_t hipSetValidDevices(int* device_arr, int len); + // CHECK: result = hipSetValidDevices(&device, intVal); + result = cudaSetValidDevices(&device, intVal); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DArrayToArray(cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)); + // HIP: hipError_t hipMemcpy2DArrayToArray(hipArray_t dst, size_t wOffsetDst, size_t hOffsetDst, hipArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, hipMemcpyKind kind); + // CHECK: result = hipMemcpy2DArrayToArray(Array_t, wOffset, hOffset, Array_const_t, wOffset_src, hOffset_src, width, height, MemcpyKind); + result = cudaMemcpy2DArrayToArray(Array_t, wOffset, hOffset, Array_const_t, wOffset_src, hOffset_src, width, height, MemcpyKind); + #if CUDA_VERSION >= 8000 // CHECK: hipDeviceP2PAttr DeviceP2PAttr; cudaDeviceP2PAttr DeviceP2PAttr; @@ -1594,13 +1606,6 @@ int main() { result = cudaUnbindTexture(texref); #endif -#if CUDA_VERSION >= 12000 - // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphExecGetFlags(cudaGraphExec_t graphExec, unsigned long long *flags); - // HIP: hipError_t hipGraphExecGetFlags(hipGraphExec_t graphExec, unsigned long long* flags); - // CHECK: result = hipGraphExecGetFlags(GraphExec_t, &ull_2); - result = cudaGraphExecGetFlags(GraphExec_t, &ull_2); -#endif - #if CUDA_VERSION >= 12020 // CHECK: hipGraphNodeParams *graphNodeParams = nullptr; cudaGraphNodeParams *graphNodeParams = nullptr; @@ -1609,16 +1614,6 @@ int main() { // HIP: hipError_t hipGraphAddNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, const hipGraphNode_t *pDependencies, size_t numDependencies, hipGraphNodeParams *nodeParams); // CHECK: result = hipGraphAddNode(&graphNode, Graph_t, &graphNode_2, bytes, graphNodeParams); result = cudaGraphAddNode(&graphNode, Graph_t, &graphNode_2, bytes, graphNodeParams); - - // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphNodeSetParams(cudaGraphNode_t node, struct cudaGraphNodeParams *nodeParams); - // HIP: hipError_t hipGraphNodeSetParams(hipGraphNode_t node, hipGraphNodeParams *nodeParams); - // CHECK: result = hipGraphNodeSetParams(graphNode, graphNodeParams); - result = cudaGraphNodeSetParams(graphNode, graphNodeParams); - - // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphExecNodeSetParams(cudaGraphExec_t graphExec, cudaGraphNode_t node, struct cudaGraphNodeParams *nodeParams); - // HIP: hipError_t hipGraphExecNodeSetParams(hipGraphExec_t graphExec, hipGraphNode_t node, hipGraphNodeParams* nodeParams); - // CHECK: result = hipGraphExecNodeSetParams(GraphExec_t, graphNode, graphNodeParams); - result = cudaGraphExecNodeSetParams(GraphExec_t, graphNode, graphNodeParams); #endif #if CUDA_VERSION >= 12030 diff --git a/tests/unit_tests/synthetic/transforming_matchers.cu b/tests/unit_tests/synthetic/transforming_matchers.cu new file mode 100644 index 00000000..9ade8811 --- /dev/null +++ b/tests/unit_tests/synthetic/transforming_matchers.cu @@ -0,0 +1,58 @@ +// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args + +// CHECK: #include + +template +void check(T result, char const *const func, const char *const file, int const line) { + if (result) { + fprintf(stderr, "Error at %s:%d code=%d(%s) \" \n", file, line, static_cast(result), func); + exit(EXIT_FAILURE); + } +} + +#define checkErrors(val) check((val), #val, __FILE__, __LINE__) +#define num 1024 + +int main(int argc, const char *argv[]) { + int *input = nullptr; + void *input_ptr = nullptr; + int deviceCount = 0; + // CHECK: checkErrors(hipGetDeviceCount(&deviceCount)); + checkErrors(cudaGetDeviceCount(&deviceCount)); + printf("Device Count: %d\n", deviceCount); + // CHECK: hipDeviceProp_t deviceProp; + cudaDeviceProp deviceProp; + deviceProp.major = 0; + deviceProp.minor = 0; + int deviceID = 0; + // CHECK: checkErrors(hipGetDeviceProperties(&deviceProp, deviceID)); + checkErrors(cudaGetDeviceProperties(&deviceProp, deviceID)); + // CHECK: checkErrors(hipSetDevice(deviceID)); + checkErrors(cudaSetDevice(deviceID)); + // CHECK: checkErrors(hipHostMalloc(&input, sizeof(int) * num * 2, hipHostMallocDefault)); + checkErrors(cudaMallocHost(&input, sizeof(int) * num * 2)); + // CHECK: checkErrors(hipHostMalloc(&input_ptr, sizeof(int) * num * 2, hipHostMallocDefault)); + checkErrors(cudaMallocHost(&input_ptr, sizeof(int) * num * 2)); + for (int i = 0; i < num * 2; ++i) { + input[i] = i; + } + + int *value = 0; + int *value_2 = 0; + int iBlockSize = 0; + int iBlockSize_2 = 0; + size_t bytes = 0; + // CHECK: hipFunction_t function; + CUfunction function; + // CHECK: void* occupancyB2DSize; + CUoccupancyB2DSize occupancyB2DSize; + + // CHECK: checkErrors(hipModuleOccupancyMaxPotentialBlockSizeWithFlags(value, value_2, function, bytes, iBlockSize, iBlockSize_2)); + checkErrors(cuOccupancyMaxPotentialBlockSizeWithFlags(value, value_2, function, occupancyB2DSize, bytes, iBlockSize, iBlockSize_2)); + + // CHECK: checkErrors(hipHostFree(input)); + checkErrors(cudaFreeHost(input)); + // CHECK: checkErrors(hipDeviceSynchronize()); + checkErrors(cudaDeviceSynchronize()); + return 0; +}