From d701aadf83a0a00808e458d35a304707d00b1848 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 18 Jul 2022 19:32:56 +0200 Subject: [PATCH 01/14] [HIPIFY][tests][fix] Synthetic test for CUDA Runtime API functions - Part 9 + Added missing (`cuGraphClone`, `cudaGraphClone`) -> `hipGraphClone` (from ROCm 5.0.0) + Populated `driver_functions.cu` with missing test for `cuGraphClone` + Fixed the typo: `cuTexObjectGetTextureDesc` was instead of `cudaGetTextureObjectTextureDesc` in Runtime API + Fixed: A few CUDA Runtime functions were erroneously marked as `CUDA_90` + Update docs and hipify-perl accordingly --- bin/hipify-perl | 5 +- ...A_Driver_API_functions_supported_by_HIP.md | 2 +- ..._Runtime_API_functions_supported_by_HIP.md | 14 +- src/CUDA2HIP_Driver_API_functions.cpp | 2 +- src/CUDA2HIP_Runtime_API_functions.cpp | 13 +- .../unit_tests/synthetic/driver_functions.cu | 7 +- .../unit_tests/synthetic/runtime_functions.cu | 139 +++++++++++++++++- 7 files changed, 153 insertions(+), 29 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 45e1e372..d840b527 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1935,6 +1935,7 @@ sub simpleSubstitutions { subst("cuGraphAddHostNode", "hipGraphAddHostNode", "graph"); subst("cuGraphAddKernelNode", "hipGraphAddKernelNode", "graph"); subst("cuGraphChildGraphNodeGetGraph", "hipGraphChildGraphNodeGetGraph", "graph"); + subst("cuGraphClone", "hipGraphClone", "graph"); subst("cuGraphCreate", "hipGraphCreate", "graph"); subst("cuGraphDestroy", "hipGraphDestroy", "graph"); subst("cuGraphDestroyNode", "hipGraphDestroyNode", "graph"); @@ -1982,6 +1983,7 @@ sub simpleSubstitutions { subst("cudaGraphAddMemcpyNodeToSymbol", "hipGraphAddMemcpyNodeToSymbol", "graph"); subst("cudaGraphAddMemsetNode", "hipGraphAddMemsetNode", "graph"); subst("cudaGraphChildGraphNodeGetGraph", "hipGraphChildGraphNodeGetGraph", "graph"); + subst("cudaGraphClone", "hipGraphClone", "graph"); subst("cudaGraphCreate", "hipGraphCreate", "graph"); subst("cudaGraphDestroy", "hipGraphDestroy", "graph"); subst("cudaGraphDestroyNode", "hipGraphDestroyNode", "graph"); @@ -2075,6 +2077,7 @@ sub simpleSubstitutions { subst("cudaGetTextureAlignmentOffset", "hipGetTextureAlignmentOffset", "texture"); subst("cudaGetTextureObjectResourceDesc", "hipGetTextureObjectResourceDesc", "texture"); subst("cudaGetTextureObjectResourceViewDesc", "hipGetTextureObjectResourceViewDesc", "texture"); + subst("cudaGetTextureObjectTextureDesc", "hipGetTextureObjectTextureDesc", "texture"); subst("cudaGetTextureReference", "hipGetTextureReference", "texture"); subst("cudaUnbindTexture", "hipUnbindTexture", "texture"); subst("cudaCreateSurfaceObject", "hipCreateSurfaceObject", "surface"); @@ -6132,7 +6135,6 @@ sub warnUnsupportedFunctions { "cudaGraphDebugDotFlagsExtSemasSignalNodeParams", "cudaGraphDebugDotFlagsEventNodeParams", "cudaGraphDebugDotFlags", - "cudaGraphClone", "cudaGraphAddMemFreeNode", "cudaGraphAddMemAllocNode", "cudaGraphAddExternalSemaphoresWaitNode", @@ -6573,7 +6575,6 @@ sub warnUnsupportedFunctions { "cuGraphExecExternalSemaphoresSignalNodeSetParams", "cuGraphExecBatchMemOpNodeSetParams", "cuGraphDebugDotPrint", - "cuGraphClone", "cuGraphBatchMemOpNodeSetParams", "cuGraphBatchMemOpNodeGetParams", "cuGraphAddMemsetNode", diff --git a/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md b/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md index 976c1df1..1ed82e98 100644 --- a/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md +++ b/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md @@ -1525,7 +1525,7 @@ |`cuGraphBatchMemOpNodeGetParams`|11.7| | | | | | | | |`cuGraphBatchMemOpNodeSetParams`|11.7| | | | | | | | |`cuGraphChildGraphNodeGetGraph`|10.0| | |`hipGraphChildGraphNodeGetGraph`|5.0.0| | | | -|`cuGraphClone`|10.0| | | | | | | | +|`cuGraphClone`|10.0| | |`hipGraphClone`|5.0.0| | | | |`cuGraphCreate`|10.0| | |`hipGraphCreate`|4.3.0| | | | |`cuGraphDebugDotPrint`|11.3| | | | | | | | |`cuGraphDestroy`|10.0| | |`hipGraphDestroy`|4.3.0| | | | diff --git a/doc/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md b/doc/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md index ac961553..4878393c 100644 --- a/doc/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md +++ b/doc/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md @@ -392,26 +392,26 @@ |**CUDA**|**A**|**D**|**R**|**HIP**|**A**|**D**|**R**|**E**| |:--|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:| -|`cuTexObjectGetTextureDesc`|9.0| | |`hipGetTextureObjectTextureDesc`|1.7.0| | | | |`cudaCreateTextureObject`| | | |`hipCreateTextureObject`|1.7.0| | | | |`cudaDestroyTextureObject`| | | |`hipDestroyTextureObject`|1.7.0| | | | |`cudaGetTextureObjectResourceDesc`| | | |`hipGetTextureObjectResourceDesc`|1.7.0| | | | |`cudaGetTextureObjectResourceViewDesc`| | | |`hipGetTextureObjectResourceViewDesc`|1.7.0| | | | +|`cudaGetTextureObjectTextureDesc`| | | |`hipGetTextureObjectTextureDesc`|1.7.0| | | | ## **28. Surface Object Management** |**CUDA**|**A**|**D**|**R**|**HIP**|**A**|**D**|**R**|**E**| |:--|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:| -|`cudaCreateSurfaceObject`|9.0| | |`hipCreateSurfaceObject`|1.9.0| | | | -|`cudaDestroySurfaceObject`|9.0| | |`hipDestroySurfaceObject`|1.9.0| | | | -|`cudaGetSurfaceObjectResourceDesc`|9.0| | | | | | | | +|`cudaCreateSurfaceObject`| | | |`hipCreateSurfaceObject`|1.9.0| | | | +|`cudaDestroySurfaceObject`| | | |`hipDestroySurfaceObject`|1.9.0| | | | +|`cudaGetSurfaceObjectResourceDesc`| | | | | | | | | ## **29. Version Management** |**CUDA**|**A**|**D**|**R**|**HIP**|**A**|**D**|**R**|**E**| |:--|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:| -|`cudaDriverGetVersion`|9.0| | |`hipDriverGetVersion`|1.6.0| | | | -|`cudaRuntimeGetVersion`|9.0| | |`hipRuntimeGetVersion`|1.6.0| | | | +|`cudaDriverGetVersion`| | | |`hipDriverGetVersion`|1.6.0| | | | +|`cudaRuntimeGetVersion`| | | |`hipRuntimeGetVersion`|1.6.0| | | | ## **30. Graph Management** @@ -437,7 +437,7 @@ |`cudaGraphAddMemcpyNodeToSymbol`|11.1| | |`hipGraphAddMemcpyNodeToSymbol`|5.0.0| | | | |`cudaGraphAddMemsetNode`|10.0| | |`hipGraphAddMemsetNode`|4.3.0| | | | |`cudaGraphChildGraphNodeGetGraph`|10.0| | |`hipGraphChildGraphNodeGetGraph`|5.0.0| | | | -|`cudaGraphClone`|10.0| | | | | | | | +|`cudaGraphClone`|10.0| | |`hipGraphClone`|5.0.0| | | | |`cudaGraphCreate`|10.0| | |`hipGraphCreate`|4.3.0| | | | |`cudaGraphDebugDotPrint`|11.3| | | | | | | | |`cudaGraphDestroy`|10.0| | |`hipGraphDestroy`|4.3.0| | | | diff --git a/src/CUDA2HIP_Driver_API_functions.cpp b/src/CUDA2HIP_Driver_API_functions.cpp index a3f4d2b3..81016ddc 100644 --- a/src/CUDA2HIP_Driver_API_functions.cpp +++ b/src/CUDA2HIP_Driver_API_functions.cpp @@ -591,7 +591,7 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { // cudaGraphChildGraphNodeGetGraph {"cuGraphChildGraphNodeGetGraph", {"hipGraphChildGraphNodeGetGraph", "", CONV_GRAPH, API_DRIVER, 21}}, // cudaGraphClone - {"cuGraphClone", {"hipGraphClone", "", CONV_GRAPH, API_DRIVER, 21, HIP_UNSUPPORTED}}, + {"cuGraphClone", {"hipGraphClone", "", CONV_GRAPH, API_DRIVER, 21}}, // cudaGraphCreate {"cuGraphCreate", {"hipGraphCreate", "", CONV_GRAPH, API_DRIVER, 21}}, // cudaGraphDebugDotPrint diff --git a/src/CUDA2HIP_Runtime_API_functions.cpp b/src/CUDA2HIP_Runtime_API_functions.cpp index 3c525c73..626add5a 100644 --- a/src/CUDA2HIP_Runtime_API_functions.cpp +++ b/src/CUDA2HIP_Runtime_API_functions.cpp @@ -640,8 +640,8 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // cuTexObjectGetResourceViewDesc {"cudaGetTextureObjectResourceViewDesc", {"hipGetTextureObjectResourceViewDesc", "", CONV_TEXTURE, API_RUNTIME, 27}}, // no analogue - // NOTE: Not equal to cudaGetTextureObjectTextureDesc due to different signatures - {"cuTexObjectGetTextureDesc", {"hipGetTextureObjectTextureDesc", "", CONV_TEXTURE, API_RUNTIME, 27}}, + // NOTE: Not equal to cuTexObjectGetTextureDesc due to different signatures + {"cudaGetTextureObjectTextureDesc", {"hipGetTextureObjectTextureDesc", "", CONV_TEXTURE, API_RUNTIME, 27}}, // 28. Surface Object Management // no analogue @@ -683,7 +683,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // cuGraphChildGraphNodeGetGraph {"cudaGraphChildGraphNodeGetGraph", {"hipGraphChildGraphNodeGetGraph", "", CONV_GRAPH, API_RUNTIME, 30}}, // cuGraphClone - {"cudaGraphClone", {"hipGraphClone", "", CONV_GRAPH, API_RUNTIME, 30, HIP_UNSUPPORTED}}, + {"cudaGraphClone", {"hipGraphClone", "", CONV_GRAPH, API_RUNTIME, 30}}, // cuGraphCreate {"cudaGraphCreate", {"hipGraphCreate", "", CONV_GRAPH, API_RUNTIME, 30}}, // cuGraphDebugDotPrint @@ -963,12 +963,6 @@ const std::map CUDA_RUNTIME_FUNCTION_VER_MAP { {"cudaUnbindTexture", {CUDA_0, CUDA_110, CUDA_0 }}, {"cudaBindSurfaceToArray", {CUDA_0, CUDA_110, CUDA_0 }}, {"cudaGetSurfaceReference", {CUDA_0, CUDA_110, CUDA_0 }}, - {"cuTexObjectGetTextureDesc", {CUDA_90, CUDA_0, CUDA_0 }}, - {"cudaCreateSurfaceObject", {CUDA_90, CUDA_0, CUDA_0 }}, - {"cudaDestroySurfaceObject", {CUDA_90, CUDA_0, CUDA_0 }}, - {"cudaGetSurfaceObjectResourceDesc", {CUDA_90, CUDA_0, CUDA_0 }}, - {"cudaDriverGetVersion", {CUDA_90, CUDA_0, CUDA_0 }}, - {"cudaRuntimeGetVersion", {CUDA_90, CUDA_0, CUDA_0 }}, {"cudaGraphAddChildGraphNode", {CUDA_100, CUDA_0, CUDA_0 }}, {"cudaGraphAddDependencies", {CUDA_100, CUDA_0, CUDA_0 }}, {"cudaGraphAddEmptyNode", {CUDA_100, CUDA_0, CUDA_0 }}, @@ -1288,6 +1282,7 @@ const std::map HIP_RUNTIME_FUNCTION_VER_MAP { {"hipGraphEventWaitNodeGetEvent", {HIP_5000, HIP_0, HIP_0 }}, {"hipGraphEventWaitNodeSetEvent", {HIP_5000, HIP_0, HIP_0 }}, {"hipGraphExecEventWaitNodeSetEvent", {HIP_5000, HIP_0, HIP_0 }}, + {"hipGraphClone", {HIP_5000, HIP_0, HIP_0 }}, {"hipDeviceGetDefaultMemPool", {HIP_5020, HIP_0, HIP_0, HIP_LATEST}}, {"hipDeviceSetMemPool", {HIP_5020, HIP_0, HIP_0, HIP_LATEST}}, {"hipDeviceGetMemPool", {HIP_5020, HIP_0, HIP_0, HIP_LATEST}}, diff --git a/tests/unit_tests/synthetic/driver_functions.cu b/tests/unit_tests/synthetic/driver_functions.cu index 7a2b6b5c..7c557999 100644 --- a/tests/unit_tests/synthetic/driver_functions.cu +++ b/tests/unit_tests/synthetic/driver_functions.cu @@ -1031,6 +1031,11 @@ int main() { // HIP: hipError_t hipGraphChildGraphNodeGetGraph(hipGraphNode_t node, hipGraph_t* pGraph); // CHECK: result = hipGraphChildGraphNodeGetGraph(graphNode, &graph); result = cuGraphChildGraphNodeGetGraph(graphNode, &graph); + + // CUDA: CUresult CUDAAPI cuGraphClone(CUgraph *phGraphClone, CUgraph originalGraph); + // HIP: hipError_t hipGraphClone(hipGraph_t* pGraphClone, hipGraph_t originalGraph); + // CHECK: result = hipGraphClone(&graph, graph2); + result = cuGraphClone(&graph, graph2); #endif #if CUDA_VERSION > 10000 @@ -1536,12 +1541,10 @@ int main() { // CHECK: result = hipTexObjectGetResourceViewDesc(&res_view_descr, texObject); result = cuTexObjectGetResourceViewDesc(&res_view_descr, texObject); -#if CUDA_VERSION >= 9000 // CUDA: CUresult CUDAAPI cuTexObjectGetTextureDesc(CUDA_TEXTURE_DESC *pTexDesc, CUtexObject texObject); // HIP: hipError_t hipTexObjectGetTextureDesc(HIP_TEXTURE_DESC* pTexDesc, hipTextureObject_t texObject); // CHECK: result = hipTexObjectGetTextureDesc(&tex_descr, texObject); result = cuTexObjectGetTextureDesc(&tex_descr, texObject); -#endif // CUDA: CUresult CUDAAPI cuCtxEnablePeerAccess(CUcontext peerContext, unsigned int Flags); // HIP: DEPRECATED(DEPRECATED_MSG) hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags); diff --git a/tests/unit_tests/synthetic/runtime_functions.cu b/tests/unit_tests/synthetic/runtime_functions.cu index 29648536..cd6663bc 100644 --- a/tests/unit_tests/synthetic/runtime_functions.cu +++ b/tests/unit_tests/synthetic/runtime_functions.cu @@ -57,6 +57,9 @@ int main() { cudaError_t Error_t; cudaStream_t stream; + // CHECK: hipMemcpy3DParms Memcpy3DParms; + cudaMemcpy3DParms Memcpy3DParms; + #if CUDA_VERSION >= 8000 // CHECK: hipDeviceP2PAttr DeviceP2PAttr; cudaDeviceP2PAttr DeviceP2PAttr; @@ -133,8 +136,8 @@ int main() { // CHECK: result = hipStreamBeginCapture(stream, StreamCaptureMode); result = cudaStreamBeginCapture(stream, StreamCaptureMode); - // CHECK: hipGraph_t Graph_t; - cudaGraph_t Graph_t; + // CHECK: hipGraph_t Graph_t, Graph_t_2; + cudaGraph_t Graph_t, Graph_t_2; // CUDA: extern __host__ cudaError_t CUDARTAPI cudaStreamEndCapture(cudaStream_t stream, cudaGraph_t *pGraph); // HIP: hipError_t hipStreamEndCapture(hipStream_t stream, hipGraph_t* pGraph); @@ -209,6 +212,73 @@ int main() { // HIP: hipError_t hipLaunchHostFunc(hipStream_t stream, hipHostFn_t fn, void* userData); // CHECK: result = hipLaunchHostFunc(stream, hostFn, image); result = cudaLaunchHostFunc(stream, hostFn, image); + + // CHECK: hipGraphNode_t graphNode, graphNode_2; + cudaGraphNode_t graphNode, graphNode_2; + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphAddChildGraphNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, cudaGraph_t childGraph); + // HIP: hipError_t hipGraphAddChildGraphNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, const hipGraphNode_t* pDependencies, size_t numDependencies, hipGraph_t childGraph); + // CHECK: result = hipGraphAddChildGraphNode(&graphNode, Graph_t, &graphNode_2, bytes, Graph_t_2); + result = cudaGraphAddChildGraphNode(&graphNode, Graph_t, &graphNode_2, bytes, Graph_t_2); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphAddDependencies(cudaGraph_t graph, const cudaGraphNode_t *from, const cudaGraphNode_t *to, size_t numDependencies); + // HIP: hipError_t hipGraphAddDependencies(hipGraph_t graph, const hipGraphNode_t* from, const hipGraphNode_t* to, size_t numDependencies); + // CHECK: result = hipGraphAddDependencies(Graph_t, &graphNode, &graphNode_2, bytes); + result = cudaGraphAddDependencies(Graph_t, &graphNode, &graphNode_2, bytes); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphAddEmptyNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies); + // HIP: hipError_t hipGraphAddEmptyNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, const hipGraphNode_t* pDependencies, size_t numDependencies); + // CHECK: result = hipGraphAddEmptyNode(&graphNode, Graph_t, &graphNode_2, bytes); + result = cudaGraphAddEmptyNode(&graphNode, Graph_t, &graphNode_2, bytes); + + // CHECK: hipHostNodeParams HostNodeParams; + cudaHostNodeParams HostNodeParams; + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphAddHostNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, const struct cudaHostNodeParams *pNodeParams); + // HIP: hipError_t hipGraphAddHostNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, const hipGraphNode_t* pDependencies, size_t numDependencies, const hipHostNodeParams* pNodeParams); + // CHECK: result = hipGraphAddHostNode(&graphNode, Graph_t, &graphNode_2, bytes, &HostNodeParams); + result = cudaGraphAddHostNode(&graphNode, Graph_t, &graphNode_2, bytes, &HostNodeParams); + + // CHECK: hipKernelNodeParams KernelNodeParams; + cudaKernelNodeParams KernelNodeParams; + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphAddKernelNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, const struct cudaKernelNodeParams *pNodeParams); + // HIP: hipError_t hipGraphAddKernelNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, const hipGraphNode_t* pDependencies, size_t numDependencies, const hipKernelNodeParams* pNodeParams); + // CHECK: result = hipGraphAddKernelNode(&graphNode, Graph_t, &graphNode_2, bytes, &KernelNodeParams); + result = cudaGraphAddKernelNode(&graphNode, Graph_t, &graphNode_2, bytes, &KernelNodeParams); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, const struct cudaMemcpy3DParms *pCopyParams); + // HIP: hipError_t hipGraphAddMemcpyNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, const hipGraphNode_t* pDependencies, size_t numDependencies, const hipMemcpy3DParms* pCopyParams); + // CHECK: result = hipGraphAddMemcpyNode(&graphNode, Graph_t, &graphNode_2, bytes, &Memcpy3DParms); + result = cudaGraphAddMemcpyNode(&graphNode, Graph_t, &graphNode_2, bytes, &Memcpy3DParms); + + // CHECK: hipMemsetParams MemsetParams; + cudaMemsetParams MemsetParams; + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemsetNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, const struct cudaMemsetParams *pMemsetParams); + // HIP: hipError_t hipGraphAddMemsetNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, const hipGraphNode_t* pDependencies, size_t numDependencies, const hipMemsetParams* pMemsetParams); + // CHECK: result = hipGraphAddMemsetNode(&graphNode, Graph_t, &graphNode_2, bytes, &MemsetParams); + result = cudaGraphAddMemsetNode(&graphNode, Graph_t, &graphNode_2, bytes, &MemsetParams); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphChildGraphNodeGetGraph(cudaGraphNode_t node, cudaGraph_t *pGraph); + // HIP: hipError_t hipGraphChildGraphNodeGetGraph(hipGraphNode_t node, hipGraph_t* pGraph); + // CHECK: result = hipGraphChildGraphNodeGetGraph(graphNode, &Graph_t); + result = cudaGraphChildGraphNodeGetGraph(graphNode, &Graph_t); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphClone(cudaGraph_t *pGraphClone, cudaGraph_t originalGraph); + // HIP: hipError_t hipGraphClone(hipGraph_t* pGraphClone, hipGraph_t originalGraph); + // CHECK: result = hipGraphClone(&Graph_t, Graph_t_2); + result = cudaGraphClone(&Graph_t, Graph_t_2); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphCreate(cudaGraph_t *pGraph, unsigned int flags); + // HIP: hipError_t hipGraphCreate(hipGraph_t* pGraph, unsigned int flags); + // CHECK: result = hipGraphCreate(&Graph_t, flags); + result = cudaGraphCreate(&Graph_t, flags); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphDestroy(cudaGraph_t graph); + // HIP: hipError_t hipGraphDestroy(hipGraph_t graph); + // CHECK: result = hipGraphDestroy(Graph_t); + result = cudaGraphDestroy(Graph_t); #endif #if CUDA_VERSION >= 10010 @@ -236,8 +306,6 @@ int main() { cudaKernelNodeAttrID kernelNodeAttrID; // CHECK: hipKernelNodeAttrValue kernelNodeAttrValue; cudaKernelNodeAttrValue kernelNodeAttrValue; - // CHECK: hipGraphNode_t graphNode; - cudaGraphNode_t graphNode; // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeSetAttribute(cudaGraphNode_t hNode, enum cudaKernelNodeAttrID attr, const union cudaKernelNodeAttrValue* value); // HIP: hipError_t hipGraphKernelNodeSetAttribute(hipGraphNode_t hNode, hipKernelNodeAttrID attr, const hipKernelNodeAttrValue* value); @@ -833,9 +901,6 @@ int main() { // CHECK: result = hipMemcpy2DToArrayAsync(Array_t, wOffset, hOffset, deviceptr_2, pitch, width, height, MemcpyKind, stream); result = cudaMemcpy2DToArrayAsync(Array_t, wOffset, hOffset, deviceptr_2, pitch, width, height, MemcpyKind, stream); - // CHECK: hipMemcpy3DParms Memcpy3DParms; - cudaMemcpy3DParms Memcpy3DParms; - // CUDA: extern __host__ cudaError_t CUDARTAPI cudaMemcpy3D(const struct cudaMemcpy3DParms *p); // HIP: hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p); // CHECK: result = hipMemcpy3D(&Memcpy3DParms); @@ -1058,5 +1123,65 @@ int main() { // CHECK: result = hipUnbindTexture(texref); result = cudaUnbindTexture(texref); + // CHECK: hipTextureObject_t TextureObject_t; + cudaTextureObject_t TextureObject_t; + + // CHECK: hipResourceDesc ResourceDesc; + cudaResourceDesc ResourceDesc; + + // CHECK: hipTextureDesc TextureDesc; + cudaTextureDesc TextureDesc; + + // CHECK: hipResourceViewDesc ResourceViewDesc; + cudaResourceViewDesc ResourceViewDesc; + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaCreateTextureObject(cudaTextureObject_t *pTexObject, const struct cudaResourceDesc *pResDesc, const struct cudaTextureDesc *pTexDesc, const struct cudaResourceViewDesc *pResViewDesc); + // HIP: hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResourceDesc* pResDesc, const hipTextureDesc* pTexDesc, const struct hipResourceViewDesc* pResViewDesc); + // CHECK: result = hipCreateTextureObject(&TextureObject_t, &ResourceDesc, &TextureDesc, &ResourceViewDesc); + result = cudaCreateTextureObject(&TextureObject_t, &ResourceDesc, &TextureDesc, &ResourceViewDesc); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaDestroyTextureObject(cudaTextureObject_t texObject); + // HIP: hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject); + // CHECK: result = hipDestroyTextureObject(TextureObject_t); + result = cudaDestroyTextureObject(TextureObject_t); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceDesc(struct cudaResourceDesc *pResDesc, cudaTextureObject_t texObject); + // HIP: hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc, hipTextureObject_t textureObject); + // CHECK: result = hipGetTextureObjectResourceDesc(&ResourceDesc, TextureObject_t); + result = cudaGetTextureObjectResourceDesc(&ResourceDesc, TextureObject_t); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceViewDesc(struct cudaResourceViewDesc *pResViewDesc, cudaTextureObject_t texObject); + // HIP: hipError_t hipGetTextureObjectResourceViewDesc(struct hipResourceViewDesc* pResViewDesc, hipTextureObject_t textureObject); + // CHECK: result = hipGetTextureObjectResourceViewDesc(&ResourceViewDesc, TextureObject_t); + result = cudaGetTextureObjectResourceViewDesc(&ResourceViewDesc, TextureObject_t); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectTextureDesc(struct cudaTextureDesc *pTexDesc, cudaTextureObject_t texObject); + // HIP: hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc, hipTextureObject_t textureObject); + // CHECK: result = hipGetTextureObjectTextureDesc(&TextureDesc, TextureObject_t); + result = cudaGetTextureObjectTextureDesc(&TextureDesc, TextureObject_t); + + // CHECK: hipSurfaceObject_t SurfaceObject_t; + cudaSurfaceObject_t SurfaceObject_t; + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaCreateSurfaceObject(cudaSurfaceObject_t *pSurfObject, const struct cudaResourceDesc *pResDesc); + // HIP: hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject, const hipResourceDesc* pResDesc); + // CHECK: result = hipCreateSurfaceObject(&SurfaceObject_t, &ResourceDesc); + result = cudaCreateSurfaceObject(&SurfaceObject_t, &ResourceDesc); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaDestroySurfaceObject(cudaSurfaceObject_t surfObject); + // HIP: hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject); + // CHECK: result = hipDestroySurfaceObject(SurfaceObject_t); + result = cudaDestroySurfaceObject(SurfaceObject_t); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaDriverGetVersion(int *driverVersion); + // HIP: hipError_t hipDriverGetVersion(int* driverVersion); + // CHECK: result = hipDriverGetVersion(&intVal); + result = cudaDriverGetVersion(&intVal); + + // CUDA: extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaRuntimeGetVersion(int *runtimeVersion); + // HIP: hipError_t hipRuntimeGetVersion(int* runtimeVersion); + // CHECK: result = hipRuntimeGetVersion(&intVal); + result = cudaRuntimeGetVersion(&intVal); + return 0; } From b0db68666c936f57c7f01e783a7d96ff5804a95c Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Fri, 22 Jul 2022 19:10:36 +0200 Subject: [PATCH 02/14] [HIPIFY][#407][tests] Synthetic test for CUDA Runtime API functions - Part 10 --- .../unit_tests/synthetic/runtime_functions.cu | 139 ++++++++++++++++++ 1 file changed, 139 insertions(+) diff --git a/tests/unit_tests/synthetic/runtime_functions.cu b/tests/unit_tests/synthetic/runtime_functions.cu index cd6663bc..0981551e 100644 --- a/tests/unit_tests/synthetic/runtime_functions.cu +++ b/tests/unit_tests/synthetic/runtime_functions.cu @@ -42,6 +42,7 @@ int main() { GLuint gl_uint = 0; GLenum gl_enum = 0; struct textureReference* texref = nullptr; + std::string name = "str"; #if defined(_WIN32) unsigned long long ull = 0; @@ -279,6 +280,54 @@ int main() { // HIP: hipError_t hipGraphDestroy(hipGraph_t graph); // CHECK: result = hipGraphDestroy(Graph_t); result = cudaGraphDestroy(Graph_t); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphDestroyNode(cudaGraphNode_t node); + // HIP: hipError_t hipGraphDestroyNode(hipGraphNode_t node); + // CHECK: result = hipGraphDestroyNode(graphNode); + result = cudaGraphDestroyNode(graphNode); + + // CHECK: hipGraphExec_t GraphExec_t; + cudaGraphExec_t GraphExec_t; + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphExecDestroy(cudaGraphExec_t graphExec); + // HIP: hipError_t hipGraphExecDestroy(hipGraphExec_t graphExec); + // CHECK: result = hipGraphExecDestroy(GraphExec_t); + result = cudaGraphExecDestroy(GraphExec_t); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphGetEdges(cudaGraph_t graph, cudaGraphNode_t *from, cudaGraphNode_t *to, size_t *numEdges); + // HIP: hipError_t hipGraphGetEdges(hipGraph_t graph, hipGraphNode_t* from, hipGraphNode_t* to, size_t* numEdges); + // CHECK: result = hipGraphGetEdges(Graph_t, &graphNode, &graphNode_2, &bytes); + result = cudaGraphGetEdges(Graph_t, &graphNode, &graphNode_2, &bytes); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphGetNodes(cudaGraph_t graph, cudaGraphNode_t *nodes, size_t *numNodes); + // HIP: hipError_t hipGraphGetNodes(hipGraph_t graph, hipGraphNode_t* nodes, size_t* numNodes); + // CHECK: result = hipGraphGetNodes(Graph_t, &graphNode, &bytes); + result = cudaGraphGetNodes(Graph_t, &graphNode, &bytes); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphGetRootNodes(cudaGraph_t graph, cudaGraphNode_t *pRootNodes, size_t *pNumRootNodes); + // HIP: hipError_t hipGraphGetRootNodes(hipGraph_t graph, hipGraphNode_t* pRootNodes, size_t* pNumRootNodes); + // CHECK: result = hipGraphGetRootNodes(Graph_t, &graphNode, &bytes); + result = cudaGraphGetRootNodes(Graph_t, &graphNode, &bytes); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeGetParams(cudaGraphNode_t node, struct cudaHostNodeParams *pNodeParams); + // HIP: hipError_t hipGraphHostNodeGetParams(hipGraphNode_t node, hipHostNodeParams* pNodeParams); + // CHECK: result = hipGraphHostNodeGetParams(graphNode, &HostNodeParams); + result = cudaGraphHostNodeGetParams(graphNode, &HostNodeParams); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeSetParams(cudaGraphNode_t node, const struct cudaHostNodeParams *pNodeParams); + // HIP: hipError_t hipGraphHostNodeSetParams(hipGraphNode_t node, const hipHostNodeParams* pNodeParams); + // CHECK: result = hipGraphHostNodeSetParams(graphNode, &HostNodeParams); + result = cudaGraphHostNodeSetParams(graphNode, &HostNodeParams); + + char* name_ = const_cast(name.c_str()); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphInstantiate(cudaGraphExec_t *pGraphExec, cudaGraph_t graph, cudaGraphNode_t *pErrorNode, char *pLogBuffer, size_t bufferSize); + // HIP: hipError_t hipGraphInstantiate(hipGraphExec_t* pGraphExec, hipGraph_t graph, hipGraphNode_t* pErrorNode, char* pLogBuffer, size_t bufferSize); + // CHECK: result = hipGraphInstantiate(&GraphExec_t, Graph_t, &graphNode, name_, bytes); + result = cudaGraphInstantiate(&GraphExec_t, Graph_t, &graphNode, name_, bytes); + + // CHECK: hipGraphNodeType GraphNodeType; + cudaGraphNodeType GraphNodeType; #endif #if CUDA_VERSION >= 10010 @@ -301,6 +350,11 @@ int main() { result = cudaThreadExchangeStreamCaptureMode(&streamCaptureMode); #endif +#if CUDA_VERSION >= 10020 + // CHECK: hipGraphExecUpdateResult GraphExecUpdateResult; + cudaGraphExecUpdateResult GraphExecUpdateResult; +#endif + #if CUDA_VERSION >= 11000 // CHECK: hipKernelNodeAttrID kernelNodeAttrID; cudaKernelNodeAttrID kernelNodeAttrID; @@ -316,6 +370,91 @@ int main() { // HIP: hipError_t hipGraphKernelNodeGetAttribute(hipGraphNode_t hNode, hipKernelNodeAttrID attr, hipKernelNodeAttrValue* value); // CHECK: result = hipGraphKernelNodeGetAttribute(graphNode, kernelNodeAttrID, &kernelNodeAttrValue); result = cudaGraphKernelNodeGetAttribute(graphNode, kernelNodeAttrID, &kernelNodeAttrValue); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphExecKernelNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const struct cudaKernelNodeParams *pNodeParams); + // HIP: hipError_t hipGraphExecKernelNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, const hipKernelNodeParams* pNodeParams); + // CHECK: result = hipGraphExecKernelNodeSetParams(GraphExec_t, graphNode, &KernelNodeParams); + result = cudaGraphExecKernelNodeSetParams(GraphExec_t, graphNode, &KernelNodeParams); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemcpyNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const struct cudaMemcpy3DParms *pNodeParams); + // HIP: hipError_t hipGraphExecMemcpyNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, hipMemcpy3DParms* pNodeParams); + // CHECK: result = hipGraphExecMemcpyNodeSetParams(GraphExec_t, graphNode, &Memcpy3DParms); + result = cudaGraphExecMemcpyNodeSetParams(GraphExec_t, graphNode, &Memcpy3DParms); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemsetNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const struct cudaMemsetParams *pNodeParams); + // HIP: hipError_t hipGraphExecMemsetNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, const hipMemsetParams* pNodeParams); + // CHECK: result = hipGraphExecMemsetNodeSetParams(GraphExec_t, graphNode, &MemsetParams); + result = cudaGraphExecMemsetNodeSetParams(GraphExec_t, graphNode, &MemsetParams); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphExecHostNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const struct cudaHostNodeParams *pNodeParams); + // HIP: hipError_t hipError_t hipGraphExecHostNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, const hipHostNodeParams* pNodeParams); + // CHECK: result = hipGraphExecHostNodeSetParams(GraphExec_t, graphNode, &HostNodeParams); + result = cudaGraphExecHostNodeSetParams(GraphExec_t, graphNode, &HostNodeParams); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphExecUpdate(cudaGraphExec_t hGraphExec, cudaGraph_t hGraph, cudaGraphNode_t *hErrorNode_out, enum cudaGraphExecUpdateResult *updateResult_out); + // HIP: hipError_t hipGraphExecUpdate(hipGraphExec_t hGraphExec, hipGraph_t hGraph, hipGraphNode_t* hErrorNode_out, hipGraphExecUpdateResult* updateResult_out); + // CHECK: result = hipGraphExecUpdate(GraphExec_t, Graph_t, &graphNode, &GraphExecUpdateResult); + result = cudaGraphExecUpdate(GraphExec_t, Graph_t, &graphNode, &GraphExecUpdateResult); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeGetParams(cudaGraphNode_t node, struct cudaKernelNodeParams *pNodeParams); + // HIP: hipError_t hipGraphKernelNodeGetParams(hipGraphNode_t node, hipKernelNodeParams* pNodeParams); + // CHECK: result = hipGraphKernelNodeGetParams(graphNode, &KernelNodeParams); + result = cudaGraphKernelNodeGetParams(graphNode, &KernelNodeParams); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeSetParams(cudaGraphNode_t node, const struct cudaKernelNodeParams *pNodeParams); + // HIP: hipError_t hipGraphKernelNodeSetParams(hipGraphNode_t node, const hipKernelNodeParams* pNodeParams); + // CHECK: result = hipGraphKernelNodeSetParams(graphNode, &KernelNodeParams); + result = cudaGraphKernelNodeSetParams(graphNode, &KernelNodeParams); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphLaunch(cudaGraphExec_t graphExec, cudaStream_t stream); + // HIP: hipError_t hipGraphLaunch(hipGraphExec_t graphExec, hipStream_t stream); + // CHECK: result = hipGraphLaunch(GraphExec_t, stream); + result = cudaGraphLaunch(GraphExec_t, stream); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeGetParams(cudaGraphNode_t node, struct cudaMemcpy3DParms *pNodeParams); + // HIP: hipError_t hipGraphMemcpyNodeGetParams(hipGraphNode_t node, hipMemcpy3DParms* pNodeParams); + // CHECK: result = hipGraphMemcpyNodeGetParams(graphNode, &Memcpy3DParms); + result = cudaGraphMemcpyNodeGetParams(graphNode, &Memcpy3DParms); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParams(cudaGraphNode_t node, const struct cudaMemcpy3DParms *pNodeParams); + // HIP: hipError_t hipGraphMemcpyNodeSetParams(hipGraphNode_t node, const hipMemcpy3DParms* pNodeParams); + // CHECK: result = hipGraphMemcpyNodeSetParams(graphNode, &Memcpy3DParms); + result = cudaGraphMemcpyNodeSetParams(graphNode, &Memcpy3DParms); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeGetParams(cudaGraphNode_t node, struct cudaMemsetParams *pNodeParams); + // HIP: hipError_t hipGraphMemsetNodeGetParams(hipGraphNode_t node, hipMemsetParams* pNodeParams); + // CHECK: result = hipGraphMemsetNodeGetParams(graphNode, &MemsetParams); + result = cudaGraphMemsetNodeGetParams(graphNode, &MemsetParams); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeSetParams(cudaGraphNode_t node, const struct cudaMemsetParams *pNodeParams); + // HIP: hipError_t hipGraphMemsetNodeSetParams(hipGraphNode_t node, const hipMemsetParams* pNodeParams); + // CHECK: result = hipGraphMemsetNodeSetParams(graphNode, &MemsetParams); + result = cudaGraphMemsetNodeSetParams(graphNode, &MemsetParams); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphNodeFindInClone(cudaGraphNode_t *pNode, cudaGraphNode_t originalNode, cudaGraph_t clonedGraph); + // HIP: hipError_t hipError_t hipGraphNodeFindInClone(hipGraphNode_t* pNode, hipGraphNode_t originalNode, hipGraph_t clonedGraph); + // CHECK: result = hipGraphNodeFindInClone(&graphNode, graphNode_2, Graph_t); + result = cudaGraphNodeFindInClone(&graphNode, graphNode_2, Graph_t); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependencies(cudaGraphNode_t node, cudaGraphNode_t *pDependencies, size_t *pNumDependencies); + // HIP: hipError_t hipGraphNodeGetDependencies(hipGraphNode_t node, hipGraphNode_t* pDependencies, size_t* pNumDependencies); + // CHECK: result = hipGraphNodeGetDependencies(graphNode, &graphNode_2, &bytes); + result = cudaGraphNodeGetDependencies(graphNode, &graphNode_2, &bytes); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependentNodes(cudaGraphNode_t node, cudaGraphNode_t *pDependentNodes, size_t *pNumDependentNodes); + // HIP: hipError_t hipGraphNodeGetDependentNodes(hipGraphNode_t node, hipGraphNode_t* pDependentNodes, size_t* pNumDependentNodes); + // CHECK: result = hipGraphNodeGetDependentNodes(graphNode, &graphNode_2, &bytes); + result = cudaGraphNodeGetDependentNodes(graphNode, &graphNode_2, &bytes); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetType(cudaGraphNode_t node, enum cudaGraphNodeType *pType); + // HIP: hipError_t hipGraphNodeGetType(hipGraphNode_t node, hipGraphNodeType* pType); + // CHECK: result = hipGraphNodeGetType(graphNode, &GraphNodeType); + result = cudaGraphNodeGetType(graphNode, &GraphNodeType); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphRemoveDependencies(cudaGraph_t graph, const cudaGraphNode_t *from, const cudaGraphNode_t *to, size_t numDependencies); + // HIP: hipError_t hipGraphRemoveDependencies(hipGraph_t graph, const hipGraphNode_t* from, const hipGraphNode_t* to, size_t numDependencies); + // CHECK: result = hipGraphRemoveDependencies(Graph_t, &graphNode, &graphNode, bytes); + result = cudaGraphRemoveDependencies(Graph_t, &graphNode, &graphNode, bytes); #endif #if CUDA_VERSION >= 11020 From 5464389d941a83e9a74c1ec70e7fbdf385d0d53e Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Sat, 23 Jul 2022 19:42:26 +0200 Subject: [PATCH 03/14] [HIPIFY][#407][tests][fix] Synthetic test for CUDA Runtime API functions - Part 11 - final + Use `hip_runtime_api.h` instead of `hip_profile.h` + Updated tests and regenerated hipify-perl + Finished with synthetic tests for CUDA Runtime and Driver API --- bin/hipify-perl | 2 +- src/CUDA2HIP.cpp | 2 +- src/CUDA2HIP_Runtime_API_functions.cpp | 2 +- .../2_Cookbook/2_Profiler/Profiler.cpp | 2 +- .../unit_tests/synthetic/runtime_functions.cu | 147 +++++++++++++++++- 5 files changed, 143 insertions(+), 12 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index d840b527..52b94919 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -3242,7 +3242,7 @@ sub simpleSubstitutions { subst("channel_descriptor.h", "hip\/channel_descriptor.h", "include"); subst("cooperative_groups.h", "hip\/hip_cooperative_groups.h", "include"); subst("cuda_fp16.h", "hip\/hip_fp16.h", "include"); - subst("cuda_profiler_api.h", "hip\/hip_profile.h", "include"); + subst("cuda_profiler_api.h", "hip\/hip_runtime_api.h", "include"); subst("cuda_runtime_api.h", "hip\/hip_runtime_api.h", "include"); subst("cuda_texture_types.h", "hip\/hip_texture_types.h", "include"); subst("cufftXt.h", "hipfftXt.h", "include"); diff --git a/src/CUDA2HIP.cpp b/src/CUDA2HIP.cpp index b3f33a1e..3de83a28 100644 --- a/src/CUDA2HIP.cpp +++ b/src/CUDA2HIP.cpp @@ -35,7 +35,7 @@ const std::map CUDA_INCLUDE_MAP { {"cuda_texture_types.h", {"hip/hip_texture_types.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, {"texture_fetch_functions.h", {"", "", CONV_INCLUDE, API_RUNTIME, 0}}, {"vector_types.h", {"hip/hip_vector_types.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, - {"cuda_profiler_api.h", {"hip/hip_profile.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, + {"cuda_profiler_api.h", {"hip/hip_runtime_api.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, {"cooperative_groups.h", {"hip/hip_cooperative_groups.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, // cuComplex includes {"cuComplex.h", {"hip/hip_complex.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_COMPLEX, 0}}, diff --git a/src/CUDA2HIP_Runtime_API_functions.cpp b/src/CUDA2HIP_Runtime_API_functions.cpp index 626add5a..85f60896 100644 --- a/src/CUDA2HIP_Runtime_API_functions.cpp +++ b/src/CUDA2HIP_Runtime_API_functions.cpp @@ -833,7 +833,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // cuGraphInstantiateWithFlags {"cudaGraphInstantiateWithFlags", {"hipGraphInstantiateWithFlags", "", CONV_GRAPH, API_RUNTIME, 30}}, // cuGraphNodeSetEnabled - {"cudaGraphNodeSetEnabled", {"hipGraphNodeSetEnabled", "", CONV_GRAPH, API_RUNTIME, 30, HIP_UNSUPPORTED}}, + {"cudaGraphNodeSetEnabled", {"hipGraphNodeSetEnabled", "", CONV_GRAPH, API_RUNTIME, 30, HIP_UNSUPPORTED}}, // 31. Driver Entry Point Access // cuGetProcAddress diff --git a/tests/unit_tests/samples/2_Cookbook/2_Profiler/Profiler.cpp b/tests/unit_tests/samples/2_Cookbook/2_Profiler/Profiler.cpp index 7b3e4649..0fc567d4 100644 --- a/tests/unit_tests/samples/2_Cookbook/2_Profiler/Profiler.cpp +++ b/tests/unit_tests/samples/2_Cookbook/2_Profiler/Profiler.cpp @@ -30,7 +30,7 @@ THE SOFTWARE. // CHECK: #include #include -// CHECK: #include +// CHECK: #include #include #define WIDTH 1024 diff --git a/tests/unit_tests/synthetic/runtime_functions.cu b/tests/unit_tests/synthetic/runtime_functions.cu index 0981551e..4e1a3578 100644 --- a/tests/unit_tests/synthetic/runtime_functions.cu +++ b/tests/unit_tests/synthetic/runtime_functions.cu @@ -8,7 +8,10 @@ #include "windows.h" #include #endif + #include "cuda_gl_interop.h" +// CHECK: #include "hip/hip_runtime_api.h" +#include "cuda_profiler_api.h" int main() { printf("12. CUDA Runtime API Functions synthetic test\n"); @@ -35,6 +38,8 @@ int main() { void* deviceptr_2 = nullptr; void* image = nullptr; void* func = nullptr; + void* src = nullptr; + void* dst = nullptr; char* ch = nullptr; const char* const_ch = nullptr; dim3 gridDim; @@ -58,9 +63,17 @@ int main() { cudaError_t Error_t; cudaStream_t stream; + // CHECK: hipEvent_t Event_t; + // CHECK-Next: hipEvent_t Event_2; + cudaEvent_t Event_t; + cudaEvent_t Event_2; + // CHECK: hipMemcpy3DParms Memcpy3DParms; cudaMemcpy3DParms Memcpy3DParms; + // CHECK: hipMemcpyKind MemcpyKind; + cudaMemcpyKind MemcpyKind; + #if CUDA_VERSION >= 8000 // CHECK: hipDeviceP2PAttr DeviceP2PAttr; cudaDeviceP2PAttr DeviceP2PAttr; @@ -457,6 +470,98 @@ int main() { result = cudaGraphRemoveDependencies(Graph_t, &graphNode, &graphNode, bytes); #endif +#if CUDA_VERSION >= 11010 + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNodeToSymbol(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const void* symbol, const void* src, size_t count, size_t offset, enum cudaMemcpyKind kind); + // HIP: hipError_t hipGraphAddMemcpyNodeToSymbol(hipGraphNode_t* pGraphNode, hipGraph_t graph, const hipGraphNode_t* pDependencies, size_t numDependencies, const void* symbol, const void* src, size_t count, size_t offset, hipMemcpyKind kind); + // CHECK: result = hipGraphAddMemcpyNodeToSymbol(&graphNode, Graph_t, &graphNode_2, width, HIP_SYMBOL(image), src, bytes, wOffset, MemcpyKind); + result = cudaGraphAddMemcpyNodeToSymbol(&graphNode, Graph_t, &graphNode_2, width, image, src, bytes, wOffset, MemcpyKind); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNodeFromSymbol(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, void* dst, const void* symbol, size_t count, size_t offset, enum cudaMemcpyKind kind); + // HIP: hipError_t hipGraphAddMemcpyNodeFromSymbol(hipGraphNode_t* pGraphNode, hipGraph_t graph, const hipGraphNode_t* pDependencies, size_t numDependencies, void* dst, const void* symbol, size_t count, size_t offset, hipMemcpyKind kind); + // CHECK: result = hipGraphAddMemcpyNodeFromSymbol(&graphNode, Graph_t, &graphNode_2, width, dst, HIP_SYMBOL(image), bytes, wOffset, MemcpyKind); + result = cudaGraphAddMemcpyNodeFromSymbol(&graphNode, Graph_t, &graphNode_2, width, dst, image, bytes, wOffset, MemcpyKind); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNode1D(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, void* dst, const void* src, size_t count, enum cudaMemcpyKind kind); + // HIP: hipError_t hipGraphAddMemcpyNode1D(hipGraphNode_t* pGraphNode, hipGraph_t graph, const hipGraphNode_t* pDependencies, size_t numDependencies, void* dst, const void* src, size_t count, hipMemcpyKind kind); + // CHECK: result = hipGraphAddMemcpyNode1D(&graphNode, Graph_t, &graphNode_2, width, dst, src, bytes, MemcpyKind); + result = cudaGraphAddMemcpyNode1D(&graphNode, Graph_t, &graphNode_2, width, dst, src, bytes, MemcpyKind); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParamsToSymbol(cudaGraphNode_t node, const void* symbol, const void* src, size_t count, size_t offset, enum cudaMemcpyKind kind); + // HIP: hipError_t hipGraphMemcpyNodeSetParamsToSymbol(hipGraphNode_t node, const void* symbol, const void* src, size_t count, size_t offset, hipMemcpyKind kind); + // CHECK: result = hipGraphMemcpyNodeSetParamsToSymbol(graphNode, HIP_SYMBOL(image), src, bytes, wOffset, MemcpyKind); + result = cudaGraphMemcpyNodeSetParamsToSymbol(graphNode, image, src, bytes, wOffset, MemcpyKind); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParamsFromSymbol(cudaGraphNode_t node, void* dst, const void* symbol, size_t count, size_t offset, enum cudaMemcpyKind kind); + // HIP: hipError_t hipGraphMemcpyNodeSetParamsFromSymbol(hipGraphNode_t node, void* dst, const void* symbol, size_t count, size_t offset, hipMemcpyKind kind); + // CHECK: result = hipGraphMemcpyNodeSetParamsFromSymbol(graphNode, dst, HIP_SYMBOL(image), bytes, wOffset, MemcpyKind); + result = cudaGraphMemcpyNodeSetParamsFromSymbol(graphNode, dst, image, bytes, wOffset, MemcpyKind); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParams1D(cudaGraphNode_t node, void* dst, const void* src, size_t count, enum cudaMemcpyKind kind); + // HIP: hipError_t hipGraphMemcpyNodeSetParams1D(hipGraphNode_t node, void* dst, const void* src, size_t count, hipMemcpyKind kind); + // CHECK: result = hipGraphMemcpyNodeSetParams1D(graphNode, dst, src, bytes, MemcpyKind); + result = cudaGraphMemcpyNodeSetParams1D(graphNode, dst, src, bytes, MemcpyKind); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphAddEventRecordNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, cudaEvent_t event); + // HIP: hipError_t hipGraphAddEventRecordNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, const hipGraphNode_t* pDependencies, size_t numDependencies, hipEvent_t event); + // CHECK: result = hipGraphAddEventRecordNode(&graphNode, Graph_t, &graphNode_2, bytes, Event_t); + result = cudaGraphAddEventRecordNode(&graphNode, Graph_t, &graphNode_2, bytes, Event_t); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphEventRecordNodeGetEvent(cudaGraphNode_t node, cudaEvent_t *event_out); + // HIP: hipError_t hipGraphEventRecordNodeGetEvent(hipGraphNode_t node, hipEvent_t* event_out); + // CHECK: result = hipGraphEventRecordNodeGetEvent(graphNode, &Event_t); + result = cudaGraphEventRecordNodeGetEvent(graphNode, &Event_t); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphEventRecordNodeSetEvent(cudaGraphNode_t node, cudaEvent_t event); + // HIP: hipError_t hipGraphEventRecordNodeSetEvent(hipGraphNode_t node, hipEvent_t event); + // CHECK: result = hipGraphEventRecordNodeSetEvent(graphNode, Event_t); + result = cudaGraphEventRecordNodeSetEvent(graphNode, Event_t); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphAddEventWaitNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, cudaEvent_t event); + // HIP: hipError_t hipGraphAddEventWaitNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, const hipGraphNode_t* pDependencies, size_t numDependencies, hipEvent_t event); + // CHECK: result = hipGraphAddEventWaitNode(&graphNode, Graph_t, &graphNode_2, bytes, Event_t); + result = cudaGraphAddEventWaitNode(&graphNode, Graph_t, &graphNode_2, bytes, Event_t); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphEventWaitNodeGetEvent(cudaGraphNode_t node, cudaEvent_t *event_out); + // HIP: hipError_t hipGraphEventWaitNodeGetEvent(hipGraphNode_t node, hipEvent_t* event_out); + // CHECK: result = hipGraphEventWaitNodeGetEvent(graphNode, &Event_t); + result = cudaGraphEventWaitNodeGetEvent(graphNode, &Event_t); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphEventWaitNodeSetEvent(cudaGraphNode_t node, cudaEvent_t event); + // HIP: hipError_t hipGraphEventWaitNodeSetEvent(hipGraphNode_t node, hipEvent_t event); + // CHECK: result = hipGraphEventWaitNodeSetEvent(graphNode, Event_t); + result = cudaGraphEventWaitNodeSetEvent(graphNode, Event_t); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemcpyNodeSetParamsToSymbol(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const void* symbol, const void* src, size_t count, size_t offset, enum cudaMemcpyKind kind); + // HIP: hipError_t hipGraphExecMemcpyNodeSetParamsToSymbol(hipGraphExec_t hGraphExec, hipGraphNode_t node, const void* symbol, const void* src, size_t count, size_t offset, hipMemcpyKind kind); + // CHECK: result = hipGraphExecMemcpyNodeSetParamsToSymbol(GraphExec_t, graphNode, HIP_SYMBOL(image), src, bytes, wOffset, MemcpyKind); + result = cudaGraphExecMemcpyNodeSetParamsToSymbol(GraphExec_t, graphNode, image, src, bytes, wOffset, MemcpyKind); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemcpyNodeSetParamsFromSymbol(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, void* dst, const void* symbol, size_t count, size_t offset, enum cudaMemcpyKind kind); + // HIP: hipError_t hipGraphExecMemcpyNodeSetParamsFromSymbol(hipGraphExec_t hGraphExec, hipGraphNode_t node, void* dst, const void* symbol, size_t count, size_t offset, hipMemcpyKind kind); + // CHECK: result = hipGraphExecMemcpyNodeSetParamsFromSymbol(GraphExec_t, graphNode, dst, HIP_SYMBOL(image), bytes, wOffset, MemcpyKind); + result = cudaGraphExecMemcpyNodeSetParamsFromSymbol(GraphExec_t, graphNode, dst, image, bytes, wOffset, MemcpyKind); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemcpyNodeSetParams1D(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, void* dst, const void* src, size_t count, enum cudaMemcpyKind kind); + // HIP: hipError_t hipGraphExecMemcpyNodeSetParams1D(hipGraphExec_t hGraphExec, hipGraphNode_t node, void* dst, const void* src, size_t count, hipMemcpyKind kind); + // CHECK: result = hipGraphExecMemcpyNodeSetParams1D(GraphExec_t, graphNode, dst, src, bytes, MemcpyKind); + result = cudaGraphExecMemcpyNodeSetParams1D(GraphExec_t, graphNode, dst, src, bytes, MemcpyKind); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphExecChildGraphNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, cudaGraph_t childGraph); + // HIP: hipError_t hipGraphExecChildGraphNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, hipGraph_t childGraph); + // CHECK: result = hipGraphExecChildGraphNodeSetParams(GraphExec_t, graphNode, Graph_t); + result = cudaGraphExecChildGraphNodeSetParams(GraphExec_t, graphNode, Graph_t); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphExecEventRecordNodeSetEvent(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, cudaEvent_t event); + // HIP: hipError_t hipGraphExecEventRecordNodeSetEvent(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, hipEvent_t event); + // CHECK: result = hipGraphExecEventRecordNodeSetEvent(GraphExec_t, graphNode, Event_t); + result = cudaGraphExecEventRecordNodeSetEvent(GraphExec_t, graphNode, Event_t); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphExecEventWaitNodeSetEvent(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, cudaEvent_t event); + // HIP: hipError_t hipGraphExecEventWaitNodeSetEvent(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, hipEvent_t event); + // CHECK: result = hipGraphExecEventWaitNodeSetEvent(GraphExec_t, graphNode, Event_t); + result = cudaGraphExecEventWaitNodeSetEvent(GraphExec_t, graphNode, Event_t); +#endif + #if CUDA_VERSION >= 11020 // CHECK: hipMemPoolAttr memPoolAttr; cudaMemPoolAttr memPoolAttr; @@ -562,6 +667,13 @@ int main() { result = cudaMemPoolImportPointer(&deviceptr, memPool_t, &memPoolPtrExportData); #endif +#if CUDA_VERSION >= 11040 + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphInstantiateWithFlags(cudaGraphExec_t *pGraphExec, cudaGraph_t graph, unsigned long long flags); + // HIP: hipError_t hipGraphInstantiateWithFlags(hipGraphExec_t* pGraphExec, hipGraph_t graph, unsigned long long flags); + // CHECK: result = hipGraphInstantiateWithFlags(&GraphExec_t, Graph_t, ull); + result = cudaGraphInstantiateWithFlags(&GraphExec_t, Graph_t, ull); +#endif + // CHECK: hipDeviceProp_t DeviceProp; cudaDeviceProp DeviceProp; @@ -665,11 +777,6 @@ int main() { // CHECK: hipIpcEventHandle_t IpcEventHandle_t; cudaIpcEventHandle_t IpcEventHandle_t; - // CHECK: hipEvent_t Event_t; - // CHECK-Next: hipEvent_t Event_2; - cudaEvent_t Event_t; - cudaEvent_t Event_2; - // CUDA: extern __host__ cudaError_t CUDARTAPI cudaIpcGetEventHandle(cudaIpcEventHandle_t *handle, cudaEvent_t event); // HIP: hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event); // CHECK: result = hipIpcGetEventHandle(&IpcEventHandle_t, Event_t); @@ -1002,9 +1109,6 @@ int main() { // CHECK: result = hipMallocPitch(&deviceptr, &bytes, width, height); result = cudaMallocPitch(&deviceptr, &bytes, width, height); - // CHECK: hipMemcpyKind MemcpyKind; - cudaMemcpyKind MemcpyKind; - // CUDA: extern __host__ cudaError_t CUDARTAPI cudaMemcpy(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind); // HIP: hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind); // CHECK: result = hipMemcpy(deviceptr, deviceptr_2, bytes, MemcpyKind); @@ -1322,5 +1426,32 @@ int main() { // CHECK: result = hipRuntimeGetVersion(&intVal); result = cudaRuntimeGetVersion(&intVal); + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaProfilerStart(void); + // HIP: hipError_t hipProfilerStart(); + // CHECK: result = hipProfilerStart(); + result = cudaProfilerStart(); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaProfilerStop(void); + // HIP: hipError_t hipProfilerStop(); + // CHECK: result = hipProfilerStop(); + result = cudaProfilerStop(); + +#if CUDA_VERSION <= 10000 + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem __dv(0), cudaStream_t stream __dv(0)); + // HIP: hipError_t hipConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem __dparm(0), hipStream_t stream __dparm(0)); + // CHECK: result = hipConfigureCall(gridDim, blockDim, bytes, stream); + result = cudaConfigureCall(gridDim, blockDim, bytes, stream); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaLaunch(const void *func); + // HIP: hipError_t hipLaunchByPtr(const void* func); + // CHECK: result = hipLaunchByPtr(deviceptr); + result = cudaLaunch(deviceptr); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaSetupArgument(const void *arg, size_t size, size_t offset); + // HIP: hipError_t hipSetupArgument(const void* arg, size_t size, size_t offset); + // CHECK: result = hipSetupArgument(deviceptr, bytes, wOffset); + result = cudaSetupArgument(deviceptr, bytes, wOffset); +#endif + return 0; } From c288b5dc5db4b56066741966a66713755022b76d Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Thu, 28 Jul 2022 17:22:16 +0200 Subject: [PATCH 04/14] [HIPIFY][#591][CUB] Revise main CUB defines + Update regenerated hipify-perl and CUB_API_supported_by_HIP.md --- bin/hipify-perl | 43 ++++++++++++++++++++- doc/markdown/CUB_API_supported_by_HIP.md | 45 +++++++++++++++++++++- src/CUDA2HIP_CUB_API_types.cpp | 49 +++++++++++++++++++++++- 3 files changed, 133 insertions(+), 4 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 52b94919..73ab7ee6 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -4647,6 +4647,7 @@ sub simpleSubstitutions { subst("REGISTER_CUDA_OPERATOR", "REGISTER_HIP_OPERATOR", "define"); subst("REGISTER_CUDA_OPERATOR_CREATOR", "REGISTER_HIP_OPERATOR_CREATOR", "define"); subst("_CubLog", "_HipcubLog", "define"); + subst("__CUB_ALIGN_BYTES", "__HIPCUB_ALIGN_BYTES", "define"); subst("__CUDACC__", "__HIPCC__", "define"); subst("cudaArrayCubemap", "hipArrayCubemap", "define"); subst("cudaArrayDefault", "hipArrayDefault", "define"); @@ -6679,10 +6680,14 @@ sub warnUnsupportedFunctions { "bsrilu02Info", "bsric02Info", "__curand_umul", + "__CUB_LP64__", + "_CUB_ASM_PTR_SIZE_", + "_CUB_ASM_PTR_", "PATCH_LEVEL", "MINOR_VERSION", "MAX_CUFFT_ERROR", "MAJOR_VERSION", + "CubVector", "CubDebugExit", "CUuserObject_st", "CUuserObject_flags_enum", @@ -7689,14 +7694,22 @@ sub warnUnsupportedFunctions { "CUDA_ARRAY3D_2DARRAY", "CUB_USE_COOPERATIVE_GROUPS", "CUB_SUBSCRIPTION_FACTOR", + "CUB_STATIC_ASSERT", "CUB_SMEM_BANKS", "CUB_RUNTIME_ENABLED", + "CUB_ROUND_UP_NEAREST", + "CUB_ROUND_DOWN_NEAREST", + "CUB_QUOTIENT_FLOOR", + "CUB_QUOTIENT_CEILING", "CUB_PTX_SUBSCRIPTION_FACTOR", "CUB_PTX_SMEM_BANKS", "CUB_PTX_PREFER_CONFLICT_OVER_PADDING", "CUB_PTX_LOG_WARP_THREADS", "CUB_PTX_LOG_SMEM_BANKS", + "CUB_PREVENT_MACRO_SUBSTITUTION", "CUB_PREFER_CONFLICT_OVER_PADDING", + "CUB_MSVC_VERSION_FULL", + "CUB_MSVC_VERSION", "CUB_MAX_DEVICES", "CUB_LOG_WARP_THREADS", "CUB_LOG_SMEM_BANKS", @@ -7704,7 +7717,35 @@ sub warnUnsupportedFunctions { "CUB_IS_DEVICE_CODE", "CUB_INCLUDE_HOST_CODE", "CUB_INCLUDE_DEVICE_CODE", - "CUB_CPP_DIALECT" + "CUB_IGNORE_DEPRECATED_DIALECT", + "CUB_IGNORE_DEPRECATED_CPP_DIALECT", + "CUB_IGNORE_DEPRECATED_CPP_11", + "CUB_IGNORE_DEPRECATED_COMPILER", + "CUB_IGNORE_DEPRECATED_API", + "CUB_HOST_COMPILER_UNKNOWN", + "CUB_HOST_COMPILER_MSVC", + "CUB_HOST_COMPILER_GCC", + "CUB_HOST_COMPILER_CLANG", + "CUB_HOST_COMPILER", + "CUB_DEVICE_COMPILER_UNKNOWN", + "CUB_DEVICE_COMPILER_NVCC", + "CUB_DEVICE_COMPILER_MSVC", + "CUB_DEVICE_COMPILER_GCC", + "CUB_DEVICE_COMPILER_CLANG", + "CUB_DEVICE_COMPILER", + "CUB_DEPRECATED", + "CUB_DEFINE_VECTOR_TYPE", + "CUB_DEFINE_DETECT_NESTED_TYPE", + "CUB_CPP_DIALECT", + "CUB_CPLUSPLUS", + "CUB_COMP_DEPR_IMPL1", + "CUB_COMP_DEPR_IMPL0", + "CUB_COMP_DEPR_IMPL", + "CUB_COMPILER_DEPRECATION_SOFT", + "CUB_COMPILER_DEPRECATION", + "CUB_CAT_", + "CUB_CAT", + "CUB_ALIGN" ) { my $mt = m/($func)/g; diff --git a/doc/markdown/CUB_API_supported_by_HIP.md b/doc/markdown/CUB_API_supported_by_HIP.md index 51c19d90..c0015286 100644 --- a/doc/markdown/CUB_API_supported_by_HIP.md +++ b/doc/markdown/CUB_API_supported_by_HIP.md @@ -4,19 +4,50 @@ |**CUDA**|**A**|**D**|**R**|**HIP**|**A**|**D**|**R**|**E**| |:--|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:| +|`CUB_ALIGN`| | | | | | | | | +|`CUB_CAT`| | | | | | | | | +|`CUB_CAT_`| | | | | | | | | +|`CUB_COMPILER_DEPRECATION`| | | | | | | | | +|`CUB_COMPILER_DEPRECATION_SOFT`| | | | | | | | | +|`CUB_COMP_DEPR_IMPL`| | | | | | | | | +|`CUB_COMP_DEPR_IMPL0`| | | | | | | | | +|`CUB_COMP_DEPR_IMPL1`| | | | | | | | | +|`CUB_CPLUSPLUS`| | | | | | | | | |`CUB_CPP_DIALECT`| | | | | | | | | +|`CUB_DEFINE_DETECT_NESTED_TYPE`| | | | | | | | | +|`CUB_DEFINE_VECTOR_TYPE`| | | | | | | | | +|`CUB_DEPRECATED`| | | | | | | | | +|`CUB_DEVICE_COMPILER`| | | | | | | | | +|`CUB_DEVICE_COMPILER_CLANG`| | | | | | | | | +|`CUB_DEVICE_COMPILER_GCC`| | | | | | | | | +|`CUB_DEVICE_COMPILER_MSVC`| | | | | | | | | +|`CUB_DEVICE_COMPILER_NVCC`| | | | | | | | | +|`CUB_DEVICE_COMPILER_UNKNOWN`| | | | | | | | | +|`CUB_HOST_COMPILER`| | | | | | | | | +|`CUB_HOST_COMPILER_CLANG`| | | | | | | | | +|`CUB_HOST_COMPILER_GCC`| | | | | | | | | +|`CUB_HOST_COMPILER_MSVC`| | | | | | | | | +|`CUB_HOST_COMPILER_UNKNOWN`| | | | | | | | | +|`CUB_IGNORE_DEPRECATED_API`| | | | | | | | | +|`CUB_IGNORE_DEPRECATED_COMPILER`| | | | | | | | | +|`CUB_IGNORE_DEPRECATED_CPP_11`| | | | | | | | | +|`CUB_IGNORE_DEPRECATED_CPP_DIALECT`| | | | | | | | | +|`CUB_IGNORE_DEPRECATED_DIALECT`| | | | | | | | | |`CUB_INCLUDE_DEVICE_CODE`| | | | | | | | | |`CUB_INCLUDE_HOST_CODE`| | | | | | | | | |`CUB_IS_DEVICE_CODE`| | | | | | | | | |`CUB_IS_HOST_CODE`| | | | | | | | | |`CUB_LOG_SMEM_BANKS`| | | | | | | | | |`CUB_LOG_WARP_THREADS`| | | | | | | | | -|`CUB_MAX`| | | |`CUB_MAX`| | | | | +|`CUB_MAX`| | | |`CUB_MAX`|4.5.0| | | | |`CUB_MAX_DEVICES`| | | | | | | | | -|`CUB_MIN`| | | |`CUB_MIN`|2.5.0| | | | +|`CUB_MIN`| | | |`CUB_MIN`|4.5.0| | | | +|`CUB_MSVC_VERSION`| | | | | | | | | +|`CUB_MSVC_VERSION_FULL`| | | | | | | | | |`CUB_NAMESPACE_BEGIN`| | | |`BEGIN_HIPCUB_NAMESPACE`|2.5.0| | | | |`CUB_NAMESPACE_END`| | | |`END_HIPCUB_NAMESPACE`|2.5.0| | | | |`CUB_PREFER_CONFLICT_OVER_PADDING`| | | | | | | | | +|`CUB_PREVENT_MACRO_SUBSTITUTION`| | | | | | | | | |`CUB_PTX_ARCH`| | | |`HIPCUB_ARCH`|2.5.0| | | | |`CUB_PTX_LOG_SMEM_BANKS`| | | | | | | | | |`CUB_PTX_LOG_WARP_THREADS`| | | | | | | | | @@ -24,15 +55,25 @@ |`CUB_PTX_SMEM_BANKS`| | | | | | | | | |`CUB_PTX_SUBSCRIPTION_FACTOR`| | | | | | | | | |`CUB_PTX_WARP_THREADS`| | | |`HIPCUB_WARP_THREADS`|2.5.0| | | | +|`CUB_QUOTIENT_CEILING`| | | | | | | | | +|`CUB_QUOTIENT_FLOOR`| | | | | | | | | +|`CUB_ROUND_DOWN_NEAREST`| | | | | | | | | +|`CUB_ROUND_UP_NEAREST`| | | | | | | | | |`CUB_RUNTIME_ENABLED`| | | | | | | | | |`CUB_RUNTIME_FUNCTION`| | | |`HIPCUB_RUNTIME_FUNCTION`|2.5.0| | | | |`CUB_SMEM_BANKS`| | | | | | | | | +|`CUB_STATIC_ASSERT`| | | | | | | | | |`CUB_STDERR`| | | |`HIPCUB_STDERR`|2.5.0| | | | |`CUB_SUBSCRIPTION_FACTOR`| | | | | | | | | |`CUB_USE_COOPERATIVE_GROUPS`| | | | | | | | | |`CubDebug`| | | |`HipcubDebug`|2.5.0| | | | |`CubDebugExit`| | | | | | | | | +|`CubVector`| | | | | | | | | +|`_CUB_ASM_PTR_`| | | | | | | | | +|`_CUB_ASM_PTR_SIZE_`| | | | | | | | | |`_CubLog`| | | |`_HipcubLog`|2.5.0| | | | +|`__CUB_ALIGN_BYTES`| | | |`__HIPCUB_ALIGN_BYTES`|4.5.0| | | | +|`__CUB_LP64__`| | | | | | | | | \*A - Added; D - Deprecated; R - Removed; E - Experimental \ No newline at end of file diff --git a/src/CUDA2HIP_CUB_API_types.cpp b/src/CUDA2HIP_CUB_API_types.cpp index ef5f406f..e3650eba 100644 --- a/src/CUDA2HIP_CUB_API_types.cpp +++ b/src/CUDA2HIP_CUB_API_types.cpp @@ -29,6 +29,9 @@ const std::map CUDA_CUB_NAMESPACE_MAP { // Maps the names of CUDA CUB API types to the corresponding HIP types const std::map CUDA_CUB_TYPE_NAME_MAP { + // 1. Structs + {"CubVector", {"HipcubVector", "", CONV_TYPE, API_CUB, 1, HIP_UNSUPPORTED}}, + // 5. Defines {"CUB_STDERR", {"HIPCUB_STDERR", "", CONV_DEFINE, API_CUB, 1}}, {"CubDebug", {"HipcubDebug", "", CONV_DEFINE, API_CUB, 1}}, @@ -59,6 +62,48 @@ const std::map CUDA_CUB_TYPE_NAME_MAP { {"CUB_PTX_PREFER_CONFLICT_OVER_PADDING", {"HIPCUB_PREFER_CONFLICT_OVER_PADDING", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, {"CUB_MAX", {"CUB_MAX", "", CONV_DEFINE, API_CUB, 1}}, {"CUB_MIN", {"CUB_MIN", "", CONV_DEFINE, API_CUB, 1}}, + {"__CUB_ALIGN_BYTES", {"__HIPCUB_ALIGN_BYTES", "", CONV_DEFINE, API_CUB, 1}}, + {"CUB_DEFINE_VECTOR_TYPE", {"HIPCUB_DEFINE_VECTOR_TYPE", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_DEFINE_DETECT_NESTED_TYPE", {"HIPCUB_DEFINE_DETECT_NESTED_TYPE", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"__CUB_LP64__", {"__HIPCUB_LP64__", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"_CUB_ASM_PTR_", {"_HIPCUB_ASM_PTR_", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"_CUB_ASM_PTR_SIZE_", {"_HIPCUB_ASM_PTR_SIZE_", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_USE_COOPERATIVE_GROUPS", {"HIPCUB_USE_COOPERATIVE_GROUPS", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_ALIGN", {"HIPCUB_ALIGN", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_PREVENT_MACRO_SUBSTITUTION", {"HIPCUB_PREVENT_MACRO_SUBSTITUTION", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_QUOTIENT_FLOOR", {"HIPCUB_QUOTIENT_FLOOR", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_QUOTIENT_CEILING", {"HIPCUB_QUOTIENT_CEILING", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_ROUND_UP_NEAREST", {"HIPCUB_ROUND_UP_NEAREST", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_ROUND_DOWN_NEAREST", {"HIPCUB_ROUND_DOWN_NEAREST", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_STATIC_ASSERT", {"HIPCUB_STATIC_ASSERT", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_CAT", {"HIPCUB_CAT", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_CAT_", {"HIPCUB_CAT_", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_IGNORE_DEPRECATED_API", {"HIPCUB_IGNORE_DEPRECATED_API", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_DEPRECATED", {"HIPCUB_DEPRECATED", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_HOST_COMPILER", {"HIPCUB_HOST_COMPILER", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_HOST_COMPILER_MSVC", {"HIPCUB_HOST_COMPILER_MSVC", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_HOST_COMPILER_CLANG", {"HIPCUB_HOST_COMPILER_CLANG", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_HOST_COMPILER_GCC", {"HIPCUB_HOST_COMPILER_GCC", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_HOST_COMPILER_UNKNOWN", {"HIPCUB_HOST_COMPILER_UNKNOWN", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_DEVICE_COMPILER", {"HIPCUB_DEVICE_COMPILER", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_DEVICE_COMPILER_MSVC", {"HIPCUB_DEVICE_COMPILER_MSVC", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_DEVICE_COMPILER_CLANG", {"HIPCUB_DEVICE_COMPILER_CLANG", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_DEVICE_COMPILER_GCC", {"HIPCUB_DEVICE_COMPILER_GCC", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_DEVICE_COMPILER_NVCC", {"HIPCUB_DEVICE_COMPILER_NVCC", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_DEVICE_COMPILER_UNKNOWN", {"HIPCUB_DEVICE_COMPILER_UNKNOWN", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_IGNORE_DEPRECATED_DIALECT", {"HIPCUB_IGNORE_DEPRECATED_DIALECT", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_IGNORE_DEPRECATED_CPP_DIALECT", {"HIPCUB_IGNORE_DEPRECATED_CPP_DIALECT", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_IGNORE_DEPRECATED_CPP_11", {"HIPCUB_IGNORE_DEPRECATED_CPP_11", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_IGNORE_DEPRECATED_CPP_11", {"HIPCUB_IGNORE_DEPRECATED_CPP_11", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_IGNORE_DEPRECATED_COMPILER", {"HIPCUB_IGNORE_DEPRECATED_COMPILER", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_MSVC_VERSION", {"HIPCUB_MSVC_VERSION", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_MSVC_VERSION_FULL", {"HIPCUB_MSVC_VERSION_FULL", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_CPLUSPLUS", {"HIPCUB_CPLUSPLUS", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_COMP_DEPR_IMPL", {"HIPCUB_COMP_DEPR_IMPL", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_COMP_DEPR_IMPL0", {"HIPCUB_COMP_DEPR_IMPL0", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_COMP_DEPR_IMPL1", {"HIPCUB_COMP_DEPR_IMPL1", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_COMPILER_DEPRECATION", {"HIPCUB_COMPILER_DEPRECATION", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, + {"CUB_COMPILER_DEPRECATION_SOFT", {"HIPCUB_COMPILER_DEPRECATION_SOFT", "", CONV_DEFINE, API_CUB, 1, HIP_UNSUPPORTED}}, }; const std::map CUDA_CUB_TYPE_NAME_VER_MAP { @@ -73,7 +118,9 @@ const std::map HIP_CUB_TYPE_NAME_VER_MAP { {"HIPCUB_ARCH", {HIP_2050, HIP_0, HIP_0 }}, {"BEGIN_HIPCUB_NAMESPACE", {HIP_2050, HIP_0, HIP_0 }}, {"END_HIPCUB_NAMESPACE", {HIP_2050, HIP_0, HIP_0 }}, - {"CUB_MIN", {HIP_2050, HIP_0, HIP_0 }}, + {"CUB_MAX", {HIP_4050, HIP_0, HIP_0 }}, + {"CUB_MIN", {HIP_4050, HIP_0, HIP_0 }}, + {"__HIPCUB_ALIGN_BYTES", {HIP_4050, HIP_0, HIP_0 }}, }; const std::map CUDA_CUB_API_SECTION_MAP { From 6fff684ba7478333d4dfd0f862eaa18a27af94f4 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Fri, 29 Jul 2022 11:33:18 +0200 Subject: [PATCH 05/14] [HIPIFY][#601][BLAS][tests] Synthetic test for cuBLAS API - Part 1 + cuBLAS API to hipBLAS API data types only + Minor fixes --- doc/markdown/CUBLAS_API_supported_by_HIP.md | 2 +- src/CUDA2HIP_BLAS_API_types.cpp | 4 +- .../synthetic/libraries/cublas2hipblas.cu | 148 ++++++++++++++++++ 3 files changed, 151 insertions(+), 3 deletions(-) create mode 100644 tests/unit_tests/synthetic/libraries/cublas2hipblas.cu diff --git a/doc/markdown/CUBLAS_API_supported_by_HIP.md b/doc/markdown/CUBLAS_API_supported_by_HIP.md index c487dc3a..238db117 100644 --- a/doc/markdown/CUBLAS_API_supported_by_HIP.md +++ b/doc/markdown/CUBLAS_API_supported_by_HIP.md @@ -63,7 +63,7 @@ |`CUBLAS_GEMM_ALGO8_TENSOR_OP`|9.2| | | | | | | | |`CUBLAS_GEMM_ALGO9`|9.0| | | | | | | | |`CUBLAS_GEMM_ALGO9_TENSOR_OP`|9.2| | | | | | | | -|`CUBLAS_GEMM_DEFAULT`|8.0| | |`HIPBLAS_GEMM_DEFAULT`|1.8.2| | | | +|`CUBLAS_GEMM_DEFAULT`|9.0| | |`HIPBLAS_GEMM_DEFAULT`|1.8.2| | | | |`CUBLAS_GEMM_DEFAULT_TENSOR_OP`|9.0| | | | | | | | |`CUBLAS_GEMM_DFALT`|8.0| | |`HIPBLAS_GEMM_DEFAULT`|1.8.2| | | | |`CUBLAS_GEMM_DFALT_TENSOR_OP`|9.0| | | | | | | | diff --git a/src/CUDA2HIP_BLAS_API_types.cpp b/src/CUDA2HIP_BLAS_API_types.cpp index 3cd4caf1..997183f1 100644 --- a/src/CUDA2HIP_BLAS_API_types.cpp +++ b/src/CUDA2HIP_BLAS_API_types.cpp @@ -37,7 +37,7 @@ const std::map CUDA_BLAS_TYPE_NAME_MAP { {"CUBLAS_OP_T", {"HIPBLAS_OP_T", "rocblas_operation_transpose", CONV_NUMERIC_LITERAL, API_BLAS, 2}}, {"CUBLAS_OP_C", {"HIPBLAS_OP_C", "rocblas_operation_conjugate_transpose", CONV_NUMERIC_LITERAL, API_BLAS, 2}}, {"CUBLAS_OP_HERMITAN", {"HIPBLAS_OP_C", "rocblas_operation_conjugate_transpose", CONV_NUMERIC_LITERAL, API_BLAS, 2}}, - {"CUBLAS_OP_CONJG", {"HIPBLAS_OP_CONJG", "rocblas_operation_conjugate", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, + {"CUBLAS_OP_CONJG", {"HIPBLAS_OP_CONJG", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // Blas statuses {"cublasStatus", {"hipblasStatus_t", "rocblas_status", CONV_TYPE, API_BLAS, 2}}, @@ -205,7 +205,7 @@ const std::map CUDA_BLAS_TYPE_NAME_VER_MAP { {"CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION", {CUDA_110, CUDA_0, CUDA_0}}, {"cublasGemmAlgo_t", {CUDA_80, CUDA_0, CUDA_0}}, {"CUBLAS_GEMM_DFALT", {CUDA_80, CUDA_0, CUDA_0}}, - {"CUBLAS_GEMM_DEFAULT", {CUDA_80, CUDA_0, CUDA_0}}, + {"CUBLAS_GEMM_DEFAULT", {CUDA_90, CUDA_0, CUDA_0}}, {"CUBLAS_GEMM_ALGO0", {CUDA_80, CUDA_0, CUDA_0}}, {"CUBLAS_GEMM_ALGO1", {CUDA_80, CUDA_0, CUDA_0}}, {"CUBLAS_GEMM_ALGO2", {CUDA_80, CUDA_0, CUDA_0}}, diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu new file mode 100644 index 00000000..5dc9d362 --- /dev/null +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu @@ -0,0 +1,148 @@ +// RUN: %run_test hipify "%s" "%t" %hipify_args 2 --skip-excluded-preprocessor-conditional-blocks --experimental %clang_args -D__CUDA_API_VERSION_INTERNAL + +// CHECK: #include +#include +// CHECK: #include "hipblas.h" +#include "cublas.h" + +int main() { + printf("14. cuBLAS API to hipBLAS API synthetic test\n"); + + // CHECK: hipblasOperation_t blasOperation; + // CHECK-NEXT: hipblasOperation_t BLAS_OP_N = HIPBLAS_OP_N; + // CHECK-NEXT: hipblasOperation_t BLAS_OP_T = HIPBLAS_OP_T; + // CHECK-NEXT: hipblasOperation_t BLAS_OP_C = HIPBLAS_OP_C; + cublasOperation_t blasOperation; + cublasOperation_t BLAS_OP_N = CUBLAS_OP_N; + cublasOperation_t BLAS_OP_T = CUBLAS_OP_T; + cublasOperation_t BLAS_OP_C = CUBLAS_OP_C; + +#if CUDA_VERSION >= 10010 + // CHECK: hipblasOperation_t BLAS_OP_HERMITAN = HIPBLAS_OP_C; + cublasOperation_t BLAS_OP_HERMITAN = CUBLAS_OP_HERMITAN; +#endif + + // CHECK: hipblasStatus_t blasStatus; + // CHECK-NEXT: hipblasStatus_t blasStatus_t; + // CHECK-NEXT: hipblasStatus_t BLAS_STATUS_SUCCESS = HIPBLAS_STATUS_SUCCESS; + // CHECK-NEXT: hipblasStatus_t BLAS_STATUS_NOT_INITIALIZED = HIPBLAS_STATUS_NOT_INITIALIZED; + // CHECK-NEXT: hipblasStatus_t BLAS_STATUS_ALLOC_FAILED = HIPBLAS_STATUS_ALLOC_FAILED; + // CHECK-NEXT: hipblasStatus_t BLAS_STATUS_INVALID_VALUE = HIPBLAS_STATUS_INVALID_VALUE; + // CHECK-NEXT: hipblasStatus_t BLAS_STATUS_MAPPING_ERROR = HIPBLAS_STATUS_MAPPING_ERROR; + // CHECK-NEXT: hipblasStatus_t BLAS_STATUS_EXECUTION_FAILED = HIPBLAS_STATUS_EXECUTION_FAILED; + // CHECK-NEXT: hipblasStatus_t BLAS_STATUS_INTERNAL_ERROR = HIPBLAS_STATUS_INTERNAL_ERROR; + // CHECK-NEXT: hipblasStatus_t BLAS_STATUS_NOT_SUPPORTED = HIPBLAS_STATUS_NOT_SUPPORTED; + // CHECK-NEXT: hipblasStatus_t BLAS_STATUS_ARCH_MISMATCH = HIPBLAS_STATUS_ARCH_MISMATCH; + cublasStatus blasStatus; + cublasStatus_t blasStatus_t; + cublasStatus_t BLAS_STATUS_SUCCESS = CUBLAS_STATUS_SUCCESS; + cublasStatus_t BLAS_STATUS_NOT_INITIALIZED = CUBLAS_STATUS_NOT_INITIALIZED; + cublasStatus_t BLAS_STATUS_ALLOC_FAILED = CUBLAS_STATUS_ALLOC_FAILED; + cublasStatus_t BLAS_STATUS_INVALID_VALUE = CUBLAS_STATUS_INVALID_VALUE; + cublasStatus_t BLAS_STATUS_MAPPING_ERROR = CUBLAS_STATUS_MAPPING_ERROR; + cublasStatus_t BLAS_STATUS_EXECUTION_FAILED = CUBLAS_STATUS_EXECUTION_FAILED; + cublasStatus_t BLAS_STATUS_INTERNAL_ERROR = CUBLAS_STATUS_INTERNAL_ERROR; + cublasStatus_t BLAS_STATUS_NOT_SUPPORTED = CUBLAS_STATUS_NOT_SUPPORTED; + cublasStatus_t BLAS_STATUS_ARCH_MISMATCH = CUBLAS_STATUS_ARCH_MISMATCH; + + // CHECK: hipblasFillMode_t blasFillMode; + // CHECK-NEXT: hipblasFillMode_t BLAS_FILL_MODE_LOWER = HIPBLAS_FILL_MODE_LOWER; + // CHECK-NEXT: hipblasFillMode_t BLAS_FILL_MODE_UPPER = HIPBLAS_FILL_MODE_UPPER; + cublasFillMode_t blasFillMode; + cublasFillMode_t BLAS_FILL_MODE_LOWER = CUBLAS_FILL_MODE_LOWER; + cublasFillMode_t BLAS_FILL_MODE_UPPER = CUBLAS_FILL_MODE_UPPER; + +#if CUDA_VERSION >= 10010 + // CHECK: hipblasFillMode_t BLAS_FILL_MODE_FULL = HIPBLAS_FILL_MODE_FULL; + cublasFillMode_t BLAS_FILL_MODE_FULL = CUBLAS_FILL_MODE_FULL; +#endif + + // CHECK: hipblasDiagType_t blasDiagType; + // CHECK-NEXT: hipblasDiagType_t BLAS_DIAG_NON_UNIT = HIPBLAS_DIAG_NON_UNIT; + // CHECK-NEXT: hipblasDiagType_t BLAS_DIAG_UNIT = HIPBLAS_DIAG_UNIT; + cublasDiagType_t blasDiagType; + cublasDiagType_t BLAS_DIAG_NON_UNIT = CUBLAS_DIAG_NON_UNIT; + cublasDiagType_t BLAS_DIAG_UNIT = CUBLAS_DIAG_UNIT; + + // CHECK: hipblasSideMode_t blasSideMode; + // CHECK-NEXT: hipblasSideMode_t BLAS_SIDE_LEFT = HIPBLAS_SIDE_LEFT; + // CHECK-NEXT: hipblasSideMode_t BLAS_SIDE_RIGHT = HIPBLAS_SIDE_RIGHT; + cublasSideMode_t blasSideMode; + cublasSideMode_t BLAS_SIDE_LEFT = CUBLAS_SIDE_LEFT; + cublasSideMode_t BLAS_SIDE_RIGHT = CUBLAS_SIDE_RIGHT; + + // CHECK: hipblasPointerMode_t blasPointerMode; + // CHECK-NEXT: hipblasPointerMode_t BLAS_POINTER_MODE_HOST = HIPBLAS_POINTER_MODE_HOST; + // CHECK-NEXT: hipblasPointerMode_t BLAS_POINTER_MODE_DEVICE = HIPBLAS_POINTER_MODE_DEVICE; + cublasPointerMode_t blasPointerMode; + cublasPointerMode_t BLAS_POINTER_MODE_HOST = CUBLAS_POINTER_MODE_HOST; + cublasPointerMode_t BLAS_POINTER_MODE_DEVICE = CUBLAS_POINTER_MODE_DEVICE; + + // CHECK: hipblasAtomicsMode_t blasAtomicsMode; + // CHECK-NEXT: hipblasAtomicsMode_t BLAS_ATOMICS_NOT_ALLOWED = HIPBLAS_ATOMICS_NOT_ALLOWED; + // CHECK-NEXT: hipblasAtomicsMode_t BLAS_ATOMICS_ALLOWED = HIPBLAS_ATOMICS_ALLOWED; + cublasAtomicsMode_t blasAtomicsMode; + cublasAtomicsMode_t BLAS_ATOMICS_NOT_ALLOWED = CUBLAS_ATOMICS_NOT_ALLOWED; + cublasAtomicsMode_t BLAS_ATOMICS_ALLOWED = CUBLAS_ATOMICS_ALLOWED; + +#if CUDA_VERSION >= 8000 + // CHECK: hipblasDatatype_t DataType; + // CHECK-NEXT: hipblasDatatype_t DataType_t; + // CHECK-NEXT: hipblasDatatype_t blasDataType; + // CHECK-NEXT: hipblasDatatype_t R_16F = HIPBLAS_R_16F; + // CHECK-NEXT: hipblasDatatype_t C_16F = HIPBLAS_C_16F; + // CHECK-NEXT: hipblasDatatype_t R_32F = HIPBLAS_R_32F; + // CHECK-NEXT: hipblasDatatype_t C_32F = HIPBLAS_C_32F; + // CHECK-NEXT: hipblasDatatype_t R_64F = HIPBLAS_R_64F; + // CHECK-NEXT: hipblasDatatype_t C_64F = HIPBLAS_C_64F; + // CHECK-NEXT: hipblasDatatype_t R_8I = HIPBLAS_R_8I; + // CHECK-NEXT: hipblasDatatype_t C_8I = HIPBLAS_C_8I; + // CHECK-NEXT: hipblasDatatype_t R_8U = HIPBLAS_R_8U; + // CHECK-NEXT: hipblasDatatype_t C_8U = HIPBLAS_C_8U; + // CHECK-NEXT: hipblasDatatype_t R_32I = HIPBLAS_R_32I; + // CHECK-NEXT: hipblasDatatype_t C_32I = HIPBLAS_C_32I; + // CHECK-NEXT: hipblasDatatype_t R_32U = HIPBLAS_R_32U; + // CHECK-NEXT: hipblasDatatype_t C_32U = HIPBLAS_C_32U; + cudaDataType DataType; + cudaDataType_t DataType_t; + cublasDataType_t blasDataType; + cublasDataType_t R_16F = CUDA_R_16F; + cublasDataType_t C_16F = CUDA_C_16F; + cublasDataType_t R_32F = CUDA_R_32F; + cublasDataType_t C_32F = CUDA_C_32F; + cublasDataType_t R_64F = CUDA_R_64F; + cublasDataType_t C_64F = CUDA_C_64F; + cublasDataType_t R_8I = CUDA_R_8I; + cublasDataType_t C_8I = CUDA_C_8I; + cublasDataType_t R_8U = CUDA_R_8U; + cublasDataType_t C_8U = CUDA_C_8U; + cublasDataType_t R_32I = CUDA_R_32I; + cublasDataType_t C_32I = CUDA_C_32I; + cublasDataType_t R_32U = CUDA_R_32U; + cublasDataType_t C_32U = CUDA_C_32U; +#endif + +#if CUDA_VERSION >= 11000 + // CHECK: hipblasDatatype_t R_16BF = HIPBLAS_R_16B; + // CHECK-NEXT: hipblasDatatype_t C_16BF = HIPBLAS_C_16B; + cublasDataType_t R_16BF = CUDA_R_16BF; + cublasDataType_t C_16BF = CUDA_C_16BF; +#endif + +#if CUDA_VERSION >= 8000 + // CHECK: hipblasGemmAlgo_t blasGemmAlgo; + // CHECK-NEXT: hipblasGemmAlgo_t BLAS_GEMM_DFALT = HIPBLAS_GEMM_DEFAULT; + cublasGemmAlgo_t blasGemmAlgo; + cublasGemmAlgo_t BLAS_GEMM_DFALT = CUBLAS_GEMM_DFALT; +#endif + +#if CUDA_VERSION >= 9000 + // CHECK: hipblasGemmAlgo_t BLAS_GEMM_DEFAULT = HIPBLAS_GEMM_DEFAULT; + cublasGemmAlgo_t BLAS_GEMM_DEFAULT = CUBLAS_GEMM_DEFAULT; +#endif + + // CHECK: hipblasHandle_t blasHandle; + cublasHandle_t blasHandle; + + return 0; +} From d9612d878ddb7fcc1d9067ac6c584ee691199441 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Sat, 30 Jul 2022 16:23:28 +0200 Subject: [PATCH 06/14] [HIPIFY][#603][clang][fix] Fix linking error [Synopsis] + The affecting LLVM change: SHA-1: 7a5cb15ea6facd82756adafae76d60f36a0b60fd * [RISCV] Lazily add RVV C intrinsics (2022.07.13) when `clangSema` has got a dependency on `clangSupport` lib + `clangSupport` lib was introduced in LLVM with: SHA-1: f26c41e8dd28d86030cd0f5a6e9c11036acea5d2* [RISCV] Moving RVV intrinsic type related util to clang/Support (2022.04.14) [IMP] + This PR eliminates the #603 issue, but only for LLVM ToT + If another linking error about the missing lib `clangSupport` occurs, then LLVM trunk is very outdated (older than 2022.04.14) and should be updated to the latest and rebuilt + Sync with the latest LLVM ToT for resolving the #603 issue or any possible `clangSupport`-related issues --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 66274d34..2a45de4c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -68,7 +68,7 @@ if(LLVM_PACKAGE_VERSION VERSION_GREATER "9.0.1") endif() if(LLVM_PACKAGE_VERSION VERSION_EQUAL "15.0.0" OR LLVM_PACKAGE_VERSION VERSION_GREATER "15.0.0") - target_link_libraries(hipify-clang PRIVATE LLVMWindowsDriver) + target_link_libraries(hipify-clang PRIVATE LLVMWindowsDriver clangSupport) endif() if(MSVC) From 37237c6fa04d740453b85b53b86008fb686c3a07 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Sat, 30 Jul 2022 20:30:42 +0200 Subject: [PATCH 07/14] [HIPIFY][#531][#532] Remove the temporary workaround for `SWDEV_331863` blocker --- CMakeLists.txt | 6 ------ src/HipifyAction.cpp | 6 +++--- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2a45de4c..a8d36f32 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -86,12 +86,6 @@ else() set(addr_var ) endif() -# [ToDo] Remove SWDEV_331863 related guards from CMakeLists.txt and HipifyAction.cpp when the blocker SWDEV_331863 is overcome -option (SWDEV_331863 "Enables SWDEV-331863 blocker workaround" OFF) -if(SWDEV_331863) - add_definitions(-DSWDEV_331863) -endif() - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_CFLAGS} ${addr_var}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CFLAGS} -DHIPIFY_CLANG_RES=\\\"${LLVM_LIBRARY_DIRS}/clang/${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}\\\" ${addr_var}") diff --git a/src/HipifyAction.cpp b/src/HipifyAction.cpp index fb58ac87..360c813b 100644 --- a/src/HipifyAction.cpp +++ b/src/HipifyAction.cpp @@ -698,10 +698,10 @@ class PPCallbackProxy : public clang::PPCallbacks { public: explicit PPCallbackProxy(HipifyAction &action): hipifyAction(action) {} - // [ToDo] Remove SWDEV_331863 related guards from CMakeLists.txt and HipifyAction.cpp when the blocker SWDEV_331863 is overcome + void InclusionDirective(clang::SourceLocation hash_loc, const clang::Token &include_token, StringRef file_name, bool is_angled, clang::CharSourceRange filename_range, -#if (LLVM_VERSION_MAJOR < 15) || (LLVM_VERSION_MAJOR == 15 && SWDEV_331863) +#if LLVM_VERSION_MAJOR < 15 const clang::FileEntry *file, #else Optional file, @@ -712,7 +712,7 @@ class PPCallbackProxy : public clang::PPCallbacks { , clang::SrcMgr::CharacteristicKind FileType #endif ) override { -#if (LLVM_VERSION_MAJOR < 15) || (LLVM_VERSION_MAJOR == 15 && SWDEV_331863) +#if LLVM_VERSION_MAJOR < 15 auto f = file; #else auto f = &file->getFileEntry(); From f67cbf7d7e5f1dbec1b4f760a8797de018f1db53 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Sun, 31 Jul 2022 16:24:12 +0200 Subject: [PATCH 08/14] [HIPIFY][#601][BLAS][tests] Synthetic test for cuBLAS API - Part 2 + cuBLAS API to hipBLAS API functions (the beginning) + Took `cublas_api.h` into account + Revised `NRM2` functions: only _v2 functions are supported by HIP + Regenerated and updated hipify-perl and cuBLAS doc correspondingly --- bin/hipify-perl | 16 +-- doc/markdown/CUBLAS_API_supported_by_HIP.md | 8 +- src/CUDA2HIP.cpp | 1 + src/CUDA2HIP_BLAS_API_functions.cpp | 12 +- .../synthetic/libraries/cublas2hipblas.cu | 126 ++++++++++++++++++ 5 files changed, 147 insertions(+), 16 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 73ab7ee6..7ca8649f 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1338,7 +1338,6 @@ sub rocSubstitutions { subst("cublasDgemv_v2", "rocblas_dgemv", "library"); subst("cublasDger", "rocblas_dger", "library"); subst("cublasDger_v2", "rocblas_dger", "library"); - subst("cublasDnrm2", "rocblas_dnrm2", "library"); subst("cublasDnrm2_v2", "rocblas_dnrm2", "library"); subst("cublasDotEx", "rocblas_dot_ex", "library"); subst("cublasDotcEx", "rocblas_dotc_ex", "library"); @@ -1394,7 +1393,6 @@ sub rocSubstitutions { subst("cublasDtrsv_v2", "rocblas_dtrsv", "library"); subst("cublasDzasum", "rocblas_dzasum", "library"); subst("cublasDzasum_v2", "rocblas_dzasum", "library"); - subst("cublasDznrm2", "rocblas_dznrm2", "library"); subst("cublasDznrm2_v2", "rocblas_dznrm2", "library"); subst("cublasGemmBatchedEx", "rocblas_gemm_batched_ex", "library"); subst("cublasGemmEx", "rocblas_gemm_ex", "library"); @@ -1435,7 +1433,6 @@ sub rocSubstitutions { subst("cublasScalEx", "rocblas_scal_ex", "library"); subst("cublasScasum", "rocblas_scasum", "library"); subst("cublasScasum_v2", "rocblas_scasum", "library"); - subst("cublasScnrm2", "rocblas_scnrm2", "library"); subst("cublasScnrm2_v2", "rocblas_scnrm2", "library"); subst("cublasScopy", "rocblas_scopy", "library"); subst("cublasScopy_v2", "rocblas_scopy", "library"); @@ -1461,7 +1458,6 @@ sub rocSubstitutions { subst("cublasSgemv_v2", "rocblas_sgemv", "library"); subst("cublasSger", "rocblas_sger", "library"); subst("cublasSger_v2", "rocblas_sger", "library"); - subst("cublasSnrm2", "rocblas_snrm2", "library"); subst("cublasSnrm2_v2", "rocblas_snrm2", "library"); subst("cublasSrot", "rocblas_srot", "library"); subst("cublasSrot_v2", "rocblas_srot", "library"); @@ -2255,7 +2251,6 @@ sub simpleSubstitutions { subst("cublasDgetrfBatched", "hipblasDgetrfBatched", "library"); subst("cublasDgetriBatched", "hipblasDgetriBatched", "library"); subst("cublasDgetrsBatched", "hipblasDgetrsBatched", "library"); - subst("cublasDnrm2", "hipblasDnrm2", "library"); subst("cublasDnrm2_v2", "hipblasDnrm2", "library"); subst("cublasDotEx", "hipblasDotEx", "library"); subst("cublasDotcEx", "hipblasDotcEx", "library"); @@ -2311,7 +2306,6 @@ sub simpleSubstitutions { subst("cublasDtrsv_v2", "hipblasDtrsv", "library"); subst("cublasDzasum", "hipblasDzasum", "library"); subst("cublasDzasum_v2", "hipblasDzasum", "library"); - subst("cublasDznrm2", "hipblasDznrm2", "library"); subst("cublasDznrm2_v2", "hipblasDznrm2", "library"); subst("cublasGemmBatchedEx", "hipblasGemmBatchedEx", "library"); subst("cublasGemmEx", "hipblasGemmEx", "library"); @@ -2353,7 +2347,6 @@ sub simpleSubstitutions { subst("cublasScalEx", "hipblasScalEx", "library"); subst("cublasScasum", "hipblasScasum", "library"); subst("cublasScasum_v2", "hipblasScasum", "library"); - subst("cublasScnrm2", "hipblasScnrm2", "library"); subst("cublasScnrm2_v2", "hipblasScnrm2", "library"); subst("cublasScopy", "hipblasScopy", "library"); subst("cublasScopy_v2", "hipblasScopy", "library"); @@ -2384,7 +2377,6 @@ sub simpleSubstitutions { subst("cublasSgetrfBatched", "hipblasSgetrfBatched", "library"); subst("cublasSgetriBatched", "hipblasSgetriBatched", "library"); subst("cublasSgetrsBatched", "hipblasSgetrsBatched", "library"); - subst("cublasSnrm2", "hipblasSnrm2", "library"); subst("cublasSnrm2_v2", "hipblasSnrm2", "library"); subst("cublasSrot", "hipblasSrot", "library"); subst("cublasSrot_v2", "hipblasSrot", "library"); @@ -7771,6 +7763,7 @@ sub warnHipOnlyUnsupportedFunctions { "cublasSwapEx", "cublasStrttp", "cublasStpttr", + "cublasSnrm2", "cublasSmatinvBatched", "cublasShutdown", "cublasSgemmEx", @@ -7779,6 +7772,7 @@ sub warnHipOnlyUnsupportedFunctions { "cublasSetMathMode", "cublasSetLoggerCallback", "cublasSetKernelStream", + "cublasScnrm2", "cublasRotmgEx", "cublasRotmEx", "cublasRotgEx", @@ -7800,8 +7794,10 @@ sub warnHipOnlyUnsupportedFunctions { "cublasGetError", "cublasGetCudartVersion", "cublasFree", + "cublasDznrm2", "cublasDtrttp", "cublasDtpttr", + "cublasDnrm2", "cublasDmatinvBatched", "cublasDgelsBatched", "cublasCtrttp", @@ -7915,6 +7911,7 @@ sub warnRocOnlyUnsupportedFunctions { "cublasSwapEx", "cublasStrttp", "cublasStpttr", + "cublasSnrm2", "cublasSmatinvBatched", "cublasShutdown", "cublasSgetrsBatched", @@ -7928,6 +7925,7 @@ sub warnRocOnlyUnsupportedFunctions { "cublasSetLoggerCallback", "cublasSetKernelStream", "cublasSetAtomicsMode", + "cublasScnrm2", "cublasRotmgEx", "cublasRotmEx", "cublasRotgEx", @@ -7950,8 +7948,10 @@ sub warnRocOnlyUnsupportedFunctions { "cublasGetCudartVersion", "cublasGetAtomicsMode", "cublasFree", + "cublasDznrm2", "cublasDtrttp", "cublasDtpttr", + "cublasDnrm2", "cublasDmatinvBatched", "cublasDgetrsBatched", "cublasDgetriBatched", diff --git a/doc/markdown/CUBLAS_API_supported_by_HIP.md b/doc/markdown/CUBLAS_API_supported_by_HIP.md index 238db117..f3efcba3 100644 --- a/doc/markdown/CUBLAS_API_supported_by_HIP.md +++ b/doc/markdown/CUBLAS_API_supported_by_HIP.md @@ -226,7 +226,7 @@ |`cublasDcopy_v2`| | | |`hipblasDcopy`|1.8.2| | | | |`cublasDdot`| | | |`hipblasDdot`|3.0.0| | | | |`cublasDdot_v2`| | | |`hipblasDdot`|3.0.0| | | | -|`cublasDnrm2`| | | |`hipblasDnrm2`|1.8.2| | | | +|`cublasDnrm2`| | | | | | | | | |`cublasDnrm2_v2`| | | |`hipblasDnrm2`|1.8.2| | | | |`cublasDrot`| | | |`hipblasDrot`|3.0.0| | | | |`cublasDrot_v2`| | | |`hipblasDrot`|3.0.0| | | | @@ -242,7 +242,7 @@ |`cublasDswap_v2`| | | |`hipblasDswap`|3.0.0| | | | |`cublasDzasum`| | | |`hipblasDzasum`|3.0.0| | | | |`cublasDzasum_v2`| | | |`hipblasDzasum`|3.0.0| | | | -|`cublasDznrm2`| | | |`hipblasDznrm2`|3.0.0| | | | +|`cublasDznrm2`| | | | | | | | | |`cublasDznrm2_v2`| | | |`hipblasDznrm2`|3.0.0| | | | |`cublasIcamax`| | | |`hipblasIcamax`|3.0.0| | | | |`cublasIcamax_v2`| | | |`hipblasIcamax`|3.0.0| | | | @@ -267,13 +267,13 @@ |`cublasSaxpy_v2`| | | |`hipblasSaxpy`|1.8.2| | | | |`cublasScasum`| | | |`hipblasScasum`|3.0.0| | | | |`cublasScasum_v2`| | | |`hipblasScasum`|3.0.0| | | | -|`cublasScnrm2`| | | |`hipblasScnrm2`|3.0.0| | | | +|`cublasScnrm2`| | | | | | | | | |`cublasScnrm2_v2`| | | |`hipblasScnrm2`|3.0.0| | | | |`cublasScopy`| | | |`hipblasScopy`|1.8.2| | | | |`cublasScopy_v2`| | | |`hipblasScopy`|1.8.2| | | | |`cublasSdot`| | | |`hipblasSdot`|3.0.0| | | | |`cublasSdot_v2`| | | |`hipblasSdot`|3.0.0| | | | -|`cublasSnrm2`| | | |`hipblasSnrm2`|1.8.2| | | | +|`cublasSnrm2`| | | | | | | | | |`cublasSnrm2_v2`| | | |`hipblasSnrm2`|1.8.2| | | | |`cublasSrot`| | | |`hipblasSrot`|3.0.0| | | | |`cublasSrot_v2`| | | |`hipblasSrot`|3.0.0| | | | diff --git a/src/CUDA2HIP.cpp b/src/CUDA2HIP.cpp index 3de83a28..0aba0f7f 100644 --- a/src/CUDA2HIP.cpp +++ b/src/CUDA2HIP.cpp @@ -42,6 +42,7 @@ const std::map CUDA_INCLUDE_MAP { // cuBLAS includes {"cublas.h", {"hipblas.h", "rocblas.h", CONV_INCLUDE_CUDA_MAIN_H, API_BLAS, 0}}, {"cublas_v2.h", {"hipblas.h", "rocblas.h", CONV_INCLUDE_CUDA_MAIN_H, API_BLAS, 0}}, + {"cublas_api.h", {"hipblas.h", "rocblas.h", CONV_INCLUDE, API_BLAS, 0}}, // cuRAND includes {"curand.h", {"hiprand.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_RAND, 0}}, {"curand_kernel.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index fecd2c8c..1b2b6cc2 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -78,10 +78,14 @@ const std::map CUDA_BLAS_FUNCTION_MAP { {"cublasGetCudartVersion", {"hipblasGetCudartVersion", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, // NRM2 - {"cublasSnrm2", {"hipblasSnrm2", "rocblas_snrm2", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasDnrm2", {"hipblasDnrm2", "rocblas_dnrm2", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasScnrm2", {"hipblasScnrm2", "rocblas_scnrm2", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasDznrm2", {"hipblasDznrm2", "rocblas_dznrm2", CONV_LIB_FUNC, API_BLAS, 5}}, + // cublasSnrm2 signature differs from cublasSnrm2_v2 signature, hipblasSnrm2 and rocblas_snrm2 have mapping to cublasSnrm2_v2 only + {"cublasSnrm2", {"hipblasSnrm2_v1", "rocblas_snrm2_v1", CONV_LIB_FUNC, API_BLAS, 5, UNSUPPORTED}}, + // cublasDnrm2 signature differs from cublasDnrm2_v2 signature, hipblasDnrm2 and rocblas_dnrm2 have mapping to cublasDnrm2_v2 only + {"cublasDnrm2", {"hipblasDnrm2_v1", "rocblas_dnrm2_v1", CONV_LIB_FUNC, API_BLAS, 5, UNSUPPORTED}}, + // cublasScnrm2 signature differs from cublasScnrm2_v2 signature, hipblasScnrm2 and rocblas_scnrm2 have mapping to cublasScnrm2_v2 only + {"cublasScnrm2", {"hipblasScnrm2_v1", "rocblas_scnrm2_v1", CONV_LIB_FUNC, API_BLAS, 5, UNSUPPORTED}}, + // cublasDznrm2 signature differs from cublasDznrm2_v2 signature, hipblasDznrm2 and rocblas_dznrm2 have mapping to cublasDznrm2_v2 only + {"cublasDznrm2", {"hipblasDznrm2_v1", "rocblas_dznrm2_v1", CONV_LIB_FUNC, API_BLAS, 5, UNSUPPORTED}}, {"cublasNrm2Ex", {"hipblasNrm2Ex", "rocblas_nrm2_ex", CONV_LIB_FUNC, API_BLAS, 5}}, // DOT diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu index 5dc9d362..6c6cf57f 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu @@ -3,7 +3,10 @@ // CHECK: #include #include // CHECK: #include "hipblas.h" +// CHECK-NOT: #include "cublas_v2.h" #include "cublas.h" +#include "cublas_v2.h" +// CHECK-NOT: #include "hipblas.h" int main() { printf("14. cuBLAS API to hipBLAS API synthetic test\n"); @@ -144,5 +147,128 @@ int main() { // CHECK: hipblasHandle_t blasHandle; cublasHandle_t blasHandle; +// Functions + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGetAtomicsMode(cublasHandle_t handle, cublasAtomicsMode_t* mode); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGetAtomicsMode(hipblasHandle_t handle, hipblasAtomicsMode_t* atomics_mode); + // CHECK: blasStatus = hipblasGetAtomicsMode(blasHandle, &blasAtomicsMode); + blasStatus = cublasGetAtomicsMode(blasHandle, &blasAtomicsMode); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSetAtomicsMode(cublasHandle_t handle, cublasAtomicsMode_t mode); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSetAtomicsMode(hipblasHandle_t handle, hipblasAtomicsMode_t atomics_mode); + // CHECK: blasStatus = hipblasSetAtomicsMode(blasHandle, blasAtomicsMode); + blasStatus = cublasSetAtomicsMode(blasHandle, blasAtomicsMode); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCreate_v2(cublasHandle_t* handle); + // CUDA: #define cublasCreate cublasCreate_v2 + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCreate(hipblasHandle_t* handle); + // CHECK: blasStatus = hipblasCreate(&blasHandle); + // CHECK-NEXT: blasStatus = hipblasCreate(&blasHandle); + blasStatus = cublasCreate(&blasHandle); + blasStatus = cublasCreate_v2(&blasHandle); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDestroy_v2(cublasHandle_t handle); + // CUDA: #define cublasDestroy cublasDestroy_v2 + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDestroy(hipblasHandle_t handle); + // CHECK: blasStatus = hipblasDestroy(blasHandle); + // CHECK-NEXT: blasStatus = hipblasDestroy(blasHandle); + blasStatus = cublasDestroy(blasHandle); + blasStatus = cublasDestroy_v2(blasHandle); + + // CHECK: hipStream_t stream; + cudaStream_t stream; + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSetStream_v2(cublasHandle_t handle, cudaStream_t streamId); + // CUDA: #define cublasSetStream cublasSetStream_v2 + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSetStream(hipblasHandle_t handle, hipStream_t streamId); + // CHECK: blasStatus = hipblasSetStream(blasHandle, stream); + // CHECK-NEXT: blasStatus = hipblasSetStream(blasHandle, stream); + blasStatus = cublasSetStream(blasHandle, stream); + blasStatus = cublasSetStream_v2(blasHandle, stream); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGetStream_v2(cublasHandle_t handle, cudaStream_t* streamId); + // CUDA: #define cublasGetStream cublasGetStream_v2 + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGetStream(hipblasHandle_t handle, hipStream_t* streamId); + // CHECK: blasStatus = hipblasGetStream(blasHandle, &stream); + // CHECK-NEXT: blasStatus = hipblasGetStream(blasHandle, &stream); + blasStatus = cublasGetStream(blasHandle, &stream); + blasStatus = cublasGetStream_v2(blasHandle, &stream); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSetPointerMode_v2(cublasHandle_t handle, cublasPointerMode_t mode); + // CUDA: #define cublasSetPointerMode cublasSetPointerMode_v2 + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSetPointerMode(hipblasHandle_t handle, hipblasPointerMode_t mode); + // CHECK: blasStatus = hipblasSetPointerMode(blasHandle, blasPointerMode); + // CHECK-NEXT: blasStatus = hipblasSetPointerMode(blasHandle, blasPointerMode); + blasStatus = cublasSetPointerMode(blasHandle, blasPointerMode); + blasStatus = cublasSetPointerMode_v2(blasHandle, blasPointerMode); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGetPointerMode_v2(cublasHandle_t handle, cublasPointerMode_t* mode); + // CUDA: #define cublasGetPointerMode cublasGetPointerMode_v2 + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGetPointerMode(hipblasHandle_t handle, hipblasPointerMode_t* mode); + // CHECK: blasStatus = hipblasGetPointerMode(blasHandle, &blasPointerMode); + // CHECK-NEXT: blasStatus = hipblasGetPointerMode(blasHandle, &blasPointerMode); + blasStatus = cublasGetPointerMode(blasHandle, &blasPointerMode); + blasStatus = cublasGetPointerMode_v2(blasHandle, &blasPointerMode); + + int n = 0; + int num = 0; + int incx = 0; + int incy = 0; + void* image = nullptr; + void* image_2 = nullptr; + void* deviceptr = nullptr; + + // CUDA: cublasStatus_t CUBLASWINAPI cublasSetVector(int n, int elemSize, const void* x, int incx, void* devicePtr, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSetVector(int n, int elemSize, const void* x, int incx, void* y, int incy); + // CHECK: blasStatus = hipblasSetVector(n, num, image, incx, image_2, incy); + blasStatus = cublasSetVector(n, num, image, incx, image_2, incy); + + // CUDA: cublasStatus_t CUBLASWINAPI cublasGetVector(int n, int elemSize, const void* x, int incx, void* y, int incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGetVector(int n, int elemSize, const void* x, int incx, void* y, int incy); + // CHECK: blasStatus = hipblasGetVector(n, num, image, incx, image_2, incy); + blasStatus = cublasGetVector(n, num, image, incx, image_2, incy); + + // CUDA: cublasStatus_t CUBLASWINAPI cublasSetVectorAsync(int n, int elemSize, const void* hostPtr, int incx, void* devicePtr, int incy, cudaStream_t stream); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSetVectorAsync(int n, int elemSize, const void* x, int incx, void* y, int incy, hipStream_t stream); + // CHECK: blasStatus = hipblasSetVectorAsync(n, num, image, incx, image_2, incy, stream); + blasStatus = cublasSetVectorAsync(n, num, image, incx, image_2, incy, stream); + + // CUDA: cublasStatus_t CUBLASWINAPI cublasGetVectorAsync(int n, int elemSize, const void* devicePtr, int incx, void* hostPtr, int incy, cudaStream_t stream); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGetVectorAsync(int n, int elemSize, const void* x, int incx, void* y, int incy, hipStream_t stream); + // CHECK: blasStatus = hipblasGetVectorAsync(n, num, image, incx, image_2, incy, stream); + blasStatus = cublasGetVectorAsync(n, num, image, incx, image_2, incy, stream); + + int rows = 0; + int cols = 0; + + // CUDA: cublasStatus_t CUBLASWINAPI cublasSetMatrix(int rows, int cols, int elemSize, const void* A, int lda, void* B, int ldb); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSetMatrix(int rows, int cols, int elemSize, const void* AP, int lda, void* BP, int ldb); + // CHECK: blasStatus = hipblasSetMatrix(rows, cols, num, image, incx, image_2, incy); + blasStatus = cublasSetMatrix(rows, cols, num, image, incx, image_2, incy); + + // CUDA: cublasStatus_t CUBLASWINAPI cublasGetMatrix(int rows, int cols, int elemSize, const void* A, int lda, void* B, int ldb); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGetMatrix(int rows, int cols, int elemSize, const void* AP, int lda, void* BP, int ldb); + // CHECK: blasStatus = hipblasGetMatrix(rows, cols, num, image, incx, image_2, incy); + blasStatus = cublasGetMatrix(rows, cols, num, image, incx, image_2, incy); + + // CUDA: cublasStatus_t CUBLASWINAPI cublasSetMatrixAsync(int rows, int cols, int elemSize, const void* A, int lda, void* B, int ldb, cudaStream_t stream); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSetMatrixAsync(int rows, int cols, int elemSize, const void* AP, int lda, void* BP, int ldb, hipStream_t stream); + // CHECK: blasStatus = hipblasSetMatrixAsync(rows, cols, num, image, incx, image_2, incy, stream); + blasStatus = cublasSetMatrixAsync(rows, cols, num, image, incx, image_2, incy, stream); + + // CUDA: cublasStatus_t CUBLASWINAPI cublasGetMatrixAsync(int rows, int cols, int elemSize, const void* A, int lda, void* B, int ldb, cudaStream_t stream); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGetMatrixAsync(int rows, int cols, int elemSize, const void* AP, int lda, void* BP, int ldb, hipStream_t stream); + // CHECK: blasStatus = hipblasGetMatrixAsync(rows, cols, num, image, incx, image_2, incy, stream); + blasStatus = cublasGetMatrixAsync(rows, cols, num, image, incx, image_2, incy, stream); + + cudaDataType DataType_2, DataType_3; + +#if CUDA_VERSION >= 8000 + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasNrm2Ex(cublasHandle_t handle, int n, const void* x, cudaDataType xType, int incx, void* result, cudaDataType resultType, cudaDataType executionType); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasNrm2Ex(hipblasHandle_t handle, int n, const void* x, hipblasDatatype_t xType, int incx, void* result, hipblasDatatype_t resultType, hipblasDatatype_t executionType); + // CHECK: blasStatus = hipblasNrm2Ex(blasHandle, n, image, DataType, incx, image_2, DataType_2, DataType_3); + blasStatus = cublasNrm2Ex(blasHandle, n, image, DataType, incx, image_2, DataType_2, DataType_3); +#endif + return 0; } From 5a11d1515262b24f1a4f09603d7f1941e20050d1 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Fri, 5 Aug 2022 17:35:43 +0200 Subject: [PATCH 09/14] [HIPIFY][fix] Do preprocessing first, then matching, then lexer rewriting [Before] + Lexer rewriting, preprocessing, matching [After] + Preprocessing, matching, lexer rewriting [Reason for change] + The necessity in taking into account the included headers (in the future changes) + For instance, if cublas_v2.h is included, then v2 versions of some cuBLAS functions can be converted to hipBLAS analogues + If cublas_v2.h is not included, then hipification of some cuBLAS functions can't be done, as hipBLAS doesn't support "v1" versions --- src/HipifyAction.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/HipifyAction.cpp b/src/HipifyAction.cpp index 360c813b..2dc3a41f 100644 --- a/src/HipifyAction.cpp +++ b/src/HipifyAction.cpp @@ -737,6 +737,10 @@ bool HipifyAction::BeginInvocation(clang::CompilerInstance &CI) { void HipifyAction::ExecuteAction() { clang::Preprocessor &PP = getCompilerInstance().getPreprocessor(); + // Register yourself as the preprocessor callback, by proxy. + PP.addPPCallbacks(std::unique_ptr(new PPCallbackProxy(*this))); + // Now we're done futzing with the lexer, have the subclass proceeed with Sema and AST matching. + clang::ASTFrontendAction::ExecuteAction(); auto &SM = getCompilerInstance().getSourceManager(); // Start lexing the specified input file. llcompat::Memory_Buffer FromFile = llcompat::getMemoryBuffer(SM); @@ -751,10 +755,6 @@ void HipifyAction::ExecuteAction() { RewriteToken(RawTok); RawLex.LexFromRawLexer(RawTok); } - // Register yourself as the preprocessor callback, by proxy. - PP.addPPCallbacks(std::unique_ptr(new PPCallbackProxy(*this))); - // Now we're done futzing with the lexer, have the subclass proceeed with Sema and AST matching. - clang::ASTFrontendAction::ExecuteAction(); } void HipifyAction::run(const mat::MatchFinder::MatchResult &Result) { From a51e84f848e040299ad99cfde8aa86e4eca63102 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Sat, 6 Aug 2022 16:28:30 +0200 Subject: [PATCH 10/14] [HIPIFY][#607][BLAS][feature] Introduce v1/v2 API support approach + Introduce the `HIP_SUPPORTED_V2_ONLY` flag for API which is supported only if the `_v2` version of CUDA API is proposed; if the corresponding CUDA header is included, where `_v1` (as a rule without `_v1` suffix) to `v2` defines are redefined + Populate a few cuBLAS to hipBLAS mapping items with `HIP_SUPPORTED_V2_ONLY` to test the feature + Provide a corresponding function `isHipSupportedV2Only` for checking an API for the `HIP_SUPPORTED_V2_ONLY` flag + Introduce the `CONV_INCLUDE_CUDA_MAIN_V2_H` marker for `_v2` header files like `cublas_v2.h` + Track `_v2` headers includes + Warn about the identifier, supported only for the `_v2` version of it; warn only in case of absence of the corresponding `_v2` header file; warn and do not hipify if the `_v2` header is not included; hipify otherwise. + Update the existing cuBLAS/rocBLAS tests + Add new cuBLAS/rocBLAS tests for `_v1` cases only + Update regenerated hipify-perl and CUBLAS_API_supported_by_HIP.md [TODO] + Update the rest of the `HIP_SUPPORTED_V2_ONLY` APIs + Add the corresponding synthetic tests + Provide the same or similar functionality in hipify-perl --- bin/hipify-perl | 23 +++-- doc/markdown/CUBLAS_API_supported_by_HIP.md | 8 +- src/CUDA2HIP.cpp | 2 +- src/CUDA2HIP_BLAS_API_functions.cpp | 39 +++++---- src/HipifyAction.cpp | 27 +++++- src/HipifyAction.h | 1 + src/Statistics.cpp | 6 ++ src/Statistics.h | 6 +- .../cuBLAS/cublas_0_based_indexing.cu | 17 +++- .../unit_tests/libraries/cuBLAS/cublas_v1.cu | 83 +++++++++++++++++++ .../cublas_0_based_indexing_rocblas.cu | 17 +++- 11 files changed, 181 insertions(+), 48 deletions(-) create mode 100644 tests/unit_tests/libraries/cuBLAS/cublas_v1.cu diff --git a/bin/hipify-perl b/bin/hipify-perl index 7ca8649f..07914304 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -967,7 +967,7 @@ push(@exclude_filelist, split(',', $exclude_files)); %exclude_dirhash = map { $_ => 1 } @exclude_dirlist; %exclude_filehash = map { $_ => 1 } @exclude_filelist; -@statNames = ("error", "init", "version", "device", "context", "module", "memory", "virtual_memory", "stream_ordered_memory", "addressing", "stream", "event", "external_resource_interop", "stream_memory", "execution", "graph", "occupancy", "texture", "surface", "peer", "graphics", "interactions", "profiler", "openGL", "D3D9", "D3D10", "D3D11", "VDPAU", "EGL", "thread", "complex", "library", "device_library", "device_function", "include", "include_cuda_main_header", "type", "literal", "numeric_literal", "define", "extern_shared", "kernel_launch"); +@statNames = ("error", "init", "version", "device", "context", "module", "memory", "virtual_memory", "stream_ordered_memory", "addressing", "stream", "event", "external_resource_interop", "stream_memory", "execution", "graph", "occupancy", "texture", "surface", "peer", "graphics", "interactions", "profiler", "openGL", "D3D9", "D3D10", "D3D11", "VDPAU", "EGL", "thread", "complex", "library", "device_library", "device_function", "include", "include_cuda_main_header", "include_cuda_main_header_v2", "type", "literal", "numeric_literal", "define", "extern_shared", "kernel_launch"); sub totalStats { my %count = %{shift()}; @@ -1338,6 +1338,7 @@ sub rocSubstitutions { subst("cublasDgemv_v2", "rocblas_dgemv", "library"); subst("cublasDger", "rocblas_dger", "library"); subst("cublasDger_v2", "rocblas_dger", "library"); + subst("cublasDnrm2", "rocblas_dnrm2", "library"); subst("cublasDnrm2_v2", "rocblas_dnrm2", "library"); subst("cublasDotEx", "rocblas_dot_ex", "library"); subst("cublasDotcEx", "rocblas_dotc_ex", "library"); @@ -1393,6 +1394,7 @@ sub rocSubstitutions { subst("cublasDtrsv_v2", "rocblas_dtrsv", "library"); subst("cublasDzasum", "rocblas_dzasum", "library"); subst("cublasDzasum_v2", "rocblas_dzasum", "library"); + subst("cublasDznrm2", "rocblas_dznrm2", "library"); subst("cublasDznrm2_v2", "rocblas_dznrm2", "library"); subst("cublasGemmBatchedEx", "rocblas_gemm_batched_ex", "library"); subst("cublasGemmEx", "rocblas_gemm_ex", "library"); @@ -1433,6 +1435,7 @@ sub rocSubstitutions { subst("cublasScalEx", "rocblas_scal_ex", "library"); subst("cublasScasum", "rocblas_scasum", "library"); subst("cublasScasum_v2", "rocblas_scasum", "library"); + subst("cublasScnrm2", "rocblas_scnrm2", "library"); subst("cublasScnrm2_v2", "rocblas_scnrm2", "library"); subst("cublasScopy", "rocblas_scopy", "library"); subst("cublasScopy_v2", "rocblas_scopy", "library"); @@ -1458,6 +1461,7 @@ sub rocSubstitutions { subst("cublasSgemv_v2", "rocblas_sgemv", "library"); subst("cublasSger", "rocblas_sger", "library"); subst("cublasSger_v2", "rocblas_sger", "library"); + subst("cublasSnrm2", "rocblas_snrm2", "library"); subst("cublasSnrm2_v2", "rocblas_snrm2", "library"); subst("cublasSrot", "rocblas_srot", "library"); subst("cublasSrot_v2", "rocblas_srot", "library"); @@ -1595,7 +1599,6 @@ sub rocSubstitutions { subst("cublasZtrsv", "rocblas_ztrsv", "library"); subst("cublasZtrsv_v2", "rocblas_ztrsv", "library"); subst("cublas.h", "rocblas.h", "include_cuda_main_header"); - subst("cublas_v2.h", "rocblas.h", "include_cuda_main_header"); subst("cublasAtomicsMode_t", "rocblas_atomics_mode", "type"); subst("cublasContext", "_rocblas_handle", "type"); subst("cublasDataType_t", "rocblas_datatype", "type"); @@ -2251,6 +2254,7 @@ sub simpleSubstitutions { subst("cublasDgetrfBatched", "hipblasDgetrfBatched", "library"); subst("cublasDgetriBatched", "hipblasDgetriBatched", "library"); subst("cublasDgetrsBatched", "hipblasDgetrsBatched", "library"); + subst("cublasDnrm2", "hipblasDnrm2", "library"); subst("cublasDnrm2_v2", "hipblasDnrm2", "library"); subst("cublasDotEx", "hipblasDotEx", "library"); subst("cublasDotcEx", "hipblasDotcEx", "library"); @@ -2306,6 +2310,7 @@ sub simpleSubstitutions { subst("cublasDtrsv_v2", "hipblasDtrsv", "library"); subst("cublasDzasum", "hipblasDzasum", "library"); subst("cublasDzasum_v2", "hipblasDzasum", "library"); + subst("cublasDznrm2", "hipblasDznrm2", "library"); subst("cublasDznrm2_v2", "hipblasDznrm2", "library"); subst("cublasGemmBatchedEx", "hipblasGemmBatchedEx", "library"); subst("cublasGemmEx", "hipblasGemmEx", "library"); @@ -2347,6 +2352,7 @@ sub simpleSubstitutions { subst("cublasScalEx", "hipblasScalEx", "library"); subst("cublasScasum", "hipblasScasum", "library"); subst("cublasScasum_v2", "hipblasScasum", "library"); + subst("cublasScnrm2", "hipblasScnrm2", "library"); subst("cublasScnrm2_v2", "hipblasScnrm2", "library"); subst("cublasScopy", "hipblasScopy", "library"); subst("cublasScopy_v2", "hipblasScopy", "library"); @@ -2377,6 +2383,7 @@ sub simpleSubstitutions { subst("cublasSgetrfBatched", "hipblasSgetrfBatched", "library"); subst("cublasSgetriBatched", "hipblasSgetriBatched", "library"); subst("cublasSgetrsBatched", "hipblasSgetrsBatched", "library"); + subst("cublasSnrm2", "hipblasSnrm2", "library"); subst("cublasSnrm2_v2", "hipblasSnrm2", "library"); subst("cublasSrot", "hipblasSrot", "library"); subst("cublasSrot_v2", "hipblasSrot", "library"); @@ -3233,6 +3240,7 @@ sub simpleSubstitutions { subst("caffe2\/operators\/spatial_batch_norm_op.h", "caffe2\/operators\/hip\/spatial_batch_norm_op_miopen.hip", "include"); subst("channel_descriptor.h", "hip\/channel_descriptor.h", "include"); subst("cooperative_groups.h", "hip\/hip_cooperative_groups.h", "include"); + subst("cublas_api.h", "hipblas.h", "include"); subst("cuda_fp16.h", "hip\/hip_fp16.h", "include"); subst("cuda_profiler_api.h", "hip\/hip_runtime_api.h", "include"); subst("cuda_runtime_api.h", "hip\/hip_runtime_api.h", "include"); @@ -3261,7 +3269,6 @@ sub simpleSubstitutions { subst("cuComplex.h", "hip\/hip_complex.h", "include_cuda_main_header"); subst("cub\/cub.cuh", "hipcub\/hipcub.hpp", "include_cuda_main_header"); subst("cublas.h", "hipblas.h", "include_cuda_main_header"); - subst("cublas_v2.h", "hipblas.h", "include_cuda_main_header"); subst("cuda.h", "hip\/hip_runtime.h", "include_cuda_main_header"); subst("cuda_runtime.h", "hip\/hip_runtime.h", "include_cuda_main_header"); subst("cudnn.h", "hipDNN.h", "include_cuda_main_header"); @@ -7763,7 +7770,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasSwapEx", "cublasStrttp", "cublasStpttr", - "cublasSnrm2", "cublasSmatinvBatched", "cublasShutdown", "cublasSgemmEx", @@ -7772,7 +7778,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasSetMathMode", "cublasSetLoggerCallback", "cublasSetKernelStream", - "cublasScnrm2", "cublasRotmgEx", "cublasRotmEx", "cublasRotgEx", @@ -7794,10 +7799,8 @@ sub warnHipOnlyUnsupportedFunctions { "cublasGetError", "cublasGetCudartVersion", "cublasFree", - "cublasDznrm2", "cublasDtrttp", "cublasDtpttr", - "cublasDnrm2", "cublasDmatinvBatched", "cublasDgelsBatched", "cublasCtrttp", @@ -7911,7 +7914,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasSwapEx", "cublasStrttp", "cublasStpttr", - "cublasSnrm2", "cublasSmatinvBatched", "cublasShutdown", "cublasSgetrsBatched", @@ -7925,7 +7927,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasSetLoggerCallback", "cublasSetKernelStream", "cublasSetAtomicsMode", - "cublasScnrm2", "cublasRotmgEx", "cublasRotmEx", "cublasRotgEx", @@ -7948,10 +7949,8 @@ sub warnRocOnlyUnsupportedFunctions { "cublasGetCudartVersion", "cublasGetAtomicsMode", "cublasFree", - "cublasDznrm2", "cublasDtrttp", "cublasDtpttr", - "cublasDnrm2", "cublasDmatinvBatched", "cublasDgetrsBatched", "cublasDgetriBatched", @@ -8195,7 +8194,7 @@ while (@ARGV) { transformHostFunctions(); # TODO: would like to move this code outside loop but it uses $_ which contains the whole file unless ($no_output) { - my $apiCalls = $ft{'error'} + $ft{'init'} + $ft{'version'} + $ft{'device'} + $ft{'context'} + $ft{'module'} + $ft{'memory'} + $ft{'virtual_memory'} + $ft{'stream_ordered_memory'} + $ft{'addressing'} + $ft{'stream'} + $ft{'event'} + $ft{'external_resource_interop'} + $ft{'stream_memory'} + $ft{'execution'} + $ft{'graph'} + $ft{'occupancy'} + $ft{'texture'} + $ft{'surface'} + $ft{'peer'} + $ft{'graphics'} + $ft{'interactions'} + $ft{'profiler'} + $ft{'openGL'} + $ft{'D3D9'} + $ft{'D3D10'} + $ft{'D3D11'} + $ft{'VDPAU'} + $ft{'EGL'} + $ft{'thread'} + $ft{'complex'} + $ft{'library'} + $ft{'device_library'} + $ft{'include'} + $ft{'include_cuda_main_header'} + $ft{'type'} + $ft{'literal'} + $ft{'numeric_literal'} + $ft{'define'}; + my $apiCalls = $ft{'error'} + $ft{'init'} + $ft{'version'} + $ft{'device'} + $ft{'context'} + $ft{'module'} + $ft{'memory'} + $ft{'virtual_memory'} + $ft{'stream_ordered_memory'} + $ft{'addressing'} + $ft{'stream'} + $ft{'event'} + $ft{'external_resource_interop'} + $ft{'stream_memory'} + $ft{'execution'} + $ft{'graph'} + $ft{'occupancy'} + $ft{'texture'} + $ft{'surface'} + $ft{'peer'} + $ft{'graphics'} + $ft{'interactions'} + $ft{'profiler'} + $ft{'openGL'} + $ft{'D3D9'} + $ft{'D3D10'} + $ft{'D3D11'} + $ft{'VDPAU'} + $ft{'EGL'} + $ft{'thread'} + $ft{'complex'} + $ft{'library'} + $ft{'device_library'} + $ft{'include'} + $ft{'include_cuda_main_header'} + $ft{'include_cuda_main_header_v2'} + $ft{'type'} + $ft{'literal'} + $ft{'numeric_literal'} + $ft{'define'}; my $kernStuff = $hasDeviceCode + $ft{'kernel_launch'} + $ft{'device_function'}; my $totalCalls = $apiCalls + $kernStuff; $is_dos = m/\r\n$/; diff --git a/doc/markdown/CUBLAS_API_supported_by_HIP.md b/doc/markdown/CUBLAS_API_supported_by_HIP.md index f3efcba3..238db117 100644 --- a/doc/markdown/CUBLAS_API_supported_by_HIP.md +++ b/doc/markdown/CUBLAS_API_supported_by_HIP.md @@ -226,7 +226,7 @@ |`cublasDcopy_v2`| | | |`hipblasDcopy`|1.8.2| | | | |`cublasDdot`| | | |`hipblasDdot`|3.0.0| | | | |`cublasDdot_v2`| | | |`hipblasDdot`|3.0.0| | | | -|`cublasDnrm2`| | | | | | | | | +|`cublasDnrm2`| | | |`hipblasDnrm2`|1.8.2| | | | |`cublasDnrm2_v2`| | | |`hipblasDnrm2`|1.8.2| | | | |`cublasDrot`| | | |`hipblasDrot`|3.0.0| | | | |`cublasDrot_v2`| | | |`hipblasDrot`|3.0.0| | | | @@ -242,7 +242,7 @@ |`cublasDswap_v2`| | | |`hipblasDswap`|3.0.0| | | | |`cublasDzasum`| | | |`hipblasDzasum`|3.0.0| | | | |`cublasDzasum_v2`| | | |`hipblasDzasum`|3.0.0| | | | -|`cublasDznrm2`| | | | | | | | | +|`cublasDznrm2`| | | |`hipblasDznrm2`|3.0.0| | | | |`cublasDznrm2_v2`| | | |`hipblasDznrm2`|3.0.0| | | | |`cublasIcamax`| | | |`hipblasIcamax`|3.0.0| | | | |`cublasIcamax_v2`| | | |`hipblasIcamax`|3.0.0| | | | @@ -267,13 +267,13 @@ |`cublasSaxpy_v2`| | | |`hipblasSaxpy`|1.8.2| | | | |`cublasScasum`| | | |`hipblasScasum`|3.0.0| | | | |`cublasScasum_v2`| | | |`hipblasScasum`|3.0.0| | | | -|`cublasScnrm2`| | | | | | | | | +|`cublasScnrm2`| | | |`hipblasScnrm2`|3.0.0| | | | |`cublasScnrm2_v2`| | | |`hipblasScnrm2`|3.0.0| | | | |`cublasScopy`| | | |`hipblasScopy`|1.8.2| | | | |`cublasScopy_v2`| | | |`hipblasScopy`|1.8.2| | | | |`cublasSdot`| | | |`hipblasSdot`|3.0.0| | | | |`cublasSdot_v2`| | | |`hipblasSdot`|3.0.0| | | | -|`cublasSnrm2`| | | | | | | | | +|`cublasSnrm2`| | | |`hipblasSnrm2`|1.8.2| | | | |`cublasSnrm2_v2`| | | |`hipblasSnrm2`|1.8.2| | | | |`cublasSrot`| | | |`hipblasSrot`|3.0.0| | | | |`cublasSrot_v2`| | | |`hipblasSrot`|3.0.0| | | | diff --git a/src/CUDA2HIP.cpp b/src/CUDA2HIP.cpp index 0aba0f7f..1711966c 100644 --- a/src/CUDA2HIP.cpp +++ b/src/CUDA2HIP.cpp @@ -41,7 +41,7 @@ const std::map CUDA_INCLUDE_MAP { {"cuComplex.h", {"hip/hip_complex.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_COMPLEX, 0}}, // cuBLAS includes {"cublas.h", {"hipblas.h", "rocblas.h", CONV_INCLUDE_CUDA_MAIN_H, API_BLAS, 0}}, - {"cublas_v2.h", {"hipblas.h", "rocblas.h", CONV_INCLUDE_CUDA_MAIN_H, API_BLAS, 0}}, + {"cublas_v2.h", {"hipblas.h", "rocblas.h", CONV_INCLUDE_CUDA_MAIN_V2_H, API_BLAS, 0}}, {"cublas_api.h", {"hipblas.h", "rocblas.h", CONV_INCLUDE, API_BLAS, 0}}, // cuRAND includes {"curand.h", {"hiprand.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_RAND, 0}}, diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index 1b2b6cc2..0f6f5ae9 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -78,31 +78,30 @@ const std::map CUDA_BLAS_FUNCTION_MAP { {"cublasGetCudartVersion", {"hipblasGetCudartVersion", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, // NRM2 - // cublasSnrm2 signature differs from cublasSnrm2_v2 signature, hipblasSnrm2 and rocblas_snrm2 have mapping to cublasSnrm2_v2 only - {"cublasSnrm2", {"hipblasSnrm2_v1", "rocblas_snrm2_v1", CONV_LIB_FUNC, API_BLAS, 5, UNSUPPORTED}}, - // cublasDnrm2 signature differs from cublasDnrm2_v2 signature, hipblasDnrm2 and rocblas_dnrm2 have mapping to cublasDnrm2_v2 only - {"cublasDnrm2", {"hipblasDnrm2_v1", "rocblas_dnrm2_v1", CONV_LIB_FUNC, API_BLAS, 5, UNSUPPORTED}}, - // cublasScnrm2 signature differs from cublasScnrm2_v2 signature, hipblasScnrm2 and rocblas_scnrm2 have mapping to cublasScnrm2_v2 only - {"cublasScnrm2", {"hipblasScnrm2_v1", "rocblas_scnrm2_v1", CONV_LIB_FUNC, API_BLAS, 5, UNSUPPORTED}}, - // cublasDznrm2 signature differs from cublasDznrm2_v2 signature, hipblasDznrm2 and rocblas_dznrm2 have mapping to cublasDznrm2_v2 only - {"cublasDznrm2", {"hipblasDznrm2_v1", "rocblas_dznrm2_v1", CONV_LIB_FUNC, API_BLAS, 5, UNSUPPORTED}}, + // NRM2 functions' signatures differ from _v2 ones, hipblas and rocblas NRM2 functions have mapping to NRM2_v2 functions only + {"cublasSnrm2", {"hipblasSnrm2", "rocblas_snrm2", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDnrm2", {"hipblasDnrm2", "rocblas_dnrm2", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasScnrm2", {"hipblasScnrm2", "rocblas_scnrm2", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDznrm2", {"hipblasDznrm2", "rocblas_dznrm2", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, {"cublasNrm2Ex", {"hipblasNrm2Ex", "rocblas_nrm2_ex", CONV_LIB_FUNC, API_BLAS, 5}}, // DOT - {"cublasSdot", {"hipblasSdot", "rocblas_sdot", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasDdot", {"hipblasDdot", "rocblas_ddot", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasCdotu", {"hipblasCdotu", "rocblas_cdotu", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasCdotc", {"hipblasCdotc", "rocblas_cdotc", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasZdotu", {"hipblasZdotu", "rocblas_zdotu", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasZdotc", {"hipblasZdotc", "rocblas_zdotc", CONV_LIB_FUNC, API_BLAS, 5}}, + // DOT functions' signatures differ from _v2 ones, hipblas and rocblas DOT functions have mapping to DOT_v2 functions only + {"cublasSdot", {"hipblasSdot", "rocblas_sdot", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDdot", {"hipblasDdot", "rocblas_ddot", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasCdotu", {"hipblasCdotu", "rocblas_cdotu", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasCdotc", {"hipblasCdotc", "rocblas_cdotc", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZdotu", {"hipblasZdotu", "rocblas_zdotu", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZdotc", {"hipblasZdotc", "rocblas_zdotc", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, // SCAL - {"cublasSscal", {"hipblasSscal", "rocblas_sscal", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasDscal", {"hipblasDscal", "rocblas_dscal", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasCscal", {"hipblasCscal", "rocblas_cscal", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasCsscal", {"hipblasCsscal", "rocblas_csscal", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasZscal", {"hipblasZscal", "rocblas_zscal", CONV_LIB_FUNC, API_BLAS, 5}}, - {"cublasZdscal", {"hipblasZdscal", "rocblas_zdscal", CONV_LIB_FUNC, API_BLAS, 5}}, + // SCAL functions' signatures differ from _v2 ones, hipblas and rocblas SCAL functions have mapping to SCAL_v2 functions only + {"cublasSscal", {"hipblasSscal", "rocblas_sscal", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDscal", {"hipblasDscal", "rocblas_dscal", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasCscal", {"hipblasCscal", "rocblas_cscal", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasCsscal", {"hipblasCsscal", "rocblas_csscal", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZscal", {"hipblasZscal", "rocblas_zscal", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZdscal", {"hipblasZdscal", "rocblas_zdscal", CONV_LIB_FUNC, API_BLAS, 5, HIP_SUPPORTED_V2_ONLY}}, // AXPY {"cublasSaxpy", {"hipblasSaxpy", "rocblas_saxpy", CONV_LIB_FUNC, API_BLAS, 5}}, diff --git a/src/HipifyAction.cpp b/src/HipifyAction.cpp index 2dc3a41f..ad93bea3 100644 --- a/src/HipifyAction.cpp +++ b/src/HipifyAction.cpp @@ -186,8 +186,18 @@ void HipifyAction::FindAndReplace(StringRef name, DE.Report(sl, ID) << sWarn; return; } - - // Warn the user about unsupported identifier. + // Warn about the identifier which is supported only for _v2 version of it + // [NOTE]: Currently, only cuBlas is tracked for versioning and only for _v2; + // cublas_v2.h has to be included in the source cuda file for hipification. + if (Statistics::isHipSupportedV2Only(found->second) && found->second.apiType == API_BLAS && !insertedBLASHeader_V2) { + std::string sWarn; + Statistics::isToRoc(found->second) ? sWarn = sROC : sWarn = sHIP; + sWarn = "" + sWarn; + const auto ID = DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "Only _v2 version of identifier is supported in %0. To hipify it, include cublas_v2.h in the source code."); + DE.Report(sl, ID) << sWarn; + return; + } + // Warn about unsupported identifier. if (Statistics::isUnsupported(found->second)) { std::string sWarn; Statistics::isToRoc(found->second) ? sWarn = sROC : sWarn = sHIP; @@ -288,6 +298,18 @@ bool HipifyAction::Exclude(const hipCounter &hipToken) { default: return false; } + return false; + case CONV_INCLUDE_CUDA_MAIN_V2_H: + switch (hipToken.apiType) { + case API_BLAS: + if (insertedBLASHeader_V2) return true; + insertedBLASHeader_V2 = true; + if (insertedBLASHeader) return true; + return false; + default: + return false; + } + return false; case CONV_INCLUDE: if (hipToken.hipName.empty()) return true; switch (hipToken.apiType) { @@ -300,6 +322,7 @@ bool HipifyAction::Exclude(const hipCounter &hipToken) { default: return false; } + return false; default: return false; } diff --git a/src/HipifyAction.h b/src/HipifyAction.h index 02813f54..7fbf7134 100644 --- a/src/HipifyAction.h +++ b/src/HipifyAction.h @@ -49,6 +49,7 @@ class HipifyAction : public clang::ASTFrontendAction, // This approach means we do the best it's possible to do w.r.t preserving the user's include order. bool insertedRuntimeHeader = false; bool insertedBLASHeader = false; + bool insertedBLASHeader_V2 = false; bool insertedRANDHeader = false; bool insertedRAND_kernelHeader = false; bool insertedDNNHeader = false; diff --git a/src/Statistics.cpp b/src/Statistics.cpp index 6afa60ba..ade47baf 100644 --- a/src/Statistics.cpp +++ b/src/Statistics.cpp @@ -64,6 +64,7 @@ const char *counterNames[NUM_CONV_TYPES] = { "device_function", // CONV_DEVICE_FUNC "include", // CONV_INCLUDE "include_cuda_main_header", // CONV_INCLUDE_CUDA_MAIN_H + "include_cuda_main_header_v2", // CONV_INCLUDE_CUDA_MAIN_V2_H "type", // CONV_TYPE "literal", // CONV_LITERAL "numeric_literal", // CONV_NUMERIC_LITERAL @@ -108,6 +109,7 @@ const char *counterTypes[NUM_CONV_TYPES] = { "CONV_LIB_DEVICE_FUNC", "CONV_INCLUDE", "CONV_INCLUDE_CUDA_MAIN_H", + "CONV_INCLUDE_CUDA_MAIN_V2_H", "CONV_TYPE", "CONV_LITERAL", "CONV_NUMERIC_LITERAL", @@ -399,6 +401,10 @@ bool Statistics::isRemoved(const hipCounter &counter) { HIP_REMOVED == (counter.supportDegree & HIP_REMOVED)); } +bool Statistics::isHipSupportedV2Only(const hipCounter& counter) { + return HIP_SUPPORTED_V2_ONLY == (counter.supportDegree & HIP_SUPPORTED_V2_ONLY); +} + std::string Statistics::getCudaVersion(const cudaVersions& ver) { switch (ver) { case CUDA_0: diff --git a/src/Statistics.h b/src/Statistics.h index 259bc0bf..66b0c3a4 100644 --- a/src/Statistics.h +++ b/src/Statistics.h @@ -121,6 +121,7 @@ enum ConvTypes { CONV_DEVICE_FUNC, CONV_INCLUDE, CONV_INCLUDE_CUDA_MAIN_H, + CONV_INCLUDE_CUDA_MAIN_V2_H, CONV_TYPE, CONV_LITERAL, CONV_NUMERIC_LITERAL, @@ -158,7 +159,8 @@ enum SupportDegree { CUDA_REMOVED = 0x40, HIP_REMOVED = 0x80, REMOVED = 0x100, - HIP_EXPERIMENTAL = 0x200 + HIP_EXPERIMENTAL = 0x200, + HIP_SUPPORTED_V2_ONLY = 0x400 }; enum cudaVersions { @@ -411,6 +413,8 @@ class Statistics { static bool isHipRemoved(const hipCounter& counter); // Check whether the counter is REMOVED or not. static bool isRemoved(const hipCounter& counter); + // Check whether the counter is HIP_SUPPORTED_V2_ONLY or not. + static bool isHipSupportedV2Only(const hipCounter& counter); // Get string CUDA version. static std::string getCudaVersion(const cudaVersions &ver); // Get string HIP version. diff --git a/tests/unit_tests/libraries/cuBLAS/cublas_0_based_indexing.cu b/tests/unit_tests/libraries/cuBLAS/cublas_0_based_indexing.cu index 69812c98..4d8cadf7 100644 --- a/tests/unit_tests/libraries/cuBLAS/cublas_0_based_indexing.cu +++ b/tests/unit_tests/libraries/cuBLAS/cublas_0_based_indexing.cu @@ -5,16 +5,25 @@ #include #include // CHECK: #include "hipblas.h" +// CHECK-NOT: #include "cublas_v2.h" #include "cublas.h" +#include "cublas_v2.h" +// CHECK-NOT: #include "hipblas.h" #define M 6 #define N 5 #define IDX2C(i,j,ld) (((j)*(ld))+(i)) static __inline__ void modify(float *m, int ldm, int n, int p, int q, float alpha, float beta) { - // CHECK: hipblasSscal(n - p, alpha, &m[IDX2C(p, q, ldm)], ldm); - // CHECK: hipblasSscal(ldm - p, beta, &m[IDX2C(p, q, ldm)], 1); - cublasSscal(n - p, alpha, &m[IDX2C(p, q, ldm)], ldm); - cublasSscal(ldm - p, beta, &m[IDX2C(p, q, ldm)], 1); + // CHECK: hipblasHandle_t blasHandle; + cublasHandle_t blasHandle; + // CHECK: hipblasStatus_t blasStatus = hipblasCreate(&blasHandle); + cublasStatus blasStatus = cublasCreate(&blasHandle); + // CHECK: hipblasSscal(blasHandle, n - p, &alpha, &m[IDX2C(p, q, ldm)], ldm); + cublasSscal(blasHandle, n - p, &alpha, &m[IDX2C(p, q, ldm)], ldm); + // CHECK: hipblasSscal(blasHandle, ldm - p, &beta, &m[IDX2C(p, q, ldm)], 1); + cublasSscal(blasHandle, ldm - p, &beta, &m[IDX2C(p, q, ldm)], 1); + // CHECK: hipblasDestroy(blasHandle); + cublasDestroy(blasHandle); } int main(void) { int i, j; diff --git a/tests/unit_tests/libraries/cuBLAS/cublas_v1.cu b/tests/unit_tests/libraries/cuBLAS/cublas_v1.cu new file mode 100644 index 00000000..74a3645a --- /dev/null +++ b/tests/unit_tests/libraries/cuBLAS/cublas_v1.cu @@ -0,0 +1,83 @@ +// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args + +// CHECK: #include +#include +#include +#include +// CHECK: #include "hipblas.h" +#include "cublas.h" +#define M 6 +#define N 5 +#define IDX2C(i,j,ld) (((j)*(ld))+(i)) +static __inline__ void modify(float* m, int ldm, int n, int p, int q, float + alpha, float beta) { + // CHECK-NOT: hipblasSscal(n - p, alpha, &m[IDX2C(p, q, ldm)], ldm); + // CHECK-NOT: hipblasSscal(ldm - p, beta, &m[IDX2C(p, q, ldm)], 1); + // CHECK: cublasSscal(n - p, alpha, &m[IDX2C(p, q, ldm)], ldm); + // CHECK: cublasSscal(ldm - p, beta, &m[IDX2C(p, q, ldm)], 1); + cublasSscal(n - p, alpha, &m[IDX2C(p, q, ldm)], ldm); + cublasSscal(ldm - p, beta, &m[IDX2C(p, q, ldm)], 1); +} +int main(void) { + int i, j; + // CHECK: hipblasStatus_t stat; + cublasStatus stat; + float* devPtrA; + float* a = 0; + a = (float*)malloc(M * N * sizeof(*a)); + if (!a) { + printf("host memory allocation failed"); + return EXIT_FAILURE; + } + for (j = 0; j < N; j++) { + for (i = 0; i < M; i++) { + a[IDX2C(i, j, M)] = (float)(i * M + j + 1); + } + } + // cublasInit is not supported yet + cublasInit(); + // cublasAlloc is not supported yet + stat = cublasAlloc(M * N, sizeof(*a), (void**)&devPtrA); + // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) { + if (stat != CUBLAS_STATUS_SUCCESS) { + printf("device memory allocation failed"); + // cublasShutdown is not supported yet + cublasShutdown(); + return EXIT_FAILURE; + } + // CHECK: stat = hipblasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M); + stat = cublasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M); + // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) { + if (stat != CUBLAS_STATUS_SUCCESS) { + printf("data download failed"); + // cublasFree is not supported yet + cublasFree(devPtrA); + // cublasShutdown is not supported yet + cublasShutdown(); + return EXIT_FAILURE; + } + modify(devPtrA, M, N, 1, 2, 16.0f, 12.0f); + // CHECK: stat = hipblasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M); + stat = cublasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M); + // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) { + if (stat != CUBLAS_STATUS_SUCCESS) { + printf("data upload failed"); + // cublasFree is not supported yet + cublasFree(devPtrA); + // cublasShutdown is not supported yet + cublasShutdown(); + return EXIT_FAILURE; + } + // cublasFree is not supported yet + cublasFree(devPtrA); + // cublasShutdown is not supported yet + cublasShutdown(); + for (j = 0; j < N; j++) { + for (i = 0; i < M; i++) { + printf("%7.0f", a[IDX2C(i, j, M)]); + } + printf("\n"); + } + free(a); + return EXIT_SUCCESS; +} diff --git a/tests/unit_tests/libraries/cuBLAS/rocBLAS/cublas_0_based_indexing_rocblas.cu b/tests/unit_tests/libraries/cuBLAS/rocBLAS/cublas_0_based_indexing_rocblas.cu index 2c014cb0..a47561bd 100644 --- a/tests/unit_tests/libraries/cuBLAS/rocBLAS/cublas_0_based_indexing_rocblas.cu +++ b/tests/unit_tests/libraries/cuBLAS/rocBLAS/cublas_0_based_indexing_rocblas.cu @@ -5,16 +5,25 @@ #include #include // CHECK: #include "rocblas.h" +// CHECK-NOT: #include "cublas_v2.h" #include "cublas.h" +#include "cublas_v2.h" +// CHECK-NOT: #include "rocblas.h" #define M 6 #define N 5 #define IDX2C(i,j,ld) (((j)*(ld))+(i)) static __inline__ void modify(float *m, int ldm, int n, int p, int q, float alpha, float beta) { - // CHECK: rocblas_sscal(n - p, alpha, &m[IDX2C(p, q, ldm)], ldm); - // CHECK: rocblas_sscal(ldm - p, beta, &m[IDX2C(p, q, ldm)], 1); - cublasSscal(n - p, alpha, &m[IDX2C(p, q, ldm)], ldm); - cublasSscal(ldm - p, beta, &m[IDX2C(p, q, ldm)], 1); + // CHECK: rocblas_handle blasHandle; + cublasHandle_t blasHandle; + // CHECK: rocblas_status blasStatus = rocblas_create_handle(&blasHandle); + cublasStatus blasStatus = cublasCreate(&blasHandle); + // CHECK: rocblas_sscal(blasHandle, n - p, &alpha, &m[IDX2C(p, q, ldm)], ldm); + // CHECK: rocblas_sscal(blasHandle, ldm - p, &beta, &m[IDX2C(p, q, ldm)], 1); + cublasSscal(blasHandle, n - p, &alpha, &m[IDX2C(p, q, ldm)], ldm); + cublasSscal(blasHandle, ldm - p, &beta, &m[IDX2C(p, q, ldm)], 1); + // CHECK: rocblas_destroy_handle(blasHandle); + cublasDestroy(blasHandle); } int main(void) { int i, j; From af5f23137c13a4ba6f24760a2dda7f29e0aa1f01 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Sat, 6 Aug 2022 21:12:47 +0200 Subject: [PATCH 11/14] [HIPIFY] Improve diagnostics + Provide the identifier's name and API in warnings [TODO] + Fix the source location: the source file should be reported instead of temp file, reported currently --- src/HipifyAction.cpp | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/src/HipifyAction.cpp b/src/HipifyAction.cpp index ad93bea3..ae02a4dd 100644 --- a/src/HipifyAction.cpp +++ b/src/HipifyAction.cpp @@ -173,17 +173,18 @@ void HipifyAction::FindAndReplace(StringRef name, } Statistics::current().incrementCounter(found->second, name.str()); clang::DiagnosticsEngine &DE = getCompilerInstance().getDiagnostics(); - // Warn the user about deprecated idenrifier. + // Warn about the deprecated identifier in CUDA but hipify it. if (Statistics::isDeprecated(found->second)) { - DE.Report(sl, DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "CUDA identifier is deprecated.")); + const auto ID = DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "'%0' is deprecated in CUDA."); + DE.Report(sl, ID) << found->first; } - // Warn the user about unsupported experimental identifier. + // Warn about the unsupported experimental identifier. if (Statistics::isHipExperimental(found->second) &&!Experimental) { std::string sWarn; Statistics::isToRoc(found->second) ? sWarn = sROC : sWarn = sHIP; sWarn = "" + sWarn; - const auto ID = DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "CUDA identifier is experimental in %0. To hipify it, use the '--experimental' option."); - DE.Report(sl, ID) << sWarn; + const auto ID = DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "'%0' is experimental in '%1'; to hipify it, use the '--experimental' option."); + DE.Report(sl, ID) << found->first << sWarn; return; } // Warn about the identifier which is supported only for _v2 version of it @@ -193,17 +194,17 @@ void HipifyAction::FindAndReplace(StringRef name, std::string sWarn; Statistics::isToRoc(found->second) ? sWarn = sROC : sWarn = sHIP; sWarn = "" + sWarn; - const auto ID = DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "Only _v2 version of identifier is supported in %0. To hipify it, include cublas_v2.h in the source code."); - DE.Report(sl, ID) << sWarn; + const auto ID = DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "Only '%0_v2' version of '%0' is supported in '%1'; to hipify it, include 'cublas_v2.h' in the source."); + DE.Report(sl, ID) << found->first << sWarn; return; } - // Warn about unsupported identifier. + // Warn about the unsupported identifier. if (Statistics::isUnsupported(found->second)) { std::string sWarn; Statistics::isToRoc(found->second) ? sWarn = sROC : sWarn = sHIP; sWarn = "" + sWarn; - const auto ID = DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "CUDA identifier is unsupported in %0."); - DE.Report(sl, ID) << sWarn; + const auto ID = DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "'%0' is unsupported in '%1'."); + DE.Report(sl, ID) << found->first << sWarn; return; } if (!bReplace) { @@ -348,7 +349,10 @@ void HipifyAction::InclusionDirective(clang::SourceLocation hash_loc, clang::SourceLocation sl = filename_range.getBegin(); if (Statistics::isUnsupported(found->second)) { clang::DiagnosticsEngine &DE = getCompilerInstance().getDiagnostics(); - DE.Report(sl, DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "Unsupported CUDA header.")); + std::string sWarn; + Statistics::isToRoc(found->second) ? sWarn = sROC : sWarn = sHIP; + const auto ID = DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "'%0' is unsupported header in '%1'."); + DE.Report(sl, ID) << found->first << sWarn; return; } clang::StringRef newInclude; @@ -566,8 +570,8 @@ bool HipifyAction::cudaHostFuncCall(const mat::MatchFinder::MatchResult &Result) switch (c.second.castWarn) { case cw_DataLoss: { clang::DiagnosticsEngine& DE = getCompilerInstance().getDiagnostics(); - const auto ID = DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "Possible data loss in %0 argument."); - DE.Report(fullSL, ID) << argNum+1; + const auto ID = DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "Possible data loss in %0 argument of '%1'."); + DE.Report(fullSL, ID) << argNum+1 << sName; break; } case cw_None: From b4081fc55ad8d8a72d816d8854495a1314c1aeca Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Sun, 7 Aug 2022 14:03:18 +0200 Subject: [PATCH 12/14] [HIPIFY][BLAS] Satisfy CUDA 11.7.1 restriction [Restriction] + If both `cublas.h` and `cublas_v2.h` are included the following error occurs: `cublas.h(61,2): error: "It is an error to include both cublas.h and cublas_v2.h"` [Solution] + Swap `cublas_v2.h` and `cublas.h` includes if `cublas_v2.h` goes first + Actually, CUDA error message is misleading: erroneous to include `cublas_v2.h` before `cublas.h` only; + On the other hand, including them both is correct and only possible, when v1 and v2 APIs are needed to be used, cause not every API from `cublas.h` has a corresponding v2 analogue in `cublas_v2.h` --- tests/unit_tests/headers/headers_test_07.cu | 5 ++++- tests/unit_tests/headers/headers_test_08.cu | 8 ++++++-- tests/unit_tests/headers/headers_test_09.cu | 6 +++++- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/tests/unit_tests/headers/headers_test_07.cu b/tests/unit_tests/headers/headers_test_07.cu index 1effc189..c125522c 100644 --- a/tests/unit_tests/headers/headers_test_07.cu +++ b/tests/unit_tests/headers/headers_test_07.cu @@ -1,8 +1,11 @@ // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args +// CHECK: #include // CHECK: #include "hipblas.h" // CHECK-NOT: #include "cublas.h" +// CHECK-NOT: #include "cublas_v2.h" // CHECK: #include -#include "cublas_v2.h" #include "cublas.h" +#include "cublas_v2.h" +// CHECK-NOT: #include "hipblas.h" #include diff --git a/tests/unit_tests/headers/headers_test_08.cu b/tests/unit_tests/headers/headers_test_08.cu index aca7f194..84677d57 100644 --- a/tests/unit_tests/headers/headers_test_08.cu +++ b/tests/unit_tests/headers/headers_test_08.cu @@ -1,14 +1,18 @@ // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args // CHECK: #include -// CHECK-NOT: #include // CHECK: #include // CHECK: #include "hipblas.h" // CHECK-NOT: #include "cublas.h" +// CHECK-NOT: #include "cublas_v2.h" // CHECK: #include +// CHECK-NOT: #include #include +// CHECK-NOT: #include #include +// CHECK-NOT: #include #include -#include "cublas_v2.h" #include "cublas.h" +#include "cublas_v2.h" +// CHECK-NOT: #include "hipblas.h" #include diff --git a/tests/unit_tests/headers/headers_test_09.cu b/tests/unit_tests/headers/headers_test_09.cu index 37e718b5..067fe179 100644 --- a/tests/unit_tests/headers/headers_test_09.cu +++ b/tests/unit_tests/headers/headers_test_09.cu @@ -19,6 +19,7 @@ // CHECK: #include "hipblas.h" // CHECK-NOT: #include "cublas.h" +// CHECK-NOT: #include "cublas_v2.h" // CHECK: #include @@ -51,10 +52,12 @@ // CHECK: #include "hipsparse.h" #include +// CHECK-NOT: #include #include #include +// CHECK-NOT: #include #include "cuda_runtime_api.h" #include "channel_descriptor.h" @@ -67,8 +70,9 @@ #include -#include "cublas_v2.h" #include "cublas.h" +#include "cublas_v2.h" +// CHECK-NOT: #include "hipblas.h" #include From f2aac2ba5ca1ac3f8f241bf37919b6fb30151992 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Sun, 7 Aug 2022 16:37:46 +0200 Subject: [PATCH 13/14] [HIPIFY][doc] CUDA 11.7.1 is the latest supported release + Update README.md accordingly + Update 3rd parties tools versions --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index c43bbd8c..e9e983c4 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ After applying all the matchers, the output HIP source is produced. 1. [**LLVM+CLANG**](http://releases.llvm.org) of at least version [4.0.0](http://releases.llvm.org/download.html#4.0.0); the latest stable and recommended release: [**14.0.6**](https://github.com/llvm/llvm-project/releases/tag/llvmorg-14.0.6). -2. [**CUDA**](https://developer.nvidia.com/cuda-downloads) of at least version [8.0](https://developer.nvidia.com/cuda-80-ga2-download-archive), the latest supported version is [**11.7.0**](https://developer.nvidia.com/cuda-downloads). +2. [**CUDA**](https://developer.nvidia.com/cuda-downloads) of at least version [8.0](https://developer.nvidia.com/cuda-80-ga2-download-archive), the latest supported version is [**11.7.1**](https://developer.nvidia.com/cuda-downloads). @@ -160,7 +160,7 @@ After applying all the matchers, the output HIP source is produced. 14.0.2, 14.0.3, 14.0.4 - + - + @@ -385,7 +385,7 @@ Ubuntu 14: LLVM 4.0.0 - 7.1.0, CUDA 7.0 - 9.0, cuDNN 5.0.5 - 7.6.5 Ubuntu 16-18: LLVM 8.0.0 - 14.0.6, CUDA 8.0 - 10.2, cuDNN 5.1.10 - 8.0.5 -Ubuntu 20-21: LLVM 9.0.0 - 14.0.6, CUDA 8.0 - 11.7.0, cuDNN 5.1.10 - 8.4.1 +Ubuntu 20-21: LLVM 9.0.0 - 14.0.6, CUDA 8.0 - 11.7.1, cuDNN 5.1.10 - 8.4.1 Minimum build system requirements for the above configurations: @@ -560,8 +560,8 @@ Testing Time: 6.03s | 10.0.0 - 11.0.0 | 8.0 - 11.1 | 7.6.5 - 8.0.5 | 2017.15.9.30, 2019.16.8.3 | 3.19.2 | 3.9.1 | | 11.0.1 - 11.1.0 | 8.0 - 11.2.2 | 7.6.5 - 8.0.5 | 2017.15.9.31, 2019.16.8.4 | 3.19.3 | 3.9.2 | | 12.0.0 - 13.0.1 | 8.0 - 11.5.1 | 7.6.5 - 8.3.2 | 2017.15.9.43, 2019.16.11.9 | 3.22.2 | 3.10.2 | -| 14.0.0 - 14.0.6 | 8.0 - 11.7.0 | 8.0.5 - 8.4.1 | 2017.15.9.49, 2019.16.11.17, 2022.17.2.6 | 3.23.2 | 3.10.5 | -| 15.0.0git | 8.0 - 11.7.0 | 8.0.5 - 8.4.1 | 2017.15.9.49, 2019.16.11.17, 2022.17.2.6 | 3.23.2 | 3.10.5 | +| 14.0.0 - 14.0.6 | 8.0 - 11.7.1 | 8.0.5 - 8.4.1 | 2017.15.9.49, 2019.16.11.17, 2022.17.2.6 | 3.24.0 | 3.10.6 | +| 15.0.0git | 8.0 - 11.7.1 | 8.0.5 - 8.4.1 | 2017.15.9.49, 2019.16.11.17, 2022.17.2.6 | 3.24.0 | 3.10.6 | *Building with testing support by `Visual Studio 17 2022` on `Windows 10`:* From e78713d260208b640d39538c28de53dd639cff4e Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 8 Aug 2022 17:44:19 +0200 Subject: [PATCH 14/14] [HIPIFY][#601][BLAS][tests] Synthetic test for cuBLAS API - Part 3 + Added tests for NRM2, DOT, SCAL v2 functions --- .../synthetic/libraries/cublas2hipblas.cu | 125 ++++++++++++++++++ 1 file changed, 125 insertions(+) diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu index 6c6cf57f..b8bdfd48 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu @@ -263,6 +263,47 @@ int main() { cudaDataType DataType_2, DataType_3; + float fx = 0; + float fy = 0; + float fresult = 0; + + double dx = 0; + double dy = 0; + double dresult = 0; + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSnrm2_v2(cublasHandle_t handle, int n, const float* x, int incx, float* result); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSnrm2(hipblasHandle_t handle, int n, const float* x, int incx, float* result); + // CHECK: blasStatus = hipblasSnrm2(blasHandle, n, &fx, incx, &fresult); + // CHECK-NEXT: blasStatus = hipblasSnrm2(blasHandle, n, &fx, incx, &fresult); + blasStatus = cublasSnrm2(blasHandle, n, &fx, incx, &fresult); + blasStatus = cublasSnrm2_v2(blasHandle, n, &fx, incx, &fresult); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDnrm2_v2(cublasHandle_t handle, int n, const double* x, int incx, double* result); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDnrm2(hipblasHandle_t handle, int n, const double* x, int incx, double* result); + // CHECK: blasStatus = hipblasDnrm2(blasHandle, n, &dx, incx, &dresult); + // CHECK-NEXT: blasStatus = hipblasDnrm2(blasHandle, n, &dx, incx, &dresult); + blasStatus = cublasDnrm2(blasHandle, n, &dx, incx, &dresult); + blasStatus = cublasDnrm2_v2(blasHandle, n, &dx, incx, &dresult); + + // CHECK: hipComplex complex, complex_2, complex_res; + cuComplex complex, complex_2, complex_res; + // CHECK: hipDoubleComplex dcomplex, dcomplex_2, dcomplex_res; + cuDoubleComplex dcomplex, dcomplex_2, dcomplex_res; + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasScnrm2_v2(cublasHandle_t handle, int n, const cuComplex* x, int incx, float* result); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasScnrm2(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, float* result); + // CHECK: blasStatus = hipblasScnrm2(blasHandle, n, &complex, incx, &fresult); + // CHECK-NEXT: blasStatus = hipblasScnrm2(blasHandle, n, &complex, incx, &fresult); + blasStatus = cublasScnrm2(blasHandle, n, &complex, incx, &fresult); + blasStatus = cublasScnrm2_v2(blasHandle, n, &complex, incx, &fresult); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDznrm2_v2(cublasHandle_t handle, int n, const cuDoubleComplex* x, int incx, double* result); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDznrm2(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, double* result); + // CHECK: blasStatus = hipblasDznrm2(blasHandle, n, &dcomplex, incx, &dresult); + // CHECK-NEXT: blasStatus = hipblasDznrm2(blasHandle, n, &dcomplex, incx, &dresult); + blasStatus = cublasDznrm2(blasHandle, n, &dcomplex, incx, &dresult); + blasStatus = cublasDznrm2_v2(blasHandle, n, &dcomplex, incx, &dresult); + #if CUDA_VERSION >= 8000 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasNrm2Ex(cublasHandle_t handle, int n, const void* x, cudaDataType xType, int incx, void* result, cudaDataType resultType, cudaDataType executionType); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasNrm2Ex(hipblasHandle_t handle, int n, const void* x, hipblasDatatype_t xType, int incx, void* result, hipblasDatatype_t resultType, hipblasDatatype_t executionType); @@ -270,5 +311,89 @@ int main() { blasStatus = cublasNrm2Ex(blasHandle, n, image, DataType, incx, image_2, DataType_2, DataType_3); #endif + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSdot_v2(cublasHandle_t handle, int n, const float* x, int incx, const float* y, int incy, float* result); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSdot(hipblasHandle_t handle, int n, const float* x, int incx, const float* y, int incy, float* result); + // CHECK: blasStatus = hipblasSdot(blasHandle, n, &fx, incx, &fy, incy, &fresult); + // CHECK-NEXT: blasStatus = hipblasSdot(blasHandle, n, &fx, incx, &fy, incy, &fresult); + blasStatus = cublasSdot(blasHandle, n, &fx, incx, &fy, incy, &fresult); + blasStatus = cublasSdot_v2(blasHandle, n, &fx, incx, &fy, incy, &fresult); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDdot_v2(cublasHandle_t handle, int n, const double* x, int incx, const double* y, int incy, double* result); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDdot(hipblasHandle_t handle, int n, const double* x, int incx, const double* y, int incy, double* result); + // CHECK: blasStatus = hipblasDdot(blasHandle, n, &dx, incx, &dy, incy, &dresult); + // CHECK-NEXT: blasStatus = hipblasDdot(blasHandle, n, &dx, incx, &dy, incy, &dresult); + blasStatus = cublasDdot(blasHandle, n, &dx, incx, &dy, incy, &dresult); + blasStatus = cublasDdot_v2(blasHandle, n, &dx, incx, &dy, incy, &dresult); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCdotu_v2(cublasHandle_t handle, int n, const cuComplex* x, int incx, const cuComplex* y, int incy, cuComplex* result); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCdotu(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* result); + // CHECK: blasStatus = hipblasCdotu(blasHandle, n, &complex, incx, &complex_2, incy, &complex_res); + // CHECK-NEXT: blasStatus = hipblasCdotu(blasHandle, n, &complex, incx, &complex_2, incy, &complex_res); + blasStatus = cublasCdotu(blasHandle, n, &complex, incx, &complex_2, incy, &complex_res); + blasStatus = cublasCdotu_v2(blasHandle, n, &complex, incx, &complex_2, incy, &complex_res); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCdotc_v2(cublasHandle_t handle, int n, const cuComplex* x, int incx, const cuComplex* y, int incy, cuComplex* result); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCdotc(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* result); + // CHECK: blasStatus = hipblasCdotc(blasHandle, n, &complex, incx, &complex_2, incy, &complex_res); + // CHECK-NEXT: blasStatus = hipblasCdotc(blasHandle, n, &complex, incx, &complex_2, incy, &complex_res); + blasStatus = cublasCdotc(blasHandle, n, &complex, incx, &complex_2, incy, &complex_res); + blasStatus = cublasCdotc_v2(blasHandle, n, &complex, incx, &complex_2, incy, &complex_res); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdotu_v2(cublasHandle_t handle, int n, const cuDoubleComplex* x, int incx, const cuDoubleComplex* y, int incy, cuDoubleComplex* result); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZdotu(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* result); + // CHECK: blasStatus = hipblasZdotu(blasHandle, n, &dcomplex, incx, &dcomplex_2, incy, &dcomplex_res); + // CHECK-NEXT: blasStatus = hipblasZdotu(blasHandle, n, &dcomplex, incx, &dcomplex_2, incy, &dcomplex_res); + blasStatus = cublasZdotu(blasHandle, n, &dcomplex, incx, &dcomplex_2, incy, &dcomplex_res); + blasStatus = cublasZdotu_v2(blasHandle, n, &dcomplex, incx, &dcomplex_2, incy, &dcomplex_res); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdotc_v2(cublasHandle_t handle, int n, const cuDoubleComplex* x, int incx, const cuDoubleComplex* y, int incy, cuDoubleComplex* result); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZdotc(hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* result); + // CHECK: blasStatus = hipblasZdotc(blasHandle, n, &dcomplex, incx, &dcomplex_2, incy, &dcomplex_res); + // CHECK-NEXT: blasStatus = hipblasZdotc(blasHandle, n, &dcomplex, incx, &dcomplex_2, incy, &dcomplex_res); + blasStatus = cublasZdotc(blasHandle, n, &dcomplex, incx, &dcomplex_2, incy, &dcomplex_res); + blasStatus = cublasZdotc_v2(blasHandle, n, &dcomplex, incx, &dcomplex_2, incy, &dcomplex_res); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSscal_v2(cublasHandle_t handle, int n, const float* alpha, float* x, int incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSscal(hipblasHandle_t handle, int n, const float* alpha, float* x, int incx); + // CHECK: blasStatus = hipblasSscal(blasHandle, n, &fy, &fx, incx); + // CHECK-NEXT: blasStatus = hipblasSscal(blasHandle, n, &fy, &fx, incx); + blasStatus = cublasSscal(blasHandle, n, &fy, &fx, incx); + blasStatus = cublasSscal_v2(blasHandle, n, &fy, &fx, incx); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDscal_v2(cublasHandle_t handle, int n, const double* alpha, double* x, int incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDscal(hipblasHandle_t handle, int n, const double* alpha, double* x, int incx); + // CHECK: blasStatus = hipblasDscal(blasHandle, n, &dx, &dy, incx); + // CHECK-NEXT: blasStatus = hipblasDscal(blasHandle, n, &dx, &dy, incx); + blasStatus = cublasDscal(blasHandle, n, &dx, &dy, incx); + blasStatus = cublasDscal_v2(blasHandle, n, &dx, &dy, incx); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCscal_v2(cublasHandle_t handle, int n, const cuComplex* alpha, cuComplex* x, int incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCscal(hipblasHandle_t handle, int n, const hipblasComplex* alpha, hipblasComplex* x, int incx); + // CHECK: blasStatus = hipblasCscal(blasHandle, n, &complex, &complex_2, incx); + // CHECK-NEXT: blasStatus = hipblasCscal(blasHandle, n, &complex, &complex_2, incx); + blasStatus = cublasCscal(blasHandle, n, &complex, &complex_2, incx); + blasStatus = cublasCscal_v2(blasHandle, n, &complex, &complex_2, incx); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsscal_v2(cublasHandle_t handle, int n, const float* alpha, cuComplex* x, int incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCsscal(hipblasHandle_t handle, int n, const float* alpha, hipblasComplex* x, int incx); + // CHECK: blasStatus = hipblasCsscal(blasHandle, n, &fx, &complex, incx); + // CHECK-NEXT: blasStatus = hipblasCsscal(blasHandle, n, &fx, &complex, incx); + blasStatus = cublasCsscal(blasHandle, n, &fx, &complex, incx); + blasStatus = cublasCsscal_v2(blasHandle, n, &fx, &complex, incx); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZscal_v2(cublasHandle_t handle, int n, const cuDoubleComplex* alpha, cuDoubleComplex* x, int incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZscal(hipblasHandle_t handle, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* x, int incx); + // CHECK: blasStatus = hipblasZscal(blasHandle, n, &dcomplex, &dcomplex_2, incx); + // CHECK-NEXT: blasStatus = hipblasZscal(blasHandle, n, &dcomplex, &dcomplex_2, incx); + blasStatus = cublasZscal(blasHandle, n, &dcomplex, &dcomplex_2, incx); + blasStatus = cublasZscal_v2(blasHandle, n, &dcomplex, &dcomplex_2, incx); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdscal_v2(cublasHandle_t handle, int n, const double* alpha, cuDoubleComplex* x, int incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZdscal(hipblasHandle_t handle, int n, const double* alpha, hipblasDoubleComplex* x, int incx); + // CHECK: blasStatus = hipblasZdscal(blasHandle, n, &dx, &dcomplex, incx); + // CHECK-NEXT: blasStatus = hipblasZdscal(blasHandle, n, &dx, &dcomplex, incx); + blasStatus = cublasZdscal(blasHandle, n, &dx, &dcomplex, incx); + blasStatus = cublasZdscal_v2(blasHandle, n, &dx, &dcomplex, incx); + return 0; }
11.7.011.7.1 works only with the patch
due to the clang's bug 54609
patch for 14.0.0**
patch for 14.0.1**
@@ -171,7 +171,7 @@ After applying all the matchers, the output HIP source is produced.
14.0.5, 14.0.611.7.011.7.1 LATEST STABLE CONFIG