From 30ab08940c6acf7d8ed1f48e8bff995f1363d1d6 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Fri, 7 Jul 2023 20:41:43 +0200 Subject: [PATCH 01/20] [HIPIFY][SPARSE][test][Windows][fix] Take into account APIs unsupported on WIN32 for some CUDA versions in the corresponding hipSPARSE and rocSPARSE tests + [Reason] Some APIs initially appeared in CUDA 10.x for Linux only and were added for Windows only in CUDA 11.0, so we need to distinguish them on Windows; otherwise some tests will fail there ---
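Note (illustrative sketch, not part of the committed diff): the guard pattern used throughout this patch folds both availability windows into a single condition. For an API that Linux got in CUDA 10.1 (10010) but Windows only got in CUDA 11.0 (11000), the shape is:

#if (CUDA_VERSION >= 10010 && CUDA_VERSION < 11000 && !defined(_WIN32)) || CUDA_VERSION >= 11000
  // Covered: the Linux-only range [10.1, 11.0), plus any platform from 11.0 on
  cusparseSpMatDescr_t spMatDescr_t; // one of the symbols actually guarded this way below
#endif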
.../synthetic/libraries/cusparse2hipsparse.cu | 30 +++++++++++-------- .../synthetic/libraries/cusparse2rocsparse.cu | 14 +++++---- 2 files changed, 26 insertions(+), 18 deletions(-) diff --git a/tests/unit_tests/synthetic/libraries/cusparse2hipsparse.cu b/tests/unit_tests/synthetic/libraries/cusparse2hipsparse.cu index b59e9adc..615a00b6 100644 --- a/tests/unit_tests/synthetic/libraries/cusparse2hipsparse.cu +++ b/tests/unit_tests/synthetic/libraries/cusparse2hipsparse.cu @@ -426,7 +426,7 @@ int main() { status_t = cusparseCopyMatDescr(matDescr_t, matDescr_t_2); #endif -#if CUDA_VERSION >= 10010 +#if (CUDA_VERSION >= 10010 && CUDA_VERSION < 11000 && !defined(_WIN32)) || CUDA_VERSION >= 11000 // CHECK: hipsparseSpMatDescr_t spMatDescr_t, matC; cusparseSpMatDescr_t spMatDescr_t, matC; @@ -471,11 +471,6 @@ int main() { // CHECK: hipsparseSpMMAlg_t spMMAlg_t; cusparseSpMMAlg_t spMMAlg_t; - // CHECK: hipsparseCsr2CscAlg_t Csr2CscAlg_t; - // CHECK-NEXT: hipsparseCsr2CscAlg_t CSR2CSC_ALG1 = HIPSPARSE_CSR2CSC_ALG1; - cusparseCsr2CscAlg_t Csr2CscAlg_t; - cusparseCsr2CscAlg_t CSR2CSC_ALG1 = CUSPARSE_CSR2CSC_ALG1; - // CUDA: cusparseStatus_t CUSPARSEAPI cusparseCreateCoo(cusparseSpMatDescr_t* spMatDescr, int64_t rows, int64_t cols, int64_t nnz, void* cooRowInd, void* cooColInd, void* cooValues, cusparseIndexType_t cooIdxType, cusparseIndexBase_t idxBase, cudaDataType valueType); // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseCreateCoo(hipsparseSpMatDescr_t* spMatDescr, int64_t rows, int64_t cols, int64_t nnz, void* cooRowInd, void* cooColInd, void* cooValues, hipsparseIndexType_t cooIdxType, hipsparseIndexBase_t idxBase, hipDataType valueType); // CHECK: status_t = hipsparseCreateCoo(&spMatDescr_t, rows, cols, nnz, cooRowInd, cooColInd, cooValues, indexType_t, indexBase_t, dataType); @@ -527,28 +522,34 @@ int main() { status_t = cusparseDnMatSetStridedBatch(dnMatDescr_t, batchCount, batchStride); #endif -#if CUDA_VERSION >= 10010 && CUDA_VERSION < 12000 +#if CUDA_VERSION >= 10010 + // CHECK: hipsparseCsr2CscAlg_t Csr2CscAlg_t; + // CHECK-NEXT: hipsparseCsr2CscAlg_t CSR2CSC_ALG1 = HIPSPARSE_CSR2CSC_ALG1; + cusparseCsr2CscAlg_t Csr2CscAlg_t; + cusparseCsr2CscAlg_t CSR2CSC_ALG1 = CUSPARSE_CSR2CSC_ALG1; +#endif + +#if (CUDA_VERSION >= 10010 && CUDA_VERSION < 11000 && !defined(_WIN32)) || (CUDA_VERSION >= 11000 && CUDA_VERSION < 12000) // CHECK: hipsparseSpMMAlg_t COOMM_ALG1 = HIPSPARSE_COOMM_ALG1; // CHECK-NEXT: hipsparseSpMMAlg_t COOMM_ALG2 = HIPSPARSE_COOMM_ALG2; // CHECK-NEXT: hipsparseSpMMAlg_t COOMM_ALG3 = HIPSPARSE_COOMM_ALG3; cusparseSpMMAlg_t COOMM_ALG1 = CUSPARSE_COOMM_ALG1; cusparseSpMMAlg_t COOMM_ALG2 = CUSPARSE_COOMM_ALG2; cusparseSpMMAlg_t COOMM_ALG3 = CUSPARSE_COOMM_ALG3; +#endif +#if CUDA_VERSION >= 10010 && CUDA_VERSION < 12000 // CHECK: hipsparseCsr2CscAlg_t CSR2CSC_ALG2 = HIPSPARSE_CSR2CSC_ALG2; cusparseCsr2CscAlg_t CSR2CSC_ALG2 = CUSPARSE_CSR2CSC_ALG2; #endif -#if CUDA_VERSION >= 10020 +#if (CUDA_VERSION >= 10020 && CUDA_VERSION < 11000 && !defined(_WIN32)) || CUDA_VERSION >= 11000 // CHECK: hipsparseSpVecDescr_t spVecDescr_t; cusparseSpVecDescr_t spVecDescr_t; // CHECK: hipsparseDnVecDescr_t dnVecDescr_t, vecX, vecY; cusparseDnVecDescr_t dnVecDescr_t, vecX, vecY; - // CHECK: hipsparseStatus_t STATUS_NOT_SUPPORTED = HIPSPARSE_STATUS_NOT_SUPPORTED; - cusparseStatus_t STATUS_NOT_SUPPORTED = CUSPARSE_STATUS_NOT_SUPPORTED; - // CHECK: hipsparseSpMVAlg_t spMVAlg_t; cusparseSpMVAlg_t spMVAlg_t; @@ -648,7 +649,12 @@ int main() { status_t = cusparseSpMV(handle_t, opA, alpha, spMatDescr_t, vecX, beta, vecY, dataType, spMVAlg_t, tempBuffer); #endif -#if CUDA_VERSION >= 10020 && CUDA_VERSION < 12000 +#if CUDA_VERSION >= 10020 + // CHECK: hipsparseStatus_t STATUS_NOT_SUPPORTED = HIPSPARSE_STATUS_NOT_SUPPORTED; + cusparseStatus_t STATUS_NOT_SUPPORTED = CUSPARSE_STATUS_NOT_SUPPORTED; +#endif + +#if (CUDA_VERSION >= 10020 && CUDA_VERSION < 11000 && !defined(_WIN32)) || (CUDA_VERSION >= 11000 && CUDA_VERSION < 12000) // CHECK: hipsparseFormat_t FORMAT_COO_AOS = HIPSPARSE_FORMAT_COO_AOS; cusparseFormat_t FORMAT_COO_AOS = CUSPARSE_FORMAT_COO_AOS; diff --git a/tests/unit_tests/synthetic/libraries/cusparse2rocsparse.cu b/tests/unit_tests/synthetic/libraries/cusparse2rocsparse.cu index 14a3dc10..4e4d65f6 100644 --- a/tests/unit_tests/synthetic/libraries/cusparse2rocsparse.cu +++ b/tests/unit_tests/synthetic/libraries/cusparse2rocsparse.cu @@ -431,7 +431,7 @@ int main() { status_t = cusparseCopyMatDescr(matDescr_t, matDescr_t_2); #endif -#if CUDA_VERSION >= 10010 +#if (CUDA_VERSION >= 10010 && CUDA_VERSION < 11000 && !defined(_WIN32)) || CUDA_VERSION >= 11000 // CHECK: _rocsparse_spmat_descr *spMatDescr = nullptr; // CHECK-NEXT: rocsparse_spmat_descr spMatDescr_t, matC; cusparseSpMatDescr *spMatDescr = nullptr; @@ -531,7 +531,7 @@ int main() { status_t = cusparseDnMatSetStridedBatch(dnMatDescr_t, batchCount, batchStride); #endif -#if CUDA_VERSION >= 10020 +#if (CUDA_VERSION >= 10020 && CUDA_VERSION < 11000 && !defined(_WIN32)) || CUDA_VERSION >= 11000 // CHECK: _rocsparse_spvec_descr *spVecDescr = nullptr; // CHECK-NEXT: rocsparse_spvec_descr spVecDescr_t; cusparseSpVecDescr *spVecDescr = nullptr; @@ -542,9 +542,6 @@ int main() { cusparseDnVecDescr *dnVecDescr = nullptr; cusparseDnVecDescr_t dnVecDescr_t, vecX, vecY; - // CHECK: rocsparse_status STATUS_NOT_SUPPORTED = rocsparse_status_not_implemented; - cusparseStatus_t STATUS_NOT_SUPPORTED = CUSPARSE_STATUS_NOT_SUPPORTED; - // CHECK: rocsparse_spmv_alg spMVAlg_t; cusparseSpMVAlg_t spMVAlg_t; @@ -644,7 +641,12 @@ int main() { status_t = cusparseSpMV(handle_t, opA, alpha, spMatDescr_t, vecX, beta, vecY, dataType, spMVAlg_t, tempBuffer); #endif -#if CUDA_VERSION >= 10020 && CUDA_VERSION < 12000 +#if CUDA_VERSION >= 10020 + // CHECK: rocsparse_status STATUS_NOT_SUPPORTED = rocsparse_status_not_implemented; + cusparseStatus_t STATUS_NOT_SUPPORTED = CUSPARSE_STATUS_NOT_SUPPORTED; +#endif + +#if (CUDA_VERSION >= 10020 && CUDA_VERSION < 11000 && !defined(_WIN32)) || (CUDA_VERSION >= 11000 && CUDA_VERSION < 12000) // CHECK: rocsparse_format FORMAT_COO_AOS = rocsparse_format_coo_aos; cusparseFormat_t FORMAT_COO_AOS = CUSPARSE_FORMAT_COO_AOS; From e1563759e50c8c798edab1f1d107642fbcb75219 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Sun, 9 Jul 2023 14:52:06 +0200 Subject: [PATCH 02/20] [HIPIFY][BLAS][tests][fix] Use `const_cast` for some function arguments whose signatures changed in 9.2 + Update hipBLAS-related synthetic tests to support both the pre-9.2 and post-9.2 BLAS APIs simultaneously + The change fixes BLAS test failures against CUDA < 9.2 + [ToDo] Do the same for the rocBLAS-related synthetic tests ---
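Note (illustrative sketch, not part of the committed diff): since CUDA 9.2 the batched BLAS entry points take their input arrays as `const T* const Aarray[]`, and C++ does not implicitly convert `T**` to `const T**`, so the tests keep an explicitly casted alias next to each mutable array and pass whichever form the signature under test expects. The pattern for the `float` case used below:

float** fAarray = 0;
// float** does not convert implicitly to const float**; an explicit const_cast is required:
const float** const fAarray_const = const_cast<const float**>(fAarray);
// cublasSgemmBatched (9.2+) declares const float* const Aarray[], so fAarray_const is passed there.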
.../synthetic/libraries/cublas2hipblas.cu | 82 +++++++++++-------- .../synthetic/libraries/cublas2hipblas_v2.cu | 82 +++++++++++-------- 2 files changed, 96 insertions(+), 68 deletions(-) diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu index fa917ec3..982f1e74 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu @@ -208,7 +208,9 @@ int main() { float fresult = 0; float** fAarray = 0; + const float** const fAarray_const = const_cast<const float**>(fAarray); float** fBarray = 0; + const float** const fBarray_const = const_cast<const float**>(fBarray); float** fCarray = 0; float** fTauarray = 0; @@ -228,12 +230,16 @@ int main() { double dresult = 0; double** dAarray = 0; + const double** const dAarray_const = const_cast<const double**>(dAarray); double** dBarray = 0; + const double** const dBarray_const = const_cast<const double**>(dBarray); double** dCarray = 0; double** dTauarray = 0; void** voidAarray = nullptr; + const void** const voidAarray_const = const_cast<const void**>(voidAarray); void** voidBarray = nullptr; + const void** const voidBarray_const = const_cast<const void**>(voidBarray); void** voidCarray = nullptr; // NOTE: float CUBLASWINAPI cublasSnrm2(int n, const float* x, int incx) is not supported by HIP @@ -254,20 +260,28 @@ int main() { cuDoubleComplex dcomplex, dcomplexa, dcomplexA, dcomplexB, dcomplexC, dcomplexx, dcomplexy, dcomplexs, dcomplexb; // CHECK: hipComplex** complexAarray = 0; + // CHECK: const hipComplex** const complexAarray_const = const_cast<const hipComplex**>(complexAarray); // CHECK-NEXT: hipComplex** complexBarray = 0; + // CHECK: const hipComplex** const complexBarray_const = const_cast<const hipComplex**>(complexBarray); // CHECK-NEXT: hipComplex** complexCarray = 0; // CHECK-NEXT: hipComplex** complexTauarray = 0; cuComplex** complexAarray = 0; + const cuComplex** const complexAarray_const = const_cast<const cuComplex**>(complexAarray); cuComplex** complexBarray = 0; + const cuComplex** const complexBarray_const = const_cast<const cuComplex**>(complexBarray); cuComplex** complexCarray = 0; cuComplex** complexTauarray = 0; // CHECK: hipDoubleComplex** dcomplexAarray = 0; + // CHECK: const hipDoubleComplex** const dcomplexAarray_const = const_cast<const hipDoubleComplex**>(dcomplexAarray); // CHECK-NEXT: hipDoubleComplex** dcomplexBarray = 0; + // CHECK: const hipDoubleComplex** const dcomplexBarray_const = const_cast<const hipDoubleComplex**>(dcomplexBarray); // CHECK-NEXT: hipDoubleComplex** dcomplexCarray = 0; // CHECK-NEXT: hipDoubleComplex** dcomplexTauarray = 0; cuDoubleComplex** dcomplexAarray = 0; + const cuDoubleComplex** const dcomplexAarray_const = const_cast<const cuDoubleComplex**>(dcomplexAarray); cuDoubleComplex** dcomplexBarray = 0; + const cuDoubleComplex** const dcomplexBarray_const = const_cast<const cuDoubleComplex**>(dcomplexBarray); cuDoubleComplex** dcomplexCarray = 0; cuDoubleComplex** dcomplexTauarray = 0; @@ -1041,13 +1055,13 @@ int main() { // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const float* alpha, const float* const Aarray[], int lda, const float* const Barray[], int ldb, const float* beta, float* const Carray[], int ldc, int batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const float* alpha, const float* const AP[], int lda, const float* const BP[], int ldb, const float* beta, float* const CP[], int ldc, int
batchCount); - // CHECK: blasStatus = hipblasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray, lda, fBarray, ldb, &fb, fCarray, ldc, batchCount); - blasStatus = cublasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray, lda, fBarray, ldb, &fb, fCarray, ldc, batchCount); + // CHECK: blasStatus = hipblasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount); + blasStatus = cublasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const double* alpha, const double* const Aarray[], int lda, const double* const Barray[], int ldb, const double* beta, double* const Carray[], int ldc, int batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const double* alpha, const double* const AP[], int lda, const double* const BP[], int ldb, const double* beta, double* const CP[], int ldc, int batchCount); - // CHECK: blasStatus = hipblasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray, lda, dBarray, ldb, &db, dCarray, ldc, batchCount); - blasStatus = cublasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray, lda, dBarray, ldb, &db, dCarray, ldc, batchCount); + // CHECK: blasStatus = hipblasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount); + blasStatus = cublasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount); // TODO: __half -> hipblasHalf // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const __half* alpha, const __half* const Aarray[], int lda, const __half* const Barray[], int ldb, const __half* beta, __half* const Carray[], int ldc, int batchCount); @@ -1055,13 +1069,13 @@ int main() { // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuComplex* alpha, const cuComplex* const Aarray[], int lda, const cuComplex* const Barray[], int ldb, const cuComplex* beta, cuComplex* const Carray[], int ldc, int batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const AP[], int lda, const hipblasComplex* const BP[], int ldb, const hipblasComplex* beta, hipblasComplex* const CP[], int ldc, int batchCount); - // CHECK: blasStatus = hipblasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray, lda, complexBarray, ldb, &complexb, complexCarray, ldc, batchCount); - blasStatus = cublasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray, lda, complexBarray, ldb, &complexb, complexCarray, ldc, batchCount); + // CHECK: blasStatus = hipblasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount); + blasStatus = cublasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, 
complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* const Aarray[], int lda, const cuDoubleComplex* const Barray[], int ldb, const cuDoubleComplex* beta, cuDoubleComplex* const Carray[], int ldc, int batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const AP[], int lda, const hipblasDoubleComplex* const BP[], int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const CP[], int ldc, int batchCount); - // CHECK: blasStatus = hipblasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); - blasStatus = cublasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); + // CHECK: blasStatus = hipblasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); + blasStatus = cublasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); // NOTE: void CUBLASWINAPI cublasSsyrk(char uplo, char trans, int n, int k, float alpha, const float* A, int lda, float beta, float* C, int ldc); is not supported by HIP // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyrk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const float* alpha, const float* A, int lda, const float* beta, float* C, int ldc); @@ -1267,63 +1281,63 @@ int main() { // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgetriBatched(cublasHandle_t handle, int n, const float* const A[], int lda, const int* P, float* const C[], int ldc, int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgetriBatched(hipblasHandle_t handle, const int n, float* const A[], const int lda, int* ipiv, float* const C[], const int ldc, int* info, const int batchCount); - // CHECK: blasStatus = hipblasSgetriBatched(blasHandle, n, fAarray, lda, &P, fCarray, ldc, &info, batchCount); - blasStatus = cublasSgetriBatched(blasHandle, n, fAarray, lda, &P, fCarray, ldc, &info, batchCount); + // CHECK: blasStatus = hipblasSgetriBatched(blasHandle, n, fAarray_const, lda, &P, fCarray, ldc, &info, batchCount); + blasStatus = cublasSgetriBatched(blasHandle, n, fAarray_const, lda, &P, fCarray, ldc, &info, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgetriBatched(cublasHandle_t handle, int n, const double* const A[], int lda, const int* P, double* const C[], int ldc, int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgetriBatched(hipblasHandle_t handle, const int n, double* const A[], const int lda, int* ipiv, double* const C[], const int ldc, int* info, const int batchCount); - // CHECK: blasStatus = hipblasDgetriBatched(blasHandle, n, dAarray, lda, &P, dCarray, ldc, &info, batchCount); - blasStatus = cublasDgetriBatched(blasHandle, n, dAarray, lda, &P, dCarray, ldc, &info, batchCount); + // CHECK: blasStatus = 
hipblasDgetriBatched(blasHandle, n, dAarray_const, lda, &P, dCarray, ldc, &info, batchCount); + blasStatus = cublasDgetriBatched(blasHandle, n, dAarray_const, lda, &P, dCarray, ldc, &info, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgetriBatched(cublasHandle_t handle, int n, const cuComplex* const A[], int lda, const int* P, cuComplex* const C[], int ldc, int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgetriBatched(hipblasHandle_t handle, const int n, hipblasComplex* const A[], const int lda, int* ipiv, hipblasComplex* const C[], const int ldc, int* info, const int batchCount); - // CHECK: blasStatus = hipblasCgetriBatched(blasHandle, n, complexAarray, lda, &P, complexCarray, ldc, &info, batchCount); - blasStatus = cublasCgetriBatched(blasHandle, n, complexAarray, lda, &P, complexCarray, ldc, &info, batchCount); + // CHECK: blasStatus = hipblasCgetriBatched(blasHandle, n, complexAarray_const, lda, &P, complexCarray, ldc, &info, batchCount); + blasStatus = cublasCgetriBatched(blasHandle, n, complexAarray_const, lda, &P, complexCarray, ldc, &info, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgetriBatched(cublasHandle_t handle, int n, const cuDoubleComplex* const A[], int lda, const int* P, cuDoubleComplex* const C[], int ldc, int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgetriBatched(hipblasHandle_t handle, const int n, hipblasDoubleComplex* const A[], const int lda, int* ipiv, hipblasDoubleComplex* const C[], const int ldc, int* info, const int batchCount); - // CHECK: blasStatus = hipblasZgetriBatched(blasHandle, n, dcomplexAarray, lda, &P, dcomplexCarray, ldc, &info, batchCount); - blasStatus = cublasZgetriBatched(blasHandle, n, dcomplexAarray, lda, &P, dcomplexCarray, ldc, &info, batchCount); + // CHECK: blasStatus = hipblasZgetriBatched(blasHandle, n, dcomplexAarray_const, lda, &P, dcomplexCarray, ldc, &info, batchCount); + blasStatus = cublasZgetriBatched(blasHandle, n, dcomplexAarray_const, lda, &P, dcomplexCarray, ldc, &info, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const float* const Aarray[], int lda, const int* devIpiv, float* const Barray[], int ldb, int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, float* const A[], const int lda, const int* ipiv, float* const B[], const int ldb, int* info, const int batchCount); - // CHECK: blasStatus = hipblasSgetrsBatched(blasHandle, transa, n, nrhs, fAarray, lda, &P, fBarray, ldb, &info, batchCount); - blasStatus = cublasSgetrsBatched(blasHandle, transa, n, nrhs, fAarray, lda, &P, fBarray, ldb, &info, batchCount); + // CHECK: blasStatus = hipblasSgetrsBatched(blasHandle, transa, n, nrhs, fAarray_const, lda, &P, fBarray, ldb, &info, batchCount); + blasStatus = cublasSgetrsBatched(blasHandle, transa, n, nrhs, fAarray_const, lda, &P, fBarray, ldb, &info, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const double* const Aarray[], int lda, const int* devIpiv, double* const Barray[], int ldb, int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, double* const A[], const int lda, const int* ipiv, double* const B[], 
const int ldb, int* info, const int batchCount); - // CHECK: blasStatus = hipblasDgetrsBatched(blasHandle, transa, n, nrhs, dAarray, lda, &P, dBarray, ldb, &info, batchCount); - blasStatus = cublasDgetrsBatched(blasHandle, transa, n, nrhs, dAarray, lda, &P, dBarray, ldb, &info, batchCount); + // CHECK: blasStatus = hipblasDgetrsBatched(blasHandle, transa, n, nrhs, dAarray_const, lda, &P, dBarray, ldb, &info, batchCount); + blasStatus = cublasDgetrsBatched(blasHandle, transa, n, nrhs, dAarray_const, lda, &P, dBarray, ldb, &info, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const cuComplex* const Aarray[], int lda, const int* devIpiv, cuComplex* const Barray[], int ldb, int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasComplex* const A[], const int lda, const int* ipiv, hipblasComplex* const B[], const int ldb, int* info, const int batchCount); - // CHECK: blasStatus = hipblasCgetrsBatched(blasHandle, transa, n, nrhs, complexAarray, lda, &P, complexBarray, ldb, &info, batchCount); - blasStatus = cublasCgetrsBatched(blasHandle, transa, n, nrhs, complexAarray, lda, &P, complexBarray, ldb, &info, batchCount); + // CHECK: blasStatus = hipblasCgetrsBatched(blasHandle, transa, n, nrhs, complexAarray_const, lda, &P, complexBarray, ldb, &info, batchCount); + blasStatus = cublasCgetrsBatched(blasHandle, transa, n, nrhs, complexAarray_const, lda, &P, complexBarray, ldb, &info, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const cuDoubleComplex* const Aarray[], int lda, const int* devIpiv, cuDoubleComplex* const Barray[], int ldb, int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasDoubleComplex* const A[], const int lda, const int* ipiv, hipblasDoubleComplex* const B[], const int ldb, int* info, const int batchCount); - // CHECK: blasStatus = hipblasZgetrsBatched(blasHandle, transa, n, nrhs, dcomplexAarray, lda, &P, dcomplexBarray, ldb, &info, batchCount); - blasStatus = cublasZgetrsBatched(blasHandle, transa, n, nrhs, dcomplexAarray, lda, &P, dcomplexBarray, ldb, &info, batchCount); + // CHECK: blasStatus = hipblasZgetrsBatched(blasHandle, transa, n, nrhs, dcomplexAarray_const, lda, &P, dcomplexBarray, ldb, &info, batchCount); + blasStatus = cublasZgetrsBatched(blasHandle, transa, n, nrhs, dcomplexAarray_const, lda, &P, dcomplexBarray, ldb, &info, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const float* alpha, const float* const A[], int lda, float* const B[], int ldb, int batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasStrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float* alpha, float* const AP[], int lda, float* BP[], int ldb, int batchCount); - // CHECK: blasStatus = hipblasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray, lda, fBarray, ldb, batchCount); - blasStatus = cublasStrsmBatched(blasHandle, blasSideMode, blasFillMode, 
transa, blasDiagType, m, n, &fa, fAarray, lda, fBarray, ldb, batchCount); + // CHECK: blasStatus = hipblasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray_const, lda, fBarray, ldb, batchCount); + blasStatus = cublasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray_const, lda, fBarray, ldb, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const double* alpha, const double* const A[], int lda, double* const B[], int ldb, int batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDtrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double* alpha, double* const AP[], int lda, double* BP[], int ldb, int batchCount); - // CHECK: blasStatus = hipblasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray, lda, dBarray, ldb, batchCount); - blasStatus = cublasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray, lda, dBarray, ldb, batchCount); + // CHECK: blasStatus = hipblasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray_const, lda, dBarray, ldb, batchCount); + blasStatus = cublasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray_const, lda, dBarray, ldb, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuComplex* alpha, const cuComplex* const A[], int lda, cuComplex* const B[], int ldb, int batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCtrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, hipblasComplex* const AP[], int lda, hipblasComplex* BP[], int ldb, int batchCount); - // CHECK: blasStatus = hipblasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray, lda, complexBarray, ldb, batchCount); - blasStatus = cublasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray, lda, complexBarray, ldb, batchCount); + // CHECK: blasStatus = hipblasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray_const, lda, complexBarray, ldb, batchCount); + blasStatus = cublasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray_const, lda, complexBarray, ldb, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* const A[], int lda, cuDoubleComplex* const B[], int ldb, int batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZtrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* const AP[], int lda, hipblasDoubleComplex* BP[], int ldb, int 
batchCount); - // CHECK: blasStatus = hipblasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, batchCount); - blasStatus = cublasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, batchCount); + // CHECK: blasStatus = hipblasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray, ldb, batchCount); + blasStatus = cublasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray, ldb, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgeqrfBatched(cublasHandle_t handle, int m, int n, float* const Aarray[], int lda, float* const TauArray[], int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgeqrfBatched(hipblasHandle_t handle, const int m, const int n, float* const A[], const int lda, float* const ipiv[], int* info, const int batchCount); @@ -1528,8 +1542,8 @@ int main() { #if CUDA_VERSION >= 9010 && CUDA_VERSION < 11000 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* const Aarray[], cudaDataType Atype, int lda, const void* const Barray[], cudaDataType Btype, int ldb, const void* beta, void* const Carray[], cudaDataType Ctype, int ldc, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmBatchedEx(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A[], hipblasDatatype_t aType, int lda, const void* B[], hipblasDatatype_t bType, int ldb, const void* beta, void* C[], hipblasDatatype_t cType, int ldc, int batchCount, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo); - // CHECK: blasStatus = hipblasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); - blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); + // CHECK: blasStatus = hipblasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray_const, Atype, lda, voidBarray_const, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); + blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray_const, Atype, lda, voidBarray_const, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, long long int strideA, const void* B, cudaDataType Btype, int ldb, long long int strideB, const void* beta, void* C, cudaDataType Ctype, int ldc, long long int strideC, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmStridedBatchedEx(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A, 
hipblasDatatype_t aType, int lda, hipblasStride strideA, const void* B, hipblasDatatype_t bType, int ldb, hipblasStride strideB, const void* beta, void* C, hipblasDatatype_t cType, int ldc, hipblasStride strideC, int batchCount, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo); diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu index dde02d90..21a984d3 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu @@ -219,7 +219,9 @@ int main() { float fresult = 0; float** fAarray = 0; + const float** const fAarray_const = const_cast<const float**>(fAarray); float** fBarray = 0; + const float** const fBarray_const = const_cast<const float**>(fBarray); float** fCarray = 0; float** fTauarray = 0; @@ -239,12 +241,16 @@ int main() { double dresult = 0; double** dAarray = 0; + const double** const dAarray_const = const_cast<const double**>(dAarray); double** dBarray = 0; + const double** const dBarray_const = const_cast<const double**>(dBarray); double** dCarray = 0; double** dTauarray = 0; void** voidAarray = nullptr; + const void** const voidAarray_const = const_cast<const void**>(voidAarray); void** voidBarray = nullptr; + const void** const voidBarray_const = const_cast<const void**>(voidBarray); void** voidCarray = nullptr; // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSnrm2_v2(cublasHandle_t handle, int n, const float* x, int incx, float* result); @@ -267,20 +273,28 @@ int main() { cuDoubleComplex dcomplex, dcomplexa, dcomplexA, dcomplexB, dcomplexC, dcomplexx, dcomplexy, dcomplexs, dcomplexb; // CHECK: hipComplex** complexAarray = 0; + // CHECK: const hipComplex** const complexAarray_const = const_cast<const hipComplex**>(complexAarray); // CHECK-NEXT: hipComplex** complexBarray = 0; + // CHECK: const hipComplex** const complexBarray_const = const_cast<const hipComplex**>(complexBarray); // CHECK-NEXT: hipComplex** complexCarray = 0; // CHECK-NEXT: hipComplex** complexTauarray = 0; cuComplex** complexAarray = 0; + const cuComplex** const complexAarray_const = const_cast<const cuComplex**>(complexAarray); cuComplex** complexBarray = 0; + const cuComplex** const complexBarray_const = const_cast<const cuComplex**>(complexBarray); cuComplex** complexCarray = 0; cuComplex** complexTauarray = 0; // CHECK: hipDoubleComplex** dcomplexAarray = 0; + // CHECK: const hipDoubleComplex** const dcomplexAarray_const = const_cast<const hipDoubleComplex**>(dcomplexAarray); // CHECK-NEXT: hipDoubleComplex** dcomplexBarray = 0; + // CHECK: const hipDoubleComplex** const dcomplexBarray_const = const_cast<const hipDoubleComplex**>(dcomplexBarray); // CHECK-NEXT: hipDoubleComplex** dcomplexCarray = 0; // CHECK-NEXT: hipDoubleComplex** dcomplexTauarray = 0; cuDoubleComplex** dcomplexAarray = 0; + const cuDoubleComplex** const dcomplexAarray_const = const_cast<const cuDoubleComplex**>(dcomplexAarray); cuDoubleComplex** dcomplexBarray = 0; + const cuDoubleComplex** const dcomplexBarray_const = const_cast<const cuDoubleComplex**>(dcomplexBarray); cuDoubleComplex** dcomplexCarray = 0; cuDoubleComplex** dcomplexTauarray = 0; @@ -1188,13 +1202,13 @@ int main() { // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const float* alpha, const float* const Aarray[], int lda, const float* const Barray[], int ldb, const float* beta, float* const Carray[], int ldc, int batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const float* alpha, const float* const AP[], int lda, const float* const BP[], int
ldb, const float* beta, float* const CP[], int ldc, int batchCount); - // CHECK: blasStatus = hipblasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray, lda, fBarray, ldb, &fb, fCarray, ldc, batchCount); - blasStatus = cublasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray, lda, fBarray, ldb, &fb, fCarray, ldc, batchCount); + // CHECK: blasStatus = hipblasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount); + blasStatus = cublasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const double* alpha, const double* const Aarray[], int lda, const double* const Barray[], int ldb, const double* beta, double* const Carray[], int ldc, int batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const double* alpha, const double* const AP[], int lda, const double* const BP[], int ldb, const double* beta, double* const CP[], int ldc, int batchCount); - // CHECK: blasStatus = hipblasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray, lda, dBarray, ldb, &db, dCarray, ldc, batchCount); - blasStatus = cublasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray, lda, dBarray, ldb, &db, dCarray, ldc, batchCount); + // CHECK: blasStatus = hipblasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount); + blasStatus = cublasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount); // TODO: __half -> hipblasHalf // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const __half* alpha, const __half* const Aarray[], int lda, const __half* const Barray[], int ldb, const __half* beta, __half* const Carray[], int ldc, int batchCount); @@ -1202,13 +1216,13 @@ int main() { // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuComplex* alpha, const cuComplex* const Aarray[], int lda, const cuComplex* const Barray[], int ldb, const cuComplex* beta, cuComplex* const Carray[], int ldc, int batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const AP[], int lda, const hipblasComplex* const BP[], int ldb, const hipblasComplex* beta, hipblasComplex* const CP[], int ldc, int batchCount); - // CHECK: blasStatus = hipblasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray, lda, complexBarray, ldb, &complexb, complexCarray, ldc, batchCount); - blasStatus = cublasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray, lda, complexBarray, ldb, &complexb, complexCarray, ldc, batchCount); + // CHECK: blasStatus = hipblasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount); + blasStatus = 
cublasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* const Aarray[], int lda, const cuDoubleComplex* const Barray[], int ldb, const cuDoubleComplex* beta, cuDoubleComplex* const Carray[], int ldc, int batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgemmBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const AP[], int lda, const hipblasDoubleComplex* const BP[], int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const CP[], int ldc, int batchCount); - // CHECK: blasStatus = hipblasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); - blasStatus = cublasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); + // CHECK: blasStatus = hipblasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); + blasStatus = cublasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyrk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const float* alpha, const float* A, int lda, const float* beta, float* C, int ldc); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSsyrk(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* AP, int lda, const float* beta, float* CP, int ldc); @@ -1436,63 +1450,63 @@ int main() { // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgetriBatched(cublasHandle_t handle, int n, const float* const A[], int lda, const int* P, float* const C[], int ldc, int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgetriBatched(hipblasHandle_t handle, const int n, float* const A[], const int lda, int* ipiv, float* const C[], const int ldc, int* info, const int batchCount); - // CHECK: blasStatus = hipblasSgetriBatched(blasHandle, n, fAarray, lda, &P, fCarray, ldc, &info, batchCount); - blasStatus = cublasSgetriBatched(blasHandle, n, fAarray, lda, &P, fCarray, ldc, &info, batchCount); + // CHECK: blasStatus = hipblasSgetriBatched(blasHandle, n, fAarray_const, lda, &P, fCarray, ldc, &info, batchCount); + blasStatus = cublasSgetriBatched(blasHandle, n, fAarray_const, lda, &P, fCarray, ldc, &info, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgetriBatched(cublasHandle_t handle, int n, const double* const A[], int lda, const int* P, double* const C[], int ldc, int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgetriBatched(hipblasHandle_t handle, const int n, double* const A[], const int lda, int* ipiv, double* const C[], const int ldc, int* info, const int batchCount); - // CHECK: blasStatus = hipblasDgetriBatched(blasHandle, n, dAarray, lda, &P, dCarray, ldc, &info, batchCount); - blasStatus = 
cublasDgetriBatched(blasHandle, n, dAarray, lda, &P, dCarray, ldc, &info, batchCount); + // CHECK: blasStatus = hipblasDgetriBatched(blasHandle, n, dAarray_const, lda, &P, dCarray, ldc, &info, batchCount); + blasStatus = cublasDgetriBatched(blasHandle, n, dAarray_const, lda, &P, dCarray, ldc, &info, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgetriBatched(cublasHandle_t handle, int n, const cuComplex* const A[], int lda, const int* P, cuComplex* const C[], int ldc, int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgetriBatched(hipblasHandle_t handle, const int n, hipblasComplex* const A[], const int lda, int* ipiv, hipblasComplex* const C[], const int ldc, int* info, const int batchCount); - // CHECK: blasStatus = hipblasCgetriBatched(blasHandle, n, complexAarray, lda, &P, complexCarray, ldc, &info, batchCount); - blasStatus = cublasCgetriBatched(blasHandle, n, complexAarray, lda, &P, complexCarray, ldc, &info, batchCount); + // CHECK: blasStatus = hipblasCgetriBatched(blasHandle, n, complexAarray_const, lda, &P, complexCarray, ldc, &info, batchCount); + blasStatus = cublasCgetriBatched(blasHandle, n, complexAarray_const, lda, &P, complexCarray, ldc, &info, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgetriBatched(cublasHandle_t handle, int n, const cuDoubleComplex* const A[], int lda, const int* P, cuDoubleComplex* const C[], int ldc, int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgetriBatched(hipblasHandle_t handle, const int n, hipblasDoubleComplex* const A[], const int lda, int* ipiv, hipblasDoubleComplex* const C[], const int ldc, int* info, const int batchCount); - // CHECK: blasStatus = hipblasZgetriBatched(blasHandle, n, dcomplexAarray, lda, &P, dcomplexCarray, ldc, &info, batchCount); - blasStatus = cublasZgetriBatched(blasHandle, n, dcomplexAarray, lda, &P, dcomplexCarray, ldc, &info, batchCount); + // CHECK: blasStatus = hipblasZgetriBatched(blasHandle, n, dcomplexAarray_const, lda, &P, dcomplexCarray, ldc, &info, batchCount); + blasStatus = cublasZgetriBatched(blasHandle, n, dcomplexAarray_const, lda, &P, dcomplexCarray, ldc, &info, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const float* const Aarray[], int lda, const int* devIpiv, float* const Barray[], int ldb, int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, float* const A[], const int lda, const int* ipiv, float* const B[], const int ldb, int* info, const int batchCount); - // CHECK: blasStatus = hipblasSgetrsBatched(blasHandle, transa, n, nrhs, fAarray, lda, &P, fBarray, ldb, &info, batchCount); - blasStatus = cublasSgetrsBatched(blasHandle, transa, n, nrhs, fAarray, lda, &P, fBarray, ldb, &info, batchCount); + // CHECK: blasStatus = hipblasSgetrsBatched(blasHandle, transa, n, nrhs, fAarray_const, lda, &P, fBarray, ldb, &info, batchCount); + blasStatus = cublasSgetrsBatched(blasHandle, transa, n, nrhs, fAarray_const, lda, &P, fBarray, ldb, &info, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const double* const Aarray[], int lda, const int* devIpiv, double* const Barray[], int ldb, int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgetrsBatched(hipblasHandle_t handle, const 
hipblasOperation_t trans, const int n, const int nrhs, double* const A[], const int lda, const int* ipiv, double* const B[], const int ldb, int* info, const int batchCount); - // CHECK: blasStatus = hipblasDgetrsBatched(blasHandle, transa, n, nrhs, dAarray, lda, &P, dBarray, ldb, &info, batchCount); - blasStatus = cublasDgetrsBatched(blasHandle, transa, n, nrhs, dAarray, lda, &P, dBarray, ldb, &info, batchCount); + // CHECK: blasStatus = hipblasDgetrsBatched(blasHandle, transa, n, nrhs, dAarray_const, lda, &P, dBarray, ldb, &info, batchCount); + blasStatus = cublasDgetrsBatched(blasHandle, transa, n, nrhs, dAarray_const, lda, &P, dBarray, ldb, &info, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const cuComplex* const Aarray[], int lda, const int* devIpiv, cuComplex* const Barray[], int ldb, int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasComplex* const A[], const int lda, const int* ipiv, hipblasComplex* const B[], const int ldb, int* info, const int batchCount); - // CHECK: blasStatus = hipblasCgetrsBatched(blasHandle, transa, n, nrhs, complexAarray, lda, &P, complexBarray, ldb, &info, batchCount); - blasStatus = cublasCgetrsBatched(blasHandle, transa, n, nrhs, complexAarray, lda, &P, complexBarray, ldb, &info, batchCount); + // CHECK: blasStatus = hipblasCgetrsBatched(blasHandle, transa, n, nrhs, complexAarray_const, lda, &P, complexBarray, ldb, &info, batchCount); + blasStatus = cublasCgetrsBatched(blasHandle, transa, n, nrhs, complexAarray_const, lda, &P, complexBarray, ldb, &info, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const cuDoubleComplex* const Aarray[], int lda, const int* devIpiv, cuDoubleComplex* const Barray[], int ldb, int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasDoubleComplex* const A[], const int lda, const int* ipiv, hipblasDoubleComplex* const B[], const int ldb, int* info, const int batchCount); - // CHECK: blasStatus = hipblasZgetrsBatched(blasHandle, transa, n, nrhs, dcomplexAarray, lda, &P, dcomplexBarray, ldb, &info, batchCount); - blasStatus = cublasZgetrsBatched(blasHandle, transa, n, nrhs, dcomplexAarray, lda, &P, dcomplexBarray, ldb, &info, batchCount); + // CHECK: blasStatus = hipblasZgetrsBatched(blasHandle, transa, n, nrhs, dcomplexAarray_const, lda, &P, dcomplexBarray, ldb, &info, batchCount); + blasStatus = cublasZgetrsBatched(blasHandle, transa, n, nrhs, dcomplexAarray_const, lda, &P, dcomplexBarray, ldb, &info, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const float* alpha, const float* const A[], int lda, float* const B[], int ldb, int batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasStrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float* alpha, float* const AP[], int lda, float* BP[], int ldb, int batchCount); - // CHECK: blasStatus = hipblasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, 
m, n, &fa, fAarray, lda, fBarray, ldb, batchCount); - blasStatus = cublasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray, lda, fBarray, ldb, batchCount); + // CHECK: blasStatus = hipblasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray_const, lda, fBarray, ldb, batchCount); + blasStatus = cublasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray_const, lda, fBarray, ldb, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const double* alpha, const double* const A[], int lda, double* const B[], int ldb, int batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDtrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double* alpha, double* const AP[], int lda, double* BP[], int ldb, int batchCount); - // CHECK: blasStatus = hipblasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray, lda, dBarray, ldb, batchCount); - blasStatus = cublasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray, lda, dBarray, ldb, batchCount); + // CHECK: blasStatus = hipblasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray_const, lda, dBarray, ldb, batchCount); + blasStatus = cublasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray_const, lda, dBarray, ldb, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuComplex* alpha, const cuComplex* const A[], int lda, cuComplex* const B[], int ldb, int batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCtrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, hipblasComplex* const AP[], int lda, hipblasComplex* BP[], int ldb, int batchCount); - // CHECK: blasStatus = hipblasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray, lda, complexBarray, ldb, batchCount); - blasStatus = cublasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray, lda, complexBarray, ldb, batchCount); + // CHECK: blasStatus = hipblasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray_const, lda, complexBarray, ldb, batchCount); + blasStatus = cublasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray_const, lda, complexBarray, ldb, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* const A[], int lda, cuDoubleComplex* const B[], int ldb, int batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZtrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, 
int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* const AP[], int lda, hipblasDoubleComplex* BP[], int ldb, int batchCount); - // CHECK: blasStatus = hipblasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, batchCount); - blasStatus = cublasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, batchCount); + // CHECK: blasStatus = hipblasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray, ldb, batchCount); + blasStatus = cublasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray, ldb, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgeqrfBatched(cublasHandle_t handle, int m, int n, float* const Aarray[], int lda, float* const TauArray[], int* info, int batchSize); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgeqrfBatched(hipblasHandle_t handle, const int m, const int n, float* const A[], const int lda, float* const ipiv[], int* info, const int batchCount); @@ -1697,8 +1711,8 @@ int main() { #if CUDA_VERSION >= 9010 && CUDA_VERSION < 11000 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* const Aarray[], cudaDataType Atype, int lda, const void* const Barray[], cudaDataType Btype, int ldb, const void* beta, void* const Carray[], cudaDataType Ctype, int ldc, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmBatchedEx(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A[], hipblasDatatype_t aType, int lda, const void* B[], hipblasDatatype_t bType, int ldb, const void* beta, void* C[], hipblasDatatype_t cType, int ldc, int batchCount, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo); - // CHECK: blasStatus = hipblasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); - blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); + // CHECK: blasStatus = hipblasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray_const, Atype, lda, voidBarray_const, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); + blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray_const, Atype, lda, voidBarray_const, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, long long int strideA, const void* B, cudaDataType Btype, int ldb, long long int strideB, const void* beta, void* C, cudaDataType Ctype, int ldc, long long int strideC, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo); // HIP: HIPBLAS_EXPORT hipblasStatus_t 
hipblasGemmStridedBatchedEx(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A, hipblasDatatype_t aType, int lda, hipblasStride strideA, const void* B, hipblasDatatype_t bType, int ldb, hipblasStride strideB, const void* beta, void* C, hipblasDatatype_t cType, int ldc, hipblasStride strideC, int batchCount, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo); From 6e686dbcfd0e903f0bf8c5781e62cffbd73a87c8 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Sun, 9 Jul 2023 16:52:02 +0200 Subject: [PATCH 03/20] [HIPIFY] Sync with CUDA 12.2.0 - Part 3 - Driver API - functions + Updated the regenerated hipify-perl and docs accordingly --- bin/hipify-perl | 7 +++++++ .../CUDA_Driver_API_functions_supported_by_HIP.md | 7 +++++++ src/CUDA2HIP_Driver_API_functions.cpp | 15 +++++++++++++++ src/CUDA2HIP_Driver_API_types.cpp | 15 +++++++++++---- 4 files changed, 40 insertions(+), 4 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index a066031b..11bc23d7 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -7278,7 +7278,9 @@ sub warnUnsupportedFunctions { "cuMemcpy3DPeerAsync", "cuMemcpy3DPeer", "cuMemcpy", + "cuMemPrefetchAsync_v2", "cuMemGetHandleForAddressRange", + "cuMemAdvise_v2", "cuLibraryUnload", "cuLibraryLoadFromFile", "cuLibraryLoadData", @@ -7305,11 +7307,13 @@ sub warnUnsupportedFunctions { "cuGraphicsD3D9RegisterResource", "cuGraphicsD3D11RegisterResource", "cuGraphicsD3D10RegisterResource", + "cuGraphNodeSetParams", "cuGraphInstantiateWithParams", "cuGraphExternalSemaphoresWaitNodeSetParams", "cuGraphExternalSemaphoresWaitNodeGetParams", "cuGraphExternalSemaphoresSignalNodeSetParams", "cuGraphExternalSemaphoresSignalNodeGetParams", + "cuGraphExecNodeSetParams", "cuGraphExecMemsetNodeSetParams", "cuGraphExecMemcpyNodeSetParams", "cuGraphExecGetFlags", @@ -7318,6 +7322,7 @@ sub warnUnsupportedFunctions { "cuGraphExecBatchMemOpNodeSetParams", "cuGraphBatchMemOpNodeSetParams", "cuGraphBatchMemOpNodeGetParams", + "cuGraphAddNode", "cuGraphAddMemsetNode", "cuGraphAddMemcpyNode", "cuGraphAddExternalSemaphoresWaitNode", @@ -7882,6 +7887,8 @@ sub warnUnsupportedFunctions { "CU_EGL_COLOR_FORMAT_ARGB", "CU_EGL_COLOR_FORMAT_ABGR", "CU_EGL_COLOR_FORMAT_A", + "CU_DEVICE_NUMA_CONFIG_NUMA_NODE", + "CU_DEVICE_NUMA_CONFIG_NONE", "CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED", "CU_DEVICE_ATTRIBUTE_UNIFIED_FUNCTION_POINTERS", "CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED", diff --git a/docs/tables/CUDA_Driver_API_functions_supported_by_HIP.md b/docs/tables/CUDA_Driver_API_functions_supported_by_HIP.md index b43fa72e..ac4b69f0 100644 --- a/docs/tables/CUDA_Driver_API_functions_supported_by_HIP.md +++ b/docs/tables/CUDA_Driver_API_functions_supported_by_HIP.md @@ -479,6 +479,8 @@ |`CU_DEVICE_ATTRIBUTE_WARP_SIZE`| | | |`hipDeviceAttributeWarpSize`|1.6.0| | | | |`CU_DEVICE_CPU`|8.0| | |`hipCpuDeviceId`|3.7.0| | | | |`CU_DEVICE_INVALID`|8.0| | |`hipInvalidDeviceId`|3.7.0| | | | +|`CU_DEVICE_NUMA_CONFIG_NONE`|12.2| | | | | | | | +|`CU_DEVICE_NUMA_CONFIG_NUMA_NODE`|12.2| | | | | | | | |`CU_DEVICE_P2P_ATTRIBUTE_ACCESS_ACCESS_SUPPORTED`|10.1|10.1| |`hipDevP2PAttrHipArrayAccessSupported`|3.8.0| | | | |`CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED`|8.0| | |`hipDevP2PAttrAccessSupported`|3.8.0| | | | |`CU_DEVICE_P2P_ATTRIBUTE_ARRAY_ACCESS_ACCESS_SUPPORTED`|9.2|10.0|10.1|`hipDevP2PAttrHipArrayAccessSupported`|3.8.0| | | | @@ -1647,7 +1649,9 @@ |**CUDA**|**A**|**D**|**R**|**HIP**|**A**|**D**|**R**|**E**| 
|:--|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:| |`cuMemAdvise`|8.0| | |`hipMemAdvise`|3.7.0| | | | +|`cuMemAdvise_v2`|12.2| | | | | | | | |`cuMemPrefetchAsync`|8.0| | |`hipMemPrefetchAsync`|3.7.0| | | | +|`cuMemPrefetchAsync_v2`|12.2| | | | | | | | |`cuMemRangeGetAttribute`|8.0| | |`hipMemRangeGetAttribute`|3.7.0| | | | |`cuMemRangeGetAttributes`|8.0| | |`hipMemRangeGetAttributes`|3.7.0| | | | |`cuPointerGetAttribute`| | | |`hipPointerGetAttribute`|5.0.0| | | | @@ -1776,6 +1780,7 @@ |`cuGraphAddMemFreeNode`|11.4| | |`hipGraphAddMemFreeNode`|5.5.0| | | | |`cuGraphAddMemcpyNode`|10.0| | | | | | | | |`cuGraphAddMemsetNode`|10.0| | | | | | | | +|`cuGraphAddNode`|12.2| | | | | | | | |`cuGraphBatchMemOpNodeGetParams`|11.7| | | | | | | | |`cuGraphBatchMemOpNodeSetParams`|11.7| | | | | | | | |`cuGraphChildGraphNodeGetGraph`|10.0| | |`hipGraphChildGraphNodeGetGraph`|5.0.0| | | | @@ -1800,6 +1805,7 @@ |`cuGraphExecKernelNodeSetParams`|10.1| | |`hipGraphExecKernelNodeSetParams`|4.5.0| | | | |`cuGraphExecMemcpyNodeSetParams`|10.2| | | | | | | | |`cuGraphExecMemsetNodeSetParams`|10.2| | | | | | | | +|`cuGraphExecNodeSetParams`|12.2| | | | | | | | |`cuGraphExecUpdate`|10.2| | |`hipGraphExecUpdate`|5.0.0| | | | |`cuGraphExternalSemaphoresSignalNodeGetParams`|11.2| | | | | | | | |`cuGraphExternalSemaphoresSignalNodeSetParams`|11.2| | | | | | | | @@ -1832,6 +1838,7 @@ |`cuGraphNodeGetEnabled`|11.6| | |`hipGraphNodeGetEnabled`|5.5.0| | | | |`cuGraphNodeGetType`|10.0| | |`hipGraphNodeGetType`|5.0.0| | | | |`cuGraphNodeSetEnabled`|11.6| | |`hipGraphNodeSetEnabled`|5.5.0| | | | +|`cuGraphNodeSetParams`|12.2| | | | | | | | |`cuGraphReleaseUserObject`|11.3| | |`hipGraphReleaseUserObject`|5.3.0| | | | |`cuGraphRemoveDependencies`|10.0| | |`hipGraphRemoveDependencies`|5.0.0| | | | |`cuGraphRetainUserObject`|11.3| | |`hipGraphRetainUserObject`|5.3.0| | | | diff --git a/src/CUDA2HIP_Driver_API_functions.cpp b/src/CUDA2HIP_Driver_API_functions.cpp index 0e7ed78c..bce25009 100644 --- a/src/CUDA2HIP_Driver_API_functions.cpp +++ b/src/CUDA2HIP_Driver_API_functions.cpp @@ -431,8 +431,12 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { // 17. Unified Addressing // cudaMemAdvise {"cuMemAdvise", {"hipMemAdvise", "", CONV_UNIFIED, API_DRIVER, SEC::UNIFIED}}, + // + {"cuMemAdvise_v2", {"hipMemAdvise_v2", "", CONV_UNIFIED, API_DRIVER, SEC::UNIFIED, HIP_UNSUPPORTED}}, // TODO: double check cudaMemPrefetchAsync {"cuMemPrefetchAsync", {"hipMemPrefetchAsync", "", CONV_UNIFIED, API_DRIVER, SEC::UNIFIED}}, + // + {"cuMemPrefetchAsync_v2", {"hipMemPrefetchAsync_v2", "", CONV_UNIFIED, API_DRIVER, SEC::UNIFIED, HIP_UNSUPPORTED}}, // cudaMemRangeGetAttribute {"cuMemRangeGetAttribute", {"hipMemRangeGetAttribute", "", CONV_UNIFIED, API_DRIVER, SEC::UNIFIED}}, // cudaMemRangeGetAttributes @@ -782,6 +786,12 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { {"cuGraphInstantiateWithParams", {"hipGraphInstantiateWithParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}}, // cudaGraphExecGetFlags {"cuGraphExecGetFlags", {"hipGraphExecGetFlags", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}}, + // + {"cuGraphAddNode", {"hipGraphAddNode", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}}, + // + {"cuGraphNodeSetParams", {"hipGraphNodeSetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}}, + // + {"cuGraphExecNodeSetParams", {"hipGraphExecNodeSetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}}, // 25. 
Occupancy // cudaOccupancyAvailableDynamicSMemPerBlock @@ -1384,6 +1394,11 @@ const std::map CUDA_DRIVER_FUNCTION_VER_MAP { {"cuCoredumpGetAttributeGlobal", {CUDA_121, CUDA_0, CUDA_0 }}, {"cuCoredumpSetAttribute", {CUDA_121, CUDA_0, CUDA_0 }}, {"cuCoredumpSetAttributeGlobal", {CUDA_121, CUDA_0, CUDA_0 }}, + {"cuMemPrefetchAsync_v2", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cuMemAdvise_v2", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cuGraphAddNode", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cuGraphNodeSetParams", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cuGraphExecNodeSetParams", {CUDA_122, CUDA_0, CUDA_0 }}, }; const std::map HIP_DRIVER_FUNCTION_VER_MAP { diff --git a/src/CUDA2HIP_Driver_API_types.cpp b/src/CUDA2HIP_Driver_API_types.cpp index 479db2c0..eef3b3d3 100644 --- a/src/CUDA2HIP_Driver_API_types.cpp +++ b/src/CUDA2HIP_Driver_API_types.cpp @@ -340,10 +340,6 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CUgraphNodeParams_st", {"hipGraphNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, {"CUgraphNodeParams", {"hipGraphNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, - // - {"CUdeviceNumaConfig_enum", {"hipDeviceNumaConfig", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, - {"CUdeviceNumaConfig", {"hipDeviceNumaConfig", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, - // cudaArrayMemoryRequirements {"CUDA_ARRAY_MEMORY_REQUIREMENTS_st", {"hipArrayMemoryRequirements", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, {"CUDA_ARRAY_MEMORY_REQUIREMENTS_v1", {"hipArrayMemoryRequirements", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, @@ -2534,6 +2530,15 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { // {"CU_COREDUMP_MAX", {"HIP_COREDUMP_MAX", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // + {"CUdeviceNumaConfig", {"hipDeviceNumaConfig", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + {"CUdeviceNumaConfig_enum", {"hipDeviceNumaConfig", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // CUdeviceNumaConfig enum values + // + {"CU_DEVICE_NUMA_CONFIG_NONE", {"HIP_DEVICE_NUMA_CONFIG_NONE", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // + {"CU_DEVICE_NUMA_CONFIG_NUMA_NODE", {"HIP_DEVICE_NUMA_CONFIG_NUMA_NODE", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // 4. 
Typedefs // no analogue @@ -3512,6 +3517,8 @@ const std::map CUDA_DRIVER_TYPE_NAME_VER_MAP { {"CUgraphNodeParams", {CUDA_122, CUDA_0, CUDA_0 }}, {"CUdeviceNumaConfig_enum", {CUDA_122, CUDA_0, CUDA_0 }}, {"CUdeviceNumaConfig", {CUDA_122, CUDA_0, CUDA_0 }}, + {"CU_DEVICE_NUMA_CONFIG_NONE", {CUDA_122, CUDA_0, CUDA_0 }}, + {"CU_DEVICE_NUMA_CONFIG_NUMA_NODE", {CUDA_122, CUDA_0, CUDA_0 }}, }; const std::map HIP_DRIVER_TYPE_NAME_VER_MAP { From b3ef0218b40e2b51620ea764b3ed49451a2f90bd Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Sun, 9 Jul 2023 18:12:15 +0200 Subject: [PATCH 04/20] [HIPIFY][rocBLAS][tests][fix] Use `const_cast` for some function arguments, which signatures changed in 9.2 + Update rocBLAS-related synthetic tests to support both BLAS APIs before 9.2 and after 9.2 simultaneously + The change fixes rocBLAS test failures against CUDA < 9.2 --- .../synthetic/libraries/cublas2rocblas.cu | 50 ++++++++++++------- .../synthetic/libraries/cublas2rocblas_v2.cu | 50 ++++++++++++------- 2 files changed, 64 insertions(+), 36 deletions(-) diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas.cu index 7a73c9b2..26acd8d6 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas.cu @@ -235,7 +235,9 @@ int main() { float fresult = 0; float** fAarray = 0; + const float** const fAarray_const = const_cast(fAarray); float** fBarray = 0; + const float** const fBarray_const = const_cast(fBarray); float** fCarray = 0; float** fTauarray = 0; @@ -255,12 +257,16 @@ int main() { double dresult = 0; double** dAarray = 0; + const double** const dAarray_const = const_cast(dAarray); double** dBarray = 0; + const double** const dBarray_const = const_cast(dBarray); double** dCarray = 0; double** dTauarray = 0; void** voidAarray = nullptr; + const void** const voidAarray_const = const_cast(voidAarray); void** voidBarray = nullptr; + const void** const voidBarray_const = const_cast(voidBarray); void** voidCarray = nullptr; // TODO: #1281 @@ -283,20 +289,28 @@ int main() { cuDoubleComplex dcomplex, dcomplexa, dcomplexA, dcomplexB, dcomplexC, dcomplexx, dcomplexy, dcomplexs, dcomplexb; // CHECK: rocblas_float_complex** complexAarray = 0; + // CHECK: const rocblas_float_complex** const complexAarray_const = const_cast(complexAarray); // CHECK-NEXT: rocblas_float_complex** complexBarray = 0; + // CHECK: const rocblas_float_complex** const complexBarray_const = const_cast(complexBarray); // CHECK-NEXT: rocblas_float_complex** complexCarray = 0; // CHECK-NEXT: rocblas_float_complex** complexTauarray = 0; cuComplex** complexAarray = 0; + const cuComplex** const complexAarray_const = const_cast(complexAarray); cuComplex** complexBarray = 0; + const cuComplex** const complexBarray_const = const_cast(complexBarray); cuComplex** complexCarray = 0; cuComplex** complexTauarray = 0; // CHECK: rocblas_double_complex** dcomplexAarray = 0; + // CHECK: const rocblas_double_complex** const dcomplexAarray_const = const_cast(dcomplexAarray); // CHECK-NEXT: rocblas_double_complex** dcomplexBarray = 0; + // CHECK: const rocblas_double_complex** const dcomplexBarray_const = const_cast(dcomplexBarray); // CHECK-NEXT: rocblas_double_complex** dcomplexCarray = 0; // CHECK-NEXT: rocblas_double_complex** dcomplexTauarray = 0; cuDoubleComplex** dcomplexAarray = 0; + const cuDoubleComplex** const dcomplexAarray_const = const_cast(dcomplexAarray); cuDoubleComplex** dcomplexBarray = 0; + const cuDoubleComplex** 
const dcomplexBarray_const = const_cast(dcomplexBarray); cuDoubleComplex** dcomplexCarray = 0; cuDoubleComplex** dcomplexTauarray = 0; @@ -1194,14 +1208,14 @@ int main() { // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const float* alpha, const float* const Aarray[], int lda, const float* const Barray[], int ldb, const float* beta, float* const Carray[], int ldc, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sgemm_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const float* alpha, const float* const A[], rocblas_int lda, const float* const B[], rocblas_int ldb, const float* beta, float* const C[], rocblas_int ldc, rocblas_int batch_count); - // CHECK: blasStatus = rocblas_sgemm_batched(blasHandle, transa, transb, m, n, k, &fa, fAarray, lda, fBarray, ldb, &fb, fCarray, ldc, batchCount); - blasStatus = cublasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray, lda, fBarray, ldb, &fb, fCarray, ldc, batchCount); + // CHECK: blasStatus = rocblas_sgemm_batched(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount); + blasStatus = cublasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount); // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const double* alpha, const double* const Aarray[], int lda, const double* const Barray[], int ldb, const double* beta, double* const Carray[], int ldc, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dgemm_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const double* alpha, const double* const A[], rocblas_int lda, const double* const B[], rocblas_int ldb, const double* beta, double* const C[], rocblas_int ldc, rocblas_int batch_count); - // CHECK: blasStatus = rocblas_dgemm_batched(blasHandle, transa, transb, m, n, k, &da, dAarray, lda, dBarray, ldb, &db, dCarray, ldc, batchCount); - blasStatus = cublasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray, lda, dBarray, ldb, &db, dCarray, ldc, batchCount); + // CHECK: blasStatus = rocblas_dgemm_batched(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount); + blasStatus = cublasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount); // TODO: #1281 // TODO: __half -> rocblas_half @@ -1211,14 +1225,14 @@ int main() { // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuComplex* alpha, const cuComplex* const Aarray[], int lda, const cuComplex* const Barray[], int ldb, const cuComplex* beta, cuComplex* const Carray[], int ldc, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgemm_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const rocblas_float_complex* alpha, const rocblas_float_complex* const A[], rocblas_int lda, const rocblas_float_complex* const B[], rocblas_int ldb, 
const rocblas_float_complex* beta, rocblas_float_complex* const C[], rocblas_int ldc, rocblas_int batch_count); - // CHECK: blasStatus = rocblas_cgemm_batched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray, lda, complexBarray, ldb, &complexb, complexCarray, ldc, batchCount); - blasStatus = cublasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray, lda, complexBarray, ldb, &complexb, complexCarray, ldc, batchCount); + // CHECK: blasStatus = rocblas_cgemm_batched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount); + blasStatus = cublasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount); // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* const Aarray[], int lda, const cuDoubleComplex* const Barray[], int ldb, const cuDoubleComplex* beta, cuDoubleComplex* const Carray[], int ldc, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgemm_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const rocblas_double_complex* alpha, const rocblas_double_complex* const A[], rocblas_int lda, const rocblas_double_complex* const B[], rocblas_int ldb, const rocblas_double_complex* beta, rocblas_double_complex* const C[], rocblas_int ldc, rocblas_int batch_count); - // CHECK: blasStatus = rocblas_zgemm_batched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); - blasStatus = cublasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); + // CHECK: blasStatus = rocblas_zgemm_batched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); + blasStatus = cublasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); // TODO: #1281 // NOTE: void CUBLASWINAPI cublasSsyrk(char uplo, char trans, int n, int k, float alpha, const float* A, int lda, float beta, float* C, int ldc); is not supported by HIP @@ -1465,26 +1479,26 @@ int main() { // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const float* alpha, const float* const A[], int lda, float* const B[], int ldb, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_strsm_batched(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const float* alpha, const float* const A[], rocblas_int lda, float* const B[], rocblas_int ldb, rocblas_int batch_count); - // CHECK: blasStatus = rocblas_strsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray, lda, fBarray, ldb, batchCount); - blasStatus = cublasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray, lda, fBarray, ldb, 
batchCount); + // CHECK: blasStatus = rocblas_strsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray_const, lda, fBarray, ldb, batchCount); + blasStatus = cublasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray_const, lda, fBarray, ldb, batchCount); // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const double* alpha, const double* const A[], int lda, double* const B[], int ldb, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtrsm_batched(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const double* alpha, const double* const A[], rocblas_int lda, double* const B[], rocblas_int ldb, rocblas_int batch_count); - // CHECK: blasStatus = rocblas_dtrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray, lda, dBarray, ldb, batchCount); - blasStatus = cublasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray, lda, dBarray, ldb, batchCount); + // CHECK: blasStatus = rocblas_dtrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray_const, lda, dBarray, ldb, batchCount); + blasStatus = cublasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray_const, lda, dBarray, ldb, batchCount); // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuComplex* alpha, const cuComplex* const A[], int lda, cuComplex* const B[], int ldb, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctrsm_batched(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* const A[], rocblas_int lda, rocblas_float_complex* const B[], rocblas_int ldb, rocblas_int batch_count); - // CHECK: blasStatus = rocblas_ctrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray, lda, complexBarray, ldb, batchCount); - blasStatus = cublasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray, lda, complexBarray, ldb, batchCount); + // CHECK: blasStatus = rocblas_ctrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray_const, lda, complexBarray, ldb, batchCount); + blasStatus = cublasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray_const, lda, complexBarray, ldb, batchCount); // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* const A[], int lda, cuDoubleComplex* const B[], int ldb, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztrsm_batched(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const 
rocblas_double_complex* alpha, const rocblas_double_complex* const A[], rocblas_int lda, rocblas_double_complex* const B[], rocblas_int ldb, rocblas_int batch_count); - // CHECK: blasStatus = rocblas_ztrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, batchCount); - blasStatus = cublasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, batchCount); + // CHECK: blasStatus = rocblas_ztrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray, ldb, batchCount); + blasStatus = cublasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray, ldb, batchCount); // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSdgmm(cublasHandle_t handle, cublasSideMode_t mode, int m, int n, const float* A, int lda, const float* x, int incx, float* C, int ldc); @@ -1657,8 +1671,8 @@ int main() { // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* const Aarray[], cudaDataType Atype, int lda, const void* const Barray[], cudaDataType Btype, int ldb, const void* beta, void* const Carray[], cudaDataType Ctype, int ldc, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_gemm_batched_ex(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const void* alpha, const void* a, rocblas_datatype a_type, rocblas_int lda, const void* b, rocblas_datatype b_type, rocblas_int ldb, const void* beta, const void* c, rocblas_datatype c_type, rocblas_int ldc, void* d, rocblas_datatype d_type, rocblas_int ldd, rocblas_int batch_count, rocblas_datatype compute_type, rocblas_gemm_algo algo, int32_t solution_index, uint32_t flags); - // CHECK: blasStatus = rocblas_gemm_batched_ex(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); - blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); + // CHECK: blasStatus = rocblas_gemm_batched_ex(blasHandle, transa, transb, m, n, k, aptr, voidAarray_const, Atype, lda, voidBarray_const, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); + blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray_const, Atype, lda, voidBarray_const, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, long long int strideA, const void* B, cudaDataType Btype, int ldb, long long int strideB, const void* beta, void* C, cudaDataType Ctype, int ldc, long long int strideC, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo); diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu 
b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu index eab649d8..c03a1f0f 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu @@ -243,7 +243,9 @@ int main() { float fresult = 0; float** fAarray = 0; + const float** const fAarray_const = const_cast(fAarray); float** fBarray = 0; + const float** const fBarray_const = const_cast(fBarray); float** fCarray = 0; float** fTauarray = 0; @@ -263,12 +265,16 @@ int main() { double dresult = 0; double** dAarray = 0; + const double** const dAarray_const = const_cast(dAarray); double** dBarray = 0; + const double** const dBarray_const = const_cast(dBarray); double** dCarray = 0; double** dTauarray = 0; void** voidAarray = nullptr; + const void** const voidAarray_const = const_cast(voidAarray); void** voidBarray = nullptr; + const void** const voidBarray_const = const_cast(voidBarray); void** voidCarray = nullptr; // TODO: #1281 @@ -293,20 +299,28 @@ int main() { cuDoubleComplex dcomplex, dcomplexa, dcomplexA, dcomplexB, dcomplexC, dcomplexx, dcomplexy, dcomplexs, dcomplexb; // CHECK: rocblas_float_complex** complexAarray = 0; + // CHECK: const rocblas_float_complex** const complexAarray_const = const_cast(complexAarray); // CHECK-NEXT: rocblas_float_complex** complexBarray = 0; + // CHECK: const rocblas_float_complex** const complexBarray_const = const_cast(complexBarray); // CHECK-NEXT: rocblas_float_complex** complexCarray = 0; // CHECK-NEXT: rocblas_float_complex** complexTauarray = 0; cuComplex** complexAarray = 0; + const cuComplex** const complexAarray_const = const_cast(complexAarray); cuComplex** complexBarray = 0; + const cuComplex** const complexBarray_const = const_cast(complexBarray); cuComplex** complexCarray = 0; cuComplex** complexTauarray = 0; // CHECK: rocblas_double_complex** dcomplexAarray = 0; + // CHECK: const rocblas_double_complex** const dcomplexAarray_const = const_cast(dcomplexAarray); // CHECK-NEXT: rocblas_double_complex** dcomplexBarray = 0; + // CHECK: const rocblas_double_complex** const dcomplexBarray_const = const_cast(dcomplexBarray); // CHECK-NEXT: rocblas_double_complex** dcomplexCarray = 0; // CHECK-NEXT: rocblas_double_complex** dcomplexTauarray = 0; cuDoubleComplex** dcomplexAarray = 0; + const cuDoubleComplex** const dcomplexAarray_const = const_cast(dcomplexAarray); cuDoubleComplex** dcomplexBarray = 0; + const cuDoubleComplex** const dcomplexBarray_const = const_cast(dcomplexBarray); cuDoubleComplex** dcomplexCarray = 0; cuDoubleComplex** dcomplexTauarray = 0; @@ -1338,14 +1352,14 @@ int main() { // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const float* alpha, const float* const Aarray[], int lda, const float* const Barray[], int ldb, const float* beta, float* const Carray[], int ldc, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sgemm_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const float* alpha, const float* const A[], rocblas_int lda, const float* const B[], rocblas_int ldb, const float* beta, float* const C[], rocblas_int ldc, rocblas_int batch_count); - // CHECK: blasStatus = rocblas_sgemm_batched(blasHandle, transa, transb, m, n, k, &fa, fAarray, lda, fBarray, ldb, &fb, fCarray, ldc, batchCount); - blasStatus = cublasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray, lda, 
fBarray, ldb, &fb, fCarray, ldc, batchCount); + // CHECK: blasStatus = rocblas_sgemm_batched(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount); + blasStatus = cublasSgemmBatched(blasHandle, transa, transb, m, n, k, &fa, fAarray_const, lda, fBarray_const, ldb, &fb, fCarray, ldc, batchCount); // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const double* alpha, const double* const Aarray[], int lda, const double* const Barray[], int ldb, const double* beta, double* const Carray[], int ldc, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dgemm_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const double* alpha, const double* const A[], rocblas_int lda, const double* const B[], rocblas_int ldb, const double* beta, double* const C[], rocblas_int ldc, rocblas_int batch_count); - // CHECK: blasStatus = rocblas_dgemm_batched(blasHandle, transa, transb, m, n, k, &da, dAarray, lda, dBarray, ldb, &db, dCarray, ldc, batchCount); - blasStatus = cublasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray, lda, dBarray, ldb, &db, dCarray, ldc, batchCount); + // CHECK: blasStatus = rocblas_dgemm_batched(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount); + blasStatus = cublasDgemmBatched(blasHandle, transa, transb, m, n, k, &da, dAarray_const, lda, dBarray_const, ldb, &db, dCarray, ldc, batchCount); // TODO: #1281 // TODO: __half -> rocblas_half @@ -1355,14 +1369,14 @@ int main() { // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuComplex* alpha, const cuComplex* const Aarray[], int lda, const cuComplex* const Barray[], int ldb, const cuComplex* beta, cuComplex* const Carray[], int ldc, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgemm_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const rocblas_float_complex* alpha, const rocblas_float_complex* const A[], rocblas_int lda, const rocblas_float_complex* const B[], rocblas_int ldb, const rocblas_float_complex* beta, rocblas_float_complex* const C[], rocblas_int ldc, rocblas_int batch_count); - // CHECK: blasStatus = rocblas_cgemm_batched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray, lda, complexBarray, ldb, &complexb, complexCarray, ldc, batchCount); - blasStatus = cublasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray, lda, complexBarray, ldb, &complexb, complexCarray, ldc, batchCount); + // CHECK: blasStatus = rocblas_cgemm_batched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount); + blasStatus = cublasCgemmBatched(blasHandle, transa, transb, m, n, k, &complexa, complexAarray_const, lda, complexBarray_const, ldb, &complexb, complexCarray, ldc, batchCount); // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemmBatched(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* const Aarray[], int lda, const cuDoubleComplex* const 
Barray[], int ldb, const cuDoubleComplex* beta, cuDoubleComplex* const Carray[], int ldc, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgemm_batched(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const rocblas_double_complex* alpha, const rocblas_double_complex* const A[], rocblas_int lda, const rocblas_double_complex* const B[], rocblas_int ldb, const rocblas_double_complex* beta, rocblas_double_complex* const C[], rocblas_int ldc, rocblas_int batch_count); - // CHECK: blasStatus = rocblas_zgemm_batched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); - blasStatus = cublasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); + // CHECK: blasStatus = rocblas_zgemm_batched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); + blasStatus = cublasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray_const, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyrk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const float* alpha, const float* A, int lda, const float* beta, float* C, int ldc); @@ -1635,26 +1649,26 @@ int main() { // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const float* alpha, const float* const A[], int lda, float* const B[], int ldb, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_strsm_batched(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const float* alpha, const float* const A[], rocblas_int lda, float* const B[], rocblas_int ldb, rocblas_int batch_count); - // CHECK: blasStatus = rocblas_strsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray, lda, fBarray, ldb, batchCount); - blasStatus = cublasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray, lda, fBarray, ldb, batchCount); + // CHECK: blasStatus = rocblas_strsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray_const, lda, fBarray, ldb, batchCount); + blasStatus = cublasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray_const, lda, fBarray, ldb, batchCount); // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const double* alpha, const double* const A[], int lda, double* const B[], int ldb, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtrsm_batched(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const double* alpha, const double* const A[], rocblas_int lda, double* const B[], rocblas_int ldb, rocblas_int batch_count); - // CHECK: blasStatus = rocblas_dtrsm_batched(blasHandle, blasSideMode, 
blasFillMode, transa, blasDiagType, m, n, &da, dAarray, lda, dBarray, ldb, batchCount); - blasStatus = cublasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray, lda, dBarray, ldb, batchCount); + // CHECK: blasStatus = rocblas_dtrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray_const, lda, dBarray, ldb, batchCount); + blasStatus = cublasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray_const, lda, dBarray, ldb, batchCount); // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuComplex* alpha, const cuComplex* const A[], int lda, cuComplex* const B[], int ldb, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctrsm_batched(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* const A[], rocblas_int lda, rocblas_float_complex* const B[], rocblas_int ldb, rocblas_int batch_count); - // CHECK: blasStatus = rocblas_ctrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray, lda, complexBarray, ldb, batchCount); - blasStatus = cublasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray, lda, complexBarray, ldb, batchCount); + // CHECK: blasStatus = rocblas_ctrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray_const, lda, complexBarray, ldb, batchCount); + blasStatus = cublasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray_const, lda, complexBarray, ldb, batchCount); // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* const A[], int lda, cuDoubleComplex* const B[], int ldb, int batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztrsm_batched(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, rocblas_int m, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* const A[], rocblas_int lda, rocblas_double_complex* const B[], rocblas_int ldb, rocblas_int batch_count); - // CHECK: blasStatus = rocblas_ztrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, batchCount); - blasStatus = cublasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, batchCount); + // CHECK: blasStatus = rocblas_ztrsm_batched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray, ldb, batchCount); + blasStatus = cublasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray_const, lda, dcomplexBarray, ldb, batchCount); // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSdgmm(cublasHandle_t handle, cublasSideMode_t mode, int m, int n, const float* A, int lda, const float* x, int 
incx, float* C, int ldc); @@ -1827,8 +1841,8 @@ int main() { // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* const Aarray[], cudaDataType Atype, int lda, const void* const Barray[], cudaDataType Btype, int ldb, const void* beta, void* const Carray[], cudaDataType Ctype, int ldc, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_gemm_batched_ex(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, rocblas_int m, rocblas_int n, rocblas_int k, const void* alpha, const void* a, rocblas_datatype a_type, rocblas_int lda, const void* b, rocblas_datatype b_type, rocblas_int ldb, const void* beta, const void* c, rocblas_datatype c_type, rocblas_int ldc, void* d, rocblas_datatype d_type, rocblas_int ldd, rocblas_int batch_count, rocblas_datatype compute_type, rocblas_gemm_algo algo, int32_t solution_index, uint32_t flags); - // CHECK: blasStatus = rocblas_gemm_batched_ex(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); - blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); + // CHECK: blasStatus = rocblas_gemm_batched_ex(blasHandle, transa, transb, m, n, k, aptr, voidAarray_const, Atype, lda, voidBarray_const, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); + blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray_const, Atype, lda, voidBarray_const, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); // TODO: #1281 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, long long int strideA, const void* B, cudaDataType Btype, int ldb, long long int strideB, const void* beta, void* C, cudaDataType Ctype, int ldc, long long int strideC, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo); From 24cd408064a3ebc4a0fdcf32cc8f5a9ddcba153f Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 10 Jul 2023 15:34:49 +0200 Subject: [PATCH 05/20] [HIPIFY] Sync with CUDA 12.2.0 - Part 4 - Runtime API - functions + Updated the regenerated hipify-perl and docs accordingly --- bin/hipify-perl | 5 +++++ ...DA_Runtime_API_functions_supported_by_HIP.md | 5 +++++ src/CUDA2HIP_Driver_API_functions.cpp | 12 ++++++------ src/CUDA2HIP_Runtime_API_functions.cpp | 17 ++++++++++++++++- 4 files changed, 32 insertions(+), 7 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 11bc23d7..539a0384 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -6772,6 +6772,8 @@ sub warnUnsupportedFunctions { "cudaMemcpy3DPeerAsync", "cudaMemcpy3DPeer", "cudaMemcpy2DArrayToArray", + "cudaMemPrefetchAsync_v2", + "cudaMemAdvise_v2", "cudaLimitPersistingL2CacheSize", "cudaLimitMaxL2FetchGranularity", "cudaLimitDevRuntimeSyncDepth", @@ -6828,6 +6830,7 @@ sub warnUnsupportedFunctions { "cudaGraphicsCubeFaceNegativeY", "cudaGraphicsCubeFaceNegativeX", "cudaGraphicsCubeFace", + "cudaGraphNodeSetParams", "cudaGraphInstantiateWithParams", 
"cudaGraphInstantiateSuccess", "cudaGraphInstantiateResult", @@ -6844,9 +6847,11 @@ sub warnUnsupportedFunctions { "cudaGraphExecUpdateResultInfo_st", "cudaGraphExecUpdateResultInfo", "cudaGraphExecUpdateErrorAttributesChanged", + "cudaGraphExecNodeSetParams", "cudaGraphExecGetFlags", "cudaGraphExecExternalSemaphoresWaitNodeSetParams", "cudaGraphExecExternalSemaphoresSignalNodeSetParams", + "cudaGraphAddNode", "cudaGraphAddExternalSemaphoresWaitNode", "cudaGraphAddExternalSemaphoresSignalNode", "cudaGetTextureObjectTextureDesc_v2", diff --git a/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md b/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md index eb93726b..7920451c 100644 --- a/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md +++ b/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md @@ -171,8 +171,10 @@ |`cudaMallocMipmappedArray`| | | |`hipMallocMipmappedArray`|3.5.0| | | | |`cudaMallocPitch`| | | |`hipMallocPitch`|1.6.0| | | | |`cudaMemAdvise`|8.0| | |`hipMemAdvise`|3.7.0| | | | +|`cudaMemAdvise_v2`|12.2| | | | | | | | |`cudaMemGetInfo`| | | |`hipMemGetInfo`|1.6.0| | | | |`cudaMemPrefetchAsync`|8.0| | |`hipMemPrefetchAsync`|3.7.0| | | | +|`cudaMemPrefetchAsync_v2`|12.2| | | | | | | | |`cudaMemRangeGetAttribute`|8.0| | |`hipMemRangeGetAttribute`|3.7.0| | | | |`cudaMemRangeGetAttributes`|8.0| | |`hipMemRangeGetAttributes`|3.7.0| | | | |`cudaMemcpy`| | | |`hipMemcpy`|1.5.0| | | | @@ -424,6 +426,7 @@ |`cudaGraphAddMemcpyNodeFromSymbol`|11.1| | |`hipGraphAddMemcpyNodeFromSymbol`|5.0.0| | | | |`cudaGraphAddMemcpyNodeToSymbol`|11.1| | |`hipGraphAddMemcpyNodeToSymbol`|5.0.0| | | | |`cudaGraphAddMemsetNode`|10.0| | |`hipGraphAddMemsetNode`|4.3.0| | | | +|`cudaGraphAddNode`|12.2| | | | | | | | |`cudaGraphChildGraphNodeGetGraph`|10.0| | |`hipGraphChildGraphNodeGetGraph`|5.0.0| | | | |`cudaGraphClone`|10.0| | |`hipGraphClone`|5.0.0| | | | |`cudaGraphCreate`|10.0| | |`hipGraphCreate`|4.3.0| | | | @@ -448,6 +451,7 @@ |`cudaGraphExecMemcpyNodeSetParamsFromSymbol`|11.1| | |`hipGraphExecMemcpyNodeSetParamsFromSymbol`|5.0.0| | | | |`cudaGraphExecMemcpyNodeSetParamsToSymbol`|11.1| | |`hipGraphExecMemcpyNodeSetParamsToSymbol`|5.0.0| | | | |`cudaGraphExecMemsetNodeSetParams`|11.0| | |`hipGraphExecMemsetNodeSetParams`|5.0.0| | | | +|`cudaGraphExecNodeSetParams`|12.2| | | | | | | | |`cudaGraphExecUpdate`|11.0| | |`hipGraphExecUpdate`|5.0.0| | | | |`cudaGraphExternalSemaphoresSignalNodeGetParams`|11.2| | | | | | | | |`cudaGraphExternalSemaphoresSignalNodeSetParams`|11.2| | | | | | | | @@ -482,6 +486,7 @@ |`cudaGraphNodeGetEnabled`|11.6| | |`hipGraphNodeGetEnabled`|5.5.0| | | | |`cudaGraphNodeGetType`|11.0| | |`hipGraphNodeGetType`|5.0.0| | | | |`cudaGraphNodeSetEnabled`|11.6| | |`hipGraphNodeSetEnabled`|5.5.0| | | | +|`cudaGraphNodeSetParams`|12.2| | | | | | | | |`cudaGraphReleaseUserObject`|11.3| | |`hipGraphReleaseUserObject`|5.3.0| | | | |`cudaGraphRemoveDependencies`|11.0| | |`hipGraphRemoveDependencies`|5.0.0| | | | |`cudaGraphRetainUserObject`|11.3| | |`hipGraphRetainUserObject`|5.3.0| | | | diff --git a/src/CUDA2HIP_Driver_API_functions.cpp b/src/CUDA2HIP_Driver_API_functions.cpp index bce25009..012b0076 100644 --- a/src/CUDA2HIP_Driver_API_functions.cpp +++ b/src/CUDA2HIP_Driver_API_functions.cpp @@ -431,11 +431,11 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { // 17. 
Unified Addressing // cudaMemAdvise {"cuMemAdvise", {"hipMemAdvise", "", CONV_UNIFIED, API_DRIVER, SEC::UNIFIED}}, - // + // cudaMemAdvise_v2 {"cuMemAdvise_v2", {"hipMemAdvise_v2", "", CONV_UNIFIED, API_DRIVER, SEC::UNIFIED, HIP_UNSUPPORTED}}, - // TODO: double check cudaMemPrefetchAsync + // cudaMemPrefetchAsync {"cuMemPrefetchAsync", {"hipMemPrefetchAsync", "", CONV_UNIFIED, API_DRIVER, SEC::UNIFIED}}, - // + // cudaMemPrefetchAsync_v2 {"cuMemPrefetchAsync_v2", {"hipMemPrefetchAsync_v2", "", CONV_UNIFIED, API_DRIVER, SEC::UNIFIED, HIP_UNSUPPORTED}}, // cudaMemRangeGetAttribute {"cuMemRangeGetAttribute", {"hipMemRangeGetAttribute", "", CONV_UNIFIED, API_DRIVER, SEC::UNIFIED}}, @@ -786,11 +786,11 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { {"cuGraphInstantiateWithParams", {"hipGraphInstantiateWithParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}}, // cudaGraphExecGetFlags {"cuGraphExecGetFlags", {"hipGraphExecGetFlags", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}}, - // + // cudaGraphAddNode {"cuGraphAddNode", {"hipGraphAddNode", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}}, - // + // cudaGraphNodeSetParams {"cuGraphNodeSetParams", {"hipGraphNodeSetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}}, - // + // cudaGraphExecNodeSetParams {"cuGraphExecNodeSetParams", {"hipGraphExecNodeSetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}}, // 25. Occupancy diff --git a/src/CUDA2HIP_Runtime_API_functions.cpp b/src/CUDA2HIP_Runtime_API_functions.cpp index 3616b64f..81b1af53 100644 --- a/src/CUDA2HIP_Runtime_API_functions.cpp +++ b/src/CUDA2HIP_Runtime_API_functions.cpp @@ -308,6 +308,8 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { {"cudaMallocPitch", {"hipMallocPitch", "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY}}, // cuMemAdvise {"cudaMemAdvise", {"hipMemAdvise", "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY}}, + // cuMemAdvise_v2 + {"cudaMemAdvise_v2", {"hipMemAdvise_v2", "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY, HIP_UNSUPPORTED}}, // no analogue // NOTE: Not equal to cuMemcpy due to different signatures {"cudaMemcpy", {"hipMemcpy", "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY}}, @@ -358,8 +360,10 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { {"cudaMemcpyToSymbolAsync", {"hipMemcpyToSymbolAsync", "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY}}, // cuMemGetInfo {"cudaMemGetInfo", {"hipMemGetInfo", "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY}}, - // TODO: double check cuMemPrefetchAsync + // cuMemPrefetchAsync {"cudaMemPrefetchAsync", {"hipMemPrefetchAsync", "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY}}, + // cuMemPrefetchAsync_v2 + {"cudaMemPrefetchAsync_v2", {"hipMemPrefetchAsync_v2", "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY, HIP_UNSUPPORTED}}, // cuMemRangeGetAttribute {"cudaMemRangeGetAttribute", {"hipMemRangeGetAttribute", "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY}}, // cuMemRangeGetAttributes @@ -836,6 +840,12 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { {"cudaGraphInstantiateWithParams", {"hipGraphInstantiateWithParams", "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH, HIP_UNSUPPORTED}}, // cuGraphExecGetFlags {"cudaGraphExecGetFlags", {"hipGraphExecGetFlags", "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH, HIP_UNSUPPORTED}}, + // cuGraphAddNode + {"cudaGraphAddNode", {"hipGraphAddNode", "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH, HIP_UNSUPPORTED}}, + // cuGraphNodeSetParams + {"cudaGraphNodeSetParams", {"hipGraphNodeSetParams", "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH, HIP_UNSUPPORTED}}, + // cuGraphExecNodeSetParams + 
{"cudaGraphExecNodeSetParams", {"hipGraphExecNodeSetParams", "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH, HIP_UNSUPPORTED}}, // 29. Driver Entry Point Access // cuGetProcAddress @@ -1110,6 +1120,11 @@ const std::map CUDA_RUNTIME_FUNCTION_VER_MAP { {"cudaGraphInstantiateWithParams", {CUDA_120, CUDA_0, CUDA_0 }}, {"cudaGraphExecGetFlags", {CUDA_120, CUDA_0, CUDA_0 }}, {"cudaGetKernel", {CUDA_121, CUDA_0, CUDA_0 }}, + {"cudaMemPrefetchAsync_v2", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cudaMemAdvise_v2", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cudaGraphAddNode", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cudaGraphNodeSetParams", {CUDA_122, CUDA_0, CUDA_0 }}, + {"cudaGraphExecNodeSetParams", {CUDA_122, CUDA_0, CUDA_0 }}, }; const std::map HIP_RUNTIME_FUNCTION_VER_MAP { From e765a932dd6809ba587c29c4e713357a56a7f60f Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 10 Jul 2023 16:36:36 +0200 Subject: [PATCH 06/20] [HIPIFY][build][#724][cleanup] Remove SWDEV_375013 related guards from `CMakeLists.txt` and `HipifyAction.cpp` [Reason] LLVM 16.0.0 was released a long ago, so the guard is not used anymore --- CMakeLists.txt | 6 ------ src/HipifyAction.cpp | 3 +-- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a92a5e9b..f61c36b4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -119,12 +119,6 @@ else() endif() endif() -# [ToDo] Remove SWDEV_375013 related guards from CMakeLists.txt and HipifyAction.cpp along with the LLVM 16.0.0 official release -option (SWDEV_375013 "Enables SWDEV-375013 blocker workaround for the clang's change https://reviews.llvm.org/D140332" OFF) -if(SWDEV_375013) - add_definitions(-DSWDEV_375013) -endif() - if(MSVC) target_link_libraries(hipify-clang PRIVATE version) target_compile_options(hipify-clang PRIVATE ${STD} /Od /GR- /EHs- /EHc-) diff --git a/src/HipifyAction.cpp b/src/HipifyAction.cpp index 7e252a9b..2c635d58 100644 --- a/src/HipifyAction.cpp +++ b/src/HipifyAction.cpp @@ -1199,12 +1199,11 @@ class PPCallbackProxy : public clang::PPCallbacks { public: explicit PPCallbackProxy(HipifyAction &action): hipifyAction(action) {} - // [ToDo] Remove SWDEV_375013 related guards from CMakeLists.txt and HipifyAction.cpp along with the LLVM 16.0.0 official release void InclusionDirective(clang::SourceLocation hash_loc, const clang::Token &include_token, StringRef file_name, bool is_angled, clang::CharSourceRange filename_range, #if LLVM_VERSION_MAJOR < 15 const clang::FileEntry *file, -#elif (LLVM_VERSION_MAJOR == 15) || (LLVM_VERSION_MAJOR == 16 && SWDEV_375013) +#elif LLVM_VERSION_MAJOR == 15 Optional file, #else clang::OptionalFileEntryRef file, From 16103b7cd0b555d427cbe31db238cb9ad5e88790 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 11 Jul 2023 15:26:21 +0200 Subject: [PATCH 07/20] [HIPIFY][cmake] Formatting and indentation --- CMakeLists.txt | 301 +++++++++++++++++++++++++------------------------ 1 file changed, 151 insertions(+), 150 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f61c36b4..4c46b2f5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,6 +5,7 @@ project(hipify-clang) include(GNUInstallDirs) option(HIPIFY_INCLUDE_IN_HIP_SDK "Include HIPIFY in HIP SDK" OFF) + if(HIPIFY_INCLUDE_IN_HIP_SDK) if(NOT WIN32) message(FATAL_ERROR "HIPIFY_INCLUDE_IN_HIP_SDK is only supported on Windows") @@ -28,48 +29,47 @@ list(APPEND CMAKE_MODULE_PATH ${LLVM_CMAKE_DIR}) include(AddLLVM) if (NOT HIPIFY_CLANG_TESTS_ONLY) - -if(MSVC AND MSVC_VERSION VERSION_LESS "1900") + if(MSVC AND MSVC_VERSION VERSION_LESS "1900") 
message(SEND_ERROR "hipify-clang could be built by Visual Studio 14 2015 or higher.") return() -endif() + endif() -include_directories(${LLVM_INCLUDE_DIRS}) -add_definitions(${LLVM_DEFINITIONS}) + include_directories(${LLVM_INCLUDE_DIRS}) + add_definitions(${LLVM_DEFINITIONS}) -file(GLOB_RECURSE HIPIFY_SOURCES src/*.cpp) -file(GLOB_RECURSE HIPIFY_HEADERS src/*.h) -add_llvm_executable(hipify-clang ${HIPIFY_SOURCES} ${HIPIFY_HEADERS}) -target_link_directories(hipify-clang PRIVATE ${LLVM_LIBRARY_DIRS}) + file(GLOB_RECURSE HIPIFY_SOURCES src/*.cpp) + file(GLOB_RECURSE HIPIFY_HEADERS src/*.h) -if(HIPIFY_INCLUDE_IN_HIP_SDK) - if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) - message(FATAL_ERROR "In order to include HIPIFY in HIP SDK, HIPIFY needs to be built with LLVM_EXTERNAL_PROJECTS") - endif() + add_llvm_executable(hipify-clang ${HIPIFY_SOURCES} ${HIPIFY_HEADERS}) + target_link_directories(hipify-clang PRIVATE ${LLVM_LIBRARY_DIRS}) - # Need to add clang include directories explicitly if - # building as part of llvm. - if(LLVM_EXTERNAL_CLANG_SOURCE_DIR) - target_include_directories(hipify-clang - PRIVATE - ${LLVM_BINARY_DIR}/tools/clang/include - ${LLVM_EXTERNAL_CLANG_SOURCE_DIR}/include) - endif() - # Need to add lld include directories explicitly if - # building as part of llvm. - if(LLVM_EXTERNAL_LLD_SOURCE_DIR) - target_include_directories(hipify-clang - PRIVATE - ${LLVM_BINARY_DIR}/tools/lld/include - ${LLVM_EXTERNAL_LLD_SOURCE_DIR}/include) + if(HIPIFY_INCLUDE_IN_HIP_SDK) + if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) + message(FATAL_ERROR "In order to include HIPIFY in HIP SDK, HIPIFY needs to be built with LLVM_EXTERNAL_PROJECTS") + endif() + + # Need to add clang include directories explicitly if building as part of llvm. + if(LLVM_EXTERNAL_CLANG_SOURCE_DIR) + target_include_directories(hipify-clang + PRIVATE + ${LLVM_BINARY_DIR}/tools/clang/include + ${LLVM_EXTERNAL_CLANG_SOURCE_DIR}/include) + endif() + + # Need to add lld include directories explicitly if building as part of llvm. + if(LLVM_EXTERNAL_LLD_SOURCE_DIR) + target_include_directories(hipify-clang + PRIVATE + ${LLVM_BINARY_DIR}/tools/lld/include + ${LLVM_EXTERNAL_LLD_SOURCE_DIR}/include) + endif() + else() + set(CMAKE_CXX_COMPILER ${LLVM_TOOLS_BINARY_DIR}/clang++) + set(CMAKE_C_COMPILER ${LLVM_TOOLS_BINARY_DIR}/clang) endif() -else() - set(CMAKE_CXX_COMPILER ${LLVM_TOOLS_BINARY_DIR}/clang++) - set(CMAKE_C_COMPILER ${LLVM_TOOLS_BINARY_DIR}/clang) -endif() -# Link against LLVM and CLANG libraries -target_link_libraries(hipify-clang PRIVATE + # Link against LLVM and CLANG libraries. 
+ target_link_libraries(hipify-clang PRIVATE clangASTMatchers clangFrontend clangTooling @@ -93,81 +93,80 @@ target_link_libraries(hipify-clang PRIVATE LLVMOption LLVMCore) -if(LLVM_PACKAGE_VERSION VERSION_GREATER "6.0.1") + if(LLVM_PACKAGE_VERSION VERSION_GREATER "6.0.1") target_link_libraries(hipify-clang PRIVATE clangToolingInclusions) -endif() + endif() -if(LLVM_PACKAGE_VERSION VERSION_GREATER "9.0.1") + if(LLVM_PACKAGE_VERSION VERSION_GREATER "9.0.1") target_link_libraries(hipify-clang PRIVATE LLVMFrontendOpenMP) -endif() + endif() -if(LLVM_PACKAGE_VERSION VERSION_EQUAL "15.0.0" OR LLVM_PACKAGE_VERSION VERSION_GREATER "15.0.0") + if(LLVM_PACKAGE_VERSION VERSION_EQUAL "15.0.0" OR LLVM_PACKAGE_VERSION VERSION_GREATER "15.0.0") target_link_libraries(hipify-clang PRIVATE LLVMWindowsDriver clangSupport) -endif() + endif() -if(LLVM_PACKAGE_VERSION VERSION_EQUAL "16.0.0" OR LLVM_PACKAGE_VERSION VERSION_GREATER "16.0.0") + if(LLVM_PACKAGE_VERSION VERSION_EQUAL "16.0.0" OR LLVM_PACKAGE_VERSION VERSION_GREATER "16.0.0") if(MSVC) - set(STD "/std:c++17") + set(STD "/std:c++17") else() - set(STD "-std=c++17") + set(STD "-std=c++17") endif() -else() + else() if(MSVC) - set(STD "/std:c++14") + set(STD "/std:c++14") else() - set(STD "-std=c++14") + set(STD "-std=c++14") endif() -endif() + endif() -if(MSVC) + if(MSVC) target_link_libraries(hipify-clang PRIVATE version) target_compile_options(hipify-clang PRIVATE ${STD} /Od /GR- /EHs- /EHc-) set(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} /SUBSYSTEM:WINDOWS") -else() + else() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${STD} -pthread -fno-rtti -fvisibility-inlines-hidden") -endif() + endif() -# Address Sanitize Flag -if(ADDRESS_SANITIZER) + # Address Sanitize Flag. + if(ADDRESS_SANITIZER) set(addr_var -fsanitize=address) -else() + else() set(addr_var ) -endif() -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_CFLAGS} ${addr_var}") + endif() + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_CFLAGS} ${addr_var}") -# [ToDo] Remove D125860 related guards from CMakeLists.txt with the LLVM 16.0.0 official release -option (D125860 "Enables treating clang's resource dir as lib/clang/X.Y.Z, as it was before clang's change D125860, merged as e1b88c8a09be25b86b13f98755a9bd744b4dbf14" OFF) -if(D125860) + # [ToDo] Remove D125860 related guards from CMakeLists.txt with the LLVM 16.0.0 official release. 
+ option (D125860 "Enables treating clang's resource dir as lib/clang/X.Y.Z, as it was before clang's change D125860, merged as e1b88c8a09be25b86b13f98755a9bd744b4dbf14" OFF) + if(D125860) add_definitions(-D125860) -endif() -if((LLVM_PACKAGE_VERSION VERSION_EQUAL "16.0.0" OR LLVM_PACKAGE_VERSION VERSION_GREATER "16.0.0") AND (NOT D125860)) + endif() + if((LLVM_PACKAGE_VERSION VERSION_EQUAL "16.0.0" OR LLVM_PACKAGE_VERSION VERSION_GREATER "16.0.0") AND (NOT D125860)) set(LIB_CLANG_RES ${LLVM_VERSION_MAJOR}) -else() + else() set(LIB_CLANG_RES ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}) -endif() -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CFLAGS} -DHIPIFY_CLANG_RES=\\\"${LLVM_LIBRARY_DIRS}/clang/${LIB_CLANG_RES}\\\" ${addr_var}") + endif() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CFLAGS} -DHIPIFY_CLANG_RES=\\\"${LLVM_LIBRARY_DIRS}/clang/${LIB_CLANG_RES}\\\" ${addr_var}") -set(INSTALL_PATH_DOC_STRING "hipify-clang Installation Path") -if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) + set(INSTALL_PATH_DOC_STRING "hipify-clang Installation Path") + if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) set(CMAKE_INSTALL_PREFIX "${PROJECT_BINARY_DIR}/dist" CACHE PATH ${INSTALL_PATH_DOC_STRING} FORCE) -endif() + endif() -set(HIPIFY_BIN_INSTALL_PATH "${CMAKE_INSTALL_PREFIX}/bin") + set(HIPIFY_BIN_INSTALL_PATH "${CMAKE_INSTALL_PREFIX}/bin") -install(TARGETS hipify-clang DESTINATION bin) -# install bin directory in CMAKE_INSTALL_PREFIX path -install( + install(TARGETS hipify-clang DESTINATION bin) + # Install bin directory in CMAKE_INSTALL_PREFIX path. + install( DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/bin DESTINATION . USE_SOURCE_PERMISSIONS PATTERN "hipify-perl" PATTERN "*.sh") -# Headers are already included in HIP SDK, so skip those if including -# HIPIFY in HIP SDK. -if(NOT HIPIFY_INCLUDE_IN_HIP_SDK) - # install all folders under clang/version/ in CMAKE_INSTALL_PREFIX path - install( + # Headers are already included in HIP SDK, so skip those if including HIPIFY in HIP SDK. + if(NOT HIPIFY_INCLUDE_IN_HIP_SDK) + # Install all folders under clang/version/ in CMAKE_INSTALL_PREFIX path. + install( DIRECTORY ${LLVM_DIR}/../../clang/${LIB_CLANG_RES}/ DESTINATION . COMPONENT clang-resource-headers @@ -179,99 +178,101 @@ if(NOT HIPIFY_INCLUDE_IN_HIP_SDK) PATTERN "new" PATTERN "ppc_wrappers" EXCLUDE PATTERN "openmp_wrappers" EXCLUDE) -endif() - -option(FILE_REORG_BACKWARD_COMPATIBILITY "Enable File Reorg with backward compatibility" ON) + endif() -if(UNIX) + option(FILE_REORG_BACKWARD_COMPATIBILITY "Enable File Reorg with backward compatibility" ON) - #get rid of any RPATH definations already + if(UNIX) + # Get rid of any RPATH definations already. set_target_properties(hipify-clang PROPERTIES INSTALL_RPATH "") - #set RPATH for the binary + # Set RPATH for the binary. set_target_properties(hipify-clang PROPERTIES LINK_FLAGS "-Wl,--enable-new-dtags -Wl,--rpath,$ORIGIN/../lib" ) if(FILE_REORG_BACKWARD_COMPATIBILITY) - include(hipify-backward-compat.cmake) + include(hipify-backward-compat.cmake) endif() + set(CPACK_PACKAGING_INSTALL_PREFIX "/opt/rocm" CACHE PATH "HIP Package Installation Path") set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages/hipify-clang) + configure_file(packaging/hipify-clang.txt ${BUILD_DIR}/CMakeLists.txt @ONLY) configure_file(${CMAKE_SOURCE_DIR}/LICENSE.txt ${BUILD_DIR}/LICENSE.txt @ONLY) - add_custom_target(package_hipify-clang COMMAND ${CMAKE_COMMAND} . 
- WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
- COMMAND rm -rf *.deb *.rpm *.tar.gz
- COMMAND make package
- COMMAND cp *.deb ${PROJECT_BINARY_DIR}
- COMMAND cp *.rpm ${PROJECT_BINARY_DIR}
- COMMAND cp *.tar.gz ${PROJECT_BINARY_DIR}
- WORKING_DIRECTORY ${BUILD_DIR})
-endif()
+    add_custom_target(package_hipify-clang COMMAND ${CMAKE_COMMAND} .
+      WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
+      COMMAND rm -rf *.deb *.rpm *.tar.gz
+      COMMAND make package
+      COMMAND cp *.deb ${PROJECT_BINARY_DIR}
+      COMMAND cp *.rpm ${PROJECT_BINARY_DIR}
+      COMMAND cp *.tar.gz ${PROJECT_BINARY_DIR}
+      WORKING_DIRECTORY ${BUILD_DIR})
+  endif()
 
-endif()
+endif() # if (NOT HIPIFY_CLANG_TESTS_ONLY)
 
 if(HIPIFY_CLANG_TESTS OR HIPIFY_CLANG_TESTS_ONLY)
- find_package(PythonInterp 2.7 REQUIRED)
-
- function (require_program PROGRAM_NAME)
-   find_program(FOUND_${PROGRAM_NAME} ${PROGRAM_NAME})
-   if(FOUND_${PROGRAM_NAME})
-     message(STATUS "Found ${PROGRAM_NAME}: ${FOUND_${PROGRAM_NAME}}")
-   else()
-     message(SEND_ERROR "Can't find ${PROGRAM_NAME}. Either set HIPIFY_CLANG_TESTS(_ONLY) to OFF to disable HIPIFY tests, or install the missing program.")
-   endif()
- endfunction()
-
- require_program(lit)
- require_program(FileCheck)
-
- find_package(CUDA REQUIRED)
- if((CUDA_VERSION VERSION_LESS "7.0") OR (LLVM_PACKAGE_VERSION VERSION_LESS "3.8") OR
-    (CUDA_VERSION VERSION_GREATER "7.5" AND LLVM_PACKAGE_VERSION VERSION_LESS "4.0") OR
-    (CUDA_VERSION VERSION_GREATER "8.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "6.0") OR
-    (CUDA_VERSION VERSION_GREATER "9.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "7.0") OR
-    (CUDA_VERSION VERSION_GREATER "9.2" AND LLVM_PACKAGE_VERSION VERSION_LESS "8.0") OR
-    (CUDA_VERSION VERSION_GREATER "10.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "9.0") OR
-    (CUDA_VERSION VERSION_GREATER "10.1" AND LLVM_PACKAGE_VERSION VERSION_LESS "10.0"))
-   message(SEND_ERROR "CUDA ${CUDA_VERSION} is not supported by LLVM ${LLVM_PACKAGE_VERSION}.")
-   if(CUDA_VERSION_MAJOR VERSION_LESS "7")
-     message(STATUS "Please install CUDA 7.0 or higher.")
-   elseif(CUDA_VERSION_MAJOR VERSION_LESS "8")
-     message(STATUS "Please install LLVM + clang 3.8 or higher.")
-   elseif(CUDA_VERSION_MAJOR VERSION_LESS "9")
-     message(STATUS "Please install LLVM + clang 4.0 or higher.")
-   elseif(CUDA_VERSION VERSION_EQUAL "9.0")
-     message(STATUS "Please install LLVM + clang 6.0 or higher.")
-   elseif(CUDA_VERSION_MAJOR VERSION_LESS "10")
-     message(STATUS "Please install LLVM + clang 7.0 or higher.")
-   elseif(CUDA_VERSION VERSION_EQUAL "10.0")
-     message(STATUS "Please install LLVM + clang 8.0 or higher.")
-   elseif(CUDA_VERSION VERSION_EQUAL "10.1")
-     message(STATUS "Please install LLVM + clang 9.0 or higher.")
-   elseif(CUDA_VERSION VERSION_EQUAL "10.2" OR CUDA_VERSION VERSION_EQUAL "11.0")
-     message(STATUS "Please install LLVM + clang 10.0 or higher.")
-   endif()
- endif()
+  find_package(PythonInterp 2.7 REQUIRED)
+
+  function (require_program PROGRAM_NAME)
+    find_program(FOUND_${PROGRAM_NAME} ${PROGRAM_NAME})
+    if(FOUND_${PROGRAM_NAME})
+      message(STATUS "Found ${PROGRAM_NAME}: ${FOUND_${PROGRAM_NAME}}")
+    else()
+      message(SEND_ERROR "Can't find ${PROGRAM_NAME}. Either set HIPIFY_CLANG_TESTS(_ONLY) to OFF to disable HIPIFY tests, or install the missing program.")
+    endif()
+  endfunction()
+
+  require_program(lit)
+  require_program(FileCheck)
+
+  find_package(CUDA REQUIRED)
+  if((CUDA_VERSION VERSION_LESS "7.0") OR (LLVM_PACKAGE_VERSION VERSION_LESS "3.8") OR
+     (CUDA_VERSION VERSION_GREATER "7.5" AND LLVM_PACKAGE_VERSION VERSION_LESS "4.0") OR
+     (CUDA_VERSION VERSION_GREATER "8.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "6.0") OR
+     (CUDA_VERSION VERSION_GREATER "9.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "7.0") OR
+     (CUDA_VERSION VERSION_GREATER "9.2" AND LLVM_PACKAGE_VERSION VERSION_LESS "8.0") OR
+     (CUDA_VERSION VERSION_GREATER "10.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "9.0") OR
+     (CUDA_VERSION VERSION_GREATER "10.1" AND LLVM_PACKAGE_VERSION VERSION_LESS "10.0"))
+    message(SEND_ERROR "CUDA ${CUDA_VERSION} is not supported by LLVM ${LLVM_PACKAGE_VERSION}.")
+    if(CUDA_VERSION_MAJOR VERSION_LESS "7")
+      message(STATUS "Please install CUDA 7.0 or higher.")
+    elseif(CUDA_VERSION_MAJOR VERSION_LESS "8")
+      message(STATUS "Please install LLVM + clang 3.8 or higher.")
+    elseif(CUDA_VERSION_MAJOR VERSION_LESS "9")
+      message(STATUS "Please install LLVM + clang 4.0 or higher.")
+    elseif(CUDA_VERSION VERSION_EQUAL "9.0")
+      message(STATUS "Please install LLVM + clang 6.0 or higher.")
+    elseif(CUDA_VERSION_MAJOR VERSION_LESS "10")
+      message(STATUS "Please install LLVM + clang 7.0 or higher.")
+    elseif(CUDA_VERSION VERSION_EQUAL "10.0")
+      message(STATUS "Please install LLVM + clang 8.0 or higher.")
+    elseif(CUDA_VERSION VERSION_EQUAL "10.1")
+      message(STATUS "Please install LLVM + clang 9.0 or higher.")
+    elseif(CUDA_VERSION VERSION_EQUAL "10.2" OR CUDA_VERSION VERSION_EQUAL "11.0")
+      message(STATUS "Please install LLVM + clang 10.0 or higher.")
    endif()
+  endif()
 
- configure_file(
-   ${CMAKE_CURRENT_LIST_DIR}/tests/lit.site.cfg.in
-   ${CMAKE_CURRENT_BINARY_DIR}/tests/lit.site.cfg
-   @ONLY)
+  configure_file(
+    ${CMAKE_CURRENT_LIST_DIR}/tests/lit.site.cfg.in
+    ${CMAKE_CURRENT_BINARY_DIR}/tests/lit.site.cfg
+    @ONLY)
 
-if(HIPIFY_CLANG_TESTS_ONLY)
+  if(HIPIFY_CLANG_TESTS_ONLY)
    add_lit_testsuite(test-hipify "Running HIPIFY regression tests"
-    ${CMAKE_CURRENT_LIST_DIR}/tests
-    PARAMS site_config=${CMAKE_CURRENT_BINARY_DIR}/tests/lit.site.cfg
-    ARGS -v)
-else()
+      ${CMAKE_CURRENT_LIST_DIR}/tests
+      PARAMS site_config=${CMAKE_CURRENT_BINARY_DIR}/tests/lit.site.cfg
+      ARGS -v)
+  else()
    add_lit_testsuite(test-hipify "Running HIPIFY regression tests"
-    ${CMAKE_CURRENT_LIST_DIR}/tests
-    PARAMS site_config=${CMAKE_CURRENT_BINARY_DIR}/tests/lit.site.cfg
-    ARGS -v
-    DEPENDS hipify-clang)
-endif()
+      ${CMAKE_CURRENT_LIST_DIR}/tests
+      PARAMS site_config=${CMAKE_CURRENT_BINARY_DIR}/tests/lit.site.cfg
+      ARGS -v
+      DEPENDS hipify-clang)
+  endif()
 
- add_custom_target(test-hipify-clang)
- add_dependencies(test-hipify-clang test-hipify)
- set_target_properties(test-hipify-clang PROPERTIES FOLDER "Tests")
-endif()
+  add_custom_target(test-hipify-clang)
+  add_dependencies(test-hipify-clang test-hipify)
+  set_target_properties(test-hipify-clang PROPERTIES FOLDER "Tests")
+
+endif() # if(HIPIFY_CLANG_TESTS OR HIPIFY_CLANG_TESTS_ONLY)

From 41d0a54b901eb1945469a49b540a2bdc8e852808 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 11 Jul 2023 13:41:50 +0000
Subject: [PATCH 08/20] Bump rocm-docs-core from 0.18.4 to 0.19.0 in
 /docs/.sphinx

Bumps [rocm-docs-core](https://github.com/RadeonOpenCompute/rocm-docs-core) from 0.18.4 to 0.19.0.
- [Release notes](https://github.com/RadeonOpenCompute/rocm-docs-core/releases)
- [Changelog](https://github.com/RadeonOpenCompute/rocm-docs-core/blob/develop/CHANGELOG.md)
- [Commits](https://github.com/RadeonOpenCompute/rocm-docs-core/compare/v0.18.4...v0.19.0)

---
updated-dependencies:
- dependency-name: rocm-docs-core
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot]
---
 docs/.sphinx/requirements.in  | 2 +-
 docs/.sphinx/requirements.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/.sphinx/requirements.in b/docs/.sphinx/requirements.in
index a4f5c674..e53f3df1 100644
--- a/docs/.sphinx/requirements.in
+++ b/docs/.sphinx/requirements.in
@@ -1 +1 @@
-rocm-docs-core==0.18.4
+rocm-docs-core==0.19.0
diff --git a/docs/.sphinx/requirements.txt b/docs/.sphinx/requirements.txt
index 0a276bca..ee9448ee 100644
--- a/docs/.sphinx/requirements.txt
+++ b/docs/.sphinx/requirements.txt
@@ -92,7 +92,7 @@ requests==2.28.2
 # via
 #   pygithub
 #   sphinx
-rocm-docs-core==0.18.4
+rocm-docs-core==0.19.0
 # via -r requirements.in
 smmap==5.0.0
 # via gitdb

From 958bd24866efd539f0d720a3fcb08db9605f51f0 Mon Sep 17 00:00:00 2001
From: Evgeny Mankov
Date: Tue, 11 Jul 2023 20:36:26 +0200
Subject: [PATCH 09/20] [HIPIFY][package][cmake][Linux][fix]
 `FILE_REORG_BACKWARD_COMPATIBILITY` is a Linux-only option

---
 CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4c46b2f5..b28d1d79 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -180,14 +180,14 @@ if (NOT HIPIFY_CLANG_TESTS_ONLY)
     PATTERN "openmp_wrappers" EXCLUDE)
   endif()
 
-  option(FILE_REORG_BACKWARD_COMPATIBILITY "Enable File Reorg with backward compatibility" ON)
-
   if(UNIX)
     # Get rid of any RPATH definitions already.
     set_target_properties(hipify-clang PROPERTIES INSTALL_RPATH "")
     # Set RPATH for the binary.
     set_target_properties(hipify-clang PROPERTIES LINK_FLAGS "-Wl,--enable-new-dtags -Wl,--rpath,$ORIGIN/../lib" )
 
+    option(FILE_REORG_BACKWARD_COMPATIBILITY "Enable File Reorg with backward compatibility" ON)
+
     if(FILE_REORG_BACKWARD_COMPATIBILITY)
       include(hipify-backward-compat.cmake)
     endif()

From dcef62a3b71d10bfd1840cfa8b1a8bab77ee551f Mon Sep 17 00:00:00 2001
From: Evgeny Mankov
Date: Wed, 12 Jul 2023 16:51:50 +0200
Subject: [PATCH 10/20] [HIPIFY][cmake] Do not check `HIPIFY_INCLUDE_IN_HIP_SDK`
 for `Visual Studio` on `UNIX` OSs

---
 CMakeLists.txt | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b28d1d79..1588ac89 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -9,8 +9,7 @@ option(HIPIFY_INCLUDE_IN_HIP_SDK "Include HIPIFY in HIP SDK" OFF)
 if(HIPIFY_INCLUDE_IN_HIP_SDK)
   if(NOT WIN32)
     message(FATAL_ERROR "HIPIFY_INCLUDE_IN_HIP_SDK is only supported on Windows")
-  endif()
-  if(CMAKE_GENERATOR MATCHES "Visual Studio")
+  elseif(CMAKE_GENERATOR MATCHES "Visual Studio")
     message(FATAL_ERROR "HIPIFY_INCLUDE_IN_HIP_SDK is not targeting Visual Studio")
   endif()
 else()

From d18ffb62014197010036f034d29b4a80165d0399 Mon Sep 17 00:00:00 2001
From: Evgeny Mankov
Date: Thu, 13 Jul 2023 16:25:58 +0200
Subject: [PATCH 11/20] [HIPIFY] Sync with CUDA 12.2.0 - Part 5 - Runtime API -
 data types

+ Synced with Driver API
+ Updated the regenerated hipify-perl and docs accordingly
+ Fixed typos
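+ [Example] a minimal sketch (hypothetical user code, not part of this patch) of what the new
  mappings mean in practice: hipify-clang now recognizes these CUDA 12.2 identifiers, and since
  their hip* targets are registered with HIP_UNSUPPORTED, it warns instead of translating them:

    // CUDA 12.2 source
    cudaDeviceAttr attr = cudaDevAttrNumaConfig;                             // reported as unsupported
    cudaMemRangeAttribute mra = cudaMemRangeAttributePreferredLocationType;  // likewise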
---
 bin/hipify-perl                               | 10 +++++
 ..._Runtime_API_functions_supported_by_HIP.md | 10 +++++
 src/CUDA2HIP_Driver_API_types.cpp             | 33 +++++++++-------
 src/CUDA2HIP_Runtime_API_types.cpp            | 39 +++++++++++++++++--
 4 files changed, 73 insertions(+), 19 deletions(-)

diff --git a/bin/hipify-perl b/bin/hipify-perl
index 539a0384..190f2542 100755
--- a/bin/hipify-perl
+++ b/bin/hipify-perl
@@ -6764,14 +6764,20 @@ sub warnUnsupportedFunctions {
     "cudaOccupancyAvailableDynamicSMemPerBlock",
     "cudaNvSciSyncAttrWait",
     "cudaNvSciSyncAttrSignal",
+    "cudaMemsetParamsV2",
     "cudaMemoryTypeUnregistered",
     "cudaMemcpyToArrayAsync",
+    "cudaMemcpyNodeParams",
     "cudaMemcpyFromArrayAsync",
     "cudaMemcpyArrayToArray",
     "cudaMemcpy3DPeerParms",
     "cudaMemcpy3DPeerAsync",
     "cudaMemcpy3DPeer",
     "cudaMemcpy2DArrayToArray",
+    "cudaMemRangeAttributePreferredLocationType",
+    "cudaMemRangeAttributePreferredLocationId",
+    "cudaMemRangeAttributeLastPrefetchLocationType",
+    "cudaMemRangeAttributeLastPrefetchLocationId",
     "cudaMemPrefetchAsync_v2",
     "cudaMemAdvise_v2",
     "cudaLimitPersistingL2CacheSize",
@@ -6810,6 +6816,7 @@ sub warnUnsupportedFunctions {
     "cudaKernelNodeAttributeClusterDimension",
     "cudaInitDeviceFlagsAreValid",
     "cudaInitDevice",
+    "cudaHostNodeParamsV2",
     "cudaGraphicsVDPAURegisterVideoSurface",
     "cudaGraphicsVDPAURegisterOutputSurface",
     "cudaGraphicsResourceSetMapFlags",
@@ -7090,6 +7097,8 @@ sub warnUnsupportedFunctions {
     "cudaDevAttrReserved124",
     "cudaDevAttrReserved123",
    "cudaDevAttrReserved122",
+    "cudaDevAttrNumaId",
+    "cudaDevAttrNumaConfig",
    "cudaDevAttrMemoryPoolSupportedHandleTypes",
    "cudaDevAttrMemSyncDomainCount",
    "cudaDevAttrMaxTimelineSemaphoreInteropSupported",
@@ -7105,6 +7114,7 @@ sub warnUnsupportedFunctions {
    "cudaDevAttrIpcEventSupport",
    "cudaDevAttrHostRegisterSupported",
    "cudaDevAttrHostRegisterReadOnlySupported",
+    "cudaDevAttrHostNumaId",
    "cudaDevAttrGPUDirectRDMAWritesOrdering",
    "cudaDevAttrGPUDirectRDMASupported",
    "cudaDevAttrGPUDirectRDMAFlushWritesOptions",
diff --git a/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md b/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md
index 7920451c..3c8afd66 100644
--- a/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md
+++ b/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md
@@ -662,6 +662,7 @@
 |`cudaDevAttrGlobalMemoryBusWidth`| | | |`hipDeviceAttributeMemoryBusWidth`|1.6.0| | | |
 |`cudaDevAttrGpuOverlap`| | | |`hipDeviceAttributeAsyncEngineCount`|4.3.0| | | |
 |`cudaDevAttrHostNativeAtomicSupported`|8.0| | |`hipDeviceAttributeHostNativeAtomicSupported`|4.3.0| | | |
+|`cudaDevAttrHostNumaId`|12.2| | | | | | | |
 |`cudaDevAttrHostRegisterReadOnlySupported`|11.1| | | | | | | |
 |`cudaDevAttrHostRegisterSupported`|9.2| | | | | | | |
 |`cudaDevAttrIntegrated`| | | |`hipDeviceAttributeIntegrated`|1.9.0| | | |
@@ -736,6 +737,8 @@
 |`cudaDevAttrMemoryPoolsSupported`|11.2| | |`hipDeviceAttributeMemoryPoolsSupported`|5.2.0| | | |
 |`cudaDevAttrMultiGpuBoardGroupID`| | | |`hipDeviceAttributeMultiGpuBoardGroupID`|5.0.0| | | |
 |`cudaDevAttrMultiProcessorCount`| | | |`hipDeviceAttributeMultiprocessorCount`|1.6.0| | | |
+|`cudaDevAttrNumaConfig`|12.2| | | | | | | |
+|`cudaDevAttrNumaId`|12.2| | | | | | | |
 |`cudaDevAttrPageableMemoryAccess`|8.0| | |`hipDeviceAttributePageableMemoryAccess`|3.10.0| | | |
 |`cudaDevAttrPageableMemoryAccessUsesHostPageTables`|9.2| | |`hipDeviceAttributePageableMemoryAccessUsesHostPageTables`|3.10.0| | | |
 |`cudaDevAttrPciBusId`| | | |`hipDeviceAttributePciBusId`|1.6.0| | | |
@@ -1172,6 +1175,7 @@
 |`cudaHostAllocWriteCombined`| | | |`hipHostMallocWriteCombined`|1.6.0| | | |
 |`cudaHostFn_t`|10.0| | |`hipHostFn_t`|4.3.0| | | |
 |`cudaHostNodeParams`|10.0| | |`hipHostNodeParams`|4.3.0| | | |
+|`cudaHostNodeParamsV2`|12.2| | | | | | | |
 |`cudaHostRegisterDefault`| | | |`hipHostRegisterDefault`|1.6.0| | | |
 |`cudaHostRegisterIoMemory`|7.5| | |`hipHostRegisterIoMemory`|1.6.0| | | |
 |`cudaHostRegisterMapped`| | | |`hipHostRegisterMapped`|1.6.0| | | |
@@ -1270,7 +1274,11 @@
 |`cudaMemRangeAttribute`|8.0| | |`hipMemRangeAttribute`|3.7.0| | | |
 |`cudaMemRangeAttributeAccessedBy`|8.0| | |`hipMemRangeAttributeAccessedBy`|3.7.0| | | |
 |`cudaMemRangeAttributeLastPrefetchLocation`|8.0| | |`hipMemRangeAttributeLastPrefetchLocation`|3.7.0| | | |
+|`cudaMemRangeAttributeLastPrefetchLocationId`|12.2| | | | | | | |
+|`cudaMemRangeAttributeLastPrefetchLocationType`|12.2| | | | | | | |
 |`cudaMemRangeAttributePreferredLocation`|8.0| | |`hipMemRangeAttributePreferredLocation`|3.7.0| | | |
+|`cudaMemRangeAttributePreferredLocationId`|12.2| | | | | | | |
+|`cudaMemRangeAttributePreferredLocationType`|12.2| | | | | | | |
 |`cudaMemRangeAttributeReadMostly`|8.0| | |`hipMemRangeAttributeReadMostly`|3.7.0| | | |
 |`cudaMemcpy3DParms`| | | |`hipMemcpy3DParms`|1.7.0| | | |
 |`cudaMemcpy3DPeerParms`| | | | | | | | |
@@ -1280,6 +1288,7 @@
 |`cudaMemcpyHostToDevice`| | | |`hipMemcpyHostToDevice`|1.5.0| | | |
 |`cudaMemcpyHostToHost`| | | |`hipMemcpyHostToHost`|1.5.0| | | |
 |`cudaMemcpyKind`| | | |`hipMemcpyKind`|1.5.0| | | |
+|`cudaMemcpyNodeParams`|12.2| | | | | | | |
 |`cudaMemoryAdvise`|8.0| | |`hipMemoryAdvise`|3.7.0| | | |
 |`cudaMemoryType`| | | |`hipMemoryType`|1.6.0| | | |
 |`cudaMemoryTypeDevice`| | | |`hipMemoryTypeDevice`|1.6.0| | | |
@@ -1287,6 +1296,7 @@
 |`cudaMemoryTypeManaged`|10.0| | |`hipMemoryTypeManaged`|5.3.0| | | |
 |`cudaMemoryTypeUnregistered`| | | | | | | | |
 |`cudaMemsetParams`|10.0| | |`hipMemsetParams`|4.3.0| | | |
+|`cudaMemsetParamsV2`|12.2| | | | | | | |
 |`cudaMipmappedArray`| | | |`hipMipmappedArray`|1.7.0| | | |
 |`cudaMipmappedArray_const_t`| | | |`hipMipmappedArray_const_t`|1.6.0| | | |
 |`cudaMipmappedArray_t`| | | |`hipMipmappedArray_t`|1.7.0| | | |
diff --git a/src/CUDA2HIP_Driver_API_types.cpp b/src/CUDA2HIP_Driver_API_types.cpp
index eef3b3d3..9de32a34 100644
--- a/src/CUDA2HIP_Driver_API_types.cpp
+++ b/src/CUDA2HIP_Driver_API_types.cpp
@@ -74,6 +74,7 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP {
   {"CUDA_HOST_NODE_PARAMS", {"hipHostNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}},
   {"CUDA_HOST_NODE_PARAMS_v1", {"hipHostNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}},
   {"CUDA_HOST_NODE_PARAMS_v2_st", {"hipHostNodeParams_v2", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+  // cudaHostNodeParamsV2
   {"CUDA_HOST_NODE_PARAMS_v2", {"hipHostNodeParams_v2", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
   // cudaKernelNodeParams
@@ -109,13 +110,15 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP {
   {"CUDA_MEMCPY3D_PEER_v1", {"hip_Memcpy3D_Peer", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
   {"CUDA_MEMCPY_NODE_PARAMS_st", {"hiMemcpyNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+  // cudaMemcpyNodeParams
   {"CUDA_MEMCPY_NODE_PARAMS", {"hiMemcpyNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
-  // cudaMemsetParams
   {"CUDA_MEMSET_NODE_PARAMS_st", {"hipMemsetParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}},
+  // cudaMemsetParams
   {"CUDA_MEMSET_NODE_PARAMS", {"hipMemsetParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}},
   {"CUDA_MEMSET_NODE_PARAMS_v1", {"hipMemsetParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}},
   {"CUDA_MEMSET_NODE_PARAMS_v2_st", {"hipMemsetParams_v2", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+  // cudaMemsetParamsV2
   {"CUDA_MEMSET_NODE_PARAMS_v2", {"hipMemsetParams_v2", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
   {"CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st", {"HIP_POINTER_ATTRIBUTE_P2P_TOKENS", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
@@ -855,27 +858,27 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP {
   {"CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH", {"hipDeviceAttributeClusterLaunch", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 120
   // cudaDevAttrDeferredMappingCudaArraySupported
   {"CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED", {"hipDeviceAttributeDeferredMappingCudaArraySupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 121
-  //
+  // cudaDevAttrReserved122
   {"CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS_V2", {"hipDeviceAttributeCanUse64BitStreamMemOpsV2", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED | CUDA_REMOVED}}, // 122
-  //
+  // cudaDevAttrReserved123
   {"CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR_V2", {"hipDeviceAttributeCanUseStreamWaitValueNorV2", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED | CUDA_REMOVED}}, // 123
-  //
+  // cudaDevAttrReserved124
   {"CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED", {"hipDeviceAttributeDmaBufSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 124
   // cudaDevAttrIpcEventSupport
   {"CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED", {"hipDeviceAttributeIpcEventSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 125
   // cudaDevAttrMemSyncDomainCount
   {"CU_DEVICE_ATTRIBUTE_MEM_SYNC_DOMAIN_COUNT", {"hipDeviceAttributeMemSyncDomainCount", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 126
-  //
+  // cudaDevAttrReserved127
   {"CU_DEVICE_ATTRIBUTE_TENSOR_MAP_ACCESS_SUPPORTED", {"hipDeviceAttributeTensorMapAccessSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 127
-  //
+  // cudaDevAttrReserved129
   {"CU_DEVICE_ATTRIBUTE_UNIFIED_FUNCTION_POINTERS", {"hipDeviceAttributeUnifiedFunctionPointers", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 129
-  //
+  // cudaDevAttrNumaConfig
   {"CU_DEVICE_ATTRIBUTE_NUMA_CONFIG", {"hipDeviceAttributeNumaConfig", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 130
-  //
+  // cudaDevAttrNumaId
   {"CU_DEVICE_ATTRIBUTE_NUMA_ID", {"hipDeviceAttributeNumaId", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 131
   //
   {"CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED", {"hipDeviceAttributeMulticastSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 132
-  //
+  // cudaDevAttrHostNumaId
   {"CU_DEVICE_ATTRIBUTE_HOST_NUMA_ID", {"hipDeviceAttributeHostNumaId", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 134
   // cudaDevAttrMax
   {"CU_DEVICE_ATTRIBUTE_MAX", {"hipDeviceAttributeMax", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
@@ -1453,14 +1456,14 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP {
   {"CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY", {"hipMemRangeAttributeAccessedBy", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES}}, // 3
   // cudaMemRangeAttributeLastPrefetchLocation
   {"CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION", {"hipMemRangeAttributeLastPrefetchLocation", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES}}, // 4
-  //
+  // cudaMemRangeAttributePreferredLocationType
   {"CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_TYPE", {"hipMemRangeAttributePreferredLocationType", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 5
-  //
+  // cudaMemRangeAttributePreferredLocationId
   {"CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_ID", {"hipMemRangeAttributePreferredLocationId", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 6
-  //
-  {"CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_TYPE", {"hipMemRangeAttributeLastPreferredLocationType", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 7
-  //
-  {"CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_ID", {"hipMemRangeAttributeLastPreferredLocationId", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 8
+  // cudaMemRangeAttributeLastPrefetchLocationType
+  {"CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_TYPE", {"hipMemRangeAttributeLastPrefetchLocationType", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 7
+  // cudaMemRangeAttributeLastPrefetchLocationId
+  {"CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_ID", {"hipMemRangeAttributeLastPrefetchLocationId", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 8
 
   // no analogue
   {"CUoccupancy_flags", {"hipOccupancyFlags", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
diff --git a/src/CUDA2HIP_Runtime_API_types.cpp b/src/CUDA2HIP_Runtime_API_types.cpp
index d66849e6..15730575 100644
--- a/src/CUDA2HIP_Runtime_API_types.cpp
+++ b/src/CUDA2HIP_Runtime_API_types.cpp
@@ -70,6 +70,8 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP {
 
   // CUDA_HOST_NODE_PARAMS
   {"cudaHostNodeParams", {"hipHostNodeParams", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES}},
+  // CUDA_HOST_NODE_PARAMS_v2
+  {"cudaHostNodeParamsV2", {"hipHostNodeParams_v2", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
   // CUipcEventHandle
   {"cudaIpcEventHandle_t", {"hipIpcEventHandle_t", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES}},
@@ -97,6 +99,8 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP {
 
   // CUDA_MEMSET_NODE_PARAMS
   {"cudaMemsetParams", {"hipMemsetParams", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES}},
+  // CUDA_MEMSET_NODE_PARAMS_v2
+  {"cudaMemsetParamsV2", {"hipMemsetParams_v2", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
   // no analogue
   {"cudaPitchedPtr", {"hipPitchedPtr", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES}},
@@ -230,6 +234,9 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP {
   // CUkernel
   {"cudaKernel_t", {"hipKernel", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
+  // CUDA_MEMCPY_NODE_PARAMS
+  {"cudaMemcpyNodeParams", {"hiMemcpyNodeParams", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+
   // 2. Unions
 
   // CUstreamAttrValue
@@ -586,11 +593,11 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP {
   {"cudaDevAttrClusterLaunch", {"hipDeviceAttributeClusterLaunch", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 120
   // CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED
   {"cudaDevAttrDeferredMappingCudaArraySupported", {"hipDeviceAttributeDeferredMappingCudaArraySupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 121
-  //
+  // CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS_V2
   {"cudaDevAttrReserved122", {"hipDevAttrReserved122", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 122
-  //
+  // CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR_V2
   {"cudaDevAttrReserved123", {"hipDevAttrReserved123", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 123
-  //
+  // CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED
   {"cudaDevAttrReserved124", {"hipDevAttrReserved124", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 124
   // CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED
   {"cudaDevAttrIpcEventSupport", {"hipDevAttrIpcEventSupport", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 125
@@ -598,12 +605,18 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP {
   {"cudaDevAttrMemSyncDomainCount", {"hipDevAttrMemSyncDomainCount", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 126
   // CU_DEVICE_ATTRIBUTE_TENSOR_MAP_ACCESS_SUPPORTED
   {"cudaDevAttrReserved127", {"hipDeviceAttributeTensorMapAccessSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 127
-  //
+  // CUDA only
   {"cudaDevAttrReserved128", {"hipDevAttrReserved128", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 128
   // CU_DEVICE_ATTRIBUTE_UNIFIED_FUNCTION_POINTERS
   {"cudaDevAttrReserved129", {"hipDeviceAttributeUnifiedFunctionPointers", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 129
+  // CU_DEVICE_ATTRIBUTE_NUMA_CONFIG
+  {"cudaDevAttrNumaConfig", {"hipDeviceAttributeNumaConfig", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 130
+  // CU_DEVICE_ATTRIBUTE_NUMA_ID
+  {"cudaDevAttrNumaId", {"hipDeviceAttributeNumaId", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 131
   // CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED
   {"cudaDevAttrReserved132", {"hipDeviceAttributeMulticastSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 132
+  // CU_DEVICE_ATTRIBUTE_HOST_NUMA_ID
+  {"cudaDevAttrHostNumaId", {"hipDeviceAttributeHostNumaId", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 134
   // CU_DEVICE_ATTRIBUTE_MAX
   {"cudaDevAttrMax", {"hipDeviceAttributeMax", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
@@ -1292,6 +1305,14 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP {
   {"cudaMemRangeAttributeAccessedBy", {"hipMemRangeAttributeAccessedBy", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES}}, // 3
   // CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION
   {"cudaMemRangeAttributeLastPrefetchLocation", {"hipMemRangeAttributeLastPrefetchLocation", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES}}, // 4
+  // CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_TYPE
+  {"cudaMemRangeAttributePreferredLocationType", {"hipMemRangeAttributePreferredLocationType", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 5
+  // CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_ID
+  {"cudaMemRangeAttributePreferredLocationId", {"hipMemRangeAttributePreferredLocationId", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 6
+  // CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_TYPE
+  {"cudaMemRangeAttributeLastPrefetchLocationType", {"hipMemRangeAttributeLastPrefetchLocationType", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 7
+  // CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_ID
+  {"cudaMemRangeAttributeLastPrefetchLocationId", {"hipMemRangeAttributeLastPrefetchLocationId", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 8
 
   // no analogue
   {"cudaOutputMode", {"hipOutputMode", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED | CUDA_REMOVED}},
@@ -2508,6 +2529,16 @@ const std::map CUDA_RUNTIME_TYPE_NAME_VER_MAP
   {"cudaDevAttrReserved132", {CUDA_121, CUDA_0, CUDA_0 }},
   {"CUkern_st", {CUDA_121, CUDA_0, CUDA_0 }},
   {"cudaKernel_t", {CUDA_121, CUDA_0, CUDA_0 }},
+  {"cudaMemcpyNodeParams", {CUDA_122, CUDA_0, CUDA_0 }},
+  {"cudaMemsetParamsV2", {CUDA_122, CUDA_0, CUDA_0 }},
+  {"cudaHostNodeParamsV2", {CUDA_122, CUDA_0, CUDA_0 }},
+  {"cudaMemRangeAttributePreferredLocationType", {CUDA_122, CUDA_0, CUDA_0 }},
+  {"cudaMemRangeAttributePreferredLocationId", {CUDA_122, CUDA_0, CUDA_0 }},
+  {"cudaMemRangeAttributeLastPrefetchLocationType", {CUDA_122, CUDA_0, CUDA_0 }},
+  {"cudaMemRangeAttributeLastPrefetchLocationId", {CUDA_122, CUDA_0, CUDA_0 }},
+  {"cudaDevAttrNumaConfig", {CUDA_122, CUDA_0, CUDA_0 }},
+  {"cudaDevAttrNumaId", {CUDA_122, CUDA_0, CUDA_0 }},
+  {"cudaDevAttrHostNumaId", {CUDA_122, CUDA_0, CUDA_0 }},
 };
 
 const std::map HIP_RUNTIME_TYPE_NAME_VER_MAP {

From c51ecc59240aa04dd096081d27dcfb9aa6834dec Mon Sep 17 00:00:00 2001
From: Evgeny Mankov
Date: Fri, 14 Jul 2023 19:09:46 +0200
Subject: [PATCH 12/20] [HIPIFY] Sync with CUDA 12.2.0 - Part 6 - Runtime API -
 data types

+ Synced with Driver API
+ Updated the regenerated hipify-perl and docs accordingly
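+ [Example] a minimal sketch (hypothetical user code) touching the newly covered CUDA 12.2
  enumerators; the hip* counterparts registered in this patch are still HIP_UNSUPPORTED, so
  hipify-clang warns on them rather than rewriting them silently:

    cudaMemLocation loc;
    loc.type = cudaMemLocationTypeHostNuma;  // new in CUDA 12.2: host NUMA placement
    loc.id = 0;                              // NUMA node id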
---
 bin/hipify-perl                               | 15 ++++++
 ..._Runtime_API_functions_supported_by_HIP.md | 15 ++++++
 src/CUDA2HIP_Driver_API_types.cpp             | 30 ++++++-----
 src/CUDA2HIP_Runtime_API_types.cpp            | 52 +++++++++++++++++++
 4 files changed, 99 insertions(+), 13 deletions(-)

diff --git a/bin/hipify-perl b/bin/hipify-perl
index 190f2542..b134a767 100755
--- a/bin/hipify-perl
+++ b/bin/hipify-perl
@@ -6779,6 +6779,11 @@ sub warnUnsupportedFunctions {
    "cudaMemRangeAttributeLastPrefetchLocationType",
    "cudaMemRangeAttributeLastPrefetchLocationId",
    "cudaMemPrefetchAsync_v2",
+    "cudaMemLocationTypeHostNumaCurrent",
+    "cudaMemLocationTypeHostNuma",
+    "cudaMemLocationTypeHost",
+    "cudaMemFreeNodeParams",
+    "cudaMemAllocNodeParamsV2",
    "cudaMemAdvise_v2",
    "cudaLimitPersistingL2CacheSize",
    "cudaLimitMaxL2FetchGranularity",
@@ -6809,6 +6814,7 @@ sub warnUnsupportedFunctions {
    "cudaLaunchAttribute",
    "cudaKeyValuePair",
    "cudaKernel_t",
+    "cudaKernelNodeParamsV2",
    "cudaKernelNodeAttributePriority",
    "cudaKernelNodeAttributeMemSyncDomainMap",
    "cudaKernelNodeAttributeMemSyncDomain",
@@ -6838,6 +6844,7 @@ sub warnUnsupportedFunctions {
    "cudaGraphicsCubeFaceNegativeX",
    "cudaGraphicsCubeFace",
    "cudaGraphNodeSetParams",
+    "cudaGraphNodeParams",
    "cudaGraphInstantiateWithParams",
    "cudaGraphInstantiateSuccess",
    "cudaGraphInstantiateResult",
@@ -6903,8 +6910,10 @@ sub warnUnsupportedFunctions {
    "cudaFlushGPUDirectRDMAWritesOptionMemOps",
    "cudaFlushGPUDirectRDMAWritesOptionHost",
    "cudaExternalSemaphoreWaitSkipNvSciBufMemSync",
+    "cudaExternalSemaphoreWaitNodeParamsV2",
    "cudaExternalSemaphoreWaitNodeParams",
    "cudaExternalSemaphoreSignalSkipNvSciBufMemSync",
+    "cudaExternalSemaphoreSignalNodeParamsV2",
    "cudaExternalSemaphoreSignalNodeParams",
    "cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32",
    "cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd",
@@ -6915,9 +6924,11 @@ sub warnUnsupportedFunctions {
    "cudaExternalMemoryMipmappedArrayDesc",
    "cudaExternalMemoryHandleTypeNvSciBuf",
    "cudaExternalMemoryGetMappedMipmappedArray",
+    "cudaEventWaitNodeParams",
    "cudaEventWaitExternal",
    "cudaEventWaitDefault",
    "cudaEventRecordWithFlags",
+    "cudaEventRecordNodeParams",
    "cudaEventRecordExternal",
    "cudaEventRecordDefault",
    "cudaEventCreateFromEGLSync",
@@ -7081,6 +7092,9 @@ sub warnUnsupportedFunctions {
    "cudaDriverEntryPointQueryResult",
    "cudaDeviceSyncMemops",
    "cudaDevicePropDontCare",
+    "cudaDeviceNumaConfigNumaNode",
+    "cudaDeviceNumaConfigNone",
+    "cudaDeviceNumaConfig",
    "cudaDeviceMask",
    "cudaDeviceGetTexture1DLinearMaxWidth",
    "cudaDeviceGetNvSciSyncAttributes",
@@ -7185,6 +7199,7 @@ sub warnUnsupportedFunctions {
    "cudaClusterSchedulingPolicyLoadBalancing",
    "cudaClusterSchedulingPolicyDefault",
    "cudaClusterSchedulingPolicy",
+    "cudaChildGraphNodeParams",
    "cudaChannelFormatKindUnsignedNormalized8X4",
    "cudaChannelFormatKindUnsignedNormalized8X2",
    "cudaChannelFormatKindUnsignedNormalized8X1",
diff --git a/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md b/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md
index 3c8afd66..21dbbfbd 100644
--- a/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md
+++ b/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md
@@ -600,6 +600,7 @@
 |`cudaChannelFormatKindUnsignedNormalized8X1`|11.5| | | | | | | |
 |`cudaChannelFormatKindUnsignedNormalized8X2`|11.5| | | | | | | |
 |`cudaChannelFormatKindUnsignedNormalized8X4`|11.5| | | | | | | |
+|`cudaChildGraphNodeParams`|12.2| | | | | | | |
 |`cudaClusterSchedulingPolicy`|11.8| | | | | | | |
 |`cudaClusterSchedulingPolicyDefault`|11.8| | | | | | | |
 |`cudaClusterSchedulingPolicyLoadBalancing`|11.8| | | | | | | |
@@ -775,6 +776,9 @@
 |`cudaDeviceLmemResizeToMax`| | | |`hipDeviceLmemResizeToMax`|1.6.0| | | |
 |`cudaDeviceMapHost`| | | |`hipDeviceMapHost`|1.6.0| | | |
 |`cudaDeviceMask`| | | | | | | | |
+|`cudaDeviceNumaConfig`|12.2| | | | | | | |
+|`cudaDeviceNumaConfigNone`|12.2| | | | | | | |
+|`cudaDeviceNumaConfigNumaNode`|12.2| | | | | | | |
 |`cudaDeviceP2PAttr`|8.0| | |`hipDeviceP2PAttr`|3.8.0| | | |
 |`cudaDeviceProp`| | | |`hipDeviceProp_t`|1.6.0| | | |
 |`cudaDevicePropDontCare`| | |12.0| | | | | |
@@ -1010,8 +1014,10 @@
 |`cudaEventInterprocess`| | | |`hipEventInterprocess`|1.6.0| | | |
 |`cudaEventRecordDefault`|11.1| | | | | | | |
 |`cudaEventRecordExternal`|11.1| | | | | | | |
+|`cudaEventRecordNodeParams`|12.2| | | | | | | |
 |`cudaEventWaitDefault`|11.1| | | | | | | |
 |`cudaEventWaitExternal`| | | | | | | | |
+|`cudaEventWaitNodeParams`|12.2| | | | | | | |
 |`cudaEvent_t`| | | |`hipEvent_t`|1.6.0| | | |
 |`cudaExtent`| | | |`hipExtent`|1.7.0| | | |
 |`cudaExternalMemoryBufferDesc`|10.0| | |`hipExternalMemoryBufferDesc`|4.3.0| | | |
@@ -1041,10 +1047,12 @@
 |`cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd`|11.2| | | | | | | |
 |`cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32`|11.2| | | | | | | |
 |`cudaExternalSemaphoreSignalNodeParams`|11.2| | | | | | | |
+|`cudaExternalSemaphoreSignalNodeParamsV2`|12.2| | | | | | | |
 |`cudaExternalSemaphoreSignalParams`|10.0| | |`hipExternalSemaphoreSignalParams`|4.4.0| | | |
 |`cudaExternalSemaphoreSignalParams_v1`|11.2| | |`hipExternalSemaphoreSignalParams`|4.4.0| | | |
 |`cudaExternalSemaphoreSignalSkipNvSciBufMemSync`|10.2| | | | | | | |
 |`cudaExternalSemaphoreWaitNodeParams`|11.2| | | | | | | |
+|`cudaExternalSemaphoreWaitNodeParamsV2`|12.2| | | | | | | |
 |`cudaExternalSemaphoreWaitParams`|10.0| | |`hipExternalSemaphoreWaitParams`|4.4.0| | | |
 |`cudaExternalSemaphoreWaitParams_v1`|11.2| | |`hipExternalSemaphoreWaitParams`|4.4.0| | | |
 |`cudaExternalSemaphoreWaitSkipNvSciBufMemSync`|10.2| | | | | | | |
@@ -1133,6 +1141,7 @@
 |`cudaGraphMemAttrUsedMemCurrent`|11.4| | |`hipGraphMemAttrUsedMemCurrent`|5.3.0| | | |
 |`cudaGraphMemAttrUsedMemHigh`|11.4| | |`hipGraphMemAttrUsedMemHigh`|5.3.0| | | |
 |`cudaGraphMemAttributeType`|11.4| | |`hipGraphMemAttributeType`|5.3.0| | | |
+|`cudaGraphNodeParams`|12.2| | | | | | | |
 |`cudaGraphNodeType`|10.0| | |`hipGraphNodeType`|4.3.0| | | |
 |`cudaGraphNodeTypeCount`|10.0| | |`hipGraphNodeTypeCount`|4.3.0| | | |
 |`cudaGraphNodeTypeEmpty`|10.0| | |`hipGraphNodeTypeEmpty`|4.3.0| | | |
@@ -1198,6 +1207,7 @@
 |`cudaKernelNodeAttributeMemSyncDomainMap`|12.0| | | | | | | |
 |`cudaKernelNodeAttributePriority`|11.7| | | | | | | |
 |`cudaKernelNodeParams`|10.0| | |`hipKernelNodeParams`|4.3.0| | | |
+|`cudaKernelNodeParamsV2`|12.2| | | | | | | |
 |`cudaKernel_t`|12.1| | | | | | | |
 |`cudaKeyValuePair`| | |12.0| | | | | |
 |`cudaLaunchAttribute`|11.8| | | | | | | |
@@ -1243,6 +1253,7 @@
 |`cudaMemAdviseUnsetPreferredLocation`|8.0| | |`hipMemAdviseUnsetPreferredLocation`|3.7.0| | | |
 |`cudaMemAdviseUnsetReadMostly`|8.0| | |`hipMemAdviseUnsetReadMostly`|3.7.0| | | |
 |`cudaMemAllocNodeParams`|11.4| | |`hipMemAllocNodeParams`|5.5.0| | | |
+|`cudaMemAllocNodeParamsV2`|12.2| | | | | | | |
 |`cudaMemAllocationHandleType`|11.2| | |`hipMemAllocationHandleType`|5.2.0| | | |
 |`cudaMemAllocationType`|11.2| | |`hipMemAllocationType`|5.2.0| | | |
 |`cudaMemAllocationTypeInvalid`|11.2| | |`hipMemAllocationTypeInvalid`|5.2.0| | | |
@@ -1251,6 +1262,7 @@
 |`cudaMemAttachGlobal`| | | |`hipMemAttachGlobal`|2.5.0| | | |
 |`cudaMemAttachHost`| | | |`hipMemAttachHost`|2.5.0| | | |
 |`cudaMemAttachSingle`| | | |`hipMemAttachSingle`|3.7.0| | | |
+|`cudaMemFreeNodeParams`|12.2| | | | | | | |
 |`cudaMemHandleTypeNone`|11.2| | |`hipMemHandleTypeNone`|5.2.0| | | |
 |`cudaMemHandleTypePosixFileDescriptor`|11.2| | |`hipMemHandleTypePosixFileDescriptor`|5.2.0| | | |
 |`cudaMemHandleTypeWin32`|11.2| | |`hipMemHandleTypeWin32`|5.2.0| | | |
@@ -1258,6 +1270,9 @@
 |`cudaMemLocation`|11.2| | |`hipMemLocation`|5.2.0| | | |
 |`cudaMemLocationType`|11.2| | |`hipMemLocationType`|5.2.0| | | |
 |`cudaMemLocationTypeDevice`|11.2| | |`hipMemLocationTypeDevice`|5.2.0| | | |
+|`cudaMemLocationTypeHost`|12.2| | | | | | | |
+|`cudaMemLocationTypeHostNuma`|12.2| | | | | | | |
+|`cudaMemLocationTypeHostNumaCurrent`|12.2| | | | | | | |
 |`cudaMemLocationTypeInvalid`|11.2| | |`hipMemLocationTypeInvalid`|5.2.0| | | |
 |`cudaMemPoolAttr`|11.2| | |`hipMemPoolAttr`|5.2.0| | | |
 |`cudaMemPoolAttrReleaseThreshold`|11.2| | |`hipMemPoolAttrReleaseThreshold`|5.2.0| | | |
diff --git a/src/CUDA2HIP_Driver_API_types.cpp b/src/CUDA2HIP_Driver_API_types.cpp
index 9de32a34..15e8748f 100644
--- a/src/CUDA2HIP_Driver_API_types.cpp
+++ b/src/CUDA2HIP_Driver_API_types.cpp
@@ -81,6 +81,7 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP {
   {"CUDA_KERNEL_NODE_PARAMS_st", {"hipKernelNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}},
   {"CUDA_KERNEL_NODE_PARAMS", {"hipKernelNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}},
   {"CUDA_KERNEL_NODE_PARAMS_v1", {"hipKernelNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}},
+  // cudaKernelNodeParamsV2
   {"CUDA_KERNEL_NODE_PARAMS_v2_st", {"hipKernelNodeParams_v2", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
   {"CUDA_KERNEL_NODE_PARAMS_v2", {"hipKernelNodeParams_v2", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
   {"CUDA_KERNEL_NODE_PARAMS_v3_st", {"hipKernelNodeParams_v3", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
@@ -280,6 +281,7 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP {
   {"CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st", {"hipExternalSemaphoreSignalNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
   {"CUDA_EXT_SEM_SIGNAL_NODE_PARAMS", {"hipExternalSemaphoreSignalNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
   {"CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v1", {"hipExternalSemaphoreSignalNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+  // cudaExternalSemaphoreSignalNodeParamsV2
   {"CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2_st", {"hipExternalSemaphoreSignalNodeParams_v2", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
   {"CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2", {"hipExternalSemaphoreSignalNodeParams_v2", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
@@ -287,6 +289,7 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP {
   {"CUDA_EXT_SEM_WAIT_NODE_PARAMS_st", {"hipExternalSemaphoreWaitNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
   {"CUDA_EXT_SEM_WAIT_NODE_PARAMS", {"hipExternalSemaphoreWaitNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
   {"CUDA_EXT_SEM_WAIT_NODE_PARAMS_v1", {"hipExternalSemaphoreWaitNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+  // cudaExternalSemaphoreWaitNodeParamsV2
   {"CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2_st", {"hipExternalSemaphoreWaitNodeParams_v2", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
   {"CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2", {"hipExternalSemaphoreWaitNodeParams_v2", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
@@ -318,28 +321,29 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP {
   // cudaMemAllocNodeParams
   {"CUDA_MEM_ALLOC_NODE_PARAMS_st", {"hipMemAllocNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, CUDA_REMOVED}},
   {"CUDA_MEM_ALLOC_NODE_PARAMS_v1_st", {"hipMemAllocNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}},
+  // cudaMemAllocNodeParamsV2
   {"CUDA_MEM_ALLOC_NODE_PARAMS_v2_st", {"hipMemAllocNodeParams_v2", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
   {"CUDA_MEM_ALLOC_NODE_PARAMS", {"hipMemAllocNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}},
   {"CUDA_MEM_ALLOC_NODE_PARAMS_v1", {"hipMemAllocNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}},
   {"CUDA_MEM_ALLOC_NODE_PARAMS_v2", {"hipMemAllocNodeParams_v2", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
-  //
+  // cudaMemFreeNodeParams
   {"CUDA_MEM_FREE_NODE_PARAMS_st", {"hipMemFreeNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
   {"CUDA_MEM_FREE_NODE_PARAMS", {"hipMemFreeNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
-  //
+  // cudaChildGraphNodeParams
   {"CUDA_CHILD_GRAPH_NODE_PARAMS_st", {"hipChildGraphNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
   {"CUDA_CHILD_GRAPH_NODE_PARAMS", {"hipChildGraphNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
-  //
+  // cudaEventRecordNodeParams
   {"CUDA_EVENT_RECORD_NODE_PARAMS_st", {"hipEventRecordNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
   {"CUDA_EVENT_RECORD_NODE_PARAMS", {"hipEventRecordNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
-  //
+  // cudaEventWaitNodeParams
   {"CUDA_EVENT_WAIT_NODE_PARAMS_st", {"hipEventWaitNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
   {"CUDA_EVENT_WAIT_NODE_PARAMS", {"hipEventWaitNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
-  //
+  // cudaGraphNodeParams
   {"CUgraphNodeParams_st", {"hipGraphNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
   {"CUgraphNodeParams", {"hipGraphNodeParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
@@ -1997,11 +2001,11 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP {
   {"CU_MEM_LOCATION_TYPE_INVALID", {"hipMemLocationTypeInvalid", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES}}, // 0x0
   // cudaMemLocationTypeDevice
   {"CU_MEM_LOCATION_TYPE_DEVICE", {"hipMemLocationTypeDevice", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES}}, // 0x1
-  //
+  // cudaMemLocationTypeHost
   {"CU_MEM_LOCATION_TYPE_HOST", {"hipMemLocationTypeHost", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0x2
-  //
+  // cudaMemLocationTypeHostNuma
   {"CU_MEM_LOCATION_TYPE_HOST_NUMA", {"hipMemLocationTypeHostNuma", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0x3
-  //
+  // cudaMemLocationTypeHostNumaCurrent
   {"CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT", {"hipMemLocationTypeHostNumaCurrent", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0x4
   // no analogue
   {"CU_MEM_LOCATION_TYPE_MAX", {"hipMemLocationTypeMax", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0x7FFFFFFF
@@ -2533,14 +2537,14 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP {
   //
   {"CU_COREDUMP_MAX", {"HIP_COREDUMP_MAX", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
-  //
+  // cudaDeviceNumaConfig
   {"CUdeviceNumaConfig", {"hipDeviceNumaConfig", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
   {"CUdeviceNumaConfig_enum", {"hipDeviceNumaConfig", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
   // CUdeviceNumaConfig enum values
-  //
-  {"CU_DEVICE_NUMA_CONFIG_NONE", {"HIP_DEVICE_NUMA_CONFIG_NONE", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
-  //
-  {"CU_DEVICE_NUMA_CONFIG_NUMA_NODE", {"HIP_DEVICE_NUMA_CONFIG_NUMA_NODE", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+  // cudaDeviceNumaConfigNone
+  {"CU_DEVICE_NUMA_CONFIG_NONE", {"hipDeviceNumaConfigNone", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+  // cudaDeviceNumaConfigNumaNode
+  {"CU_DEVICE_NUMA_CONFIG_NUMA_NODE", {"hipDeviceNumaConfigNumaNode", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
   // 4. Typedefs
 
diff --git a/src/CUDA2HIP_Runtime_API_types.cpp b/src/CUDA2HIP_Runtime_API_types.cpp
index 15730575..9428a1eb 100644
--- a/src/CUDA2HIP_Runtime_API_types.cpp
+++ b/src/CUDA2HIP_Runtime_API_types.cpp
@@ -85,6 +85,8 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP {
 
   // CUDA_KERNEL_NODE_PARAMS
   {"cudaKernelNodeParams", {"hipKernelNodeParams", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES}},
+  // CUDA_KERNEL_NODE_PARAMS_v2_st
+  {"cudaKernelNodeParamsV2", {"hipKernelNodeParams_v2", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
   // no analogue
   // CUDA_LAUNCH_PARAMS struct differs
@@ -214,12 +216,33 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP {
 
   // CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st
   {"cudaExternalSemaphoreSignalNodeParams", {"hipExternalSemaphoreSignalNodeParams", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+  // CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2_st
+  {"cudaExternalSemaphoreSignalNodeParamsV2", {"hipExternalSemaphoreSignalNodeParams_v2", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
   // CUDA_EXT_SEM_WAIT_NODE_PARAMS_st
   {"cudaExternalSemaphoreWaitNodeParams", {"hipExternalSemaphoreWaitNodeParams", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+  // CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2_st
+  {"cudaExternalSemaphoreWaitNodeParamsV2", {"hipExternalSemaphoreWaitNodeParams_v2", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
   // CUDA_MEM_ALLOC_NODE_PARAMS_st
   {"cudaMemAllocNodeParams", {"hipMemAllocNodeParams", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES}},
+  // CUDA_MEM_ALLOC_NODE_PARAMS_v2_st
+  {"cudaMemAllocNodeParamsV2", {"hipMemAllocNodeParams_v2", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+
+  // CUDA_MEM_FREE_NODE_PARAMS_st
+  {"cudaMemFreeNodeParams", {"hipMemFreeNodeParams", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+
+  // CUDA_CHILD_GRAPH_NODE_PARAMS_st
+  {"cudaChildGraphNodeParams", {"hipChildGraphNodeParams", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+
+  // CUDA_EVENT_RECORD_NODE_PARAMS_st
+  {"cudaEventRecordNodeParams", {"hipEventRecordNodeParams", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+
+  // CUDA_EVENT_WAIT_NODE_PARAMS_st
+  {"cudaEventWaitNodeParams", {"hipEventWaitNodeParams", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+
+  // CUgraphNodeParams_st
+  {"cudaGraphNodeParams", {"hipGraphNodeParams", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
   // CUDA_ARRAY_MEMORY_REQUIREMENTS_st
   {"cudaArrayMemoryRequirements", {"hipArrayMemoryRequirements", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
@@ -1632,6 +1655,12 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP {
   {"cudaMemLocationTypeInvalid", {"hipMemLocationTypeInvalid", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES}}, // 0
   // CU_MEM_LOCATION_TYPE_DEVICE
   {"cudaMemLocationTypeDevice", {"hipMemLocationTypeDevice", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES}}, // 1
+  // CU_MEM_LOCATION_TYPE_HOST
+  {"cudaMemLocationTypeHost", {"hipMemLocationTypeHost", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 2
+  // CU_MEM_LOCATION_TYPE_HOST_NUMA
+  {"cudaMemLocationTypeHostNuma", {"hipMemLocationTypeHostNuma", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 3
+  // CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT
+  {"cudaMemLocationTypeHostNumaCurrent", {"hipMemLocationTypeHostNumaCurrent", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 4
 
   // CUmemAllocationType
   {"cudaMemAllocationType", {"hipMemAllocationType", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES}},
@@ -1840,6 +1869,14 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP {
   // CU_LAUNCH_MEM_SYNC_DOMAIN_REMOTE
   {"cudaLaunchMemSyncDomainRemote", {"hipLaunchMemSyncDomainRemote", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
 
+  // CUdeviceNumaConfig
+  {"cudaDeviceNumaConfig", {"hipDeviceNumaConfig", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+  // cudaDeviceNumaConfig enum values
+  // CU_DEVICE_NUMA_CONFIG_NONE
+  {"cudaDeviceNumaConfigNone", {"hipDeviceNumaConfigNone", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+  // CU_DEVICE_NUMA_CONFIG_NUMA_NODE
+  {"cudaDeviceNumaConfigNumaNode", {"hipDeviceNumaConfigNumaNode", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}},
+
   // 4. Typedefs
 
   // CUhostFn
@@ -2539,6 +2576,21 @@ const std::map CUDA_RUNTIME_TYPE_NAME_VER_MAP
   {"cudaDevAttrNumaConfig", {CUDA_122, CUDA_0, CUDA_0 }},
   {"cudaDevAttrNumaId", {CUDA_122, CUDA_0, CUDA_0 }},
   {"cudaDevAttrHostNumaId", {CUDA_122, CUDA_0, CUDA_0 }},
+  {"cudaMemLocationTypeHost", {CUDA_122, CUDA_0, CUDA_0 }},
+  {"cudaMemLocationTypeHostNuma", {CUDA_122, CUDA_0, CUDA_0 }},
+  {"cudaMemLocationTypeHostNumaCurrent", {CUDA_122, CUDA_0, CUDA_0 }},
+  {"cudaMemAllocNodeParamsV2", {CUDA_122, CUDA_0, CUDA_0 }},
+  {"cudaMemFreeNodeParams", {CUDA_122, CUDA_0, CUDA_0 }},
+  {"cudaKernelNodeParamsV2", {CUDA_122, CUDA_0, CUDA_0 }},
+  {"cudaExternalSemaphoreSignalNodeParamsV2", {CUDA_122, CUDA_0, CUDA_0 }},
+  {"cudaExternalSemaphoreWaitNodeParamsV2", {CUDA_122, CUDA_0, CUDA_0 }},
+  {"cudaChildGraphNodeParams", {CUDA_122, CUDA_0, CUDA_0 }},
+  {"cudaEventRecordNodeParams", {CUDA_122, CUDA_0, CUDA_0 }},
+  {"cudaEventWaitNodeParams", {CUDA_122, CUDA_0, CUDA_0 }},
+  {"cudaGraphNodeParams", {CUDA_122, CUDA_0, CUDA_0 }},
+  {"cudaDeviceNumaConfig", {CUDA_122, CUDA_0, CUDA_0 }},
+  {"cudaDeviceNumaConfigNone", {CUDA_122, CUDA_0, CUDA_0 }},
+  {"cudaDeviceNumaConfigNumaNode", {CUDA_122, CUDA_0, CUDA_0 }},
 };
 
 const std::map HIP_RUNTIME_TYPE_NAME_VER_MAP {

From eeddf18bd127e46bb8f46f06a9299ef0c6196833 Mon Sep 17 00:00:00 2001
From: Evgeny Mankov
Date: Sat, 15 Jul 2023 14:31:39 +0200
Subject: [PATCH 13/20] [HIPIFY][#674][rocSPARSE][feature] rocSPARSE support -
 Step 19 - functions

+ Updated synthetic tests and the regenerated hipify-perl and SPARSE docs
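+ [Example] a minimal before/after sketch (hypothetical user code; the signatures are the ones
  exercised by the synthetic tests below):

    size_t bufferSize = 0;
    // CUDA:
    cusparseXcsrsort_bufferSizeExt(handle_t, m, n, nnz, &csrRowPtrA, &csrColIndA, &bufferSize);
    // after hipification with the rocSPARSE backend:
    rocsparse_csrsort_buffer_size(handle_t, m, n, nnz, &csrRowPtrA, &csrColIndA, &bufferSize);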
subst("cusparseCreateHybMat", "rocsparse_create_hyb_mat", "library"); + subst("cusparseCreateIdentityPermutation", "rocsparse_create_identity_permutation", "library"); subst("cusparseCreateMatDescr", "rocsparse_create_mat_descr", "library"); subst("cusparseCreateSpVec", "rocsparse_create_spvec_descr", "library"); subst("cusparseCscSetPointers", "rocsparse_csc_set_pointers", "library"); @@ -1543,6 +1544,11 @@ sub rocSubstitutions { subst("cusparseSpVecSetValues", "rocsparse_spvec_set_values", "library"); subst("cusparseXcoosortByColumn", "rocsparse_coosort_by_column", "library"); subst("cusparseXcoosortByRow", "rocsparse_coosort_by_row", "library"); + subst("cusparseXcoosort_bufferSizeExt", "rocsparse_coosort_buffer_size", "library"); + subst("cusparseXcscsort", "rocsparse_cscsort", "library"); + subst("cusparseXcscsort_bufferSizeExt", "rocsparse_cscsort_buffer_size", "library"); + subst("cusparseXcsrsort", "rocsparse_csrsort", "library"); + subst("cusparseXcsrsort_bufferSizeExt", "rocsparse_csrsort_buffer_size", "library"); subst("cusparseXgebsr2gebsrNnz", "rocsparse_gebsr2gebsr_nnz", "library"); subst("cusparseZbsr2csr", "rocsparse_zbsr2csr", "library"); subst("cusparseZcsrcolor", "rocsparse_zcsrcolor", "library"); diff --git a/docs/tables/CUSPARSE_API_supported_by_HIP_and_ROC.md b/docs/tables/CUSPARSE_API_supported_by_HIP_and_ROC.md index bbc87ed2..34a5e78c 100644 --- a/docs/tables/CUSPARSE_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUSPARSE_API_supported_by_HIP_and_ROC.md @@ -645,7 +645,7 @@ |`cusparseCnnz`| | | |`hipsparseCnnz`|3.2.0| | | | | | | | | |`cusparseCnnz_compress`|8.0| | |`hipsparseCnnz_compress`|3.5.0| | | | | | | | | |`cusparseCreateCsru2csrInfo`| | | |`hipsparseCreateCsru2csrInfo`|4.2.0| | | | | | | | | -|`cusparseCreateIdentityPermutation`| | | |`hipsparseCreateIdentityPermutation`|1.9.2| | | | | | | | | +|`cusparseCreateIdentityPermutation`| | | |`hipsparseCreateIdentityPermutation`|1.9.2| | | |`rocsparse_create_identity_permutation`|1.9.0| | | | |`cusparseCsr2cscEx`|8.0|10.2|11.0| | | | | | | | | | | |`cusparseCsr2cscEx2`|10.1| | |`hipsparseCsr2cscEx2`|5.4.0| | | | | | | | | |`cusparseCsr2cscEx2_bufferSize`|10.1| | |`hipsparseCsr2cscEx2_bufferSize`|5.4.0| | | | | | | | | @@ -747,14 +747,14 @@ |`cusparseXcoo2csr`| | | |`hipsparseXcoo2csr`|1.9.2| | | | | | | | | |`cusparseXcoosortByColumn`| | | |`hipsparseXcoosortByColumn`|1.9.2| | | |`rocsparse_coosort_by_column`|1.9.0| | | | |`cusparseXcoosortByRow`| | | |`hipsparseXcoosortByRow`|1.9.2| | | |`rocsparse_coosort_by_row`|1.9.0| | | | -|`cusparseXcoosort_bufferSizeExt`| | | |`hipsparseXcoosort_bufferSizeExt`|1.9.2| | | | | | | | | -|`cusparseXcscsort`| | | |`hipsparseXcscsort`|2.10.0| | | | | | | | | -|`cusparseXcscsort_bufferSizeExt`| | | |`hipsparseXcscsort_bufferSizeExt`|2.10.0| | | | | | | | | +|`cusparseXcoosort_bufferSizeExt`| | | |`hipsparseXcoosort_bufferSizeExt`|1.9.2| | | |`rocsparse_coosort_buffer_size`|1.9.0| | | | +|`cusparseXcscsort`| | | |`hipsparseXcscsort`|2.10.0| | | |`rocsparse_cscsort`|2.10.0| | | | +|`cusparseXcscsort_bufferSizeExt`| | | |`hipsparseXcscsort_bufferSizeExt`|2.10.0| | | |`rocsparse_cscsort_buffer_size`|2.10.0| | | | |`cusparseXcsr2bsrNnz`| | | |`hipsparseXcsr2bsrNnz`|3.5.0| | | | | | | | | |`cusparseXcsr2coo`| | | |`hipsparseXcsr2coo`|1.9.2| | | | | | | | | |`cusparseXcsr2gebsrNnz`| | | |`hipsparseXcsr2gebsrNnz`|4.1.0| | | | | | | | | -|`cusparseXcsrsort`| | | |`hipsparseXcsrsort`|1.9.2| | | | | | | | | -|`cusparseXcsrsort_bufferSizeExt`| | | 
|`hipsparseXcsrsort_bufferSizeExt`|1.9.2| | | | | | | | | +|`cusparseXcsrsort`| | | |`hipsparseXcsrsort`|1.9.2| | | |`rocsparse_csrsort`|1.9.0| | | | +|`cusparseXcsrsort_bufferSizeExt`| | | |`hipsparseXcsrsort_bufferSizeExt`|1.9.2| | | |`rocsparse_csrsort_buffer_size`|1.9.0| | | | |`cusparseXgebsr2csr`| | | | | | | | | | | | | | |`cusparseXgebsr2gebsrNnz`| | | |`hipsparseXgebsr2gebsrNnz`|4.1.0| | | |`rocsparse_gebsr2gebsr_nnz`|4.1.0| | | | |`cusparseZbsr2csr`| | | |`hipsparseZbsr2csr`|3.5.0| | | |`rocsparse_zbsr2csr`|3.10.0| | | | diff --git a/docs/tables/CUSPARSE_API_supported_by_ROC.md b/docs/tables/CUSPARSE_API_supported_by_ROC.md index 2dabb8b1..22f00e66 100644 --- a/docs/tables/CUSPARSE_API_supported_by_ROC.md +++ b/docs/tables/CUSPARSE_API_supported_by_ROC.md @@ -645,7 +645,7 @@ |`cusparseCnnz`| | | | | | | | | |`cusparseCnnz_compress`|8.0| | | | | | | | |`cusparseCreateCsru2csrInfo`| | | | | | | | | -|`cusparseCreateIdentityPermutation`| | | | | | | | | +|`cusparseCreateIdentityPermutation`| | | |`rocsparse_create_identity_permutation`|1.9.0| | | | |`cusparseCsr2cscEx`|8.0|10.2|11.0| | | | | | |`cusparseCsr2cscEx2`|10.1| | | | | | | | |`cusparseCsr2cscEx2_bufferSize`|10.1| | | | | | | | @@ -747,14 +747,14 @@ |`cusparseXcoo2csr`| | | | | | | | | |`cusparseXcoosortByColumn`| | | |`rocsparse_coosort_by_column`|1.9.0| | | | |`cusparseXcoosortByRow`| | | |`rocsparse_coosort_by_row`|1.9.0| | | | -|`cusparseXcoosort_bufferSizeExt`| | | | | | | | | -|`cusparseXcscsort`| | | | | | | | | -|`cusparseXcscsort_bufferSizeExt`| | | | | | | | | +|`cusparseXcoosort_bufferSizeExt`| | | |`rocsparse_coosort_buffer_size`|1.9.0| | | | +|`cusparseXcscsort`| | | |`rocsparse_cscsort`|2.10.0| | | | +|`cusparseXcscsort_bufferSizeExt`| | | |`rocsparse_cscsort_buffer_size`|2.10.0| | | | |`cusparseXcsr2bsrNnz`| | | | | | | | | |`cusparseXcsr2coo`| | | | | | | | | |`cusparseXcsr2gebsrNnz`| | | | | | | | | -|`cusparseXcsrsort`| | | | | | | | | -|`cusparseXcsrsort_bufferSizeExt`| | | | | | | | | +|`cusparseXcsrsort`| | | |`rocsparse_csrsort`|1.9.0| | | | +|`cusparseXcsrsort_bufferSizeExt`| | | |`rocsparse_csrsort_buffer_size`|1.9.0| | | | |`cusparseXgebsr2csr`| | | | | | | | | |`cusparseXgebsr2gebsrNnz`| | | |`rocsparse_gebsr2gebsr_nnz`|4.1.0| | | | |`cusparseZbsr2csr`| | | |`rocsparse_zbsr2csr`|3.10.0| | | | diff --git a/src/CUDA2HIP_SPARSE_API_functions.cpp b/src/CUDA2HIP_SPARSE_API_functions.cpp index b2a24b97..330f26cc 100644 --- a/src/CUDA2HIP_SPARSE_API_functions.cpp +++ b/src/CUDA2HIP_SPARSE_API_functions.cpp @@ -630,17 +630,17 @@ const std::map CUDA_SPARSE_FUNCTION_MAP { {"cusparseCnnz", {"hipsparseCnnz", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}}, {"cusparseZnnz", {"hipsparseZnnz", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}}, - {"cusparseCreateIdentityPermutation", {"hipsparseCreateIdentityPermutation", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}}, + {"cusparseCreateIdentityPermutation", {"hipsparseCreateIdentityPermutation", "rocsparse_create_identity_permutation", CONV_LIB_FUNC, API_SPARSE, 14}}, - {"cusparseXcoosort_bufferSizeExt", {"hipsparseXcoosort_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}}, + {"cusparseXcoosort_bufferSizeExt", {"hipsparseXcoosort_bufferSizeExt", "rocsparse_coosort_buffer_size", CONV_LIB_FUNC, API_SPARSE, 14}}, {"cusparseXcoosortByRow", {"hipsparseXcoosortByRow", "rocsparse_coosort_by_row", CONV_LIB_FUNC, API_SPARSE, 14}}, {"cusparseXcoosortByColumn", {"hipsparseXcoosortByColumn", "rocsparse_coosort_by_column", CONV_LIB_FUNC, 
API_SPARSE, 14}}, - {"cusparseXcsrsort_bufferSizeExt", {"hipsparseXcsrsort_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}}, - {"cusparseXcsrsort", {"hipsparseXcsrsort", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}}, + {"cusparseXcsrsort_bufferSizeExt", {"hipsparseXcsrsort_bufferSizeExt", "rocsparse_csrsort_buffer_size", CONV_LIB_FUNC, API_SPARSE, 14}}, + {"cusparseXcsrsort", {"hipsparseXcsrsort", "rocsparse_csrsort", CONV_LIB_FUNC, API_SPARSE, 14}}, - {"cusparseXcscsort_bufferSizeExt", {"hipsparseXcscsort_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}}, - {"cusparseXcscsort", {"hipsparseXcscsort", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}}, + {"cusparseXcscsort_bufferSizeExt", {"hipsparseXcscsort_bufferSizeExt", "rocsparse_cscsort_buffer_size", CONV_LIB_FUNC, API_SPARSE, 14}}, + {"cusparseXcscsort", {"hipsparseXcscsort", "rocsparse_cscsort", CONV_LIB_FUNC, API_SPARSE, 14}}, {"cusparseCreateCsru2csrInfo", {"hipsparseCreateCsru2csrInfo", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}}, {"cusparseDestroyCsru2csrInfo", {"hipsparseDestroyCsru2csrInfo", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}}, @@ -1895,6 +1895,12 @@ const std::map HIP_SPARSE_FUNCTION_VER_MAP { {"rocsparse_sbsr2csr", {HIP_3100, HIP_0, HIP_0 }}, {"rocsparse_coosort_by_column", {HIP_1090, HIP_0, HIP_0 }}, {"rocsparse_coosort_by_row", {HIP_1090, HIP_0, HIP_0 }}, + {"rocsparse_coosort_buffer_size", {HIP_1090, HIP_0, HIP_0 }}, + {"rocsparse_cscsort", {HIP_2100, HIP_0, HIP_0 }}, + {"rocsparse_cscsort_buffer_size", {HIP_2100, HIP_0, HIP_0 }}, + {"rocsparse_csrsort", {HIP_1090, HIP_0, HIP_0 }}, + {"rocsparse_csrsort_buffer_size", {HIP_1090, HIP_0, HIP_0 }}, + {"rocsparse_create_identity_permutation", {HIP_1090, HIP_0, HIP_0 }}, }; const std::map CUDA_SPARSE_API_SECTION_MAP { diff --git a/tests/unit_tests/synthetic/libraries/cusparse2hipsparse.cu b/tests/unit_tests/synthetic/libraries/cusparse2hipsparse.cu index 615a00b6..2755bc7e 100644 --- a/tests/unit_tests/synthetic/libraries/cusparse2hipsparse.cu +++ b/tests/unit_tests/synthetic/libraries/cusparse2hipsparse.cu @@ -125,6 +125,8 @@ int main() { int nnzb = 0; int innz = 0; int blockDim = 0; + int cscRowIndA = 0; + int cscColPtrA = 0; int csrRowPtrA = 0; int csrColIndA = 0; int ncolors = 0; @@ -412,6 +414,36 @@ int main() { // CHECK: status_t = hipsparseXcoosortByRow(handle_t, m, n, innz, &icooRows, &icooColumns, P, pBuffer); status_t = cusparseXcoosortByRow(handle_t, m, n, innz, &icooRows, &icooColumns, P, pBuffer); + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcoosort_bufferSizeExt(cusparseHandle_t handle, int m, int n, int nnz, const int* cooRowsA, const int* cooColsA, size_t* pBufferSizeInBytes); + // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseXcoosort_bufferSizeExt(hipsparseHandle_t handle, int m, int n, int nnz, const int* cooRows, const int* cooCols, size_t* pBufferSizeInBytes); + // CHECK: status_t = hipsparseXcoosort_bufferSizeExt(handle_t, m, n, innz, &icooRows, &icooColumns, &bufferSize); + status_t = cusparseXcoosort_bufferSizeExt(handle_t, m, n, innz, &icooRows, &icooColumns, &bufferSize); + + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcscsort(cusparseHandle_t handle, int m, int n, int nnz, const cusparseMatDescr_t descrA, const int* cscColPtrA, int* cscRowIndA, int* P, void* pBuffer); + // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseXcscsort(hipsparseHandle_t handle, int m, int n, int nnz, const hipsparseMatDescr_t descrA, const int* cscColPtr, int* cscRowInd, int* P, void* 
pBuffer); + // CHECK: status_t = hipsparseXcscsort(handle_t, m, n, innz, matDescr_A, &cscColPtrA, &cscRowIndA, P, pBuffer); + status_t = cusparseXcscsort(handle_t, m, n, innz, matDescr_A, &cscColPtrA, &cscRowIndA, P, pBuffer); + + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcscsort_bufferSizeExt(cusparseHandle_t handle, int m, int n, int nnz, const int* cscColPtrA, const int* cscRowIndA, size_t* pBufferSizeInBytes); + // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseXcscsort_bufferSizeExt(hipsparseHandle_t handle, int m, int n, int nnz, const int* cscColPtr, const int* cscRowInd, size_t* pBufferSizeInBytes); + // CHECK: status_t = hipsparseXcscsort_bufferSizeExt(handle_t, m, n, innz, &cscColPtrA, &cscRowIndA, &bufferSize); + status_t = cusparseXcscsort_bufferSizeExt(handle_t, m, n, innz, &cscColPtrA, &cscRowIndA, &bufferSize); + + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcsrsort(cusparseHandle_t handle, int m, int n, int nnz, const cusparseMatDescr_t descrA, const int* csrRowPtrA, int* csrColIndA, int* P, void* pBuffer); + // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseXcsrsort(hipsparseHandle_t handle, int m, int n, int nnz, const hipsparseMatDescr_t descrA, const int* csrRowPtr, int* csrColInd, int* P, void* pBuffer); + // CHECK: status_t = hipsparseXcsrsort(handle_t, m, n, innz, matDescr_A, &cscRowIndA, &cscColPtrA, P, pBuffer); + status_t = cusparseXcsrsort(handle_t, m, n, innz, matDescr_A, &cscRowIndA, &cscColPtrA, P, pBuffer); + + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcsrsort_bufferSizeExt(cusparseHandle_t handle, int m, int n, int nnz, const int* csrRowPtrA, const int* csrColIndA, size_t* pBufferSizeInBytes); + // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseXcsrsort_bufferSizeExt(hipsparseHandle_t handle, int m, int n, int nnz, const int* csrRowPtr, const int* csrColInd, size_t* pBufferSizeInBytes); + // CHECK: status_t = hipsparseXcsrsort_bufferSizeExt(handle_t, m, n, innz, &cscRowIndA, &cscColPtrA, &bufferSize); + status_t = cusparseXcsrsort_bufferSizeExt(handle_t, m, n, innz, &cscRowIndA, &cscColPtrA, &bufferSize); + + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseCreateIdentityPermutation(cusparseHandle_t handle, int n, int* p); + // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseCreateIdentityPermutation(hipsparseHandle_t handle, int n, int* p); + // CHECK: status_t = hipsparseCreateIdentityPermutation(handle_t, n, P); + status_t = cusparseCreateIdentityPermutation(handle_t, n, P); + #if CUDA_VERSION >= 8000 // CHECK: hipDataType dataType_t; // CHECK-NEXT: hipDataType dataType; diff --git a/tests/unit_tests/synthetic/libraries/cusparse2rocsparse.cu b/tests/unit_tests/synthetic/libraries/cusparse2rocsparse.cu index 4e4d65f6..9565220e 100644 --- a/tests/unit_tests/synthetic/libraries/cusparse2rocsparse.cu +++ b/tests/unit_tests/synthetic/libraries/cusparse2rocsparse.cu @@ -125,6 +125,8 @@ int main() { int nnzb = 0; int innz = 0; int blockDim = 0; + int cscRowIndA = 0; + int cscColPtrA = 0; int csrRowPtrA = 0; int csrColIndA = 0; int ncolors = 0; @@ -416,6 +418,36 @@ int main() { // CHECK: status_t = rocsparse_coosort_by_row(handle_t, m, n, innz, &icooRows, &icooColumns, P, pBuffer); status_t = cusparseXcoosortByRow(handle_t, m, n, innz, &icooRows, &icooColumns, P, pBuffer); + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcoosort_bufferSizeExt(cusparseHandle_t handle, int m, int n, int nnz, const int* cooRowsA, const int* cooColsA, size_t* pBufferSizeInBytes); + // ROC: ROCSPARSE_EXPORT rocsparse_status 
rocsparse_coosort_buffer_size(rocsparse_handle handle, rocsparse_int m, rocsparse_int n, rocsparse_int nnz, const rocsparse_int* coo_row_ind, const rocsparse_int* coo_col_ind, size_t* buffer_size); + // CHECK: status_t = rocsparse_coosort_buffer_size(handle_t, m, n, innz, &icooRows, &icooColumns, &bufferSize); + status_t = cusparseXcoosort_bufferSizeExt(handle_t, m, n, innz, &icooRows, &icooColumns, &bufferSize); + + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcscsort(cusparseHandle_t handle, int m, int n, int nnz, const cusparseMatDescr_t descrA, const int* cscColPtrA, int* cscRowIndA, int* P, void* pBuffer); + // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_cscsort(rocsparse_handle handle, rocsparse_int m, rocsparse_int n, rocsparse_int nnz, const rocsparse_mat_descr descr, const rocsparse_int* csc_col_ptr, rocsparse_int* csc_row_ind, rocsparse_int* perm, void* temp_buffer); + // CHECK: status_t = rocsparse_cscsort(handle_t, m, n, innz, matDescr_A, &cscColPtrA, &cscRowIndA, P, pBuffer); + status_t = cusparseXcscsort(handle_t, m, n, innz, matDescr_A, &cscColPtrA, &cscRowIndA, P, pBuffer); + + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcscsort_bufferSizeExt(cusparseHandle_t handle, int m, int n, int nnz, const int* cscColPtrA, const int* cscRowIndA, size_t* pBufferSizeInBytes); + // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_cscsort_buffer_size(rocsparse_handle handle, rocsparse_int m, rocsparse_int n, rocsparse_int nnz, const rocsparse_int* csc_col_ptr, const rocsparse_int* csc_row_ind, size_t* buffer_size); + // CHECK: status_t = rocsparse_cscsort_buffer_size(handle_t, m, n, innz, &cscColPtrA, &cscRowIndA, &bufferSize); + status_t = cusparseXcscsort_bufferSizeExt(handle_t, m, n, innz, &cscColPtrA, &cscRowIndA, &bufferSize); + + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcsrsort(cusparseHandle_t handle, int m, int n, int nnz, const cusparseMatDescr_t descrA, const int* csrRowPtrA, int* csrColIndA, int* P, void* pBuffer); + // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_csrsort(rocsparse_handle handle, rocsparse_int m, rocsparse_int n, rocsparse_int nnz, const rocsparse_mat_descr descr, const rocsparse_int* csr_row_ptr, rocsparse_int* csr_col_ind, rocsparse_int* perm, void* temp_buffer); + // CHECK: status_t = rocsparse_csrsort(handle_t, m, n, innz, matDescr_A, &cscRowIndA, &cscColPtrA, P, pBuffer); + status_t = cusparseXcsrsort(handle_t, m, n, innz, matDescr_A, &cscRowIndA, &cscColPtrA, P, pBuffer); + + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcsrsort_bufferSizeExt(cusparseHandle_t handle, int m, int n, int nnz, const int* csrRowPtrA, const int* csrColIndA, size_t* pBufferSizeInBytes); + // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_csrsort_buffer_size(rocsparse_handle handle, rocsparse_int m, rocsparse_int n, rocsparse_int nnz, const rocsparse_int* csr_row_ptr, const rocsparse_int* csr_col_ind, size_t* buffer_size); + // CHECK: status_t = rocsparse_csrsort_buffer_size(handle_t, m, n, innz, &cscRowIndA, &cscColPtrA, &bufferSize); + status_t = cusparseXcsrsort_bufferSizeExt(handle_t, m, n, innz, &cscRowIndA, &cscColPtrA, &bufferSize); + + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseCreateIdentityPermutation(cusparseHandle_t handle, int n, int* p); + // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_create_identity_permutation(rocsparse_handle handle, rocsparse_int n, rocsparse_int* p); + // CHECK: status_t = rocsparse_create_identity_permutation(handle_t, n, P); + status_t = cusparseCreateIdentityPermutation(handle_t, n, P); + #if CUDA_VERSION >= 
8000 // CHECK: hipDataType dataType_t; // TODO: [#899] There should be rocsparse_datatype From d6005f05b2b7961453f5cd75d8718a3882564302 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Sun, 16 Jul 2023 16:13:53 +0200 Subject: [PATCH 14/20] [HIPIFY] Sync with CUDA 12.2.0 - Part 7 - Device API - bfp16 functions + Updated the regenerated hipify-perl and docs accordingly --- bin/hipify-perl | 4 ++++ docs/tables/CUDA_Device_API_supported_by_HIP.md | 4 ++++ src/CUDA2HIP_Device_functions.cpp | 10 +++++++++- 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 3f044e5d..b5c75213 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -5886,6 +5886,7 @@ sub warnUnsupportedDeviceFunctions { "mulhi", "mul64hi", "mul24", + "make_bfloat162", "llmin", "llmax", "int_as_float", @@ -6094,6 +6095,7 @@ sub warnUnsupportedDeviceFunctions { "__float2bfloat16_rd", "__float2bfloat162_rn", "__float2bfloat16", + "__float22bfloat162_rn", "__finitel", "__finitef", "__finite", @@ -6138,6 +6140,7 @@ sub warnUnsupportedDeviceFunctions { "__bfloat162uint_ru", "__bfloat162uint_rn", "__bfloat162uint_rd", + "__bfloat162uchar_rz", "__bfloat162short_rz", "__bfloat162short_ru", "__bfloat162short_rn", @@ -6151,6 +6154,7 @@ sub warnUnsupportedDeviceFunctions { "__bfloat162int_rn", "__bfloat162int_rd", "__bfloat162float", + "__bfloat162char_rz", "__bfloat162bfloat162", "__bfloat1622float2", "_Pow_int" diff --git a/docs/tables/CUDA_Device_API_supported_by_HIP.md b/docs/tables/CUDA_Device_API_supported_by_HIP.md index a44ed3a9..efa8b2d6 100644 --- a/docs/tables/CUDA_Device_API_supported_by_HIP.md +++ b/docs/tables/CUDA_Device_API_supported_by_HIP.md @@ -12,6 +12,7 @@ |`__ballot`| | | |`__ballot`|1.6.0| | | | |`__bfloat1622float2`|11.0| | | | | | | | |`__bfloat162bfloat162`|11.0| | | | | | | | +|`__bfloat162char_rz`|12.2| | | | | | | | |`__bfloat162float`|11.0| | | | | | | | |`__bfloat162int_rd`|11.0| | | | | | | | |`__bfloat162int_rn`|11.0| | | | | | | | @@ -25,6 +26,7 @@ |`__bfloat162short_rn`|11.0| | | | | | | | |`__bfloat162short_ru`|11.0| | | | | | | | |`__bfloat162short_rz`|11.0| | | | | | | | +|`__bfloat162uchar_rz`|12.2| | | | | | | | |`__bfloat162uint_rd`|11.0| | | | | | | | |`__bfloat162uint_rn`|11.0| | | | | | | | |`__bfloat162uint_ru`|11.0| | | | | | | | @@ -111,6 +113,7 @@ |`__finite`| | | | | | | | | |`__finitef`| | | | | | | | | |`__finitel`| | | | | | | | | +|`__float22bfloat162_rn`|11.0| | | | | | | | |`__float22half2_rn`| | | |`__float22half2_rn`|1.6.0| | | | |`__float2bfloat16`|11.0| | | | | | | | |`__float2bfloat162_rn`|11.0| | | | | | | | @@ -707,6 +710,7 @@ |`lrintf`| | | |`lrintf`|1.6.0| | | | |`lround`| | | |`lround`|1.6.0| | | | |`lroundf`| | | |`lroundf`|1.6.0| | | | +|`make_bfloat162`|12.2| | | | | | | | |`max`| | | |`max`|1.6.0| | | | |`min`| | | |`min`|1.6.0| | | | |`modf`| | | |`modf`|1.9.0| | | | diff --git a/src/CUDA2HIP_Device_functions.cpp b/src/CUDA2HIP_Device_functions.cpp index 9d9511e4..55932cf9 100644 --- a/src/CUDA2HIP_Device_functions.cpp +++ b/src/CUDA2HIP_Device_functions.cpp @@ -775,12 +775,16 @@ const std::map CUDA_DEVICE_FUNCTION_MAP { {"__high2bfloat16", {"__high2bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__low2bfloat16", {"__low2bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__halves2bfloat162", {"__halves2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__low2bfloat162", {"__halves2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__low2bfloat162", 
{"__low2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__high2bfloat162", {"__high2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__bfloat16_as_short", {"__bfloat16_as_short", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__bfloat16_as_ushort", {"__bfloat16_as_ushort", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__short_as_bfloat16", {"__short_as_bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__ushort_as_bfloat16", {"__ushort_as_bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__float22bfloat162_rn", {"__float22bfloat162_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162char_rz", {"__bfloat162char_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162uchar_rz", {"__bfloat162uchar_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"make_bfloat162", {"make_bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, // atomic functions {"atomicAdd", {"atomicAdd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"atomicAdd_system", {"atomicAdd_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, @@ -945,6 +949,10 @@ const std::map CUDA_DEVICE_FUNCTION_VER_MAP { {"__hgeu2_mask", {CUDA_120, CUDA_0, CUDA_0 }}, {"__hltu2_mask", {CUDA_120, CUDA_0, CUDA_0 }}, {"__hgtu2_mask", {CUDA_120, CUDA_0, CUDA_0 }}, + {"__float22bfloat162_rn", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162char_rz", {CUDA_122, CUDA_0, CUDA_0 }}, + {"__bfloat162uchar_rz", {CUDA_122, CUDA_0, CUDA_0 }}, + {"make_bfloat162", {CUDA_122, CUDA_0, CUDA_0 }}, }; const std::map HIP_DEVICE_FUNCTION_VER_MAP { From 399659dc9c73e2d4a63d0a0af17d10c9353057ad Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Sun, 16 Jul 2023 19:26:55 +0200 Subject: [PATCH 15/20] [HIPIFY][#674][rocSPARSE][feature] rocSPARSE support - Step 20 - functions + [Fix] Revised `pruneInfo_t` in `hipSPARSE` and `rocSPARSE` + Updated synthetic tests and the regenerated hipify-perl and SPARSE docs --- bin/hipify-perl | 6 ++- docs/tables/CUSPARSE_API_supported_by_HIP.md | 2 +- .../CUSPARSE_API_supported_by_HIP_and_ROC.md | 8 ++-- docs/tables/CUSPARSE_API_supported_by_ROC.md | 8 ++-- src/CUDA2HIP_SPARSE_API_functions.cpp | 18 ++++++--- src/CUDA2HIP_SPARSE_API_types.cpp | 7 +++- .../synthetic/libraries/cusparse2hipsparse.cu | 37 +++++++++++++++++++ .../synthetic/libraries/cusparse2rocsparse.cu | 17 +++++++++ 8 files changed, 85 insertions(+), 18 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index b5c75213..98b8b8c2 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1504,6 +1504,7 @@ sub rocSubstitutions { subst("cusparseDnVecGet", "rocsparse_dnvec_get", "library"); subst("cusparseDnVecGetValues", "rocsparse_dnvec_get_values", "library"); subst("cusparseDnVecSetValues", "rocsparse_dnvec_set_values", "library"); + subst("cusparseDpruneCsr2csrByPercentage", "rocsparse_dprune_csr2csr_by_percentage", "library"); subst("cusparseGather", "rocsparse_gather", "library"); subst("cusparseGetMatDiagType", "rocsparse_get_mat_diag_type", "library"); subst("cusparseGetMatFillMode", "rocsparse_get_mat_fill_mode", "library"); @@ -1542,6 +1543,7 @@ sub rocSubstitutions { subst("cusparseSpVecGetIndexBase", "rocsparse_spvec_get_index_base", "library"); subst("cusparseSpVecGetValues", "rocsparse_spvec_get_values", "library"); subst("cusparseSpVecSetValues", "rocsparse_spvec_set_values", "library"); + subst("cusparseXcoo2csr", "rocsparse_coo2csr", "library"); subst("cusparseXcoosortByColumn", "rocsparse_coosort_by_column", "library"); 
subst("cusparseXcoosortByRow", "rocsparse_coosort_by_row", "library"); subst("cusparseXcoosort_bufferSizeExt", "rocsparse_coosort_buffer_size", "library"); @@ -1650,6 +1652,8 @@ sub rocSubstitutions { subst("cusparseSpVecDescr_t", "rocsparse_spvec_descr", "type"); subst("cusparseSparseToDenseAlg_t", "rocsparse_sparse_to_dense_alg", "type"); subst("cusparseStatus_t", "rocsparse_status", "type"); + subst("pruneInfo", "_rocsparse_mat_info", "type"); + subst("pruneInfo_t", "rocsparse_mat_info", "type"); subst("CUBLAS_ATOMICS_ALLOWED", "rocblas_atomics_allowed", "numeric_literal"); subst("CUBLAS_ATOMICS_NOT_ALLOWED", "rocblas_atomics_not_allowed", "numeric_literal"); subst("CUBLAS_DIAG_NON_UNIT", "rocblas_diagonal_non_unit", "numeric_literal"); @@ -4065,6 +4069,7 @@ sub simpleSubstitutions { subst("cusparseStatus_t", "hipsparseStatus_t", "type"); subst("nvrtcProgram", "hiprtcProgram", "type"); subst("nvrtcResult", "hiprtcResult", "type"); + subst("pruneInfo", "pruneInfo", "type"); subst("pruneInfo_t", "pruneInfo_t", "type"); subst("surfaceReference", "surfaceReference", "type"); subst("texture", "texture", "type"); @@ -6228,7 +6233,6 @@ sub warnUnsupportedFunctions { my $line_num = shift; my $k = 0; foreach $func ( - "pruneInfo", "nvrtcGetSupportedArchs", "nvrtcGetOptiXIRSize", "nvrtcGetOptiXIR", diff --git a/docs/tables/CUSPARSE_API_supported_by_HIP.md b/docs/tables/CUSPARSE_API_supported_by_HIP.md index 496269da..089f9327 100644 --- a/docs/tables/CUSPARSE_API_supported_by_HIP.md +++ b/docs/tables/CUSPARSE_API_supported_by_HIP.md @@ -183,7 +183,7 @@ |`cusparseSpVecDescr_t`|10.2| | |`hipsparseSpVecDescr_t`|4.1.0| | | | |`cusparseSparseToDenseAlg_t`|11.1| | |`hipsparseSparseToDenseAlg_t`|4.2.0| | | | |`cusparseStatus_t`| | | |`hipsparseStatus_t`|1.9.2| | | | -|`pruneInfo`|9.0| | | | | | | | +|`pruneInfo`|9.0| | |`pruneInfo`|3.9.0| | | | |`pruneInfo_t`|9.0| | |`pruneInfo_t`|3.9.0| | | | ## **5. CUSPARSE Management Function Reference** diff --git a/docs/tables/CUSPARSE_API_supported_by_HIP_and_ROC.md b/docs/tables/CUSPARSE_API_supported_by_HIP_and_ROC.md index 34a5e78c..07efeb46 100644 --- a/docs/tables/CUSPARSE_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUSPARSE_API_supported_by_HIP_and_ROC.md @@ -183,8 +183,8 @@ |`cusparseSpVecDescr_t`|10.2| | |`hipsparseSpVecDescr_t`|4.1.0| | | |`rocsparse_spvec_descr`|4.1.0| | | | |`cusparseSparseToDenseAlg_t`|11.1| | |`hipsparseSparseToDenseAlg_t`|4.2.0| | | |`rocsparse_sparse_to_dense_alg`|4.1.0| | | | |`cusparseStatus_t`| | | |`hipsparseStatus_t`|1.9.2| | | |`rocsparse_status`|1.9.0| | | | -|`pruneInfo`|9.0| | | | | | | | | | | | | -|`pruneInfo_t`|9.0| | |`pruneInfo_t`|3.9.0| | | | | | | | | +|`pruneInfo`|9.0| | |`pruneInfo`|3.9.0| | | |`_rocsparse_mat_info`|1.9.0| | | | +|`pruneInfo_t`|9.0| | |`pruneInfo_t`|3.9.0| | | |`rocsparse_mat_info`|1.9.0| | | | ## **5. 
CUSPARSE Management Function Reference** @@ -680,7 +680,7 @@ |`cusparseDnnz`| | | |`hipsparseDnnz`|3.2.0| | | | | | | | | |`cusparseDnnz_compress`|8.0| | |`hipsparseDnnz_compress`|3.5.0| | | | | | | | | |`cusparseDpruneCsr2csr`|9.0| | |`hipsparseDpruneCsr2csr`|3.9.0| | | | | | | | | -|`cusparseDpruneCsr2csrByPercentage`|9.0| | |`hipsparseDpruneCsr2csrByPercentage`|3.9.0| | | | | | | | | +|`cusparseDpruneCsr2csrByPercentage`|9.0| | |`hipsparseDpruneCsr2csrByPercentage`|3.9.0| | | |`rocsparse_dprune_csr2csr_by_percentage`|3.9.0| | | | |`cusparseDpruneCsr2csrByPercentage_bufferSizeExt`|9.0| | |`hipsparseDpruneCsr2csrByPercentage_bufferSizeExt`|3.9.0| | | | | | | | | |`cusparseDpruneCsr2csrNnz`|9.0| | |`hipsparseDpruneCsr2csrNnz`|3.9.0| | | | | | | | | |`cusparseDpruneCsr2csrNnzByPercentage`|9.0| | |`hipsparseDpruneCsr2csrNnzByPercentage`|3.9.0| | | | | | | | | @@ -744,7 +744,7 @@ |`cusparseSpruneDense2csrNnz`|9.0| | |`hipsparseSpruneDense2csrNnz`|3.9.0| | | | | | | | | |`cusparseSpruneDense2csrNnzByPercentage`|9.0| | |`hipsparseSpruneDense2csrNnzByPercentage`|3.9.0| | | | | | | | | |`cusparseSpruneDense2csr_bufferSizeExt`|9.0| | |`hipsparseSpruneDense2csr_bufferSizeExt`|3.9.0| | | | | | | | | -|`cusparseXcoo2csr`| | | |`hipsparseXcoo2csr`|1.9.2| | | | | | | | | +|`cusparseXcoo2csr`| | | |`hipsparseXcoo2csr`|1.9.2| | | |`rocsparse_coo2csr`|1.9.0| | | | |`cusparseXcoosortByColumn`| | | |`hipsparseXcoosortByColumn`|1.9.2| | | |`rocsparse_coosort_by_column`|1.9.0| | | | |`cusparseXcoosortByRow`| | | |`hipsparseXcoosortByRow`|1.9.2| | | |`rocsparse_coosort_by_row`|1.9.0| | | | |`cusparseXcoosort_bufferSizeExt`| | | |`hipsparseXcoosort_bufferSizeExt`|1.9.2| | | |`rocsparse_coosort_buffer_size`|1.9.0| | | | diff --git a/docs/tables/CUSPARSE_API_supported_by_ROC.md b/docs/tables/CUSPARSE_API_supported_by_ROC.md index 22f00e66..aa15c8da 100644 --- a/docs/tables/CUSPARSE_API_supported_by_ROC.md +++ b/docs/tables/CUSPARSE_API_supported_by_ROC.md @@ -183,8 +183,8 @@ |`cusparseSpVecDescr_t`|10.2| | |`rocsparse_spvec_descr`|4.1.0| | | | |`cusparseSparseToDenseAlg_t`|11.1| | |`rocsparse_sparse_to_dense_alg`|4.1.0| | | | |`cusparseStatus_t`| | | |`rocsparse_status`|1.9.0| | | | -|`pruneInfo`|9.0| | | | | | | | -|`pruneInfo_t`|9.0| | | | | | | | +|`pruneInfo`|9.0| | |`_rocsparse_mat_info`|1.9.0| | | | +|`pruneInfo_t`|9.0| | |`rocsparse_mat_info`|1.9.0| | | | ## **5. 
CUSPARSE Management Function Reference** @@ -680,7 +680,7 @@ |`cusparseDnnz`| | | | | | | | | |`cusparseDnnz_compress`|8.0| | | | | | | | |`cusparseDpruneCsr2csr`|9.0| | | | | | | | -|`cusparseDpruneCsr2csrByPercentage`|9.0| | | | | | | | +|`cusparseDpruneCsr2csrByPercentage`|9.0| | |`rocsparse_dprune_csr2csr_by_percentage`|3.9.0| | | | |`cusparseDpruneCsr2csrByPercentage_bufferSizeExt`|9.0| | | | | | | | |`cusparseDpruneCsr2csrNnz`|9.0| | | | | | | | |`cusparseDpruneCsr2csrNnzByPercentage`|9.0| | | | | | | | @@ -744,7 +744,7 @@ |`cusparseSpruneDense2csrNnz`|9.0| | | | | | | | |`cusparseSpruneDense2csrNnzByPercentage`|9.0| | | | | | | | |`cusparseSpruneDense2csr_bufferSizeExt`|9.0| | | | | | | | -|`cusparseXcoo2csr`| | | | | | | | | +|`cusparseXcoo2csr`| | | |`rocsparse_coo2csr`|1.9.0| | | | |`cusparseXcoosortByColumn`| | | |`rocsparse_coosort_by_column`|1.9.0| | | | |`cusparseXcoosortByRow`| | | |`rocsparse_coosort_by_row`|1.9.0| | | | |`cusparseXcoosort_bufferSizeExt`| | | |`rocsparse_coosort_buffer_size`|1.9.0| | | | diff --git a/src/CUDA2HIP_SPARSE_API_functions.cpp b/src/CUDA2HIP_SPARSE_API_functions.cpp index 330f26cc..70562a19 100644 --- a/src/CUDA2HIP_SPARSE_API_functions.cpp +++ b/src/CUDA2HIP_SPARSE_API_functions.cpp @@ -551,7 +551,7 @@ const std::map CUDA_SPARSE_FUNCTION_MAP { {"cusparseCcsr2gebsr", {"hipsparseCcsr2gebsr", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}}, {"cusparseZcsr2gebsr", {"hipsparseZcsr2gebsr", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}}, - {"cusparseXcoo2csr", {"hipsparseXcoo2csr", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}}, + {"cusparseXcoo2csr", {"hipsparseXcoo2csr", "rocsparse_coo2csr", CONV_LIB_FUNC, API_SPARSE, 14}}, {"cusparseScsc2dense", {"hipsparseScsc2dense", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}}, {"cusparseDcsc2dense", {"hipsparseDcsc2dense", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}}, @@ -615,10 +615,14 @@ const std::map CUDA_SPARSE_FUNCTION_MAP { {"cusparseChyb2csc", {"hipsparseChyb2csc", "", CONV_LIB_FUNC, API_SPARSE, 14, UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}}, {"cusparseZhyb2csc", {"hipsparseZhyb2csc", "", CONV_LIB_FUNC, API_SPARSE, 14, UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}}, - {"cusparseShyb2csr", {"hipsparseShyb2csr", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED |CUDA_DEPRECATED | CUDA_REMOVED}}, - {"cusparseDhyb2csr", {"hipsparseDhyb2csr", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED |CUDA_DEPRECATED | CUDA_REMOVED}}, - {"cusparseChyb2csr", {"hipsparseChyb2csr", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED |CUDA_DEPRECATED | CUDA_REMOVED}}, - {"cusparseZhyb2csr", {"hipsparseZhyb2csr", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED |CUDA_DEPRECATED | CUDA_REMOVED}}, + // NOTE: rocsparse_shyb2csr has one additional parameter void* temp_buffer; see the hipsparseShyb2csr implementation, which wraps rocsparse_hyb2csr_buffer_size + rocsparse_shyb2csr + {"cusparseShyb2csr", {"hipsparseShyb2csr", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}}, + // NOTE: rocsparse_dhyb2csr has one additional parameter void* temp_buffer; see the hipsparseDhyb2csr implementation, which wraps rocsparse_hyb2csr_buffer_size + rocsparse_dhyb2csr + {"cusparseDhyb2csr", {"hipsparseDhyb2csr", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}}, + // NOTE: rocsparse_chyb2csr has one additional parameter void* temp_buffer; see the hipsparseChyb2csr implementation, which wraps rocsparse_hyb2csr_buffer_size + rocsparse_chyb2csr + {"cusparseChyb2csr", {"hipsparseChyb2csr", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}}, + // NOTE: rocsparse_zhyb2csr has one additional parameter void* temp_buffer; see the hipsparseZhyb2csr implementation, which wraps rocsparse_hyb2csr_buffer_size + rocsparse_zhyb2csr + {"cusparseZhyb2csr", {"hipsparseZhyb2csr", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}}, {"cusparseShyb2dense", {"hipsparseShyb2dense", "", CONV_LIB_FUNC, API_SPARSE, 14, UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}}, {"cusparseDhyb2dense", {"hipsparseDhyb2dense", "", CONV_LIB_FUNC, API_SPARSE, 14, UNSUPPORTED | CUDA_DEPRECATED | CUDA_REMOVED}}, @@ -698,7 +702,7 @@ const std::map CUDA_SPARSE_FUNCTION_MAP { {"cusparseHpruneCsr2csrByPercentage", {"hipsparseHpruneCsr2csrByPercentage", "", CONV_LIB_FUNC, API_SPARSE, 14, UNSUPPORTED}}, {"cusparseSpruneCsr2csrByPercentage", {"hipsparseSpruneCsr2csrByPercentage", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}}, - {"cusparseDpruneCsr2csrByPercentage", {"hipsparseDpruneCsr2csrByPercentage", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}}, + {"cusparseDpruneCsr2csrByPercentage", {"hipsparseDpruneCsr2csrByPercentage", "rocsparse_dprune_csr2csr_by_percentage", CONV_LIB_FUNC, API_SPARSE, 14}}, {"cusparseHpruneCsr2csrByPercentage_bufferSizeExt", {"hipsparseHpruneCsr2csrByPercentage_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, 14, UNSUPPORTED}}, {"cusparseSpruneCsr2csrByPercentage_bufferSizeExt", {"hipsparseSpruneCsr2csrByPercentage_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, 14, ROC_UNSUPPORTED}}, @@ -1901,6 +1905,8 @@ const std::map HIP_SPARSE_FUNCTION_VER_MAP { {"rocsparse_csrsort", {HIP_1090, HIP_0, HIP_0 }}, {"rocsparse_csrsort_buffer_size", {HIP_1090, HIP_0, HIP_0 }}, {"rocsparse_create_identity_permutation", {HIP_1090, HIP_0, HIP_0 }}, + {"rocsparse_coo2csr", {HIP_1090, HIP_0, HIP_0 }}, + {"rocsparse_dprune_csr2csr_by_percentage", {HIP_3090, HIP_0, HIP_0 }}, }; const std::map CUDA_SPARSE_API_SECTION_MAP { diff --git a/src/CUDA2HIP_SPARSE_API_types.cpp b/src/CUDA2HIP_SPARSE_API_types.cpp index 3ae22cf2..233912cd 100644 --- a/src/CUDA2HIP_SPARSE_API_types.cpp +++ b/src/CUDA2HIP_SPARSE_API_types.cpp @@ -68,8 +68,8 @@ const std::map CUDA_SPARSE_TYPE_NAME_MAP { {"cusparseColorInfo", {"hipsparseColorInfo", "_rocsparse_color_info", CONV_TYPE, API_SPARSE, 4, HIP_UNSUPPORTED}}, {"cusparseColorInfo_t", {"hipsparseColorInfo_t", "rocsparse_color_info", CONV_TYPE, API_SPARSE, 4}}, - {"pruneInfo", {"pruneInfo", "", CONV_TYPE, API_SPARSE, 4, UNSUPPORTED}}, - {"pruneInfo_t", {"pruneInfo_t", "", CONV_TYPE, API_SPARSE, 4, ROC_UNSUPPORTED}}, + {"pruneInfo", {"pruneInfo", "_rocsparse_mat_info", CONV_TYPE, API_SPARSE, 4}}, + {"pruneInfo_t", {"pruneInfo_t", "rocsparse_mat_info", CONV_TYPE, API_SPARSE, 4}}, {"cusparseSpMatDescr", {"hipsparseSpMatDescr", "_rocsparse_spmat_descr", CONV_TYPE, API_SPARSE, 4, HIP_UNSUPPORTED}}, {"cusparseSpMatDescr_t", {"hipsparseSpMatDescr_t", "rocsparse_spmat_descr", CONV_TYPE, API_SPARSE, 4}}, @@ -401,6 +401,7 @@ const std::map HIP_SPARSE_TYPE_NAME_VER_MAP { {"csrilu02Info_t", {HIP_1092, HIP_0, HIP_0 }}, {"bsrilu02Info_t", {HIP_3090, HIP_0, HIP_0 }}, {"csrgemm2Info_t", {HIP_2080, HIP_0, HIP_0 }}, + {"pruneInfo", {HIP_3090, HIP_0, HIP_0 }}, {"pruneInfo_t", {HIP_3090, HIP_0, HIP_0 }}, {"hipsparseAction_t", {HIP_1092, HIP_0, HIP_0 }}, {"HIPSPARSE_ACTION_SYMBOLIC", {HIP_1092, HIP_0, HIP_0 }}, @@ -631,4 +632,6 @@ const 
std::map HIP_SPARSE_TYPE_NAME_VER_MAP { {"rocsparse_dense_to_sparse_alg_default", {HIP_4010, HIP_0, HIP_0 }}, {"rocsparse_spgemm_alg", {HIP_4010, HIP_0, HIP_0 }}, {"rocsparse_spgemm_alg_default", {HIP_4010, HIP_0, HIP_0 }}, + {"_rocsparse_mat_info", {HIP_1090, HIP_0, HIP_0 }}, + {"rocsparse_mat_info", {HIP_1090, HIP_0, HIP_0 }}, }; diff --git a/tests/unit_tests/synthetic/libraries/cusparse2hipsparse.cu b/tests/unit_tests/synthetic/libraries/cusparse2hipsparse.cu index 2755bc7e..382bd209 100644 --- a/tests/unit_tests/synthetic/libraries/cusparse2hipsparse.cu +++ b/tests/unit_tests/synthetic/libraries/cusparse2hipsparse.cu @@ -159,6 +159,7 @@ int main() { void *indices = nullptr; void *values = nullptr; void *cooRowInd = nullptr; + int icooRowInd = 0; void *cscRowInd = nullptr; void *csrColInd = nullptr; void *cooColInd = nullptr; @@ -187,6 +188,7 @@ int main() { float ffractionToColor = 0.f; double bsrValA = 0.f; double csrValA = 0.f; + float fcsrValA = 0.f; double csrValC = 0.f; float csrSortedValA = 0.f; double dbsrSortedValA = 0.f; @@ -194,6 +196,9 @@ int main() { float fbsrSortedValA = 0.f; float fbsrSortedValC = 0.f; float fcsrSortedValC = 0.f; + double percentage = 0.f; + + pruneInfo_t prune_info; // CHECK: hipDoubleComplex dcomplex, dComplexbsrSortedValA, dComplexbsrSortedValC; cuDoubleComplex dcomplex, dComplexbsrSortedValA, dComplexbsrSortedValC; @@ -444,6 +449,11 @@ int main() { // CHECK: status_t = hipsparseCreateIdentityPermutation(handle_t, n, P); status_t = cusparseCreateIdentityPermutation(handle_t, n, P); + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcoo2csr(cusparseHandle_t handle, const int* cooRowInd, int nnz, int m, int* csrSortedRowPtr, cusparseIndexBase_t idxBase); + // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseXcoo2csr(hipsparseHandle_t handle, const int* cooRowInd, int nnz, int m, int* csrRowPtr, hipsparseIndexBase_t idxBase); + // CHECK: status_t = hipsparseXcoo2csr(handle_t, &icooRowInd, nnz, m, &csrRowPtrA, indexBase_t); + status_t = cusparseXcoo2csr(handle_t, &icooRowInd, nnz, m, &csrRowPtrA, indexBase_t); + #if CUDA_VERSION >= 8000 // CHECK: hipDataType dataType_t; // CHECK-NEXT: hipDataType dataType; @@ -451,6 +461,13 @@ int main() { cudaDataType dataType; #endif +#if CUDA_VERSION >= 9000 + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrByPercentage(cusparseHandle_t handle, int m, int n, int nnzA, const cusparseMatDescr_t descrA, const double* csrSortedValA, const int* csrSortedRowPtrA, const int* csrSortedColIndA, float percentage, const cusparseMatDescr_t descrC, double* csrSortedValC, const int* csrSortedRowPtrC, int* csrSortedColIndC, pruneInfo_t info, void* pBuffer); + // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseDpruneCsr2csrByPercentage(hipsparseHandle_t handle, int m, int n, int nnzA, const hipsparseMatDescr_t descrA, const double* csrValA, const int* csrRowPtrA, const int* csrColIndA, double percentage, const hipsparseMatDescr_t descrC, double* csrValC, const int* csrRowPtrC, int* csrColIndC, pruneInfo_t info, void* buffer); + // CHECK: status_t = hipsparseDpruneCsr2csrByPercentage(handle_t, m, n, nnz, matDescr_A, &csrValA, &csrRowPtrA, &csrColIndA, percentage, matDescr_C, &csrValC, &csrRowPtrC, &csrColIndC, prune_info, pBuffer); + status_t = cusparseDpruneCsr2csrByPercentage(handle_t, m, n, nnz, matDescr_A, &csrValA, &csrRowPtrA, &csrColIndA, percentage, matDescr_C, &csrValC, &csrRowPtrC, &csrColIndC, prune_info, pBuffer); +#endif + #if CUDA_VERSION >= 8000 && CUDA_VERSION < 12000 // CUDA: cusparseStatus_t CUSPARSEAPI 
cusparseCopyMatDescr(cusparseMatDescr_t dest, const cusparseMatDescr_t src); // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseCopyMatDescr(hipsparseMatDescr_t dest, const hipsparseMatDescr_t src); @@ -810,6 +827,26 @@ int main() { // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseDestroyHybMat(hipsparseHybMat_t hybA); // CHECK: status_t = hipsparseDestroyHybMat(hybMat_t); status_t = cusparseDestroyHybMat(hybMat_t); + + // CUDA: CUSPARSE_DEPRECATED cusparseStatus_t CUSPARSEAPI cusparseZhyb2csr(cusparseHandle_t handle, const cusparseMatDescr_t descrA, const cusparseHybMat_t hybA, cuDoubleComplex* csrSortedValA, int* csrSortedRowPtrA, int* csrSortedColIndA); + // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseZhyb2csr(hipsparseHandle_t handle, const hipsparseMatDescr_t descrA, const hipsparseHybMat_t hybA, hipDoubleComplex* csrSortedValA, int* csrSortedRowPtrA, int* csrSortedColIndA); + // CHECK: status_t = hipsparseZhyb2csr(handle_t, matDescr_t, hybMat_t, &dComplexbsrSortedValA, &csrRowPtrA, &csrColIndA); + status_t = cusparseZhyb2csr(handle_t, matDescr_t, hybMat_t, &dComplexbsrSortedValA, &csrRowPtrA, &csrColIndA); + + // CUDA: CUSPARSE_DEPRECATED cusparseStatus_t CUSPARSEAPI cusparseChyb2csr(cusparseHandle_t handle, const cusparseMatDescr_t descrA, const cusparseHybMat_t hybA, cuComplex* csrSortedValA, int* csrSortedRowPtrA, int* csrSortedColIndA); + // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseChyb2csr(hipsparseHandle_t handle, const hipsparseMatDescr_t descrA, const hipsparseHybMat_t hybA, hipComplex* csrSortedValA, int* csrSortedRowPtrA, int* csrSortedColIndA); + // CHECK: status_t = hipsparseChyb2csr(handle_t, matDescr_t, hybMat_t, &complex, &csrRowPtrA, &csrColIndA); + status_t = cusparseChyb2csr(handle_t, matDescr_t, hybMat_t, &complex, &csrRowPtrA, &csrColIndA); + + // CUDA: CUSPARSE_DEPRECATED cusparseStatus_t CUSPARSEAPI cusparseDhyb2csr(cusparseHandle_t handle, const cusparseMatDescr_t descrA, const cusparseHybMat_t hybA, double* csrSortedValA, int* csrSortedRowPtrA, int* csrSortedColIndA); + // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseDhyb2csr(hipsparseHandle_t handle, const hipsparseMatDescr_t descrA, const hipsparseHybMat_t hybA, double* csrSortedValA, int* csrSortedRowPtrA, int* csrSortedColIndA); + // CHECK: status_t = hipsparseDhyb2csr(handle_t, matDescr_t, hybMat_t, &csrValA, &csrRowPtrA, &csrColIndA); + status_t = cusparseDhyb2csr(handle_t, matDescr_t, hybMat_t, &csrValA, &csrRowPtrA, &csrColIndA); + + // CUDA: CUSPARSE_DEPRECATED cusparseStatus_t CUSPARSEAPI cusparseShyb2csr(cusparseHandle_t handle, const cusparseMatDescr_t descrA, const cusparseHybMat_t hybA, float* csrSortedValA, int* csrSortedRowPtrA, int* csrSortedColIndA); + // HIP: HIPSPARSE_EXPORT hipsparseStatus_t hipsparseShyb2csr(hipsparseHandle_t handle, const hipsparseMatDescr_t descrA, const hipsparseHybMat_t hybA, float* csrSortedValA, int* csrSortedRowPtrA, int* csrSortedColIndA); + // CHECK: status_t = hipsparseShyb2csr(handle_t, matDescr_t, hybMat_t, &fcsrValA, &csrRowPtrA, &csrColIndA); + status_t = cusparseShyb2csr(handle_t, matDescr_t, hybMat_t, &fcsrValA, &csrRowPtrA, &csrColIndA); #endif #if CUDA_VERSION >= 11010 && CUSPARSE_VERSION >= 11300 diff --git a/tests/unit_tests/synthetic/libraries/cusparse2rocsparse.cu b/tests/unit_tests/synthetic/libraries/cusparse2rocsparse.cu index 9565220e..9c484183 100644 --- a/tests/unit_tests/synthetic/libraries/cusparse2rocsparse.cu +++ b/tests/unit_tests/synthetic/libraries/cusparse2rocsparse.cu @@ -159,6 +159,7 @@ int main() { void *indices 
= nullptr; void *values = nullptr; void *cooRowInd = nullptr; + int icooRowInd = 0; void *cscRowInd = nullptr; void *csrColInd = nullptr; void *cooColInd = nullptr; @@ -194,6 +195,10 @@ int main() { float fbsrSortedValA = 0.f; float fbsrSortedValC = 0.f; float fcsrSortedValC = 0.f; + double percentage = 0.f; + + // CHECK: rocsparse_mat_info prune_info; + pruneInfo_t prune_info; // TODO: should be rocsparse_double_complex // TODO: add to TypeOverloads cuDoubleComplex -> rocsparse_double_complex under a new option --sparse @@ -448,6 +453,11 @@ int main() { // CHECK: status_t = rocsparse_create_identity_permutation(handle_t, n, P); status_t = cusparseCreateIdentityPermutation(handle_t, n, P); + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseXcoo2csr(cusparseHandle_t handle, const int* cooRowInd, int nnz, int m, int* csrSortedRowPtr, cusparseIndexBase_t idxBase); + // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_coo2csr(rocsparse_handle handle, const rocsparse_int* coo_row_ind, rocsparse_int nnz, rocsparse_int m, rocsparse_int* csr_row_ptr, rocsparse_index_base idx_base); + // CHECK: status_t = rocsparse_coo2csr(handle_t, &icooRowInd, nnz, m, &csrRowPtrA, indexBase_t); + status_t = cusparseXcoo2csr(handle_t, &icooRowInd, nnz, m, &csrRowPtrA, indexBase_t); + #if CUDA_VERSION >= 8000 // CHECK: hipDataType dataType_t; // TODO: [#899] There should be rocsparse_datatype @@ -456,6 +466,13 @@ int main() { cudaDataType dataType; #endif +#if CUDA_VERSION >= 9000 + // CUDA: cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrByPercentage(cusparseHandle_t handle, int m, int n, int nnzA, const cusparseMatDescr_t descrA, const double* csrSortedValA, const int* csrSortedRowPtrA, const int* csrSortedColIndA, float percentage, const cusparseMatDescr_t descrC, double* csrSortedValC, const int* csrSortedRowPtrC, int* csrSortedColIndC, pruneInfo_t info, void* pBuffer); + // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_dprune_csr2csr_by_percentage(rocsparse_handle handle, rocsparse_int m, rocsparse_int n, rocsparse_int nnz_A, const rocsparse_mat_descr csr_descr_A, const double* csr_val_A, const rocsparse_int* csr_row_ptr_A, const rocsparse_int* csr_col_ind_A, double percentage, const rocsparse_mat_descr csr_descr_C, double* csr_val_C, const rocsparse_int* csr_row_ptr_C, rocsparse_int* csr_col_ind_C, rocsparse_mat_info info, void* temp_buffer); + // CHECK: status_t = rocsparse_dprune_csr2csr_by_percentage(handle_t, m, n, nnz, matDescr_A, &csrValA, &csrRowPtrA, &csrColIndA, percentage, matDescr_C, &csrValC, &csrRowPtrC, &csrColIndC, prune_info, pBuffer); + status_t = cusparseDpruneCsr2csrByPercentage(handle_t, m, n, nnz, matDescr_A, &csrValA, &csrRowPtrA, &csrColIndA, percentage, matDescr_C, &csrValC, &csrRowPtrC, &csrColIndC, prune_info, pBuffer); +#endif + #if CUDA_VERSION >= 8000 && CUDA_VERSION < 12000 // CUDA: cusparseStatus_t CUSPARSEAPI cusparseCopyMatDescr(cusparseMatDescr_t dest, const cusparseMatDescr_t src); // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_copy_mat_descr(rocsparse_mat_descr dest, const rocsparse_mat_descr src); From f68444e872bb95bec7ec92461f105e6173f5d0c6 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 17 Jul 2023 12:10:14 +0200 Subject: [PATCH 16/20] [HIPIFY] Sync with CUDA 12.2.0 - Part 8 - Device API - fp16 functions + Updated the regenerated hipify-perl and docs accordingly --- bin/hipify-perl | 3 +++ docs/tables/CUDA_Device_API_supported_by_HIP.md | 3 +++ src/CUDA2HIP_Device_functions.cpp | 6 ++++++ 3 files changed, 12 insertions(+) diff --git a/bin/hipify-perl 
b/bin/hipify-perl index 98b8b8c2..313e9482 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -5891,6 +5891,7 @@ sub warnUnsupportedDeviceFunctions { "mulhi", "mul64hi", "mul24", + "make_half2", "make_bfloat162", "llmin", "llmax", @@ -6073,6 +6074,8 @@ sub warnUnsupportedDeviceFunctions { "__heq2_mask", "__hcmadd", "__halves2bfloat162", + "__half2uchar_rz", + "__half2char_rz", "__hadd_rn", "__hadd2_rn", "__fsub_rz", diff --git a/docs/tables/CUDA_Device_API_supported_by_HIP.md b/docs/tables/CUDA_Device_API_supported_by_HIP.md index efa8b2d6..a38bc6b4 100644 --- a/docs/tables/CUDA_Device_API_supported_by_HIP.md +++ b/docs/tables/CUDA_Device_API_supported_by_HIP.md @@ -186,6 +186,7 @@ |`__hadd_rn`|11.6| | | | | | | | |`__hadd_sat`| | | |`__hadd_sat`|1.6.0| | | | |`__half22float2`| | | |`__half22float2`|1.6.0| | | | +|`__half2char_rz`|12.2| | | | | | | | |`__half2float`| | | |`__half2float`|1.6.0| | | | |`__half2half2`| | | |`__half2half2`|1.9.0| | | | |`__half2int_rd`| | | |`__half2int_rd`|1.6.0| | | | @@ -200,6 +201,7 @@ |`__half2short_rn`| | | |`__half2short_rn`|1.6.0| | | | |`__half2short_ru`| | | |`__half2short_ru`|1.6.0| | | | |`__half2short_rz`| | | |`__half2short_rz`|1.6.0| | | | +|`__half2uchar_rz`|12.2| | | | | | | | |`__half2uint_rd`| | | |`__half2uint_rd`|1.6.0| | | | |`__half2uint_rn`| | | |`__half2uint_rn`|1.6.0| | | | |`__half2uint_ru`| | | |`__half2uint_ru`|1.6.0| | | | @@ -711,6 +713,7 @@ |`lround`| | | |`lround`|1.6.0| | | | |`lroundf`| | | |`lroundf`|1.6.0| | | | |`make_bfloat162`|12.2| | | | | | | | +|`make_half2`|12.2| | | | | | | | |`max`| | | |`max`|1.6.0| | | | |`min`| | | |`min`|1.6.0| | | | |`modf`| | | |`modf`|1.9.0| | | | diff --git a/src/CUDA2HIP_Device_functions.cpp b/src/CUDA2HIP_Device_functions.cpp index 55932cf9..1b3c83ae 100644 --- a/src/CUDA2HIP_Device_functions.cpp +++ b/src/CUDA2HIP_Device_functions.cpp @@ -710,6 +710,9 @@ const std::map CUDA_DEVICE_FUNCTION_MAP { {"__hgeu2_mask", {"__hgeu2_mask", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__hltu2_mask", {"__hltu2_mask", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__hgtu2_mask", {"__hgtu2_mask", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"make_half2", {"make_half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__half2char_rz", {"__half2char_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__half2uchar_rz", {"__half2uchar_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, // bfp16 functions {"__double2bfloat16", {"__double2bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__float2bfloat16", {"__float2bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, @@ -953,6 +956,9 @@ const std::map CUDA_DEVICE_FUNCTION_VER_MAP { {"__bfloat162char_rz", {CUDA_122, CUDA_0, CUDA_0 }}, {"__bfloat162uchar_rz", {CUDA_122, CUDA_0, CUDA_0 }}, {"make_bfloat162", {CUDA_122, CUDA_0, CUDA_0 }}, + {"make_half2", {CUDA_122, CUDA_0, CUDA_0 }}, + {"__half2char_rz", {CUDA_122, CUDA_0, CUDA_0 }}, + {"__half2uchar_rz", {CUDA_122, CUDA_0, CUDA_0 }}, }; const std::map HIP_DEVICE_FUNCTION_VER_MAP { From 26ee18f479fd00323a915203d1d86bbba518ca13 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 17 Jul 2023 12:25:09 +0200 Subject: [PATCH 17/20] [HIPIFY][doc] CUDA 12.2.0 is the latest supported release + CUDA 12.2 is supported by LLVM >= 16.0.0 + Updated README.md accordingly + Tested on Windows 10 and Ubuntu 21.10 --- docs/hipify-clang.md | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git 
a/docs/hipify-clang.md b/docs/hipify-clang.md index eb213a33..4e437667 100644 --- a/docs/hipify-clang.md +++ b/docs/hipify-clang.md @@ -23,7 +23,7 @@ After applying all the matchers, the output HIP source is produced. 1. [**LLVM+CLANG**](http://releases.llvm.org) of at least version [4.0.0](http://releases.llvm.org/download.html#4.0.0); the latest stable and recommended release: [**16.0.6**](https://github.com/llvm/llvm-project/releases/tag/llvmorg-16.0.6). -2. [**CUDA**](https://developer.nvidia.com/cuda-downloads) of at least version [8.0](https://developer.nvidia.com/cuda-80-ga2-download-archive), the latest supported version is [**12.1.1**](https://developer.nvidia.com/cuda-downloads). +2. [**CUDA**](https://developer.nvidia.com/cuda-downloads) of at least version [8.0](https://developer.nvidia.com/cuda-80-ga2-download-archive), the latest supported version is [**12.2.0**](https://developer.nvidia.com/cuda-downloads). @@ -169,12 +169,12 @@ After applying all the matchers, the output HIP source is produced. 16.0.4, 16.0.5,
16.0.6 [two -/+ HTML version-table cells, markup lost in extraction; per the rest of this patch, they update the latest supported CUDA release links from 12.1.1 to 12.2.0] @@ -199,14 +199,14 @@ To process a file, `hipify-clang` needs access to the same headers that would be For example: ```shell -./hipify-clang square.cu --cuda-path=/usr/local/cuda-12.1 -I /usr/local/cuda-12.1/samples/common/inc +./hipify-clang square.cu --cuda-path=/usr/local/cuda-12.2 -I /usr/local/cuda-12.2/samples/common/inc ``` `hipify-clang` arguments are given first, followed by a separator `'--'`, and then the arguments you'd pass to `clang` if you were compiling the input file. For example: ```bash -./hipify-clang cpp17.cu --cuda-path=/usr/local/cuda-12.1 -- -std=c++17 +./hipify-clang cpp17.cu --cuda-path=/usr/local/cuda-12.2 -- -std=c++17 ``` The [Clang manual for compiling CUDA](https://llvm.org/docs/CompileCudaWithLLVM.html#compiling-cuda-code) may be useful. @@ -327,9 +327,9 @@ Run `Visual Studio 17 2022`, open the generated `LLVM.sln`, build all, build pro - ***Linux***: `-DCUDA_TOOLKIT_ROOT_DIR=/usr/include` - - ***Windows***: `-DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.1"` + - ***Windows***: `-DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.2"` - `-DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v12.1"` + `-DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v12.2"` 4. Ensure [`cuDNN`](https://developer.nvidia.com/rdp/cudnn-archive) of the version corresponding to CUDA's version is installed. @@ -389,7 +389,7 @@ Ubuntu 14: LLVM 4.0.0 - 7.1.0, CUDA 7.0 - 9.0, cuDNN 5.0.5 - 7.6.5 Ubuntu 16-18: LLVM 8.0.0 - 14.0.6, CUDA 8.0 - 10.2, cuDNN 5.1.10 - 8.0.5 -Ubuntu 20-21: LLVM 9.0.0 - 16.0.6, CUDA 8.0 - 12.1.1, cuDNN 5.1.10 - 8.9.2 +Ubuntu 20-21: LLVM 9.0.0 - 16.0.6, CUDA 8.0 - 12.2.0, cuDNN 5.1.10 - 8.9.2 Minimum build system requirements for the above configurations: @@ -443,7 +443,7 @@ cmake -- Performing Test CMAKE_HAVE_LIBC_PTHREAD -- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Success -- Found Threads: TRUE --- Found CUDA: /usr/local/cuda (found version "12.1") +-- Found CUDA: /usr/local/cuda (found version "12.2") -- Configuring done -- Generating done -- Build files have been written to: /usr/hipify/build @@ -457,7 +457,7 @@ make test-hipify ```shell Running HIPify regression tests ======================================== -CUDA 12.1 - will be used for testing +CUDA 12.2 - will be used for testing LLVM 16.0.6 - will be used for testing x86_64 - Platform architecture Linux 5.13.0-21-generic - Platform OS @@ -576,8 +576,8 @@ Testing Time: 7.90s | 12.0.0 - 13.0.1 | 7.0 - 11.5.1 | 7.6.5 - 8.3.2 | 2017.15.9.43, 2019.16.11.9 | 3.22.2 | 3.10.2 | | 14.0.0 - 14.0.6 | 7.0 - 11.7.1 | 8.0.5 - 8.4.1 | 2017.15.9.49, 2019.16.11.17, 2022.17.2.6 | 3.24.0 | 3.10.6 | | 15.0.0 - 15.0.7 | 7.0 - 11.8.0 | 8.0.5 - 8.8.1 | 2017.15.9.53, 2019.16.11.25, 2022.17.5.2 | 3.26.0 | 3.11.2 | -| 16.0.0 - 16.0.6 | 7.0 - 12.1.1 | 8.0.5 - 8.9.2 | 2017.15.9.55, 2019.16.11.27, 2022.17.6.4 | 3.26.4 | 3.11.4 | -| 17.0.0git | 7.0 - 12.1.1 | 8.0.5 - 8.9.2 | 2017.15.9.55, 2019.16.11.27, 2022.17.6.4 | 3.26.4 | 3.11.4 | +| 16.0.0 - 16.0.6 | 7.0 - 12.2.0 | 8.0.5 - 8.9.2 | 2017.15.9.55, 2019.16.11.27, 2022.17.6.4 | 3.26.4 | 3.11.4 | +| 17.0.0git | 7.0 - 12.2.0 | 8.0.5 - 8.9.2 | 2017.15.9.55, 2019.16.11.27, 2022.17.6.4 | 3.26.4 | 3.11.4 | *Building with testing support by `Visual Studio 17 2022` on `Windows 10`:* @@ -590,9 +590,9 @@ cmake -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=../dist \ -DCMAKE_PREFIX_PATH=d:/LLVM/16.0.6/dist \ - -DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing 
Toolkit/CUDA/v12.1" \ - -DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v12.1" \ - -DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-12.1-windows-x64-v8.9.2 \ + -DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.2" \ + -DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v12.2" \ + -DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-12.2-windows-x64-v8.9.2 \ -DCUDA_CUB_ROOT_DIR=d:/GIT/cub \ -DLLVM_EXTERNAL_LIT=d:/LLVM/16.0.6/build/Release/bin/llvm-lit.py \ ../hipify @@ -606,7 +606,7 @@ cmake -- Found PythonInterp: c:/Program Files/Python311/python.exe (found suitable version "3.11.4", minimum required is "3.6") -- Found lit: c:/Program Files/Python311/Scripts/lit.exe -- Found FileCheck: d:/LLVM/16.0.6/dist/bin/FileCheck.exe --- Found CUDA: c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.1 (found version "12.1") +-- Found CUDA: c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.2 (found version "12.2") -- Configuring done -- Generating done -- Build files have been written to: d:/hipify/build From 8b565b877f642bf9a40ffc64d7405475cb166be9 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 17 Jul 2023 15:52:45 +0200 Subject: [PATCH 18/20] [HIPIFY][HIP][5.7.0] Mark the deprecated HIP functions + Updated the generated docs accordingly --- .../CUDA_Driver_API_functions_supported_by_HIP.md | 6 +++--- .../CUDA_Runtime_API_functions_supported_by_HIP.md | 2 +- src/CUDA2HIP_Driver_API_functions.cpp | 12 ++++++------ src/CUDA2HIP_Runtime_API_functions.cpp | 4 ++-- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/tables/CUDA_Driver_API_functions_supported_by_HIP.md b/docs/tables/CUDA_Driver_API_functions_supported_by_HIP.md index ac4b69f0..dca42564 100644 --- a/docs/tables/CUDA_Driver_API_functions_supported_by_HIP.md +++ b/docs/tables/CUDA_Driver_API_functions_supported_by_HIP.md @@ -1590,9 +1590,9 @@ |`cuMemsetD8`| | | |`hipMemsetD8`|1.6.0| | | | |`cuMemsetD8Async`| | | |`hipMemsetD8Async`|3.0.0| | | | |`cuMemsetD8_v2`| | | |`hipMemsetD8`|1.6.0| | | | -|`cuMipmappedArrayCreate`| | | |`hipMipmappedArrayCreate`|3.5.0| | | | -|`cuMipmappedArrayDestroy`| | | |`hipMipmappedArrayDestroy`|3.5.0| | | | -|`cuMipmappedArrayGetLevel`| | | |`hipMipmappedArrayGetLevel`|3.5.0| | | | +|`cuMipmappedArrayCreate`| | | |`hipMipmappedArrayCreate`|3.5.0|5.7.0| | | +|`cuMipmappedArrayDestroy`| | | |`hipMipmappedArrayDestroy`|3.5.0|5.7.0| | | +|`cuMipmappedArrayGetLevel`| | | |`hipMipmappedArrayGetLevel`|3.5.0|5.7.0| | | |`cuMipmappedArrayGetMemoryRequirements`|11.6| | | | | | | | ## **14. 
Virtual Memory Management** diff --git a/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md b/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md index 21dbbfbd..4f8a4eb3 100644 --- a/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md +++ b/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md @@ -1448,7 +1448,7 @@ |`cudaBindTexture`| |11.0|12.0|`hipBindTexture`|1.6.0|3.8.0| | | |`cudaBindTexture2D`| |11.0|12.0|`hipBindTexture2D`|1.7.0|3.8.0| | | |`cudaBindTextureToArray`| |11.0|12.0|`hipBindTextureToArray`|1.6.0|3.8.0| | | -|`cudaBindTextureToMipmappedArray`| |11.0|12.0|`hipBindTextureToMipmappedArray`|1.7.0| | | | +|`cudaBindTextureToMipmappedArray`| |11.0|12.0|`hipBindTextureToMipmappedArray`|1.7.0|5.7.0| | | |`cudaGetTextureAlignmentOffset`| |11.0|12.0|`hipGetTextureAlignmentOffset`|1.9.0|3.8.0| | | |`cudaGetTextureReference`| |11.0|12.0|`hipGetTextureReference`|1.7.0|5.3.0| | | |`cudaUnbindTexture`| |11.0|12.0|`hipUnbindTexture`|1.6.0|3.8.0| | | diff --git a/src/CUDA2HIP_Driver_API_functions.cpp b/src/CUDA2HIP_Driver_API_functions.cpp index 012b0076..1711d102 100644 --- a/src/CUDA2HIP_Driver_API_functions.cpp +++ b/src/CUDA2HIP_Driver_API_functions.cpp @@ -353,13 +353,13 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { {"cuMemsetD8Async", {"hipMemsetD8Async", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY}}, // no analogue // NOTE: Not equal to cudaMallocMipmappedArray due to different signatures - {"cuMipmappedArrayCreate", {"hipMipmappedArrayCreate", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY}}, + {"cuMipmappedArrayCreate", {"hipMipmappedArrayCreate", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY, HIP_DEPRECATED}}, // no analogue // NOTE: Not equal to cudaFreeMipmappedArray due to different signatures - {"cuMipmappedArrayDestroy", {"hipMipmappedArrayDestroy", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY}}, + {"cuMipmappedArrayDestroy", {"hipMipmappedArrayDestroy", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY, HIP_DEPRECATED}}, // no analogue // NOTE: Not equal to cudaGetMipmappedArrayLevel due to different signatures - {"cuMipmappedArrayGetLevel", {"hipMipmappedArrayGetLevel", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY}}, + {"cuMipmappedArrayGetLevel", {"hipMipmappedArrayGetLevel", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY, HIP_DEPRECATED}}, // cudaArrayGetSparseProperties {"cuArrayGetSparseProperties", {"hipArrayGetSparseProperties", "", CONV_MEMORY, API_DRIVER, SEC::MEMORY, HIP_UNSUPPORTED}}, // cudaArrayGetPlane @@ -1458,9 +1458,9 @@ const std::map HIP_DRIVER_FUNCTION_VER_MAP { {"hipMemsetD32Async", {HIP_2030, HIP_0, HIP_0 }}, {"hipMemsetD8", {HIP_1060, HIP_0, HIP_0 }}, {"hipMemsetD8Async", {HIP_3000, HIP_0, HIP_0 }}, - {"hipMipmappedArrayCreate", {HIP_3050, HIP_0, HIP_0 }}, - {"hipMipmappedArrayDestroy", {HIP_3050, HIP_0, HIP_0 }}, - {"hipMipmappedArrayGetLevel", {HIP_3050, HIP_0, HIP_0 }}, + {"hipMipmappedArrayCreate", {HIP_3050, HIP_5070, HIP_0 }}, + {"hipMipmappedArrayDestroy", {HIP_3050, HIP_5070, HIP_0 }}, + {"hipMipmappedArrayGetLevel", {HIP_3050, HIP_5070, HIP_0 }}, {"hipFuncGetAttribute", {HIP_2080, HIP_0, HIP_0 }}, {"hipModuleLaunchKernel", {HIP_1060, HIP_0, HIP_0 }}, {"hipModuleOccupancyMaxActiveBlocksPerMultiprocessor", {HIP_3050, HIP_0, HIP_0 }}, diff --git a/src/CUDA2HIP_Runtime_API_functions.cpp b/src/CUDA2HIP_Runtime_API_functions.cpp index 81b1af53..7ab124ac 100644 --- a/src/CUDA2HIP_Runtime_API_functions.cpp +++ b/src/CUDA2HIP_Runtime_API_functions.cpp @@ -885,7 +885,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // no analogue {"cudaBindTextureToArray", 
{"hipBindTextureToArray", "", CONV_TEXTURE, API_RUNTIME, SEC::TEXTURE_REMOVED, HIP_DEPRECATED | CUDA_REMOVED}}, // no analogue - {"cudaBindTextureToMipmappedArray", {"hipBindTextureToMipmappedArray", "", CONV_TEXTURE, API_RUNTIME, SEC::TEXTURE_REMOVED, CUDA_REMOVED}}, + {"cudaBindTextureToMipmappedArray", {"hipBindTextureToMipmappedArray", "", CONV_TEXTURE, API_RUNTIME, SEC::TEXTURE_REMOVED, HIP_DEPRECATED | CUDA_REMOVED}}, // no analogue {"cudaGetTextureAlignmentOffset", {"hipGetTextureAlignmentOffset", "", CONV_TEXTURE, API_RUNTIME, SEC::TEXTURE_REMOVED, HIP_DEPRECATED | CUDA_REMOVED}}, // no analogue @@ -1245,7 +1245,7 @@ const std::map HIP_RUNTIME_FUNCTION_VER_MAP { {"hipBindTexture", {HIP_1060, HIP_3080, HIP_0 }}, {"hipBindTexture2D", {HIP_1070, HIP_3080, HIP_0 }}, {"hipBindTextureToArray", {HIP_1060, HIP_3080, HIP_0 }}, - {"hipBindTextureToMipmappedArray", {HIP_1070, HIP_0, HIP_0 }}, + {"hipBindTextureToMipmappedArray", {HIP_1070, HIP_5070, HIP_0 }}, {"hipCreateChannelDesc", {HIP_1060, HIP_0, HIP_0 }}, {"hipGetChannelDesc", {HIP_1070, HIP_0, HIP_0 }}, {"hipGetTextureAlignmentOffset", {HIP_1090, HIP_3080, HIP_0 }}, From 8ed6d8ab2429720e2f6187c19432a27416b752e7 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 17 Jul 2023 17:56:48 +0200 Subject: [PATCH 19/20] [HIPIFY][rocSPARSE][5.7.0] Sync with `rocSPARSE` 5.7.0 + Updated the corresponding synthetic tests + Updated the generated hipify-perl and docs accordingly --- bin/hipify-perl | 13 ++++++----- docs/tables/CUBLAS_API_supported_by_HIP.md | 2 +- .../CUBLAS_API_supported_by_HIP_and_ROC.md | 14 ++++++------ docs/tables/CUBLAS_API_supported_by_ROC.md | 14 ++++++------ src/CUDA2HIP_BLAS_API_functions.cpp | 7 ++++-- src/CUDA2HIP_BLAS_API_types.cpp | 21 +++++++++++------- .../synthetic/libraries/cublas2rocblas.cu | 22 +++++++++++++++++++ 7 files changed, 62 insertions(+), 31 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 313e9482..b090469f 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1173,6 +1173,7 @@ sub rocSubstitutions { subst("cublasGemmEx", "rocblas_gemm_ex", "library"); subst("cublasGemmStridedBatchedEx", "rocblas_gemm_strided_batched_ex", "library"); subst("cublasGetAtomicsMode", "rocblas_get_atomics_mode", "library"); + subst("cublasGetMathMode", "rocblas_get_math_mode", "library"); subst("cublasGetMatrix", "rocblas_get_matrix", "library"); subst("cublasGetMatrixAsync", "rocblas_get_matrix_async", "library"); subst("cublasGetPointerMode", "rocblas_get_pointer_mode", "library"); @@ -1219,6 +1220,7 @@ sub rocSubstitutions { subst("cublasSdot", "rocblas_sdot", "library"); subst("cublasSdot_v2", "rocblas_sdot", "library"); subst("cublasSetAtomicsMode", "rocblas_set_atomics_mode", "library"); + subst("cublasSetMathMode", "rocblas_set_math_mode", "library"); subst("cublasSetMatrix", "rocblas_set_matrix", "library"); subst("cublasSetMatrixAsync", "rocblas_set_matrix_async", "library"); subst("cublasSetPointerMode", "rocblas_set_pointer_mode", "library"); @@ -1563,12 +1565,14 @@ sub rocSubstitutions { subst("cuDoubleComplex", "rocblas_double_complex", "type"); subst("cuFloatComplex", "rocblas_float_complex", "type"); subst("cublasAtomicsMode_t", "rocblas_atomics_mode", "type"); + subst("cublasComputeType_t", "rocblas_computetype", "type"); subst("cublasContext", "_rocblas_handle", "type"); subst("cublasDataType_t", "rocblas_datatype", "type"); subst("cublasDiagType_t", "rocblas_diagonal", "type"); subst("cublasFillMode_t", "rocblas_fill", "type"); subst("cublasGemmAlgo_t", "rocblas_gemm_algo", "type"); 
subst("cublasHandle_t", "rocblas_handle", "type"); + subst("cublasMath_t", "rocblas_math_mode", "type"); subst("cublasOperation_t", "rocblas_operation", "type"); subst("cublasPointerMode_t", "rocblas_pointer_mode", "type"); subst("cublasSideMode_t", "rocblas_side", "type"); @@ -1656,6 +1660,8 @@ sub rocSubstitutions { subst("pruneInfo_t", "rocsparse_mat_info", "type"); subst("CUBLAS_ATOMICS_ALLOWED", "rocblas_atomics_allowed", "numeric_literal"); subst("CUBLAS_ATOMICS_NOT_ALLOWED", "rocblas_atomics_not_allowed", "numeric_literal"); + subst("CUBLAS_COMPUTE_32F", "rocblas_compute_type_f32", "numeric_literal"); + subst("CUBLAS_DEFAULT_MATH", "rocblas_default_math", "numeric_literal"); subst("CUBLAS_DIAG_NON_UNIT", "rocblas_diagonal_non_unit", "numeric_literal"); subst("CUBLAS_DIAG_UNIT", "rocblas_diagonal_unit", "numeric_literal"); subst("CUBLAS_FILL_MODE_FULL", "rocblas_fill_full", "numeric_literal"); @@ -1680,6 +1686,7 @@ sub rocSubstitutions { subst("CUBLAS_STATUS_NOT_INITIALIZED", "rocblas_status_invalid_handle", "numeric_literal"); subst("CUBLAS_STATUS_NOT_SUPPORTED", "rocblas_status_perf_degraded", "numeric_literal"); subst("CUBLAS_STATUS_SUCCESS", "rocblas_status_success", "numeric_literal"); + subst("CUBLAS_TF32_TENSOR_OP_MATH", "rocblas_xf32_xdl_math_op", "numeric_literal"); subst("CUDA_C_16BF", "rocblas_datatype_bf16_c", "numeric_literal"); subst("CUDA_C_16F", "rocblas_datatype_f16_c", "numeric_literal"); subst("CUDA_C_32F", "rocblas_datatype_f32_c", "numeric_literal"); @@ -9578,7 +9585,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasSetSmCountTarget", "cublasSetMatrix_64", "cublasSetMatrixAsync_64", - "cublasSetMathMode", "cublasSetLoggerCallback", "cublasSetKernelStream", "cublasSdot_v2_64", @@ -9602,7 +9608,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasRotEx_64", "cublasNrm2Ex_64", "cublasMigrateComputeType", - "cublasMath_t", "cublasLoggerConfigure", "cublasLogCallback", "cublasIzamin_v2_64", @@ -9645,7 +9650,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasGetProperty", "cublasGetMatrix_64", "cublasGetMatrixAsync_64", - "cublasGetMathMode", "cublasGetLoggerCallback", "cublasGetError", "cublasGetCudartVersion", @@ -9872,7 +9876,6 @@ sub warnRocOnlyUnsupportedFunctions { "CUDA_C_4I", "CUDA_C_16U", "CUDA_C_16I", - "CUBLAS_TF32_TENSOR_OP_MATH", "CUBLAS_TENSOR_OP_MATH", "CUBLAS_STATUS_LICENSE_ERROR", "CUBLAS_PEDANTIC_MATH", @@ -9920,7 +9923,6 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLAS_GEMM_ALGO1", "CUBLAS_GEMM_ALGO0_TENSOR_OP", "CUBLAS_GEMM_ALGO0", - "CUBLAS_DEFAULT_MATH", "CUBLAS_COMPUTE_64F_PEDANTIC", "CUBLAS_COMPUTE_64F", "CUBLAS_COMPUTE_32I_PEDANTIC", @@ -9929,7 +9931,6 @@ sub warnRocOnlyUnsupportedFunctions { "CUBLAS_COMPUTE_32F_FAST_TF32", "CUBLAS_COMPUTE_32F_FAST_16F", "CUBLAS_COMPUTE_32F_FAST_16BF", - "CUBLAS_COMPUTE_32F", "CUBLAS_COMPUTE_16F_PEDANTIC", "CUBLAS_COMPUTE_16F" ) diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index 2c350b76..b0850b1d 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -182,7 +182,7 @@ |`cublasSetAtomicsMode`| | | |`hipblasSetAtomicsMode`|3.10.0| | | | |`cublasSetKernelStream`| | | | | | | | | |`cublasSetLoggerCallback`|9.2| | | | | | | | -|`cublasSetMathMode`| | | | | | | | | +|`cublasSetMathMode`|9.0| | | | | | | | |`cublasSetMatrix`| | | |`hipblasSetMatrix`|1.8.2| | | | |`cublasSetMatrixAsync`| | | |`hipblasSetMatrixAsync`|3.7.0| | | | |`cublasSetMatrixAsync_64`|12.0| | | | | | | | diff --git 
a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index 8c232a5c..282b2eca 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -8,7 +8,7 @@ |`CUBLAS_ATOMICS_NOT_ALLOWED`| | | |`HIPBLAS_ATOMICS_NOT_ALLOWED`|3.10.0| | | |`rocblas_atomics_not_allowed`|3.8.0| | | | |`CUBLAS_COMPUTE_16F`|11.0| | | | | | | | | | | | | |`CUBLAS_COMPUTE_16F_PEDANTIC`|11.0| | | | | | | | | | | | | -|`CUBLAS_COMPUTE_32F`|11.0| | | | | | | | | | | | | +|`CUBLAS_COMPUTE_32F`|11.0| | | | | | | |`rocblas_compute_type_f32`|5.7.0| | | | |`CUBLAS_COMPUTE_32F_FAST_16BF`|11.0| | | | | | | | | | | | | |`CUBLAS_COMPUTE_32F_FAST_16F`|11.0| | | | | | | | | | | | | |`CUBLAS_COMPUTE_32F_FAST_TF32`|11.0| | | | | | | | | | | | | @@ -17,7 +17,7 @@ |`CUBLAS_COMPUTE_32I_PEDANTIC`|11.0| | | | | | | | | | | | | |`CUBLAS_COMPUTE_64F`|11.0| | | | | | | | | | | | | |`CUBLAS_COMPUTE_64F_PEDANTIC`|11.0| | | | | | | | | | | | | -|`CUBLAS_DEFAULT_MATH`|9.0| | | | | | | | | | | | | +|`CUBLAS_DEFAULT_MATH`|9.0| | | | | | | |`rocblas_default_math`|5.7.0| | | | |`CUBLAS_DIAG_NON_UNIT`| | | |`HIPBLAS_DIAG_NON_UNIT`|1.8.2| | | |`rocblas_diagonal_non_unit`|1.5.0| | | | |`CUBLAS_DIAG_UNIT`| | | |`HIPBLAS_DIAG_UNIT`|1.8.2| | | |`rocblas_diagonal_unit`|1.5.0| | | | |`CUBLAS_FILL_MODE_FULL`|10.1| | |`HIPBLAS_FILL_MODE_FULL`|1.8.2| | | |`rocblas_fill_full`|1.5.0| | | | @@ -89,16 +89,16 @@ |`CUBLAS_STATUS_NOT_SUPPORTED`| | | |`HIPBLAS_STATUS_NOT_SUPPORTED`|1.8.2| | | |`rocblas_status_perf_degraded`|3.5.0| | | | |`CUBLAS_STATUS_SUCCESS`| | | |`HIPBLAS_STATUS_SUCCESS`|1.8.2| | | |`rocblas_status_success`|1.5.0| | | | |`CUBLAS_TENSOR_OP_MATH`|9.0|11.0| | | | | | | | | | | | -|`CUBLAS_TF32_TENSOR_OP_MATH`|11.0| | | | | | | | | | | | | +|`CUBLAS_TF32_TENSOR_OP_MATH`|11.0| | | | | | | |`rocblas_xf32_xdl_math_op`|5.7.0| | | | |`cublasAtomicsMode_t`| | | |`hipblasAtomicsMode_t`|3.10.0| | | |`rocblas_atomics_mode`|3.8.0| | | | -|`cublasComputeType_t`|11.0| | |`hipblasDatatype_t`|1.8.2| | | | | | | | | +|`cublasComputeType_t`|11.0| | |`hipblasDatatype_t`|1.8.2| | | |`rocblas_computetype`|5.7.0| | | | |`cublasContext`| | | | | | | | |`_rocblas_handle`|1.5.0| | | | |`cublasDataType_t`|7.5| | |`hipblasDatatype_t`|1.8.2| | | |`rocblas_datatype`|1.8.2| | | | |`cublasDiagType_t`| | | |`hipblasDiagType_t`|1.8.2| | | |`rocblas_diagonal`|1.5.0| | | | |`cublasFillMode_t`| | | |`hipblasFillMode_t`|1.8.2| | | |`rocblas_fill`|1.5.0| | | | |`cublasGemmAlgo_t`|8.0| | |`hipblasGemmAlgo_t`|1.8.2| | | |`rocblas_gemm_algo`|1.8.2| | | | |`cublasHandle_t`| | | |`hipblasHandle_t`|3.0.0| | | |`rocblas_handle`|1.5.0| | | | -|`cublasMath_t`|9.0| | | | | | | | | | | | | +|`cublasMath_t`|9.0| | | | | | | |`rocblas_math_mode`|5.7.0| | | | |`cublasOperation_t`| | | |`hipblasOperation_t`|1.8.2| | | |`rocblas_operation`|1.5.0| | | | |`cublasPointerMode_t`| | | |`hipblasPointerMode_t`|1.8.2| | | |`rocblas_pointer_mode`|1.6.0| | | | |`cublasSideMode_t`| | | |`hipblasSideMode_t`|1.8.2| | | |`rocblas_side`|1.5.0| | | | @@ -156,7 +156,7 @@ |`cublasGetCudartVersion`|10.1| | | | | | | | | | | | | |`cublasGetError`| | | | | | | | | | | | | | |`cublasGetLoggerCallback`|9.2| | | | | | | | | | | | | -|`cublasGetMathMode`|9.0| | | | | | | | | | | | | +|`cublasGetMathMode`|9.0| | | | | | | |`rocblas_get_math_mode`|5.7.0| | | | |`cublasGetMatrix`| | | |`hipblasGetMatrix`|1.8.2| | | |`rocblas_get_matrix`|1.6.0| | | | |`cublasGetMatrixAsync`| | | |`hipblasGetMatrixAsync`|3.7.0| | | 
|`rocblas_get_matrix_async`|3.5.0| | | | |`cublasGetMatrixAsync_64`|12.0| | | | | | | | | | | | | @@ -182,7 +182,7 @@ |`cublasSetAtomicsMode`| | | |`hipblasSetAtomicsMode`|3.10.0| | | |`rocblas_set_atomics_mode`|3.8.0| | | | |`cublasSetKernelStream`| | | | | | | | | | | | | | |`cublasSetLoggerCallback`|9.2| | | | | | | | | | | | | -|`cublasSetMathMode`| | | | | | | | | | | | | | +|`cublasSetMathMode`|9.0| | | | | | | |`rocblas_set_math_mode`|5.7.0| | | | |`cublasSetMatrix`| | | |`hipblasSetMatrix`|1.8.2| | | |`rocblas_set_matrix`|1.6.0| | | | |`cublasSetMatrixAsync`| | | |`hipblasSetMatrixAsync`|3.7.0| | | |`rocblas_set_matrix_async`|3.5.0| | | | |`cublasSetMatrixAsync_64`|12.0| | | | | | | | | | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index c1ab92b9..8d89c7ca 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -8,7 +8,7 @@ |`CUBLAS_ATOMICS_NOT_ALLOWED`| | | |`rocblas_atomics_not_allowed`|3.8.0| | | | |`CUBLAS_COMPUTE_16F`|11.0| | | | | | | | |`CUBLAS_COMPUTE_16F_PEDANTIC`|11.0| | | | | | | | -|`CUBLAS_COMPUTE_32F`|11.0| | | | | | | | +|`CUBLAS_COMPUTE_32F`|11.0| | |`rocblas_compute_type_f32`|5.7.0| | | | |`CUBLAS_COMPUTE_32F_FAST_16BF`|11.0| | | | | | | | |`CUBLAS_COMPUTE_32F_FAST_16F`|11.0| | | | | | | | |`CUBLAS_COMPUTE_32F_FAST_TF32`|11.0| | | | | | | | @@ -17,7 +17,7 @@ |`CUBLAS_COMPUTE_32I_PEDANTIC`|11.0| | | | | | | | |`CUBLAS_COMPUTE_64F`|11.0| | | | | | | | |`CUBLAS_COMPUTE_64F_PEDANTIC`|11.0| | | | | | | | -|`CUBLAS_DEFAULT_MATH`|9.0| | | | | | | | +|`CUBLAS_DEFAULT_MATH`|9.0| | |`rocblas_default_math`|5.7.0| | | | |`CUBLAS_DIAG_NON_UNIT`| | | |`rocblas_diagonal_non_unit`|1.5.0| | | | |`CUBLAS_DIAG_UNIT`| | | |`rocblas_diagonal_unit`|1.5.0| | | | |`CUBLAS_FILL_MODE_FULL`|10.1| | |`rocblas_fill_full`|1.5.0| | | | @@ -89,16 +89,16 @@ |`CUBLAS_STATUS_NOT_SUPPORTED`| | | |`rocblas_status_perf_degraded`|3.5.0| | | | |`CUBLAS_STATUS_SUCCESS`| | | |`rocblas_status_success`|1.5.0| | | | |`CUBLAS_TENSOR_OP_MATH`|9.0|11.0| | | | | | | -|`CUBLAS_TF32_TENSOR_OP_MATH`|11.0| | | | | | | | +|`CUBLAS_TF32_TENSOR_OP_MATH`|11.0| | |`rocblas_xf32_xdl_math_op`|5.7.0| | | | |`cublasAtomicsMode_t`| | | |`rocblas_atomics_mode`|3.8.0| | | | -|`cublasComputeType_t`|11.0| | | | | | | | +|`cublasComputeType_t`|11.0| | |`rocblas_computetype`|5.7.0| | | | |`cublasContext`| | | |`_rocblas_handle`|1.5.0| | | | |`cublasDataType_t`|7.5| | |`rocblas_datatype`|1.8.2| | | | |`cublasDiagType_t`| | | |`rocblas_diagonal`|1.5.0| | | | |`cublasFillMode_t`| | | |`rocblas_fill`|1.5.0| | | | |`cublasGemmAlgo_t`|8.0| | |`rocblas_gemm_algo`|1.8.2| | | | |`cublasHandle_t`| | | |`rocblas_handle`|1.5.0| | | | -|`cublasMath_t`|9.0| | | | | | | | +|`cublasMath_t`|9.0| | |`rocblas_math_mode`|5.7.0| | | | |`cublasOperation_t`| | | |`rocblas_operation`|1.5.0| | | | |`cublasPointerMode_t`| | | |`rocblas_pointer_mode`|1.6.0| | | | |`cublasSideMode_t`| | | |`rocblas_side`|1.5.0| | | | @@ -156,7 +156,7 @@ |`cublasGetCudartVersion`|10.1| | | | | | | | |`cublasGetError`| | | | | | | | | |`cublasGetLoggerCallback`|9.2| | | | | | | | -|`cublasGetMathMode`|9.0| | | | | | | | +|`cublasGetMathMode`|9.0| | |`rocblas_get_math_mode`|5.7.0| | | | |`cublasGetMatrix`| | | |`rocblas_get_matrix`|1.6.0| | | | |`cublasGetMatrixAsync`| | | |`rocblas_get_matrix_async`|3.5.0| | | | |`cublasGetMatrixAsync_64`|12.0| | | | | | | | @@ -182,7 +182,7 @@ |`cublasSetAtomicsMode`| | | |`rocblas_set_atomics_mode`|3.8.0| | | | 
|`cublasSetKernelStream`| | | | | | | | | |`cublasSetLoggerCallback`|9.2| | | | | | | | -|`cublasSetMathMode`| | | | | | | | | +|`cublasSetMathMode`|9.0| | |`rocblas_set_math_mode`|5.7.0| | | | |`cublasSetMatrix`| | | |`rocblas_set_matrix`|1.6.0| | | | |`cublasSetMatrixAsync`| | | |`rocblas_set_matrix_async`|3.5.0| | | | |`cublasSetMatrixAsync_64`|12.0| | | | | | | | diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index fcc8fb66..c1cffdef 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -35,8 +35,8 @@ const std::map CUDA_BLAS_FUNCTION_MAP { {"cublasSetKernelStream", {"hipblasSetKernelStream", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, {"cublasGetAtomicsMode", {"hipblasGetAtomicsMode", "rocblas_get_atomics_mode", CONV_LIB_FUNC, API_BLAS, 4}}, {"cublasSetAtomicsMode", {"hipblasSetAtomicsMode", "rocblas_set_atomics_mode", CONV_LIB_FUNC, API_BLAS, 4}}, - {"cublasGetMathMode", {"hipblasGetMathMode", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, - {"cublasSetMathMode", {"hipblasSetMathMode", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, + {"cublasGetMathMode", {"hipblasGetMathMode", "rocblas_get_math_mode", CONV_LIB_FUNC, API_BLAS, 4, HIP_UNSUPPORTED}}, + {"cublasSetMathMode", {"hipblasSetMathMode", "rocblas_set_math_mode", CONV_LIB_FUNC, API_BLAS, 4, HIP_UNSUPPORTED}}, {"cublasMigrateComputeType", {"hipblasMigrateComputeType", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, {"cublasGetSmCountTarget", {"hipblasGetSmCountTarget", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, {"cublasSetSmCountTarget", {"hipblasSetSmCountTarget", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, @@ -1075,6 +1075,7 @@ const std::map CUDA_BLAS_FUNCTION_MAP { const std::map CUDA_BLAS_FUNCTION_VER_MAP { {"cublasGetMathMode", {CUDA_90, CUDA_0, CUDA_0}}, + {"cublasSetMathMode", {CUDA_90, CUDA_0, CUDA_0}}, {"cublasMigrateComputeType", {CUDA_110, CUDA_0, CUDA_0}}, {"cublasLogCallback", {CUDA_92, CUDA_0, CUDA_0}}, {"cublasLoggerConfigure", {CUDA_92, CUDA_0, CUDA_0}}, @@ -1962,6 +1963,8 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"hipblasDgelsBatched", {HIP_5040, HIP_0, HIP_0 }}, {"hipblasCgelsBatched", {HIP_5040, HIP_0, HIP_0 }}, {"hipblasZgelsBatched", {HIP_5040, HIP_0, HIP_0 }}, + {"rocblas_get_math_mode", {HIP_5070, HIP_0, HIP_0 }}, + {"rocblas_set_math_mode", {HIP_5070, HIP_0, HIP_0 }}, }; const std::map CUDA_BLAS_API_SECTION_MAP { diff --git a/src/CUDA2HIP_BLAS_API_types.cpp b/src/CUDA2HIP_BLAS_API_types.cpp index fdaccdb0..d7b31796 100644 --- a/src/CUDA2HIP_BLAS_API_types.cpp +++ b/src/CUDA2HIP_BLAS_API_types.cpp @@ -73,12 +73,12 @@ const std::map CUDA_BLAS_TYPE_NAME_MAP { {"CUBLAS_ATOMICS_ALLOWED", {"HIPBLAS_ATOMICS_ALLOWED", "rocblas_atomics_allowed", CONV_NUMERIC_LITERAL, API_BLAS, 2}}, // Blas Math mode/tensor operation - {"cublasMath_t", {"hipblasMath_t", "", CONV_TYPE, API_BLAS, 2, UNSUPPORTED}}, - {"CUBLAS_DEFAULT_MATH", {"HIPBLAS_DEFAULT_MATH", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 0 - {"CUBLAS_TENSOR_OP_MATH", {"HIPBLAS_TENSOR_OP_MATH", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED | CUDA_DEPRECATED}}, // 1 - {"CUBLAS_PEDANTIC_MATH", {"HIPBLAS_PEDANTIC_MATH", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 2 - {"CUBLAS_TF32_TENSOR_OP_MATH", {"HIPBLAS_TF32_TENSOR_OP_MATH", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 3 - {"CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION", {"HIPBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, 
UNSUPPORTED}}, // 16 + {"cublasMath_t", {"hipblasMath_t", "rocblas_math_mode", CONV_TYPE, API_BLAS, 2, HIP_UNSUPPORTED}}, + {"CUBLAS_DEFAULT_MATH", {"HIPBLAS_DEFAULT_MATH", "rocblas_default_math", CONV_NUMERIC_LITERAL, API_BLAS, 2, HIP_UNSUPPORTED}}, // 0 + {"CUBLAS_TENSOR_OP_MATH", {"HIPBLAS_TENSOR_OP_MATH", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED | CUDA_DEPRECATED}}, // 1 + {"CUBLAS_PEDANTIC_MATH", {"HIPBLAS_PEDANTIC_MATH", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 2 + {"CUBLAS_TF32_TENSOR_OP_MATH", {"HIPBLAS_TF32_TENSOR_OP_MATH", "rocblas_xf32_xdl_math_op", CONV_NUMERIC_LITERAL, API_BLAS, 2, HIP_UNSUPPORTED}}, // 3 + {"CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION", {"HIPBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 16 // Blass different GEMM algorithms {"cublasGemmAlgo_t", {"hipblasGemmAlgo_t", "rocblas_gemm_algo", CONV_TYPE, API_BLAS, 2}}, @@ -170,10 +170,10 @@ const std::map CUDA_BLAS_TYPE_NAME_MAP { // NOTE: renamed UNSUPPORTED hipblasComputeType_t to the HIP supported hipblasDatatype_t (workaround) // TODO: change the type to the correct one after fixing https://github.com/ROCmSoftwarePlatform/hipBLAS/issues/529 - {"cublasComputeType_t", {"hipblasDatatype_t", "", CONV_TYPE, API_BLAS, 2}}, + {"cublasComputeType_t", {"hipblasDatatype_t", "rocblas_computetype", CONV_TYPE, API_BLAS, 2}}, {"CUBLAS_COMPUTE_16F", {"HIPBLAS_COMPUTE_16F", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 64 {"CUBLAS_COMPUTE_16F_PEDANTIC", {"HIPBLAS_COMPUTE_16F_PEDANTIC", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 65 - {"CUBLAS_COMPUTE_32F", {"HIPBLAS_COMPUTE_32F", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 68 + {"CUBLAS_COMPUTE_32F", {"HIPBLAS_COMPUTE_32F", "rocblas_compute_type_f32", CONV_NUMERIC_LITERAL, API_BLAS, 2, HIP_UNSUPPORTED}}, // 68 {"CUBLAS_COMPUTE_32F_PEDANTIC", {"HIPBLAS_COMPUTE_32F_PEDANTIC", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 69 {"CUBLAS_COMPUTE_32F_FAST_16F", {"HIPBLAS_COMPUTE_32F_FAST_16F", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 74 {"CUBLAS_COMPUTE_32F_FAST_16BF", {"HIPBLAS_COMPUTE_32F_FAST_16BF", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 75 @@ -387,4 +387,9 @@ const std::map HIP_BLAS_TYPE_NAME_VER_MAP { {"rocblas_atomics_allowed", {HIP_3080, HIP_0, HIP_0 }}, {"rocblas_gemm_algo", {HIP_1082, HIP_0, HIP_0 }}, {"rocblas_gemm_algo_standard", {HIP_1082, HIP_0, HIP_0 }}, + {"rocblas_math_mode", {HIP_5070, HIP_0, HIP_0 }}, + {"rocblas_default_math", {HIP_5070, HIP_0, HIP_0 }}, + {"rocblas_xf32_xdl_math_op", {HIP_5070, HIP_0, HIP_0 }}, + {"rocblas_computetype", {HIP_5070, HIP_0, HIP_0 }}, + {"rocblas_compute_type_f32", {HIP_5070, HIP_0, HIP_0 }}, }; diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas.cu index 26acd8d6..7589371d 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas.cu @@ -1665,6 +1665,23 @@ int main() { #if CUDA_VERSION >= 9000 // CHECK: rocblas_gemm_algo BLAS_GEMM_DEFAULT = rocblas_gemm_algo_standard; cublasGemmAlgo_t BLAS_GEMM_DEFAULT = CUBLAS_GEMM_DEFAULT; + + // CHECK: rocblas_math_mode blasMath; + // CHECK-NEXT: rocblas_math_mode BLAS_DEFAULT_MATH = rocblas_default_math; + // CHECK-NEXT: rocblas_math_mode BLAS_TF32_TENSOR_OP_MATH = rocblas_xf32_xdl_math_op; + cublasMath_t blasMath; + cublasMath_t BLAS_DEFAULT_MATH = CUBLAS_DEFAULT_MATH; + cublasMath_t 
BLAS_TF32_TENSOR_OP_MATH = CUBLAS_TF32_TENSOR_OP_MATH; + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGetMathMode(cublasHandle_t handle, cublasMath_t* mode); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_get_math_mode(rocblas_handle handle, rocblas_math_mode* math_mode); + // CHECK: blasStatus = rocblas_get_math_mode(blasHandle, &blasMath); + blasStatus = cublasGetMathMode(blasHandle, &blasMath); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSetMathMode(cublasHandle_t handle, cublasMath_t mode); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_set_math_mode(rocblas_handle handle, rocblas_math_mode math_mode); + // CHECK: blasStatus = rocblas_set_math_mode(blasHandle, blasMath); + blasStatus = cublasSetMathMode(blasHandle, blasMath); #endif #if CUDA_VERSION >= 9010 && CUDA_VERSION < 11000 @@ -1700,6 +1717,11 @@ int main() { // CHECK-NEXT: rocblas_datatype C_16BF = rocblas_datatype_bf16_c; cublasDataType_t R_16BF = CUDA_R_16BF; cublasDataType_t C_16BF = CUDA_C_16BF; + + // CHECK: rocblas_computetype blasComputeType; + // CHECK-NEXT: rocblas_computetype BLAS_COMPUTE_32F = rocblas_compute_type_f32; + cublasComputeType_t blasComputeType; + cublasComputeType_t BLAS_COMPUTE_32F = CUBLAS_COMPUTE_32F; #endif #if CUDA_VERSION >= 11040 && CUBLAS_VERSION >= 11600 From 481dd5cc217110958b3457ffe12f31c2048f557d Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 17 Jul 2023 19:04:33 +0200 Subject: [PATCH 20/20] [HIPIFY][doc][5.7.0] `CHANGELOG.md` update --- CHANGELOG.md | 16 ++++++++++++++++ src/ArgParse.cpp | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e5a8b87..d3ae1564 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,22 @@ Full documentation for HIPIFY is available at [hipify.readthedocs.io](https://hipify.readthedocs.io/en/latest/). +## HIPIFY for ROCm 5.7.0 +### Added +- CUDA 12.2.0 support +- cuDNN 8.9.2 support +- LLVM 16.0.6 support +- Initial rocSPARSE support +- Initial CUDA2ROC documentation generation for rocBLAS, rocSPARSE, and MIOpen: + - in separate files: hipify-clang --md --doc-format=full --doc-roc=separate + - in a single file: hipify-clang --md --doc-format=full --doc-roc=joint +- New options: + - --use-hip-data-types (Use 'hipDataType' instead of 'hipblasDatatype_t' or 'rocblas_datatype') + - --doc-roc=<value> (ROC documentation generation: 'skip' (default), 'separate', or 'joint'; the '--md' or '--csv' option must be specified) +### Fixed +- [#822] Add a new function call transformation type "additional const by value arg" +- [#830] Add a new function call transformation type "move arg from place X to place Y" + ## HIPIFY for ROCm 5.6.0 ### Added - CUDA 12.1.0 support diff --git a/src/ArgParse.cpp b/src/ArgParse.cpp index 904b5c2e..acead48e 100644 --- a/src/ArgParse.cpp +++ b/src/ArgParse.cpp @@ -172,7 +172,7 @@ cl::opt<std::string> DocFormat("doc-format", cl::cat(ToolTemplateCategory)); cl::opt<std::string> DocRoc("doc-roc", - cl::desc("ROC cocumentation generation: 'skip' (default), 'separate', or 'joint'; the '--md' or '--csv' option must be specified"), + cl::desc("ROC documentation generation: 'skip' (default), 'separate', or 'joint'; the '--md' or '--csv' option must be specified"), cl::value_desc("value"), cl::cat(ToolTemplateCategory));
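Taken together, the new math-mode mappings in PATCH 19 rewrite user code roughly as in the sketch below. This is a hand-written before/after illustration, not captured tool output: the function name, variable names, and include lines are assumptions (header layout differs across ROCm installs), while the API names, types, and enumerators come directly from the substitution tables above.

    // Before hipification (CUDA):
    #include "cublas_v2.h"

    cublasStatus_t enable_tf32(cublasHandle_t handle) {
      cublasMath_t mode;
      // Query the current math mode before changing it.
      cublasStatus_t status = cublasGetMathMode(handle, &mode);
      if (status != CUBLAS_STATUS_SUCCESS) return status;
      if (mode == CUBLAS_DEFAULT_MATH)
        status = cublasSetMathMode(handle, CUBLAS_TF32_TENSOR_OP_MATH);
      return status;
    }

    // After applying the rocBLAS substitutions (hipify-perl in its roc mode):
    #include "rocblas.h"

    rocblas_status enable_tf32(rocblas_handle handle) {
      rocblas_math_mode mode;
      // Query the current math mode before changing it.
      rocblas_status status = rocblas_get_math_mode(handle, &mode);
      if (status != rocblas_status_success) return status;
      if (mode == rocblas_default_math)
        status = rocblas_set_math_mode(handle, rocblas_xf32_xdl_math_op);
      return status;
    }

Note that CUBLAS_TF32_TENSOR_OP_MATH lands on rocblas_xf32_xdl_math_op: rocBLAS exposes its XF32 XDL math op rather than an identically named TF32 enumerator, so the literal is remapped rather than merely renamed.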
[Truncated table fragment: a supported-configurations row pairing LLVM 17.0.0git with CUDA 12.1.1/12.2.0, marked LATEST STABLE CONFIG; the rest of the row is unrecoverable.]
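Of the new options recorded in the CHANGELOG above, --doc-roc is already shown with both of its invocation forms; --use-hip-data-types affects only which HIP type a BLAS data type is rewritten to. The fragment below is an illustrative sketch (the variable name is invented; the type names follow the option's own description and the mapping tables above):

    // CUDA input:
    cublasDataType_t computeType;

    // Default hipify-clang output (library-specific data type):
    hipblasDatatype_t computeType;

    // Output with `hipify-clang --use-hip-data-types <file>.cu`:
    hipDataType computeType;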