From 4707e1469ff09e386bbebebaff136b62adbc78b7 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Sun, 18 Sep 2022 11:53:47 +0200 Subject: [PATCH 01/43] [HIPIFY][#601][BLAS][tests] Synthetic test for cuBLAS API - Part 12 + Added tests for SYRK, HERK, SYR2K, and HER2K v2 functions + Added tests for HERKX (eXtended HERK) functions --- src/CUDA2HIP_BLAS_API_functions.cpp | 24 ++-- .../synthetic/libraries/cublas2hipblas.cu | 104 ++++++++++++++++++ 2 files changed, 116 insertions(+), 12 deletions(-) diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index b36ae477..1305caad 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -288,20 +288,20 @@ const std::map CUDA_BLAS_FUNCTION_MAP { {"cublasHgemmStridedBatched", {"hipblasHgemmStridedBatched", "rocblas_hgemm_strided_batched", CONV_LIB_FUNC, API_BLAS, 7}}, // SYRK - {"cublasSsyrk", {"hipblasSsyrk", "rocblas_ssyrk", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasDsyrk", {"hipblasDsyrk", "rocblas_dsyrk", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasCsyrk", {"hipblasCsyrk", "rocblas_csyrk", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasZsyrk", {"hipblasZsyrk", "rocblas_zsyrk", CONV_LIB_FUNC, API_BLAS, 7}}, + {"cublasSsyrk", {"hipblasSsyrk", "rocblas_ssyrk", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDsyrk", {"hipblasDsyrk", "rocblas_dsyrk", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, + {"cublasCsyrk", {"hipblasCsyrk", "rocblas_csyrk", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZsyrk", {"hipblasZsyrk", "rocblas_zsyrk", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, // HERK - {"cublasCherk", {"hipblasCherk", "rocblas_cherk", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasZherk", {"hipblasZherk", "rocblas_zherk", CONV_LIB_FUNC, API_BLAS, 7}}, + {"cublasCherk", {"hipblasCherk", "rocblas_cherk", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZherk", {"hipblasZherk", "rocblas_zherk", CONV_LIB_FUNC, API_BLAS, 
7, HIP_SUPPORTED_V2_ONLY}}, // SYR2K - {"cublasSsyr2k", {"hipblasSsyr2k", "rocblas_ssyr2k", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasDsyr2k", {"hipblasDsyr2k", "rocblas_dsyr2k", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasCsyr2k", {"hipblasCsyr2k", "rocblas_csyr2k", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasZsyr2k", {"hipblasZsyr2k", "rocblas_zsyr2k", CONV_LIB_FUNC, API_BLAS, 7}}, + {"cublasSsyr2k", {"hipblasSsyr2k", "rocblas_ssyr2k", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDsyr2k", {"hipblasDsyr2k", "rocblas_dsyr2k", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, + {"cublasCsyr2k", {"hipblasCsyr2k", "rocblas_csyr2k", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZsyr2k", {"hipblasZsyr2k", "rocblas_zsyr2k", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, // SYRKX - eXtended SYRK {"cublasSsyrkx", {"hipblasSsyrkx", "rocblas_ssyrkx", CONV_LIB_FUNC, API_BLAS, 7}}, @@ -310,8 +310,8 @@ const std::map CUDA_BLAS_FUNCTION_MAP { {"cublasZsyrkx", {"hipblasZsyrkx", "rocblas_zsyrkx", CONV_LIB_FUNC, API_BLAS, 7}}, // HER2K - {"cublasCher2k", {"hipblasCher2k", "rocblas_cher2k", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasZher2k", {"hipblasZher2k", "rocblas_zher2k", CONV_LIB_FUNC, API_BLAS, 7}}, + {"cublasCher2k", {"hipblasCher2k", "rocblas_cher2k", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZher2k", {"hipblasZher2k", "rocblas_zher2k", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, // HERKX - eXtended HERK {"cublasCherkx", {"hipblasCherkx", "rocblas_cherkx", CONV_LIB_FUNC, API_BLAS, 7}}, diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu index 61643cdd..b4eab679 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu @@ -1192,6 +1192,110 @@ int main() { // CHECK: blasStatus = hipblasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray, lda, 
dcomplexBarray, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); blasStatus = cublasZgemmBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, &dcomplexb, dcomplexCarray, ldc, batchCount); + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyrk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const float* alpha, const float* A, int lda, const float* beta, float* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSsyrk(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* AP, int lda, const float* beta, float* CP, int ldc); + // CHECK: blasStatus = hipblasSsyrk(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fb, &fC, ldc); + // CHECK-NEXT: blasStatus = hipblasSsyrk(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fb, &fC, ldc); + blasStatus = cublasSsyrk(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fb, &fC, ldc); + blasStatus = cublasSsyrk_v2(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fb, &fC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyrk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const double* alpha, const double* A, int lda, const double* beta, double* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDsyrk(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* AP, int lda, const double* beta, double* CP, int ldc); + // CHECK: blasStatus = hipblasDsyrk(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &db, &dC, ldc); + // CHECK-NEXT: blasStatus = hipblasDsyrk(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &db, &dC, ldc); + blasStatus = cublasDsyrk(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &db, &dC, ldc); + blasStatus = cublasDsyrk_v2(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, 
&db, &dC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyrk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* beta, cuComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCsyrk(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* AP, int lda, const hipblasComplex* beta, hipblasComplex* CP, int ldc); + // CHECK: blasStatus = hipblasCsyrk(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, &complexC, ldc); + // CHECK-NEXT: blasStatus = hipblasCsyrk(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, &complexC, ldc); + blasStatus = cublasCsyrk(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, &complexC, ldc); + blasStatus = cublasCsyrk_v2(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, &complexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyrk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZsyrk(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int lda, const hipblasDoubleComplex* beta, hipblasDoubleComplex* CP, int ldc); + // CHECK: blasStatus = hipblasZsyrk(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, &dcomplexC, ldc); + // CHECK-NEXT: blasStatus = hipblasZsyrk(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, &dcomplexC, ldc); + blasStatus = cublasZsyrk(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, 
&dcomplexb, &dcomplexC, ldc); + blasStatus = cublasZsyrk_v2(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, &dcomplexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCherk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const float* alpha, const cuComplex* A, int lda, const float* beta, cuComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCherk(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const hipblasComplex* AP, int lda, const float* beta, hipblasComplex* CP, int ldc); + // CHECK: blasStatus = hipblasCherk(blasHandle, blasFillMode, transa, n, k, &fa, &complexA, lda, &fb, &complexC, ldc); + // CHECK-NEXT: blasStatus = hipblasCherk(blasHandle, blasFillMode, transa, n, k, &fa, &complexA, lda, &fb, &complexC, ldc); + blasStatus = cublasCherk(blasHandle, blasFillMode, transa, n, k, &fa, &complexA, lda, &fb, &complexC, ldc); + blasStatus = cublasCherk_v2(blasHandle, blasFillMode, transa, n, k, &fa, &complexA, lda, &fb, &complexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZherk_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const double* alpha, const cuDoubleComplex* A, int lda, const double* beta, cuDoubleComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZherk(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const hipblasDoubleComplex* AP, int lda, const double* beta, hipblasDoubleComplex* CP, int ldc); + // CHECK: blasStatus = hipblasZherk(blasHandle, blasFillMode, transa, n, k, &da, &dcomplexA, lda, &db, &dcomplexC, ldc); + // CHECK-NEXT: blasStatus = hipblasZherk(blasHandle, blasFillMode, transa, n, k, &da, &dcomplexA, lda, &db, &dcomplexC, ldc); + blasStatus = cublasZherk(blasHandle, blasFillMode, transa, n, k, &da, &dcomplexA, lda, &db, &dcomplexC, ldc); 
+ blasStatus = cublasZherk_v2(blasHandle, blasFillMode, transa, n, k, &da, &dcomplexA, lda, &db, &dcomplexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyr2k_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const float* alpha, const float* A, int lda, const float* B, int ldb, const float* beta, float* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSsyr2k(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* AP, int lda, const float* BP, int ldb, const float* beta, float* CP, int ldc); + // CHECK: blasStatus = hipblasSsyr2k(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fb, ldb, &fb, &fC, ldc); + // CHECK-NEXT: blasStatus = hipblasSsyr2k(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fb, ldb, &fb, &fC, ldc); + blasStatus = cublasSsyr2k(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fb, ldb, &fb, &fC, ldc); + blasStatus = cublasSsyr2k_v2(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fb, ldb, &fb, &fC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyr2k_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const double* alpha, const double* A, int lda, const double* B, int ldb, const double* beta, double* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDsyr2k(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* AP, int lda, const double* BP, int ldb, const double* beta, double* CP, int ldc); + // CHECK: blasStatus = hipblasDsyr2k(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &db, ldb, &db, &dC, ldc); + // CHECK-NEXT: blasStatus = hipblasDsyr2k(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &db, ldb, &db, &dC, ldc); + blasStatus = cublasDsyr2k(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &db, ldb, &db, &dC, ldc); + blasStatus = 
cublasDsyr2k_v2(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &db, ldb, &db, &dC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyr2k_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, const cuComplex* beta, cuComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCsyr2k(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* AP, int lda, const hipblasComplex* BP, int ldb, const hipblasComplex* beta, hipblasComplex* CP, int ldc); + // CHECK: blasStatus = hipblasCsyr2k(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, ldb, &complexb, &complexC, ldc); + // CHECK-NEXT: blasStatus = hipblasCsyr2k(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, ldb, &complexb, &complexC, ldc); + blasStatus = cublasCsyr2k(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, ldb, &complexb, &complexC, ldc); + blasStatus = cublasCsyr2k_v2(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, ldb, &complexb, &complexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyr2k_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZsyr2k(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int lda, const hipblasDoubleComplex* BP, int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* CP, int ldc); + // CHECK: blasStatus = hipblasZsyr2k(blasHandle, blasFillMode, transa, n, k, &dcomplexa, 
&dcomplexA, lda, &dcomplexb, ldb, &dcomplexb, &dcomplexC, ldc); + // CHECK-NEXT: blasStatus = hipblasZsyr2k(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, ldb, &dcomplexb, &dcomplexC, ldc); + blasStatus = cublasZsyr2k(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, ldb, &dcomplexb, &dcomplexC, ldc); + blasStatus = cublasZsyr2k_v2(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, ldb, &dcomplexb, &dcomplexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyrkx(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const float* alpha, const float* A, int lda, const float* B, int ldb, const float* beta, float* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSsyrkx(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float* alpha, const float* AP, int lda, const float* BP, int ldb, const float* beta, float* CP, int ldc); + // CHECK: blasStatus = hipblasSsyrkx(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); + blasStatus = cublasSsyrkx(blasHandle, blasFillMode, transa, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyrkx(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const double* alpha, const double* A, int lda, const double* B, int ldb, const double* beta, double* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDsyrkx(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha, const double* AP, int lda, const double* BP, int ldb, const double* beta, double* CP, int ldc); + // CHECK: blasStatus = hipblasDsyrkx(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); + blasStatus = cublasDsyrkx(blasHandle, blasFillMode, transa, n, k, &da, &dA, lda, &dB, ldb, &db, 
&dC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyrkx(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, const cuComplex* beta, cuComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCsyrkx(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* AP, int lda, const hipblasComplex* BP, int ldb, const hipblasComplex* beta, hipblasComplex* CP, int ldc); + // CHECK: blasStatus = hipblasCsyrkx(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + blasStatus = cublasCsyrkx(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyrkx(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZsyrkx(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int lda, const hipblasDoubleComplex* BP, int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* CP, int ldc); + // CHECK: blasStatus = hipblasZsyrkx(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + blasStatus = cublasZsyrkx(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCher2k_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuComplex* alpha, const 
cuComplex* A, int lda, const cuComplex* B, int ldb, const float* beta, cuComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCher2k(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* AP, int lda, const hipblasComplex* BP, int ldb, const float* beta, hipblasComplex* CP, int ldc); + // CHECK: blasStatus = hipblasCher2k(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, ldb, &fb, &complexC, ldc); + // CHECK-NEXT: blasStatus = hipblasCher2k(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, ldb, &fb, &complexC, ldc); + blasStatus = cublasCher2k(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, ldb, &fb, &complexC, ldc); + blasStatus = cublasCher2k_v2(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexb, ldb, &fb, &complexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZher2k_v2(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, const double* beta, cuDoubleComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZher2k(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int lda, const hipblasDoubleComplex* BP, int ldb, const double* beta, hipblasDoubleComplex* CP, int ldc); + // CHECK: blasStatus = hipblasZher2k(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, ldb, &db, &dcomplexC, ldc); + // CHECK-NEXT: blasStatus = hipblasZher2k(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, ldb, &db, &dcomplexC, ldc); + blasStatus = cublasZher2k(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, ldb, &db, &dcomplexC, 
ldc); + blasStatus = cublasZher2k_v2(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, ldb, &db, &dcomplexC, ldc); + long long int strideA = 0; long long int strideB = 0; long long int strideC = 0; From e13b666c181a179036f6a9ed962a19ffc863df92 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 19 Sep 2022 17:33:10 +0200 Subject: [PATCH 02/43] [HIPIFY][#601][BLAS][tests] Synthetic test for cuBLAS API - Part 13 + Added tests for SYRK, SYMM, HEMM, and TRSM v2 functions + Added tests for HERKX (eXtended HERK) and GEAM functions [ToDo] + Mark TRMM functions as UNSUPPORTED (both in `hip` and `roc`) till https://github.com/ROCmSoftwarePlatform/hipBLAS/issues/524 is fixed + File an analogous bug issue to rocBLAS --- src/CUDA2HIP_BLAS_API_functions.cpp | 20 ++-- .../synthetic/libraries/cublas2hipblas.cu | 100 ++++++++++++++++++ 2 files changed, 110 insertions(+), 10 deletions(-) diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index 1305caad..0c8ef776 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -318,20 +318,20 @@ const std::map CUDA_BLAS_FUNCTION_MAP { {"cublasZherkx", {"hipblasZherkx", "rocblas_zherkx", CONV_LIB_FUNC, API_BLAS, 7}}, // SYMM - {"cublasSsymm", {"hipblasSsymm", "rocblas_ssymm", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasDsymm", {"hipblasDsymm", "rocblas_dsymm", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasCsymm", {"hipblasCsymm", "rocblas_csymm", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasZsymm", {"hipblasZsymm", "rocblas_zsymm", CONV_LIB_FUNC, API_BLAS, 7}}, + {"cublasSsymm", {"hipblasSsymm", "rocblas_ssymm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDsymm", {"hipblasDsymm", "rocblas_dsymm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, + {"cublasCsymm", {"hipblasCsymm", "rocblas_csymm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZsymm", {"hipblasZsymm", "rocblas_zsymm", CONV_LIB_FUNC, API_BLAS, 7, 
HIP_SUPPORTED_V2_ONLY}}, // HEMM - {"cublasChemm", {"hipblasChemm", "rocblas_chemm", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasZhemm", {"hipblasZhemm", "rocblas_zhemm", CONV_LIB_FUNC, API_BLAS, 7}}, + {"cublasChemm", {"hipblasChemm", "rocblas_chemm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZhemm", {"hipblasZhemm", "rocblas_zhemm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, // TRSM - {"cublasStrsm", {"hipblasStrsm", "rocblas_strsm", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasDtrsm", {"hipblasDtrsm", "rocblas_dtrsm", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasCtrsm", {"hipblasCtrsm", "rocblas_ctrsm", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasZtrsm", {"hipblasZtrsm", "rocblas_ztrsm", CONV_LIB_FUNC, API_BLAS, 7}}, + {"cublasStrsm", {"hipblasStrsm", "rocblas_strsm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, + {"cublasDtrsm", {"hipblasDtrsm", "rocblas_dtrsm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, + {"cublasCtrsm", {"hipblasCtrsm", "rocblas_ctrsm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, + {"cublasZtrsm", {"hipblasZtrsm", "rocblas_ztrsm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, // TRMM {"cublasStrmm", {"hipblasStrmm", "rocblas_strmm", CONV_LIB_FUNC, API_BLAS, 7}}, diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu index b4eab679..e1097197 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu @@ -1296,6 +1296,106 @@ int main() { blasStatus = cublasZher2k(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, ldb, &db, &dcomplexC, ldc); blasStatus = cublasZher2k_v2(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexb, ldb, &db, &dcomplexC, ldc); + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCherkx(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const 
cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, const float* beta, cuComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCherkx(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* AP, int lda, const hipblasComplex* BP, int ldb, const float* beta, hipblasComplex* CP, int ldc); + // CHECK: blasStatus = hipblasCherkx(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexB, ldb, &fb, &complexC, ldc); + blasStatus = cublasCherkx(blasHandle, blasFillMode, transa, n, k, &complexa, &complexA, lda, &complexB, ldb, &fb, &complexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZherkx(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, int n, int k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, const double* beta, cuDoubleComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZherkx(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int lda, const hipblasDoubleComplex* BP, int ldb, const double* beta, hipblasDoubleComplex* CP, int ldc); + // CHECK: blasStatus = hipblasZherkx(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &db, &dcomplexC, ldc); + blasStatus = cublasZherkx(blasHandle, blasFillMode, transa, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &db, &dcomplexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsymm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, int n, const float* alpha, const float* A, int lda, const float* B, int ldb, const float* beta, float* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSsymm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const float* 
alpha, const float* AP, int lda, const float* BP, int ldb, const float* beta, float* CP, int ldc); + // CHECK: blasStatus = hipblasSsymm(blasHandle, blasSideMode, blasFillMode, m, n, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); + // CHECK-NEXT: blasStatus = hipblasSsymm(blasHandle, blasSideMode, blasFillMode, m, n, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); + blasStatus = cublasSsymm(blasHandle, blasSideMode, blasFillMode, m, n, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); + blasStatus = cublasSsymm_v2(blasHandle, blasSideMode, blasFillMode, m, n, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsymm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, int n, const double* alpha, const double* A, int lda, const double* B, int ldb, const double* beta, double* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDsymm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const double* alpha, const double* AP, int lda, const double* BP, int ldb, const double* beta, double* CP, int ldc); + // CHECK: blasStatus = hipblasDsymm(blasHandle, blasSideMode, blasFillMode, m, n, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); + // CHECK-NEXT: blasStatus = hipblasDsymm(blasHandle, blasSideMode, blasFillMode, m, n, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); + blasStatus = cublasDsymm(blasHandle, blasSideMode, blasFillMode, m, n, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); + blasStatus = cublasDsymm_v2(blasHandle, blasSideMode, blasFillMode, m, n, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsymm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, int n, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, const cuComplex* beta, cuComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCsymm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int 
m, int n, const hipblasComplex* alpha, const hipblasComplex* AP, int lda, const hipblasComplex* BP, int ldb, const hipblasComplex* beta, hipblasComplex* CP, int ldc); + // CHECK: blasStatus = hipblasCsymm(blasHandle, blasSideMode, blasFillMode, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + // CHECK-NEXT: blasStatus = hipblasCsymm(blasHandle, blasSideMode, blasFillMode, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + blasStatus = cublasCsymm(blasHandle, blasSideMode, blasFillMode, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + blasStatus = cublasCsymm_v2(blasHandle, blasSideMode, blasFillMode, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsymm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZsymm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int lda, const hipblasDoubleComplex* BP, int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* CP, int ldc); + // CHECK: blasStatus = hipblasZsymm(blasHandle, blasSideMode, blasFillMode, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + // CHECK-NEXT: blasStatus = hipblasZsymm(blasHandle, blasSideMode, blasFillMode, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + blasStatus = cublasZsymm(blasHandle, blasSideMode, blasFillMode, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + blasStatus = cublasZsymm_v2(blasHandle, blasSideMode, blasFillMode, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, 
ldb, &dcomplexb, &dcomplexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChemm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, int n, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* B, int ldb, const cuComplex* beta, cuComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasChemm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasComplex* alpha, const hipblasComplex* AP, int lda, const hipblasComplex* BP, int ldb, const hipblasComplex* beta, hipblasComplex* CP, int ldc); + // CHECK: blasStatus = hipblasChemm(blasHandle, blasSideMode, blasFillMode, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + // CHECK-NEXT: blasStatus = hipblasChemm(blasHandle, blasSideMode, blasFillMode, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + blasStatus = cublasChemm(blasHandle, blasSideMode, blasFillMode, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + blasStatus = cublasChemm_v2(blasHandle, blasSideMode, blasFillMode, m, n, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhemm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* B, int ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZhemm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int lda, const hipblasDoubleComplex* BP, int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* CP, int ldc); + // CHECK: blasStatus = hipblasZhemm(blasHandle, blasSideMode, blasFillMode, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, 
&dcomplexC, ldc); + // CHECK-NEXT: blasStatus = hipblasZhemm(blasHandle, blasSideMode, blasFillMode, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + blasStatus = cublasZhemm(blasHandle, blasSideMode, blasFillMode, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + blasStatus = cublasZhemm_v2(blasHandle, blasSideMode, blasFillMode, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const float* alpha, const float* A, int lda, float* B, int ldb); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasStrsm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float* alpha, float* AP, int lda, float* BP, int ldb); + // CHECK: blasStatus = hipblasStrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, &fA, lda, &fB, ldb); + // CHECK-NEXT: blasStatus = hipblasStrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, &fA, lda, &fB, ldb); + blasStatus = cublasStrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, &fA, lda, &fB, ldb); + blasStatus = cublasStrsm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, &fA, lda, &fB, ldb); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const double* alpha, const double* A, int lda, double* B, int ldb); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDtrsm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double* alpha, double* 
AP, int lda, double* BP, int ldb); + // CHECK: blasStatus = hipblasDtrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb); + // CHECK-NEXT: blasStatus = hipblasDtrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb); + blasStatus = cublasDtrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb); + blasStatus = cublasDtrsm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, &dA, lda, &dB, ldb); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuComplex* alpha, const cuComplex* A, int lda, cuComplex* B, int ldb); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCtrsm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, hipblasComplex* AP, int lda, hipblasComplex* BP, int ldb); + // CHECK: blasStatus = hipblasCtrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb); + // CHECK-NEXT: blasStatus = hipblasCtrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb); + blasStatus = cublasCtrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb); + blasStatus = cublasCtrsm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, &complexA, lda, &complexB, ldb); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsm_v2(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, cuDoubleComplex* B, int ldb); + // HIP: 
HIPBLAS_EXPORT hipblasStatus_t hipblasZtrsm(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* AP, int lda, hipblasDoubleComplex* BP, int ldb); + // CHECK: blasStatus = hipblasZtrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb); + // CHECK-NEXT: blasStatus = hipblasZtrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb); + blasStatus = cublasZtrsm(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb); + blasStatus = cublasZtrsm_v2(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgeam(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, const float* alpha, const float* A, int lda, const float* beta, const float* B, int ldb, float* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgeam(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const float* alpha, const float* AP, int lda, const float* beta, const float* BP, int ldb, float* CP, int ldc); + // CHECK: blasStatus = hipblasSgeam(blasHandle, transa, transb, m, n, &fa, &fA, lda, &fb, &fB, ldb, &fC, ldc); + blasStatus = cublasSgeam(blasHandle, transa, transb, m, n, &fa, &fA, lda, &fb, &fB, ldb, &fC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgeam(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, const double* alpha, const double* A, int lda, const double* beta, const double* B, int ldb, double* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgeam(hipblasHandle_t handle, hipblasOperation_t transA, 
hipblasOperation_t transB, int m, int n, const double* alpha, const double* AP, int lda, const double* beta, const double* BP, int ldb, double* CP, int ldc); + // CHECK: blasStatus = hipblasDgeam(blasHandle, transa, transb, m, n, &da, &dA, lda, &db, &dB, ldb, &dC, ldc); + blasStatus = cublasDgeam(blasHandle, transa, transb, m, n, &da, &dA, lda, &db, &dB, ldb, &dC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgeam(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, const cuComplex* alpha, const cuComplex* A, int lda, const cuComplex* beta, const cuComplex* B, int ldb, cuComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgeam(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const hipblasComplex* alpha, const hipblasComplex* AP, int lda, const hipblasComplex* beta, const hipblasComplex* BP, int ldb, hipblasComplex* CP, int ldc); + // CHECK: blasStatus = hipblasCgeam(blasHandle, transa, transb, m, n, &complexa, &complexA, lda, &complexb, &complexB, ldb, &complexC, ldc); + blasStatus = cublasCgeam(blasHandle, transa, transb, m, n, &complexa, &complexA, lda, &complexb, &complexB, ldb, &complexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgeam(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, const cuDoubleComplex* beta, const cuDoubleComplex* B, int ldb, cuDoubleComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgeam(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int lda, const hipblasDoubleComplex* beta, const hipblasDoubleComplex* BP, int ldb, hipblasDoubleComplex* CP, int ldc); + // CHECK: blasStatus = hipblasZgeam(blasHandle, transa, transb, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexb, 
&dcomplexB, ldb, &dcomplexC, ldc); + blasStatus = cublasZgeam(blasHandle, transa, transb, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexb, &dcomplexB, ldb, &dcomplexC, ldc); + long long int strideA = 0; long long int strideB = 0; long long int strideC = 0; From d8fa3c5d731d7fef977944dc905b5eff6e9eb5ef Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 20 Sep 2022 17:46:51 +0200 Subject: [PATCH 03/43] [HIPIFY][#601][BLAS][tests] Synthetic test for cuBLAS API - Part 14 + Added tests for the following BLAS functions: GETRF - Batched LU Batched inversion based on LU factorization from getrf Batched solver based on LU factorization from getrf TRSM - Batched Triangular Solver Batch QR Factorization --- .../synthetic/libraries/cublas2hipblas.cu | 109 ++++++++++++++++++ 1 file changed, 109 insertions(+) diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu index e1097197..9adcf3b7 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu @@ -144,6 +144,7 @@ int main() { blasStatus = cublasGetPointerMode_v2(blasHandle, &blasPointerMode); int n = 0; + int nrhs = 0; int m = 0; int num = 0; int lda = 0; @@ -156,6 +157,8 @@ int main() { int kl = 0; int ku = 0; int batchCount = 0; + int P = 0; + int info = 0; void* image = nullptr; void* image_2 = nullptr; void* deviceptr = nullptr; @@ -221,6 +224,7 @@ int main() { float** fAarray = 0; float** fBarray = 0; float** fCarray = 0; + float** fTauarray = 0; double da = 0; double dA = 0; @@ -240,6 +244,7 @@ int main() { double** dAarray = 0; double** dBarray = 0; double** dCarray = 0; + double** dTauarray = 0; // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSnrm2_v2(cublasHandle_t handle, int n, const float* x, int incx, float* result); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSnrm2(hipblasHandle_t handle, int n, const float* x, int incx, float* result); @@ -263,16 +268,20 @@ int 
main() { // CHECK: hipComplex** complexAarray = 0; // CHECK-NEXT: hipComplex** complexBarray = 0; // CHECK-NEXT: hipComplex** complexCarray = 0; + // CHECK-NEXT: hipComplex** complexTauarray = 0; cuComplex** complexAarray = 0; cuComplex** complexBarray = 0; cuComplex** complexCarray = 0; + cuComplex** complexTauarray = 0; // CHECK: hipDoubleComplex** dcomplexAarray = 0; // CHECK-NEXT: hipDoubleComplex** dcomplexBarray = 0; // CHECK-NEXT: hipDoubleComplex** dcomplexCarray = 0; + // CHECK-NEXT: hipDoubleComplex** dcomplexTauarray = 0; cuDoubleComplex** dcomplexAarray = 0; cuDoubleComplex** dcomplexBarray = 0; cuDoubleComplex** dcomplexCarray = 0; + cuDoubleComplex** dcomplexTauarray = 0; // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasScnrm2_v2(cublasHandle_t handle, int n, const cuComplex* x, int incx, float* result); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasScnrm2(hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, float* result); @@ -1396,6 +1405,106 @@ int main() { // CHECK: blasStatus = hipblasZgeam(blasHandle, transa, transb, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexb, &dcomplexB, ldb, &dcomplexC, ldc); blasStatus = cublasZgeam(blasHandle, transa, transb, m, n, &dcomplexa, &dcomplexA, lda, &dcomplexb, &dcomplexB, ldb, &dcomplexC, ldc); + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgetrfBatched(cublasHandle_t handle, int n, float* const A[], int lda, int* P, int* info, int batchSize); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgetrfBatched(hipblasHandle_t handle, const int n, float* const A[], const int lda, int* ipiv, int* info, const int batchCount); + // CHECK: blasStatus = hipblasSgetrfBatched(blasHandle, n, fAarray, lda, &P, &info, batchCount); + blasStatus = cublasSgetrfBatched(blasHandle, n, fAarray, lda, &P, &info, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgetrfBatched(cublasHandle_t handle, int n, double* const A[], int lda, int* P, int* info, int batchSize); + // HIP: 
HIPBLAS_EXPORT hipblasStatus_t hipblasDgetrfBatched(hipblasHandle_t handle, const int n, double* const A[], const int lda, int* ipiv, int* info, const int batchCount); + // CHECK: blasStatus = hipblasDgetrfBatched(blasHandle, n, dAarray, lda, &P, &info, batchCount); + blasStatus = cublasDgetrfBatched(blasHandle, n, dAarray, lda, &P, &info, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgetrfBatched(cublasHandle_t handle, int n, cuComplex* const A[], int lda, int* P, int* info, int batchSize); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgetrfBatched(hipblasHandle_t handle, const int n, hipblasComplex* const A[], const int lda, int* ipiv, int* info, const int batchCount); + // CHECK: blasStatus = hipblasCgetrfBatched(blasHandle, n, complexAarray, lda, &P, &info, batchCount); + blasStatus = cublasCgetrfBatched(blasHandle, n, complexAarray, lda, &P, &info, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgetrfBatched(cublasHandle_t handle, int n, cuDoubleComplex* const A[], int lda, int* P, int* info, int batchSize); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgetrfBatched(hipblasHandle_t handle, const int n, hipblasDoubleComplex* const A[], const int lda, int* ipiv, int* info, const int batchCount); + // CHECK: blasStatus = hipblasZgetrfBatched(blasHandle, n, dcomplexAarray, lda, &P, &info, batchCount); + blasStatus = cublasZgetrfBatched(blasHandle, n, dcomplexAarray, lda, &P, &info, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgetriBatched(cublasHandle_t handle, int n, const float* const A[], int lda, const int* P, float* const C[], int ldc, int* info, int batchSize); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgetriBatched(hipblasHandle_t handle, const int n, float* const A[], const int lda, int* ipiv, float* const C[], const int ldc, int* info, const int batchCount); + // CHECK: blasStatus = hipblasSgetriBatched(blasHandle, n, fAarray, lda, &P, fCarray, ldc, &info, batchCount); + 
blasStatus = cublasSgetriBatched(blasHandle, n, fAarray, lda, &P, fCarray, ldc, &info, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgetriBatched(cublasHandle_t handle, int n, const double* const A[], int lda, const int* P, double* const C[], int ldc, int* info, int batchSize); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgetriBatched(hipblasHandle_t handle, const int n, double* const A[], const int lda, int* ipiv, double* const C[], const int ldc, int* info, const int batchCount); + // CHECK: blasStatus = hipblasDgetriBatched(blasHandle, n, dAarray, lda, &P, dCarray, ldc, &info, batchCount); + blasStatus = cublasDgetriBatched(blasHandle, n, dAarray, lda, &P, dCarray, ldc, &info, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgetriBatched(cublasHandle_t handle, int n, const cuComplex* const A[], int lda, const int* P, cuComplex* const C[], int ldc, int* info, int batchSize); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgetriBatched(hipblasHandle_t handle, const int n, hipblasComplex* const A[], const int lda, int* ipiv, hipblasComplex* const C[], const int ldc, int* info, const int batchCount); + // CHECK: blasStatus = hipblasCgetriBatched(blasHandle, n, complexAarray, lda, &P, complexCarray, ldc, &info, batchCount); + blasStatus = cublasCgetriBatched(blasHandle, n, complexAarray, lda, &P, complexCarray, ldc, &info, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgetriBatched(cublasHandle_t handle, int n, const cuDoubleComplex* const A[], int lda, const int* P, cuDoubleComplex* const C[], int ldc, int* info, int batchSize); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgetriBatched(hipblasHandle_t handle, const int n, hipblasDoubleComplex* const A[], const int lda, int* ipiv, hipblasDoubleComplex* const C[], const int ldc, int* info, const int batchCount); + // CHECK: blasStatus = hipblasZgetriBatched(blasHandle, n, dcomplexAarray, lda, &P, dcomplexCarray, ldc, &info, batchCount); + 
blasStatus = cublasZgetriBatched(blasHandle, n, dcomplexAarray, lda, &P, dcomplexCarray, ldc, &info, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const float* const Aarray[], int lda, const int* devIpiv, float* const Barray[], int ldb, int* info, int batchSize); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, float* const A[], const int lda, const int* ipiv, float* const B[], const int ldb, int* info, const int batchCount); + // CHECK: blasStatus = hipblasSgetrsBatched(blasHandle, transa, n, nrhs, fAarray, lda, &P, fBarray, ldb, &info, batchCount); + blasStatus = cublasSgetrsBatched(blasHandle, transa, n, nrhs, fAarray, lda, &P, fBarray, ldb, &info, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const double* const Aarray[], int lda, const int* devIpiv, double* const Barray[], int ldb, int* info, int batchSize); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, double* const A[], const int lda, const int* ipiv, double* const B[], const int ldb, int* info, const int batchCount); + // CHECK: blasStatus = hipblasDgetrsBatched(blasHandle, transa, n, nrhs, dAarray, lda, &P, dBarray, ldb, &info, batchCount); + blasStatus = cublasDgetrsBatched(blasHandle, transa, n, nrhs, dAarray, lda, &P, dBarray, ldb, &info, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const cuComplex* const Aarray[], int lda, const int* devIpiv, cuComplex* const Barray[], int ldb, int* info, int batchSize); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgetrsBatched(hipblasHandle_t handle, const 
hipblasOperation_t trans, const int n, const int nrhs, hipblasComplex* const A[], const int lda, const int* ipiv, hipblasComplex* const B[], const int ldb, int* info, const int batchCount); + // CHECK: blasStatus = hipblasCgetrsBatched(blasHandle, transa, n, nrhs, complexAarray, lda, &P, complexBarray, ldb, &info, batchCount); + blasStatus = cublasCgetrsBatched(blasHandle, transa, n, nrhs, complexAarray, lda, &P, complexBarray, ldb, &info, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgetrsBatched(cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, const cuDoubleComplex* const Aarray[], int lda, const int* devIpiv, cuDoubleComplex* const Barray[], int ldb, int* info, int batchSize); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgetrsBatched(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasDoubleComplex* const A[], const int lda, const int* ipiv, hipblasDoubleComplex* const B[], const int ldb, int* info, const int batchCount); + // CHECK: blasStatus = hipblasZgetrsBatched(blasHandle, transa, n, nrhs, dcomplexAarray, lda, &P, dcomplexBarray, ldb, &info, batchCount); + blasStatus = cublasZgetrsBatched(blasHandle, transa, n, nrhs, dcomplexAarray, lda, &P, dcomplexBarray, ldb, &info, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const float* alpha, const float* const A[], int lda, float* const B[], int ldb, int batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasStrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float* alpha, float* const AP[], int lda, float* BP[], int ldb, int batchCount); + // CHECK: blasStatus = hipblasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, 
fAarray, lda, fBarray, ldb, batchCount); + blasStatus = cublasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray, lda, fBarray, ldb, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const double* alpha, const double* const A[], int lda, double* const B[], int ldb, int batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDtrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double* alpha, double* const AP[], int lda, double* BP[], int ldb, int batchCount); + // CHECK: blasStatus = hipblasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray, lda, dBarray, ldb, batchCount); + blasStatus = cublasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray, lda, dBarray, ldb, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuComplex* alpha, const cuComplex* const A[], int lda, cuComplex* const B[], int ldb, int batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCtrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, hipblasComplex* const AP[], int lda, hipblasComplex* BP[], int ldb, int batchCount); + // CHECK: blasStatus = hipblasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray, lda, complexBarray, ldb, batchCount); + blasStatus = cublasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, 
complexAarray, lda, complexBarray, ldb, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* const A[], int lda, cuDoubleComplex* const B[], int ldb, int batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZtrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* const AP[], int lda, hipblasDoubleComplex* BP[], int ldb, int batchCount); + // CHECK: blasStatus = hipblasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, batchCount); + blasStatus = cublasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgeqrfBatched(cublasHandle_t handle, int m, int n, float* const Aarray[], int lda, float* const TauArray[], int* info, int batchSize); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgeqrfBatched(hipblasHandle_t handle, const int m, const int n, float* const A[], const int lda, float* const ipiv[], int* info, const int batchCount); + // CHECK: blasStatus = hipblasSgeqrfBatched(blasHandle, m, n, fAarray, lda, fTauarray, &info, batchCount); + blasStatus = cublasSgeqrfBatched(blasHandle, m, n, fAarray, lda, fTauarray, &info, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgeqrfBatched(cublasHandle_t handle, int m, int n, double* const Aarray[], int lda, double* const TauArray[], int* info, int batchSize); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgeqrfBatched(hipblasHandle_t handle, const int m, const int n, double* const A[], const int lda, 
double* const ipiv[], int* info, const int batchCount); + // CHECK: blasStatus = hipblasDgeqrfBatched(blasHandle, m, n, dAarray, lda, dTauarray, &info, batchCount); + blasStatus = cublasDgeqrfBatched(blasHandle, m, n, dAarray, lda, dTauarray, &info, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgeqrfBatched(cublasHandle_t handle, int m, int n, cuComplex* const Aarray[], int lda, cuComplex* const TauArray[], int* info, int batchSize); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgeqrfBatched(hipblasHandle_t handle, const int m, const int n, hipblasComplex* const A[], const int lda, hipblasComplex* const ipiv[], int* info, const int batchCount); + // CHECK: blasStatus = hipblasCgeqrfBatched(blasHandle, m, n, complexAarray, lda, complexTauarray, &info, batchCount); + blasStatus = cublasCgeqrfBatched(blasHandle, m, n, complexAarray, lda, complexTauarray, &info, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgeqrfBatched(cublasHandle_t handle, int m, int n, cuDoubleComplex* const Aarray[], int lda, cuDoubleComplex* const TauArray[], int* info, int batchSize); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgeqrfBatched(hipblasHandle_t handle, const int m, const int n, hipblasDoubleComplex* const A[], const int lda, hipblasDoubleComplex* const ipiv[], int* info, const int batchCount); + // CHECK: blasStatus = hipblasZgeqrfBatched(blasHandle, m, n, dcomplexAarray, lda, dcomplexTauarray, &info, batchCount); + blasStatus = cublasZgeqrfBatched(blasHandle, m, n, dcomplexAarray, lda, dcomplexTauarray, &info, batchCount); + long long int strideA = 0; long long int strideB = 0; long long int strideC = 0; From 1c6664b81f86c484b375c990fd49d23c890a36af Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 20 Sep 2022 23:11:51 +0200 Subject: [PATCH 04/43] [HIPIFY][doc] LLVM 15.0.1 is the latest supported LLVM release + No patches are needed + Update README.md accordingly + Tested on Windows 10 and Ubuntu 21.10 --- README.md | 
57 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 7e7f8871..728f3bed 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ After applying all the matchers, the output HIP source is produced. `hipify-clang` requires: -1. [**LLVM+CLANG**](http://releases.llvm.org) of at least version [4.0.0](http://releases.llvm.org/download.html#4.0.0); the latest stable and recommended release: [**15.0.0**](https://github.com/llvm/llvm-project/releases/tag/llvmorg-15.0.0). +1. [**LLVM+CLANG**](http://releases.llvm.org) of at least version [4.0.0](http://releases.llvm.org/download.html#4.0.0); the latest stable and recommended release: [**15.0.1**](https://github.com/llvm/llvm-project/releases/tag/llvmorg-15.0.1). 2. [**CUDA**](https://developer.nvidia.com/cuda-downloads) of at least version [8.0](https://developer.nvidia.com/cuda-80-ga2-download-archive), the latest supported version is [**11.7.1**](https://developer.nvidia.com/cuda-downloads). @@ -170,8 +170,9 @@ After applying all the matchers, the output HIP source is produced. + 14.0.5, - 14.0.6, - 15.0.0 + 14.0.6,
+ 15.0.0, + 15.0.1 11.7.1 LATEST STABLE CONFIG @@ -187,7 +188,7 @@ After applying all the matchers, the output HIP source is produced. In most cases, you can get a suitable version of `LLVM+CLANG` with your package manager. Failing that or having multiple versions of `LLVM`, you can [download a release archive](http://releases.llvm.org/), build or install it, and set -[CMAKE_PREFIX_PATH](https://cmake.org/cmake/help/v3.5/variable/CMAKE_PREFIX_PATH.html) so `cmake` can find it; for instance: `-DCMAKE_PREFIX_PATH=d:\LLVM\15.0.0\dist` +[CMAKE_PREFIX_PATH](https://cmake.org/cmake/help/v3.5/variable/CMAKE_PREFIX_PATH.html) so `cmake` can find it; for instance: `-DCMAKE_PREFIX_PATH=d:\LLVM\15.0.1\dist` ### hipify-clang: usage @@ -285,7 +286,7 @@ Run `Visual Studio 16 2019`, open the generated `LLVM.sln`, build all, and build **LLVM >= 10.0.0:** -1. download [`LLVM project`](https://github.com/llvm/llvm-project/releases/tag/llvmorg-15.0.0) sources; +1. download [`LLVM project`](https://github.com/llvm/llvm-project/releases/tag/llvmorg-15.0.1) sources; 2. 
build [`LLVM project`](http://llvm.org/docs/CMake.html): ```bash @@ -350,21 +351,21 @@ Run `Visual Studio 17 2022`, open the generated `LLVM.sln`, build all, build pro * Install `lit` into `python`: - - ***Linux***: `python /usr/llvm/15.0.0/llvm-project/llvm/utils/lit/setup.py install` + - ***Linux***: `python /usr/llvm/15.0.1/llvm-project/llvm/utils/lit/setup.py install` - - ***Windows***: `python d:/LLVM/15.0.0/llvm-project/llvm/utils/lit/setup.py install` + - ***Windows***: `python d:/LLVM/15.0.1/llvm-project/llvm/utils/lit/setup.py install` * Starting with LLVM 6.0.1 path to `llvm-lit` python script should be specified by the `LLVM_EXTERNAL_LIT` option: - - ***Linux***: `-DLLVM_EXTERNAL_LIT=/usr/llvm/15.0.0/build/bin/llvm-lit` + - ***Linux***: `-DLLVM_EXTERNAL_LIT=/usr/llvm/15.0.1/build/bin/llvm-lit` - - ***Windows***: `-DLLVM_EXTERNAL_LIT=d:/LLVM/15.0.0/build/Release/bin/llvm-lit.py` + - ***Windows***: `-DLLVM_EXTERNAL_LIT=d:/LLVM/15.0.1/build/Release/bin/llvm-lit.py` * `FileCheck`: - - ***Linux***: copy from `/usr/llvm/15.0.0/build/bin/` to `CMAKE_INSTALL_PREFIX/dist/bin` + - ***Linux***: copy from `/usr/llvm/15.0.1/build/bin/` to `CMAKE_INSTALL_PREFIX/dist/bin` - - ***Windows***: copy from `d:/LLVM/15.0.0/build/Release/bin` to `CMAKE_INSTALL_PREFIX/dist/bin` + - ***Windows***: copy from `d:/LLVM/15.0.1/build/Release/bin` to `CMAKE_INSTALL_PREFIX/dist/bin` - Or specify the path to `FileCheck` in `CMAKE_INSTALL_PREFIX` option @@ -386,7 +387,7 @@ Ubuntu 14: LLVM 4.0.0 - 7.1.0, CUDA 7.0 - 9.0, cuDNN 5.0.5 - 7.6.5 Ubuntu 16-18: LLVM 8.0.0 - 14.0.6, CUDA 8.0 - 10.2, cuDNN 5.1.10 - 8.0.5 -Ubuntu 20-21: LLVM 9.0.0 - 15.0.0, CUDA 8.0 - 11.7.1, cuDNN 5.1.10 - 8.4.1 +Ubuntu 20-21: LLVM 9.0.0 - 15.0.1, CUDA 8.0 - 11.7.1, cuDNN 5.1.10 - 8.4.1 Minimum build system requirements for the above configurations: @@ -403,11 +404,11 @@ cmake -DHIPIFY_CLANG_TESTS=1 \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=../dist \ - -DCMAKE_PREFIX_PATH=/usr/llvm/15.0.0/dist \ + 
-DCMAKE_PREFIX_PATH=/usr/llvm/15.0.1/dist \ -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda \ -DCUDA_DNN_ROOT_DIR=/usr/local/cuda \ -DCUDA_CUB_ROOT_DIR=/usr/CUB \ - -DLLVM_EXTERNAL_LIT=/usr/llvm/15.0.0/build/bin/llvm-lit \ + -DLLVM_EXTERNAL_LIT=/usr/llvm/15.0.1/build/bin/llvm-lit \ ../hipify ``` *A corresponding successful output:* @@ -425,14 +426,14 @@ cmake -- Detecting CXX compile features -- Detecting CXX compile features - done -- Found ZLIB: /usr/lib/x86_64-linux-gnu/libz.so (found version "1.2.11") --- Found LLVM 15.0.0: --- - CMake module path: /usr/llvm/15.0.0/dist/lib/cmake/llvm --- - Include path : /usr/llvm/15.0.0/dist/include --- - Binary path : /usr/llvm/15.0.0/dist/bin +-- Found LLVM 15.0.1: +-- - CMake module path: /usr/llvm/15.0.1/dist/lib/cmake/llvm +-- - Include path : /usr/llvm/15.0.1/dist/include +-- - Binary path : /usr/llvm/15.0.1/dist/bin -- Linker detection: GNU ld -- Found PythonInterp: /usr/bin/python (found suitable version "3.9.7", minimum required is "2.7") -- Found lit: /usr/local/bin/lit --- Found FileCheck: /usr/llvm/15.0.0/dist/bin/FileCheck +-- Found FileCheck: /usr/llvm/15.0.1/dist/bin/FileCheck -- Looking for pthread.h -- Looking for pthread.h - found -- Performing Test CMAKE_HAVE_LIBC_PTHREAD @@ -451,7 +452,7 @@ make test-hipify Running HIPify regression tests ======================================== CUDA 11.7 - will be used for testing -LLVM 15.0.0 - will be used for testing +LLVM 15.0.1 - will be used for testing x86_64 - Platform architecture Linux 5.13.0-21-generic - Platform OS 64 - hipify-clang binary bitness @@ -565,7 +566,7 @@ Testing Time: 6.22s | 11.0.1 - 11.1.0 | 7.0 - 11.2.2 | 7.6.5 - 8.0.5 | 2017.15.9.31, 2019.16.8.4 | 3.19.3 | 3.9.2 | | 12.0.0 - 13.0.1 | 7.0 - 11.5.1 | 7.6.5 - 8.3.2 | 2017.15.9.43, 2019.16.11.9 | 3.22.2 | 3.10.2 | | 14.0.0 - 14.0.6 | 7.0 - 11.7.1 | 8.0.5 - 8.4.1 | 2017.15.9.49, 2019.16.11.17, 2022.17.2.6 | 3.24.0 | 3.10.6 | -| 15.0.0 | 7.0 - 11.7.1 | 8.0.5 - 8.4.1 | 2017.15.9.50, 2019.16.11.18, 
2022.17.3.3 | 3.24.1 | 3.10.6 | +| 15.0.0 - 15.0.1 | 7.0 - 11.7.1 | 8.0.5 - 8.4.1 | 2017.15.9.50, 2019.16.11.18, 2022.17.3.3 | 3.24.1 | 3.10.6 | | 16.0.0git | 7.0 - 11.7.1 | 8.0.5 - 8.4.1 | 2017.15.9.50, 2019.16.11.18, 2022.17.3.3 | 3.24.1 | 3.10.6 | *Building with testing support by `Visual Studio 17 2022` on `Windows 10`:* @@ -578,23 +579,23 @@ cmake -DHIPIFY_CLANG_TESTS=1 \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=../dist \ - -DCMAKE_PREFIX_PATH=d:/LLVM/15.0.0/dist \ + -DCMAKE_PREFIX_PATH=d:/LLVM/15.0.1/dist \ -DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.7" \ -DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v11.7" \ -DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-11.7-windows-x64-v8.4.1 \ -DCUDA_CUB_ROOT_DIR=d:/GIT/cub \ - -DLLVM_EXTERNAL_LIT=d:/LLVM/15.0.0/build/Release/bin/llvm-lit.py \ + -DLLVM_EXTERNAL_LIT=d:/LLVM/15.0.1/build/Release/bin/llvm-lit.py \ ../hipify ``` *A corresponding successful output:* ```shell --- Found LLVM 15.0.0: --- - CMake module path: d:/LLVM/15.0.0/dist/lib/cmake/llvm --- - Include path : d:/LLVM/15.0.0/dist/include --- - Binary path : d:/LLVM/15.0.0/dist/bin +-- Found LLVM 15.0.1: +-- - CMake module path: d:/LLVM/15.0.1/dist/lib/cmake/llvm +-- - Include path : d:/LLVM/15.0.1/dist/include +-- - Binary path : d:/LLVM/15.0.1/dist/bin -- Found PythonInterp: c:/Program Files/Python39/python.exe (found suitable version "3.9.5", minimum required is "3.6") -- Found lit: c:/Program Files/Python39/Scripts/lit.exe --- Found FileCheck: d:/LLVM/15.0.0/dist/bin/FileCheck.exe +-- Found FileCheck: d:/LLVM/15.0.1/dist/bin/FileCheck.exe -- Found CUDA: c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.7 (found version "11.7") -- Configuring done -- Generating done From 7778e2c518a023c4491d6ac0e19b81da7ef688bb Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Thu, 22 Sep 2022 14:22:08 +0200 Subject: [PATCH 05/43] [HIPIFY][BLAS][fix] Fix hipBLAS and rocBLAS TRMM functions support types + Mark 
rocBLAS TRMM functions rocblas_(s|d|c|z)trmm_outofplace, as supported only for TRMM v2 CUDA analogues + Mark hipBLAS TRMM functions hipblas(S|D|C|Z)trmm as HIP_UNSUPPORTED + Regenerate and update docs and hipify-perl accordingly [Reasons] + hipBLAS TRMM functions hipblas(S|D|C|Z)trmm, actually, match neither cublas TRMM functions nor cublas TRMM _v2 functions: https://github.com/ROCmSoftwarePlatform/hipBLAS/issues/524 + There is a correspondence between cuBLAS cublas_(s|d|c|z)trmm and rocBLAS TRMM rocblas_(s|d|c|z)trmm_outofplace, not rocblas_(s|d|c|z)trmm: fixed it [ToDo] + Close https://github.com/ROCmSoftwarePlatform/rocBLAS/issues/1265 as erroneous + Remove HIP_UNSUPPORTED mark from hipblas(S|D|C|Z)trmm functions after merging https://github.com/ROCmSoftwarePlatform/hipBLAS/pull/504 + Add cublas2rocblas and update cublas2hipblas synthetic tests --- bin/hipify-perl | 32 ++++++++++----------- doc/markdown/CUBLAS_API_supported_by_HIP.md | 16 +++++------ src/CUDA2HIP_BLAS_API_functions.cpp | 16 +++++------ 3 files changed, 32 insertions(+), 32 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 4dfd064d..2c8356f7 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1076,8 +1076,8 @@ sub rocSubstitutions { subst("cublasCtpmv_v2", "rocblas_ctpmv", "library"); subst("cublasCtpsv", "rocblas_ctpsv", "library"); subst("cublasCtpsv_v2", "rocblas_ctpsv", "library"); - subst("cublasCtrmm", "rocblas_ctrmm", "library"); - subst("cublasCtrmm_v2", "rocblas_ctrmm", "library"); + subst("cublasCtrmm", "rocblas_ctrmm_outofplace", "library"); + subst("cublasCtrmm_v2", "rocblas_ctrmm_outofplace", "library"); subst("cublasCtrmv", "rocblas_ctrmv", "library"); subst("cublasCtrmv_v2", "rocblas_ctrmv", "library"); subst("cublasCtrsm", "rocblas_ctrsm", "library"); @@ -1152,8 +1152,8 @@ sub rocSubstitutions { subst("cublasDtpmv_v2", "rocblas_dtpmv", "library"); subst("cublasDtpsv", "rocblas_dtpsv", "library"); subst("cublasDtpsv_v2", "rocblas_dtpsv", "library"); - 
subst("cublasDtrmm", "rocblas_dtrmm", "library"); - subst("cublasDtrmm_v2", "rocblas_dtrmm", "library"); + subst("cublasDtrmm", "rocblas_dtrmm_outofplace", "library"); + subst("cublasDtrmm_v2", "rocblas_dtrmm_outofplace", "library"); subst("cublasDtrmv", "rocblas_dtrmv", "library"); subst("cublasDtrmv_v2", "rocblas_dtrmv", "library"); subst("cublasDtrsm", "rocblas_dtrsm", "library"); @@ -1273,8 +1273,8 @@ sub rocSubstitutions { subst("cublasStpmv_v2", "rocblas_stpmv", "library"); subst("cublasStpsv", "rocblas_stpsv", "library"); subst("cublasStpsv_v2", "rocblas_stpsv", "library"); - subst("cublasStrmm", "rocblas_strmm", "library"); - subst("cublasStrmm_v2", "rocblas_strmm", "library"); + subst("cublasStrmm", "rocblas_strmm_outofplace", "library"); + subst("cublasStrmm_v2", "rocblas_strmm_outofplace", "library"); subst("cublasStrmv", "rocblas_strmv", "library"); subst("cublasStrmv_v2", "rocblas_strmv", "library"); subst("cublasStrsm", "rocblas_strsm", "library"); @@ -1358,8 +1358,8 @@ sub rocSubstitutions { subst("cublasZtpmv_v2", "rocblas_ztpmv", "library"); subst("cublasZtpsv", "rocblas_ztpsv", "library"); subst("cublasZtpsv_v2", "rocblas_ztpsv", "library"); - subst("cublasZtrmm", "rocblas_ztrmm", "library"); - subst("cublasZtrmm_v2", "rocblas_ztrmm", "library"); + subst("cublasZtrmm", "rocblas_ztrmm_outofplace", "library"); + subst("cublasZtrmm_v2", "rocblas_ztrmm_outofplace", "library"); subst("cublasZtrmv", "rocblas_ztrmv", "library"); subst("cublasZtrmv_v2", "rocblas_ztrmv", "library"); subst("cublasZtrsm", "rocblas_ztrsm", "library"); @@ -2047,8 +2047,6 @@ sub simpleSubstitutions { subst("cublasCtpmv_v2", "hipblasCtpmv", "library"); subst("cublasCtpsv", "hipblasCtpsv", "library"); subst("cublasCtpsv_v2", "hipblasCtpsv", "library"); - subst("cublasCtrmm", "hipblasCtrmm", "library"); - subst("cublasCtrmm_v2", "hipblasCtrmm", "library"); subst("cublasCtrmv", "hipblasCtrmv", "library"); subst("cublasCtrmv_v2", "hipblasCtrmv", "library"); subst("cublasCtrsm", 
"hipblasCtrsm", "library"); @@ -2127,8 +2125,6 @@ sub simpleSubstitutions { subst("cublasDtpmv_v2", "hipblasDtpmv", "library"); subst("cublasDtpsv", "hipblasDtpsv", "library"); subst("cublasDtpsv_v2", "hipblasDtpsv", "library"); - subst("cublasDtrmm", "hipblasDtrmm", "library"); - subst("cublasDtrmm_v2", "hipblasDtrmm", "library"); subst("cublasDtrmv", "hipblasDtrmv", "library"); subst("cublasDtrmv_v2", "hipblasDtrmv", "library"); subst("cublasDtrsm", "hipblasDtrsm", "library"); @@ -2254,8 +2250,6 @@ sub simpleSubstitutions { subst("cublasStpmv_v2", "hipblasStpmv", "library"); subst("cublasStpsv", "hipblasStpsv", "library"); subst("cublasStpsv_v2", "hipblasStpsv", "library"); - subst("cublasStrmm", "hipblasStrmm", "library"); - subst("cublasStrmm_v2", "hipblasStrmm", "library"); subst("cublasStrmv", "hipblasStrmv", "library"); subst("cublasStrmv_v2", "hipblasStrmv", "library"); subst("cublasStrsm", "hipblasStrsm", "library"); @@ -2343,8 +2337,6 @@ sub simpleSubstitutions { subst("cublasZtpmv_v2", "hipblasZtpmv", "library"); subst("cublasZtpsv", "hipblasZtpsv", "library"); subst("cublasZtpsv_v2", "hipblasZtpsv", "library"); - subst("cublasZtrmm", "hipblasZtrmm", "library"); - subst("cublasZtrmm_v2", "hipblasZtrmm", "library"); subst("cublasZtrmv", "hipblasZtrmv", "library"); subst("cublasZtrmv_v2", "hipblasZtrmv", "library"); subst("cublasZtrsm", "hipblasZtrsm", "library"); @@ -7651,6 +7643,8 @@ sub warnHipOnlyUnsupportedFunctions { my $k = 0; foreach $func ( "cublasZtrttp", + "cublasZtrmm_v2", + "cublasZtrmm", "cublasZtpttr", "cublasZmatinvBatched", "cublasZgemm3m", @@ -7659,6 +7653,8 @@ sub warnHipOnlyUnsupportedFunctions { "cublasUint8gemmBias", "cublasSwapEx", "cublasStrttp", + "cublasStrmm_v2", + "cublasStrmm", "cublasStpttr", "cublasSmatinvBatched", "cublasShutdown", @@ -7690,10 +7686,14 @@ sub warnHipOnlyUnsupportedFunctions { "cublasGetCudartVersion", "cublasFree", "cublasDtrttp", + "cublasDtrmm_v2", + "cublasDtrmm", "cublasDtpttr", "cublasDmatinvBatched", 
"cublasDgelsBatched", "cublasCtrttp", + "cublasCtrmm_v2", + "cublasCtrmm", "cublasCtpttr", "cublasCsyrkEx", "cublasCsyrk3mEx", diff --git a/doc/markdown/CUBLAS_API_supported_by_HIP.md b/doc/markdown/CUBLAS_API_supported_by_HIP.md index 238db117..456a93fb 100644 --- a/doc/markdown/CUBLAS_API_supported_by_HIP.md +++ b/doc/markdown/CUBLAS_API_supported_by_HIP.md @@ -483,8 +483,8 @@ |`cublasCsyrk`| | | |`hipblasCsyrk`|3.5.0| | | | |`cublasCsyrk_v2`| | | |`hipblasCsyrk`|3.5.0| | | | |`cublasCsyrkx`| | | |`hipblasCsyrkx`|3.5.0| | | | -|`cublasCtrmm`| | | |`hipblasCtrmm`|3.5.0| | | | -|`cublasCtrmm_v2`| | | |`hipblasCtrmm`|3.5.0| | | | +|`cublasCtrmm`| | | | | | | | | +|`cublasCtrmm_v2`| | | | | | | | | |`cublasCtrsm`| | | |`hipblasCtrsm`|3.5.0| | | | |`cublasCtrsm_v2`| | | |`hipblasCtrsm`|3.5.0| | | | |`cublasDgemm`| | | |`hipblasDgemm`|1.8.2| | | | @@ -498,8 +498,8 @@ |`cublasDsyrk`| | | |`hipblasDsyrk`|3.5.0| | | | |`cublasDsyrk_v2`| | | |`hipblasDsyrk`|3.5.0| | | | |`cublasDsyrkx`| | | |`hipblasDsyrkx`|3.5.0| | | | -|`cublasDtrmm`| | | |`hipblasDtrmm`|3.2.0| | | | -|`cublasDtrmm_v2`| | | |`hipblasDtrmm`|3.2.0| | | | +|`cublasDtrmm`| | | | | | | | | +|`cublasDtrmm_v2`| | | | | | | | | |`cublasDtrsm`| | | |`hipblasDtrsm`|1.8.2| | | | |`cublasDtrsm_v2`| | | |`hipblasDtrsm`|1.8.2| | | | |`cublasHgemm`|7.5| | |`hipblasHgemm`|1.8.2| | | | @@ -516,8 +516,8 @@ |`cublasSsyrk`| | | |`hipblasSsyrk`|3.5.0| | | | |`cublasSsyrk_v2`| | | |`hipblasSsyrk`|3.5.0| | | | |`cublasSsyrkx`| | | |`hipblasSsyrkx`|3.5.0| | | | -|`cublasStrmm`| | | |`hipblasStrmm`|3.2.0| | | | -|`cublasStrmm_v2`| | | |`hipblasStrmm`|3.2.0| | | | +|`cublasStrmm`| | | | | | | | | +|`cublasStrmm_v2`| | | | | | | | | |`cublasStrsm`| | | |`hipblasStrsm`|1.8.2| | | | |`cublasStrsm_v2`| | | |`hipblasStrsm`|1.8.2| | | | |`cublasZgemm`| | | |`hipblasZgemm`|1.8.2| | | | @@ -539,8 +539,8 @@ |`cublasZsyrk`| | | |`hipblasZsyrk`|3.5.0| | | | |`cublasZsyrk_v2`| | | |`hipblasZsyrk`|3.5.0| | | | |`cublasZsyrkx`| | | 
|`hipblasZsyrkx`|3.5.0| | | | -|`cublasZtrmm`| | | |`hipblasZtrmm`|3.5.0| | | | -|`cublasZtrmm_v2`| | | |`hipblasZtrmm`|3.5.0| | | | +|`cublasZtrmm`| | | | | | | | | +|`cublasZtrmm_v2`| | | | | | | | | |`cublasZtrsm`| | | |`hipblasZtrsm`|3.5.0| | | | |`cublasZtrsm_v2`| | | |`hipblasZtrsm`|3.5.0| | | | diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index 0c8ef776..72186afa 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -334,10 +334,10 @@ const std::map CUDA_BLAS_FUNCTION_MAP { {"cublasZtrsm", {"hipblasZtrsm", "rocblas_ztrsm", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY}}, // TRMM - {"cublasStrmm", {"hipblasStrmm", "rocblas_strmm", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasDtrmm", {"hipblasDtrmm", "rocblas_dtrmm", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasCtrmm", {"hipblasCtrmm", "rocblas_ctrmm", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasZtrmm", {"hipblasZtrmm", "rocblas_ztrmm", CONV_LIB_FUNC, API_BLAS, 7}}, + {"cublasStrmm", {"hipblasStrmm", "rocblas_strmm_outofplace", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY | HIP_UNSUPPORTED}}, + {"cublasDtrmm", {"hipblasDtrmm", "rocblas_dtrmm_outofplace", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY | HIP_UNSUPPORTED}}, + {"cublasCtrmm", {"hipblasCtrmm", "rocblas_ctrmm_outofplace", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY | HIP_UNSUPPORTED}}, + {"cublasZtrmm", {"hipblasZtrmm", "rocblas_ztrmm_outofplace", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY | HIP_UNSUPPORTED}}, // ------------------------ CUBLAS BLAS - like extension (cublas_api.h) // GEAM @@ -575,10 +575,10 @@ const std::map CUDA_BLAS_FUNCTION_MAP { {"cublasZtrsm_v2", {"hipblasZtrsm", "rocblas_ztrsm", CONV_LIB_FUNC, API_BLAS, 7}}, // TRMM - {"cublasStrmm_v2", {"hipblasStrmm", "rocblas_strmm", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasDtrmm_v2", {"hipblasDtrmm", "rocblas_dtrmm", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasCtrmm_v2", {"hipblasCtrmm", 
"rocblas_ctrmm", CONV_LIB_FUNC, API_BLAS, 7}}, - {"cublasZtrmm_v2", {"hipblasZtrmm", "rocblas_ztrmm", CONV_LIB_FUNC, API_BLAS, 7}}, + {"cublasStrmm_v2", {"hipblasStrmm", "rocblas_strmm_outofplace", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY | HIP_UNSUPPORTED}}, + {"cublasDtrmm_v2", {"hipblasDtrmm", "rocblas_dtrmm_outofplace", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY | HIP_UNSUPPORTED}}, + {"cublasCtrmm_v2", {"hipblasCtrmm", "rocblas_ctrmm_outofplace", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY | HIP_UNSUPPORTED}}, + {"cublasZtrmm_v2", {"hipblasZtrmm", "rocblas_ztrmm_outofplace", CONV_LIB_FUNC, API_BLAS, 7, HIP_SUPPORTED_V2_ONLY | HIP_UNSUPPORTED}}, // NRM2 {"cublasSnrm2_v2", {"hipblasSnrm2", "rocblas_snrm2", CONV_LIB_FUNC, API_BLAS, 5}}, From 7b9b958e8f1e38e335b716d375522789239e91f2 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Sat, 24 Sep 2022 23:38:14 +0200 Subject: [PATCH 06/43] [HIPIFY][#601][BLAS][tests] Synthetic test for cuBLAS API - Part 15 + Added tests for the following BLAS functions: DGMM, GemmEx, GemmBatchedEx, GemmStridedBatchedEx + Gem(Batched|StridedBatched)Ex have two different signatures (before CUDA 11.0 and after CUDA 10.2) + [Workaround][https://github.com/ROCmSoftwarePlatform/hipBLAS/issues/529]: `cublasComputeType_t` -> `hipblasDatatype_t` (instead of yet unsupported `hipblasComputeType_t`) + Regenerate and update hipify-perl and docs --- bin/hipify-perl | 3 +- doc/markdown/CUBLAS_API_supported_by_HIP.md | 2 +- src/CUDA2HIP_BLAS_API_types.cpp | 4 +- .../synthetic/libraries/cublas2hipblas.cu | 87 ++++++++++++++++++- 4 files changed, 88 insertions(+), 8 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 2c8356f7..74c8a531 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -3319,6 +3319,7 @@ sub simpleSubstitutions { subst("cuDoubleComplex", "hipDoubleComplex", "type"); subst("cuFloatComplex", "hipFloatComplex", "type"); subst("cublasAtomicsMode_t", "hipblasAtomicsMode_t", "type"); + 
subst("cublasComputeType_t", "hipblasDatatype_t", "type"); subst("cublasDataType_t", "hipblasDatatype_t", "type"); subst("cublasDiagType_t", "hipblasDiagType_t", "type"); subst("cublasFillMode_t", "hipblasFillMode_t", "type"); @@ -7699,7 +7700,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasCsyrk3mEx", "cublasCopyEx", "cublasContext", - "cublasComputeType_t", "cublasCmatinvBatched", "cublasCherkEx", "cublasCherk3mEx", @@ -7852,7 +7852,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasCsyrkEx", "cublasCsyrk3mEx", "cublasCopyEx", - "cublasComputeType_t", "cublasCmatinvBatched", "cublasCherkEx", "cublasCherk3mEx", diff --git a/doc/markdown/CUBLAS_API_supported_by_HIP.md b/doc/markdown/CUBLAS_API_supported_by_HIP.md index 456a93fb..e9844337 100644 --- a/doc/markdown/CUBLAS_API_supported_by_HIP.md +++ b/doc/markdown/CUBLAS_API_supported_by_HIP.md @@ -96,7 +96,7 @@ |`CUBLAS_VER_MINOR`|10.1| | | | | | | | |`CUBLAS_VER_PATCH`|10.1| | | | | | | | |`cublasAtomicsMode_t`| | | |`hipblasAtomicsMode_t`|3.10.0| | | | -|`cublasComputeType_t`|11.0| | | | | | | | +|`cublasComputeType_t`|11.0| | |`hipblasDatatype_t`|1.8.2| | | | |`cublasContext`| | | | | | | | | |`cublasDataType_t`|7.5| | |`hipblasDatatype_t`|1.8.2| | | | |`cublasDiagType_t`| | | |`hipblasDiagType_t`|1.8.2| | | | diff --git a/src/CUDA2HIP_BLAS_API_types.cpp b/src/CUDA2HIP_BLAS_API_types.cpp index 997183f1..6cafb97b 100644 --- a/src/CUDA2HIP_BLAS_API_types.cpp +++ b/src/CUDA2HIP_BLAS_API_types.cpp @@ -173,7 +173,9 @@ const std::map CUDA_BLAS_TYPE_NAME_MAP { // TODO: dereferencing: typedef struct cublasContext *cublasHandle_t; {"cublasContext", {"hipblasContext", "_rocblas_handle", CONV_TYPE, API_BLAS, 2, HIP_UNSUPPORTED}}, - {"cublasComputeType_t", {"hipblasComputeType_t", "", CONV_TYPE, API_BLAS, 2, UNSUPPORTED}}, + // NOTE: renamed UNSUPPORTED hipblasComputeType_t to the HIP supported hipblasDatatype_t (workaround) + // TODO: change the type to the correct one after fixing 
https://github.com/ROCmSoftwarePlatform/hipBLAS/issues/529 + {"cublasComputeType_t", {"hipblasDatatype_t", "", CONV_TYPE, API_BLAS, 2}}, {"CUBLAS_COMPUTE_16F", {"HIPBLAS_COMPUTE_16F", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 64 {"CUBLAS_COMPUTE_16F_PEDANTIC", {"HIPBLAS_COMPUTE_16F_PEDANTIC", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 65 {"CUBLAS_COMPUTE_32F", {"HIPBLAS_COMPUTE_32F", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 68 diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu index 9adcf3b7..439dcc65 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu @@ -246,6 +246,10 @@ int main() { double** dCarray = 0; double** dTauarray = 0; + void** voidAarray = nullptr; + void** voidBarray = nullptr; + void** voidCarray = nullptr; + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSnrm2_v2(cublasHandle_t handle, int n, const float* x, int incx, float* result); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSnrm2(hipblasHandle_t handle, int n, const float* x, int incx, float* result); // CHECK: blasStatus = hipblasSnrm2(blasHandle, n, &fx, incx, &fresult); @@ -1466,22 +1470,22 @@ int main() { blasStatus = cublasZgetrsBatched(blasHandle, transa, n, nrhs, dcomplexAarray, lda, &P, dcomplexBarray, ldb, &info, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const float* alpha, const float* const A[], int lda, float* const B[], int ldb, int batchCount); - // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasStrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float* alpha, float* const AP[], int lda, float* BP[], int ldb, int 
batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasStrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float* alpha, float* const AP[], int lda, float* BP[], int ldb, int batchCount); // CHECK: blasStatus = hipblasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray, lda, fBarray, ldb, batchCount); blasStatus = cublasStrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &fa, fAarray, lda, fBarray, ldb, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const double* alpha, const double* const A[], int lda, double* const B[], int ldb, int batchCount); - // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDtrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double* alpha, double* const AP[], int lda, double* BP[], int ldb, int batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDtrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double* alpha, double* const AP[], int lda, double* BP[], int ldb, int batchCount); // CHECK: blasStatus = hipblasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray, lda, dBarray, ldb, batchCount); blasStatus = cublasDtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &da, dAarray, lda, dBarray, ldb, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuComplex* alpha, 
const cuComplex* const A[], int lda, cuComplex* const B[], int ldb, int batchCount); - // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCtrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, hipblasComplex* const AP[], int lda, hipblasComplex* BP[], int ldb, int batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCtrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, hipblasComplex* const AP[], int lda, hipblasComplex* BP[], int ldb, int batchCount); // CHECK: blasStatus = hipblasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray, lda, complexBarray, ldb, batchCount); blasStatus = cublasCtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &complexa, complexAarray, lda, complexBarray, ldb, batchCount); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsmBatched(cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* const A[], int lda, cuDoubleComplex* const B[], int ldb, int batchCount); - // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZtrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* const AP[], int lda, hipblasDoubleComplex* BP[], int ldb, int batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZtrsmBatched(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* const AP[], int lda, hipblasDoubleComplex* 
BP[], int ldb, int batchCount); // CHECK: blasStatus = hipblasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, batchCount); blasStatus = cublasZtrsmBatched(blasHandle, blasSideMode, blasFillMode, transa, blasDiagType, m, n, &dcomplexa, dcomplexAarray, lda, dcomplexBarray, ldb, batchCount); @@ -1505,6 +1509,26 @@ int main() { // CHECK: blasStatus = hipblasZgeqrfBatched(blasHandle, m, n, dcomplexAarray, lda, dcomplexTauarray, &info, batchCount); blasStatus = cublasZgeqrfBatched(blasHandle, m, n, dcomplexAarray, lda, dcomplexTauarray, &info, batchCount); + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSdgmm(cublasHandle_t handle, cublasSideMode_t mode, int m, int n, const float* A, int lda, const float* x, int incx, float* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSdgmm(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const float* AP, int lda, const float* x, int incx, float* CP, int ldc); + // CHECK: blasStatus = hipblasSdgmm(blasHandle, blasSideMode, m, n, &fa, lda, &fx, incx, &fC, ldc); + blasStatus = cublasSdgmm(blasHandle, blasSideMode, m, n, &fa, lda, &fx, incx, &fC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDdgmm(cublasHandle_t handle, cublasSideMode_t mode, int m, int n, const double* A, int lda, const double* x, int incx, double* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDdgmm(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const double* AP, int lda, const double* x, int incx, double* CP, int ldc); + // CHECK: blasStatus = hipblasDdgmm(blasHandle, blasSideMode, m, n, &da, lda, &dx, incx, &dC, ldc); + blasStatus = cublasDdgmm(blasHandle, blasSideMode, m, n, &da, lda, &dx, incx, &dC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCdgmm(cublasHandle_t handle, cublasSideMode_t mode, int m, int n, const cuComplex* A, int lda, const cuComplex* x, int incx, cuComplex* C, int 
ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCdgmm(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const hipblasComplex* AP, int lda, const hipblasComplex* x, int incx, hipblasComplex* CP, int ldc); + // CHECK: blasStatus = hipblasCdgmm(blasHandle, blasSideMode, m, n, &complexa, lda, &complexx, incx, &complexC, ldc); + blasStatus = cublasCdgmm(blasHandle, blasSideMode, m, n, &complexa, lda, &complexx, incx, &complexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdgmm(cublasHandle_t handle, cublasSideMode_t mode, int m, int n, const cuDoubleComplex* A, int lda, const cuDoubleComplex* x, int incx, cuDoubleComplex* C, int ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZdgmm(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const hipblasDoubleComplex* AP, int lda, const hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* CP, int ldc); + // CHECK: blasStatus = hipblasZdgmm(blasHandle, blasSideMode, m, n, &dcomplexa, lda, &dcomplexx, incx, &dcomplexC, ldc); + blasStatus = cublasZdgmm(blasHandle, blasSideMode, m, n, &dcomplexa, lda, &dcomplexx, incx, &dcomplexC, ldc); + long long int strideA = 0; long long int strideB = 0; long long int strideC = 0; @@ -1577,6 +1601,29 @@ int main() { // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgemmStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int lda, long long strideA, const hipblasDoubleComplex* BP, int ldb, long long strideB, const hipblasDoubleComplex* beta, hipblasDoubleComplex* CP, int ldc, long long strideC, int batchCount); // CHECK: blasStatus = hipblasZgemmStridedBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, strideA, &dcomplexB, ldb, strideB, &dcomplexb, &dcomplexC, ldc, strideC, batchCount); blasStatus = cublasZgemmStridedBatched(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, 
strideA, &dcomplexB, ldb, strideB, &dcomplexb, &dcomplexC, ldc, strideC, batchCount); + + void* aptr = nullptr; + void* Aptr = nullptr; + void* bptr = nullptr; + void* Bptr = nullptr; + void* Cptr = nullptr; + + // CHECK: hipblasDatatype_t Atype; + // CHECK-NEXT: hipblasDatatype_t Btype; + // CHECK-NEXT: hipblasDatatype_t Ctype; + cudaDataType Atype; + cudaDataType Btype; + cudaDataType Ctype; +#endif + +#if CUDA_VERSION >= 8000 && CUDA_VERSION < 11000 + // CHECK: hipblasDatatype_t computeType; + cudaDataType computeType; + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, const void* B, cudaDataType Btype, int ldb, const void* beta, void* C, cudaDataType Ctype, int ldc, cudaDataType computeType, cublasGemmAlgo_t algo); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmEx(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A, hipblasDatatype_t aType, int lda, const void* B, hipblasDatatype_t bType, int ldb, const void* beta, void* C, hipblasDatatype_t cType, int ldc, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo); + // CHECK: blasStatus = hipblasGemmEx(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, Bptr, Btype, ldb, bptr, Cptr, Ctype, ldc, computeType, blasGemmAlgo); + blasStatus = cublasGemmEx(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, Bptr, Btype, ldb, bptr, Cptr, Ctype, ldc, computeType, blasGemmAlgo); #endif #if CUDA_VERSION >= 9000 @@ -1584,6 +1631,18 @@ int main() { cublasGemmAlgo_t BLAS_GEMM_DEFAULT = CUBLAS_GEMM_DEFAULT; #endif +#if CUDA_VERSION >= 9010 && CUDA_VERSION < 11000 + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* 
const Aarray[], cudaDataType Atype, int lda, const void* const Barray[], cudaDataType Btype, int ldb, const void* beta, void* const Carray[], cudaDataType Ctype, int ldc, int batchCount, cudaDataType computeType, cublasGemmAlgo_t algo); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmBatchedEx(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A[], hipblasDatatype_t aType, int lda, const void* B[], hipblasDatatype_t bType, int ldb, const void* beta, void* C[], hipblasDatatype_t cType, int ldc, int batchCount, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo); + // CHECK: blasStatus = hipblasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); + blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, computeType, blasGemmAlgo); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, long long int strideA, const void* B, cudaDataType Btype, int ldb, long long int strideB, const void* beta, void* C, cudaDataType Ctype, int ldc, long long int strideC, int batchCount, cudaDataType computeType, cublasGemmAlgo_t algo); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmStridedBatchedEx(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A, hipblasDatatype_t aType, int lda, hipblasStride strideA, const void* B, hipblasDatatype_t bType, int ldb, hipblasStride strideB, const void* beta, void* C, hipblasDatatype_t cType, int ldc, hipblasStride strideC, int batchCount, hipblasDatatype_t 
computeType, hipblasGemmAlgo_t algo); + // CHECK: blasStatus = hipblasGemmStridedBatchedEx(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, strideA, Bptr, Btype, ldb, strideB, bptr, Cptr, Ctype, ldc, strideC, batchCount, computeType, blasGemmAlgo); + blasStatus = cublasGemmStridedBatchedEx(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, strideA, Bptr, Btype, ldb, strideB, bptr, Cptr, Ctype, ldc, strideC, batchCount, computeType, blasGemmAlgo); +#endif + #if CUDA_VERSION >= 10010 // CHECK: hipblasOperation_t BLAS_OP_HERMITAN = HIPBLAS_OP_C; cublasOperation_t BLAS_OP_HERMITAN = CUBLAS_OP_HERMITAN; @@ -1597,6 +1656,26 @@ int main() { // CHECK-NEXT: hipblasDatatype_t C_16BF = HIPBLAS_C_16B; cublasDataType_t R_16BF = CUDA_R_16BF; cublasDataType_t C_16BF = CUDA_C_16BF; + + // NOTE: WORKAROUND: cublasComputeType_t is not actually supported by hipBLAS + // TODO: Fix it after fixing https://github.com/ROCmSoftwarePlatform/hipBLAS/issues/529 + // CHECK: hipblasDatatype_t blasComputeType; + cublasComputeType_t blasComputeType; + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, const void* B, cudaDataType Btype, int ldb, const void* beta, void* C, cudaDataType Ctype, int ldc, cublasComputeType_t computeType, cublasGemmAlgo_t algo); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmEx(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A, hipblasDatatype_t aType, int lda, const void* B, hipblasDatatype_t bType, int ldb, const void* beta, void* C, hipblasDatatype_t cType, int ldc, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo); + // CHECK: blasStatus = hipblasGemmEx(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, Bptr, Btype, ldb, bptr, Cptr, Ctype, ldc, blasComputeType, blasGemmAlgo); + 
blasStatus = cublasGemmEx(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, Bptr, Btype, ldb, bptr, Cptr, Ctype, ldc, blasComputeType, blasGemmAlgo); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* const Aarray[], cudaDataType Atype, int lda, const void* const Barray[], cudaDataType Btype, int ldb, const void* beta, void* const Carray[], cudaDataType Ctype, int ldc, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmBatchedEx(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A[], hipblasDatatype_t aType, int lda, const void* B[], hipblasDatatype_t bType, int ldb, const void* beta, void* C[], hipblasDatatype_t cType, int ldc, int batchCount, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo); + // CHECK: blasStatus = hipblasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, blasComputeType, blasGemmAlgo); + blasStatus = cublasGemmBatchedEx(blasHandle, transa, transb, m, n, k, aptr, voidAarray, Atype, lda, voidBarray, Btype, ldb, bptr, voidCarray, Ctype, ldc, batchCount, blasComputeType, blasGemmAlgo); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void* alpha, const void* A, cudaDataType Atype, int lda, long long int strideA, const void* B, cudaDataType Btype, int ldb, long long int strideB, const void* beta, void* C, cudaDataType Ctype, int ldc, long long int strideC, int batchCount, cublasComputeType_t computeType, cublasGemmAlgo_t algo); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasGemmStridedBatchedEx(hipblasHandle_t 
handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void* alpha, const void* A, hipblasDatatype_t aType, int lda, hipblasStride strideA, const void* B, hipblasDatatype_t bType, int ldb, hipblasStride strideB, const void* beta, void* C, hipblasDatatype_t cType, int ldc, hipblasStride strideC, int batchCount, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo); + // CHECK: blasStatus = hipblasGemmStridedBatchedEx(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, strideA, Bptr, Btype, ldb, strideB, bptr, Cptr, Ctype, ldc, strideC, batchCount, blasComputeType, blasGemmAlgo); + blasStatus = cublasGemmStridedBatchedEx(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, strideA, Bptr, Btype, ldb, strideB, bptr, Cptr, Ctype, ldc, strideC, batchCount, blasComputeType, blasGemmAlgo); #endif return 0; From 9db3b8d866199491b72b05ad7a26d4f33e4ea735 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Sun, 25 Sep 2022 12:39:21 +0200 Subject: [PATCH 07/43] [HIPIFY][#601][BLAS][tests] Synthetic test for cuBLAS API - Part 16 - final + Added tests for the following BLAS functions: ScalEx, AxpyEx, RotEx --- .../synthetic/libraries/cublas2hipblas.cu | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu index 439dcc65..0a24616d 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu @@ -1606,14 +1606,36 @@ int main() { void* Aptr = nullptr; void* bptr = nullptr; void* Bptr = nullptr; + void* cptr = nullptr; void* Cptr = nullptr; + void* xptr = nullptr; + void* yptr = nullptr; + void* sptr = nullptr; // CHECK: hipblasDatatype_t Atype; // CHECK-NEXT: hipblasDatatype_t Btype; // CHECK-NEXT: hipblasDatatype_t Ctype; + // CHECK-NEXT: hipblasDatatype_t Xtype; + // CHECK-NEXT: hipblasDatatype_t Ytype; + // CHECK-NEXT: hipblasDatatype_t 
CStype; + // CHECK-NEXT: hipblasDatatype_t Executiontype; cudaDataType Atype; cudaDataType Btype; cudaDataType Ctype; + cudaDataType Xtype; + cudaDataType Ytype; + cudaDataType CStype; + cudaDataType Executiontype; + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasScalEx(cublasHandle_t handle, int n, const void* alpha, cudaDataType alphaType, void* x, cudaDataType xType, int incx, cudaDataType executionType); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasScalEx(hipblasHandle_t handle, int n, const void* alpha, hipblasDatatype_t alphaType, void* x, hipblasDatatype_t xType, int incx, hipblasDatatype_t executionType); + // CHECK: blasStatus = hipblasScalEx(blasHandle, n, aptr, Atype, xptr, Xtype, incx, Executiontype); + blasStatus = cublasScalEx(blasHandle, n, aptr, Atype, xptr, Xtype, incx, Executiontype); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasAxpyEx(cublasHandle_t handle, int n, const void* alpha, cudaDataType alphaType, const void* x, cudaDataType xType, int incx, void* y, cudaDataType yType, int incy, cudaDataType executiontype); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasAxpyEx(hipblasHandle_t handle, int n, const void* alpha, hipblasDatatype_t alphaType, const void* x, hipblasDatatype_t xType, int incx, void* y, hipblasDatatype_t yType, int incy, hipblasDatatype_t executionType); + // CHECK: blasStatus = hipblasAxpyEx(blasHandle, n, aptr, Atype, xptr, Xtype, incx, yptr, Ytype, incy, Executiontype); + blasStatus = cublasAxpyEx(blasHandle, n, aptr, Atype, xptr, Xtype, incx, yptr, Ytype, incy, Executiontype); #endif #if CUDA_VERSION >= 8000 && CUDA_VERSION < 11000 @@ -1649,6 +1671,11 @@ int main() { // CHECK: hipblasFillMode_t BLAS_FILL_MODE_FULL = HIPBLAS_FILL_MODE_FULL; cublasFillMode_t BLAS_FILL_MODE_FULL = CUBLAS_FILL_MODE_FULL; + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasRotEx(cublasHandle_t handle, int n, void* x, cudaDataType xType, int incx, void* y, cudaDataType yType, int incy, const void* c, const void* s, 
cudaDataType csType, cudaDataType executiontype); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasRotEx(hipblasHandle_t handle, int n, void* x, hipblasDatatype_t xType, int incx, void* y, hipblasDatatype_t yType, int incy, const void* c, const void* s, hipblasDatatype_t csType, hipblasDatatype_t executionType); + // CHECK: blasStatus = hipblasRotEx(blasHandle, n, xptr, Xtype, incx, yptr, Ytype, incy, cptr, sptr, CStype, Executiontype); + blasStatus = cublasRotEx(blasHandle, n, xptr, Xtype, incx, yptr, Ytype, incy, cptr, sptr, CStype, Executiontype); #endif #if CUDA_VERSION >= 11000 From fb0d748bd339ce481d24e473ae2b226da586dc46 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Sat, 1 Oct 2022 21:11:00 +0200 Subject: [PATCH 08/43] [HIPIFY][DNN] Sync with cuDNN 8.5.0 - Part 1 + Update regenerated hipify-perl and CUDNN_API_supported_by_HIP.md --- bin/hipify-perl | 31 +++++++++++ doc/markdown/CUDNN_API_supported_by_HIP.md | 31 +++++++++++ src/CUDA2HIP_DNN_API_types.cpp | 62 ++++++++++++++++++++++ src/Statistics.cpp | 1 + src/Statistics.h | 1 + 5 files changed, 126 insertions(+) diff --git a/bin/hipify-perl b/bin/hipify-perl index 74c8a531..b9f580d4 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -7159,23 +7159,53 @@ sub warnUnsupportedFunctions { "CUDNN_PTR_16B_ALIGNED", "CUDNN_POINTWISE_TANH_FWD", "CUDNN_POINTWISE_TANH_BWD", + "CUDNN_POINTWISE_TAN", "CUDNN_POINTWISE_SWISH_FWD", "CUDNN_POINTWISE_SWISH_BWD", + "CUDNN_POINTWISE_SUB", "CUDNN_POINTWISE_SQRT", "CUDNN_POINTWISE_SOFTPLUS_FWD", "CUDNN_POINTWISE_SOFTPLUS_BWD", + "CUDNN_POINTWISE_SIN", "CUDNN_POINTWISE_SIGMOID_FWD", "CUDNN_POINTWISE_SIGMOID_BWD", + "CUDNN_POINTWISE_RSQRT", "CUDNN_POINTWISE_RELU_FWD", "CUDNN_POINTWISE_RELU_BWD", + "CUDNN_POINTWISE_POW", + "CUDNN_POINTWISE_NEG", "CUDNN_POINTWISE_MUL", + "CUDNN_POINTWISE_MOD", "CUDNN_POINTWISE_MIN", "CUDNN_POINTWISE_MAX", + "CUDNN_POINTWISE_LOGICAL_OR", + "CUDNN_POINTWISE_LOGICAL_NOT", + "CUDNN_POINTWISE_LOGICAL_AND", + "CUDNN_POINTWISE_LOG", + 
"CUDNN_POINTWISE_IDENTITY", + "CUDNN_POINTWISE_GEN_INDEX", "CUDNN_POINTWISE_GELU_FWD", "CUDNN_POINTWISE_GELU_BWD", + "CUDNN_POINTWISE_GELU_APPROX_TANH_FWD", + "CUDNN_POINTWISE_GELU_APPROX_TANH_BWD", + "CUDNN_POINTWISE_FLOOR", + "CUDNN_POINTWISE_EXP", + "CUDNN_POINTWISE_ERF", "CUDNN_POINTWISE_ELU_FWD", "CUDNN_POINTWISE_ELU_BWD", + "CUDNN_POINTWISE_DIV", + "CUDNN_POINTWISE_COS", + "CUDNN_POINTWISE_CMP_NEQ", + "CUDNN_POINTWISE_CMP_LT", + "CUDNN_POINTWISE_CMP_LE", + "CUDNN_POINTWISE_CMP_GT", + "CUDNN_POINTWISE_CMP_GE", + "CUDNN_POINTWISE_CMP_EQ", + "CUDNN_POINTWISE_CEIL", + "CUDNN_POINTWISE_BINARY_SELECT", + "CUDNN_POINTWISE_ADD_SQUARE", "CUDNN_POINTWISE_ADD", + "CUDNN_POINTWISE_ABS", "CUDNN_PATCHLEVEL", "CUDNN_PARAM_ZDESC", "CUDNN_PARAM_ZDATA_PLACEHOLDER", @@ -7311,6 +7341,7 @@ sub warnUnsupportedFunctions { "CUDNN_DATA_UINT8", "CUDNN_DATA_INT8x32", "CUDNN_DATA_INT64", + "CUDNN_DATA_BOOLEAN", "CUDNN_DATA_BFLOAT16", "CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC", "CUDNN_CTC_LOSS_ALGO_DETERMINISTIC", diff --git a/doc/markdown/CUDNN_API_supported_by_HIP.md b/doc/markdown/CUDNN_API_supported_by_HIP.md index 7a5f5ae3..5fc64983 100644 --- a/doc/markdown/CUDNN_API_supported_by_HIP.md +++ b/doc/markdown/CUDNN_API_supported_by_HIP.md @@ -237,6 +237,7 @@ |`CUDNN_CTC_LOSS_ALGO_DETERMINISTIC`|7.0.5| | | | | | | | |`CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC`|7.0.5| | | | | | | | |`CUDNN_DATA_BFLOAT16`|8.1.0| | | | | | | | +|`CUDNN_DATA_BOOLEAN`|8.5.0| | | | | | | | |`CUDNN_DATA_DOUBLE`|1.0.0| | |`HIPDNN_DATA_DOUBLE`| | | | | |`CUDNN_DATA_FLOAT`|1.0.0| | |`HIPDNN_DATA_FLOAT`| | | | | |`CUDNN_DATA_HALF`|3.0.0| | |`HIPDNN_DATA_HALF`| | | | | @@ -389,23 +390,53 @@ |`CUDNN_PARAM_ZDATA_PLACEHOLDER`|7.6.0| | | | | | | | |`CUDNN_PARAM_ZDESC`|7.6.0| | | | | | | | |`CUDNN_PATCHLEVEL`|3.0.0| | | | | | | | +|`CUDNN_POINTWISE_ABS`|8.5.0| | | | | | | | |`CUDNN_POINTWISE_ADD`|8.0.1| | | | | | | | +|`CUDNN_POINTWISE_ADD_SQUARE`|8.5.0| | | | | | | | +|`CUDNN_POINTWISE_BINARY_SELECT`|8.5.0| | | | | | | | 
+|`CUDNN_POINTWISE_CEIL`|8.5.0| | | | | | | | +|`CUDNN_POINTWISE_CMP_EQ`|8.5.0| | | | | | | | +|`CUDNN_POINTWISE_CMP_GE`|8.5.0| | | | | | | | +|`CUDNN_POINTWISE_CMP_GT`|8.5.0| | | | | | | | +|`CUDNN_POINTWISE_CMP_LE`|8.5.0| | | | | | | | +|`CUDNN_POINTWISE_CMP_LT`|8.5.0| | | | | | | | +|`CUDNN_POINTWISE_CMP_NEQ`|8.5.0| | | | | | | | +|`CUDNN_POINTWISE_COS`|8.5.0| | | | | | | | +|`CUDNN_POINTWISE_DIV`|8.5.0| | | | | | | | |`CUDNN_POINTWISE_ELU_BWD`|8.1.0| | | | | | | | |`CUDNN_POINTWISE_ELU_FWD`|8.0.1| | | | | | | | +|`CUDNN_POINTWISE_ERF`|8.5.0| | | | | | | | +|`CUDNN_POINTWISE_EXP`|8.5.0| | | | | | | | +|`CUDNN_POINTWISE_FLOOR`|8.5.0| | | | | | | | +|`CUDNN_POINTWISE_GELU_APPROX_TANH_BWD`|8.5.0| | | | | | | | +|`CUDNN_POINTWISE_GELU_APPROX_TANH_FWD`|8.5.0| | | | | | | | |`CUDNN_POINTWISE_GELU_BWD`|8.1.0| | | | | | | | |`CUDNN_POINTWISE_GELU_FWD`|8.1.0| | | | | | | | +|`CUDNN_POINTWISE_GEN_INDEX`|8.5.0| | | | | | | | +|`CUDNN_POINTWISE_IDENTITY`|8.5.0| | | | | | | | +|`CUDNN_POINTWISE_LOG`|8.5.0| | | | | | | | +|`CUDNN_POINTWISE_LOGICAL_AND`|8.5.0| | | | | | | | +|`CUDNN_POINTWISE_LOGICAL_NOT`|8.5.0| | | | | | | | +|`CUDNN_POINTWISE_LOGICAL_OR`|8.5.0| | | | | | | | |`CUDNN_POINTWISE_MAX`|8.0.1| | | | | | | | |`CUDNN_POINTWISE_MIN`|8.0.1| | | | | | | | +|`CUDNN_POINTWISE_MOD`|8.5.0| | | | | | | | |`CUDNN_POINTWISE_MUL`|8.0.1| | | | | | | | +|`CUDNN_POINTWISE_NEG`|8.5.0| | | | | | | | +|`CUDNN_POINTWISE_POW`|8.5.0| | | | | | | | |`CUDNN_POINTWISE_RELU_BWD`|8.1.0| | | | | | | | |`CUDNN_POINTWISE_RELU_FWD`|8.0.1| | | | | | | | +|`CUDNN_POINTWISE_RSQRT`|8.5.0| | | | | | | | |`CUDNN_POINTWISE_SIGMOID_BWD`|8.1.0| | | | | | | | |`CUDNN_POINTWISE_SIGMOID_FWD`|8.0.1| | | | | | | | +|`CUDNN_POINTWISE_SIN`|8.5.0| | | | | | | | |`CUDNN_POINTWISE_SOFTPLUS_BWD`|8.1.0| | | | | | | | |`CUDNN_POINTWISE_SOFTPLUS_FWD`|8.1.0| | | | | | | | |`CUDNN_POINTWISE_SQRT`|8.0.1| | | | | | | | +|`CUDNN_POINTWISE_SUB`|8.5.0| | | | | | | | |`CUDNN_POINTWISE_SWISH_BWD`|8.1.0| | | | | | | | 
|`CUDNN_POINTWISE_SWISH_FWD`|8.1.0| | | | | | | | +|`CUDNN_POINTWISE_TAN`|8.5.0| | | | | | | | |`CUDNN_POINTWISE_TANH_BWD`|8.1.0| | | | | | | | |`CUDNN_POINTWISE_TANH_FWD`|8.0.1| | | | | | | | |`CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING`|2.0.0| | |`HIPDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING`| | | | | diff --git a/src/CUDA2HIP_DNN_API_types.cpp b/src/CUDA2HIP_DNN_API_types.cpp index 98580c33..373af63b 100644 --- a/src/CUDA2HIP_DNN_API_types.cpp +++ b/src/CUDA2HIP_DNN_API_types.cpp @@ -98,6 +98,7 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_DATA_INT8x32", {"HIPDNN_DATA_INT8x32", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 8 {"CUDNN_DATA_BFLOAT16", {"HIPDNN_DATA_BFLOAT16", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 9 {"CUDNN_DATA_INT64", {"HIPDNN_DATA_INT64", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 10 + {"CUDNN_DATA_BOOLEAN", {"HIPDNN_DATA_BOOLEAN", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 11 {"cudnnErrQueryMode_t", {"hipdnnErrQueryMode_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_ERRQUERY_RAWCODE", {"HIPDNN_ERRQUERY_RAWCODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 0 {"CUDNN_ERRQUERY_NONBLOCKING", {"HIPDNN_ERRQUERY_NONBLOCKING", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1 @@ -370,6 +371,23 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_POINTWISE_MIN", {"HIPDNN_POINTWISE_MIN", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2 {"CUDNN_POINTWISE_MAX", {"HIPDNN_POINTWISE_MAX", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 3 {"CUDNN_POINTWISE_SQRT", {"HIPDNN_POINTWISE_SQRT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 4 + {"CUDNN_POINTWISE_ADD_SQUARE", {"HIPDNN_POINTWISE_ADD_SQUARE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 5 + {"CUDNN_POINTWISE_DIV", {"HIPDNN_POINTWISE_DIV", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 6 + {"CUDNN_POINTWISE_MOD", 
{"HIPDNN_POINTWISE_MOD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 7 + {"CUDNN_POINTWISE_POW", {"HIPDNN_POINTWISE_POW", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 8 + {"CUDNN_POINTWISE_SUB", {"HIPDNN_POINTWISE_SUB", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 9 + {"CUDNN_POINTWISE_ABS", {"HIPDNN_POINTWISE_ABS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 10 + {"CUDNN_POINTWISE_CEIL", {"HIPDNN_POINTWISE_CEIL", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 11 + {"CUDNN_POINTWISE_COS", {"HIPDNN_POINTWISE_COS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 12 + {"CUDNN_POINTWISE_EXP", {"HIPDNN_POINTWISE_EXP", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 13 + {"CUDNN_POINTWISE_FLOOR", {"HIPDNN_POINTWISE_FLOOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 14 + {"CUDNN_POINTWISE_LOG", {"HIPDNN_POINTWISE_LOG", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 15 + {"CUDNN_POINTWISE_NEG", {"HIPDNN_POINTWISE_NEG", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 16 + {"CUDNN_POINTWISE_RSQRT", {"HIPDNN_POINTWISE_RSQRT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 17 + {"CUDNN_POINTWISE_SIN", {"HIPDNN_POINTWISE_SIN", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 18 + {"CUDNN_POINTWISE_TAN", {"HIPDNN_POINTWISE_TAN", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 19 + {"CUDNN_POINTWISE_ERF", {"HIPDNN_POINTWISE_ERF", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 20 + {"CUDNN_POINTWISE_IDENTITY", {"HIPDNN_POINTWISE_IDENTITY", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 21 {"CUDNN_POINTWISE_RELU_FWD", {"HIPDNN_POINTWISE_RELU_FWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 100 {"CUDNN_POINTWISE_TANH_FWD", {"HIPDNN_POINTWISE_TANH_FWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 101 {"CUDNN_POINTWISE_SIGMOID_FWD", 
{"HIPDNN_POINTWISE_SIGMOID_FWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 102 @@ -377,6 +395,7 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_POINTWISE_GELU_FWD", {"HIPDNN_POINTWISE_GELU_FWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 104 {"CUDNN_POINTWISE_SOFTPLUS_FWD", {"HIPDNN_POINTWISE_SOFTPLUS_FWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 105 {"CUDNN_POINTWISE_SWISH_FWD", {"HIPDNN_POINTWISE_SWISH_FWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 106 + {"CUDNN_POINTWISE_GELU_APPROX_TANH_FWD", {"HIPDNN_POINTWISE_GELU_APPROX_TANH_FWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 107 {"CUDNN_POINTWISE_RELU_BWD", {"HIPDNN_POINTWISE_RELU_BWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 200 {"CUDNN_POINTWISE_TANH_BWD", {"HIPDNN_POINTWISE_TANH_BWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 201 {"CUDNN_POINTWISE_SIGMOID_BWD", {"HIPDNN_POINTWISE_SIGMOID_BWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 202 @@ -384,6 +403,18 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_POINTWISE_GELU_BWD", {"HIPDNN_POINTWISE_GELU_BWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 204 {"CUDNN_POINTWISE_SOFTPLUS_BWD", {"HIPDNN_POINTWISE_SOFTPLUS_BWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 205 {"CUDNN_POINTWISE_SWISH_BWD", {"HIPDNN_POINTWISE_SWISH_BWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 206 + {"CUDNN_POINTWISE_GELU_APPROX_TANH_BWD", {"HIPDNN_POINTWISE_GELU_APPROX_TANH_BWD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 207 + {"CUDNN_POINTWISE_CMP_EQ", {"HIPDNN_POINTWISE_CMP_EQ", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 300 + {"CUDNN_POINTWISE_CMP_NEQ", {"HIPDNN_POINTWISE_CMP_NEQ", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 301 + {"CUDNN_POINTWISE_CMP_GT", {"HIPDNN_POINTWISE_CMP_GT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, 
HIP_UNSUPPORTED}}, // 302 + {"CUDNN_POINTWISE_CMP_GE", {"HIPDNN_POINTWISE_CMP_GE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 303 + {"CUDNN_POINTWISE_CMP_LT", {"HIPDNN_POINTWISE_CMP_LT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 304 + {"CUDNN_POINTWISE_CMP_LE", {"HIPDNN_POINTWISE_CMP_LE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 305 + {"CUDNN_POINTWISE_LOGICAL_AND", {"HIPDNN_POINTWISE_LOGICAL_AND", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 400 + {"CUDNN_POINTWISE_LOGICAL_OR", {"HIPDNN_POINTWISE_LOGICAL_OR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 401 + {"CUDNN_POINTWISE_LOGICAL_NOT", {"HIPDNN_POINTWISE_LOGICAL_NOT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 402 + {"CUDNN_POINTWISE_GEN_INDEX", {"HIPDNN_POINTWISE_GEN_INDEX", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 501 + {"CUDNN_POINTWISE_BINARY_SELECT", {"HIPDNN_POINTWISE_BINARY_SELECT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 601 {"cudnnGenStatsMode_t", {"hipdnnGenStatsMode_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_GENSTATS_SUM_SQSUM", {"HIPDNN_GENSTATS_SUM_SQSUM", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 0 {"cudnnBackendAttributeName_t", {"hipdnnBackendAttributeName_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, @@ -1350,6 +1381,37 @@ const std::map CUDA_DNN_TYPE_NAME_VER_MAP { {"cudnnBackendBehaviorNote_t", {CUDNN_820, CUDA_0, CUDA_0 }}, {"CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION", {CUDNN_820, CUDA_0, CUDA_0 }}, {"CUDNN_BEHAVIOR_NOTE_TYPE_COUNT", {CUDNN_820, CUDA_0, CUDA_0 }}, + {"CUDNN_DATA_BOOLEAN", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_ADD_SQUARE", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_DIV", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_MOD", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_POW", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_SUB", {CUDNN_850, CUDA_0, CUDA_0 }}, + 
{"CUDNN_POINTWISE_ABS", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_CEIL", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_COS", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_EXP", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_FLOOR", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_LOG", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_NEG", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_RSQRT", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_SIN", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_TAN", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_ERF", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_IDENTITY", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_GELU_APPROX_TANH_FWD", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_GELU_APPROX_TANH_BWD", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_CMP_EQ", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_CMP_NEQ", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_CMP_GT", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_CMP_GE", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_CMP_LT", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_CMP_LE", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_LOGICAL_AND", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_LOGICAL_OR", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_LOGICAL_NOT", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_GEN_INDEX", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_BINARY_SELECT", {CUDNN_850, CUDA_0, CUDA_0 }}, }; const std::map HIP_DNN_TYPE_NAME_VER_MAP { diff --git a/src/Statistics.cpp b/src/Statistics.cpp index ed6aea21..9ee05065 100644 --- a/src/Statistics.cpp +++ b/src/Statistics.cpp @@ -475,6 +475,7 @@ std::string Statistics::getCudaVersion(const cudaVersions& ver) { case CUDNN_810: return "8.1.0"; case CUDNN_811: return "8.1.1"; case CUDNN_820: return "8.2.0"; + case CUDNN_850: return "8.5.0"; } return ""; } diff --git a/src/Statistics.h b/src/Statistics.h index 
02b4da7d..abae64be 100644 --- a/src/Statistics.h +++ b/src/Statistics.h @@ -231,6 +231,7 @@ enum cudaVersions { CUDNN_810 = 810, CUDNN_811 = 811, CUDNN_820 = 820, + CUDNN_850 = 850, }; enum hipVersions { From 966f151f008270d6b297b6615d475eaf92da6c37 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 4 Oct 2022 23:03:19 +0200 Subject: [PATCH 09/43] [HIPIFY][doc] LLVM 15.0.2 is the latest supported LLVM release + No patches are needed + Update README.md accordingly + Tested on Windows 10 and Ubuntu 21.10 --- README.md | 55 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 728f3bed..4060ed0e 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ After applying all the matchers, the output HIP source is produced. `hipify-clang` requires: -1. [**LLVM+CLANG**](http://releases.llvm.org) of at least version [4.0.0](http://releases.llvm.org/download.html#4.0.0); the latest stable and recommended release: [**15.0.1**](https://github.com/llvm/llvm-project/releases/tag/llvmorg-15.0.1). +1. [**LLVM+CLANG**](http://releases.llvm.org) of at least version [4.0.0](http://releases.llvm.org/download.html#4.0.0); the latest stable and recommended release: [**15.0.2**](https://github.com/llvm/llvm-project/releases/tag/llvmorg-15.0.2). 2. [**CUDA**](https://developer.nvidia.com/cuda-downloads) of at least version [8.0](https://developer.nvidia.com/cuda-80-ga2-download-archive), the latest supported version is [**11.7.1**](https://developer.nvidia.com/cuda-downloads). @@ -172,7 +172,8 @@ After applying all the matchers, the output HIP source is produced. 14.0.5, 14.0.6,
15.0.0, - 15.0.1 + 15.0.1, + 15.0.2 11.7.1 LATEST STABLE CONFIG @@ -188,7 +189,7 @@ After applying all the matchers, the output HIP source is produced. In most cases, you can get a suitable version of `LLVM+CLANG` with your package manager. Failing that or having multiple versions of `LLVM`, you can [download a release archive](http://releases.llvm.org/), build or install it, and set -[CMAKE_PREFIX_PATH](https://cmake.org/cmake/help/v3.5/variable/CMAKE_PREFIX_PATH.html) so `cmake` can find it; for instance: `-DCMAKE_PREFIX_PATH=d:\LLVM\15.0.1\dist` +[CMAKE_PREFIX_PATH](https://cmake.org/cmake/help/v3.5/variable/CMAKE_PREFIX_PATH.html) so `cmake` can find it; for instance: `-DCMAKE_PREFIX_PATH=d:\LLVM\15.0.2\dist` ### hipify-clang: usage @@ -286,7 +287,7 @@ Run `Visual Studio 16 2019`, open the generated `LLVM.sln`, build all, and build **LLVM >= 10.0.0:** -1. download [`LLVM project`](https://github.com/llvm/llvm-project/releases/tag/llvmorg-15.0.1) sources; +1. download [`LLVM project`](https://github.com/llvm/llvm-project/releases/tag/llvmorg-15.0.2) sources; 2. 
build [`LLVM project`](http://llvm.org/docs/CMake.html): ```bash @@ -351,21 +352,21 @@ Run `Visual Studio 17 2022`, open the generated `LLVM.sln`, build all, build pro * Install `lit` into `python`: - - ***Linux***: `python /usr/llvm/15.0.1/llvm-project/llvm/utils/lit/setup.py install` + - ***Linux***: `python /usr/llvm/15.0.2/llvm-project/llvm/utils/lit/setup.py install` - - ***Windows***: `python d:/LLVM/15.0.1/llvm-project/llvm/utils/lit/setup.py install` + - ***Windows***: `python d:/LLVM/15.0.2/llvm-project/llvm/utils/lit/setup.py install` * Starting with LLVM 6.0.1 path to `llvm-lit` python script should be specified by the `LLVM_EXTERNAL_LIT` option: - - ***Linux***: `-DLLVM_EXTERNAL_LIT=/usr/llvm/15.0.1/build/bin/llvm-lit` + - ***Linux***: `-DLLVM_EXTERNAL_LIT=/usr/llvm/15.0.2/build/bin/llvm-lit` - - ***Windows***: `-DLLVM_EXTERNAL_LIT=d:/LLVM/15.0.1/build/Release/bin/llvm-lit.py` + - ***Windows***: `-DLLVM_EXTERNAL_LIT=d:/LLVM/15.0.2/build/Release/bin/llvm-lit.py` * `FileCheck`: - - ***Linux***: copy from `/usr/llvm/15.0.1/build/bin/` to `CMAKE_INSTALL_PREFIX/dist/bin` + - ***Linux***: copy from `/usr/llvm/15.0.2/build/bin/` to `CMAKE_INSTALL_PREFIX/dist/bin` - - ***Windows***: copy from `d:/LLVM/15.0.1/build/Release/bin` to `CMAKE_INSTALL_PREFIX/dist/bin` + - ***Windows***: copy from `d:/LLVM/15.0.2/build/Release/bin` to `CMAKE_INSTALL_PREFIX/dist/bin` - Or specify the path to `FileCheck` in `CMAKE_INSTALL_PREFIX` option @@ -387,7 +388,7 @@ Ubuntu 14: LLVM 4.0.0 - 7.1.0, CUDA 7.0 - 9.0, cuDNN 5.0.5 - 7.6.5 Ubuntu 16-18: LLVM 8.0.0 - 14.0.6, CUDA 8.0 - 10.2, cuDNN 5.1.10 - 8.0.5 -Ubuntu 20-21: LLVM 9.0.0 - 15.0.1, CUDA 8.0 - 11.7.1, cuDNN 5.1.10 - 8.4.1 +Ubuntu 20-21: LLVM 9.0.0 - 15.0.2, CUDA 8.0 - 11.7.1, cuDNN 5.1.10 - 8.4.1 Minimum build system requirements for the above configurations: @@ -404,11 +405,11 @@ cmake -DHIPIFY_CLANG_TESTS=1 \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=../dist \ - -DCMAKE_PREFIX_PATH=/usr/llvm/15.0.1/dist \ + 
-DCMAKE_PREFIX_PATH=/usr/llvm/15.0.2/dist \ -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda \ -DCUDA_DNN_ROOT_DIR=/usr/local/cuda \ -DCUDA_CUB_ROOT_DIR=/usr/CUB \ - -DLLVM_EXTERNAL_LIT=/usr/llvm/15.0.1/build/bin/llvm-lit \ + -DLLVM_EXTERNAL_LIT=/usr/llvm/15.0.2/build/bin/llvm-lit \ ../hipify ``` *A corresponding successful output:* @@ -426,14 +427,14 @@ cmake -- Detecting CXX compile features -- Detecting CXX compile features - done -- Found ZLIB: /usr/lib/x86_64-linux-gnu/libz.so (found version "1.2.11") --- Found LLVM 15.0.1: --- - CMake module path: /usr/llvm/15.0.1/dist/lib/cmake/llvm --- - Include path : /usr/llvm/15.0.1/dist/include --- - Binary path : /usr/llvm/15.0.1/dist/bin +-- Found LLVM 15.0.2: +-- - CMake module path: /usr/llvm/15.0.2/dist/lib/cmake/llvm +-- - Include path : /usr/llvm/15.0.2/dist/include +-- - Binary path : /usr/llvm/15.0.2/dist/bin -- Linker detection: GNU ld -- Found PythonInterp: /usr/bin/python (found suitable version "3.9.7", minimum required is "2.7") -- Found lit: /usr/local/bin/lit --- Found FileCheck: /usr/llvm/15.0.1/dist/bin/FileCheck +-- Found FileCheck: /usr/llvm/15.0.2/dist/bin/FileCheck -- Looking for pthread.h -- Looking for pthread.h - found -- Performing Test CMAKE_HAVE_LIBC_PTHREAD @@ -452,7 +453,7 @@ make test-hipify Running HIPify regression tests ======================================== CUDA 11.7 - will be used for testing -LLVM 15.0.1 - will be used for testing +LLVM 15.0.2 - will be used for testing x86_64 - Platform architecture Linux 5.13.0-21-generic - Platform OS 64 - hipify-clang binary bitness @@ -566,7 +567,7 @@ Testing Time: 6.22s | 11.0.1 - 11.1.0 | 7.0 - 11.2.2 | 7.6.5 - 8.0.5 | 2017.15.9.31, 2019.16.8.4 | 3.19.3 | 3.9.2 | | 12.0.0 - 13.0.1 | 7.0 - 11.5.1 | 7.6.5 - 8.3.2 | 2017.15.9.43, 2019.16.11.9 | 3.22.2 | 3.10.2 | | 14.0.0 - 14.0.6 | 7.0 - 11.7.1 | 8.0.5 - 8.4.1 | 2017.15.9.49, 2019.16.11.17, 2022.17.2.6 | 3.24.0 | 3.10.6 | -| 15.0.0 - 15.0.1 | 7.0 - 11.7.1 | 8.0.5 - 8.4.1 | 2017.15.9.50, 2019.16.11.18, 
2022.17.3.3 | 3.24.1 | 3.10.6 | +| 15.0.0 - 15.0.2 | 7.0 - 11.7.1 | 8.0.5 - 8.4.1 | 2017.15.9.50, 2019.16.11.18, 2022.17.3.3 | 3.24.1 | 3.10.6 | | 16.0.0git | 7.0 - 11.7.1 | 8.0.5 - 8.4.1 | 2017.15.9.50, 2019.16.11.18, 2022.17.3.3 | 3.24.1 | 3.10.6 | *Building with testing support by `Visual Studio 17 2022` on `Windows 10`:* @@ -579,23 +580,23 @@ cmake -DHIPIFY_CLANG_TESTS=1 \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=../dist \ - -DCMAKE_PREFIX_PATH=d:/LLVM/15.0.1/dist \ + -DCMAKE_PREFIX_PATH=d:/LLVM/15.0.2/dist \ -DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.7" \ -DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v11.7" \ -DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-11.7-windows-x64-v8.4.1 \ -DCUDA_CUB_ROOT_DIR=d:/GIT/cub \ - -DLLVM_EXTERNAL_LIT=d:/LLVM/15.0.1/build/Release/bin/llvm-lit.py \ + -DLLVM_EXTERNAL_LIT=d:/LLVM/15.0.2/build/Release/bin/llvm-lit.py \ ../hipify ``` *A corresponding successful output:* ```shell --- Found LLVM 15.0.1: --- - CMake module path: d:/LLVM/15.0.1/dist/lib/cmake/llvm --- - Include path : d:/LLVM/15.0.1/dist/include --- - Binary path : d:/LLVM/15.0.1/dist/bin +-- Found LLVM 15.0.2: +-- - CMake module path: d:/LLVM/15.0.2/dist/lib/cmake/llvm +-- - Include path : d:/LLVM/15.0.2/dist/include +-- - Binary path : d:/LLVM/15.0.2/dist/bin -- Found PythonInterp: c:/Program Files/Python39/python.exe (found suitable version "3.9.5", minimum required is "3.6") -- Found lit: c:/Program Files/Python39/Scripts/lit.exe --- Found FileCheck: d:/LLVM/15.0.1/dist/bin/FileCheck.exe +-- Found FileCheck: d:/LLVM/15.0.2/dist/bin/FileCheck.exe -- Found CUDA: c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.7 (found version "11.7") -- Configuring done -- Generating done From 7e0111ba892c6437f501f3dd72040eeae4cc70a0 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Fri, 7 Oct 2022 20:51:40 +0200 Subject: [PATCH 10/43] [HIPIFY][DNN] Sync with cuDNN 8.5.0 - Part 2 + Add some missing cuDNN 8.3.0 and 
8.4.0 APIs + Update regenerated hipify-perl and CUDNN_API_supported_by_HIP.md --- bin/hipify-perl | 14 ++++ doc/markdown/CUDNN_API_supported_by_HIP.md | 68 +++++++++++------- src/CUDA2HIP_DNN_API_types.cpp | 82 +++++++++++++++------- src/Statistics.cpp | 2 + src/Statistics.h | 2 + 5 files changed, 114 insertions(+), 54 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index b9f580d4..045cbc3c 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -5742,6 +5742,7 @@ sub warnUnsupportedFunctions { "cudnnSpatialTfSamplerBackward", "cudnnSpatialTfGridGeneratorForward", "cudnnSpatialTfGridGeneratorBackward", + "cudnnSignalMode_t", "cudnnSeverity_t", "cudnnSetTensorTransformDescriptor", "cudnnSetTensorNdDescriptorEx", @@ -5773,6 +5774,7 @@ sub warnUnsupportedFunctions { "cudnnRuntimeTag_t", "cudnnRestoreDropoutDescriptor", "cudnnRestoreAlgorithm", + "cudnnResampleMode_t", "cudnnReorderType_t", "cudnnReorderFilterAndBias", "cudnnReduceTensorStruct", @@ -5884,6 +5886,8 @@ sub warnUnsupportedFunctions { "cudnnFusedOpsConstParamStruct", "cudnnFusedOpsConstParamPack_t", "cudnnFusedOpsConstParamLabel_t", + "cudnnFraction_t", + "cudnnFractionStruct", "cudnnForwardMode_t", "cudnnFoldingDirection_t", "cudnnFindRNNForwardTrainingAlgorithmEx", @@ -7105,6 +7109,8 @@ sub warnUnsupportedFunctions { "CUDNN_STATUS_VERSION_MISMATCH", "CUDNN_STATUS_RUNTIME_IN_PROGRESS", "CUDNN_STATUS_RUNTIME_FP_OVERFLOW", + "CUDNN_SIGNAL_WAIT", + "CUDNN_SIGNAL_SET", "CUDNN_SEV_WARNING_EN", "CUDNN_SEV_WARNING", "CUDNN_SEV_INFO_EN", @@ -7130,6 +7136,10 @@ sub warnUnsupportedFunctions { "CUDNN_RNN_CLIP_NONE", "CUDNN_RNN_CLIP_MINMAX", "CUDNN_RNN_ALGO_COUNT", + "CUDNN_RESAMPLE_NEAREST", + "CUDNN_RESAMPLE_MAXPOOL", + "CUDNN_RESAMPLE_BILINEAR", + "CUDNN_RESAMPLE_AVGPOOL", "CUDNN_PTR_ZDATA", "CUDNN_PTR_YSUM", "CUDNN_PTR_YSQSUM", @@ -7390,6 +7400,7 @@ sub warnUnsupportedFunctions { "CUDNN_ATTR_TENSOR_VECTORIZED_DIMENSION", "CUDNN_ATTR_TENSOR_UNIQUE_ID", "CUDNN_ATTR_TENSOR_STRIDES", + 
"CUDNN_ATTR_TENSOR_REORDERING_MODE", "CUDNN_ATTR_TENSOR_IS_VIRTUAL", "CUDNN_ATTR_TENSOR_IS_BY_VALUE", "CUDNN_ATTR_TENSOR_DIMENSIONS", @@ -7406,11 +7417,13 @@ sub warnUnsupportedFunctions { "CUDNN_ATTR_POINTWISE_MODE", "CUDNN_ATTR_POINTWISE_MATH_PREC", "CUDNN_ATTR_POINTWISE_ELU_ALPHA", + "CUDNN_ATTR_POINTWISE_AXIS", "CUDNN_ATTR_OPERATION_REDUCTION_YDESC", "CUDNN_ATTR_OPERATION_REDUCTION_XDESC", "CUDNN_ATTR_OPERATION_REDUCTION_DESC", "CUDNN_ATTR_OPERATION_POINTWISE_YDESC", "CUDNN_ATTR_OPERATION_POINTWISE_XDESC", + "CUDNN_ATTR_OPERATION_POINTWISE_TDESC", "CUDNN_ATTR_OPERATION_POINTWISE_PW_DESCRIPTOR", "CUDNN_ATTR_OPERATION_POINTWISE_DYDESC", "CUDNN_ATTR_OPERATION_POINTWISE_DXDESC", @@ -7491,6 +7504,7 @@ sub warnUnsupportedFunctions { "CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_ATTRIBUTES", "CUDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE", "CUDNN_ATTR_EXECUTION_PLAN_RUN_ONLY_INTERMEDIATE_UIDS", + "CUDNN_ATTR_EXECUTION_PLAN_JSON_REPRESENTATION", "CUDNN_ATTR_EXECUTION_PLAN_HANDLE", "CUDNN_ATTR_EXECUTION_PLAN_ENGINE_CONFIG", "CUDNN_ATTR_EXECUTION_PLAN_COMPUTED_INTERMEDIATE_UIDS", diff --git a/doc/markdown/CUDNN_API_supported_by_HIP.md b/doc/markdown/CUDNN_API_supported_by_HIP.md index 5fc64983..315e1077 100644 --- a/doc/markdown/CUDNN_API_supported_by_HIP.md +++ b/doc/markdown/CUDNN_API_supported_by_HIP.md @@ -48,6 +48,7 @@ |`CUDNN_ATTR_EXECUTION_PLAN_COMPUTED_INTERMEDIATE_UIDS`|8.0.2| | | | | | | | |`CUDNN_ATTR_EXECUTION_PLAN_ENGINE_CONFIG`|8.0.1| | | | | | | | |`CUDNN_ATTR_EXECUTION_PLAN_HANDLE`|8.0.1| | | | | | | | +|`CUDNN_ATTR_EXECUTION_PLAN_JSON_REPRESENTATION`|8.4.0| | | | | | | | |`CUDNN_ATTR_EXECUTION_PLAN_RUN_ONLY_INTERMEDIATE_UIDS`|8.0.2| | | | | | | | |`CUDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE`|8.0.1| | | | | | | | |`CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_ATTRIBUTES`|8.0.2| | | | | | | | @@ -128,11 +129,13 @@ |`CUDNN_ATTR_OPERATION_POINTWISE_DXDESC`|8.1.0| | | | | | | | |`CUDNN_ATTR_OPERATION_POINTWISE_DYDESC`|8.1.0| | | | | | | | 
|`CUDNN_ATTR_OPERATION_POINTWISE_PW_DESCRIPTOR`|8.0.1| | | | | | | | +|`CUDNN_ATTR_OPERATION_POINTWISE_TDESC`|8.3.0| | | | | | | | |`CUDNN_ATTR_OPERATION_POINTWISE_XDESC`|8.0.1| | | | | | | | |`CUDNN_ATTR_OPERATION_POINTWISE_YDESC`|8.0.1| | | | | | | | |`CUDNN_ATTR_OPERATION_REDUCTION_DESC`|8.1.0| | | | | | | | |`CUDNN_ATTR_OPERATION_REDUCTION_XDESC`|8.1.0| | | | | | | | |`CUDNN_ATTR_OPERATION_REDUCTION_YDESC`|8.1.0| | | | | | | | +|`CUDNN_ATTR_POINTWISE_AXIS`|8.4.0| | | | | | | | |`CUDNN_ATTR_POINTWISE_ELU_ALPHA`|8.1.0| | | | | | | | |`CUDNN_ATTR_POINTWISE_MATH_PREC`|8.0.1| | | | | | | | |`CUDNN_ATTR_POINTWISE_MODE`|8.0.1| | | | | | | | @@ -149,6 +152,7 @@ |`CUDNN_ATTR_TENSOR_DIMENSIONS`|8.0.1| | | | | | | | |`CUDNN_ATTR_TENSOR_IS_BY_VALUE`|8.1.0| | | | | | | | |`CUDNN_ATTR_TENSOR_IS_VIRTUAL`|8.0.1| | | | | | | | +|`CUDNN_ATTR_TENSOR_REORDERING_MODE`|8.3.0| | | | | | | | |`CUDNN_ATTR_TENSOR_STRIDES`|8.0.1| | | | | | | | |`CUDNN_ATTR_TENSOR_UNIQUE_ID`|8.0.1| | | | | | | | |`CUDNN_ATTR_TENSOR_VECTORIZED_DIMENSION`|8.0.1| | | | | | | | @@ -237,7 +241,7 @@ |`CUDNN_CTC_LOSS_ALGO_DETERMINISTIC`|7.0.5| | | | | | | | |`CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC`|7.0.5| | | | | | | | |`CUDNN_DATA_BFLOAT16`|8.1.0| | | | | | | | -|`CUDNN_DATA_BOOLEAN`|8.5.0| | | | | | | | +|`CUDNN_DATA_BOOLEAN`|8.3.0| | | | | | | | |`CUDNN_DATA_DOUBLE`|1.0.0| | |`HIPDNN_DATA_DOUBLE`| | | | | |`CUDNN_DATA_FLOAT`|1.0.0| | |`HIPDNN_DATA_FLOAT`| | | | | |`CUDNN_DATA_HALF`|3.0.0| | |`HIPDNN_DATA_HALF`| | | | | @@ -390,53 +394,53 @@ |`CUDNN_PARAM_ZDATA_PLACEHOLDER`|7.6.0| | | | | | | | |`CUDNN_PARAM_ZDESC`|7.6.0| | | | | | | | |`CUDNN_PATCHLEVEL`|3.0.0| | | | | | | | -|`CUDNN_POINTWISE_ABS`|8.5.0| | | | | | | | +|`CUDNN_POINTWISE_ABS`|8.3.0| | | | | | | | |`CUDNN_POINTWISE_ADD`|8.0.1| | | | | | | | -|`CUDNN_POINTWISE_ADD_SQUARE`|8.5.0| | | | | | | | -|`CUDNN_POINTWISE_BINARY_SELECT`|8.5.0| | | | | | | | -|`CUDNN_POINTWISE_CEIL`|8.5.0| | | | | | | | -|`CUDNN_POINTWISE_CMP_EQ`|8.5.0| | | | | | | | 
-|`CUDNN_POINTWISE_CMP_GE`|8.5.0| | | | | | | | -|`CUDNN_POINTWISE_CMP_GT`|8.5.0| | | | | | | | -|`CUDNN_POINTWISE_CMP_LE`|8.5.0| | | | | | | | -|`CUDNN_POINTWISE_CMP_LT`|8.5.0| | | | | | | | -|`CUDNN_POINTWISE_CMP_NEQ`|8.5.0| | | | | | | | -|`CUDNN_POINTWISE_COS`|8.5.0| | | | | | | | -|`CUDNN_POINTWISE_DIV`|8.5.0| | | | | | | | +|`CUDNN_POINTWISE_ADD_SQUARE`|8.3.0| | | | | | | | +|`CUDNN_POINTWISE_BINARY_SELECT`|8.4.0| | | | | | | | +|`CUDNN_POINTWISE_CEIL`|8.3.0| | | | | | | | +|`CUDNN_POINTWISE_CMP_EQ`|8.3.0| | | | | | | | +|`CUDNN_POINTWISE_CMP_GE`|8.3.0| | | | | | | | +|`CUDNN_POINTWISE_CMP_GT`|8.3.0| | | | | | | | +|`CUDNN_POINTWISE_CMP_LE`|8.3.0| | | | | | | | +|`CUDNN_POINTWISE_CMP_LT`|8.3.0| | | | | | | | +|`CUDNN_POINTWISE_CMP_NEQ`|8.3.0| | | | | | | | +|`CUDNN_POINTWISE_COS`|8.3.0| | | | | | | | +|`CUDNN_POINTWISE_DIV`|8.3.0| | | | | | | | |`CUDNN_POINTWISE_ELU_BWD`|8.1.0| | | | | | | | |`CUDNN_POINTWISE_ELU_FWD`|8.0.1| | | | | | | | |`CUDNN_POINTWISE_ERF`|8.5.0| | | | | | | | -|`CUDNN_POINTWISE_EXP`|8.5.0| | | | | | | | -|`CUDNN_POINTWISE_FLOOR`|8.5.0| | | | | | | | +|`CUDNN_POINTWISE_EXP`|8.3.0| | | | | | | | +|`CUDNN_POINTWISE_FLOOR`|8.3.0| | | | | | | | |`CUDNN_POINTWISE_GELU_APPROX_TANH_BWD`|8.5.0| | | | | | | | |`CUDNN_POINTWISE_GELU_APPROX_TANH_FWD`|8.5.0| | | | | | | | |`CUDNN_POINTWISE_GELU_BWD`|8.1.0| | | | | | | | |`CUDNN_POINTWISE_GELU_FWD`|8.1.0| | | | | | | | -|`CUDNN_POINTWISE_GEN_INDEX`|8.5.0| | | | | | | | +|`CUDNN_POINTWISE_GEN_INDEX`|8.4.0| | | | | | | | |`CUDNN_POINTWISE_IDENTITY`|8.5.0| | | | | | | | -|`CUDNN_POINTWISE_LOG`|8.5.0| | | | | | | | -|`CUDNN_POINTWISE_LOGICAL_AND`|8.5.0| | | | | | | | -|`CUDNN_POINTWISE_LOGICAL_NOT`|8.5.0| | | | | | | | -|`CUDNN_POINTWISE_LOGICAL_OR`|8.5.0| | | | | | | | +|`CUDNN_POINTWISE_LOG`|8.3.0| | | | | | | | +|`CUDNN_POINTWISE_LOGICAL_AND`|8.3.0| | | | | | | | +|`CUDNN_POINTWISE_LOGICAL_NOT`|8.3.0| | | | | | | | +|`CUDNN_POINTWISE_LOGICAL_OR`|8.3.0| | | | | | | | |`CUDNN_POINTWISE_MAX`|8.0.1| | | | 
| | | | |`CUDNN_POINTWISE_MIN`|8.0.1| | | | | | | | -|`CUDNN_POINTWISE_MOD`|8.5.0| | | | | | | | +|`CUDNN_POINTWISE_MOD`|8.3.0| | | | | | | | |`CUDNN_POINTWISE_MUL`|8.0.1| | | | | | | | -|`CUDNN_POINTWISE_NEG`|8.5.0| | | | | | | | -|`CUDNN_POINTWISE_POW`|8.5.0| | | | | | | | +|`CUDNN_POINTWISE_NEG`|8.3.0| | | | | | | | +|`CUDNN_POINTWISE_POW`|8.3.0| | | | | | | | |`CUDNN_POINTWISE_RELU_BWD`|8.1.0| | | | | | | | |`CUDNN_POINTWISE_RELU_FWD`|8.0.1| | | | | | | | -|`CUDNN_POINTWISE_RSQRT`|8.5.0| | | | | | | | +|`CUDNN_POINTWISE_RSQRT`|8.3.0| | | | | | | | |`CUDNN_POINTWISE_SIGMOID_BWD`|8.1.0| | | | | | | | |`CUDNN_POINTWISE_SIGMOID_FWD`|8.0.1| | | | | | | | -|`CUDNN_POINTWISE_SIN`|8.5.0| | | | | | | | +|`CUDNN_POINTWISE_SIN`|8.3.0| | | | | | | | |`CUDNN_POINTWISE_SOFTPLUS_BWD`|8.1.0| | | | | | | | |`CUDNN_POINTWISE_SOFTPLUS_FWD`|8.1.0| | | | | | | | |`CUDNN_POINTWISE_SQRT`|8.0.1| | | | | | | | -|`CUDNN_POINTWISE_SUB`|8.5.0| | | | | | | | +|`CUDNN_POINTWISE_SUB`|8.3.0| | | | | | | | |`CUDNN_POINTWISE_SWISH_BWD`|8.1.0| | | | | | | | |`CUDNN_POINTWISE_SWISH_FWD`|8.1.0| | | | | | | | -|`CUDNN_POINTWISE_TAN`|8.5.0| | | | | | | | +|`CUDNN_POINTWISE_TAN`|8.3.0| | | | | | | | |`CUDNN_POINTWISE_TANH_BWD`|8.1.0| | | | | | | | |`CUDNN_POINTWISE_TANH_FWD`|8.0.1| | | | | | | | |`CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING`|2.0.0| | |`HIPDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING`| | | | | @@ -482,6 +486,10 @@ |`CUDNN_REDUCE_TENSOR_NORM1`|6.0.0| | |`HIPDNN_REDUCE_TENSOR_NORM1`| | | | | |`CUDNN_REDUCE_TENSOR_NORM2`|6.0.0| | |`HIPDNN_REDUCE_TENSOR_NORM2`| | | | | |`CUDNN_REDUCE_TENSOR_NO_INDICES`|6.0.0| | |`HIPDNN_REDUCE_TENSOR_NO_INDICES`| | | | | +|`CUDNN_RESAMPLE_AVGPOOL`|8.3.0| | | | | | | | +|`CUDNN_RESAMPLE_BILINEAR`|8.3.0| | | | | | | | +|`CUDNN_RESAMPLE_MAXPOOL`|8.3.0| | | | | | | | +|`CUDNN_RESAMPLE_NEAREST`|8.3.0| | | | | | | | |`CUDNN_RNN_ALGO_COUNT`|7.1.3| | | | | | | | |`CUDNN_RNN_ALGO_PERSIST_DYNAMIC`|6.0.0| | |`HIPDNN_RNN_ALGO_PERSIST_DYNAMIC`| | | | | 
|`CUDNN_RNN_ALGO_PERSIST_STATIC`|6.0.0| | |`HIPDNN_RNN_ALGO_PERSIST_STATIC`| | | | | @@ -517,6 +525,8 @@ |`CUDNN_SEV_INFO_EN`|7.1.3| | | | | | | | |`CUDNN_SEV_WARNING`|7.1.3| | | | | | | | |`CUDNN_SEV_WARNING_EN`|7.1.3| | | | | | | | +|`CUDNN_SIGNAL_SET`|8.5.0| | | | | | | | +|`CUDNN_SIGNAL_WAIT`|8.5.0| | | | | | | | |`CUDNN_SKIP_INPUT`|5.0.0| | |`HIPDNN_SKIP_INPUT`| | | | | |`CUDNN_SOFTMAX_ACCURATE`|1.0.0| | |`HIPDNN_SOFTMAX_ACCURATE`| | | | | |`CUDNN_SOFTMAX_FAST`|1.0.0| | |`HIPDNN_SOFTMAX_FAST`| | | | | @@ -626,6 +636,8 @@ |`cudnnFilterStruct`|1.0.0| | | | | | | | |`cudnnFoldingDirection_t`|7.5.0| | | | | | | | |`cudnnForwardMode_t`|8.0.1| | | | | | | | +|`cudnnFractionStruct`|8.5.0| | | | | | | | +|`cudnnFraction_t`|8.5.0| | | | | | | | |`cudnnFusedOpsConstParamLabel_t`|7.6.0| | | | | | | | |`cudnnFusedOpsConstParamPack_t`|7.6.0| | | | | | | | |`cudnnFusedOpsConstParamStruct`|7.6.0| | | | | | | | @@ -674,12 +686,14 @@ |`cudnnReduceTensorOp_t`|6.0.0| | |`hipdnnReduceTensorOp_t`| | | | | |`cudnnReduceTensorStruct`|6.0.0| | | | | | | | |`cudnnReorderType_t`|7.6.0| | | | | | | | +|`cudnnResampleMode_t`|8.3.0| | | | | | | | |`cudnnRuntimeTag_t`|7.0.5| | | | | | | | |`cudnnSamplerType_t`|5.0.0| | | | | | | | |`cudnnSeqDataAxis_t`|7.5.0| | | | | | | | |`cudnnSeqDataDescriptor_t`|7.5.0| | | | | | | | |`cudnnSeqDataStruct`|7.5.0| | | | | | | | |`cudnnSeverity_t`|7.1.3| | | | | | | | +|`cudnnSignalMode_t`|8.5.0| | | | | | | | |`cudnnSoftmaxAlgorithm_t`|1.0.0| | |`hipdnnSoftmaxAlgorithm_t`| | | | | |`cudnnSoftmaxMode_t`|1.0.0| | |`hipdnnSoftmaxMode_t`| | | | | |`cudnnSpatialTransformerDescriptor_t`|5.0.0| | | | | | | | diff --git a/src/CUDA2HIP_DNN_API_types.cpp b/src/CUDA2HIP_DNN_API_types.cpp index 373af63b..0343b364 100644 --- a/src/CUDA2HIP_DNN_API_types.cpp +++ b/src/CUDA2HIP_DNN_API_types.cpp @@ -427,6 +427,7 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_ATTR_POINTWISE_ELU_ALPHA", {"HIPDNN_ATTR_POINTWISE_ELU_ALPHA", "", CONV_NUMERIC_LITERAL, API_DNN, 1, 
HIP_UNSUPPORTED}}, // 6 {"CUDNN_ATTR_POINTWISE_SOFTPLUS_BETA", {"HIPDNN_ATTR_POINTWISE_SOFTPLUS_BETA", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 7 {"CUDNN_ATTR_POINTWISE_SWISH_BETA", {"HIPDNN_ATTR_POINTWISE_SWISH_BETA", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 8 + {"CUDNN_ATTR_POINTWISE_AXIS", {"HIPDNN_ATTR_POINTWISE_AXIS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 9 {"CUDNN_ATTR_CONVOLUTION_COMP_TYPE", {"HIPDNN_ATTR_CONVOLUTION_COMP_TYPE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 100 {"CUDNN_ATTR_CONVOLUTION_CONV_MODE", {"HIPDNN_ATTR_CONVOLUTION_CONV_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 101 {"CUDNN_ATTR_CONVOLUTION_DILATIONS", {"HIPDNN_ATTR_CONVOLUTION_DILATIONS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 102 @@ -445,6 +446,7 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE", {"HIPDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 402 {"CUDNN_ATTR_EXECUTION_PLAN_COMPUTED_INTERMEDIATE_UIDS", {"HIPDNN_ATTR_EXECUTION_PLAN_COMPUTED_INTERMEDIATE_UIDS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 403 {"CUDNN_ATTR_EXECUTION_PLAN_RUN_ONLY_INTERMEDIATE_UIDS", {"HIPDNN_ATTR_EXECUTION_PLAN_RUN_ONLY_INTERMEDIATE_UIDS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 404 + {"CUDNN_ATTR_EXECUTION_PLAN_JSON_REPRESENTATION", {"HIPDNN_ATTR_EXECUTION_PLAN_JSON_REPRESENTATION", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 405 {"CUDNN_ATTR_INTERMEDIATE_INFO_UNIQUE_ID", {"HIPDNN_ATTR_INTERMEDIATE_INFO_UNIQUE_ID", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 500 {"CUDNN_ATTR_INTERMEDIATE_INFO_SIZE", {"HIPDNN_ATTR_INTERMEDIATE_INFO_SIZE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 501 {"CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_DATA_UIDS", {"HIPDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_DATA_UIDS", "", 
CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 502 @@ -477,6 +479,7 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_ATTR_OPERATION_POINTWISE_ALPHA2", {"HIPDNN_ATTR_OPERATION_POINTWISE_ALPHA2", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 755 {"CUDNN_ATTR_OPERATION_POINTWISE_DXDESC", {"HIPDNN_ATTR_OPERATION_POINTWISE_DXDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 756 {"CUDNN_ATTR_OPERATION_POINTWISE_DYDESC", {"HIPDNN_ATTR_OPERATION_POINTWISE_DYDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 757 + {"CUDNN_ATTR_OPERATION_POINTWISE_TDESC", {"HIPDNN_ATTR_OPERATION_POINTWISE_TDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 758 {"CUDNN_ATTR_OPERATION_GENSTATS_MODE", {"HIPDNN_ATTR_OPERATION_GENSTATS_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 770 {"CUDNN_ATTR_OPERATION_GENSTATS_MATH_PREC", {"HIPDNN_ATTR_OPERATION_GENSTATS_MATH_PREC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 771 {"CUDNN_ATTR_OPERATION_GENSTATS_XDESC", {"HIPDNN_ATTR_OPERATION_GENSTATS_XDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 772 @@ -511,6 +514,7 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_ATTR_TENSOR_UNIQUE_ID", {"HIPDNN_ATTR_TENSOR_UNIQUE_ID", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 906 {"CUDNN_ATTR_TENSOR_IS_VIRTUAL", {"HIPDNN_ATTR_TENSOR_IS_VIRTUAL", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 907 {"CUDNN_ATTR_TENSOR_IS_BY_VALUE", {"HIPDNN_ATTR_TENSOR_IS_BY_VALUE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 908 + {"CUDNN_ATTR_TENSOR_REORDERING_MODE", {"HIPDNN_ATTR_TENSOR_REORDERING_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 909 {"CUDNN_ATTR_VARIANT_PACK_UNIQUE_IDS", {"HIPDNN_ATTR_VARIANT_PACK_UNIQUE_IDS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1000 {"CUDNN_ATTR_VARIANT_PACK_DATA_POINTERS", {"HIPDNN_ATTR_VARIANT_PACK_DATA_POINTERS", "", 
CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1001 {"CUDNN_ATTR_VARIANT_PACK_INTERMEDIATES", {"HIPDNN_ATTR_VARIANT_PACK_INTERMEDIATES", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1002 @@ -656,6 +660,14 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"cudnnBackendBehaviorNote_t", {"hipdnnBackendBehaviorNote_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION", {"HIPDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 0 {"CUDNN_BEHAVIOR_NOTE_TYPE_COUNT", {"HIPDNN_BEHAVIOR_NOTE_TYPE_COUNT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"cudnnResampleMode_t", {"hipdnnResampleMode_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_RESAMPLE_NEAREST", {"HIPDNN_RESAMPLE_NEAREST", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_RESAMPLE_BILINEAR", {"HIPDNN_RESAMPLE_BILINEAR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_RESAMPLE_AVGPOOL", {"HIPDNN_RESAMPLE_AVGPOOL", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_RESAMPLE_MAXPOOL", {"HIPDNN_RESAMPLE_MAXPOOL", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"cudnnSignalMode_t", {"hipdnnSignalMode_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_SIGNAL_SET", {"HIPDNN_SIGNAL_SET", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_SIGNAL_WAIT", {"HIPDNN_SIGNAL_WAIT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // cuDNN types {"cudnnContext", {"hipdnnContext", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, @@ -718,6 +730,8 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"cudnnBackendDescriptor_t", {"hipdnnBackendDescriptor_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, {"libraryPropertyType", {"hipdnnLibraryPropertyType", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, {"libraryPropertyType_t", {"hipdnnLibraryPropertyType_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, + 
{"cudnnFractionStruct", {"hipdnnFractionStruct", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, + {"cudnnFraction_t", {"hipdnnFraction_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, }; const std::map CUDA_DNN_TYPE_NAME_VER_MAP { @@ -1381,37 +1395,51 @@ const std::map CUDA_DNN_TYPE_NAME_VER_MAP { {"cudnnBackendBehaviorNote_t", {CUDNN_820, CUDA_0, CUDA_0 }}, {"CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION", {CUDNN_820, CUDA_0, CUDA_0 }}, {"CUDNN_BEHAVIOR_NOTE_TYPE_COUNT", {CUDNN_820, CUDA_0, CUDA_0 }}, - {"CUDNN_DATA_BOOLEAN", {CUDNN_850, CUDA_0, CUDA_0 }}, - {"CUDNN_POINTWISE_ADD_SQUARE", {CUDNN_850, CUDA_0, CUDA_0 }}, - {"CUDNN_POINTWISE_DIV", {CUDNN_850, CUDA_0, CUDA_0 }}, - {"CUDNN_POINTWISE_MOD", {CUDNN_850, CUDA_0, CUDA_0 }}, - {"CUDNN_POINTWISE_POW", {CUDNN_850, CUDA_0, CUDA_0 }}, - {"CUDNN_POINTWISE_SUB", {CUDNN_850, CUDA_0, CUDA_0 }}, - {"CUDNN_POINTWISE_ABS", {CUDNN_850, CUDA_0, CUDA_0 }}, - {"CUDNN_POINTWISE_CEIL", {CUDNN_850, CUDA_0, CUDA_0 }}, - {"CUDNN_POINTWISE_COS", {CUDNN_850, CUDA_0, CUDA_0 }}, - {"CUDNN_POINTWISE_EXP", {CUDNN_850, CUDA_0, CUDA_0 }}, - {"CUDNN_POINTWISE_FLOOR", {CUDNN_850, CUDA_0, CUDA_0 }}, - {"CUDNN_POINTWISE_LOG", {CUDNN_850, CUDA_0, CUDA_0 }}, - {"CUDNN_POINTWISE_NEG", {CUDNN_850, CUDA_0, CUDA_0 }}, - {"CUDNN_POINTWISE_RSQRT", {CUDNN_850, CUDA_0, CUDA_0 }}, - {"CUDNN_POINTWISE_SIN", {CUDNN_850, CUDA_0, CUDA_0 }}, - {"CUDNN_POINTWISE_TAN", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_DATA_BOOLEAN", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_ADD_SQUARE", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_DIV", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_MOD", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_POW", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_SUB", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_ABS", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_CEIL", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_COS", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_EXP", {CUDNN_830, CUDA_0, CUDA_0 
}}, + {"CUDNN_POINTWISE_FLOOR", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_LOG", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_NEG", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_RSQRT", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_SIN", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_TAN", {CUDNN_830, CUDA_0, CUDA_0 }}, {"CUDNN_POINTWISE_ERF", {CUDNN_850, CUDA_0, CUDA_0 }}, {"CUDNN_POINTWISE_IDENTITY", {CUDNN_850, CUDA_0, CUDA_0 }}, {"CUDNN_POINTWISE_GELU_APPROX_TANH_FWD", {CUDNN_850, CUDA_0, CUDA_0 }}, {"CUDNN_POINTWISE_GELU_APPROX_TANH_BWD", {CUDNN_850, CUDA_0, CUDA_0 }}, - {"CUDNN_POINTWISE_CMP_EQ", {CUDNN_850, CUDA_0, CUDA_0 }}, - {"CUDNN_POINTWISE_CMP_NEQ", {CUDNN_850, CUDA_0, CUDA_0 }}, - {"CUDNN_POINTWISE_CMP_GT", {CUDNN_850, CUDA_0, CUDA_0 }}, - {"CUDNN_POINTWISE_CMP_GE", {CUDNN_850, CUDA_0, CUDA_0 }}, - {"CUDNN_POINTWISE_CMP_LT", {CUDNN_850, CUDA_0, CUDA_0 }}, - {"CUDNN_POINTWISE_CMP_LE", {CUDNN_850, CUDA_0, CUDA_0 }}, - {"CUDNN_POINTWISE_LOGICAL_AND", {CUDNN_850, CUDA_0, CUDA_0 }}, - {"CUDNN_POINTWISE_LOGICAL_OR", {CUDNN_850, CUDA_0, CUDA_0 }}, - {"CUDNN_POINTWISE_LOGICAL_NOT", {CUDNN_850, CUDA_0, CUDA_0 }}, - {"CUDNN_POINTWISE_GEN_INDEX", {CUDNN_850, CUDA_0, CUDA_0 }}, - {"CUDNN_POINTWISE_BINARY_SELECT", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_CMP_EQ", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_CMP_NEQ", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_CMP_GT", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_CMP_GE", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_CMP_LT", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_CMP_LE", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_LOGICAL_AND", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_LOGICAL_OR", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_LOGICAL_NOT", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_GEN_INDEX", {CUDNN_840, CUDA_0, CUDA_0 }}, + {"CUDNN_POINTWISE_BINARY_SELECT", {CUDNN_840, CUDA_0, CUDA_0 }}, + 
{"cudnnFractionStruct", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"cudnnFraction_t", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"cudnnResampleMode_t", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_RESAMPLE_NEAREST", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_RESAMPLE_BILINEAR", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_RESAMPLE_AVGPOOL", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_RESAMPLE_MAXPOOL", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"cudnnSignalMode_t", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_SIGNAL_SET", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_SIGNAL_WAIT", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_POINTWISE_AXIS", {CUDNN_840, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_EXECUTION_PLAN_JSON_REPRESENTATION", {CUDNN_840, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_POINTWISE_TDESC", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_TENSOR_REORDERING_MODE", {CUDNN_830, CUDA_0, CUDA_0 }}, }; const std::map HIP_DNN_TYPE_NAME_VER_MAP { diff --git a/src/Statistics.cpp b/src/Statistics.cpp index 9ee05065..271eca69 100644 --- a/src/Statistics.cpp +++ b/src/Statistics.cpp @@ -475,6 +475,8 @@ std::string Statistics::getCudaVersion(const cudaVersions& ver) { case CUDNN_810: return "8.1.0"; case CUDNN_811: return "8.1.1"; case CUDNN_820: return "8.2.0"; + case CUDNN_830: return "8.3.0"; + case CUDNN_840: return "8.4.0"; case CUDNN_850: return "8.5.0"; } return ""; diff --git a/src/Statistics.h b/src/Statistics.h index abae64be..eae40e4e 100644 --- a/src/Statistics.h +++ b/src/Statistics.h @@ -231,6 +231,8 @@ enum cudaVersions { CUDNN_810 = 810, CUDNN_811 = 811, CUDNN_820 = 820, + CUDNN_830 = 830, + CUDNN_840 = 840, CUDNN_850 = 850, }; From cd31e387729f3ac8eea10021850908805d06f603 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Sat, 8 Oct 2022 12:57:08 +0200 Subject: [PATCH 11/43] [HIPIFY][DNN] Sync with cuDNN 8.5.0 - Part 3 + Add some missing cuDNN 8.3.0 APIs + Update regenerated hipify-perl and CUDNN_API_supported_by_HIP.md --- bin/hipify-perl | 45 +++++++++++ doc/markdown/CUDNN_API_supported_by_HIP.md | 45 
+++++++++++ src/CUDA2HIP_DNN_API_types.cpp | 90 ++++++++++++++++++++++ 3 files changed, 180 insertions(+) diff --git a/bin/hipify-perl b/bin/hipify-perl index 045cbc3c..107609bb 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -7406,6 +7406,15 @@ sub warnUnsupportedFunctions { "CUDNN_ATTR_TENSOR_DIMENSIONS", "CUDNN_ATTR_TENSOR_DATA_TYPE", "CUDNN_ATTR_TENSOR_BYTE_ALIGNMENT", + "CUDNN_ATTR_RESAMPLE_WINDOW_DIMS", + "CUDNN_ATTR_RESAMPLE_STRIDES", + "CUDNN_ATTR_RESAMPLE_SPATIAL_DIMS", + "CUDNN_ATTR_RESAMPLE_PRE_PADDINGS", + "CUDNN_ATTR_RESAMPLE_POST_PADDINGS", + "CUDNN_ATTR_RESAMPLE_PADDING_MODE", + "CUDNN_ATTR_RESAMPLE_NAN_PROPAGATION", + "CUDNN_ATTR_RESAMPLE_MODE", + "CUDNN_ATTR_RESAMPLE_COMP_TYPE", "CUDNN_ATTR_REDUCTION_OPERATOR", "CUDNN_ATTR_REDUCTION_COMP_TYPE", "CUDNN_ATTR_POINTWISE_SWISH_BETA", @@ -7418,6 +7427,23 @@ sub warnUnsupportedFunctions { "CUDNN_ATTR_POINTWISE_MATH_PREC", "CUDNN_ATTR_POINTWISE_ELU_ALPHA", "CUDNN_ATTR_POINTWISE_AXIS", + "CUDNN_ATTR_OPERATION_SIGNAL_YDESC", + "CUDNN_ATTR_OPERATION_SIGNAL_XDESC", + "CUDNN_ATTR_OPERATION_SIGNAL_VALUE", + "CUDNN_ATTR_OPERATION_SIGNAL_MODE", + "CUDNN_ATTR_OPERATION_SIGNAL_FLAGDESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_YDESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_XDESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_IDXDESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_DESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_BETA", + "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_ALPHA", + "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_IDXDESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DYDESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DXDESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_BETA", + "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_ALPHA", "CUDNN_ATTR_OPERATION_REDUCTION_YDESC", "CUDNN_ATTR_OPERATION_REDUCTION_XDESC", "CUDNN_ATTR_OPERATION_REDUCTION_DESC", @@ -7430,6 +7456,21 @@ sub warnUnsupportedFunctions { "CUDNN_ATTR_OPERATION_POINTWISE_BDESC", "CUDNN_ATTR_OPERATION_POINTWISE_ALPHA2", "CUDNN_ATTR_OPERATION_POINTWISE_ALPHA1", + 
"CUDNN_ATTR_OPERATION_NORM_FWD_YDESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_XDESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_SCALE_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_PHASE", + "CUDNN_ATTR_OPERATION_NORM_FWD_PEER_STAT_DESCS", + "CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_VAR_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_MEAN_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_MODE", + "CUDNN_ATTR_OPERATION_NORM_FWD_MEAN_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_INV_VARIANCE_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_VAR_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_MEAN_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_EXP_AVG_FACTOR_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_EPSILON_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_BIAS_DESC", "CUDNN_ATTR_OPERATION_MATMUL_IRREGULARLY_STRIDED_BATCH_COUNT", "CUDNN_ATTR_OPERATION_MATMUL_DESC", "CUDNN_ATTR_OPERATION_MATMUL_CDESC", @@ -7458,6 +7499,10 @@ sub warnUnsupportedFunctions { "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_CONV_DESC", "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_BETA", "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_ALPHA", + "CUDNN_ATTR_OPERATION_CONCAT_OUTPUT_DESC", + "CUDNN_ATTR_OPERATION_CONCAT_INPUT_DESCS", + "CUDNN_ATTR_OPERATION_CONCAT_INPLACE_INDEX", + "CUDNN_ATTR_OPERATION_CONCAT_AXIS", "CUDNN_ATTR_OPERATION_BN_FINALIZE_Y_SUM_DESC", "CUDNN_ATTR_OPERATION_BN_FINALIZE_Y_SQ_SUM_DESC", "CUDNN_ATTR_OPERATION_BN_FINALIZE_UPDATED_RUNNING_VAR_DESC", diff --git a/doc/markdown/CUDNN_API_supported_by_HIP.md b/doc/markdown/CUDNN_API_supported_by_HIP.md index 315e1077..861df9f8 100644 --- a/doc/markdown/CUDNN_API_supported_by_HIP.md +++ b/doc/markdown/CUDNN_API_supported_by_HIP.md @@ -95,6 +95,10 @@ |`CUDNN_ATTR_OPERATION_BN_FINALIZE_UPDATED_RUNNING_VAR_DESC`|8.1.0| | | | | | | | |`CUDNN_ATTR_OPERATION_BN_FINALIZE_Y_SQ_SUM_DESC`|8.1.0| | | | | | | | |`CUDNN_ATTR_OPERATION_BN_FINALIZE_Y_SUM_DESC`|8.1.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_CONCAT_AXIS`|8.5.0| | | | | | | | 
+|`CUDNN_ATTR_OPERATION_CONCAT_INPLACE_INDEX`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_CONCAT_INPUT_DESCS`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_CONCAT_OUTPUT_DESC`|8.5.0| | | | | | | | |`CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_ALPHA`|8.0.1| | | | | | | | |`CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_BETA`|8.0.1| | | | | | | | |`CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_CONV_DESC`|8.0.1| | | | | | | | @@ -123,6 +127,21 @@ |`CUDNN_ATTR_OPERATION_MATMUL_CDESC`|8.1.0| | | | | | | | |`CUDNN_ATTR_OPERATION_MATMUL_DESC`|8.1.0| | | | | | | | |`CUDNN_ATTR_OPERATION_MATMUL_IRREGULARLY_STRIDED_BATCH_COUNT`|8.1.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_FWD_BIAS_DESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_FWD_EPSILON_DESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_FWD_EXP_AVG_FACTOR_DESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_MEAN_DESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_VAR_DESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_FWD_INV_VARIANCE_DESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_FWD_MEAN_DESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_FWD_MODE`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_MEAN_DESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_VAR_DESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_FWD_PEER_STAT_DESCS`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_FWD_PHASE`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_FWD_SCALE_DESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_FWD_XDESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_FWD_YDESC`|8.5.0| | | | | | | | |`CUDNN_ATTR_OPERATION_POINTWISE_ALPHA1`|8.0.1| | | | | | | | |`CUDNN_ATTR_OPERATION_POINTWISE_ALPHA2`|8.0.1| | | | | | | | |`CUDNN_ATTR_OPERATION_POINTWISE_BDESC`|8.0.1| | | | | | | | @@ -135,6 +154,23 @@ |`CUDNN_ATTR_OPERATION_REDUCTION_DESC`|8.1.0| | | | | | | | |`CUDNN_ATTR_OPERATION_REDUCTION_XDESC`|8.1.0| | | | | 
| | | |`CUDNN_ATTR_OPERATION_REDUCTION_YDESC`|8.1.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_RESAMPLE_BWD_ALPHA`|8.3.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_RESAMPLE_BWD_BETA`|8.3.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DESC`|8.3.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DXDESC`|8.3.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DYDESC`|8.3.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_RESAMPLE_BWD_IDXDESC`|8.3.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_RESAMPLE_FWD_ALPHA`|8.3.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_RESAMPLE_FWD_BETA`|8.3.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_RESAMPLE_FWD_DESC`|8.3.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_RESAMPLE_FWD_IDXDESC`|8.3.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_RESAMPLE_FWD_XDESC`|8.3.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_RESAMPLE_FWD_YDESC`|8.3.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_SIGNAL_FLAGDESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_SIGNAL_MODE`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_SIGNAL_VALUE`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_SIGNAL_XDESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_SIGNAL_YDESC`|8.5.0| | | | | | | | |`CUDNN_ATTR_POINTWISE_AXIS`|8.4.0| | | | | | | | |`CUDNN_ATTR_POINTWISE_ELU_ALPHA`|8.1.0| | | | | | | | |`CUDNN_ATTR_POINTWISE_MATH_PREC`|8.0.1| | | | | | | | @@ -147,6 +183,15 @@ |`CUDNN_ATTR_POINTWISE_SWISH_BETA`|8.1.0| | | | | | | | |`CUDNN_ATTR_REDUCTION_COMP_TYPE`|8.1.0| | | | | | | | |`CUDNN_ATTR_REDUCTION_OPERATOR`|8.1.0| | | | | | | | +|`CUDNN_ATTR_RESAMPLE_COMP_TYPE`|8.3.0| | | | | | | | +|`CUDNN_ATTR_RESAMPLE_MODE`|8.3.0| | | | | | | | +|`CUDNN_ATTR_RESAMPLE_NAN_PROPAGATION`|8.3.0| | | | | | | | +|`CUDNN_ATTR_RESAMPLE_PADDING_MODE`|8.3.0| | | | | | | | +|`CUDNN_ATTR_RESAMPLE_POST_PADDINGS`|8.3.0| | | | | | | | +|`CUDNN_ATTR_RESAMPLE_PRE_PADDINGS`|8.3.0| | | | | | | | +|`CUDNN_ATTR_RESAMPLE_SPATIAL_DIMS`|8.3.0| | | | | | | | +|`CUDNN_ATTR_RESAMPLE_STRIDES`|8.3.0| | | | | | | | +|`CUDNN_ATTR_RESAMPLE_WINDOW_DIMS`|8.3.0| | | | | | | 
| |`CUDNN_ATTR_TENSOR_BYTE_ALIGNMENT`|8.0.1| | | | | | | | |`CUDNN_ATTR_TENSOR_DATA_TYPE`|8.0.1| | | | | | | | |`CUDNN_ATTR_TENSOR_DIMENSIONS`|8.0.1| | | | | | | | diff --git a/src/CUDA2HIP_DNN_API_types.cpp b/src/CUDA2HIP_DNN_API_types.cpp index 0343b364..a90c2cd4 100644 --- a/src/CUDA2HIP_DNN_API_types.cpp +++ b/src/CUDA2HIP_DNN_API_types.cpp @@ -553,6 +553,51 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_DY_SCALE_DESC", {"HIPDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_DY_SCALE_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1628 {"CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_X_SCALE_DESC", {"HIPDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_X_SCALE_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1629 {"CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_BIAS", {"HIPDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_BIAS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1630 + {"CUDNN_ATTR_RESAMPLE_MODE", {"HIPDNN_ATTR_RESAMPLE_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1700 + {"CUDNN_ATTR_RESAMPLE_COMP_TYPE", {"HIPDNN_ATTR_RESAMPLE_COMP_TYPE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1701 + {"CUDNN_ATTR_RESAMPLE_SPATIAL_DIMS", {"HIPDNN_ATTR_RESAMPLE_SPATIAL_DIMS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1702 + {"CUDNN_ATTR_RESAMPLE_POST_PADDINGS", {"HIPDNN_ATTR_RESAMPLE_POST_PADDINGS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1703 + {"CUDNN_ATTR_RESAMPLE_PRE_PADDINGS", {"HIPDNN_ATTR_RESAMPLE_PRE_PADDINGS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1704 + {"CUDNN_ATTR_RESAMPLE_STRIDES", {"HIPDNN_ATTR_RESAMPLE_STRIDES", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1705 + {"CUDNN_ATTR_RESAMPLE_WINDOW_DIMS", {"HIPDNN_ATTR_RESAMPLE_WINDOW_DIMS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1706 + {"CUDNN_ATTR_RESAMPLE_NAN_PROPAGATION", {"HIPDNN_ATTR_RESAMPLE_NAN_PROPAGATION", "", CONV_NUMERIC_LITERAL, 
API_DNN, 1, HIP_UNSUPPORTED}}, // 1707 + {"CUDNN_ATTR_RESAMPLE_PADDING_MODE", {"HIPDNN_ATTR_RESAMPLE_PADDING_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1708 + {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_XDESC", {"HIPDNN_ATTR_OPERATION_RESAMPLE_FWD_XDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1710 + {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_YDESC", {"HIPDNN_ATTR_OPERATION_RESAMPLE_FWD_YDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1711 + {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_IDXDESC", {"HIPDNN_ATTR_OPERATION_RESAMPLE_FWD_IDXDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1712 + {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_ALPHA", {"HIPDNN_ATTR_OPERATION_RESAMPLE_FWD_ALPHA", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1713 + {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_BETA", {"HIPDNN_ATTR_OPERATION_RESAMPLE_FWD_BETA", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1714 + {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_DESC", {"HIPDNN_ATTR_OPERATION_RESAMPLE_FWD_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1716 + {"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DXDESC", {"HIPDNN_ATTR_OPERATION_RESAMPLE_BWD_DXDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1720 + {"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DYDESC", {"HIPDNN_ATTR_OPERATION_RESAMPLE_BWD_DYDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1721 + {"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_IDXDESC", {"HIPDNN_ATTR_OPERATION_RESAMPLE_BWD_IDXDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1722 + {"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_ALPHA", {"HIPDNN_ATTR_OPERATION_RESAMPLE_BWD_ALPHA", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1723 + {"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_BETA", {"HIPDNN_ATTR_OPERATION_RESAMPLE_BWD_BETA", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1724 + {"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DESC", {"HIPDNN_ATTR_OPERATION_RESAMPLE_BWD_DESC", "", 
CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1725 + {"CUDNN_ATTR_OPERATION_CONCAT_AXIS", {"HIPDNN_ATTR_OPERATION_CONCAT_AXIS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1800 + {"CUDNN_ATTR_OPERATION_CONCAT_INPUT_DESCS", {"HIPDNN_ATTR_OPERATION_CONCAT_INPUT_DESCS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1801 + {"CUDNN_ATTR_OPERATION_CONCAT_INPLACE_INDEX", {"HIPDNN_ATTR_OPERATION_CONCAT_INPLACE_INDEX", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1802 + {"CUDNN_ATTR_OPERATION_CONCAT_OUTPUT_DESC", {"HIPDNN_ATTR_OPERATION_CONCAT_OUTPUT_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1803 + {"CUDNN_ATTR_OPERATION_SIGNAL_MODE", {"HIPDNN_ATTR_OPERATION_SIGNAL_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1900 + {"CUDNN_ATTR_OPERATION_SIGNAL_FLAGDESC", {"HIPDNN_ATTR_OPERATION_SIGNAL_FLAGDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1901 + {"CUDNN_ATTR_OPERATION_SIGNAL_VALUE", {"HIPDNN_ATTR_OPERATION_SIGNAL_VALUE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1902 + {"CUDNN_ATTR_OPERATION_SIGNAL_XDESC", {"HIPDNN_ATTR_OPERATION_SIGNAL_XDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1903 + {"CUDNN_ATTR_OPERATION_SIGNAL_YDESC", {"HIPDNN_ATTR_OPERATION_SIGNAL_YDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1904 + {"CUDNN_ATTR_OPERATION_NORM_FWD_MODE", {"HIPDNN_ATTR_OPERATION_NORM_FWD_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2000 + {"CUDNN_ATTR_OPERATION_NORM_FWD_PHASE", {"HIPDNN_ATTR_OPERATION_NORM_FWD_PHASE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2001 + {"CUDNN_ATTR_OPERATION_NORM_FWD_XDESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_XDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2002 + {"CUDNN_ATTR_OPERATION_NORM_FWD_MEAN_DESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_MEAN_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2003 + 
{"CUDNN_ATTR_OPERATION_NORM_FWD_INV_VARIANCE_DESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_INV_VARIANCE_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2004 + {"CUDNN_ATTR_OPERATION_NORM_FWD_SCALE_DESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_SCALE_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2005 + {"CUDNN_ATTR_OPERATION_NORM_FWD_BIAS_DESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_BIAS_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2006 + {"CUDNN_ATTR_OPERATION_NORM_FWD_EPSILON_DESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_EPSILON_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2007 + {"CUDNN_ATTR_OPERATION_NORM_FWD_EXP_AVG_FACTOR_DESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_EXP_AVG_FACTOR_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2008 + {"CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_MEAN_DESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_MEAN_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2009 + {"CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_VAR_DESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_VAR_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2010 + {"CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_MEAN_DESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_MEAN_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2011 + {"CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_VAR_DESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_VAR_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2012 + {"CUDNN_ATTR_OPERATION_NORM_FWD_YDESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_YDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2013 + {"CUDNN_ATTR_OPERATION_NORM_FWD_PEER_STAT_DESCS", {"HIPDNN_ATTR_OPERATION_NORM_FWD_PEER_STAT_DESCS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2014 {"cudnnBackendAttributeType_t", {"hipdnnBackendAttributeType_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, 
{"CUDNN_TYPE_HANDLE", {"HIPDNN_TYPE_HANDLE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_TYPE_DATA_TYPE", {"HIPDNN_TYPE_DATA_TYPE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, @@ -1440,6 +1485,51 @@ const std::map CUDA_DNN_TYPE_NAME_VER_MAP { {"CUDNN_ATTR_EXECUTION_PLAN_JSON_REPRESENTATION", {CUDNN_840, CUDA_0, CUDA_0 }}, {"CUDNN_ATTR_OPERATION_POINTWISE_TDESC", {CUDNN_830, CUDA_0, CUDA_0 }}, {"CUDNN_ATTR_TENSOR_REORDERING_MODE", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_RESAMPLE_MODE", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_RESAMPLE_COMP_TYPE", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_RESAMPLE_SPATIAL_DIMS", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_RESAMPLE_POST_PADDINGS", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_RESAMPLE_PRE_PADDINGS", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_RESAMPLE_STRIDES", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_RESAMPLE_WINDOW_DIMS", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_RESAMPLE_NAN_PROPAGATION", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_RESAMPLE_PADDING_MODE", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_XDESC", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_YDESC", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_IDXDESC", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_ALPHA", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_BETA", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_RESAMPLE_FWD_DESC", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DXDESC", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DYDESC", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_IDXDESC", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_ALPHA", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_BETA", {CUDNN_830, CUDA_0, CUDA_0 }}, + 
{"CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DESC", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_CONCAT_AXIS", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_CONCAT_INPUT_DESCS", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_CONCAT_INPLACE_INDEX", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_CONCAT_OUTPUT_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_SIGNAL_MODE", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_SIGNAL_FLAGDESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_SIGNAL_VALUE", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_SIGNAL_XDESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_SIGNAL_YDESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_FWD_MODE", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_FWD_PHASE", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_FWD_XDESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_FWD_MEAN_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_FWD_INV_VARIANCE_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_FWD_SCALE_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_FWD_BIAS_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_FWD_EPSILON_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_FWD_EXP_AVG_FACTOR_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_MEAN_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_VAR_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_MEAN_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_VAR_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_FWD_YDESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_FWD_PEER_STAT_DESCS", {CUDNN_850, CUDA_0, CUDA_0 }}, }; const std::map HIP_DNN_TYPE_NAME_VER_MAP 
{ From 43f4536903e846a924a1400f95b59433283ebb5a Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Sun, 9 Oct 2022 18:27:05 +0200 Subject: [PATCH 12/43] [HIPIFY][DNN] Sync with cuDNN 8.5.0 - Part 4 - final + Add some missing cuDNN 8.3.0 and 8.4.0 APIs + Update regenerated hipify-perl and CUDNN_API_supported_by_HIP.md --- bin/hipify-perl | 50 +++++++++++ doc/markdown/CUDNN_API_supported_by_HIP.md | 50 +++++++++++ src/CUDA2HIP_DNN_API_types.cpp | 100 +++++++++++++++++++++ 3 files changed, 200 insertions(+) diff --git a/bin/hipify-perl b/bin/hipify-perl index 107609bb..735933b2 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -5799,6 +5799,7 @@ sub warnUnsupportedFunctions { "cudnnPoolingStruct", "cudnnPointwiseMode_t", "cudnnPersistentRNNPlan", + "cudnnPaddingMode_t", "cudnnOpsTrainVersionCheck", "cudnnOpsInferVersionCheck", "cudnnOpTensorStruct", @@ -5946,8 +5947,11 @@ sub warnUnsupportedFunctions { "cudnnBatchNormalizationForwardTrainingEx", "cudnnBatchNormalizationBackwardEx", "cudnnBatchNormOps_t", + "cudnnBackendTensorReordering_t", "cudnnBackendSetAttribute", "cudnnBackendNumericalNote_t", + "cudnnBackendNormMode_t", + "cudnnBackendNormFwdPhase_t", "cudnnBackendLayoutType_t", "cudnnBackendKnobType_t", "cudnnBackendInitialize", @@ -7081,23 +7085,33 @@ sub warnUnsupportedFunctions { "CUFFT_COPY_DEVICE_TO_DEVICE", "CUFFT_COMPATIBILITY_FFTW_PADDING", "CUFFT_COMPATIBILITY_DEFAULT", + "CUDNN_ZERO_PAD", "CUDNN_WGRAD_MODE_SET", "CUDNN_WGRAD_MODE_ADD", "CUDNN_TYPE_VOID_PTR", + "CUDNN_TYPE_TENSOR_REORDERING_MODE", + "CUDNN_TYPE_SIGNAL_MODE", + "CUDNN_TYPE_RESAMPLE_MODE", "CUDNN_TYPE_REDUCTION_OPERATOR_TYPE", "CUDNN_TYPE_POINTWISE_MODE", + "CUDNN_TYPE_PADDING_MODE", "CUDNN_TYPE_NUMERICAL_NOTE", + "CUDNN_TYPE_NORM_MODE", + "CUDNN_TYPE_NORM_FWD_PHASE", "CUDNN_TYPE_NAN_PROPOGATION", "CUDNN_TYPE_LAYOUT_TYPE", "CUDNN_TYPE_KNOB_TYPE", "CUDNN_TYPE_INT64", + "CUDNN_TYPE_INT32", "CUDNN_TYPE_HEUR_MODE", "CUDNN_TYPE_HANDLE", "CUDNN_TYPE_GENSTATS_MODE", + "CUDNN_TYPE_FRACTION", 
"CUDNN_TYPE_FLOAT", "CUDNN_TYPE_DOUBLE", "CUDNN_TYPE_DATA_TYPE", "CUDNN_TYPE_CONVOLUTION_MODE", + "CUDNN_TYPE_CHAR", "CUDNN_TYPE_BOOLEAN", "CUDNN_TYPE_BN_FINALIZE_STATS_MODE", "CUDNN_TYPE_BEHAVIOR_NOTE", @@ -7105,6 +7119,8 @@ sub warnUnsupportedFunctions { "CUDNN_TYPE_ATTRIB_NAME", "CUDNN_TRANSFORM_UNFOLD", "CUDNN_TRANSFORM_FOLD", + "CUDNN_TENSOR_REORDERING_NONE", + "CUDNN_TENSOR_REORDERING_INT8x32", "CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION", "CUDNN_STATUS_VERSION_MISMATCH", "CUDNN_STATUS_RUNTIME_IN_PROGRESS", @@ -7263,6 +7279,9 @@ sub warnUnsupportedFunctions { "CUDNN_OPS_INFER_PATCH", "CUDNN_OPS_INFER_MINOR", "CUDNN_OPS_INFER_MAJOR", + "CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_6x6", + "CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_4x4", + "CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_13x13", "CUDNN_NUMERICAL_NOTE_WINOGRAD", "CUDNN_NUMERICAL_NOTE_TYPE_COUNT", "CUDNN_NUMERICAL_NOTE_TENSOR_CORE", @@ -7276,9 +7295,12 @@ sub warnUnsupportedFunctions { "CUDNN_NORM_OPS_NORM_ADD_ACTIVATION", "CUDNN_NORM_OPS_NORM_ACTIVATION", "CUDNN_NORM_OPS_NORM", + "CUDNN_NORM_FWD_TRAINING", + "CUDNN_NORM_FWD_INFERENCE", "CUDNN_NORM_ALGO_STANDARD", "CUDNN_NORM_ALGO_PERSIST", "CUDNN_NON_DETERMINISTIC", + "CUDNN_NEG_INF_PAD", "CUDNN_MINOR", "CUDNN_MH_ATTN_V_WEIGHTS", "CUDNN_MH_ATTN_V_BIASES", @@ -7300,6 +7322,8 @@ sub warnUnsupportedFunctions { "CUDNN_LAYOUT_TYPE_PREFERRED_NHWC", "CUDNN_LAYOUT_TYPE_PREFERRED_NCHW", "CUDNN_LAYOUT_TYPE_COUNT", + "CUDNN_LAYER_NORM", + "CUDNN_KNOB_TYPE_WORKSPACE", "CUDNN_KNOB_TYPE_WINO_TILE", "CUDNN_KNOB_TYPE_USE_TEX", "CUDNN_KNOB_TYPE_TILE_SIZE", @@ -7326,9 +7350,13 @@ sub warnUnsupportedFunctions { "CUDNN_KNOB_TYPE_CTA_SPLIT_K_MODE", "CUDNN_KNOB_TYPE_COUNTS", "CUDNN_KNOB_TYPE_CHUNK_K", + "CUDNN_INSTANCE_NORM", "CUDNN_HEUR_MODE_INSTANT", + "CUDNN_HEUR_MODE_FALLBACK", "CUDNN_HEUR_MODE_B", + "CUDNN_HEUR_MODE_A", "CUDNN_HEUR_MODES_COUNT", + "CUDNN_GROUP_NORM", "CUDNN_GENSTATS_SUM_SQSUM", "CUDNN_FWD_MODE_TRAINING", "CUDNN_FWD_MODE_INFERENCE", @@ -7343,6 +7371,7 @@ sub 
warnUnsupportedFunctions { "CUDNN_ERRQUERY_RAWCODE", "CUDNN_ERRQUERY_NONBLOCKING", "CUDNN_ERRQUERY_BLOCKING", + "CUDNN_EDGE_VAL_PAD", "CUDNN_DIVNORM_PRECOMPUTED_MEANS", "CUDNN_DIM_MAX", "CUDNN_DETERMINISTIC", @@ -7365,20 +7394,30 @@ sub warnUnsupportedFunctions { "CUDNN_BN_FINALIZE_STATISTICS_INFERENCE", "CUDNN_BEHAVIOR_NOTE_TYPE_COUNT", "CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION", + "CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER", + "CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER", + "CUDNN_BATCH_NORM", "CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION", "CUDNN_BATCHNORM_OPS_BN_ACTIVATION", "CUDNN_BATCHNORM_OPS_BN", "CUDNN_BACKEND_VARIANT_PACK_DESCRIPTOR", "CUDNN_BACKEND_TENSOR_DESCRIPTOR", + "CUDNN_BACKEND_RESAMPLE_DESCRIPTOR", "CUDNN_BACKEND_REDUCTION_DESCRIPTOR", "CUDNN_BACKEND_POINTWISE_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_SIGNAL_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_RESAMPLE_FWD_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_RESAMPLE_BWD_DESCRIPTOR", "CUDNN_BACKEND_OPERATION_REDUCTION_DESCRIPTOR", "CUDNN_BACKEND_OPERATION_POINTWISE_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_NORM_FORWARD_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_NORM_BACKWARD_DESCRIPTOR", "CUDNN_BACKEND_OPERATION_MATMUL_DESCRIPTOR", "CUDNN_BACKEND_OPERATION_GEN_STATS_DESCRIPTOR", "CUDNN_BACKEND_OPERATION_CONVOLUTION_FORWARD_DESCRIPTOR", "CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_FILTER_DESCRIPTOR", "CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_DATA_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_CONCAT_DESCRIPTOR", "CUDNN_BACKEND_OPERATION_BN_FINALIZE_STATISTICS_DESCRIPTOR", "CUDNN_BACKEND_OPERATION_BN_BWD_WEIGHTS_DESCRIPTOR", "CUDNN_BACKEND_OPERATIONGRAPH_DESCRIPTOR", @@ -7471,6 +7510,17 @@ sub warnUnsupportedFunctions { "CUDNN_ATTR_OPERATION_NORM_FWD_EXP_AVG_FACTOR_DESC", "CUDNN_ATTR_OPERATION_NORM_FWD_EPSILON_DESC", "CUDNN_ATTR_OPERATION_NORM_FWD_BIAS_DESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_XDESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_SCALE_DESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_PEER_STAT_DESCS", + 
"CUDNN_ATTR_OPERATION_NORM_BWD_MODE", + "CUDNN_ATTR_OPERATION_NORM_BWD_MEAN_DESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_INV_VARIANCE_DESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_EPSILON_DESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_DYDESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_DXDESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_DSCALE_DESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_DBIAS_DESC", "CUDNN_ATTR_OPERATION_MATMUL_IRREGULARLY_STRIDED_BATCH_COUNT", "CUDNN_ATTR_OPERATION_MATMUL_DESC", "CUDNN_ATTR_OPERATION_MATMUL_CDESC", diff --git a/doc/markdown/CUDNN_API_supported_by_HIP.md b/doc/markdown/CUDNN_API_supported_by_HIP.md index 861df9f8..4a3d5380 100644 --- a/doc/markdown/CUDNN_API_supported_by_HIP.md +++ b/doc/markdown/CUDNN_API_supported_by_HIP.md @@ -127,6 +127,17 @@ |`CUDNN_ATTR_OPERATION_MATMUL_CDESC`|8.1.0| | | | | | | | |`CUDNN_ATTR_OPERATION_MATMUL_DESC`|8.1.0| | | | | | | | |`CUDNN_ATTR_OPERATION_MATMUL_IRREGULARLY_STRIDED_BATCH_COUNT`|8.1.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_BWD_DBIAS_DESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_BWD_DSCALE_DESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_BWD_DXDESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_BWD_DYDESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_BWD_EPSILON_DESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_BWD_INV_VARIANCE_DESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_BWD_MEAN_DESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_BWD_MODE`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_BWD_PEER_STAT_DESCS`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_BWD_SCALE_DESC`|8.5.0| | | | | | | | +|`CUDNN_ATTR_OPERATION_NORM_BWD_XDESC`|8.5.0| | | | | | | | |`CUDNN_ATTR_OPERATION_NORM_FWD_BIAS_DESC`|8.5.0| | | | | | | | |`CUDNN_ATTR_OPERATION_NORM_FWD_EPSILON_DESC`|8.5.0| | | | | | | | |`CUDNN_ATTR_OPERATION_NORM_FWD_EXP_AVG_FACTOR_DESC`|8.5.0| | | | | | | | @@ -219,15 +230,22 @@ |`CUDNN_BACKEND_OPERATIONGRAPH_DESCRIPTOR`|8.0.1| | | | | | | | 
|`CUDNN_BACKEND_OPERATION_BN_BWD_WEIGHTS_DESCRIPTOR`| | | | | | | | | |`CUDNN_BACKEND_OPERATION_BN_FINALIZE_STATISTICS_DESCRIPTOR`|8.1.0| | | | | | | | +|`CUDNN_BACKEND_OPERATION_CONCAT_DESCRIPTOR`|8.5.0| | | | | | | | |`CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_DATA_DESCRIPTOR`|8.0.1| | | | | | | | |`CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_FILTER_DESCRIPTOR`|8.0.1| | | | | | | | |`CUDNN_BACKEND_OPERATION_CONVOLUTION_FORWARD_DESCRIPTOR`|8.0.1| | | | | | | | |`CUDNN_BACKEND_OPERATION_GEN_STATS_DESCRIPTOR`|8.0.1| | | | | | | | |`CUDNN_BACKEND_OPERATION_MATMUL_DESCRIPTOR`|8.1.0| | | | | | | | +|`CUDNN_BACKEND_OPERATION_NORM_BACKWARD_DESCRIPTOR`|8.5.0| | | | | | | | +|`CUDNN_BACKEND_OPERATION_NORM_FORWARD_DESCRIPTOR`|8.5.0| | | | | | | | |`CUDNN_BACKEND_OPERATION_POINTWISE_DESCRIPTOR`|8.0.1| | | | | | | | |`CUDNN_BACKEND_OPERATION_REDUCTION_DESCRIPTOR`|8.1.0| | | | | | | | +|`CUDNN_BACKEND_OPERATION_RESAMPLE_BWD_DESCRIPTOR`|8.3.0| | | | | | | | +|`CUDNN_BACKEND_OPERATION_RESAMPLE_FWD_DESCRIPTOR`|8.3.0| | | | | | | | +|`CUDNN_BACKEND_OPERATION_SIGNAL_DESCRIPTOR`|8.5.0| | | | | | | | |`CUDNN_BACKEND_POINTWISE_DESCRIPTOR`|8.0.1| | | | | | | | |`CUDNN_BACKEND_REDUCTION_DESCRIPTOR`|8.1.0| | | | | | | | +|`CUDNN_BACKEND_RESAMPLE_DESCRIPTOR`|8.3.0| | | | | | | | |`CUDNN_BACKEND_TENSOR_DESCRIPTOR`|8.0.1| | | | | | | | |`CUDNN_BACKEND_VARIANT_PACK_DESCRIPTOR`|8.0.1| | | | | | | | |`CUDNN_BATCHNORM_OPS_BN`|7.4.1| | | | | | | | @@ -236,6 +254,9 @@ |`CUDNN_BATCHNORM_PER_ACTIVATION`|4.0.0| | |`HIPDNN_BATCHNORM_PER_ACTIVATION`| | | | | |`CUDNN_BATCHNORM_SPATIAL`|4.0.0| | |`HIPDNN_BATCHNORM_SPATIAL`| | | | | |`CUDNN_BATCHNORM_SPATIAL_PERSISTENT`|7.0.5| | |`HIPDNN_BATCHNORM_SPATIAL_PERSISTENT`| | | | | +|`CUDNN_BATCH_NORM`|8.5.0| | | | | | | | +|`CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER`|8.3.0| | | | | | | | +|`CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER`|8.3.0| | | | | | | | |`CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION`|8.2.0| | | | | | | | 
|`CUDNN_BEHAVIOR_NOTE_TYPE_COUNT`|8.2.0| | | | | | | | |`CUDNN_BIDIRECTIONAL`|5.0.0| | |`HIPDNN_BIDIRECTIONAL`| | | | | @@ -302,6 +323,7 @@ |`CUDNN_DETERMINISTIC`|6.0.0| | | | | | | | |`CUDNN_DIM_MAX`|4.0.0| | | | | | | | |`CUDNN_DIVNORM_PRECOMPUTED_MEANS`|3.0.0| | | | | | | | +|`CUDNN_EDGE_VAL_PAD`|8.3.0| | | | | | | | |`CUDNN_ERRQUERY_BLOCKING`|7.0.5| | | | | | | | |`CUDNN_ERRQUERY_NONBLOCKING`|7.0.5| | | | | | | | |`CUDNN_ERRQUERY_RAWCODE`|7.0.5| | | | | | | | @@ -316,10 +338,14 @@ |`CUDNN_FWD_MODE_INFERENCE`|8.0.1| | | | | | | | |`CUDNN_FWD_MODE_TRAINING`|8.0.1| | | | | | | | |`CUDNN_GENSTATS_SUM_SQSUM`|8.0.1| | | | | | | | +|`CUDNN_GROUP_NORM`|8.5.0| | | | | | | | |`CUDNN_GRU`|5.0.0| | |`HIPDNN_GRU`| | | | | |`CUDNN_HEUR_MODES_COUNT`|8.0.1| | | | | | | | +|`CUDNN_HEUR_MODE_A`|8.3.0| | | | | | | | |`CUDNN_HEUR_MODE_B`|8.0.1| | | | | | | | +|`CUDNN_HEUR_MODE_FALLBACK`|8.3.0| | | | | | | | |`CUDNN_HEUR_MODE_INSTANT`|8.0.1| | | | | | | | +|`CUDNN_INSTANCE_NORM`|8.5.0| | | | | | | | |`CUDNN_KNOB_TYPE_CHUNK_K`|8.0.1| | | | | | | | |`CUDNN_KNOB_TYPE_COUNTS`|8.0.1| | | | | | | | |`CUDNN_KNOB_TYPE_CTA_SPLIT_K_MODE`|8.0.1| | | | | | | | @@ -346,6 +372,8 @@ |`CUDNN_KNOB_TYPE_TILE_SIZE`|8.0.1| | | | | | | | |`CUDNN_KNOB_TYPE_USE_TEX`|8.0.1| | | | | | | | |`CUDNN_KNOB_TYPE_WINO_TILE`|8.0.1| | | | | | | | +|`CUDNN_KNOB_TYPE_WORKSPACE`|8.4.0| | | | | | | | +|`CUDNN_LAYER_NORM`|8.5.0| | | | | | | | |`CUDNN_LAYOUT_TYPE_COUNT`|8.0.2| | | | | | | | |`CUDNN_LAYOUT_TYPE_PREFERRED_NCHW`|8.0.1| | | | | | | | |`CUDNN_LAYOUT_TYPE_PREFERRED_NHWC`|8.0.2| | | | | | | | @@ -370,9 +398,12 @@ |`CUDNN_MH_ATTN_V_BIASES`|7.6.3| | | | | | | | |`CUDNN_MH_ATTN_V_WEIGHTS`|7.5.0| | | | | | | | |`CUDNN_MINOR`|3.0.0| | | | | | | | +|`CUDNN_NEG_INF_PAD`|8.3.0| | | | | | | | |`CUDNN_NON_DETERMINISTIC`|6.0.0| | | | | | | | |`CUDNN_NORM_ALGO_PERSIST`|8.0.1| | | | | | | | |`CUDNN_NORM_ALGO_STANDARD`|8.0.1| | | | | | | | +|`CUDNN_NORM_FWD_INFERENCE`|8.5.0| | | | | | | | +|`CUDNN_NORM_FWD_TRAINING`|8.5.0| | 
| | | | | | |`CUDNN_NORM_OPS_NORM`|8.0.1| | | | | | | | |`CUDNN_NORM_OPS_NORM_ACTIVATION`|8.0.1| | | | | | | | |`CUDNN_NORM_OPS_NORM_ADD_ACTIVATION`|8.0.1| | | | | | | | @@ -387,6 +418,9 @@ |`CUDNN_NUMERICAL_NOTE_TENSOR_CORE`|8.0.1| | | | | | | | |`CUDNN_NUMERICAL_NOTE_TYPE_COUNT`|8.0.1| | | | | | | | |`CUDNN_NUMERICAL_NOTE_WINOGRAD`|8.0.1| | | | | | | | +|`CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_13x13`|8.3.0| | | | | | | | +|`CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_4x4`|8.3.0| | | | | | | | +|`CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_6x6`|8.3.0| | | | | | | | |`CUDNN_OPS_INFER_MAJOR`|8.0.1| | | | | | | | |`CUDNN_OPS_INFER_MINOR`|8.0.1| | | | | | | | |`CUDNN_OPS_INFER_PATCH`|8.0.1| | | | | | | | @@ -598,6 +632,8 @@ |`CUDNN_TENSOR_NHWC`|1.0.0| | |`HIPDNN_TENSOR_NHWC`| | | | | |`CUDNN_TENSOR_OP_MATH`|7.0.5| | |`HIPDNN_TENSOR_OP_MATH`| | | | | |`CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION`|7.2.1| | | | | | | | +|`CUDNN_TENSOR_REORDERING_INT8x32`|8.3.0| | | | | | | | +|`CUDNN_TENSOR_REORDERING_NONE`|8.3.0| | | | | | | | |`CUDNN_TRANSFORM_FOLD`|7.5.0| | | | | | | | |`CUDNN_TRANSFORM_UNFOLD`|7.5.0| | | | | | | | |`CUDNN_TYPE_ATTRIB_NAME`|8.0.1| | | | | | | | @@ -605,25 +641,35 @@ |`CUDNN_TYPE_BEHAVIOR_NOTE`|8.2.0| | | | | | | | |`CUDNN_TYPE_BN_FINALIZE_STATS_MODE`|8.1.0| | | | | | | | |`CUDNN_TYPE_BOOLEAN`|8.0.1| | | | | | | | +|`CUDNN_TYPE_CHAR`|8.4.0| | | | | | | | |`CUDNN_TYPE_CONVOLUTION_MODE`|8.0.1| | | | | | | | |`CUDNN_TYPE_DATA_TYPE`|8.0.1| | | | | | | | |`CUDNN_TYPE_DOUBLE`|8.0.1| | | | | | | | |`CUDNN_TYPE_FLOAT`|8.0.1| | | | | | | | +|`CUDNN_TYPE_FRACTION`|8.5.0| | | | | | | | |`CUDNN_TYPE_GENSTATS_MODE`|8.0.1| | | | | | | | |`CUDNN_TYPE_HANDLE`|8.0.1| | | | | | | | |`CUDNN_TYPE_HEUR_MODE`|8.0.1| | | | | | | | +|`CUDNN_TYPE_INT32`|8.3.0| | | | | | | | |`CUDNN_TYPE_INT64`|8.0.1| | | | | | | | |`CUDNN_TYPE_KNOB_TYPE`|8.0.1| | | | | | | | |`CUDNN_TYPE_LAYOUT_TYPE`|8.0.2| | | | | | | | |`CUDNN_TYPE_NAN_PROPOGATION`|8.0.1| | | | | | | | +|`CUDNN_TYPE_NORM_FWD_PHASE`|8.5.0| | | | | | | | 
+|`CUDNN_TYPE_NORM_MODE`|8.5.0| | | | | | | | |`CUDNN_TYPE_NUMERICAL_NOTE`|8.0.1| | | | | | | | +|`CUDNN_TYPE_PADDING_MODE`|8.3.0| | | | | | | | |`CUDNN_TYPE_POINTWISE_MODE`|8.0.1| | | | | | | | |`CUDNN_TYPE_REDUCTION_OPERATOR_TYPE`|8.1.0| | | | | | | | +|`CUDNN_TYPE_RESAMPLE_MODE`|8.3.0| | | | | | | | +|`CUDNN_TYPE_SIGNAL_MODE`|8.5.0| | | | | | | | +|`CUDNN_TYPE_TENSOR_REORDERING_MODE`|8.3.0| | | | | | | | |`CUDNN_TYPE_VOID_PTR`|8.0.1| | | | | | | | |`CUDNN_UNIDIRECTIONAL`|5.0.0| | |`HIPDNN_UNIDIRECTIONAL`| | | | | |`CUDNN_VERSION`|2.0.0| | |`HIPDNN_VERSION`| | | | | |`CUDNN_WGRAD_MODE_ADD`|7.5.0| | | | | | | | |`CUDNN_WGRAD_MODE_SET`|7.5.0| | | | | | | | +|`CUDNN_ZERO_PAD`|8.3.0| | | | | | | | |`cudnnActivationDescriptor_t`|4.0.0| | |`hipdnnActivationDescriptor_t`| | | | | |`cudnnActivationMode_t`|1.0.0| | |`hipdnnActivationMode_t`| | | | | |`cudnnActivationStruct`|4.0.0| | | | | | | | @@ -644,7 +690,10 @@ |`cudnnBackendHeurMode_t`|8.0.1| | | | | | | | |`cudnnBackendKnobType_t`|8.0.1| | | | | | | | |`cudnnBackendLayoutType_t`|8.0.1| | | | | | | | +|`cudnnBackendNormFwdPhase_t`|8.5.0| | | | | | | | +|`cudnnBackendNormMode_t`|8.5.0| | | | | | | | |`cudnnBackendNumericalNote_t`|8.0.1| | | | | | | | +|`cudnnBackendTensorReordering_t`|8.3.0| | | | | | | | |`cudnnBatchNormMode_t`|4.0.0| | |`hipdnnBatchNormMode_t`| | | | | |`cudnnBatchNormOps_t`|7.4.1| | | | | | | | |`cudnnBnFinalizeStatsMode_t`|8.1.0| | | | | | | | @@ -709,6 +758,7 @@ |`cudnnOpTensorDescriptor_t`|5.0.0| | |`hipdnnOpTensorDescriptor_t`| | | | | |`cudnnOpTensorOp_t`|5.0.0| | |`hipdnnOpTensorOp_t`| | | | | |`cudnnOpTensorStruct`|5.0.0| | | | | | | | +|`cudnnPaddingMode_t`|8.3.0| | | | | | | | |`cudnnPersistentRNNPlan`|6.0.0| | | | | | | | |`cudnnPersistentRNNPlan_t`|6.0.0| | |`hipdnnPersistentRNNPlan_t`| | | | | |`cudnnPointwiseMode_t`|8.0.1| | | | | | | | diff --git a/src/CUDA2HIP_DNN_API_types.cpp b/src/CUDA2HIP_DNN_API_types.cpp index a90c2cd4..7149587e 100644 --- a/src/CUDA2HIP_DNN_API_types.cpp +++ 
b/src/CUDA2HIP_DNN_API_types.cpp @@ -598,6 +598,17 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_VAR_DESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_VAR_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2012 {"CUDNN_ATTR_OPERATION_NORM_FWD_YDESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_YDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2013 {"CUDNN_ATTR_OPERATION_NORM_FWD_PEER_STAT_DESCS", {"HIPDNN_ATTR_OPERATION_NORM_FWD_PEER_STAT_DESCS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2014 + {"CUDNN_ATTR_OPERATION_NORM_BWD_MODE", {"HIPDNN_ATTR_OPERATION_NORM_BWD_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2100 + {"CUDNN_ATTR_OPERATION_NORM_BWD_XDESC", {"HIPDNN_ATTR_OPERATION_NORM_BWD_XDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2101 + {"CUDNN_ATTR_OPERATION_NORM_BWD_MEAN_DESC", {"HIPDNN_ATTR_OPERATION_NORM_BWD_MEAN_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2102 + {"CUDNN_ATTR_OPERATION_NORM_BWD_INV_VARIANCE_DESC", {"HIPDNN_ATTR_OPERATION_NORM_BWD_INV_VARIANCE_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2103 + {"CUDNN_ATTR_OPERATION_NORM_BWD_DYDESC", {"HIPDNN_ATTR_OPERATION_NORM_BWD_DYDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2104 + {"CUDNN_ATTR_OPERATION_NORM_BWD_SCALE_DESC", {"HIPDNN_ATTR_OPERATION_NORM_BWD_SCALE_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2105 + {"CUDNN_ATTR_OPERATION_NORM_BWD_EPSILON_DESC", {"HIPDNN_ATTR_OPERATION_NORM_BWD_EPSILON_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2106 + {"CUDNN_ATTR_OPERATION_NORM_BWD_DSCALE_DESC", {"HIPDNN_ATTR_OPERATION_NORM_BWD_DSCALE_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2107 + {"CUDNN_ATTR_OPERATION_NORM_BWD_DBIAS_DESC", {"HIPDNN_ATTR_OPERATION_NORM_BWD_DBIAS_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2108 + 
{"CUDNN_ATTR_OPERATION_NORM_BWD_DXDESC", {"HIPDNN_ATTR_OPERATION_NORM_BWD_DXDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2109 + {"CUDNN_ATTR_OPERATION_NORM_BWD_PEER_STAT_DESCS", {"HIPDNN_ATTR_OPERATION_NORM_BWD_PEER_STAT_DESCS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2110 {"cudnnBackendAttributeType_t", {"hipdnnBackendAttributeType_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_TYPE_HANDLE", {"HIPDNN_TYPE_HANDLE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_TYPE_DATA_TYPE", {"HIPDNN_TYPE_DATA_TYPE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, @@ -619,6 +630,15 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_TYPE_BN_FINALIZE_STATS_MODE", {"HIPDNN_TYPE_BN_FINALIZE_STATS_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_TYPE_REDUCTION_OPERATOR_TYPE", {"HIPDNN_TYPE_REDUCTION_OPERATOR_TYPE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_TYPE_BEHAVIOR_NOTE", {"HIPDNN_TYPE_BEHAVIOR_NOTE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_TYPE_TENSOR_REORDERING_MODE", {"HIPDNN_TYPE_TENSOR_REORDERING_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_TYPE_RESAMPLE_MODE", {"HIPDNN_TYPE_RESAMPLE_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_TYPE_PADDING_MODE", {"HIPDNN_TYPE_PADDING_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_TYPE_INT32", {"HIPDNN_TYPE_INT32", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_TYPE_CHAR", {"HIPDNN_TYPE_CHAR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_TYPE_SIGNAL_MODE", {"HIPDNN_TYPE_SIGNAL_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_TYPE_FRACTION", {"HIPDNN_TYPE_FRACTION", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_TYPE_NORM_MODE", {"HIPDNN_TYPE_NORM_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + 
{"CUDNN_TYPE_NORM_FWD_PHASE", {"HIPDNN_TYPE_NORM_FWD_PHASE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"cudnnBackendDescriptorType_t", {"hipdnnBackendDescriptorType_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_BACKEND_POINTWISE_DESCRIPTOR", {"HIPDNN_BACKEND_POINTWISE_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_BACKEND_CONVOLUTION_DESCRIPTOR", {"HIPDNN_BACKEND_CONVOLUTION_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, @@ -644,6 +664,13 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_BACKEND_REDUCTION_DESCRIPTOR", {"HIPDNN_BACKEND_REDUCTION_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_BACKEND_OPERATION_REDUCTION_DESCRIPTOR", {"HIPDNN_BACKEND_OPERATION_REDUCTION_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_BACKEND_OPERATION_BN_BWD_WEIGHTS_DESCRIPTOR", {"HIPDNN_BACKEND_OPERATION_BN_BWD_WEIGHTS_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_BACKEND_RESAMPLE_DESCRIPTOR", {"HIPDNN_BACKEND_RESAMPLE_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_BACKEND_OPERATION_RESAMPLE_FWD_DESCRIPTOR", {"HIPDNN_BACKEND_OPERATION_RESAMPLE_FWD_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_BACKEND_OPERATION_RESAMPLE_BWD_DESCRIPTOR", {"HIPDNN_BACKEND_OPERATION_RESAMPLE_BWD_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_BACKEND_OPERATION_CONCAT_DESCRIPTOR", {"HIPDNN_BACKEND_OPERATION_CONCAT_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_BACKEND_OPERATION_SIGNAL_DESCRIPTOR", {"HIPDNN_BACKEND_OPERATION_SIGNAL_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_BACKEND_OPERATION_NORM_FORWARD_DESCRIPTOR", {"HIPDNN_BACKEND_OPERATION_NORM_FORWARD_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + 
{"CUDNN_BACKEND_OPERATION_NORM_BACKWARD_DESCRIPTOR", {"HIPDNN_BACKEND_OPERATION_NORM_BACKWARD_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"cudnnBackendNumericalNote_t", {"hipdnnBackendNumericalNote_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_NUMERICAL_NOTE_TENSOR_CORE", {"HIPDNN_NUMERICAL_NOTE_TENSOR_CORE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_NUMERICAL_NOTE_DOWN_CONVERT_INPUTS", {"HIPDNN_NUMERICAL_NOTE_DOWN_CONVERT_INPUTS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, @@ -651,6 +678,9 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_NUMERICAL_NOTE_FFT", {"HIPDNN_NUMERICAL_NOTE_FFT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_NUMERICAL_NOTE_NONDETERMINISTIC", {"HIPDNN_NUMERICAL_NOTE_NONDETERMINISTIC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_NUMERICAL_NOTE_WINOGRAD", {"HIPDNN_NUMERICAL_NOTE_WINOGRAD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_4x4", {"HIPDNN_NUMERICAL_NOTE_WINOGRAD_TILE_4x4", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_6x6", {"HIPDNN_NUMERICAL_NOTE_WINOGRAD_TILE_6x6", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_13x13", {"HIPDNN_NUMERICAL_NOTE_WINOGRAD_TILE_13x13", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_NUMERICAL_NOTE_TYPE_COUNT", {"HIPDNN_NUMERICAL_NOTE_TYPE_COUNT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"cudnnBackendLayoutType_t", {"hipdnnBackendLayoutType_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_LAYOUT_TYPE_PREFERRED_NCHW", {"HIPDNN_LAYOUT_TYPE_PREFERRED_NCHW", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, @@ -684,10 +714,13 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_KNOB_TYPE_LDGC", {"HIPDNN_KNOB_TYPE_LDGC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, 
{"CUDNN_KNOB_TYPE_SPECFILT", {"HIPDNN_KNOB_TYPE_SPECFILT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_KNOB_TYPE_KERNEL_CFG", {"HIPDNN_KNOB_TYPE_KERNEL_CFG", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_KNOB_TYPE_WORKSPACE", {"HIPDNN_KNOB_TYPE_WORKSPACE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_KNOB_TYPE_COUNTS", {"HIPDNN_KNOB_TYPE_COUNTS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"cudnnBackendHeurMode_t", {"hipdnnBackendHeurMode_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_HEUR_MODE_INSTANT", {"HIPDNN_HEUR_MODE_INSTANT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_HEUR_MODE_B", {"HIPDNN_HEUR_MODE_B", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_HEUR_MODE_FALLBACK", {"HIPDNN_HEUR_MODE_FALLBACK", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_HEUR_MODE_A", {"HIPDNN_HEUR_MODE_A", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_HEUR_MODES_COUNT", {"HIPDNN_HEUR_MODES_COUNT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"cudnnNormMode_t", {"hipdnnNormMode_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_NORM_PER_ACTIVATION", {"HIPDNN_NORM_PER_ACTIVATION", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, @@ -704,6 +737,8 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_BN_FINALIZE_STATISTICS_INFERENCE", {"HIPDNN_BN_FINALIZE_STATISTICS_INFERENCE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"cudnnBackendBehaviorNote_t", {"hipdnnBackendBehaviorNote_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION", {"HIPDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 0 + {"CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER", {"HIPDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1 + 
{"CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER", {"HIPDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2 {"CUDNN_BEHAVIOR_NOTE_TYPE_COUNT", {"HIPDNN_BEHAVIOR_NOTE_TYPE_COUNT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"cudnnResampleMode_t", {"hipdnnResampleMode_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_RESAMPLE_NEAREST", {"HIPDNN_RESAMPLE_NEAREST", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, @@ -713,6 +748,21 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"cudnnSignalMode_t", {"hipdnnSignalMode_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_SIGNAL_SET", {"HIPDNN_SIGNAL_SET", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_SIGNAL_WAIT", {"HIPDNN_SIGNAL_WAIT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"cudnnBackendTensorReordering_t", {"hipdnnBackendTensorReordering_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_TENSOR_REORDERING_NONE", {"HIPDNN_TENSOR_REORDERING_NONE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_TENSOR_REORDERING_INT8x32", {"HIPDNN_TENSOR_REORDERING_INT8x32", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"cudnnPaddingMode_t", {"hipdnnPaddingMode_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_ZERO_PAD", {"HIPDNN_ZERO_PAD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_NEG_INF_PAD", {"HIPDNN_NEG_INF_PAD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_EDGE_VAL_PAD", {"HIPDNN_EDGE_VAL_PAD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"cudnnBackendNormMode_t", {"hipdnnBackendNormMode_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_LAYER_NORM", {"HIPDNN_LAYER_NORM", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_INSTANCE_NORM", {"HIPDNN_INSTANCE_NORM", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_BATCH_NORM", {"HIPDNN_BATCH_NORM", 
"", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_GROUP_NORM", {"HIPDNN_GROUP_NORM", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"cudnnBackendNormFwdPhase_t", {"hipdnnBackendNormFwdPhase_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_NORM_FWD_INFERENCE", {"HIPDNN_NORM_FWD_INFERENCE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_NORM_FWD_TRAINING", {"HIPDNN_NORM_FWD_TRAINING", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // cuDNN types {"cudnnContext", {"hipdnnContext", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, @@ -1530,6 +1580,56 @@ const std::map CUDA_DNN_TYPE_NAME_VER_MAP { {"CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_VAR_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, {"CUDNN_ATTR_OPERATION_NORM_FWD_YDESC", {CUDNN_850, CUDA_0, CUDA_0 }}, {"CUDNN_ATTR_OPERATION_NORM_FWD_PEER_STAT_DESCS", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_BWD_MODE", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_BWD_XDESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_BWD_MEAN_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_BWD_INV_VARIANCE_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_BWD_DYDESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_BWD_SCALE_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_BWD_EPSILON_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_BWD_DSCALE_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_BWD_DBIAS_DESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_BWD_DXDESC", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_OPERATION_NORM_BWD_PEER_STAT_DESCS", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_TYPE_TENSOR_REORDERING_MODE", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_TYPE_RESAMPLE_MODE", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_TYPE_PADDING_MODE", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_TYPE_INT32", {CUDNN_830, CUDA_0, 
CUDA_0 }}, + {"CUDNN_TYPE_CHAR", {CUDNN_840, CUDA_0, CUDA_0 }}, + {"CUDNN_TYPE_SIGNAL_MODE", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_TYPE_FRACTION", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_TYPE_NORM_MODE", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_TYPE_NORM_FWD_PHASE", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_BACKEND_RESAMPLE_DESCRIPTOR", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_BACKEND_OPERATION_RESAMPLE_FWD_DESCRIPTOR", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_BACKEND_OPERATION_RESAMPLE_BWD_DESCRIPTOR", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_BACKEND_OPERATION_CONCAT_DESCRIPTOR", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_BACKEND_OPERATION_SIGNAL_DESCRIPTOR", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_BACKEND_OPERATION_NORM_FORWARD_DESCRIPTOR", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_BACKEND_OPERATION_NORM_BACKWARD_DESCRIPTOR", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_4x4", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_6x6", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_13x13", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_KNOB_TYPE_WORKSPACE", {CUDNN_840, CUDA_0, CUDA_0 }}, + {"CUDNN_HEUR_MODE_FALLBACK", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_HEUR_MODE_A", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"cudnnBackendTensorReordering_t", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_TENSOR_REORDERING_NONE", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_TENSOR_REORDERING_INT8x32", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"cudnnPaddingMode_t", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_ZERO_PAD", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_NEG_INF_PAD", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"CUDNN_EDGE_VAL_PAD", {CUDNN_830, CUDA_0, CUDA_0 }}, + {"cudnnBackendNormMode_t", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_LAYER_NORM", {CUDNN_850, CUDA_0, CUDA_0 }}, + 
{"CUDNN_INSTANCE_NORM", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_BATCH_NORM", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_GROUP_NORM", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"cudnnBackendNormFwdPhase_t", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_NORM_FWD_INFERENCE", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_NORM_FWD_TRAINING", {CUDNN_850, CUDA_0, CUDA_0 }}, }; const std::map HIP_DNN_TYPE_NAME_VER_MAP { From 8bc8f05d1db5fdc0b4dc8a7f6486a783fe19fcc8 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Sun, 9 Oct 2022 18:33:13 +0200 Subject: [PATCH 13/43] [HIPIFY][doc] cuDNN 8.5.0 is the latest supported cuDNN release --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 4060ed0e..c39258de 100644 --- a/README.md +++ b/README.md @@ -336,7 +336,7 @@ Run `Visual Studio 17 2022`, open the generated `LLVM.sln`, build all, build pro - ***Linux***: `-DCUDA_DNN_ROOT_DIR=/usr/include` - - ***Windows***: `-DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-11.7-windows-x64-v8.4.1` + - ***Windows***: `-DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-11.7-windows-x64-v8.5.0` 5. Ensure [`CUB`](https://github.com/NVlabs/cub) of the version corresponding to CUDA's version is installed. 
@@ -388,7 +388,7 @@ Ubuntu 14: LLVM 4.0.0 - 7.1.0, CUDA 7.0 - 9.0, cuDNN 5.0.5 - 7.6.5 Ubuntu 16-18: LLVM 8.0.0 - 14.0.6, CUDA 8.0 - 10.2, cuDNN 5.1.10 - 8.0.5 -Ubuntu 20-21: LLVM 9.0.0 - 15.0.2, CUDA 8.0 - 11.7.1, cuDNN 5.1.10 - 8.4.1 +Ubuntu 20-21: LLVM 9.0.0 - 15.0.2, CUDA 8.0 - 11.7.1, cuDNN 5.1.10 - 8.5.0 Minimum build system requirements for the above configurations: @@ -567,8 +567,8 @@ Testing Time: 6.22s | 11.0.1 - 11.1.0 | 7.0 - 11.2.2 | 7.6.5 - 8.0.5 | 2017.15.9.31, 2019.16.8.4 | 3.19.3 | 3.9.2 | | 12.0.0 - 13.0.1 | 7.0 - 11.5.1 | 7.6.5 - 8.3.2 | 2017.15.9.43, 2019.16.11.9 | 3.22.2 | 3.10.2 | | 14.0.0 - 14.0.6 | 7.0 - 11.7.1 | 8.0.5 - 8.4.1 | 2017.15.9.49, 2019.16.11.17, 2022.17.2.6 | 3.24.0 | 3.10.6 | -| 15.0.0 - 15.0.2 | 7.0 - 11.7.1 | 8.0.5 - 8.4.1 | 2017.15.9.50, 2019.16.11.18, 2022.17.3.3 | 3.24.1 | 3.10.6 | -| 16.0.0git | 7.0 - 11.7.1 | 8.0.5 - 8.4.1 | 2017.15.9.50, 2019.16.11.18, 2022.17.3.3 | 3.24.1 | 3.10.6 | +| 15.0.0 - 15.0.2 | 7.0 - 11.7.1 | 8.0.5 - 8.5.0 | 2017.15.9.50, 2019.16.11.18, 2022.17.3.3 | 3.24.1 | 3.10.6 | +| 16.0.0git | 7.0 - 11.7.1 | 8.0.5 - 8.5.0 | 2017.15.9.50, 2019.16.11.18, 2022.17.3.3 | 3.24.1 | 3.10.6 | *Building with testing support by `Visual Studio 17 2022` on `Windows 10`:* @@ -583,7 +583,7 @@ cmake -DCMAKE_PREFIX_PATH=d:/LLVM/15.0.2/dist \ -DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.7" \ -DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v11.7" \ - -DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-11.7-windows-x64-v8.4.1 \ + -DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-11.7-windows-x64-v8.5.0 \ -DCUDA_CUB_ROOT_DIR=d:/GIT/cub \ -DLLVM_EXTERNAL_LIT=d:/LLVM/15.0.2/build/Release/bin/llvm-lit.py \ ../hipify From b4facf26a5acf5e9e01bb75c8cbaca83b6ec072a Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 10 Oct 2022 11:10:34 +0200 Subject: [PATCH 14/43] [HIPIFY] Sync with CUDA 11.8 - Part 1 - Driver API - types only + Mark a few APIs as `CUDA_REMOVED` since 11.8 + Update synthetic tests 
correspondingly + Update regenerated hipify-perl CUDA_Driver_API_functions_supported_by_HIP.md accordingly --- bin/hipify-perl | 35 ++++++ ...A_Driver_API_functions_supported_by_HIP.md | 41 +++++-- src/CUDA2HIP_Driver_API_types.cpp | 114 +++++++++++++++--- src/Statistics.cpp | 1 + src/Statistics.h | 1 + tests/unit_tests/synthetic/driver_enums.cu | 20 +-- .../unit_tests/synthetic/driver_functions.cu | 18 +-- tests/unit_tests/synthetic/driver_unions.cu | 4 +- 8 files changed, 192 insertions(+), 42 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 735933b2..e24f4027 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -682,11 +682,19 @@ my %removed_funcs = ( "cudaSetupArgument" => "10.1", "cudaLaunch" => "10.1", "cudaConfigureCall" => "10.1", + "CUkernelNodeAttrValue_v1" => "11.8", + "CUkernelNodeAttrValue_union" => "11.8", + "CUkernelNodeAttrValue" => "11.8", + "CUkernelNodeAttrID_enum" => "11.8", + "CUkernelNodeAttrID" => "11.8", "CU_TARGET_COMPUTE_73" => "10.0", "CU_TARGET_COMPUTE_13" => "9.0", "CU_TARGET_COMPUTE_12" => "9.0", "CU_TARGET_COMPUTE_11" => "9.0", "CU_TARGET_COMPUTE_10" => "9.0", + "CU_KERNEL_NODE_ATTRIBUTE_PRIORITY" => "", + "CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE" => "11.8", + "CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW" => "11.8", "CU_GRAPH_NODE_TYPE_COUNT" => "11.0", "CU_DEVICE_P2P_ATTRIBUTE_ARRAY_ACCESS_ACCESS_SUPPORTED" => "10.1", "CU_COMPUTEMODE_EXCLUSIVE" => "8.0", @@ -6593,6 +6601,10 @@ sub warnUnsupportedFunctions { "csrsm2Info", "csrilu02Info", "csrgemm2Info", + "cl_event_flags_enum", + "cl_event_flags", + "cl_context_flags_enum", + "cl_context_flags", "bsrsv2Info", "bsrilu02Info", "bsric02Info", @@ -6601,6 +6613,14 @@ sub warnUnsupportedFunctions { "_CUB_ASM_PTR_SIZE_", "_CUB_ASM_PTR_", "PATCH_LEVEL", + "NVCL_EVENT_SCHED_YIELD", + "NVCL_EVENT_SCHED_SPIN", + "NVCL_EVENT_SCHED_BLOCKING_SYNC", + "NVCL_EVENT_SCHED_AUTO", + "NVCL_CTX_SCHED_YIELD", + "NVCL_CTX_SCHED_SPIN", + "NVCL_CTX_SCHED_BLOCKING_SYNC", + 
"NVCL_CTX_SCHED_AUTO", "MINOR_VERSION", "MAX_CUFFT_ERROR", "MAJOR_VERSION", @@ -6671,6 +6691,8 @@ sub warnUnsupportedFunctions { "CUexecAffinityParam", "CUevent_wait_flags_enum", "CUevent_wait_flags", + "CUevent_sched_flags_enum", + "CUevent_sched_flags", "CUevent_record_flags_enum", "CUevent_record_flags", "CUevent_flags_enum", @@ -6708,6 +6730,8 @@ sub warnUnsupportedFunctions { "CUarray_cubemap_face", "CU_TRSF_SEAMLESS_CUBEMAP", "CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION", + "CU_TARGET_COMPUTE_90", + "CU_TARGET_COMPUTE_89", "CU_TARGET_COMPUTE_87", "CU_TARGET_COMPUTE_86", "CU_TARGET_COMPUTE_80", @@ -6826,6 +6850,12 @@ sub warnUnsupportedFunctions { "CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM", "CU_GET_PROC_ADDRESS_LEGACY_STREAM", "CU_GET_PROC_ADDRESS_DEFAULT", + "CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH", + "CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT", + "CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH", + "CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED", + "CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET", + "CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", "CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_OWNER", "CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_ALL_DEVICES", "CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TARGET_CURRENT_CTX", @@ -6842,6 +6872,10 @@ sub warnUnsupportedFunctions { "CU_EXEC_AFFINITY_TYPE_MAX", "CU_EVENT_WAIT_EXTERNAL", "CU_EVENT_WAIT_DEFAULT", + "CU_EVENT_SCHED_YIELD", + "CU_EVENT_SCHED_SPIN", + "CU_EVENT_SCHED_BLOCKING_SYNC", + "CU_EVENT_SCHED_AUTO", "CU_EVENT_RECORD_EXTERNAL", "CU_EVENT_RECORD_DEFAULT", "CU_EGL_RESOURCE_LOCATION_VIDMEM", @@ -6948,6 +6982,7 @@ sub warnUnsupportedFunctions { "CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED", "CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED", "CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED", + "CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH", "CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR_V2", "CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS", "CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS_V2", diff --git 
a/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md b/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md index 55ebfb57..f9ee5ecb 100644 --- a/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md +++ b/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md @@ -288,6 +288,7 @@ |`CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR`|9.0| | |`hipDeviceAttributeCanUseStreamWaitValue`|4.3.0| | | | |`CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR_V2`|11.7| | | | | | | | |`CU_DEVICE_ATTRIBUTE_CLOCK_RATE`| | | |`hipDeviceAttributeClockRate`|1.6.0| | | | +|`CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH`|11.8| | | | | | | | |`CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR`| | | |`hipDeviceAttributeComputeCapabilityMajor`|1.6.0| | | | |`CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR`| | | |`hipDeviceAttributeComputeCapabilityMinor`|1.6.0| | | | |`CU_DEVICE_ATTRIBUTE_COMPUTE_MODE`| | | |`hipDeviceAttributeComputeMode`|1.6.0| | | | @@ -498,6 +499,10 @@ |`CU_EVENT_INTERPROCESS`| | | |`hipEventInterprocess`|1.6.0| | | | |`CU_EVENT_RECORD_DEFAULT`|11.1| | | | | | | | |`CU_EVENT_RECORD_EXTERNAL`|11.1| | | | | | | | +|`CU_EVENT_SCHED_AUTO`|11.8| | | | | | | | +|`CU_EVENT_SCHED_BLOCKING_SYNC`|11.8| | | | | | | | +|`CU_EVENT_SCHED_SPIN`|11.8| | | | | | | | +|`CU_EVENT_SCHED_YIELD`|11.8| | | | | | | | |`CU_EVENT_WAIT_DEFAULT`|11.1| | | | | | | | |`CU_EVENT_WAIT_EXTERNAL`|11.1| | | | | | | | |`CU_EXEC_AFFINITY_TYPE_MAX`|11.4| | | | | | | | @@ -527,14 +532,20 @@ |`CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_OWNER`|11.3| | | | | | | | |`CU_FUNC_ATTRIBUTE_BINARY_VERSION`| | | |`HIP_FUNC_ATTRIBUTE_BINARY_VERSION`|2.8.0| | | | |`CU_FUNC_ATTRIBUTE_CACHE_MODE_CA`| | | |`HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA`|2.8.0| | | | +|`CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE`|11.8| | | | | | | | +|`CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET`|11.8| | | | | | | | |`CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES`| | | |`HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES`|2.8.0| | | | |`CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES`| 
| | |`HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES`|2.8.0| | | | |`CU_FUNC_ATTRIBUTE_MAX`| | | |`HIP_FUNC_ATTRIBUTE_MAX`|2.8.0| | | | |`CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES`|9.0| | |`HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES`|2.8.0| | | | |`CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK`| | | |`HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK`|2.8.0| | | | +|`CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED`|11.8| | | | | | | | |`CU_FUNC_ATTRIBUTE_NUM_REGS`| | | |`HIP_FUNC_ATTRIBUTE_NUM_REGS`|2.8.0| | | | |`CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT`|9.0| | |`HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT`|2.8.0| | | | |`CU_FUNC_ATTRIBUTE_PTX_VERSION`| | | |`HIP_FUNC_ATTRIBUTE_PTX_VERSION`|2.8.0| | | | +|`CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH`|11.8| | | | | | | | +|`CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT`|11.8| | | | | | | | +|`CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH`|11.8| | | | | | | | |`CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES`| | | |`HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES`|2.8.0| | | | |`CU_FUNC_CACHE_PREFER_EQUAL`| | | |`hipFuncCachePreferEqual`|1.6.0| | | | |`CU_FUNC_CACHE_PREFER_L1`| | | |`hipFuncCachePreferL1`|1.6.0| | | | @@ -645,8 +656,8 @@ |`CU_JIT_TARGET_FROM_CUCONTEXT`| | | |`hipJitOptionTargetFromContext`|1.6.0| | | | |`CU_JIT_THREADS_PER_BLOCK`| | | |`hipJitOptionThreadsPerBlock`|1.6.0| | | | |`CU_JIT_WALL_TIME`| | | |`hipJitOptionWallTime`|1.6.0| | | | -|`CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW`|11.0| | |`hipKernelNodeAttributeAccessPolicyWindow`|5.2.0| | | | -|`CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE`|11.0| | |`hipKernelNodeAttributeCooperative`|5.2.0| | | | +|`CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW`|11.0| |11.8|`hipKernelNodeAttributeAccessPolicyWindow`|5.2.0| | | | +|`CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE`|11.0| |11.8|`hipKernelNodeAttributeCooperative`|5.2.0| | | | |`CU_KERNEL_NODE_ATTRIBUTE_PRIORITY`|11.7| | | | | | | | |`CU_LAUNCH_PARAM_BUFFER_POINTER`| | | |`HIP_LAUNCH_PARAM_BUFFER_POINTER`|1.6.0| | | | 
|`CU_LAUNCH_PARAM_BUFFER_POINTER_AS_INT`|11.7| | | | | | | | @@ -848,6 +859,8 @@ |`CU_TARGET_COMPUTE_80`|11.0| | | | | | | | |`CU_TARGET_COMPUTE_86`|11.1| | | | | | | | |`CU_TARGET_COMPUTE_87`|11.7| | | | | | | | +|`CU_TARGET_COMPUTE_89`|11.8| | | | | | | | +|`CU_TARGET_COMPUTE_90`|11.8| | | | | | | | |`CU_TRSA_OVERRIDE_FORMAT`| | | |`HIP_TRSA_OVERRIDE_FORMAT`|1.7.0| | | | |`CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION`|11.0| | | | | | | | |`CU_TRSF_NORMALIZED_COORDINATES`| | | |`HIP_TRSF_NORMALIZED_COORDINATES`|1.7.0| | | | @@ -925,6 +938,8 @@ |`CUevent_flags_enum`| | | | | | | | | |`CUevent_record_flags`|11.1| | | | | | | | |`CUevent_record_flags_enum`|11.1| | | | | | | | +|`CUevent_sched_flags`|11.8| | | | | | | | +|`CUevent_sched_flags_enum`|11.8| | | | | | | | |`CUevent_st`| | | |`ihipEvent_t`|1.6.0| | | | |`CUevent_wait_flags`|11.1| | | | | | | | |`CUevent_wait_flags_enum`| | | | | | | | | @@ -999,11 +1014,11 @@ |`CUjit_option_enum`| | | |`hipJitOption`|1.6.0| | | | |`CUjit_target`| | | | | | | | | |`CUjit_target_enum`| | | | | | | | | -|`CUkernelNodeAttrID`|11.0| | |`hipKernelNodeAttrID`|5.2.0| | | | -|`CUkernelNodeAttrID_enum`|11.0| | |`hipKernelNodeAttrID`|5.2.0| | | | -|`CUkernelNodeAttrValue`|11.0| | |`hipKernelNodeAttrValue`|5.2.0| | | | -|`CUkernelNodeAttrValue_union`|11.0| | |`hipKernelNodeAttrValue`|5.2.0| | | | -|`CUkernelNodeAttrValue_v1`|11.3| | |`hipKernelNodeAttrValue`|5.2.0| | | | +|`CUkernelNodeAttrID`|11.0| |11.8|`hipKernelNodeAttrID`|5.2.0| | | | +|`CUkernelNodeAttrID_enum`|11.0| |11.8|`hipKernelNodeAttrID`|5.2.0| | | | +|`CUkernelNodeAttrValue`|11.0| |11.8|`hipKernelNodeAttrValue`|5.2.0| | | | +|`CUkernelNodeAttrValue_union`|11.0| |11.8|`hipKernelNodeAttrValue`|5.2.0| | | | +|`CUkernelNodeAttrValue_v1`|11.3| |11.8|`hipKernelNodeAttrValue`|5.2.0| | | | |`CUlimit`| | | |`hipLimit_t`|1.6.0| | | | |`CUlimit_enum`| | | |`hipLimit_t`|1.6.0| | | | |`CUlinkState`| | | |`hiprtcLinkState`|5.3.0| | |5.3.0| @@ -1121,7 +1136,19 @@ |`CUuuid_st`| | | 
|`hipUUID_t`|5.2.0| | | | |`GLenum`| | | |`GLenum`|5.1.0| | | | |`GLuint`| | | |`GLuint`|5.1.0| | | | +|`NVCL_CTX_SCHED_AUTO`|11.8| | | | | | | | +|`NVCL_CTX_SCHED_BLOCKING_SYNC`|11.8| | | | | | | | +|`NVCL_CTX_SCHED_SPIN`|11.8| | | | | | | | +|`NVCL_CTX_SCHED_YIELD`|11.8| | | | | | | | +|`NVCL_EVENT_SCHED_AUTO`|11.8| | | | | | | | +|`NVCL_EVENT_SCHED_BLOCKING_SYNC`|11.8| | | | | | | | +|`NVCL_EVENT_SCHED_SPIN`|11.8| | | | | | | | +|`NVCL_EVENT_SCHED_YIELD`|11.8| | | | | | | | |`__CUDACC__`| | | |`__HIPCC__`|1.6.0| | | | +|`cl_context_flags`|11.8| | | | | | | | +|`cl_context_flags_enum`|11.8| | | | | | | | +|`cl_event_flags`|11.8| | | | | | | | +|`cl_event_flags_enum`|11.8| | | | | | | | |`cudaError_enum`| | | |`hipError_t`|1.5.0| | | | |`memoryBarrier`|11.7| | | | | | | | diff --git a/src/CUDA2HIP_Driver_API_types.cpp b/src/CUDA2HIP_Driver_API_types.cpp index 633a70b4..d560740f 100644 --- a/src/CUDA2HIP_Driver_API_types.cpp +++ b/src/CUDA2HIP_Driver_API_types.cpp @@ -319,9 +319,9 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CUstreamBatchMemOpParams_union", {"hipStreamBatchMemOpParams", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, // cudaKernelNodeAttrValue - {"CUkernelNodeAttrValue", {"hipKernelNodeAttrValue", "", CONV_TYPE, API_DRIVER, 1}}, - {"CUkernelNodeAttrValue_v1", {"hipKernelNodeAttrValue", "", CONV_TYPE, API_DRIVER, 1}}, - {"CUkernelNodeAttrValue_union", {"hipKernelNodeAttrValue", "", CONV_TYPE, API_DRIVER, 1}}, + {"CUkernelNodeAttrValue", {"hipKernelNodeAttrValue", "", CONV_TYPE, API_DRIVER, 1, CUDA_REMOVED}}, + {"CUkernelNodeAttrValue_v1", {"hipKernelNodeAttrValue", "", CONV_TYPE, API_DRIVER, 1, CUDA_REMOVED}}, + {"CUkernelNodeAttrValue_union", {"hipKernelNodeAttrValue", "", CONV_TYPE, API_DRIVER, 1, CUDA_REMOVED}}, // cudaStreamAttrValue {"CUstreamAttrValue", {"hipStreamAttrValue", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, @@ -733,6 +733,8 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { 
{"CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING", {"hipDeviceAttributeGpuDirectRdmaWritesOrdering", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 118 // cudaDevAttrMemoryPoolSupportedHandleTypes {"CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES", {"hipDeviceAttributeMempoolSupportedHandleTypes", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 119 + // + {"CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH", {"hipDeviceAttributeClusterLaunch", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 120 // cudaDevAttrDeferredMappingCudaArraySupported {"CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED", {"hipDeviceAttributeDeferredMappingCudaArraySupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 121 // @@ -1038,8 +1040,20 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES", {"HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 8 // cudaFuncAttributePreferredSharedMemoryCarveout {"CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT", {"HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 9 + // + {"CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET", {"HIP_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 10 + // + {"CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH", {"HIP_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 11 + // + {"CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT", {"HIP_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 12 + // + {"CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH", {"HIP_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 13 + // + {"CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED", {"HIP_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED", "", 
CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 14 + // + {"CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", {"HIP_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 15 // cudaFuncAttributeMax - {"CU_FUNC_ATTRIBUTE_MAX", {"HIP_FUNC_ATTRIBUTE_MAX", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 10 + {"CU_FUNC_ATTRIBUTE_MAX", {"HIP_FUNC_ATTRIBUTE_MAX", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 16 // cudaGraphicsMapFlags {"CUgraphicsMapResourceFlags", {"hipGraphicsMapFlags", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, @@ -1217,6 +1231,8 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CU_TARGET_COMPUTE_80", {"hipJitTargetCompute80", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 80 {"CU_TARGET_COMPUTE_86", {"hipJitTargetCompute86", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 86 {"CU_TARGET_COMPUTE_87", {"hipJitTargetCompute87", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 87 + {"CU_TARGET_COMPUTE_89", {"hipJitTargetCompute89", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 89 + {"CU_TARGET_COMPUTE_90", {"hipJitTargetCompute90", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 90 // no analogue {"CUjitInputType", {"hiprtcJITInputType", "", CONV_TYPE, API_DRIVER, 1, HIP_EXPERIMENTAL}}, @@ -1870,15 +1886,15 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CU_SYNC_POLICY_BLOCKING_SYNC", {"hipSyncPolicyBlockingSync", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 4 // cudaKernelNodeAttrID - {"CUkernelNodeAttrID", {"hipKernelNodeAttrID", "", CONV_TYPE, API_DRIVER, 1}}, - {"CUkernelNodeAttrID_enum", {"hipKernelNodeAttrID", "", CONV_TYPE, API_DRIVER, 1}}, + {"CUkernelNodeAttrID", {"hipKernelNodeAttrID", "", CONV_TYPE, API_DRIVER, 1, CUDA_REMOVED}}, + {"CUkernelNodeAttrID_enum", {"hipKernelNodeAttrID", "", CONV_TYPE, API_DRIVER, 1, CUDA_REMOVED}}, // CUkernelNodeAttrID_enum enum values // 
cudaKernelNodeAttributeAccessPolicyWindow - {"CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW", {"hipKernelNodeAttributeAccessPolicyWindow", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 1 + {"CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW", {"hipKernelNodeAttributeAccessPolicyWindow", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, CUDA_REMOVED}}, // 1 // cudaKernelNodeAttributeCooperative - {"CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE", {"hipKernelNodeAttributeCooperative", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 2 + {"CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE", {"hipKernelNodeAttributeCooperative", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, CUDA_REMOVED}}, // 2 // cudaKernelNodeAttributePriority - {"CU_KERNEL_NODE_ATTRIBUTE_PRIORITY", {"hipKernelNodeAttributePriority", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 8 + {"CU_KERNEL_NODE_ATTRIBUTE_PRIORITY", {"hipKernelNodeAttributePriority", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, CUDA_REMOVED | HIP_UNSUPPORTED}}, // 8 // cudaStreamAttrID {"CUstreamAttrID", {"hipStreamAttrID", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, @@ -2109,6 +2125,45 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CU_MODULE_EAGER_LOADING", {"HIP_MODULE_EAGER_LOADING", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, {"CU_MODULE_LAZY_LOADING", {"HIP_MODULE_LAZY_LOADING", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // + {"CUevent_sched_flags", {"hipEventSchedFlags", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + {"CUevent_sched_flags_enum", {"hipEventSchedFlags", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // CUevent_sched_flags enum values + // + {"CU_EVENT_SCHED_AUTO", {"HIP_EVENT_SCHED_AUTO", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // + {"CU_EVENT_SCHED_SPIN", {"HIP_EVENT_SCHED_SPIN", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // + {"CU_EVENT_SCHED_YIELD", {"HIP_EVENT_SCHED_YIELD", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // + 
{"CU_EVENT_SCHED_BLOCKING_SYNC", {"HIP_EVENT_SCHED_BLOCKING_SYNC", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + + // + {"cl_event_flags", {"hipClEventFlags", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + {"cl_event_flags_enum", {"hipClEventFlags", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // cl_event_flags enum values + // + {"NVCL_EVENT_SCHED_AUTO", {"HIP_CL_EVENT_SCHED_AUTO", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // + {"NVCL_EVENT_SCHED_SPIN", {"HIP_CL_EVENT_SCHED_SPIN", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // + {"NVCL_EVENT_SCHED_YIELD", {"HIP_CL_EVENT_SCHED_YIELD", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // + {"NVCL_EVENT_SCHED_BLOCKING_SYNC", {"HIP_CL_EVENT_SCHED_BLOCKING_SYNC", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + + // + {"cl_context_flags", {"hipClContextFlags", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + {"cl_context_flags_enum", {"hipClContextFlags", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // cl_context_flags enum values + // + {"NVCL_CTX_SCHED_AUTO", {"HIP_CL_CTX_SCHED_AUTO", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // + {"NVCL_CTX_SCHED_SPIN", {"HIP_CL_CTX_SCHED_SPIN", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // + {"NVCL_CTX_SCHED_YIELD", {"HIP_CL_CTX_SCHED_YIELD", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // + {"NVCL_CTX_SCHED_BLOCKING_SYNC", {"HIP_CL_CTX_SCHED_BLOCKING_SYNC", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // 4. 
Typedefs // no analogue @@ -2273,8 +2328,8 @@ const std::map CUDA_DRIVER_TYPE_NAME_VER_MAP { {"CUaccessPolicyWindow_st", {CUDA_110, CUDA_0, CUDA_0 }}, {"CUstreamBatchMemOpParams", {CUDA_80, CUDA_0, CUDA_0 }}, {"CUstreamBatchMemOpParams_union", {CUDA_80, CUDA_0, CUDA_0 }}, - {"CUkernelNodeAttrValue", {CUDA_110, CUDA_0, CUDA_0 }}, - {"CUkernelNodeAttrValue_union", {CUDA_110, CUDA_0, CUDA_0 }}, + {"CUkernelNodeAttrValue", {CUDA_110, CUDA_0, CUDA_118}}, + {"CUkernelNodeAttrValue_union", {CUDA_110, CUDA_0, CUDA_118}}, {"CUstreamAttrValue", {CUDA_110, CUDA_0, CUDA_0 }}, {"CUstreamAttrValue_union", {CUDA_110, CUDA_0, CUDA_0 }}, {"CU_COMPUTEMODE_EXCLUSIVE", {CUDA_0, CUDA_0, CUDA_80 }}, @@ -2556,10 +2611,10 @@ const std::map CUDA_DRIVER_TYPE_NAME_VER_MAP { {"CU_SYNC_POLICY_SPIN", {CUDA_110, CUDA_0, CUDA_0 }}, {"CU_SYNC_POLICY_YIELD", {CUDA_110, CUDA_0, CUDA_0 }}, {"CU_SYNC_POLICY_BLOCKING_SYNC", {CUDA_110, CUDA_0, CUDA_0 }}, - {"CUkernelNodeAttrID", {CUDA_110, CUDA_0, CUDA_0 }}, - {"CUkernelNodeAttrID_enum", {CUDA_110, CUDA_0, CUDA_0 }}, - {"CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW", {CUDA_110, CUDA_0, CUDA_0 }}, - {"CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE", {CUDA_110, CUDA_0, CUDA_0 }}, + {"CUkernelNodeAttrID", {CUDA_110, CUDA_0, CUDA_118}}, + {"CUkernelNodeAttrID_enum", {CUDA_110, CUDA_0, CUDA_118}}, + {"CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW", {CUDA_110, CUDA_0, CUDA_118}}, + {"CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE", {CUDA_110, CUDA_0, CUDA_118}}, {"CUstreamAttrID", {CUDA_110, CUDA_0, CUDA_0 }}, {"CUstreamAttrID_enum", {CUDA_110, CUDA_0, CUDA_0 }}, {"CU_STREAM_ATTRIBUTE_ACCESS_POLICY_WINDOW", {CUDA_110, CUDA_0, CUDA_0 }}, @@ -2682,7 +2737,7 @@ const std::map CUDA_DRIVER_TYPE_NAME_VER_MAP { {"CUDA_KERNEL_NODE_PARAMS_v1", {CUDA_113, CUDA_0, CUDA_0 }}, {"CUDA_MEMSET_NODE_PARAMS_v1", {CUDA_113, CUDA_0, CUDA_0 }}, {"CUDA_HOST_NODE_PARAMS_v1", {CUDA_113, CUDA_0, CUDA_0 }}, - {"CUkernelNodeAttrValue_v1", {CUDA_113, CUDA_0, CUDA_0 }}, + {"CUkernelNodeAttrValue_v1", 
{CUDA_113, CUDA_0, CUDA_118}}, {"CUstreamAttrValue_v1", {CUDA_113, CUDA_0, CUDA_0 }}, {"CUdriverProcAddress_flags", {CUDA_113, CUDA_0, CUDA_0 }}, {"CUdriverProcAddress_flags_enum", {CUDA_113, CUDA_0, CUDA_0 }}, @@ -2863,6 +2918,33 @@ const std::map CUDA_DRIVER_TYPE_NAME_VER_MAP { {"CUmoduleLoadingMode_enum", {CUDA_117, CUDA_0, CUDA_0 }}, {"CU_MODULE_EAGER_LOADING", {CUDA_117, CUDA_0, CUDA_0 }}, {"CU_MODULE_LAZY_LOADING", {CUDA_117, CUDA_0, CUDA_0 }}, + {"CUevent_sched_flags", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CUevent_sched_flags_enum", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_EVENT_SCHED_AUTO", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_EVENT_SCHED_SPIN", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_EVENT_SCHED_YIELD", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_EVENT_SCHED_BLOCKING_SYNC", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cl_event_flags", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cl_event_flags_enum", {CUDA_118, CUDA_0, CUDA_0 }}, + {"NVCL_EVENT_SCHED_AUTO", {CUDA_118, CUDA_0, CUDA_0 }}, + {"NVCL_EVENT_SCHED_SPIN", {CUDA_118, CUDA_0, CUDA_0 }}, + {"NVCL_EVENT_SCHED_YIELD", {CUDA_118, CUDA_0, CUDA_0 }}, + {"NVCL_EVENT_SCHED_BLOCKING_SYNC", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cl_context_flags", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cl_context_flags_enum", {CUDA_118, CUDA_0, CUDA_0 }}, + {"NVCL_CTX_SCHED_AUTO", {CUDA_118, CUDA_0, CUDA_0 }}, + {"NVCL_CTX_SCHED_SPIN", {CUDA_118, CUDA_0, CUDA_0 }}, + {"NVCL_CTX_SCHED_YIELD", {CUDA_118, CUDA_0, CUDA_0 }}, + {"NVCL_CTX_SCHED_BLOCKING_SYNC", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED", {CUDA_118, CUDA_0, CUDA_0 }}, + 
{"CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_TARGET_COMPUTE_89", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_TARGET_COMPUTE_90", {CUDA_118, CUDA_0, CUDA_0 }}, }; const std::map HIP_DRIVER_TYPE_NAME_VER_MAP { diff --git a/src/Statistics.cpp b/src/Statistics.cpp index 271eca69..dafb5f8c 100644 --- a/src/Statistics.cpp +++ b/src/Statistics.cpp @@ -442,6 +442,7 @@ std::string Statistics::getCudaVersion(const cudaVersions& ver) { case CUDA_115: return "11.5"; case CUDA_116: return "11.6"; case CUDA_117: return "11.7"; + case CUDA_118: return "11.8"; case CUDNN_10: return "1.0.0"; case CUDNN_20: return "2.0.0"; case CUDNN_30: return "3.0.0"; diff --git a/src/Statistics.h b/src/Statistics.h index eae40e4e..85f1022a 100644 --- a/src/Statistics.h +++ b/src/Statistics.h @@ -198,6 +198,7 @@ enum cudaVersions { CUDA_115 = 11050, CUDA_116 = 11060, CUDA_117 = 11070, + CUDA_118 = 11080, CUDNN_10 = 100, CUDNN_20 = 200, CUDNN_30 = 300, diff --git a/tests/unit_tests/synthetic/driver_enums.cu b/tests/unit_tests/synthetic/driver_enums.cu index b97398c3..4512b92c 100644 --- a/tests/unit_tests/synthetic/driver_enums.cu +++ b/tests/unit_tests/synthetic/driver_enums.cu @@ -913,15 +913,6 @@ int main() { // CHECK: hipDeviceAttribute_t DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR = hipDeviceAttributeMaxBlocksPerMultiprocessor; CUdevice_attribute DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR = CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR; - // CHECK: hipKernelNodeAttrID kernelNodeAttrID; - // CHECK-NEXT: hipKernelNodeAttrID kernelNodeAttrID_enum; - // CHECK-NEXT: hipKernelNodeAttrID KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW = hipKernelNodeAttributeAccessPolicyWindow; - // CHECK-NEXT: hipKernelNodeAttrID KERNEL_NODE_ATTRIBUTE_COOPERATIVE = hipKernelNodeAttributeCooperative; - CUkernelNodeAttrID kernelNodeAttrID; - CUkernelNodeAttrID_enum kernelNodeAttrID_enum; - CUkernelNodeAttrID KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW = 
CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW; - CUkernelNodeAttrID KERNEL_NODE_ATTRIBUTE_COOPERATIVE = CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE; - // CHECK: hipAccessProperty accessProperty; // CHECK-NEXT: hipAccessProperty accessProperty_enum; // CHECK-NEXT: hipAccessProperty ACCESS_PROPERTY_NORMAL = hipAccessPropertyNormal; @@ -937,6 +928,17 @@ int main() { CUpointer_attribute POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE = CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE; #endif +#if CUDA_VERSION >= 11000 && CUDA_VERSION < 11080 + // CHECK: hipKernelNodeAttrID kernelNodeAttrID; + // CHECK-NEXT: hipKernelNodeAttrID kernelNodeAttrID_enum; + // CHECK-NEXT: hipKernelNodeAttrID KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW = hipKernelNodeAttributeAccessPolicyWindow; + // CHECK-NEXT: hipKernelNodeAttrID KERNEL_NODE_ATTRIBUTE_COOPERATIVE = hipKernelNodeAttributeCooperative; + CUkernelNodeAttrID kernelNodeAttrID; + CUkernelNodeAttrID_enum kernelNodeAttrID_enum; + CUkernelNodeAttrID KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW = CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW; + CUkernelNodeAttrID KERNEL_NODE_ATTRIBUTE_COOPERATIVE = CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE; +#endif + #if CUDA_VERSION >= 11010 // CHECK: hipGraphNodeType GRAPH_NODE_TYPE_WAIT_EVENT = hipGraphNodeTypeWaitEvent; // CHECK-NEXT: hipGraphNodeType GRAPH_NODE_TYPE_EVENT_RECORD = hipGraphNodeTypeEventRecord; diff --git a/tests/unit_tests/synthetic/driver_functions.cu b/tests/unit_tests/synthetic/driver_functions.cu index 736e7bdc..b84766e5 100644 --- a/tests/unit_tests/synthetic/driver_functions.cu +++ b/tests/unit_tests/synthetic/driver_functions.cu @@ -527,6 +527,16 @@ int main() { // CHECK: result = hipDevicePrimaryCtxSetFlags(device, flags); result = cuDevicePrimaryCtxSetFlags_v2(device, flags); + // CUDA: CUresult CUDAAPI cuMemRetainAllocationHandle(CUmemGenericAllocationHandle *handle, void *addr); + // HIP: hipError_t hipMemRetainAllocationHandle(hipMemGenericAllocationHandle_t* handle, void* addr); + // 
CHECK: result = hipMemRetainAllocationHandle(&memGenericAllocationHandle_t, image); + result = cuMemRetainAllocationHandle(&memGenericAllocationHandle_t, image); + + // CHECK: result = hipGraphInstantiate(&graphExec, graph, &graphNode, nullptr, bytes); + result = cuGraphInstantiate_v2(&graphExec, graph, &graphNode, nullptr, bytes); +#endif + +#if CUDA_VERSION >= 11000 && CUDA_VERSION < 11080 // CHECK: hipKernelNodeAttrID kernelNodeAttrID; CUkernelNodeAttrID kernelNodeAttrID; // CHECK: hipKernelNodeAttrValue kernelNodeAttrValue; @@ -541,14 +551,6 @@ int main() { // HIP: hipError_t hipGraphKernelNodeGetAttribute(hipGraphNode_t hNode, hipKernelNodeAttrID attr, hipKernelNodeAttrValue* value); // CHECK: result = hipGraphKernelNodeGetAttribute(graphNode, kernelNodeAttrID, &kernelNodeAttrValue); result = cuGraphKernelNodeGetAttribute(graphNode, kernelNodeAttrID, &kernelNodeAttrValue); - - // CUDA: CUresult CUDAAPI cuMemRetainAllocationHandle(CUmemGenericAllocationHandle *handle, void *addr); - // HIP: hipError_t hipMemRetainAllocationHandle(hipMemGenericAllocationHandle_t* handle, void* addr); - // CHECK: result = hipMemRetainAllocationHandle(&memGenericAllocationHandle_t, image); - result = cuMemRetainAllocationHandle(&memGenericAllocationHandle_t, image); - - // CHECK: result = hipGraphInstantiate(&graphExec, graph, &graphNode, nullptr, bytes); - result = cuGraphInstantiate_v2(&graphExec, graph, &graphNode, nullptr, bytes); #endif #if CUDA_VERSION >= 11010 diff --git a/tests/unit_tests/synthetic/driver_unions.cu b/tests/unit_tests/synthetic/driver_unions.cu index 1d9bddc4..5a6a1b04 100644 --- a/tests/unit_tests/synthetic/driver_unions.cu +++ b/tests/unit_tests/synthetic/driver_unions.cu @@ -7,14 +7,14 @@ int main() { printf("10. 
CUDA Driver API Unions synthetic test\n"); -#if CUDA_VERSION >= 11000 +#if CUDA_VERSION >= 11000 && CUDA_VERSION < 11080 // CHECK: hipKernelNodeAttrValue kernelNodeAttrValue; // CHECK-NEXT: hipKernelNodeAttrValue kernelNodeAttrValue_union; CUkernelNodeAttrValue kernelNodeAttrValue; CUkernelNodeAttrValue_union kernelNodeAttrValue_union; #endif -#if CUDA_VERSION >= 11030 +#if CUDA_VERSION >= 11030 && CUDA_VERSION < 11080 // CHECK: hipKernelNodeAttrValue kernelNodeAttrValue_v1; CUkernelNodeAttrValue_v1 kernelNodeAttrValue_v1; #endif From 085a2a08580dbab900c400e580a5b7dfb6f2c5fa Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 10 Oct 2022 22:23:03 +0200 Subject: [PATCH 15/43] [HIPIFY] Sync with CUDA 11.8 - Part 2 - Driver API - types only + [fix] Left only `CUkernelNodeAttrID_enum` and `CUkernelNodeAttrValue_union` types marked `CUDA_REMOVED`: other types erroneously marked `CUDA_REMOVED` have fallback redefines + Update synthetic tests correspondingly + Update regenerated hipify-perl CUDA_Driver_API_functions_supported_by_HIP.md accordingly --- bin/hipify-perl | 32 +++++- ...A_Driver_API_functions_supported_by_HIP.md | 36 +++++- src/CUDA2HIP_Driver_API_types.cpp | 105 ++++++++++++++++-- tests/unit_tests/synthetic/driver_enums.cu | 9 +- .../unit_tests/synthetic/driver_functions.cu | 4 +- tests/unit_tests/synthetic/driver_unions.cu | 9 +- 6 files changed, 163 insertions(+), 32 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index e24f4027..1c5c9b88 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -682,19 +682,13 @@ my %removed_funcs = ( "cudaSetupArgument" => "10.1", "cudaLaunch" => "10.1", "cudaConfigureCall" => "10.1", - "CUkernelNodeAttrValue_v1" => "11.8", "CUkernelNodeAttrValue_union" => "11.8", - "CUkernelNodeAttrValue" => "11.8", "CUkernelNodeAttrID_enum" => "11.8", - "CUkernelNodeAttrID" => "11.8", "CU_TARGET_COMPUTE_73" => "10.0", "CU_TARGET_COMPUTE_13" => "9.0", "CU_TARGET_COMPUTE_12" => "9.0", "CU_TARGET_COMPUTE_11" => "9.0", 
"CU_TARGET_COMPUTE_10" => "9.0", - "CU_KERNEL_NODE_ATTRIBUTE_PRIORITY" => "", - "CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE" => "11.8", - "CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW" => "11.8", "CU_GRAPH_NODE_TYPE_COUNT" => "11.0", "CU_DEVICE_P2P_ATTRIBUTE_ARRAY_ACCESS_ACCESS_SUPPORTED" => "10.1", "CU_COMPUTEMODE_EXCLUSIVE" => "8.0", @@ -6659,6 +6653,14 @@ sub warnUnsupportedFunctions { "CUmemRangeHandleType", "CUmemAttach_flags_enum", "CUmemAttach_flags", + "CUlaunchConfig_st", + "CUlaunchConfig", + "CUlaunchAttribute_st", + "CUlaunchAttributeValue_union", + "CUlaunchAttributeValue", + "CUlaunchAttributeID_enum", + "CUlaunchAttributeID", + "CUlaunchAttribute", "CUjit_target_enum", "CUjit_target", "CUjit_fallback_enum", @@ -6726,6 +6728,8 @@ sub warnUnsupportedFunctions { "CUd3d10DeviceList", "CUctx_flags_enum", "CUctx_flags", + "CUclusterSchedulingPolicy_enum", + "CUclusterSchedulingPolicy", "CUarray_cubemap_face_enum", "CUarray_cubemap_face", "CU_TRSF_SEAMLESS_CUBEMAP", @@ -6803,7 +6807,18 @@ sub warnUnsupportedFunctions { "CU_LAUNCH_PARAM_END_AS_INT", "CU_LAUNCH_PARAM_BUFFER_SIZE_AS_INT", "CU_LAUNCH_PARAM_BUFFER_POINTER_AS_INT", + "CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY", + "CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION", + "CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT", + "CU_LAUNCH_ATTRIBUTE_PRIORITY", + "CU_LAUNCH_ATTRIBUTE_IGNORE", + "CU_LAUNCH_ATTRIBUTE_COOPERATIVE", + "CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", + "CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION", + "CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW", "CU_KERNEL_NODE_ATTRIBUTE_PRIORITY", + "CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", + "CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_DIMENSION", "CU_JIT_REFERENCED_VARIABLE_NAMES", "CU_JIT_REFERENCED_VARIABLE_COUNT", "CU_JIT_REFERENCED_KERNEL_NAMES", @@ -7015,6 +7030,9 @@ sub warnUnsupportedFunctions { "CU_CUBEMAP_FACE_NEGATIVE_Y", "CU_CUBEMAP_FACE_NEGATIVE_X", "CU_CTX_FLAGS_MASK", + "CU_CLUSTER_SCHEDULING_POLICY_SPREAD", + 
"CU_CLUSTER_SCHEDULING_POLICY_LOAD_BALANCING", + "CU_CLUSTER_SCHEDULING_POLICY_DEFAULT", "CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL", "CU_AD_FORMAT_UNORM_INT8X4", "CU_AD_FORMAT_UNORM_INT8X2", @@ -7716,11 +7734,13 @@ sub warnUnsupportedFunctions { "CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED", "CUDA_ERROR_MPS_MAX_CLIENTS_REACHED", "CUDA_ERROR_MPS_CONNECTION_FAILED", + "CUDA_ERROR_MPS_CLIENT_TERMINATED", "CUDA_ERROR_MISALIGNED_ADDRESS", "CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING", "CUDA_ERROR_JIT_COMPILER_NOT_FOUND", "CUDA_ERROR_JIT_COMPILATION_DISABLED", "CUDA_ERROR_INVALID_PC", + "CUDA_ERROR_INVALID_CLUSTER_SIZE", "CUDA_ERROR_INVALID_ADDRESS_SPACE", "CUDA_ERROR_ILLEGAL_INSTRUCTION", "CUDA_ERROR_HARDWARE_STACK_ERROR", diff --git a/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md b/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md index f9ee5ecb..003a2387 100644 --- a/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md +++ b/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md @@ -56,6 +56,7 @@ |`CUDA_ERROR_ILLEGAL_INSTRUCTION`| | | | | | | | | |`CUDA_ERROR_ILLEGAL_STATE`|10.0| | |`hipErrorIllegalState`|5.0.0| | | | |`CUDA_ERROR_INVALID_ADDRESS_SPACE`| | | | | | | | | +|`CUDA_ERROR_INVALID_CLUSTER_SIZE`|11.8| | | | | | | | |`CUDA_ERROR_INVALID_CONTEXT`| | | |`hipErrorInvalidContext`|1.6.0| | | | |`CUDA_ERROR_INVALID_DEVICE`| | | |`hipErrorInvalidDevice`|1.6.0| | | | |`CUDA_ERROR_INVALID_GRAPHICS_CONTEXT`| | | |`hipErrorInvalidGraphicsContext`|1.6.0| | | | @@ -73,6 +74,7 @@ |`CUDA_ERROR_LAUNCH_TIMEOUT`| | | |`hipErrorLaunchTimeOut`|1.6.0| | | | |`CUDA_ERROR_MAP_FAILED`| | | |`hipErrorMapFailed`|1.6.0| | | | |`CUDA_ERROR_MISALIGNED_ADDRESS`| | | | | | | | | +|`CUDA_ERROR_MPS_CLIENT_TERMINATED`|11.8| | | | | | | | |`CUDA_ERROR_MPS_CONNECTION_FAILED`|11.4| | | | | | | | |`CUDA_ERROR_MPS_MAX_CLIENTS_REACHED`|11.4| | | | | | | | |`CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED`|11.4| | | | | | | | @@ -239,6 +241,9 @@ 
|`CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL`|11.1| | | | | | | | |`CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL`|11.1| | |`hipArraySparseSubresourceTypeMiptail`|5.2.0| | | | |`CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL`|11.1| | |`hipArraySparseSubresourceTypeSparseLevel`|5.2.0| | | | +|`CU_CLUSTER_SCHEDULING_POLICY_DEFAULT`|11.8| | | | | | | | +|`CU_CLUSTER_SCHEDULING_POLICY_LOAD_BALANCING`|11.8| | | | | | | | +|`CU_CLUSTER_SCHEDULING_POLICY_SPREAD`|11.8| | | | | | | | |`CU_COMPUTEMODE_DEFAULT`| | | |`hipComputeModeDefault`|1.9.0| | | | |`CU_COMPUTEMODE_EXCLUSIVE`| | |8.0|`hipComputeModeExclusive`|1.9.0| | | | |`CU_COMPUTEMODE_EXCLUSIVE_PROCESS`| | | |`hipComputeModeExclusiveProcess`|2.0.0| | | | @@ -656,9 +661,20 @@ |`CU_JIT_TARGET_FROM_CUCONTEXT`| | | |`hipJitOptionTargetFromContext`|1.6.0| | | | |`CU_JIT_THREADS_PER_BLOCK`| | | |`hipJitOptionThreadsPerBlock`|1.6.0| | | | |`CU_JIT_WALL_TIME`| | | |`hipJitOptionWallTime`|1.6.0| | | | -|`CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW`|11.0| |11.8|`hipKernelNodeAttributeAccessPolicyWindow`|5.2.0| | | | -|`CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE`|11.0| |11.8|`hipKernelNodeAttributeCooperative`|5.2.0| | | | +|`CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW`|11.0| | |`hipKernelNodeAttributeAccessPolicyWindow`|5.2.0| | | | +|`CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_DIMENSION`|11.8| | | | | | | | +|`CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE`|11.8| | | | | | | | +|`CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE`|11.0| | |`hipKernelNodeAttributeCooperative`|5.2.0| | | | |`CU_KERNEL_NODE_ATTRIBUTE_PRIORITY`|11.7| | | | | | | | +|`CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW`|11.8| | | | | | | | +|`CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION`|11.8| | | | | | | | +|`CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE`|11.8| | | | | | | | +|`CU_LAUNCH_ATTRIBUTE_COOPERATIVE`|11.8| | | | | | | | +|`CU_LAUNCH_ATTRIBUTE_IGNORE`|11.8| | | | | | | | +|`CU_LAUNCH_ATTRIBUTE_PRIORITY`|11.8| | | | | | | | 
+|`CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT`|11.8| | | | | | | | +|`CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION`|11.8| | | | | | | | +|`CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY`|11.8| | | | | | | | |`CU_LAUNCH_PARAM_BUFFER_POINTER`| | | |`HIP_LAUNCH_PARAM_BUFFER_POINTER`|1.6.0| | | | |`CU_LAUNCH_PARAM_BUFFER_POINTER_AS_INT`|11.7| | | | | | | | |`CU_LAUNCH_PARAM_BUFFER_SIZE`| | | |`HIP_LAUNCH_PARAM_BUFFER_SIZE`|1.6.0| | | | @@ -891,6 +907,8 @@ |`CUarray_format`| | | |`hipArray_Format`|1.7.0| | | | |`CUarray_format_enum`| | | |`hipArray_Format`|1.7.0| | | | |`CUarray_st`| | | |`hipArray`|1.7.0| | | | +|`CUclusterSchedulingPolicy`|11.8| | | | | | | | +|`CUclusterSchedulingPolicy_enum`|11.8| | | | | | | | |`CUcomputemode`| | | |`hipComputeMode`|1.9.0| | | | |`CUcomputemode_enum`| | | |`hipComputeMode`|1.9.0| | | | |`CUcontext`| | | |`hipCtx_t`|1.6.0| | | | @@ -1014,11 +1032,19 @@ |`CUjit_option_enum`| | | |`hipJitOption`|1.6.0| | | | |`CUjit_target`| | | | | | | | | |`CUjit_target_enum`| | | | | | | | | -|`CUkernelNodeAttrID`|11.0| |11.8|`hipKernelNodeAttrID`|5.2.0| | | | +|`CUkernelNodeAttrID`|11.0| | |`hipKernelNodeAttrID`|5.2.0| | | | |`CUkernelNodeAttrID_enum`|11.0| |11.8|`hipKernelNodeAttrID`|5.2.0| | | | -|`CUkernelNodeAttrValue`|11.0| |11.8|`hipKernelNodeAttrValue`|5.2.0| | | | +|`CUkernelNodeAttrValue`|11.0| | |`hipKernelNodeAttrValue`|5.2.0| | | | |`CUkernelNodeAttrValue_union`|11.0| |11.8|`hipKernelNodeAttrValue`|5.2.0| | | | -|`CUkernelNodeAttrValue_v1`|11.3| |11.8|`hipKernelNodeAttrValue`|5.2.0| | | | +|`CUkernelNodeAttrValue_v1`|11.3| | |`hipKernelNodeAttrValue`|5.2.0| | | | +|`CUlaunchAttribute`|11.8| | | | | | | | +|`CUlaunchAttributeID`|11.8| | | | | | | | +|`CUlaunchAttributeID_enum`|11.8| | | | | | | | +|`CUlaunchAttributeValue`|11.8| | | | | | | | +|`CUlaunchAttributeValue_union`|11.8| | | | | | | | +|`CUlaunchAttribute_st`|11.8| | | | | | | | +|`CUlaunchConfig`|11.8| | | | | | | | +|`CUlaunchConfig_st`|11.8| | | | | | | | |`CUlimit`| | | 
|`hipLimit_t`|1.6.0| | | | |`CUlimit_enum`| | | |`hipLimit_t`|1.6.0| | | | |`CUlinkState`| | | |`hiprtcLinkState`|5.3.0| | |5.3.0| diff --git a/src/CUDA2HIP_Driver_API_types.cpp b/src/CUDA2HIP_Driver_API_types.cpp index d560740f..721cfe54 100644 --- a/src/CUDA2HIP_Driver_API_types.cpp +++ b/src/CUDA2HIP_Driver_API_types.cpp @@ -312,6 +312,14 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CUDA_BATCH_MEM_OP_NODE_PARAMS_st", {"HIP_BATCH_MEM_OP_NODE_PARAMS", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, {"CUDA_BATCH_MEM_OP_NODE_PARAMS", {"HIP_BATCH_MEM_OP_NODE_PARAMS", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // + {"CUlaunchAttribute_st", {"hipLaunchAttribute", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + {"CUlaunchAttribute", {"hipLaunchAttribute", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + + // + {"CUlaunchConfig_st", {"hipLaunchConfig", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + {"CUlaunchConfig", {"hipLaunchConfig", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // 2. 
Unions {"CUstreamBatchMemOpParams", {"hipStreamBatchMemOpParams", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, @@ -319,8 +327,11 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CUstreamBatchMemOpParams_union", {"hipStreamBatchMemOpParams", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, // cudaKernelNodeAttrValue - {"CUkernelNodeAttrValue", {"hipKernelNodeAttrValue", "", CONV_TYPE, API_DRIVER, 1, CUDA_REMOVED}}, - {"CUkernelNodeAttrValue_v1", {"hipKernelNodeAttrValue", "", CONV_TYPE, API_DRIVER, 1, CUDA_REMOVED}}, + // NOTE: Starting from CUDA 11.8 CUlaunchAttributeValue is used instead of CUkernelNodeAttrValue: + // typedef CUlaunchAttributeValue CUkernelNodeAttrValue_v1; + // typedef CUkernelNodeAttrValue_v1 CUkernelNodeAttrValue; + {"CUkernelNodeAttrValue", {"hipKernelNodeAttrValue", "", CONV_TYPE, API_DRIVER, 1}}, + {"CUkernelNodeAttrValue_v1", {"hipKernelNodeAttrValue", "", CONV_TYPE, API_DRIVER, 1}}, {"CUkernelNodeAttrValue_union", {"hipKernelNodeAttrValue", "", CONV_TYPE, API_DRIVER, 1, CUDA_REMOVED}}, // cudaStreamAttrValue @@ -332,6 +343,10 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CUlinkState_st", {"ihiprtcLinkState", "", CONV_TYPE, API_DRIVER, 1}}, {"CUlinkState", {"hiprtcLinkState", "", CONV_TYPE, API_DRIVER, 1}}, + // + {"CUlaunchAttributeValue", {"hipLaunchAttributeValue", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + {"CUlaunchAttributeValue_union", {"hipLaunchAttributeValue", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // 3. Enums // TODO: HIPaddress_mode_enum and all its values should be hipTextureAddressMode as long as they are equal. 
{"CUaddress_mode", {"HIPaddress_mode", "", CONV_TYPE, API_DRIVER, 1}}, @@ -1600,6 +1615,8 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CUDA_ERROR_MPS_MAX_CLIENTS_REACHED", {"hipErrorMpsMaxClientsReached", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 808 // cudaErrorMpsMaxConnectionsReached {"CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED", {"hipErrorMpsMaxConnectionsReached", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 809 + // + {"CUDA_ERROR_MPS_CLIENT_TERMINATED", {"hipErrorMpsClientTerminated", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 810 // cudaErrorStreamCaptureUnsupported {"CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED", {"hipErrorStreamCaptureUnsupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 900 // cudaErrorStreamCaptureInvalidated @@ -1624,6 +1641,8 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE", {"hipErrorGraphExecUpdateFailure", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 910 // cudaErrorExternalDevice {"CUDA_ERROR_EXTERNAL_DEVICE", {"hipErrorExternalDevice", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 911 + // + {"CUDA_ERROR_INVALID_CLUSTER_SIZE", {"hipErrorInvalidClusterSize", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 912 // cudaErrorUnknown {"CUDA_ERROR_UNKNOWN", {"hipErrorUnknown", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 999 @@ -1886,15 +1905,15 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CU_SYNC_POLICY_BLOCKING_SYNC", {"hipSyncPolicyBlockingSync", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 4 // cudaKernelNodeAttrID - {"CUkernelNodeAttrID", {"hipKernelNodeAttrID", "", CONV_TYPE, API_DRIVER, 1, CUDA_REMOVED}}, + {"CUkernelNodeAttrID", {"hipKernelNodeAttrID", "", CONV_TYPE, API_DRIVER, 1}}, {"CUkernelNodeAttrID_enum", {"hipKernelNodeAttrID", "", CONV_TYPE, API_DRIVER, 1, CUDA_REMOVED}}, // CUkernelNodeAttrID_enum enum values // cudaKernelNodeAttributeAccessPolicyWindow - 
{"CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW", {"hipKernelNodeAttributeAccessPolicyWindow", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, CUDA_REMOVED}}, // 1 + {"CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW", {"hipKernelNodeAttributeAccessPolicyWindow", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 1 // cudaKernelNodeAttributeCooperative - {"CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE", {"hipKernelNodeAttributeCooperative", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, CUDA_REMOVED}}, // 2 + {"CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE", {"hipKernelNodeAttributeCooperative", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 2 // cudaKernelNodeAttributePriority - {"CU_KERNEL_NODE_ATTRIBUTE_PRIORITY", {"hipKernelNodeAttributePriority", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, CUDA_REMOVED | HIP_UNSUPPORTED}}, // 8 + {"CU_KERNEL_NODE_ATTRIBUTE_PRIORITY", {"hipKernelNodeAttributePriority", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 8 // cudaStreamAttrID {"CUstreamAttrID", {"hipStreamAttrID", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, @@ -2164,6 +2183,40 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { // {"NVCL_CTX_SCHED_BLOCKING_SYNC", {"HIP_CL_CTX_SCHED_BLOCKING_SYNC", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // + {"CUclusterSchedulingPolicy", {"hipClusterSchedulingPolicy", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + {"CUclusterSchedulingPolicy_enum", {"hipClusterSchedulingPolicy", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // CUclusterSchedulingPolicy enum values + // + {"CU_CLUSTER_SCHEDULING_POLICY_DEFAULT", {"HIP_CLUSTER_SCHEDULING_POLICY_DEFAULT", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // + {"CU_CLUSTER_SCHEDULING_POLICY_SPREAD", {"HIP_CLUSTER_SCHEDULING_POLICY_SPREAD", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // + {"CU_CLUSTER_SCHEDULING_POLICY_LOAD_BALANCING", {"HIP_CLUSTER_SCHEDULING_POLICY_LOAD_BALANCING", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + + // + 
{"CUlaunchAttributeID", {"hipLaunchAttributeID", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + {"CUlaunchAttributeID_enum", {"hipLaunchAttributeID", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // CUlaunchAttributeID enum values + // + {"CU_LAUNCH_ATTRIBUTE_IGNORE", {"HIP_LAUNCH_ATTRIBUTE_IGNORE", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // + {"CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW", {"HIP_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // + {"CU_LAUNCH_ATTRIBUTE_COOPERATIVE", {"HIP_LAUNCH_ATTRIBUTE_COOPERATIVE", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // + {"CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY", {"HIP_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // + {"CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION", {"HIP_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // + {"CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", {"HIP_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE","", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // + {"CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION", {"HIP_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // + {"CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT", {"HIP_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // + {"CU_LAUNCH_ATTRIBUTE_PRIORITY", {"HIP_LAUNCH_ATTRIBUTE_PRIORITY", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // 4. 
Typedefs // no analogue @@ -2282,6 +2335,10 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CU_LAUNCH_PARAM_BUFFER_POINTER_AS_INT", {"HIP_LAUNCH_PARAM_BUFFER_POINTER_AS_INT", "", CONV_DEFINE, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 0x01 // {"CU_LAUNCH_PARAM_BUFFER_SIZE_AS_INT", {"HIP_LAUNCH_PARAM_BUFFER_SIZE_AS_INT", "", CONV_DEFINE, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 0x02 + // + {"CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_DIMENSION", {"HIP_KERNEL_NODE_ATTRIBUTE_CLUSTER_DIMENSION", "", CONV_DEFINE, API_DRIVER, 1, HIP_UNSUPPORTED}}, // CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION + // + {"CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", {"HIP_KERNEL_NODE_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", "", CONV_DEFINE, API_DRIVER, 1, HIP_UNSUPPORTED}}, // CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE }; const std::map CUDA_DRIVER_TYPE_NAME_VER_MAP { @@ -2328,7 +2385,7 @@ const std::map CUDA_DRIVER_TYPE_NAME_VER_MAP { {"CUaccessPolicyWindow_st", {CUDA_110, CUDA_0, CUDA_0 }}, {"CUstreamBatchMemOpParams", {CUDA_80, CUDA_0, CUDA_0 }}, {"CUstreamBatchMemOpParams_union", {CUDA_80, CUDA_0, CUDA_0 }}, - {"CUkernelNodeAttrValue", {CUDA_110, CUDA_0, CUDA_118}}, + {"CUkernelNodeAttrValue", {CUDA_110, CUDA_0, CUDA_0 }}, {"CUkernelNodeAttrValue_union", {CUDA_110, CUDA_0, CUDA_118}}, {"CUstreamAttrValue", {CUDA_110, CUDA_0, CUDA_0 }}, {"CUstreamAttrValue_union", {CUDA_110, CUDA_0, CUDA_0 }}, @@ -2611,10 +2668,10 @@ const std::map CUDA_DRIVER_TYPE_NAME_VER_MAP { {"CU_SYNC_POLICY_SPIN", {CUDA_110, CUDA_0, CUDA_0 }}, {"CU_SYNC_POLICY_YIELD", {CUDA_110, CUDA_0, CUDA_0 }}, {"CU_SYNC_POLICY_BLOCKING_SYNC", {CUDA_110, CUDA_0, CUDA_0 }}, - {"CUkernelNodeAttrID", {CUDA_110, CUDA_0, CUDA_118}}, + {"CUkernelNodeAttrID", {CUDA_110, CUDA_0, CUDA_0 }}, {"CUkernelNodeAttrID_enum", {CUDA_110, CUDA_0, CUDA_118}}, - {"CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW", {CUDA_110, CUDA_0, CUDA_118}}, - {"CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE", {CUDA_110, CUDA_0, CUDA_118}}, + 
{"CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW", {CUDA_110, CUDA_0, CUDA_0 }}, + {"CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE", {CUDA_110, CUDA_0, CUDA_0 }}, {"CUstreamAttrID", {CUDA_110, CUDA_0, CUDA_0 }}, {"CUstreamAttrID_enum", {CUDA_110, CUDA_0, CUDA_0 }}, {"CU_STREAM_ATTRIBUTE_ACCESS_POLICY_WINDOW", {CUDA_110, CUDA_0, CUDA_0 }}, @@ -2737,7 +2794,7 @@ const std::map CUDA_DRIVER_TYPE_NAME_VER_MAP { {"CUDA_KERNEL_NODE_PARAMS_v1", {CUDA_113, CUDA_0, CUDA_0 }}, {"CUDA_MEMSET_NODE_PARAMS_v1", {CUDA_113, CUDA_0, CUDA_0 }}, {"CUDA_HOST_NODE_PARAMS_v1", {CUDA_113, CUDA_0, CUDA_0 }}, - {"CUkernelNodeAttrValue_v1", {CUDA_113, CUDA_0, CUDA_118}}, + {"CUkernelNodeAttrValue_v1", {CUDA_113, CUDA_0, CUDA_0 }}, {"CUstreamAttrValue_v1", {CUDA_113, CUDA_0, CUDA_0 }}, {"CUdriverProcAddress_flags", {CUDA_113, CUDA_0, CUDA_0 }}, {"CUdriverProcAddress_flags_enum", {CUDA_113, CUDA_0, CUDA_0 }}, @@ -2945,6 +3002,32 @@ const std::map CUDA_DRIVER_TYPE_NAME_VER_MAP { {"CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", {CUDA_118, CUDA_0, CUDA_0 }}, {"CU_TARGET_COMPUTE_89", {CUDA_118, CUDA_0, CUDA_0 }}, {"CU_TARGET_COMPUTE_90", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CUclusterSchedulingPolicy", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CUclusterSchedulingPolicy_enum", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_CLUSTER_SCHEDULING_POLICY_DEFAULT", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_CLUSTER_SCHEDULING_POLICY_SPREAD", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_CLUSTER_SCHEDULING_POLICY_LOAD_BALANCING", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CUlaunchAttributeID", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CUlaunchAttributeID_enum", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_LAUNCH_ATTRIBUTE_IGNORE", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_LAUNCH_ATTRIBUTE_COOPERATIVE", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION", {CUDA_118, CUDA_0, CUDA_0 }}, + 
{"CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_LAUNCH_ATTRIBUTE_PRIORITY", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CUlaunchAttributeValue", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CUlaunchAttributeValue_union", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CUlaunchAttribute", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CUlaunchAttribute_st", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CUlaunchConfig", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CUlaunchConfig_st", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_DIMENSION", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CUDA_ERROR_MPS_CLIENT_TERMINATED", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CUDA_ERROR_INVALID_CLUSTER_SIZE", {CUDA_118, CUDA_0, CUDA_0 }}, }; const std::map HIP_DRIVER_TYPE_NAME_VER_MAP { diff --git a/tests/unit_tests/synthetic/driver_enums.cu b/tests/unit_tests/synthetic/driver_enums.cu index 4512b92c..8bd9ddf8 100644 --- a/tests/unit_tests/synthetic/driver_enums.cu +++ b/tests/unit_tests/synthetic/driver_enums.cu @@ -929,17 +929,18 @@ int main() { #endif #if CUDA_VERSION >= 11000 && CUDA_VERSION < 11080 + // CHECK: hipKernelNodeAttrID kernelNodeAttrID_enum; + CUkernelNodeAttrID_enum kernelNodeAttrID_enum; +#endif + +#if CUDA_VERSION >= 11010 // CHECK: hipKernelNodeAttrID kernelNodeAttrID; - // CHECK-NEXT: hipKernelNodeAttrID kernelNodeAttrID_enum; // CHECK-NEXT: hipKernelNodeAttrID KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW = hipKernelNodeAttributeAccessPolicyWindow; // CHECK-NEXT: hipKernelNodeAttrID KERNEL_NODE_ATTRIBUTE_COOPERATIVE = hipKernelNodeAttributeCooperative; CUkernelNodeAttrID kernelNodeAttrID; - CUkernelNodeAttrID_enum kernelNodeAttrID_enum; CUkernelNodeAttrID KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW = 
CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW; CUkernelNodeAttrID KERNEL_NODE_ATTRIBUTE_COOPERATIVE = CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE; -#endif -#if CUDA_VERSION >= 11010 // CHECK: hipGraphNodeType GRAPH_NODE_TYPE_WAIT_EVENT = hipGraphNodeTypeWaitEvent; // CHECK-NEXT: hipGraphNodeType GRAPH_NODE_TYPE_EVENT_RECORD = hipGraphNodeTypeEventRecord; CUgraphNodeType GRAPH_NODE_TYPE_WAIT_EVENT = CU_GRAPH_NODE_TYPE_WAIT_EVENT; diff --git a/tests/unit_tests/synthetic/driver_functions.cu b/tests/unit_tests/synthetic/driver_functions.cu index b84766e5..d77eee21 100644 --- a/tests/unit_tests/synthetic/driver_functions.cu +++ b/tests/unit_tests/synthetic/driver_functions.cu @@ -536,7 +536,7 @@ int main() { result = cuGraphInstantiate_v2(&graphExec, graph, &graphNode, nullptr, bytes); #endif -#if CUDA_VERSION >= 11000 && CUDA_VERSION < 11080 +#if CUDA_VERSION >= 11000 // CHECK: hipKernelNodeAttrID kernelNodeAttrID; CUkernelNodeAttrID kernelNodeAttrID; // CHECK: hipKernelNodeAttrValue kernelNodeAttrValue; @@ -551,9 +551,7 @@ int main() { // HIP: hipError_t hipGraphKernelNodeGetAttribute(hipGraphNode_t hNode, hipKernelNodeAttrID attr, hipKernelNodeAttrValue* value); // CHECK: result = hipGraphKernelNodeGetAttribute(graphNode, kernelNodeAttrID, &kernelNodeAttrValue); result = cuGraphKernelNodeGetAttribute(graphNode, kernelNodeAttrID, &kernelNodeAttrValue); -#endif -#if CUDA_VERSION >= 11010 // CUDA: CUresult CUDAAPI cuGraphExecChildGraphNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, CUgraph childGraph); // HIP: hipError_t hipGraphExecChildGraphNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, hipGraph_t childGraph); // CHECK: result = hipGraphExecChildGraphNodeSetParams(graphExec, graphNode, graph); diff --git a/tests/unit_tests/synthetic/driver_unions.cu b/tests/unit_tests/synthetic/driver_unions.cu index 5a6a1b04..0c375f96 100644 --- a/tests/unit_tests/synthetic/driver_unions.cu +++ b/tests/unit_tests/synthetic/driver_unions.cu @@ -7,14 +7,17 @@ int 
main() { printf("10. CUDA Driver API Unions synthetic test\n"); -#if CUDA_VERSION >= 11000 && CUDA_VERSION < 11080 +#if CUDA_VERSION >= 11000 // CHECK: hipKernelNodeAttrValue kernelNodeAttrValue; - // CHECK-NEXT: hipKernelNodeAttrValue kernelNodeAttrValue_union; CUkernelNodeAttrValue kernelNodeAttrValue; +#endif + +#if CUDA_VERSION >= 11000 && CUDA_VERSION < 11080 + // CHECK: hipKernelNodeAttrValue kernelNodeAttrValue_union; CUkernelNodeAttrValue_union kernelNodeAttrValue_union; #endif -#if CUDA_VERSION >= 11030 && CUDA_VERSION < 11080 +#if CUDA_VERSION >= 11030 // CHECK: hipKernelNodeAttrValue kernelNodeAttrValue_v1; CUkernelNodeAttrValue_v1 kernelNodeAttrValue_v1; #endif From 41bbd42bd6e8eb7d8f39ee47074faebbeac53048 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 11 Oct 2022 14:29:40 +0200 Subject: [PATCH 16/43] [HIPIFY] Sync with CUDA 11.8 - Part 3 - Driver & Runtime API - functions only + Update regenerated hipify-perl and Markdown CUDA2HIP docs accordingly --- bin/hipify-perl | 8 ++++++++ ...CUDA_Driver_API_functions_supported_by_HIP.md | 3 +++ ...UDA_Runtime_API_functions_supported_by_HIP.md | 5 +++++ src/CUDA2HIP_Driver_API_functions.cpp | 10 ++++++++++ src/CUDA2HIP_Runtime_API_functions.cpp | 16 ++++++++++++++++ 5 files changed, 42 insertions(+) diff --git a/bin/hipify-perl b/bin/hipify-perl index 1c5c9b88..77cbd574 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -6009,8 +6009,10 @@ sub warnUnsupportedFunctions { "cudaProfilerInitialize", "cudaOutputMode_t", "cudaOutputMode", + "cudaOccupancyMaxPotentialClusterSize", "cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags", "cudaOccupancyMaxPotentialBlockSizeVariableSMem", + "cudaOccupancyMaxActiveClusters", "cudaOccupancyDisableCachingOverride", "cudaOccupancyAvailableDynamicSMemPerBlock", "cudaNvSciSyncAttrWait", @@ -6028,6 +6030,7 @@ sub warnUnsupportedFunctions { "cudaLimitMaxL2FetchGranularity", "cudaLimitDevRuntimeSyncDepth", "cudaLimitDevRuntimePendingLaunchCount", + 
"cudaLaunchKernelExC", "cudaKeyValuePair", "cudaKernelNodeAttributePriority", "cudaHostRegisterReadOnly", @@ -6081,6 +6084,7 @@ sub warnUnsupportedFunctions { "cudaGraphAddMemAllocNode", "cudaGraphAddExternalSemaphoresWaitNode", "cudaGraphAddExternalSemaphoresSignalNode", + "cudaGetTextureObjectTextureDesc_v2", "cudaGetSurfaceReference", "cudaGetSurfaceObjectResourceDesc", "cudaGetParameterBufferV2", @@ -6370,6 +6374,7 @@ sub warnUnsupportedFunctions { "cudaD3D10DeviceListAll", "cudaD3D10DeviceList", "cudaCtxResetPersistingL2Cache", + "cudaCreateTextureObject_v2", "cudaChannelFormatKindUnsignedNormalized8X4", "cudaChannelFormatKindUnsignedNormalized8X2", "cudaChannelFormatKindUnsignedNormalized8X1", @@ -6438,6 +6443,8 @@ sub warnUnsupportedFunctions { "cuParamSetf", "cuParamSetTexRef", "cuParamSetSize", + "cuOccupancyMaxPotentialClusterSize", + "cuOccupancyMaxActiveClusters", "cuOccupancyAvailableDynamicSMemPerBlock", "cuModuleLoadFatBinary", "cuModuleGetSurfRef", @@ -6469,6 +6476,7 @@ sub warnUnsupportedFunctions { "cuMemcpy3DPeer", "cuMemcpy", "cuMemGetHandleForAddressRange", + "cuLaunchKernelEx", "cuLaunchGridAsync", "cuLaunchGrid", "cuLaunchCooperativeKernelMultiDevice", diff --git a/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md b/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md index 003a2387..9ce898ad 100644 --- a/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md +++ b/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md @@ -1540,6 +1540,7 @@ |`cuLaunchCooperativeKernelMultiDevice`|9.0|11.3| | | | | | | |`cuLaunchHostFunc`|10.0| | |`hipLaunchHostFunc`|5.2.0| | | | |`cuLaunchKernel`| | | |`hipModuleLaunchKernel`|1.6.0| | | | +|`cuLaunchKernelEx`|11.8| | | | | | | | ## **20. 
Execution Control [DEPRECATED]** @@ -1646,8 +1647,10 @@ |`cuOccupancyAvailableDynamicSMemPerBlock`|11.0| | | | | | | | |`cuOccupancyMaxActiveBlocksPerMultiprocessor`| | | |`hipModuleOccupancyMaxActiveBlocksPerMultiprocessor`|3.5.0| | | | |`cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags`| | | |`hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags`|3.5.0| | | | +|`cuOccupancyMaxActiveClusters`|11.8| | | | | | | | |`cuOccupancyMaxPotentialBlockSize`| | | |`hipModuleOccupancyMaxPotentialBlockSize`|3.5.0| | | | |`cuOccupancyMaxPotentialBlockSizeWithFlags`| | | |`hipModuleOccupancyMaxPotentialBlockSizeWithFlags`|3.5.0| | | | +|`cuOccupancyMaxPotentialClusterSize`|11.8| | | | | | | | ## **23. Texture Reference Management [DEPRECATED]** diff --git a/doc/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md b/doc/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md index a8314136..0d29fade 100644 --- a/doc/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md +++ b/doc/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md @@ -122,6 +122,7 @@ |`cudaLaunchCooperativeKernelMultiDevice`|9.0|11.3| |`hipLaunchCooperativeKernelMultiDevice`|2.6.0| | | | |`cudaLaunchHostFunc`|10.0| | |`hipLaunchHostFunc`|5.2.0| | | | |`cudaLaunchKernel`| | | |`hipLaunchKernel`|1.6.0| | | | +|`cudaLaunchKernelExC`|11.8| | | | | | | | |`cudaSetDoubleForDevice`| |10.0| | | | | | | |`cudaSetDoubleForHost`| |10.0| | | | | | | @@ -132,10 +133,12 @@ |`cudaOccupancyAvailableDynamicSMemPerBlock`|11.0| | | | | | | | |`cudaOccupancyMaxActiveBlocksPerMultiprocessor`| | | |`hipOccupancyMaxActiveBlocksPerMultiprocessor`|1.6.0| | | | |`cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags`| | | |`hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags`|2.6.0| | | | +|`cudaOccupancyMaxActiveClusters`|11.8| | | | | | | | |`cudaOccupancyMaxPotentialBlockSize`| | | |`hipOccupancyMaxPotentialBlockSize`|1.6.0| | | | |`cudaOccupancyMaxPotentialBlockSizeVariableSMem`| | | | | | | | | 
|`cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags`| | | | | | | | | |`cudaOccupancyMaxPotentialBlockSizeWithFlags`| | | |`hipOccupancyMaxPotentialBlockSizeWithFlags`|3.5.0| | | | +|`cudaOccupancyMaxPotentialClusterSize`|11.8| | | | | | | | ## **9. Memory Management** @@ -393,10 +396,12 @@ |**CUDA**|**A**|**D**|**R**|**HIP**|**A**|**D**|**R**|**E**| |:--|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:| |`cudaCreateTextureObject`| | | |`hipCreateTextureObject`|1.7.0| | | | +|`cudaCreateTextureObject_v2`|11.8| | | | | | | | |`cudaDestroyTextureObject`| | | |`hipDestroyTextureObject`|1.7.0| | | | |`cudaGetTextureObjectResourceDesc`| | | |`hipGetTextureObjectResourceDesc`|1.7.0| | | | |`cudaGetTextureObjectResourceViewDesc`| | | |`hipGetTextureObjectResourceViewDesc`|1.7.0| | | | |`cudaGetTextureObjectTextureDesc`| | | |`hipGetTextureObjectTextureDesc`|1.7.0| | | | +|`cudaGetTextureObjectTextureDesc_v2`|11.8| | | | | | | | ## **28. Surface Object Management** diff --git a/src/CUDA2HIP_Driver_API_functions.cpp b/src/CUDA2HIP_Driver_API_functions.cpp index be0b0684..29d03116 100644 --- a/src/CUDA2HIP_Driver_API_functions.cpp +++ b/src/CUDA2HIP_Driver_API_functions.cpp @@ -543,6 +543,9 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { // no analogue // NOTE: Not equal to cudaLaunchKernel due to different signatures {"cuLaunchKernel", {"hipModuleLaunchKernel", "", CONV_EXECUTION, API_DRIVER, 19}}, + // no analogue + // NOTE: Not equal to cudaLaunchKernelExC due to different signatures + {"cuLaunchKernelEx", {"hipLaunchKernelEx", "", CONV_EXECUTION, API_DRIVER, 19, HIP_UNSUPPORTED}}, // 20. 
Execution Control [DEPRECATED] // no analogue @@ -747,6 +750,10 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { {"cuOccupancyMaxPotentialBlockSize", {"hipModuleOccupancyMaxPotentialBlockSize", "", CONV_OCCUPANCY, API_DRIVER, 22}}, // cudaOccupancyMaxPotentialBlockSizeWithFlags {"cuOccupancyMaxPotentialBlockSizeWithFlags", {"hipModuleOccupancyMaxPotentialBlockSizeWithFlags", "", CONV_OCCUPANCY, API_DRIVER, 22}}, + // cudaOccupancyMaxPotentialClusterSize + {"cuOccupancyMaxPotentialClusterSize", {"hipOccupancyMaxPotentialClusterSize", "", CONV_OCCUPANCY, API_DRIVER, 22, HIP_UNSUPPORTED}}, + // cudaOccupancyMaxActiveClusters + {"cuOccupancyMaxActiveClusters", {"hipOccupancyMaxActiveClusters", "", CONV_OCCUPANCY, API_DRIVER, 22, HIP_UNSUPPORTED}}, // 23. Texture Reference Management [DEPRECATED] // no analogues @@ -1282,6 +1289,9 @@ const std::map CUDA_DRIVER_FUNCTION_VER_MAP { {"cuGraphBatchMemOpNodeGetParams", {CUDA_117, CUDA_0, CUDA_0 }}, {"cuGraphBatchMemOpNodeSetParams", {CUDA_117, CUDA_0, CUDA_0 }}, {"cuGraphExecBatchMemOpNodeSetParams", {CUDA_117, CUDA_0, CUDA_0 }}, + {"cuLaunchKernelEx", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cuOccupancyMaxPotentialClusterSize", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cuOccupancyMaxActiveClusters", {CUDA_118, CUDA_0, CUDA_0 }}, }; const std::map HIP_DRIVER_FUNCTION_VER_MAP { diff --git a/src/CUDA2HIP_Runtime_API_functions.cpp b/src/CUDA2HIP_Runtime_API_functions.cpp index 4ead60ca..353d3cf3 100644 --- a/src/CUDA2HIP_Runtime_API_functions.cpp +++ b/src/CUDA2HIP_Runtime_API_functions.cpp @@ -229,6 +229,9 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { {"cudaSetDoubleForDevice", {"hipSetDoubleForDevice", "", CONV_EXECUTION, API_RUNTIME, 7, HIP_UNSUPPORTED | CUDA_DEPRECATED}}, // no analogue {"cudaSetDoubleForHost", {"hipSetDoubleForHost", "", CONV_EXECUTION, API_RUNTIME, 7, HIP_UNSUPPORTED | CUDA_DEPRECATED}}, + // no analogue + // NOTE: Not equal to cuLaunchKernelEx due to different signatures + {"cudaLaunchKernelExC", {"hipLaunchKernelExC", "", 
CONV_EXECUTION, API_RUNTIME, 7, HIP_UNSUPPORTED}}, // 8. Occupancy // cuOccupancyAvailableDynamicSMemPerBlock @@ -245,6 +248,10 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { {"cudaOccupancyMaxPotentialBlockSizeVariableSMem", {"hipOccupancyMaxPotentialBlockSizeVariableSMem", "", CONV_OCCUPANCY, API_RUNTIME, 8, HIP_UNSUPPORTED}}, // no analogue {"cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags", {"hipOccupancyMaxPotentialBlockSizeVariableSMemWithFlags", "", CONV_OCCUPANCY, API_RUNTIME, 8, HIP_UNSUPPORTED}}, + // cuOccupancyMaxPotentialClusterSize + {"cudaOccupancyMaxPotentialClusterSize", {"hipOccupancyMaxPotentialClusterSize", "", CONV_OCCUPANCY, API_RUNTIME, 8, HIP_UNSUPPORTED}}, + // cuOccupancyMaxActiveClusters + {"cudaOccupancyMaxActiveClusters", {"hipOccupancyMaxActiveClusters", "", CONV_OCCUPANCY, API_RUNTIME, 8, HIP_UNSUPPORTED}}, // 9. Memory Management // no analogue @@ -642,6 +649,10 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // no analogue // NOTE: Not equal to cuTexObjectGetTextureDesc due to different signatures {"cudaGetTextureObjectTextureDesc", {"hipGetTextureObjectTextureDesc", "", CONV_TEXTURE, API_RUNTIME, 27}}, + // + {"cudaCreateTextureObject_v2", {"hipCreateTextureObject_v2", "", CONV_TEXTURE, API_RUNTIME, 27, HIP_UNSUPPORTED}}, + // + {"cudaGetTextureObjectTextureDesc_v2", {"hipGetTextureObjectTextureDesc_v2", "", CONV_TEXTURE, API_RUNTIME, 27, HIP_UNSUPPORTED}}, // 28. 
Surface Object Management // no analogue @@ -1073,6 +1084,11 @@ const std::map CUDA_RUNTIME_FUNCTION_VER_MAP { {"cudaGraphInstantiateWithFlags", {CUDA_114, CUDA_0, CUDA_0 }}, {"cudaArrayGetMemoryRequirements", {CUDA_116, CUDA_0, CUDA_0 }}, {"cudaGraphNodeSetEnabled", {CUDA_116, CUDA_0, CUDA_0 }}, + {"cudaLaunchKernelExC", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaOccupancyMaxPotentialClusterSize", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaOccupancyMaxActiveClusters", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaCreateTextureObject_v2", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaGetTextureObjectTextureDesc_v2", {CUDA_118, CUDA_0, CUDA_0 }}, }; const std::map HIP_RUNTIME_FUNCTION_VER_MAP { From f85f9e48b38029418c2a429972e3839c74a93492 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 11 Oct 2022 16:54:58 +0200 Subject: [PATCH 17/43] [HIPIFY] Sync with CUDA 11.8 - Part 4 - Runtime API - types only + Sync with CUDA Driver API 11.8 changes + Update regenerated hipify-perl and CUDA2HIP Markdown docs accordingly + Minor fixes and formatting --- bin/hipify-perl | 30 +++++ ..._Runtime_API_functions_supported_by_HIP.md | 30 +++++ src/CUDA2HIP_Driver_API_types.cpp | 88 +++++++-------- src/CUDA2HIP_Runtime_API_types.cpp | 103 +++++++++++++++++- 4 files changed, 205 insertions(+), 46 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 77cbd574..630521a4 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -6031,8 +6031,25 @@ sub warnUnsupportedFunctions { "cudaLimitDevRuntimeSyncDepth", "cudaLimitDevRuntimePendingLaunchCount", "cudaLaunchKernelExC", + "cudaLaunchConfig_t", + "cudaLaunchConfig_st", + "cudaLaunchAttribute_st", + "cudaLaunchAttributeValue", + "cudaLaunchAttributeSynchronizationPolicy", + "cudaLaunchAttributeProgrammaticStreamSerialization", + "cudaLaunchAttributeProgrammaticEvent", + "cudaLaunchAttributePriority", + "cudaLaunchAttributeIgnore", + "cudaLaunchAttributeID", + "cudaLaunchAttributeCooperative", + "cudaLaunchAttributeClusterSchedulingPolicyPreference", 
+ "cudaLaunchAttributeClusterDimension", + "cudaLaunchAttributeAccessPolicyWindow", + "cudaLaunchAttribute", "cudaKeyValuePair", "cudaKernelNodeAttributePriority", + "cudaKernelNodeAttributeClusterSchedulingPolicyPreference", + "cudaKernelNodeAttributeClusterDimension", "cudaHostRegisterReadOnly", "cudaGraphicsVDPAURegisterVideoSurface", "cudaGraphicsVDPAURegisterOutputSurface", @@ -6108,6 +6125,12 @@ sub warnUnsupportedFunctions { "cudaGLMapFlags", "cudaGLMapBufferObjectAsync", "cudaGLMapBufferObject", + "cudaFuncAttributeRequiredClusterWidth", + "cudaFuncAttributeRequiredClusterHeight", + "cudaFuncAttributeRequiredClusterDepth", + "cudaFuncAttributeNonPortableClusterSizeAllowed", + "cudaFuncAttributeClusterSchedulingPolicyPreference", + "cudaFuncAttributeClusterDimMustBeSet", "cudaFormatModeForced", "cudaFormatModeAuto", "cudaFlushGPUDirectRDMAWritesToOwner", @@ -6159,6 +6182,7 @@ sub warnUnsupportedFunctions { "cudaErrorMpsMaxConnectionsReached", "cudaErrorMpsMaxClientsReached", "cudaErrorMpsConnectionFailed", + "cudaErrorMpsClientTerminated", "cudaErrorMixedDeviceExecution", "cudaErrorMisalignedAddress", "cudaErrorMemoryValueTooLarge", @@ -6176,6 +6200,7 @@ sub warnUnsupportedFunctions { "cudaErrorInvalidNormSetting", "cudaErrorInvalidHostPointer", "cudaErrorInvalidFilterSetting", + "cudaErrorInvalidClusterSize", "cudaErrorInvalidChannelDescriptor", "cudaErrorInvalidAddressSpace", "cudaErrorIncompatibleDriverContext", @@ -6314,6 +6339,7 @@ sub warnUnsupportedFunctions { "cudaDevAttrGPUDirectRDMASupported", "cudaDevAttrGPUDirectRDMAFlushWritesOptions", "cudaDevAttrDeferredMappingCudaArraySupported", + "cudaDevAttrClusterLaunch", "cudaDevAttrCanFlushRemoteWrites", "cudaD3D9UnregisterResource", "cudaD3D9UnmapResources", @@ -6375,6 +6401,10 @@ sub warnUnsupportedFunctions { "cudaD3D10DeviceList", "cudaCtxResetPersistingL2Cache", "cudaCreateTextureObject_v2", + "cudaClusterSchedulingPolicySpread", + "cudaClusterSchedulingPolicyLoadBalancing", + 
"cudaClusterSchedulingPolicyDefault", + "cudaClusterSchedulingPolicy", "cudaChannelFormatKindUnsignedNormalized8X4", "cudaChannelFormatKindUnsignedNormalized8X2", "cudaChannelFormatKindUnsignedNormalized8X1", diff --git a/doc/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md b/doc/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md index 0d29fade..685ce925 100644 --- a/doc/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md +++ b/doc/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md @@ -615,6 +615,10 @@ Unsupported |`cudaChannelFormatKindUnsignedNormalized8X1`|11.5| | | | | | | | |`cudaChannelFormatKindUnsignedNormalized8X2`|11.5| | | | | | | | |`cudaChannelFormatKindUnsignedNormalized8X4`|11.5| | | | | | | | +|`cudaClusterSchedulingPolicy`|11.8| | | | | | | | +|`cudaClusterSchedulingPolicyDefault`|11.8| | | | | | | | +|`cudaClusterSchedulingPolicyLoadBalancing`|11.8| | | | | | | | +|`cudaClusterSchedulingPolicySpread`|11.8| | | | | | | | |`cudaComputeMode`| | | |`hipComputeMode`|1.9.0| | | | |`cudaComputeModeDefault`| | | |`hipComputeModeDefault`|1.9.0| | | | |`cudaComputeModeExclusive`| | | |`hipComputeModeExclusive`|1.9.0| | | | @@ -654,6 +658,7 @@ Unsupported |`cudaDevAttrCanMapHostMemory`| | | |`hipDeviceAttributeCanMapHostMemory`|2.10.0| | | | |`cudaDevAttrCanUseHostPointerForRegisteredMem`|8.0| | |`hipDeviceAttributeCanUseHostPointerForRegisteredMem`|4.3.0| | | | |`cudaDevAttrClockRate`| | | |`hipDeviceAttributeClockRate`|1.6.0| | | | +|`cudaDevAttrClusterLaunch`|11.8| | | | | | | | |`cudaDevAttrComputeCapabilityMajor`| | | |`hipDeviceAttributeComputeCapabilityMajor`|1.6.0| | | | |`cudaDevAttrComputeCapabilityMinor`| | | |`hipDeviceAttributeComputeCapabilityMinor`|1.6.0| | | | |`cudaDevAttrComputeMode`| | | |`hipDeviceAttributeComputeMode`|1.6.0| | | | @@ -903,6 +908,7 @@ Unsupported |`cudaErrorInsufficientDriver`| | | |`hipErrorInsufficientDriver`|1.7.0| | | | |`cudaErrorInvalidAddressSpace`| | | | | | | | | 
|`cudaErrorInvalidChannelDescriptor`| | | | | | | | | +|`cudaErrorInvalidClusterSize`|11.8| | | | | | | | |`cudaErrorInvalidConfiguration`| | | |`hipErrorInvalidConfiguration`|1.6.0| | | | |`cudaErrorInvalidDevice`| | | |`hipErrorInvalidDevice`|1.6.0| | | | |`cudaErrorInvalidDeviceFunction`| | | |`hipErrorInvalidDeviceFunction`|1.6.0| | | | @@ -939,6 +945,7 @@ Unsupported |`cudaErrorMisalignedAddress`| | | | | | | | | |`cudaErrorMissingConfiguration`| | | |`hipErrorMissingConfiguration`|1.6.0| | | | |`cudaErrorMixedDeviceExecution`| |3.1| | | | | | | +|`cudaErrorMpsClientTerminated`|11.8| | | | | | | | |`cudaErrorMpsConnectionFailed`|11.4| | | | | | | | |`cudaErrorMpsMaxClientsReached`|11.4| | | | | | | | |`cudaErrorMpsMaxConnectionsReached`|11.4| | | | | | | | @@ -1050,9 +1057,15 @@ Unsupported |`cudaFormatModeAuto`| | | | | | | | | |`cudaFormatModeForced`| | | | | | | | | |`cudaFuncAttribute`|9.0| | |`hipFuncAttribute`|3.9.0| | | | +|`cudaFuncAttributeClusterDimMustBeSet`|11.8| | | | | | | | +|`cudaFuncAttributeClusterSchedulingPolicyPreference`|11.8| | | | | | | | |`cudaFuncAttributeMax`|9.0| | |`hipFuncAttributeMax`|3.9.0| | | | |`cudaFuncAttributeMaxDynamicSharedMemorySize`|9.0| | |`hipFuncAttributeMaxDynamicSharedMemorySize`|3.9.0| | | | +|`cudaFuncAttributeNonPortableClusterSizeAllowed`|11.8| | | | | | | | |`cudaFuncAttributePreferredSharedMemoryCarveout`|9.0| | |`hipFuncAttributePreferredSharedMemoryCarveout`|3.9.0| | | | +|`cudaFuncAttributeRequiredClusterDepth`|11.8| | | | | | | | +|`cudaFuncAttributeRequiredClusterHeight`|11.8| | | | | | | | +|`cudaFuncAttributeRequiredClusterWidth`|11.8| | | | | | | | |`cudaFuncAttributes`| | | |`hipFuncAttributes`|1.9.0| | | | |`cudaFuncCache`| | | |`hipFuncCache_t`|1.6.0| | | | |`cudaFuncCachePreferEqual`| | | |`hipFuncCachePreferEqual`|1.6.0| | | | @@ -1158,10 +1171,27 @@ Unsupported |`cudaKernelNodeAttrID`|11.0| | |`hipKernelNodeAttrID`|5.2.0| | | | |`cudaKernelNodeAttrValue`|11.0| | |`hipKernelNodeAttrValue`|5.2.0| 
| | | |`cudaKernelNodeAttributeAccessPolicyWindow`|11.0| | |`hipKernelNodeAttributeAccessPolicyWindow`|5.2.0| | | | +|`cudaKernelNodeAttributeClusterDimension`|11.8| | | | | | | | +|`cudaKernelNodeAttributeClusterSchedulingPolicyPreference`|11.8| | | | | | | | |`cudaKernelNodeAttributeCooperative`|11.0| | |`hipKernelNodeAttributeCooperative`|5.2.0| | | | |`cudaKernelNodeAttributePriority`|11.7| | | | | | | | |`cudaKernelNodeParams`|10.0| | |`hipKernelNodeParams`|4.3.0| | | | |`cudaKeyValuePair`| | | | | | | | | +|`cudaLaunchAttribute`|11.8| | | | | | | | +|`cudaLaunchAttributeAccessPolicyWindow`|11.8| | | | | | | | +|`cudaLaunchAttributeClusterDimension`|11.8| | | | | | | | +|`cudaLaunchAttributeClusterSchedulingPolicyPreference`|11.8| | | | | | | | +|`cudaLaunchAttributeCooperative`|11.8| | | | | | | | +|`cudaLaunchAttributeID`|11.8| | | | | | | | +|`cudaLaunchAttributeIgnore`|11.8| | | | | | | | +|`cudaLaunchAttributePriority`|11.8| | | | | | | | +|`cudaLaunchAttributeProgrammaticEvent`|11.8| | | | | | | | +|`cudaLaunchAttributeProgrammaticStreamSerialization`|11.8| | | | | | | | +|`cudaLaunchAttributeSynchronizationPolicy`|11.8| | | | | | | | +|`cudaLaunchAttributeValue`|11.8| | | | | | | | +|`cudaLaunchAttribute_st`|11.8| | | | | | | | +|`cudaLaunchConfig_st`|11.8| | | | | | | | +|`cudaLaunchConfig_t`|11.8| | | | | | | | |`cudaLaunchParams`|9.0| | |`hipLaunchParams`|2.6.0| | | | |`cudaLimit`| | | |`hipLimit_t`|1.6.0| | | | |`cudaLimitDevRuntimePendingLaunchCount`| | | | | | | | | diff --git a/src/CUDA2HIP_Driver_API_types.cpp b/src/CUDA2HIP_Driver_API_types.cpp index 721cfe54..bc763129 100644 --- a/src/CUDA2HIP_Driver_API_types.cpp +++ b/src/CUDA2HIP_Driver_API_types.cpp @@ -312,12 +312,14 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CUDA_BATCH_MEM_OP_NODE_PARAMS_st", {"HIP_BATCH_MEM_OP_NODE_PARAMS", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, {"CUDA_BATCH_MEM_OP_NODE_PARAMS", {"HIP_BATCH_MEM_OP_NODE_PARAMS", "", CONV_TYPE, API_DRIVER, 1, 
HIP_UNSUPPORTED}}, - // + // cudaLaunchAttribute_st {"CUlaunchAttribute_st", {"hipLaunchAttribute", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // cudaLaunchAttribute {"CUlaunchAttribute", {"hipLaunchAttribute", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, - // + // cudaLaunchConfig_st {"CUlaunchConfig_st", {"hipLaunchConfig", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // cudaLaunchConfig_t {"CUlaunchConfig", {"hipLaunchConfig", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 2. Unions @@ -343,7 +345,7 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CUlinkState_st", {"ihiprtcLinkState", "", CONV_TYPE, API_DRIVER, 1}}, {"CUlinkState", {"hiprtcLinkState", "", CONV_TYPE, API_DRIVER, 1}}, - // + // cudaLaunchAttributeValue {"CUlaunchAttributeValue", {"hipLaunchAttributeValue", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, {"CUlaunchAttributeValue_union", {"hipLaunchAttributeValue", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, @@ -748,7 +750,7 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING", {"hipDeviceAttributeGpuDirectRdmaWritesOrdering", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 118 // cudaDevAttrMemoryPoolSupportedHandleTypes {"CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES", {"hipDeviceAttributeMempoolSupportedHandleTypes", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 119 - // + // cudaDevAttrClusterLaunch {"CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH", {"hipDeviceAttributeClusterLaunch", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 120 // cudaDevAttrDeferredMappingCudaArraySupported {"CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED", {"hipDeviceAttributeDeferredMappingCudaArraySupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 121 @@ -1055,17 +1057,17 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES", 
{"HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 8 // cudaFuncAttributePreferredSharedMemoryCarveout {"CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT", {"HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 9 - // + // cudaFuncAttributeClusterDimMustBeSet {"CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET", {"HIP_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 10 - // + // cudaFuncAttributeRequiredClusterWidth {"CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH", {"HIP_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 11 - // + // cudaFuncAttributeRequiredClusterHeight {"CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT", {"HIP_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 12 - // + // cudaFuncAttributeRequiredClusterDepth {"CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH", {"HIP_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 13 - // + // cudaFuncAttributeNonPortableClusterSizeAllowed {"CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED", {"HIP_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 14 - // + // cudaFuncAttributeClusterSchedulingPolicyPreference {"CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", {"HIP_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 15 // cudaFuncAttributeMax {"CU_FUNC_ATTRIBUTE_MAX", {"HIP_FUNC_ATTRIBUTE_MAX", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 16 @@ -1615,7 +1617,7 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CUDA_ERROR_MPS_MAX_CLIENTS_REACHED", {"hipErrorMpsMaxClientsReached", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 808 // cudaErrorMpsMaxConnectionsReached 
{"CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED", {"hipErrorMpsMaxConnectionsReached", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 809 - // + // cudaErrorMpsClientTerminated {"CUDA_ERROR_MPS_CLIENT_TERMINATED", {"hipErrorMpsClientTerminated", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 810 // cudaErrorStreamCaptureUnsupported {"CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED", {"hipErrorStreamCaptureUnsupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 900 @@ -1641,7 +1643,7 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE", {"hipErrorGraphExecUpdateFailure", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 910 // cudaErrorExternalDevice {"CUDA_ERROR_EXTERNAL_DEVICE", {"hipErrorExternalDevice", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 911 - // + // cudaErrorInvalidClusterSize {"CUDA_ERROR_INVALID_CLUSTER_SIZE", {"hipErrorInvalidClusterSize", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 912 // cudaErrorUnknown {"CUDA_ERROR_UNKNOWN", {"hipErrorUnknown", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 999 @@ -2183,39 +2185,39 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { // {"NVCL_CTX_SCHED_BLOCKING_SYNC", {"HIP_CL_CTX_SCHED_BLOCKING_SYNC", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, - // + // cudaClusterSchedulingPolicy {"CUclusterSchedulingPolicy", {"hipClusterSchedulingPolicy", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, {"CUclusterSchedulingPolicy_enum", {"hipClusterSchedulingPolicy", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, // CUclusterSchedulingPolicy enum values - // - {"CU_CLUSTER_SCHEDULING_POLICY_DEFAULT", {"HIP_CLUSTER_SCHEDULING_POLICY_DEFAULT", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, - // - {"CU_CLUSTER_SCHEDULING_POLICY_SPREAD", {"HIP_CLUSTER_SCHEDULING_POLICY_SPREAD", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, - // - {"CU_CLUSTER_SCHEDULING_POLICY_LOAD_BALANCING", 
{"HIP_CLUSTER_SCHEDULING_POLICY_LOAD_BALANCING", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, - - // + // cudaClusterSchedulingPolicyDefault + {"CU_CLUSTER_SCHEDULING_POLICY_DEFAULT", {"hipClusterSchedulingPolicyDefault", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // cudaClusterSchedulingPolicySpread + {"CU_CLUSTER_SCHEDULING_POLICY_SPREAD", {"hipClusterSchedulingPolicySpread", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // cudaClusterSchedulingPolicyLoadBalancing + {"CU_CLUSTER_SCHEDULING_POLICY_LOAD_BALANCING", {"hipClusterSchedulingPolicyLoadBalancing", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + + // cudaLaunchAttributeID {"CUlaunchAttributeID", {"hipLaunchAttributeID", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, {"CUlaunchAttributeID_enum", {"hipLaunchAttributeID", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, // CUlaunchAttributeID enum values - // - {"CU_LAUNCH_ATTRIBUTE_IGNORE", {"HIP_LAUNCH_ATTRIBUTE_IGNORE", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, - // - {"CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW", {"HIP_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, - // - {"CU_LAUNCH_ATTRIBUTE_COOPERATIVE", {"HIP_LAUNCH_ATTRIBUTE_COOPERATIVE", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, - // - {"CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY", {"HIP_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, - // - {"CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION", {"HIP_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, - // - {"CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", {"HIP_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE","", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, - // - {"CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION", {"HIP_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION", "", 
CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, - // - {"CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT", {"HIP_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, - // - {"CU_LAUNCH_ATTRIBUTE_PRIORITY", {"HIP_LAUNCH_ATTRIBUTE_PRIORITY", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // cudaLaunchAttributeIgnore + {"CU_LAUNCH_ATTRIBUTE_IGNORE", {"hipLaunchAttributeIgnore", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // cudaLaunchAttributeAccessPolicyWindow + {"CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW", {"hipLaunchAttributeAccessPolicyWindow", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // cudaLaunchAttributeCooperative + {"CU_LAUNCH_ATTRIBUTE_COOPERATIVE", {"hipLaunchAttributeCooperative", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // cudaLaunchAttributeSynchronizationPolicy + {"CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY", {"hipLaunchAttributeSynchronizationPolicy", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // cudaLaunchAttributeClusterDimension + {"CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION", {"hipLaunchAttributeClusterDimension", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // cudaLaunchAttributeClusterSchedulingPolicyPreference + {"CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", {"hipLaunchAttributeClusterSchedulingPolicyPreference", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // cudaLaunchAttributeProgrammaticStreamSerialization + {"CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION", {"hipLaunchAttributeProgrammaticStreamSerialization", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // cudaLaunchAttributeProgrammaticEvent + {"CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT", {"hipLaunchAttributeProgrammaticEvent", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, + // cudaLaunchAttributePriority + {"CU_LAUNCH_ATTRIBUTE_PRIORITY", {"hipLaunchAttributePriority", "", 
CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 4. Typedefs @@ -2335,10 +2337,10 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CU_LAUNCH_PARAM_BUFFER_POINTER_AS_INT", {"HIP_LAUNCH_PARAM_BUFFER_POINTER_AS_INT", "", CONV_DEFINE, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 0x01 // {"CU_LAUNCH_PARAM_BUFFER_SIZE_AS_INT", {"HIP_LAUNCH_PARAM_BUFFER_SIZE_AS_INT", "", CONV_DEFINE, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 0x02 - // - {"CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_DIMENSION", {"HIP_KERNEL_NODE_ATTRIBUTE_CLUSTER_DIMENSION", "", CONV_DEFINE, API_DRIVER, 1, HIP_UNSUPPORTED}}, // CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION - // - {"CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", {"HIP_KERNEL_NODE_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", "", CONV_DEFINE, API_DRIVER, 1, HIP_UNSUPPORTED}}, // CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE + // cudaKernelNodeAttributeClusterDimension + {"CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_DIMENSION", {"hipKernelNodeAttributeClusterDimension", "", CONV_DEFINE, API_DRIVER, 1, HIP_UNSUPPORTED}}, // CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION + // cudaKernelNodeAttributeClusterSchedulingPolicyPreference + {"CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", {"hipKernelNodeAttributeClusterSchedulingPolicyPreference", "", CONV_DEFINE, API_DRIVER, 1, HIP_UNSUPPORTED}}, // CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE }; const std::map CUDA_DRIVER_TYPE_NAME_VER_MAP { diff --git a/src/CUDA2HIP_Runtime_API_types.cpp b/src/CUDA2HIP_Runtime_API_types.cpp index 9cc43d89..41f36ae4 100644 --- a/src/CUDA2HIP_Runtime_API_types.cpp +++ b/src/CUDA2HIP_Runtime_API_types.cpp @@ -225,6 +225,19 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { // CUkernelNodeAttrValue {"cudaKernelNodeAttrValue", {"hipKernelNodeAttrValue", "", CONV_TYPE, API_RUNTIME, 36}}, + // CUlaunchAttributeValue + {"cudaLaunchAttributeValue", {"hipLaunchAttributeValue", "", CONV_TYPE, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + + // CUlaunchAttribute_st + 
{"cudaLaunchAttribute_st", {"hipLaunchAttribute", "", CONV_TYPE, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + // CUlaunchAttribute + {"cudaLaunchAttribute", {"hipLaunchAttribute", "", CONV_TYPE, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + + // CUlaunchConfig_st + {"cudaLaunchConfig_st", {"hipLaunchConfig", "", CONV_TYPE, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + // CUlaunchConfig + {"cudaLaunchConfig_t", {"hipLaunchConfig", "", CONV_TYPE, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + // 3. Enums // no analogue @@ -535,7 +548,7 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { // CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED {"cudaDevAttrMaxTimelineSemaphoreInteropSupported", {"hipDeviceAttributeMaxTimelineSemaphoreInteropSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED | CUDA_DEPRECATED}}, // 114 // CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED - {"cudaDevAttrTimelineSemaphoreInteropSupported", {"hipDevAttrTimelineSemaphoreInteropSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 114 + {"cudaDevAttrTimelineSemaphoreInteropSupported", {"hipDeviceAttributeTimelineSemaphoreInteropSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 114 // CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED {"cudaDevAttrMemoryPoolsSupported", {"hipDeviceAttributeMemoryPoolsSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 115 // CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED @@ -546,6 +559,8 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { {"cudaDevAttrGPUDirectRDMAWritesOrdering", {"hipDeviceAttributeGpuDirectRdmaWritesOrdering", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 118 // CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES {"cudaDevAttrMemoryPoolSupportedHandleTypes", {"hipDeviceAttributeMempoolSupportedHandleTypes", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 119 + // CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH + {"cudaDevAttrClusterLaunch", {"hipDeviceAttributeClusterLaunch", "", 
CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 120 // CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED {"cudaDevAttrDeferredMappingCudaArraySupported", {"hipDeviceAttributeDeferredMappingCudaArraySupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 121 // CU_DEVICE_ATTRIBUTE_MAX @@ -963,6 +978,8 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { {"cudaErrorMpsMaxClientsReached", {"hipErrorMpsMaxClientsReached", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 808 // CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED {"cudaErrorMpsMaxConnectionsReached", {"hipErrorMpsMaxConnectionsReached", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 809 + // CUDA_ERROR_MPS_CLIENT_TERMINATED + {"cudaErrorMpsClientTerminated", {"hipErrorMpsClientTerminated", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 810 // CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED {"cudaErrorStreamCaptureUnsupported", {"hipErrorStreamCaptureUnsupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 900 // CUDA_ERROR_STREAM_CAPTURE_INVALIDATED @@ -987,6 +1004,8 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { {"cudaErrorGraphExecUpdateFailure", {"hipErrorGraphExecUpdateFailure", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 910 // CUDA_ERROR_EXTERNAL_DEVICE {"cudaErrorExternalDevice", {"hipErrorExternalDevice", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 911 + // CUDA_ERROR_INVALID_CLUSTER_SIZE + {"cudaErrorInvalidClusterSize", {"hipErrorInvalidClusterSize", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 912 // CUDA_ERROR_UNKNOWN {"cudaErrorUnknown", {"hipErrorUnknown", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 999 // Deprecated since CUDA 4.1 @@ -1044,8 +1063,20 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { {"cudaFuncAttributeMaxDynamicSharedMemorySize", {"hipFuncAttributeMaxDynamicSharedMemorySize", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 8 // 
CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT {"cudaFuncAttributePreferredSharedMemoryCarveout", {"hipFuncAttributePreferredSharedMemoryCarveout", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 9 + // CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET + {"cudaFuncAttributeClusterDimMustBeSet", {"hipFuncAttributeClusterDimMustBeSet", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 10 + // CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH + {"cudaFuncAttributeRequiredClusterWidth", {"hipFuncAttributeRequiredClusterWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 11 + // CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT + {"cudaFuncAttributeRequiredClusterHeight", {"hipFuncAttributeRequiredClusterHeight", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 12 + // CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH + {"cudaFuncAttributeRequiredClusterDepth", {"hipFuncAttributeRequiredClusterDepth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 13 + // CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED + {"cudaFuncAttributeNonPortableClusterSizeAllowed", {"hipFuncAttributeNonPortableClusterSizeAllowed", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 14 + // CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE + {"cudaFuncAttributeClusterSchedulingPolicyPreference", {"hipFuncAttributeClusterSchedulingPolicyPreference", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 15 // CU_FUNC_ATTRIBUTE_MAX - {"cudaFuncAttributeMax", {"hipFuncAttributeMax", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 10 + {"cudaFuncAttributeMax", {"hipFuncAttributeMax", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 16 // CUfunc_cache {"cudaFuncCache", {"hipFuncCache_t", "", CONV_TYPE, API_RUNTIME, 36}}, @@ -1517,7 +1548,7 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { // CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE {"cudaKernelNodeAttributeCooperative", {"hipKernelNodeAttributeCooperative", "", CONV_NUMERIC_LITERAL, 
API_RUNTIME, 36}}, // 2 // CU_KERNEL_NODE_ATTRIBUTE_PRIORITY - {"cudaKernelNodeAttributePriority", {"hipKernelNodeAttributePriority", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 8 + {"cudaKernelNodeAttributePriority", {"hipKernelNodeAttributePriority", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 8 // CUmemPool_attribute {"cudaMemPoolAttr", {"hipMemPoolAttr", "", CONV_TYPE, API_RUNTIME, 36}}, @@ -1685,6 +1716,38 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { // CUDA_GRAPH_INSTANTIATE_FLAG_USE_NODE_PRIORITY {"cudaGraphInstantiateFlagUseNodePriority", {"hipGraphInstantiateFlagUseNodePriority", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + // CUclusterSchedulingPolicy + {"cudaClusterSchedulingPolicy", {"hipClusterSchedulingPolicy", "", CONV_TYPE, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + // cudaClusterSchedulingPolicy enum values + // CU_CLUSTER_SCHEDULING_POLICY_DEFAULT + {"cudaClusterSchedulingPolicyDefault", {"hipClusterSchedulingPolicyDefault", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + // CU_CLUSTER_SCHEDULING_POLICY_SPREAD + {"cudaClusterSchedulingPolicySpread", {"hipClusterSchedulingPolicySpread", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + // CU_CLUSTER_SCHEDULING_POLICY_LOAD_BALANCING + {"cudaClusterSchedulingPolicyLoadBalancing", {"hipClusterSchedulingPolicyLoadBalancing", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + + // CUlaunchAttributeID + {"cudaLaunchAttributeID", {"hipLaunchAttributeID", "", CONV_TYPE, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + // cudaLaunchAttributeID enum values + // CU_LAUNCH_ATTRIBUTE_IGNORE + {"cudaLaunchAttributeIgnore", {"hipLaunchAttributeIgnore", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + // CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW + {"cudaLaunchAttributeAccessPolicyWindow", {"hipLaunchAttributeAccessPolicyWindow", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + // 
CU_LAUNCH_ATTRIBUTE_COOPERATIVE + {"cudaLaunchAttributeCooperative", {"hipLaunchAttributeCooperative", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + // CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY + {"cudaLaunchAttributeSynchronizationPolicy", {"hipLaunchAttributeSynchronizationPolicy", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + // CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION + {"cudaLaunchAttributeClusterDimension", {"hipLaunchAttributeClusterDimension", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + // CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE + {"cudaLaunchAttributeClusterSchedulingPolicyPreference", {"hipLaunchAttributeClusterSchedulingPolicyPreference", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + // CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION + {"cudaLaunchAttributeProgrammaticStreamSerialization", {"hipLaunchAttributeProgrammaticStreamSerialization", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + // CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT + {"cudaLaunchAttributeProgrammaticEvent", {"hipLaunchAttributeProgrammaticEvent", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + // CU_LAUNCH_ATTRIBUTE_PRIORITY + {"cudaLaunchAttributePriority", {"hipLaunchAttributePriority", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, + // 4. 
Typedefs // CUhostFn @@ -1840,6 +1903,10 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { {"cudaStreamPerThread", {"hipStreamPerThread", "", CONV_DEFINE, API_RUNTIME, 36}}, // ((cudaStream_t)0x2) // CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL {"cudaArraySparsePropertiesSingleMipTail", {"hipArraySparsePropertiesSingleMipTail", "", CONV_DEFINE, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 0x1 + // CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_DIMENSION + {"cudaKernelNodeAttributeClusterDimension", {"hipKernelNodeAttributeClusterDimension", "", CONV_DEFINE, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // cudaLaunchAttributeClusterDimension + // CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE + {"cudaKernelNodeAttributeClusterSchedulingPolicyPreference", {"hipKernelNodeAttributeClusterSchedulingPolicyPreference", "", CONV_DEFINE, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // cudaLaunchAttributeClusterSchedulingPolicyPreference }; const std::map CUDA_RUNTIME_TYPE_NAME_VER_MAP { @@ -2272,6 +2339,36 @@ const std::map CUDA_RUNTIME_TYPE_NAME_VER_MAP {"cudaGraphExecUpdateErrorAttributesChanged", {CUDA_116, CUDA_0, CUDA_0 }}, {"cudaKernelNodeAttributePriority", {CUDA_117, CUDA_0, CUDA_0 }}, {"cudaGraphInstantiateFlagUseNodePriority", {CUDA_117, CUDA_0, CUDA_0 }}, + {"cudaErrorMpsClientTerminated", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaErrorInvalidClusterSize", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaClusterSchedulingPolicy", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaClusterSchedulingPolicyDefault", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaClusterSchedulingPolicySpread", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaClusterSchedulingPolicyLoadBalancing", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaFuncAttributeClusterDimMustBeSet", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaFuncAttributeRequiredClusterWidth", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaFuncAttributeRequiredClusterHeight", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaFuncAttributeRequiredClusterDepth", {CUDA_118, CUDA_0, CUDA_0 }}, + 
{"cudaFuncAttributeNonPortableClusterSizeAllowed", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaFuncAttributeClusterSchedulingPolicyPreference", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaDevAttrClusterLaunch", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaLaunchAttributeID", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaLaunchAttributeIgnore", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaLaunchAttributeAccessPolicyWindow", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaLaunchAttributeCooperative", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaLaunchAttributeSynchronizationPolicy", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaLaunchAttributeClusterDimension", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaLaunchAttributeClusterSchedulingPolicyPreference", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaLaunchAttributeProgrammaticStreamSerialization", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaLaunchAttributeProgrammaticEvent", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaLaunchAttributePriority", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaLaunchAttributeValue", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaLaunchAttribute_st", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaLaunchAttribute", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaLaunchConfig_st", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaLaunchConfig_t", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaKernelNodeAttributeClusterDimension", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cudaKernelNodeAttributeClusterSchedulingPolicyPreference", {CUDA_118, CUDA_0, CUDA_0 }}, }; const std::map HIP_RUNTIME_TYPE_NAME_VER_MAP { From 2ba85d93ba8780ab005fe8ef3c8b1bb17ff30c37 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Thu, 13 Oct 2022 19:04:54 +0200 Subject: [PATCH 18/43] [HIPIFY] Sync with CUDA 11.8 - Part 5 - BLAS & Device API + Introduce CUDA's fp8 device functions + Update regenerated hipify-perl and CUDA2HIP Markdown docs accordingly + Minor fixes and formatting --- bin/hipify-perl | 68 +++++++++++++++---- doc/markdown/CUBLAS_API_supported_by_HIP.md | 2 + .../CUDA_Device_API_supported_by_HIP.md | 10 +++ src/CUDA2HIP_BLAS_API_types.cpp | 64 
+++++++++-------- src/CUDA2HIP_Device_functions.cpp | 29 ++++++-- 5 files changed, 127 insertions(+), 46 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 630521a4..572531a6 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1383,6 +1383,8 @@ sub rocSubstitutions { subst("cublasSideMode_t", "rocblas_side", "type"); subst("cublasStatus", "rocblas_status", "type"); subst("cublasStatus_t", "rocblas_status", "type"); + subst("cudaDataType", "rocblas_datatype", "type"); + subst("cudaDataType_t", "rocblas_datatype_", "type"); subst("CUBLAS_ATOMICS_ALLOWED", "rocblas_atomics_allowed", "numeric_literal"); subst("CUBLAS_ATOMICS_NOT_ALLOWED", "rocblas_atomics_not_allowed", "numeric_literal"); subst("CUBLAS_DIAG_NON_UNIT", "rocblas_diagonal_non_unit", "numeric_literal"); @@ -1409,6 +1411,22 @@ sub rocSubstitutions { subst("CUBLAS_STATUS_NOT_INITIALIZED", "rocblas_status_invalid_handle", "numeric_literal"); subst("CUBLAS_STATUS_NOT_SUPPORTED", "rocblas_status_perf_degraded", "numeric_literal"); subst("CUBLAS_STATUS_SUCCESS", "rocblas_status_success", "numeric_literal"); + subst("CUDA_C_16BF", "rocblas_datatype_bf16_c", "numeric_literal"); + subst("CUDA_C_16F", "rocblas_datatype_f16_c", "numeric_literal"); + subst("CUDA_C_32F", "rocblas_datatype_f32_c", "numeric_literal"); + subst("CUDA_C_32I", "rocblas_datatype_i32_c", "numeric_literal"); + subst("CUDA_C_32U", "rocblas_datatype_u32_c", "numeric_literal"); + subst("CUDA_C_64F", "rocblas_datatype_f64_c", "numeric_literal"); + subst("CUDA_C_8I", "rocblas_datatype_i8_c", "numeric_literal"); + subst("CUDA_C_8U", "rocblas_datatype_u8_c", "numeric_literal"); + subst("CUDA_R_16BF", "rocblas_datatype_bf16_r", "numeric_literal"); + subst("CUDA_R_16F", "rocblas_datatype_f16_r", "numeric_literal"); + subst("CUDA_R_32F", "rocblas_datatype_f32_r", "numeric_literal"); + subst("CUDA_R_32I", "rocblas_datatype_i32_r", "numeric_literal"); + subst("CUDA_R_32U", "rocblas_datatype_u32_r", "numeric_literal"); + 
subst("CUDA_R_64F", "rocblas_datatype_f64_r", "numeric_literal"); + subst("CUDA_R_8I", "rocblas_datatype_i8_r", "numeric_literal"); + subst("CUDA_R_8U", "rocblas_datatype_u8_r", "numeric_literal"); } sub simpleSubstitutions { @@ -5388,6 +5406,16 @@ sub warnUnsupportedDeviceFunctions { "__pm2", "__pm1", "__pm0", + "__nv_cvt_halfraw_to_fp8", + "__nv_cvt_halfraw2_to_fp8x2", + "__nv_cvt_fp8x2_to_halfraw2", + "__nv_cvt_fp8_to_halfraw", + "__nv_cvt_float_to_fp8", + "__nv_cvt_float2_to_fp8x2", + "__nv_cvt_double_to_fp8", + "__nv_cvt_double2_to_fp8x2", + "__nv_cvt_bfloat16raw_to_fp8", + "__nv_cvt_bfloat16raw2_to_fp8x2", "__isnanl", "__isnanf", "__isnan", @@ -7724,12 +7752,6 @@ sub warnUnsupportedFunctions { "CUDNN_ADV_INFER_PATCH", "CUDNN_ADV_INFER_MINOR", "CUDNN_ADV_INFER_MAJOR", - "CUDA_R_64U", - "CUDA_R_64I", - "CUDA_R_4U", - "CUDA_R_4I", - "CUDA_R_16U", - "CUDA_R_16I", "CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_v1", "CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st", "CUDA_POINTER_ATTRIBUTE_P2P_TOKENS", @@ -7787,12 +7809,6 @@ sub warnUnsupportedFunctions { "CUDA_ERROR_DEVICE_NOT_LICENSED", "CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE", "CUDA_EGL_MAX_PLANES", - "CUDA_C_64U", - "CUDA_C_64I", - "CUDA_C_4U", - "CUDA_C_4I", - "CUDA_C_16U", - "CUDA_C_16I", "CUDA_CB", "CUDA_BATCH_MEM_OP_NODE_PARAMS_st", "CUDA_BATCH_MEM_OP_NODE_PARAMS", @@ -7944,6 +7960,20 @@ sub warnHipOnlyUnsupportedFunctions { "cublasCgelsBatched", "cublasAsumEx", "cublasAlloc", + "CUDA_R_8F_E5M2", + "CUDA_R_8F_E4M3", + "CUDA_R_64U", + "CUDA_R_64I", + "CUDA_R_4U", + "CUDA_R_4I", + "CUDA_R_16U", + "CUDA_R_16I", + "CUDA_C_64U", + "CUDA_C_64I", + "CUDA_C_4U", + "CUDA_C_4I", + "CUDA_C_16U", + "CUDA_C_16I", "CUBLAS_VER_PATCH", "CUBLAS_VER_MINOR", "CUBLAS_VER_MAJOR", @@ -8100,6 +8130,20 @@ sub warnRocOnlyUnsupportedFunctions { "cublasCgelsBatched", "cublasAsumEx", "cublasAlloc", + "CUDA_R_8F_E5M2", + "CUDA_R_8F_E4M3", + "CUDA_R_64U", + "CUDA_R_64I", + "CUDA_R_4U", + "CUDA_R_4I", + "CUDA_R_16U", + "CUDA_R_16I", + "CUDA_C_64U", + 
"CUDA_C_64I", + "CUDA_C_4U", + "CUDA_C_4I", + "CUDA_C_16U", + "CUDA_C_16I", "CUBLAS_VER_PATCH", "CUBLAS_VER_MINOR", "CUBLAS_VER_MAJOR", diff --git a/doc/markdown/CUBLAS_API_supported_by_HIP.md b/doc/markdown/CUBLAS_API_supported_by_HIP.md index e9844337..52f99a91 100644 --- a/doc/markdown/CUBLAS_API_supported_by_HIP.md +++ b/doc/markdown/CUBLAS_API_supported_by_HIP.md @@ -140,6 +140,8 @@ |`CUDA_R_64F`|8.0| | |`HIPBLAS_R_64F`|1.8.2| | | | |`CUDA_R_64I`|11.0| | | | | | | | |`CUDA_R_64U`|11.0| | | | | | | | +|`CUDA_R_8F_E4M3`|11.8| | | | | | | | +|`CUDA_R_8F_E5M2`|11.8| | | | | | | | |`CUDA_R_8I`|8.0| | |`HIPBLAS_R_8I`|3.0.0| | | | |`CUDA_R_8U`|8.0| | |`HIPBLAS_R_8U`|3.0.0| | | | |`cudaDataType`|8.0| | |`hipblasDatatype_t`|1.8.2| | | | diff --git a/doc/markdown/CUDA_Device_API_supported_by_HIP.md b/doc/markdown/CUDA_Device_API_supported_by_HIP.md index f9310b6e..6f6f2450 100644 --- a/doc/markdown/CUDA_Device_API_supported_by_HIP.md +++ b/doc/markdown/CUDA_Device_API_supported_by_HIP.md @@ -275,6 +275,16 @@ |`__mul24`| | | |`__mul24`|1.6.0| | | | |`__mul64hi`| | | |`__mul64hi`|1.6.0| | | | |`__mulhi`| | | |`__mulhi`|1.6.0| | | | +|`__nv_cvt_bfloat16raw2_to_fp8x2`|11.8| | | | | | | | +|`__nv_cvt_bfloat16raw_to_fp8`|11.8| | | | | | | | +|`__nv_cvt_double2_to_fp8x2`|11.8| | | | | | | | +|`__nv_cvt_double_to_fp8`|11.8| | | | | | | | +|`__nv_cvt_float2_to_fp8x2`|11.8| | | | | | | | +|`__nv_cvt_float_to_fp8`|11.8| | | | | | | | +|`__nv_cvt_fp8_to_halfraw`|11.8| | | | | | | | +|`__nv_cvt_fp8x2_to_halfraw2`|11.8| | | | | | | | +|`__nv_cvt_halfraw2_to_fp8x2`|11.8| | | | | | | | +|`__nv_cvt_halfraw_to_fp8`|11.8| | | | | | | | |`__pm0`| | | | | | | | | |`__pm1`| | | | | | | | | |`__pm2`| | | | | | | | | diff --git a/src/CUDA2HIP_BLAS_API_types.cpp b/src/CUDA2HIP_BLAS_API_types.cpp index 6cafb97b..cd94f7b6 100644 --- a/src/CUDA2HIP_BLAS_API_types.cpp +++ b/src/CUDA2HIP_BLAS_API_types.cpp @@ -138,36 +138,38 @@ const std::map CUDA_BLAS_TYPE_NAME_MAP { 
{"CUBLAS_GEMM_ALGO15_TENSOR_OP", {"HIPBLAS_GEMM_ALGO15_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, 2, UNSUPPORTED}}, // 115 // TODO: rename hipblasDatatype_t to hipDataType_t and move from hipBLAS to HIP - {"cudaDataType_t", {"hipblasDatatype_t", "rocblas_datatype_", CONV_TYPE, API_RUNTIME, 3}}, - {"cudaDataType", {"hipblasDatatype_t", "rocblas_datatype", CONV_TYPE, API_RUNTIME, 3}}, - {"CUDA_R_16F", {"HIPBLAS_R_16F", "rocblas_datatype_f16_r", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 2 // 150 - {"CUDA_C_16F", {"HIPBLAS_C_16F", "rocblas_datatype_f16_c", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 6 // 153 - {"CUDA_R_32F", {"HIPBLAS_R_32F", "rocblas_datatype_f32_r", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 0 // 151 - {"CUDA_C_32F", {"HIPBLAS_C_32F", "rocblas_datatype_f32_c", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 4 // 154 - {"CUDA_R_64F", {"HIPBLAS_R_64F", "rocblas_datatype_f64_r", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 1 // 152 - {"CUDA_C_64F", {"HIPBLAS_C_64F", "rocblas_datatype_f64_c", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 5 // 155 - {"CUDA_R_8I", {"HIPBLAS_R_8I", "rocblas_datatype_i8_r", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 3 // 160 - {"CUDA_C_8I", {"HIPBLAS_C_8I", "rocblas_datatype_i8_c", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 7 // 164 - {"CUDA_R_8U", {"HIPBLAS_R_8U", "rocblas_datatype_u8_r", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 8 // 161 - {"CUDA_C_8U", {"HIPBLAS_C_8U", "rocblas_datatype_u8_c", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 9 // 165 - {"CUDA_R_32I", {"HIPBLAS_R_32I", "rocblas_datatype_i32_r", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 10 // 162 - {"CUDA_C_32I", {"HIPBLAS_C_32I", "rocblas_datatype_i32_c", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 11 // 166 - {"CUDA_R_32U", {"HIPBLAS_R_32U", "rocblas_datatype_u32_r", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 12 // 163 - {"CUDA_C_32U", {"HIPBLAS_C_32U", "rocblas_datatype_u32_c", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 13 // 167 - {"CUDA_R_16BF", 
{"HIPBLAS_R_16B", "rocblas_datatype_bf16_r", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 14 // 168 - {"CUDA_C_16BF", {"HIPBLAS_C_16B", "rocblas_datatype_bf16_c", CONV_NUMERIC_LITERAL, API_RUNTIME, 3}}, // 15 // 169 - {"CUDA_R_4I", {"HIPBLAS_R_4I", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 16 - {"CUDA_C_4I", {"HIPBLAS_C_4I", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 17 - {"CUDA_R_4U", {"HIPBLAS_R_4U", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 18 - {"CUDA_C_4U", {"HIPBLAS_C_4U", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 19 - {"CUDA_R_16I", {"HIPBLAS_R_16I", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 20 - {"CUDA_C_16I", {"HIPBLAS_C_16I", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 21 - {"CUDA_R_16U", {"HIPBLAS_R_16U", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 22 - {"CUDA_C_16U", {"HIPBLAS_C_16U", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 23 - {"CUDA_R_64I", {"HIPBLAS_R_64I", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 24 - {"CUDA_C_64I", {"HIPBLAS_C_64I", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 25 - {"CUDA_R_64U", {"HIPBLAS_R_64U", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 26 - {"CUDA_C_64U", {"HIPBLAS_C_64U", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 3, UNSUPPORTED}}, // 27 + {"cudaDataType_t", {"hipblasDatatype_t", "rocblas_datatype_", CONV_TYPE, API_BLAS, 3}}, + {"cudaDataType", {"hipblasDatatype_t", "rocblas_datatype", CONV_TYPE, API_BLAS, 3}}, + {"CUDA_R_16F", {"HIPBLAS_R_16F", "rocblas_datatype_f16_r", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 2 // 150 + {"CUDA_C_16F", {"HIPBLAS_C_16F", "rocblas_datatype_f16_c", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 6 // 153 + {"CUDA_R_32F", {"HIPBLAS_R_32F", "rocblas_datatype_f32_r", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 0 // 151 + {"CUDA_C_32F", {"HIPBLAS_C_32F", "rocblas_datatype_f32_c", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 4 // 154 
+ {"CUDA_R_64F", {"HIPBLAS_R_64F", "rocblas_datatype_f64_r", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 1 // 152 + {"CUDA_C_64F", {"HIPBLAS_C_64F", "rocblas_datatype_f64_c", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 5 // 155 + {"CUDA_R_8I", {"HIPBLAS_R_8I", "rocblas_datatype_i8_r", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 3 // 160 + {"CUDA_C_8I", {"HIPBLAS_C_8I", "rocblas_datatype_i8_c", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 7 // 164 + {"CUDA_R_8U", {"HIPBLAS_R_8U", "rocblas_datatype_u8_r", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 8 // 161 + {"CUDA_C_8U", {"HIPBLAS_C_8U", "rocblas_datatype_u8_c", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 9 // 165 + {"CUDA_R_32I", {"HIPBLAS_R_32I", "rocblas_datatype_i32_r", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 10 // 162 + {"CUDA_C_32I", {"HIPBLAS_C_32I", "rocblas_datatype_i32_c", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 11 // 166 + {"CUDA_R_32U", {"HIPBLAS_R_32U", "rocblas_datatype_u32_r", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 12 // 163 + {"CUDA_C_32U", {"HIPBLAS_C_32U", "rocblas_datatype_u32_c", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 13 // 167 + {"CUDA_R_16BF", {"HIPBLAS_R_16B", "rocblas_datatype_bf16_r", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 14 // 168 + {"CUDA_C_16BF", {"HIPBLAS_C_16B", "rocblas_datatype_bf16_c", CONV_NUMERIC_LITERAL, API_BLAS, 3}}, // 15 // 169 + {"CUDA_R_4I", {"HIPBLAS_R_4I", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 16 + {"CUDA_C_4I", {"HIPBLAS_C_4I", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 17 + {"CUDA_R_4U", {"HIPBLAS_R_4U", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 18 + {"CUDA_C_4U", {"HIPBLAS_C_4U", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 19 + {"CUDA_R_16I", {"HIPBLAS_R_16I", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 20 + {"CUDA_C_16I", {"HIPBLAS_C_16I", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 21 + {"CUDA_R_16U", {"HIPBLAS_R_16U", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 22 + {"CUDA_C_16U", 
{"HIPBLAS_C_16U", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 23 + {"CUDA_R_64I", {"HIPBLAS_R_64I", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 24 + {"CUDA_C_64I", {"HIPBLAS_C_64I", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 25 + {"CUDA_R_64U", {"HIPBLAS_R_64U", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 26 + {"CUDA_C_64U", {"HIPBLAS_C_64U", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 27 + {"CUDA_R_8F_E4M3", {"HIPBLAS_R_8F_E4M3", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 28 + {"CUDA_R_8F_E5M2", {"HIPBLAS_R_8F_E5M2", "", CONV_NUMERIC_LITERAL, API_BLAS, 3, UNSUPPORTED}}, // 29 {"cublasHandle_t", {"hipblasHandle_t", "rocblas_handle", CONV_TYPE, API_BLAS, 2}}, // TODO: dereferencing: typedef struct cublasContext *cublasHandle_t; @@ -290,6 +292,8 @@ const std::map CUDA_BLAS_TYPE_NAME_VER_MAP { {"CUDA_C_64I", {CUDA_110, CUDA_0, CUDA_0}}, {"CUDA_R_64U", {CUDA_110, CUDA_0, CUDA_0}}, {"CUDA_C_64U", {CUDA_110, CUDA_0, CUDA_0}}, + {"CUDA_R_8F_E4M3", {CUDA_118, CUDA_0, CUDA_0}}, + {"CUDA_R_8F_E5M2", {CUDA_118, CUDA_0, CUDA_0}}, }; const std::map HIP_BLAS_TYPE_NAME_VER_MAP { diff --git a/src/CUDA2HIP_Device_functions.cpp b/src/CUDA2HIP_Device_functions.cpp index b54a8780..4ad1ac5a 100644 --- a/src/CUDA2HIP_Device_functions.cpp +++ b/src/CUDA2HIP_Device_functions.cpp @@ -704,13 +704,34 @@ const std::map CUDA_DEVICE_FUNCTION_MAP { // common functions {"__assert_fail", {"__assert_fail", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"__assertfail", {"__assertfail", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + // fp8 functions + {"__nv_cvt_double_to_fp8", {"__hip_cvt_double_to_fp8", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, HIP_UNSUPPORTED}}, + {"__nv_cvt_double2_to_fp8x2", {"__hip_cvt_double2_to_fp8x2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, HIP_UNSUPPORTED}}, + {"__nv_cvt_float_to_fp8", {"__hip_cvt_float_to_fp8", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, HIP_UNSUPPORTED}}, + {"__nv_cvt_float2_to_fp8x2", 
{"__hip_cvt_float2_to_fp8x2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, HIP_UNSUPPORTED}}, + {"__nv_cvt_halfraw_to_fp8", {"__hip_cvt_halfraw_to_fp8", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, HIP_UNSUPPORTED}}, + {"__nv_cvt_halfraw2_to_fp8x2", {"__hip_cvt_halfraw2_to_fp8x2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, HIP_UNSUPPORTED}}, + {"__nv_cvt_bfloat16raw_to_fp8", {"__hip_cvt_bfloat16raw_to_fp8", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, HIP_UNSUPPORTED}}, + {"__nv_cvt_bfloat16raw2_to_fp8x2",{"__hip_cvt_bfloat16raw2_to_fp8x2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, HIP_UNSUPPORTED}}, + {"__nv_cvt_fp8_to_halfraw", {"__hip_cvt_fp8_to_halfraw", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, HIP_UNSUPPORTED}}, + {"__nv_cvt_fp8x2_to_halfraw2", {"__hip_cvt_fp8x2_to_halfraw2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, HIP_UNSUPPORTED}}, }; const std::map CUDA_DEVICE_FUNCTION_VER_MAP { - {"__shfl", {CUDA_75, CUDA_90, CUDA_0 }}, - {"__shfl_up", {CUDA_75, CUDA_90, CUDA_0 }}, - {"__shfl_down", {CUDA_75, CUDA_90, CUDA_0 }}, - {"__shfl_xor", {CUDA_75, CUDA_90, CUDA_0 }}, + {"__shfl", {CUDA_75, CUDA_90, CUDA_0 }}, + {"__shfl_up", {CUDA_75, CUDA_90, CUDA_0 }}, + {"__shfl_down", {CUDA_75, CUDA_90, CUDA_0 }}, + {"__shfl_xor", {CUDA_75, CUDA_90, CUDA_0 }}, + {"__nv_cvt_double_to_fp8", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_cvt_double2_to_fp8x2", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_cvt_float_to_fp8", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_cvt_float2_to_fp8x2", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_cvt_halfraw_to_fp8", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_cvt_halfraw2_to_fp8x2", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_cvt_bfloat16raw_to_fp8", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_cvt_bfloat16raw2_to_fp8x2", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_cvt_fp8_to_halfraw", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_cvt_fp8x2_to_halfraw2", {CUDA_118, CUDA_0, CUDA_0 }}, }; const std::map HIP_DEVICE_FUNCTION_VER_MAP { From eb6f134e2bd4d2f405663d3f09c4457c7e51fda6 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Thu, 
13 Oct 2022 20:13:49 +0200 Subject: [PATCH 19/43] [HIPIFY][format] Device API --- src/CUDA2HIP_Device_functions.cpp | 2398 ++++++++++++++--------------- 1 file changed, 1199 insertions(+), 1199 deletions(-) diff --git a/src/CUDA2HIP_Device_functions.cpp b/src/CUDA2HIP_Device_functions.cpp index 4ad1ac5a..30321b9b 100644 --- a/src/CUDA2HIP_Device_functions.cpp +++ b/src/CUDA2HIP_Device_functions.cpp @@ -25,1220 +25,1220 @@ THE SOFTWARE. // Maps CUDA header names to HIP header names const std::map CUDA_DEVICE_FUNCTION_MAP { // math functions - {"abs", {"abs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"labs", {"labs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"llabs", {"llabs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"fabs", {"fabs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"fabsf", {"fabsf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"min", {"min", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"fminf", {"fminf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"fmin", {"fmin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"max", {"max", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"fmaxf", {"fmaxf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"fmax", {"fmax", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"sin", {"sin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"cos", {"cos", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"sincos", {"sincos", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"sincosf", {"sincosf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"tan", {"tan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"sqrt", {"sqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"rsqrt", {"rsqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"rsqrtf", {"rsqrtf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"log2", {"log2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"exp2", {"exp2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"exp2f", {"exp2f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"exp10", {"exp10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"exp10f", {"exp10f", "", CONV_DEVICE_FUNC, 
API_RUNTIME, 1}}, - {"expm1", {"expm1", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"expm1f", {"expm1f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"log2f", {"log2f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"log10", {"log10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"log", {"log", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"log1p", {"log1p", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"log1pf", {"log1pf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"floor", {"floor", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"exp", {"exp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"cosh", {"cosh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"sinh", {"sinh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"tanh", {"tanh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"acosh", {"acosh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"acoshf", {"acoshf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"asinh", {"asinh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"asinhf", {"asinhf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atanh", {"atanh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atanhf", {"atanhf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"ldexp", {"ldexp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"ldexpf", {"ldexpf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"logb", {"logb", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"logbf", {"logbf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"ilogb", {"ilogb", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"ilogbf", {"ilogbf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"scalbn", {"scalbn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"scalbnf", {"scalbnf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"scalbln", {"scalbln", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"scalblnf", {"scalblnf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"frexp", {"frexp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"frexpf", {"frexpf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"round", {"round", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"roundf", {"roundf", "", 
CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"lround", {"lround", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"lroundf", {"lroundf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"llround", {"llround", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"llroundf", {"llroundf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"rint", {"rint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"rintf", {"rintf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"lrint", {"lrint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"lrintf", {"lrintf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"llrint", {"llrint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"llrintf", {"llrintf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"nearbyint", {"nearbyint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"nearbyintf", {"nearbyintf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"ceil", {"ceil", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"trunc", {"trunc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"truncf", {"truncf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"fdim", {"fdim", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"fdimf", {"fdimf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atan2", {"atan2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atan", {"atan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"acos", {"acos", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"asin", {"asin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"hypot", {"hypot", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"rhypot", {"rhypot", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"hypotf", {"hypotf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"rhypotf", {"rhypotf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"norm3d", {"norm3d", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"rnorm3d", {"rnorm3d", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"norm4d", {"norm4d", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"rnorm4d", {"rnorm4d", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"norm", {"norm", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"rnorm", {"rnorm", "", CONV_DEVICE_FUNC, 
API_RUNTIME, 1}}, - {"rnormf", {"rnormf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"normf", {"normf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"norm3df", {"norm3df", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"rnorm3df", {"rnorm3df", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"norm4df", {"norm4df", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"rnorm4df", {"rnorm4df", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"cbrt", {"cbrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"cbrtf", {"cbrtf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"rcbrt", {"rcbrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"rcbrtf", {"rcbrtf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"sinpi", {"sinpi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"sinpif", {"sinpif", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"cospi", {"cospi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"cospif", {"cospif", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"sincospi", {"sincospi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"sincospif", {"sincospif", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"pow", {"pow", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"modf", {"modf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"fmod", {"fmod", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"remainder", {"remainder", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"remainderf", {"remainderf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"remquo", {"remquo", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"remquof", {"remquof", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"j0", {"j0", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"j0f", {"j0f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"j1", {"j1", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"j1f", {"j1f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"jn", {"jn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"jnf", {"jnf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"y0", {"y0", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"y0f", {"y0f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"y1", {"y1", "", CONV_DEVICE_FUNC, 
API_RUNTIME, 1}}, - {"y1f", {"y1f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"yn", {"yn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"ynf", {"ynf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"cyl_bessel_i0", {"cyl_bessel_i0", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"cyl_bessel_i0f", {"cyl_bessel_i0f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"cyl_bessel_i1", {"cyl_bessel_i1", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"cyl_bessel_i1f", {"cyl_bessel_i1f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"erf", {"erf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"erff", {"erff", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"erfinv", {"erfinv", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"erfinvf", {"erfinvf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"erfc", {"erfc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"erfcf", {"erfcf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"lgamma", {"lgamma", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"erfcinv", {"erfcinv", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"erfcinvf", {"erfcinvf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"normcdfinv", {"normcdfinv", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"normcdfinvf", {"normcdfinvf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"normcdf", {"normcdf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"normcdff", {"normcdff", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"erfcx", {"erfcx", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"erfcxf", {"erfcxf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"lgammaf", {"lgammaf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"tgamma", {"tgamma", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"tgammaf", {"tgammaf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"copysign", {"copysign", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"copysignf", {"copysignf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"nextafter", {"nextafter", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"nextafterf", {"nextafterf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"nan", {"nan", "", CONV_DEVICE_FUNC, 
API_RUNTIME, 1}}, - {"nanf", {"nanf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"fma", {"fma", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"fmaf", {"fmaf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"acosf", {"acosf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"asinf", {"asinf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atanf", {"atanf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atan2f", {"atan2f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"cosf", {"cosf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"sinf", {"sinf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"tanf", {"tanf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"coshf", {"coshf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"sinhf", {"sinhf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"tanhf", {"tanhf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"expf", {"expf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"logf", {"logf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"log10f", {"log10f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"modff", {"modff", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"powf", {"powf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"sqrtf", {"sqrtf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"ceilf", {"ceilf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"floorf", {"floorf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"fmodf", {"fmodf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"signbit", {"signbit", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"isfinite", {"isfinite", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"isnan", {"isnan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"isinf", {"isinf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"umin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"llmin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"ullmin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"umax", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"llmax", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"ullmax", {"", "", 
CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__isinff", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__isnanf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__finite", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__finitef", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__signbit", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__isnan", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__isinf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__signbitf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__signbitl", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__finitel", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__isinfl", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__isnanl", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"_ldsign", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"_fdsign", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"_Pow_int", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"abs", {"abs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"labs", {"labs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"llabs", {"llabs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"fabs", {"fabs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"fabsf", {"fabsf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"min", {"min", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"fminf", {"fminf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"fmin", {"fmin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"max", {"max", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"fmaxf", {"fmaxf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"fmax", {"fmax", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"sin", {"sin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"cos", {"cos", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"sincos", {"sincos", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + 
{"sincosf", {"sincosf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"tan", {"tan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"sqrt", {"sqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"rsqrt", {"rsqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"rsqrtf", {"rsqrtf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"log2", {"log2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"exp2", {"exp2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"exp2f", {"exp2f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"exp10", {"exp10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"exp10f", {"exp10f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"expm1", {"expm1", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"expm1f", {"expm1f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"log2f", {"log2f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"log10", {"log10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"log", {"log", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"log1p", {"log1p", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"log1pf", {"log1pf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"floor", {"floor", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"exp", {"exp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"cosh", {"cosh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"sinh", {"sinh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"tanh", {"tanh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"acosh", {"acosh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"acoshf", {"acoshf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"asinh", {"asinh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"asinhf", {"asinhf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atanh", {"atanh", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atanhf", {"atanhf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"ldexp", {"ldexp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"ldexpf", {"ldexpf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"logb", {"logb", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"logbf", {"logbf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"ilogb", {"ilogb", 
"", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"ilogbf", {"ilogbf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"scalbn", {"scalbn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"scalbnf", {"scalbnf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"scalbln", {"scalbln", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"scalblnf", {"scalblnf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"frexp", {"frexp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"frexpf", {"frexpf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"round", {"round", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"roundf", {"roundf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"lround", {"lround", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"lroundf", {"lroundf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"llround", {"llround", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"llroundf", {"llroundf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"rint", {"rint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"rintf", {"rintf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"lrint", {"lrint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"lrintf", {"lrintf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"llrint", {"llrint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"llrintf", {"llrintf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"nearbyint", {"nearbyint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"nearbyintf", {"nearbyintf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"ceil", {"ceil", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"trunc", {"trunc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"truncf", {"truncf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"fdim", {"fdim", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"fdimf", {"fdimf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atan2", {"atan2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atan", {"atan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"acos", {"acos", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"asin", {"asin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"hypot", {"hypot", "", CONV_DEVICE_FUNC, 
API_RUNTIME, 1}}, + {"rhypot", {"rhypot", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"hypotf", {"hypotf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"rhypotf", {"rhypotf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"norm3d", {"norm3d", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"rnorm3d", {"rnorm3d", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"norm4d", {"norm4d", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"rnorm4d", {"rnorm4d", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"norm", {"norm", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"rnorm", {"rnorm", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"rnormf", {"rnormf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"normf", {"normf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"norm3df", {"norm3df", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"rnorm3df", {"rnorm3df", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"norm4df", {"norm4df", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"rnorm4df", {"rnorm4df", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"cbrt", {"cbrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"cbrtf", {"cbrtf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"rcbrt", {"rcbrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"rcbrtf", {"rcbrtf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"sinpi", {"sinpi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"sinpif", {"sinpif", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"cospi", {"cospi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"cospif", {"cospif", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"sincospi", {"sincospi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"sincospif", {"sincospif", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"pow", {"pow", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"modf", {"modf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"fmod", {"fmod", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"remainder", {"remainder", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"remainderf", {"remainderf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"remquo", {"remquo", "", CONV_DEVICE_FUNC, 
API_RUNTIME, 1}}, + {"remquof", {"remquof", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"j0", {"j0", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"j0f", {"j0f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"j1", {"j1", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"j1f", {"j1f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"jn", {"jn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"jnf", {"jnf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"y0", {"y0", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"y0f", {"y0f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"y1", {"y1", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"y1f", {"y1f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"yn", {"yn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"ynf", {"ynf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"cyl_bessel_i0", {"cyl_bessel_i0", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"cyl_bessel_i0f", {"cyl_bessel_i0f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"cyl_bessel_i1", {"cyl_bessel_i1", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"cyl_bessel_i1f", {"cyl_bessel_i1f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"erf", {"erf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"erff", {"erff", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"erfinv", {"erfinv", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"erfinvf", {"erfinvf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"erfc", {"erfc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"erfcf", {"erfcf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"lgamma", {"lgamma", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"erfcinv", {"erfcinv", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"erfcinvf", {"erfcinvf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"normcdfinv", {"normcdfinv", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"normcdfinvf", {"normcdfinvf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"normcdf", {"normcdf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"normcdff", {"normcdff", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"erfcx", {"erfcx", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + 
{"erfcxf", {"erfcxf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"lgammaf", {"lgammaf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"tgamma", {"tgamma", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"tgammaf", {"tgammaf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"copysign", {"copysign", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"copysignf", {"copysignf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"nextafter", {"nextafter", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"nextafterf", {"nextafterf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"nan", {"nan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"nanf", {"nanf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"fma", {"fma", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"fmaf", {"fmaf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"acosf", {"acosf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"asinf", {"asinf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atanf", {"atanf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atan2f", {"atan2f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"cosf", {"cosf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"sinf", {"sinf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"tanf", {"tanf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"coshf", {"coshf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"sinhf", {"sinhf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"tanhf", {"tanhf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"expf", {"expf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"logf", {"logf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"log10f", {"log10f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"modff", {"modff", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"powf", {"powf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"sqrtf", {"sqrtf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"ceilf", {"ceilf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"floorf", {"floorf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"fmodf", {"fmodf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"signbit", {"signbit", "", CONV_DEVICE_FUNC, 
API_RUNTIME, 1}}, + {"isfinite", {"isfinite", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"isnan", {"isnan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"isinf", {"isinf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"umin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"llmin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"ullmin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"umax", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"llmax", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"ullmax", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__isinff", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__isnanf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__finite", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__finitef", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__signbit", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__isnan", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__isinf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__signbitf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__signbitl", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__finitel", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__isinfl", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__isnanl", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"_ldsign", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"_fdsign", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"_Pow_int", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, // static math functions declared in device-functions.h - {"mulhi", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"mul64hi", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"float_as_int", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, 
UNSUPPORTED}}, - {"int_as_float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"float_as_uint", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"uint_as_float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"saturate", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"mul24", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"umul24", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"float2int", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"int2float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"uint2float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"mulhi", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"mul64hi", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"float_as_int", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"int_as_float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"float_as_uint", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"uint_as_float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"saturate", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"mul24", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"umul24", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"float2int", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"int2float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"uint2float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, // device functions - {"__mulhi", {"__mulhi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__umulhi", {"__umulhi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__mul64hi", {"__mul64hi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__umul64hi", {"__umul64hi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__int_as_float", {"__int_as_float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float_as_int", {"__float_as_int", "", 
CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__uint_as_float", {"__uint_as_float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float_as_uint", {"__float_as_uint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__syncthreads", {"__syncthreads", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__syncthreads_count", {"__syncthreads_count", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__syncthreads_and", {"__syncthreads_and", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__syncthreads_or", {"__syncthreads_or", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__threadfence", {"__threadfence", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__threadfence_block", {"__threadfence_block", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__threadfence_system",{"__threadfence_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__saturatef", {"__saturatef", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__sad", {"__sad", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__usad", {"__usad", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__mul24", {"__mul24", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__umul24", {"__umul24", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"fdividef", {"fdividef", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__fdividef", {"__fdividef", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"fdivide", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__sinf", {"__sinf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__cosf", {"__cosf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__tanf", {"__tanf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__sincosf", {"__sincosf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__expf", {"__expf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__exp10f", {"__exp10f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__log2f", {"__log2f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__log10f", {"__log10f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__logf", {"__logf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__powf", {"__powf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, 
- {"__float2int_rn", {"__float2int_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2int_rz", {"__float2int_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2int_ru", {"__float2int_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2int_rd", {"__float2int_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2uint_rn", {"__float2uint_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2uint_rz", {"__float2uint_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2uint_ru", {"__float2uint_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2uint_rd", {"__float2uint_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__int2float_rn", {"__int2float_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__int2float_rz", {"__int2float_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__int2float_ru", {"__int2float_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__int2float_rd", {"__int2float_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__uint2float_rn", {"__uint2float_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__uint2float_rz", {"__uint2float_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__uint2float_ru", {"__uint2float_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__uint2float_rd", {"__uint2float_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2ll_rn", {"__float2ll_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2ll_rz", {"__float2ll_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2ll_ru", {"__float2ll_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2ll_rd", {"__float2ll_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2ull_rn", {"__float2ull_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2ull_rz", {"__float2ull_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2ull_ru", {"__float2ull_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2ull_rd", {"__float2ull_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ll2float_rn", {"__ll2float_rn", "", CONV_DEVICE_FUNC, 
API_RUNTIME, 1}}, - {"__ll2float_rz", {"__ll2float_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ll2float_ru", {"__ll2float_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ll2float_rd", {"__ll2float_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ull2float_rn", {"__ull2float_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ull2float_rz", {"__ull2float_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ull2float_ru", {"__ull2float_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ull2float_rd", {"__ull2float_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__fadd_rn", {"__fadd_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__fadd_rz", {"__fadd_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fadd_ru", {"__fadd_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fadd_rd", {"__fadd_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fsub_rn", {"__fsub_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__fsub_rz", {"__fsub_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fsub_ru", {"__fsub_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fsub_rd", {"__fsub_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fmul_rn", {"__fmul_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__fmul_rz", {"__fmul_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fmul_ru", {"__fmul_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fmul_rd", {"__fmul_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fmaf_rn", {"__fmaf_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__fmaf_rz", {"__fmaf_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fmaf_ru", {"__fmaf_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fmaf_rd", {"__fmaf_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__frcp_rn", {"__frcp_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__frcp_rz", {"__frcp_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, 
UNSUPPORTED}}, - {"__frcp_ru", {"__frcp_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__frcp_rd", {"__frcp_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fsqrt_rn", {"__fsqrt_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__fsqrt_rz", {"__fsqrt_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fsqrt_ru", {"__fsqrt_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fsqrt_rd", {"__fsqrt_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__frsqrt_rn", {"__frsqrt_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__fdiv_rn", {"__fdiv_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__fdiv_rz", {"__fdiv_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fdiv_ru", {"__fdiv_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fdiv_rd", {"__fdiv_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__clz", {"__clz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ffs", {"__ffs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__popc", {"__popc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__brev", {"__brev", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__clzll", {"__clzll", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ffsll", {"__ffsll", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__popcll", {"__popcll", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__brevll", {"__brevll", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__byte_perm", {"__byte_perm", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hadd", {"__hadd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__rhadd", {"__rhadd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__uhadd", {"__uhadd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__urhadd", {"__urhadd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__prof_trigger", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__trap", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__brkpt", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__pm0", {"", 
"", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__pm1", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__pm2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__pm3", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__mulhi", {"__mulhi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__umulhi", {"__umulhi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__mul64hi", {"__mul64hi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__umul64hi", {"__umul64hi", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__int_as_float", {"__int_as_float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float_as_int", {"__float_as_int", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__uint_as_float", {"__uint_as_float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float_as_uint", {"__float_as_uint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__syncthreads", {"__syncthreads", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__syncthreads_count", {"__syncthreads_count", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__syncthreads_and", {"__syncthreads_and", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__syncthreads_or", {"__syncthreads_or", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__threadfence", {"__threadfence", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__threadfence_block", {"__threadfence_block", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__threadfence_system", {"__threadfence_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__saturatef", {"__saturatef", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__sad", {"__sad", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__usad", {"__usad", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__mul24", {"__mul24", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__umul24", {"__umul24", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"fdividef", {"fdividef", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__fdividef", {"__fdividef", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"fdivide", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, 
UNSUPPORTED}}, + {"__sinf", {"__sinf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__cosf", {"__cosf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__tanf", {"__tanf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__sincosf", {"__sincosf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__expf", {"__expf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__exp10f", {"__exp10f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__log2f", {"__log2f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__log10f", {"__log10f", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__logf", {"__logf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__powf", {"__powf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2int_rn", {"__float2int_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2int_rz", {"__float2int_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2int_ru", {"__float2int_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2int_rd", {"__float2int_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2uint_rn", {"__float2uint_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2uint_rz", {"__float2uint_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2uint_ru", {"__float2uint_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2uint_rd", {"__float2uint_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__int2float_rn", {"__int2float_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__int2float_rz", {"__int2float_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__int2float_ru", {"__int2float_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__int2float_rd", {"__int2float_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__uint2float_rn", {"__uint2float_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__uint2float_rz", {"__uint2float_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__uint2float_ru", {"__uint2float_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__uint2float_rd", {"__uint2float_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2ll_rn", 
{"__float2ll_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2ll_rz", {"__float2ll_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2ll_ru", {"__float2ll_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2ll_rd", {"__float2ll_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2ull_rn", {"__float2ull_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2ull_rz", {"__float2ull_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2ull_ru", {"__float2ull_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2ull_rd", {"__float2ull_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ll2float_rn", {"__ll2float_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ll2float_rz", {"__ll2float_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ll2float_ru", {"__ll2float_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ll2float_rd", {"__ll2float_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ull2float_rn", {"__ull2float_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ull2float_rz", {"__ull2float_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ull2float_ru", {"__ull2float_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ull2float_rd", {"__ull2float_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__fadd_rn", {"__fadd_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__fadd_rz", {"__fadd_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fadd_ru", {"__fadd_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fadd_rd", {"__fadd_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fsub_rn", {"__fsub_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__fsub_rz", {"__fsub_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fsub_ru", {"__fsub_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fsub_rd", {"__fsub_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fmul_rn", {"__fmul_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__fmul_rz", {"__fmul_rz", "", 
CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fmul_ru", {"__fmul_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fmul_rd", {"__fmul_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fmaf_rn", {"__fmaf_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__fmaf_rz", {"__fmaf_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fmaf_ru", {"__fmaf_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fmaf_rd", {"__fmaf_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__frcp_rn", {"__frcp_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__frcp_rz", {"__frcp_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__frcp_ru", {"__frcp_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__frcp_rd", {"__frcp_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fsqrt_rn", {"__fsqrt_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__fsqrt_rz", {"__fsqrt_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fsqrt_ru", {"__fsqrt_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fsqrt_rd", {"__fsqrt_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__frsqrt_rn", {"__frsqrt_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__fdiv_rn", {"__fdiv_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__fdiv_rz", {"__fdiv_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fdiv_ru", {"__fdiv_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fdiv_rd", {"__fdiv_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__clz", {"__clz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ffs", {"__ffs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__popc", {"__popc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__brev", {"__brev", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__clzll", {"__clzll", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ffsll", {"__ffsll", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__popcll", {"__popcll", "", 
CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__brevll", {"__brevll", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__byte_perm", {"__byte_perm", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hadd", {"__hadd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__rhadd", {"__rhadd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__uhadd", {"__uhadd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__urhadd", {"__urhadd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__prof_trigger", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__trap", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__brkpt", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__pm0", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__pm1", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__pm2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__pm3", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, // device double functions - {"__dadd_rz", {"__dadd_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__dadd_ru", {"__dadd_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__dadd_rd", {"__dadd_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__dadd_rn", {"__dadd_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ddiv_rz", {"__ddiv_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__ddiv_ru", {"__ddiv_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__ddiv_rd", {"__ddiv_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__ddiv_rn", {"__ddiv_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__dmul_rz", {"__dmul_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__dmul_ru", {"__dmul_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__dmul_rd", {"__dmul_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__dmul_rn", {"__dmul_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__drcp_rz", {"__drcp_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, 
UNSUPPORTED}}, - {"__drcp_ru", {"__drcp_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__drcp_rd", {"__drcp_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__drcp_rn", {"__drcp_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__dsqrt_rz", {"__dsqrt_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__dsqrt_ru", {"__dsqrt_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__dsqrt_rd", {"__dsqrt_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__dsqrt_rn", {"__dsqrt_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__dsub_rz", {"__dsub_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__dsub_ru", {"__dsub_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__dsub_rd", {"__dsub_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__dsub_rn", {"__dsub_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__fma_rz", {"__fma_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fma_ru", {"__fma_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fma_rd", {"__fma_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__fma_rn", {"__fma_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2float_rd", {"__double2float_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2float_rn", {"__double2float_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2float_ru", {"__double2float_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2float_rz", {"__double2float_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2hiint", {"__double2hiint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2loint", {"__double2loint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2int_rd", {"__double2int_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2int_rn", {"__double2int_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2int_ru", {"__double2int_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2int_rz", {"__double2int_rz", 
"", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2ll_rd", {"__double2ll_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2ll_rn", {"__double2ll_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2ll_ru", {"__double2ll_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2ll_rz", {"__double2ll_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2uint_rd", {"__double2uint_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2uint_rn", {"__double2uint_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2uint_ru", {"__double2uint_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2uint_rz", {"__double2uint_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2ull_rd", {"__double2ull_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2ull_rn", {"__double2ull_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2ull_ru", {"__double2ull_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double2ull_rz", {"__double2ull_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__double_as_longlong",{"__double_as_longlong", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hiloint2double", {"__hiloint2double", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__int2double_rn", {"__int2double_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ll2double_rd", {"__ll2double_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ll2double_rn", {"__ll2double_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ll2double_ru", {"__ll2double_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ll2double_rz", {"__ll2double_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__longlong_as_double",{"__longlong_as_double", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__uint2double_rn", {"__uint2double_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ull2double_rd", {"__ull2double_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ull2double_rn", {"__ull2double_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ull2double_ru", {"__ull2double_ru", "", 
CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ull2double_rz", {"__ull2double_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__dadd_rz", {"__dadd_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__dadd_ru", {"__dadd_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__dadd_rd", {"__dadd_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__dadd_rn", {"__dadd_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ddiv_rz", {"__ddiv_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__ddiv_ru", {"__ddiv_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__ddiv_rd", {"__ddiv_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__ddiv_rn", {"__ddiv_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__dmul_rz", {"__dmul_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__dmul_ru", {"__dmul_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__dmul_rd", {"__dmul_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__dmul_rn", {"__dmul_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__drcp_rz", {"__drcp_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__drcp_ru", {"__drcp_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__drcp_rd", {"__drcp_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__drcp_rn", {"__drcp_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__dsqrt_rz", {"__dsqrt_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__dsqrt_ru", {"__dsqrt_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__dsqrt_rd", {"__dsqrt_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__dsqrt_rn", {"__dsqrt_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__dsub_rz", {"__dsub_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__dsub_ru", {"__dsub_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__dsub_rd", {"__dsub_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__dsub_rn", {"__dsub_rn", 
"", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__fma_rz", {"__fma_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fma_ru", {"__fma_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fma_rd", {"__fma_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__fma_rn", {"__fma_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2float_rd", {"__double2float_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2float_rn", {"__double2float_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2float_ru", {"__double2float_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2float_rz", {"__double2float_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2hiint", {"__double2hiint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2loint", {"__double2loint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2int_rd", {"__double2int_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2int_rn", {"__double2int_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2int_ru", {"__double2int_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2int_rz", {"__double2int_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2ll_rd", {"__double2ll_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2ll_rn", {"__double2ll_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2ll_ru", {"__double2ll_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2ll_rz", {"__double2ll_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2uint_rd", {"__double2uint_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2uint_rn", {"__double2uint_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2uint_ru", {"__double2uint_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2uint_rz", {"__double2uint_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2ull_rd", {"__double2ull_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2ull_rn", {"__double2ull_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 
1}}, + {"__double2ull_ru", {"__double2ull_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2ull_rz", {"__double2ull_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double_as_longlong", {"__double_as_longlong", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hiloint2double", {"__hiloint2double", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__int2double_rn", {"__int2double_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ll2double_rd", {"__ll2double_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ll2double_rn", {"__ll2double_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ll2double_ru", {"__ll2double_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ll2double_rz", {"__ll2double_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__longlong_as_double", {"__longlong_as_double", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__uint2double_rn", {"__uint2double_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ull2double_rd", {"__ull2double_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ull2double_rn", {"__ull2double_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ull2double_ru", {"__ull2double_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ull2double_rz", {"__ull2double_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, // SIMD functions - {"__vabs2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vabsss2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vadd2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vaddss2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vaddus2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vavgs2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vavgu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vhaddu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmpeq2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmpges2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, 
UNSUPPORTED}}, - {"__vcmpgeu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmpgts2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmpgtu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmples2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmplts2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmpltu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmpne2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vabsdiffu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vmaxs2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vmaxu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vmins2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vminu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vseteq2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsetges2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsetgeu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsetgts2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsetles2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsetleu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsetlts2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsetltu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsetne2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsadu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsub2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsubss2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsubus2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vneg2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vnegss2", {"", "", CONV_DEVICE_FUNC, 
API_RUNTIME, 1, UNSUPPORTED}}, - {"__vabsdiffs2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsads2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vabs4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vabsss4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vadd4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vaddss4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vaddus4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vavgs4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vavgu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vhaddu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmpeq4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmpges4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmpgeu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmpgts4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmpgtu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmples4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmpleu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmplts4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmpltu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vcmpne4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vabsdiffu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vmaxs4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vmaxu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vmins4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vminu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vseteq4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsetles4", {"", "", 
CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsetleu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsetlts4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsetltu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsetges4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsetgeu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsetgts4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsetgtu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsetne4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsadu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsub4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsubss4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsubus4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vneg4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vnegss4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vabsdiffs4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__vsads4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vabs2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vabsss2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vadd2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vaddss2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vaddus2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vavgs2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vavgu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vhaddu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmpeq2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmpges2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmpgeu2", {"", "", 
CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmpgts2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmpgtu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmples2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmplts2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmpltu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmpne2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vabsdiffu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vmaxs2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vmaxu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vmins2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vminu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vseteq2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsetges2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsetgeu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsetgts2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsetles2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsetleu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsetlts2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsetltu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsetne2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsadu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsub2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsubss2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsubus2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vneg2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vnegss2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vabsdiffs2", 
{"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsads2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vabs4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vabsss4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vadd4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vaddss4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vaddus4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vavgs4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vavgu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vhaddu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmpeq4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmpges4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmpgeu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmpgts4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmpgtu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmples4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmpleu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmplts4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmpltu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vcmpne4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vabsdiffu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vmaxs4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vmaxu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vmins4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vminu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vseteq4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsetles4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsetleu4", 
{"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsetlts4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsetltu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsetges4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsetgeu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsetgts4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsetgtu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsetne4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsadu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsub4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsubss4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsubus4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vneg4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vnegss4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vabsdiffs4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__vsads4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, // fp16 functions - {"__float2half", {"__float2half", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2half_rn", {"__float2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2half_rz", {"__float2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2half_rd", {"__float2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2half_ru", {"__float2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2float", {"__half2float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__float2half2_rn", {"__float2half2_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__floats2half2_rn", {"__floats2half2_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__low2float", {"__low2float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__high2float", {"__high2float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - 
{"__float22half2_rn", {"__float22half2_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half22float2", {"__half22float2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2int_rn", {"__half2int_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2int_rz", {"__half2int_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2int_rd", {"__half2int_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2int_ru", {"__half2int_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__int2half_rn", {"__int2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__int2half_rz", {"__int2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__int2half_rd", {"__int2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__int2half_ru", {"__int2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2short_rn", {"__half2short_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2short_rz", {"__half2short_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2short_rd", {"__half2short_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2short_ru", {"__half2short_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__short2half_rn", {"__short2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__short2half_rz", {"__short2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__short2half_rd", {"__short2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__short2half_ru", {"__short2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2uint_rn", {"__half2uint_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2uint_rz", {"__half2uint_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2uint_rd", {"__half2uint_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2uint_ru", {"__half2uint_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__uint2half_rn", {"__uint2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__uint2half_rz", {"__uint2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__uint2half_rd", {"__uint2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 
1}}, - {"__uint2half_ru", {"__uint2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2ushort_rn", {"__half2ushort_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2ushort_rz", {"__half2ushort_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2ushort_rd", {"__half2ushort_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2ushort_ru", {"__half2ushort_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ushort2half_rn", {"__ushort2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ushort2half_rz", {"__ushort2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ushort2half_rd", {"__ushort2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ushort2half_ru", {"__ushort2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2ull_rn", {"__half2ull_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2ull_rz", {"__half2ull_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2ull_rd", {"__half2ull_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2ull_ru", {"__half2ull_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ull2half_rn", {"__ull2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ull2half_rz", {"__ull2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ull2half_rd", {"__ull2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ull2half_ru", {"__ull2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2ll_rn", {"__half2ll_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2ll_rz", {"__half2ll_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2ll_rd", {"__half2ll_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2ll_ru", {"__half2ll_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ll2half_rn", {"__ll2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ll2half_rz", {"__ll2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ll2half_rd", {"__ll2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ll2half_ru", {"__ll2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - 
{"htrunc", {"htrunc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"hceil", {"hceil", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"hfloor", {"hfloor", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"hrint", {"hrint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"h2trunc", {"h2trunc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"h2ceil", {"h2ceil", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"h2floor", {"h2floor", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"h2rint", {"h2rint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half2half2", {"__half2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__lowhigh2highlow", {"__lowhigh2highlow", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__lows2half2", {"__lows2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__highs2half2", {"__highs2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__high2half", {"__high2half", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__low2half", {"__low2half", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hisinf", {"__hisinf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__halves2half2", {"__halves2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__low2half2", {"__low2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__high2half2", {"__high2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half_as_short", {"__half_as_short", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__half_as_ushort", {"__half_as_ushort", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__short_as_half", {"__short_as_half", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ushort_as_half", {"__ushort_as_half", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ldg", {"__ldg", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ldcg", {"__ldcg", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ldca", {"__ldca", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ldcs", {"__ldcs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__heq2", {"__heq2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hne2", {"__hne2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hle2", 
{"__hle2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hge2", {"__hge2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hlt2", {"__hlt2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hgt2", {"__hgt2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hequ2", {"__hequ2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hneu2", {"__hneu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hleu2", {"__hleu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hgeu2", {"__hgeu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hltu2", {"__hltu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hgtu2", {"__hgtu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hisnan2", {"__hisnan2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hadd2", {"__hadd2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hsub2", {"__hsub2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hmul2", {"__hmul2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__h2div", {"__h2div", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hadd2_sat", {"__hadd2_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hsub2_sat", {"__hsub2_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hmul2_sat", {"__hmul2_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hfma2", {"__hfma2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hfma2_sat", {"__hfma2_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hneg2", {"__hneg2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hsub", {"__hsub", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hmul", {"__hmul", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hdiv", {"__hdiv", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hadd_sat", {"__hadd_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hsub_sat", {"__hsub_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hmul_sat", {"__hmul_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hfma", {"__hfma", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hfma_sat", {"__hfma_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hneg", {"__hneg", "", CONV_DEVICE_FUNC, 
API_RUNTIME, 1}}, - {"__habs2", {"__habs2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__habs", {"__habs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hbeq2", {"__hbeq2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hbne2", {"__hbne2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hble2", {"__hble2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hbge2", {"__hbge2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hblt2", {"__hblt2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hbgt2", {"__hbgt2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hbequ2", {"__hbequ2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hbneu2", {"__hbneu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hbleu2", {"__hbleu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hbgeu2", {"__hbgeu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hbltu2", {"__hbltu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hbgtu2", {"__hbgtu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__heq", {"__heq", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hne", {"__hne", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hle", {"__hle", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hge", {"__hge", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hlt", {"__hlt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hgt", {"__hgt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hequ", {"__hequ", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hneu", {"__hneu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hleu", {"__hleu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hgeu", {"__hgeu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hltu", {"__hltu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hgtu", {"__hgtu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__hisnan", {"__hisnan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"hsqrt", {"hsqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"hrsqrt", {"hrsqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"hrcp", {"hrcp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"hlog", {"hlog", "", 
CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"hlog2", {"hlog2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"hlog10", {"hlog10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"hexp", {"hexp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"hexp2", {"hexp2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"hexp10", {"hexp10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"hcos", {"hcos", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"hsin", {"hsin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"h2sqrt", {"h2sqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"h2rsqrt", {"h2rsqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"h2rcp", {"h2rcp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"h2log", {"h2log", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"h2log2", {"h2log2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"h2log10", {"h2log10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"h2exp", {"h2exp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"h2exp2", {"h2exp2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"h2exp10", {"h2exp10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"h2cos", {"h2cos", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"h2sin", {"h2sin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__shfl_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__shfl", {"__shfl", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}}, - {"__shfl_up_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__shfl_up", {"__shfl_up", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}}, - {"__shfl_down_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__shfl_down", {"__shfl_down", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}}, - {"__shfl_xor_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, - {"__shfl_xor", {"__shfl_xor", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}}, - {"__funnelshift_l", {"__funnelshift_l", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__funnelshift_lc", {"__funnelshift_lc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - 
{"__funnelshift_r", {"__funnelshift_r", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__funnelshift_rc", {"__funnelshift_rc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2half", {"__float2half", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2half_rn", {"__float2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2half_rz", {"__float2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2half_rd", {"__float2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2half_ru", {"__float2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2float", {"__half2float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float2half2_rn", {"__float2half2_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__floats2half2_rn", {"__floats2half2_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__low2float", {"__low2float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__high2float", {"__high2float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__float22half2_rn", {"__float22half2_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half22float2", {"__half22float2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2int_rn", {"__half2int_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2int_rz", {"__half2int_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2int_rd", {"__half2int_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2int_ru", {"__half2int_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__int2half_rn", {"__int2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__int2half_rz", {"__int2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__int2half_rd", {"__int2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__int2half_ru", {"__int2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2short_rn", {"__half2short_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2short_rz", {"__half2short_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2short_rd", {"__half2short_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, 
+ {"__half2short_ru", {"__half2short_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__short2half_rn", {"__short2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__short2half_rz", {"__short2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__short2half_rd", {"__short2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__short2half_ru", {"__short2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2uint_rn", {"__half2uint_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2uint_rz", {"__half2uint_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2uint_rd", {"__half2uint_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2uint_ru", {"__half2uint_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__uint2half_rn", {"__uint2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__uint2half_rz", {"__uint2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__uint2half_rd", {"__uint2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__uint2half_ru", {"__uint2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2ushort_rn", {"__half2ushort_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2ushort_rz", {"__half2ushort_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2ushort_rd", {"__half2ushort_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2ushort_ru", {"__half2ushort_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ushort2half_rn", {"__ushort2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ushort2half_rz", {"__ushort2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ushort2half_rd", {"__ushort2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ushort2half_ru", {"__ushort2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2ull_rn", {"__half2ull_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2ull_rz", {"__half2ull_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2ull_rd", {"__half2ull_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2ull_ru", {"__half2ull_ru", "", 
CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ull2half_rn", {"__ull2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ull2half_rz", {"__ull2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ull2half_rd", {"__ull2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ull2half_ru", {"__ull2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2ll_rn", {"__half2ll_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2ll_rz", {"__half2ll_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2ll_rd", {"__half2ll_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2ll_ru", {"__half2ll_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ll2half_rn", {"__ll2half_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ll2half_rz", {"__ll2half_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ll2half_rd", {"__ll2half_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ll2half_ru", {"__ll2half_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"htrunc", {"htrunc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"hceil", {"hceil", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"hfloor", {"hfloor", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"hrint", {"hrint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"h2trunc", {"h2trunc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"h2ceil", {"h2ceil", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"h2floor", {"h2floor", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"h2rint", {"h2rint", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half2half2", {"__half2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__lowhigh2highlow", {"__lowhigh2highlow", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__lows2half2", {"__lows2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__highs2half2", {"__highs2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__high2half", {"__high2half", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__low2half", {"__low2half", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hisinf", {"__hisinf", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + 
{"__halves2half2", {"__halves2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__low2half2", {"__low2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__high2half2", {"__high2half2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half_as_short", {"__half_as_short", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__half_as_ushort", {"__half_as_ushort", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__short_as_half", {"__short_as_half", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ushort_as_half", {"__ushort_as_half", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ldg", {"__ldg", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ldcg", {"__ldcg", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ldca", {"__ldca", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ldcs", {"__ldcs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__heq2", {"__heq2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hne2", {"__hne2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hle2", {"__hle2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hge2", {"__hge2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hlt2", {"__hlt2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hgt2", {"__hgt2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hequ2", {"__hequ2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hneu2", {"__hneu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hleu2", {"__hleu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hgeu2", {"__hgeu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hltu2", {"__hltu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hgtu2", {"__hgtu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hisnan2", {"__hisnan2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hadd2", {"__hadd2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hsub2", {"__hsub2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hmul2", {"__hmul2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__h2div", {"__h2div", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hadd2_sat", {"__hadd2_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 
1}}, + {"__hsub2_sat", {"__hsub2_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hmul2_sat", {"__hmul2_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hfma2", {"__hfma2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hfma2_sat", {"__hfma2_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hneg2", {"__hneg2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hsub", {"__hsub", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hmul", {"__hmul", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hdiv", {"__hdiv", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hadd_sat", {"__hadd_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hsub_sat", {"__hsub_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hmul_sat", {"__hmul_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hfma", {"__hfma", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hfma_sat", {"__hfma_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hneg", {"__hneg", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__habs2", {"__habs2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__habs", {"__habs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hbeq2", {"__hbeq2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hbne2", {"__hbne2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hble2", {"__hble2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hbge2", {"__hbge2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hblt2", {"__hblt2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hbgt2", {"__hbgt2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hbequ2", {"__hbequ2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hbneu2", {"__hbneu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hbleu2", {"__hbleu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hbgeu2", {"__hbgeu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hbltu2", {"__hbltu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hbgtu2", {"__hbgtu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__heq", {"__heq", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hne", {"__hne", "", 
CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hle", {"__hle", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hge", {"__hge", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hlt", {"__hlt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hgt", {"__hgt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hequ", {"__hequ", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hneu", {"__hneu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hleu", {"__hleu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hgeu", {"__hgeu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hltu", {"__hltu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hgtu", {"__hgtu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hisnan", {"__hisnan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"hsqrt", {"hsqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"hrsqrt", {"hrsqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"hrcp", {"hrcp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"hlog", {"hlog", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"hlog2", {"hlog2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"hlog10", {"hlog10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"hexp", {"hexp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"hexp2", {"hexp2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"hexp10", {"hexp10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"hcos", {"hcos", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"hsin", {"hsin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"h2sqrt", {"h2sqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"h2rsqrt", {"h2rsqrt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"h2rcp", {"h2rcp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"h2log", {"h2log", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"h2log2", {"h2log2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"h2log10", {"h2log10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"h2exp", {"h2exp", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"h2exp2", {"h2exp2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"h2exp10", {"h2exp10", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + 
{"h2cos", {"h2cos", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"h2sin", {"h2sin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__shfl_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__shfl", {"__shfl", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}}, + {"__shfl_up_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__shfl_up", {"__shfl_up", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}}, + {"__shfl_down_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__shfl_down", {"__shfl_down", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}}, + {"__shfl_xor_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__shfl_xor", {"__shfl_xor", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, CUDA_DEPRECATED}}, + {"__funnelshift_l", {"__funnelshift_l", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__funnelshift_lc", {"__funnelshift_lc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__funnelshift_r", {"__funnelshift_r", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__funnelshift_rc", {"__funnelshift_rc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, // atomic functions - {"atomicAdd", {"atomicAdd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicAdd_system", {"atomicAdd_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicSub", {"atomicSub", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicSub_system", {"atomicSub_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicExch", {"atomicExch", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicExch_system", {"atomicExch_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicMin", {"atomicMin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicMin_system", {"atomicMin_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicMax", {"atomicMax", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicMax_system", {"atomicMax_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicInc", {"atomicInc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicDec", 
{"atomicDec", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicAnd", {"atomicAnd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicAnd_system", {"atomicAnd_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicOr", {"atomicOr", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicOr_system", {"atomicOr_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicXor", {"atomicXor", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicXor_system", {"atomicXor_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicCAS", {"atomicCAS", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"atomicCAS_system", {"atomicCAS_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__all", {"__all", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__any", {"__any", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__ballot", {"__ballot", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicAdd", {"atomicAdd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicAdd_system", {"atomicAdd_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicSub", {"atomicSub", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicSub_system", {"atomicSub_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicExch", {"atomicExch", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicExch_system", {"atomicExch_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicMin", {"atomicMin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicMin_system", {"atomicMin_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicMax", {"atomicMax", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicMax_system", {"atomicMax_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicInc", {"atomicInc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicDec", {"atomicDec", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicAnd", {"atomicAnd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicAnd_system", {"atomicAnd_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicOr", {"atomicOr", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + 
{"atomicOr_system", {"atomicOr_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicXor", {"atomicXor", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicXor_system", {"atomicXor_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicCAS", {"atomicCAS", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"atomicCAS_system", {"atomicCAS_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__all", {"__all", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__any", {"__any", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ballot", {"__ballot", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, // clock functions - {"clock64", {"clock64", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"clock", {"clock", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"clock64", {"clock64", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"clock", {"clock", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, // common functions - {"__assert_fail", {"__assert_fail", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, - {"__assertfail", {"__assertfail", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__assert_fail", {"__assert_fail", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__assertfail", {"__assertfail", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, // fp8 functions - {"__nv_cvt_double_to_fp8", {"__hip_cvt_double_to_fp8", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, HIP_UNSUPPORTED}}, - {"__nv_cvt_double2_to_fp8x2", {"__hip_cvt_double2_to_fp8x2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, HIP_UNSUPPORTED}}, - {"__nv_cvt_float_to_fp8", {"__hip_cvt_float_to_fp8", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, HIP_UNSUPPORTED}}, - {"__nv_cvt_float2_to_fp8x2", {"__hip_cvt_float2_to_fp8x2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, HIP_UNSUPPORTED}}, - {"__nv_cvt_halfraw_to_fp8", {"__hip_cvt_halfraw_to_fp8", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, HIP_UNSUPPORTED}}, - {"__nv_cvt_halfraw2_to_fp8x2", {"__hip_cvt_halfraw2_to_fp8x2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, HIP_UNSUPPORTED}}, - {"__nv_cvt_bfloat16raw_to_fp8", {"__hip_cvt_bfloat16raw_to_fp8", "", CONV_DEVICE_FUNC, 
API_RUNTIME, 1, HIP_UNSUPPORTED}}, - {"__nv_cvt_bfloat16raw2_to_fp8x2",{"__hip_cvt_bfloat16raw2_to_fp8x2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, HIP_UNSUPPORTED}}, - {"__nv_cvt_fp8_to_halfraw", {"__hip_cvt_fp8_to_halfraw", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, HIP_UNSUPPORTED}}, - {"__nv_cvt_fp8x2_to_halfraw2", {"__hip_cvt_fp8x2_to_halfraw2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, HIP_UNSUPPORTED}}, + {"__nv_cvt_double_to_fp8", {"__hip_cvt_double_to_fp8", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__nv_cvt_double2_to_fp8x2", {"__hip_cvt_double2_to_fp8x2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__nv_cvt_float_to_fp8", {"__hip_cvt_float_to_fp8", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__nv_cvt_float2_to_fp8x2", {"__hip_cvt_float2_to_fp8x2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__nv_cvt_halfraw_to_fp8", {"__hip_cvt_halfraw_to_fp8", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__nv_cvt_halfraw2_to_fp8x2", {"__hip_cvt_halfraw2_to_fp8x2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__nv_cvt_bfloat16raw_to_fp8", {"__hip_cvt_bfloat16raw_to_fp8", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__nv_cvt_bfloat16raw2_to_fp8x2", {"__hip_cvt_bfloat16raw2_to_fp8x2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__nv_cvt_fp8_to_halfraw", {"__hip_cvt_fp8_to_halfraw", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__nv_cvt_fp8x2_to_halfraw2", {"__hip_cvt_fp8x2_to_halfraw2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, }; const std::map CUDA_DEVICE_FUNCTION_VER_MAP { - {"__shfl", {CUDA_75, CUDA_90, CUDA_0 }}, - {"__shfl_up", {CUDA_75, CUDA_90, CUDA_0 }}, - {"__shfl_down", {CUDA_75, CUDA_90, CUDA_0 }}, - {"__shfl_xor", {CUDA_75, CUDA_90, CUDA_0 }}, - {"__nv_cvt_double_to_fp8", {CUDA_118, CUDA_0, CUDA_0 }}, - {"__nv_cvt_double2_to_fp8x2", {CUDA_118, CUDA_0, CUDA_0 }}, - {"__nv_cvt_float_to_fp8", {CUDA_118, CUDA_0, CUDA_0 }}, - {"__nv_cvt_float2_to_fp8x2", 
{CUDA_118, CUDA_0, CUDA_0 }}, - {"__nv_cvt_halfraw_to_fp8", {CUDA_118, CUDA_0, CUDA_0 }}, - {"__nv_cvt_halfraw2_to_fp8x2", {CUDA_118, CUDA_0, CUDA_0 }}, - {"__nv_cvt_bfloat16raw_to_fp8", {CUDA_118, CUDA_0, CUDA_0 }}, - {"__nv_cvt_bfloat16raw2_to_fp8x2", {CUDA_118, CUDA_0, CUDA_0 }}, - {"__nv_cvt_fp8_to_halfraw", {CUDA_118, CUDA_0, CUDA_0 }}, - {"__nv_cvt_fp8x2_to_halfraw2", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__shfl", {CUDA_75, CUDA_90, CUDA_0 }}, + {"__shfl_up", {CUDA_75, CUDA_90, CUDA_0 }}, + {"__shfl_down", {CUDA_75, CUDA_90, CUDA_0 }}, + {"__shfl_xor", {CUDA_75, CUDA_90, CUDA_0 }}, + {"__nv_cvt_double_to_fp8", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_cvt_double2_to_fp8x2", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_cvt_float_to_fp8", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_cvt_float2_to_fp8x2", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_cvt_halfraw_to_fp8", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_cvt_halfraw2_to_fp8x2", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_cvt_bfloat16raw_to_fp8", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_cvt_bfloat16raw2_to_fp8x2", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_cvt_fp8_to_halfraw", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_cvt_fp8x2_to_halfraw2", {CUDA_118, CUDA_0, CUDA_0 }}, }; const std::map HIP_DEVICE_FUNCTION_VER_MAP { - {"abs", {HIP_1060, HIP_0, HIP_0 }}, - {"labs", {HIP_1090, HIP_0, HIP_0 }}, - {"llabs", {HIP_1090, HIP_0, HIP_0 }}, - {"fabs", {HIP_1060, HIP_0, HIP_0 }}, - {"fabsf", {HIP_1060, HIP_0, HIP_0 }}, - {"min", {HIP_1060, HIP_0, HIP_0 }}, - {"fminf", {HIP_1060, HIP_0, HIP_0 }}, - {"fmin", {HIP_1060, HIP_0, HIP_0 }}, - {"max", {HIP_1060, HIP_0, HIP_0 }}, - {"fmaxf", {HIP_1060, HIP_0, HIP_0 }}, - {"fmax", {HIP_1060, HIP_0, HIP_0 }}, - {"sin", {HIP_1060, HIP_0, HIP_0 }}, - {"cos", {HIP_1060, HIP_0, HIP_0 }}, - {"sincos", {HIP_1060, HIP_0, HIP_0 }}, - {"sincosf", {HIP_1060, HIP_0, HIP_0 }}, - {"tan", {HIP_1060, HIP_0, HIP_0 }}, - {"sqrt", {HIP_1060, HIP_0, HIP_0 }}, - {"rsqrt", {HIP_1060, HIP_0, HIP_0 }}, - {"rsqrtf", {HIP_1060, HIP_0, HIP_0 
}}, - {"log2", {HIP_1060, HIP_0, HIP_0 }}, - {"exp2", {HIP_1060, HIP_0, HIP_0 }}, - {"exp2f", {HIP_1060, HIP_0, HIP_0 }}, - {"exp10", {HIP_1060, HIP_0, HIP_0 }}, - {"exp10f", {HIP_1060, HIP_0, HIP_0 }}, - {"expm1", {HIP_1060, HIP_0, HIP_0 }}, - {"expm1f", {HIP_1060, HIP_0, HIP_0 }}, - {"log2f", {HIP_1060, HIP_0, HIP_0 }}, - {"log10", {HIP_1060, HIP_0, HIP_0 }}, - {"log", {HIP_1060, HIP_0, HIP_0 }}, - {"log1p", {HIP_1060, HIP_0, HIP_0 }}, - {"log1pf", {HIP_1060, HIP_0, HIP_0 }}, - {"floor", {HIP_1060, HIP_0, HIP_0 }}, - {"exp", {HIP_1060, HIP_0, HIP_0 }}, - {"cosh", {HIP_1060, HIP_0, HIP_0 }}, - {"sinh", {HIP_1060, HIP_0, HIP_0 }}, - {"tanh", {HIP_1060, HIP_0, HIP_0 }}, - {"acosh", {HIP_1060, HIP_0, HIP_0 }}, - {"acoshf", {HIP_1060, HIP_0, HIP_0 }}, - {"asinh", {HIP_1060, HIP_0, HIP_0 }}, - {"asinhf", {HIP_1060, HIP_0, HIP_0 }}, - {"atanh", {HIP_1060, HIP_0, HIP_0 }}, - {"atanhf", {HIP_1060, HIP_0, HIP_0 }}, - {"ldexp", {HIP_1060, HIP_0, HIP_0 }}, - {"ldexpf", {HIP_1060, HIP_0, HIP_0 }}, - {"logb", {HIP_1060, HIP_0, HIP_0 }}, - {"logbf", {HIP_1060, HIP_0, HIP_0 }}, - {"ilogb", {HIP_1060, HIP_0, HIP_0 }}, - {"ilogbf", {HIP_1060, HIP_0, HIP_0 }}, - {"scalbn", {HIP_1060, HIP_0, HIP_0 }}, - {"scalbnf", {HIP_1060, HIP_0, HIP_0 }}, - {"scalbln", {HIP_1060, HIP_0, HIP_0 }}, - {"scalblnf", {HIP_1060, HIP_0, HIP_0 }}, - {"frexp", {HIP_1060, HIP_0, HIP_0 }}, - {"frexpf", {HIP_1060, HIP_0, HIP_0 }}, - {"round", {HIP_1060, HIP_0, HIP_0 }}, - {"roundf", {HIP_1060, HIP_0, HIP_0 }}, - {"lround", {HIP_1060, HIP_0, HIP_0 }}, - {"lroundf", {HIP_1060, HIP_0, HIP_0 }}, - {"llround", {HIP_1060, HIP_0, HIP_0 }}, - {"llroundf", {HIP_1060, HIP_0, HIP_0 }}, - {"rint", {HIP_1060, HIP_0, HIP_0 }}, - {"rintf", {HIP_1060, HIP_0, HIP_0 }}, - {"lrint", {HIP_1060, HIP_0, HIP_0 }}, - {"lrintf", {HIP_1060, HIP_0, HIP_0 }}, - {"llrint", {HIP_1060, HIP_0, HIP_0 }}, - {"llrintf", {HIP_1060, HIP_0, HIP_0 }}, - {"nearbyint", {HIP_1060, HIP_0, HIP_0 }}, - {"nearbyintf", {HIP_1060, HIP_0, HIP_0 }}, - 
{"ceil", {HIP_1060, HIP_0, HIP_0 }}, - {"trunc", {HIP_1060, HIP_0, HIP_0 }}, - {"truncf", {HIP_1060, HIP_0, HIP_0 }}, - {"fdim", {HIP_1060, HIP_0, HIP_0 }}, - {"fdimf", {HIP_1060, HIP_0, HIP_0 }}, - {"atan2", {HIP_1060, HIP_0, HIP_0 }}, - {"atan", {HIP_1060, HIP_0, HIP_0 }}, - {"acos", {HIP_1060, HIP_0, HIP_0 }}, - {"asin", {HIP_1060, HIP_0, HIP_0 }}, - {"hypot", {HIP_1060, HIP_0, HIP_0 }}, - {"rhypot", {HIP_1060, HIP_0, HIP_0 }}, - {"hypotf", {HIP_1060, HIP_0, HIP_0 }}, - {"rhypotf", {HIP_1060, HIP_0, HIP_0 }}, - {"norm3d", {HIP_1060, HIP_0, HIP_0 }}, - {"rnorm3d", {HIP_1060, HIP_0, HIP_0 }}, - {"norm4d", {HIP_1060, HIP_0, HIP_0 }}, - {"rnorm4d", {HIP_1060, HIP_0, HIP_0 }}, - {"norm", {HIP_1060, HIP_0, HIP_0 }}, - {"rnorm", {HIP_1060, HIP_0, HIP_0 }}, - {"rnormf", {HIP_1060, HIP_0, HIP_0 }}, - {"normf", {HIP_1060, HIP_0, HIP_0 }}, - {"norm3df", {HIP_1060, HIP_0, HIP_0 }}, - {"rnorm3df", {HIP_1060, HIP_0, HIP_0 }}, - {"norm4df", {HIP_1060, HIP_0, HIP_0 }}, - {"rnorm4df", {HIP_1060, HIP_0, HIP_0 }}, - {"cbrt", {HIP_1060, HIP_0, HIP_0 }}, - {"cbrtf", {HIP_1060, HIP_0, HIP_0 }}, - {"rcbrt", {HIP_1060, HIP_0, HIP_0 }}, - {"rcbrtf", {HIP_1060, HIP_0, HIP_0 }}, - {"sinpi", {HIP_1060, HIP_0, HIP_0 }}, - {"sinpif", {HIP_1060, HIP_0, HIP_0 }}, - {"cospi", {HIP_1060, HIP_0, HIP_0 }}, - {"cospif", {HIP_1060, HIP_0, HIP_0 }}, - {"sincospi", {HIP_1060, HIP_0, HIP_0 }}, - {"sincospif", {HIP_1060, HIP_0, HIP_0 }}, - {"pow", {HIP_1060, HIP_0, HIP_0 }}, - {"modf", {HIP_1090, HIP_0, HIP_0 }}, - {"fmod", {HIP_1060, HIP_0, HIP_0 }}, - {"remainder", {HIP_1060, HIP_0, HIP_0 }}, - {"remainderf", {HIP_1060, HIP_0, HIP_0 }}, - {"remquo", {HIP_1090, HIP_0, HIP_0 }}, - {"remquof", {HIP_1060, HIP_0, HIP_0 }}, - {"j0", {HIP_1060, HIP_0, HIP_0 }}, - {"j0f", {HIP_1060, HIP_0, HIP_0 }}, - {"j1", {HIP_1060, HIP_0, HIP_0 }}, - {"j1f", {HIP_1060, HIP_0, HIP_0 }}, - {"jn", {HIP_1060, HIP_0, HIP_0 }}, - {"jnf", {HIP_1060, HIP_0, HIP_0 }}, - {"y0", {HIP_1060, HIP_0, HIP_0 }}, - {"y0f", {HIP_1060, 
HIP_0, HIP_0 }}, - {"y1", {HIP_1060, HIP_0, HIP_0 }}, - {"y1f", {HIP_1060, HIP_0, HIP_0 }}, - {"yn", {HIP_1060, HIP_0, HIP_0 }}, - {"ynf", {HIP_1060, HIP_0, HIP_0 }}, - {"cyl_bessel_i0", {HIP_1090, HIP_0, HIP_0 }}, - {"cyl_bessel_i0f", {HIP_1090, HIP_0, HIP_0 }}, - {"cyl_bessel_i1", {HIP_1090, HIP_0, HIP_0 }}, - {"cyl_bessel_i1f", {HIP_1090, HIP_0, HIP_0 }}, - {"erf", {HIP_1060, HIP_0, HIP_0 }}, - {"erff", {HIP_1060, HIP_0, HIP_0 }}, - {"erfinv", {HIP_1060, HIP_0, HIP_0 }}, - {"erfinvf", {HIP_1060, HIP_0, HIP_0 }}, - {"erfc", {HIP_1060, HIP_0, HIP_0 }}, - {"erfcf", {HIP_1060, HIP_0, HIP_0 }}, - {"lgamma", {HIP_1060, HIP_0, HIP_0 }}, - {"erfcinv", {HIP_1060, HIP_0, HIP_0 }}, - {"erfcinvf", {HIP_1060, HIP_0, HIP_0 }}, - {"normcdfinv", {HIP_1060, HIP_0, HIP_0 }}, - {"normcdfinvf", {HIP_1060, HIP_0, HIP_0 }}, - {"normcdf", {HIP_1060, HIP_0, HIP_0 }}, - {"normcdff", {HIP_1060, HIP_0, HIP_0 }}, - {"erfcx", {HIP_1060, HIP_0, HIP_0 }}, - {"erfcxf", {HIP_1060, HIP_0, HIP_0 }}, - {"lgammaf", {HIP_1060, HIP_0, HIP_0 }}, - {"tgamma", {HIP_1060, HIP_0, HIP_0 }}, - {"tgammaf", {HIP_1060, HIP_0, HIP_0 }}, - {"copysign", {HIP_1060, HIP_0, HIP_0 }}, - {"copysignf", {HIP_1060, HIP_0, HIP_0 }}, - {"nextafter", {HIP_1060, HIP_0, HIP_0 }}, - {"nextafterf", {HIP_1090, HIP_0, HIP_0 }}, - {"nan", {HIP_1060, HIP_0, HIP_0 }}, - {"nanf", {HIP_1060, HIP_0, HIP_0 }}, - {"fma", {HIP_1060, HIP_0, HIP_0 }}, - {"fmaf", {HIP_1060, HIP_0, HIP_0 }}, - {"acosf", {HIP_1060, HIP_0, HIP_0 }}, - {"asinf", {HIP_1060, HIP_0, HIP_0 }}, - {"atanf", {HIP_1060, HIP_0, HIP_0 }}, - {"atan2f", {HIP_1060, HIP_0, HIP_0 }}, - {"cosf", {HIP_1060, HIP_0, HIP_0 }}, - {"sinf", {HIP_1060, HIP_0, HIP_0 }}, - {"tanf", {HIP_1060, HIP_0, HIP_0 }}, - {"coshf", {HIP_1060, HIP_0, HIP_0 }}, - {"sinhf", {HIP_1060, HIP_0, HIP_0 }}, - {"tanhf", {HIP_1060, HIP_0, HIP_0 }}, - {"expf", {HIP_1060, HIP_0, HIP_0 }}, - {"logf", {HIP_1060, HIP_0, HIP_0 }}, - {"log10f", {HIP_1060, HIP_0, HIP_0 }}, - {"modff", {HIP_1090, HIP_0, HIP_0 }}, - 
{"powf", {HIP_1060, HIP_0, HIP_0 }}, - {"sqrtf", {HIP_1060, HIP_0, HIP_0 }}, - {"ceilf", {HIP_1060, HIP_0, HIP_0 }}, - {"floorf", {HIP_1060, HIP_0, HIP_0 }}, - {"fmodf", {HIP_1060, HIP_0, HIP_0 }}, - {"signbit", {HIP_1060, HIP_0, HIP_0 }}, - {"isfinite", {HIP_1060, HIP_0, HIP_0 }}, - {"isnan", {HIP_1060, HIP_0, HIP_0 }}, - {"isinf", {HIP_1060, HIP_0, HIP_0 }}, - {"__mulhi", {HIP_1060, HIP_0, HIP_0 }}, - {"__umulhi", {HIP_1060, HIP_0, HIP_0 }}, - {"__mul64hi", {HIP_1060, HIP_0, HIP_0 }}, - {"__umul64hi", {HIP_1060, HIP_0, HIP_0 }}, - {"__int_as_float", {HIP_1060, HIP_0, HIP_0 }}, - {"__float_as_int", {HIP_1060, HIP_0, HIP_0 }}, - {"__float_as_uint", {HIP_1060, HIP_0, HIP_0 }}, - {"__uint_as_float", {HIP_1060, HIP_0, HIP_0 }}, - {"__syncthreads", {HIP_1060, HIP_0, HIP_0 }}, - {"__syncthreads_count", {HIP_3070, HIP_0, HIP_0 }}, - {"__syncthreads_and", {HIP_3070, HIP_0, HIP_0 }}, - {"__syncthreads_or", {HIP_3070, HIP_0, HIP_0 }}, - {"__threadfence", {HIP_1060, HIP_0, HIP_0 }}, - {"__threadfence_block", {HIP_1060, HIP_0, HIP_0 }}, - {"__threadfence_system",{HIP_1060, HIP_0, HIP_0 }}, - {"__saturatef", {HIP_1060, HIP_0, HIP_0 }}, - {"__sad", {HIP_1060, HIP_0, HIP_0 }}, - {"__usad", {HIP_1060, HIP_0, HIP_0 }}, - {"__mul24", {HIP_1060, HIP_0, HIP_0 }}, - {"__umul24", {HIP_1060, HIP_0, HIP_0 }}, - {"fdividef", {HIP_1060, HIP_0, HIP_0 }}, - {"__fdividef", {HIP_1060, HIP_0, HIP_0 }}, - {"__sinf", {HIP_1060, HIP_0, HIP_0 }}, - {"__cosf", {HIP_1060, HIP_0, HIP_0 }}, - {"__tanf", {HIP_1060, HIP_0, HIP_0 }}, - {"__sincosf", {HIP_1060, HIP_0, HIP_0 }}, - {"__expf", {HIP_1060, HIP_0, HIP_0 }}, - {"__exp10f", {HIP_1060, HIP_0, HIP_0 }}, - {"__log2f", {HIP_1060, HIP_0, HIP_0 }}, - {"__log10f", {HIP_1060, HIP_0, HIP_0 }}, - {"__logf", {HIP_1060, HIP_0, HIP_0 }}, - {"__powf", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2int_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2int_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2int_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2int_rd", {HIP_1060, HIP_0, 
HIP_0 }}, - {"__float2uint_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2uint_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2uint_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2uint_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__int2float_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__int2float_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__int2float_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__int2float_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__uint2float_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__uint2float_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__uint2float_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__uint2float_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2ll_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2ll_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2ll_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2ll_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2ull_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2ull_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2ull_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2ull_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__ll2float_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__ll2float_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__ll2float_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__ll2float_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__ull2float_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__ull2float_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__ull2float_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__ull2float_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__fadd_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__fsub_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__fmul_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__fmaf_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__frcp_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__fsqrt_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__frsqrt_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__fdiv_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__clz", {HIP_1060, HIP_0, HIP_0 }}, - {"__ffs", {HIP_1060, HIP_0, HIP_0 }}, - {"__popc", {HIP_1060, HIP_0, HIP_0 }}, - {"__brev", {HIP_1060, HIP_0, HIP_0 }}, - {"__clzll", {HIP_1060, HIP_0, HIP_0 }}, - {"__ffsll", {HIP_1060, HIP_0, HIP_0 }}, - {"__popcll", 
{HIP_1060, HIP_0, HIP_0 }}, - {"__brevll", {HIP_1060, HIP_0, HIP_0 }}, - {"__byte_perm", {HIP_1060, HIP_0, HIP_0 }}, - {"__hadd", {HIP_1060, HIP_0, HIP_0 }}, - {"__rhadd", {HIP_1060, HIP_0, HIP_0 }}, - {"__uhadd", {HIP_1060, HIP_0, HIP_0 }}, - {"__urhadd", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2float_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2float_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2float_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2float_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2hiint", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2loint", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2int_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2int_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2int_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2int_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2ll_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2ll_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2ll_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2ll_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2uint_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2uint_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2uint_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2uint_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2ull_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2ull_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2ull_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__double2ull_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__double_as_longlong",{HIP_1060, HIP_0, HIP_0 }}, - {"__hiloint2double", {HIP_1060, HIP_0, HIP_0 }}, - {"__int2double_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__ll2double_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__ll2double_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__ll2double_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__ll2double_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__longlong_as_double",{HIP_1060, HIP_0, HIP_0 }}, - {"__uint2double_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__ull2double_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__ull2double_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__ull2double_ru", {HIP_1060, HIP_0, HIP_0 }}, - 
{"__ull2double_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2half", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2half_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2half_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2half_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2half_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2float", {HIP_1060, HIP_0, HIP_0 }}, - {"__float2half2_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__floats2half2_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__low2float", {HIP_1060, HIP_0, HIP_0 }}, - {"__high2float", {HIP_1060, HIP_0, HIP_0 }}, - {"__float22half2_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__half22float2", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2int_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2int_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2int_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2int_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__int2half_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__int2half_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__int2half_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__int2half_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2short_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2short_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2short_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2short_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__short2half_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__short2half_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__short2half_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__short2half_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2uint_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2uint_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2uint_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2uint_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__uint2half_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__uint2half_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__uint2half_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__uint2half_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2ushort_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2ushort_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2ushort_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2ushort_ru", 
{HIP_1060, HIP_0, HIP_0 }}, - {"__ushort2half_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__ushort2half_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__ushort2half_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__ushort2half_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2ull_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2ull_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2ull_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2ull_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__ull2half_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__ull2half_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__ull2half_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__ull2half_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2ll_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2ll_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2ll_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__half2ll_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"__ll2half_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__ll2half_rz", {HIP_1060, HIP_0, HIP_0 }}, - {"__ll2half_rd", {HIP_1060, HIP_0, HIP_0 }}, - {"__ll2half_ru", {HIP_1060, HIP_0, HIP_0 }}, - {"htrunc", {HIP_1060, HIP_0, HIP_0 }}, - {"hceil", {HIP_1060, HIP_0, HIP_0 }}, - {"hfloor", {HIP_1060, HIP_0, HIP_0 }}, - {"hrint", {HIP_1060, HIP_0, HIP_0 }}, - {"h2trunc", {HIP_1060, HIP_0, HIP_0 }}, - {"h2ceil", {HIP_1060, HIP_0, HIP_0 }}, - {"h2floor", {HIP_1060, HIP_0, HIP_0 }}, - {"h2rint", {HIP_1090, HIP_0, HIP_0 }}, - {"__half2half2", {HIP_1090, HIP_0, HIP_0 }}, - {"__lowhigh2highlow", {HIP_1060, HIP_0, HIP_0 }}, - {"__lows2half2", {HIP_1060, HIP_0, HIP_0 }}, - {"__highs2half2", {HIP_1060, HIP_0, HIP_0 }}, - {"__high2half", {HIP_1060, HIP_0, HIP_0 }}, - {"__low2half", {HIP_1060, HIP_0, HIP_0 }}, - {"__hisinf", {HIP_1060, HIP_0, HIP_0 }}, - {"__halves2half2", {HIP_1060, HIP_0, HIP_0 }}, - {"__low2half2", {HIP_1060, HIP_0, HIP_0 }}, - {"__high2half2", {HIP_1060, HIP_0, HIP_0 }}, - {"__half_as_short", {HIP_1060, HIP_0, HIP_0 }}, - {"__half_as_ushort", {HIP_1060, HIP_0, HIP_0 }}, - {"__short_as_half", {HIP_1090, HIP_0, HIP_0 }}, - {"__ushort_as_half", {HIP_1060, HIP_0, HIP_0 }}, - 
{"__ldg", {HIP_1060, HIP_0, HIP_0 }}, - {"__ldcg", {HIP_1090, HIP_0, HIP_0 }}, - {"__ldca", {HIP_1090, HIP_0, HIP_0 }}, - {"__ldcs", {HIP_1090, HIP_0, HIP_0 }}, - {"__heq2", {HIP_1060, HIP_0, HIP_0 }}, - {"__hne2", {HIP_1060, HIP_0, HIP_0 }}, - {"__hle2", {HIP_1060, HIP_0, HIP_0 }}, - {"__hge2", {HIP_1060, HIP_0, HIP_0 }}, - {"__hlt2", {HIP_1060, HIP_0, HIP_0 }}, - {"__hgt2", {HIP_1060, HIP_0, HIP_0 }}, - {"__hequ2", {HIP_1090, HIP_0, HIP_0 }}, - {"__hneu2", {HIP_1090, HIP_0, HIP_0 }}, - {"__hleu2", {HIP_1090, HIP_0, HIP_0 }}, - {"__hgeu2", {HIP_1090, HIP_0, HIP_0 }}, - {"__hltu2", {HIP_1090, HIP_0, HIP_0 }}, - {"__hgtu2", {HIP_1090, HIP_0, HIP_0 }}, - {"__hisnan2", {HIP_1060, HIP_0, HIP_0 }}, - {"__hadd2", {HIP_1060, HIP_0, HIP_0 }}, - {"__hsub2", {HIP_1060, HIP_0, HIP_0 }}, - {"__hmul2", {HIP_1060, HIP_0, HIP_0 }}, - {"__h2div", {HIP_1090, HIP_0, HIP_0 }}, - {"__hadd2_sat", {HIP_1060, HIP_0, HIP_0 }}, - {"__hsub2_sat", {HIP_1060, HIP_0, HIP_0 }}, - {"__hmul2_sat", {HIP_1060, HIP_0, HIP_0 }}, - {"__hfma2", {HIP_1060, HIP_0, HIP_0 }}, - {"__hfma2_sat", {HIP_1060, HIP_0, HIP_0 }}, - {"__hneg2", {HIP_1060, HIP_0, HIP_0 }}, - {"__hsub", {HIP_1060, HIP_0, HIP_0 }}, - {"__hmul", {HIP_1060, HIP_0, HIP_0 }}, - {"__hdiv", {HIP_1090, HIP_0, HIP_0 }}, - {"__hadd_sat", {HIP_1060, HIP_0, HIP_0 }}, - {"__hsub_sat", {HIP_1060, HIP_0, HIP_0 }}, - {"__hmul_sat", {HIP_1060, HIP_0, HIP_0 }}, - {"__hfma", {HIP_1060, HIP_0, HIP_0 }}, - {"__hfma_sat", {HIP_1060, HIP_0, HIP_0 }}, - {"__hneg", {HIP_1060, HIP_0, HIP_0 }}, - {"__habs2", {HIP_3050, HIP_0, HIP_0 }}, - {"__habs", {HIP_3050, HIP_0, HIP_0 }}, - {"__hbeq2", {HIP_1060, HIP_0, HIP_0 }}, - {"__hbne2", {HIP_1060, HIP_0, HIP_0 }}, - {"__hble2", {HIP_1060, HIP_0, HIP_0 }}, - {"__hbge2", {HIP_1060, HIP_0, HIP_0 }}, - {"__hblt2", {HIP_1060, HIP_0, HIP_0 }}, - {"__hbgt2", {HIP_1060, HIP_0, HIP_0 }}, - {"__hbequ2", {HIP_1090, HIP_0, HIP_0 }}, - {"__hbneu2", {HIP_1090, HIP_0, HIP_0 }}, - {"__hbleu2", {HIP_1090, HIP_0, HIP_0 }}, - 
{"__hbgeu2", {HIP_1090, HIP_0, HIP_0 }}, - {"__hbltu2", {HIP_1090, HIP_0, HIP_0 }}, - {"__hbgtu2", {HIP_1090, HIP_0, HIP_0 }}, - {"__heq", {HIP_1060, HIP_0, HIP_0 }}, - {"__hne", {HIP_1060, HIP_0, HIP_0 }}, - {"__hle", {HIP_1060, HIP_0, HIP_0 }}, - {"__hge", {HIP_1060, HIP_0, HIP_0 }}, - {"__hlt", {HIP_1060, HIP_0, HIP_0 }}, - {"__hgt", {HIP_1060, HIP_0, HIP_0 }}, - {"__hequ", {HIP_1090, HIP_0, HIP_0 }}, - {"__hneu", {HIP_1090, HIP_0, HIP_0 }}, - {"__hleu", {HIP_1090, HIP_0, HIP_0 }}, - {"__hgeu", {HIP_1090, HIP_0, HIP_0 }}, - {"__hltu", {HIP_1090, HIP_0, HIP_0 }}, - {"__hgtu", {HIP_1090, HIP_0, HIP_0 }}, - {"__hisnan", {HIP_1060, HIP_0, HIP_0 }}, - {"hsqrt", {HIP_1060, HIP_0, HIP_0 }}, - {"hrsqrt", {HIP_1060, HIP_0, HIP_0 }}, - {"hrcp", {HIP_1090, HIP_0, HIP_0 }}, - {"hlog", {HIP_1060, HIP_0, HIP_0 }}, - {"hlog2", {HIP_1060, HIP_0, HIP_0 }}, - {"hlog10", {HIP_1060, HIP_0, HIP_0 }}, - {"hexp", {HIP_1060, HIP_0, HIP_0 }}, - {"hexp2", {HIP_1060, HIP_0, HIP_0 }}, - {"hexp10", {HIP_1060, HIP_0, HIP_0 }}, - {"hcos", {HIP_1060, HIP_0, HIP_0 }}, - {"hsin", {HIP_1060, HIP_0, HIP_0 }}, - {"h2sqrt", {HIP_1060, HIP_0, HIP_0 }}, - {"h2rsqrt", {HIP_1060, HIP_0, HIP_0 }}, - {"h2rcp", {HIP_1060, HIP_0, HIP_0 }}, - {"h2log", {HIP_1060, HIP_0, HIP_0 }}, - {"h2log2", {HIP_1060, HIP_0, HIP_0 }}, - {"h2log10", {HIP_1060, HIP_0, HIP_0 }}, - {"h2exp", {HIP_1060, HIP_0, HIP_0 }}, - {"h2exp2", {HIP_1060, HIP_0, HIP_0 }}, - {"h2exp10", {HIP_1060, HIP_0, HIP_0 }}, - {"h2cos", {HIP_1060, HIP_0, HIP_0 }}, - {"h2sin", {HIP_1060, HIP_0, HIP_0 }}, - {"__shfl", {HIP_1060, HIP_0, HIP_0 }}, - {"__shfl_up", {HIP_1060, HIP_0, HIP_0 }}, - {"__shfl_down", {HIP_1060, HIP_0, HIP_0 }}, - {"__shfl_xor", {HIP_1060, HIP_0, HIP_0 }}, - {"atomicAdd", {HIP_1060, HIP_0, HIP_0 }}, - {"atomicSub", {HIP_1060, HIP_0, HIP_0 }}, - {"atomicExch", {HIP_1060, HIP_0, HIP_0 }}, - {"atomicMin", {HIP_1060, HIP_0, HIP_0 }}, - {"atomicMax", {HIP_1060, HIP_0, HIP_0 }}, - {"atomicInc", {HIP_1060, HIP_0, HIP_0 }}, - {"atomicDec", 
{HIP_1060, HIP_0, HIP_0 }}, - {"atomicAnd", {HIP_1060, HIP_0, HIP_0 }}, - {"atomicOr", {HIP_1060, HIP_0, HIP_0 }}, - {"atomicXor", {HIP_1060, HIP_0, HIP_0 }}, - {"atomicCAS", {HIP_1060, HIP_0, HIP_0 }}, - {"__all", {HIP_1060, HIP_0, HIP_0 }}, - {"__any", {HIP_1060, HIP_0, HIP_0 }}, - {"__ballot", {HIP_1060, HIP_0, HIP_0 }}, - {"clock64", {HIP_1060, HIP_0, HIP_0 }}, - {"clock", {HIP_1060, HIP_0, HIP_0 }}, - {"__dadd_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__ddiv_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__dmul_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__drcp_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__dsqrt_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__dsub_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__fma_rn", {HIP_1060, HIP_0, HIP_0 }}, - {"__assert_fail", {HIP_1090, HIP_0, HIP_0 }}, - {"__assertfail", {HIP_1090, HIP_0, HIP_0 }}, - {"atomicCAS_system", {HIP_4030, HIP_0, HIP_0 }}, - {"atomicSub_system", {HIP_4030, HIP_0, HIP_0 }}, - {"atomicAdd_system", {HIP_4030, HIP_0, HIP_0 }}, - {"atomicExch_system", {HIP_4030, HIP_0, HIP_0 }}, - {"atomicMin_system", {HIP_4030, HIP_0, HIP_0 }}, - {"atomicMax_system", {HIP_4030, HIP_0, HIP_0 }}, - {"atomicAnd_system", {HIP_4030, HIP_0, HIP_0 }}, - {"atomicOr_system", {HIP_4030, HIP_0, HIP_0 }}, - {"atomicXor_system", {HIP_4030, HIP_0, HIP_0 }}, - {"__funnelshift_l", {HIP_4040, HIP_0, HIP_0 }}, - {"__funnelshift_lc", {HIP_4040, HIP_0, HIP_0 }}, - {"__funnelshift_r", {HIP_4040, HIP_0, HIP_0 }}, - {"__funnelshift_rc", {HIP_4040, HIP_0, HIP_0 }}, + {"abs", {HIP_1060, HIP_0, HIP_0 }}, + {"labs", {HIP_1090, HIP_0, HIP_0 }}, + {"llabs", {HIP_1090, HIP_0, HIP_0 }}, + {"fabs", {HIP_1060, HIP_0, HIP_0 }}, + {"fabsf", {HIP_1060, HIP_0, HIP_0 }}, + {"min", {HIP_1060, HIP_0, HIP_0 }}, + {"fminf", {HIP_1060, HIP_0, HIP_0 }}, + {"fmin", {HIP_1060, HIP_0, HIP_0 }}, + {"max", {HIP_1060, HIP_0, HIP_0 }}, + {"fmaxf", {HIP_1060, HIP_0, HIP_0 }}, + {"fmax", {HIP_1060, HIP_0, HIP_0 }}, + {"sin", {HIP_1060, HIP_0, HIP_0 }}, + {"cos", {HIP_1060, HIP_0, HIP_0 }}, + {"sincos", 
{HIP_1060, HIP_0, HIP_0 }}, + {"sincosf", {HIP_1060, HIP_0, HIP_0 }}, + {"tan", {HIP_1060, HIP_0, HIP_0 }}, + {"sqrt", {HIP_1060, HIP_0, HIP_0 }}, + {"rsqrt", {HIP_1060, HIP_0, HIP_0 }}, + {"rsqrtf", {HIP_1060, HIP_0, HIP_0 }}, + {"log2", {HIP_1060, HIP_0, HIP_0 }}, + {"exp2", {HIP_1060, HIP_0, HIP_0 }}, + {"exp2f", {HIP_1060, HIP_0, HIP_0 }}, + {"exp10", {HIP_1060, HIP_0, HIP_0 }}, + {"exp10f", {HIP_1060, HIP_0, HIP_0 }}, + {"expm1", {HIP_1060, HIP_0, HIP_0 }}, + {"expm1f", {HIP_1060, HIP_0, HIP_0 }}, + {"log2f", {HIP_1060, HIP_0, HIP_0 }}, + {"log10", {HIP_1060, HIP_0, HIP_0 }}, + {"log", {HIP_1060, HIP_0, HIP_0 }}, + {"log1p", {HIP_1060, HIP_0, HIP_0 }}, + {"log1pf", {HIP_1060, HIP_0, HIP_0 }}, + {"floor", {HIP_1060, HIP_0, HIP_0 }}, + {"exp", {HIP_1060, HIP_0, HIP_0 }}, + {"cosh", {HIP_1060, HIP_0, HIP_0 }}, + {"sinh", {HIP_1060, HIP_0, HIP_0 }}, + {"tanh", {HIP_1060, HIP_0, HIP_0 }}, + {"acosh", {HIP_1060, HIP_0, HIP_0 }}, + {"acoshf", {HIP_1060, HIP_0, HIP_0 }}, + {"asinh", {HIP_1060, HIP_0, HIP_0 }}, + {"asinhf", {HIP_1060, HIP_0, HIP_0 }}, + {"atanh", {HIP_1060, HIP_0, HIP_0 }}, + {"atanhf", {HIP_1060, HIP_0, HIP_0 }}, + {"ldexp", {HIP_1060, HIP_0, HIP_0 }}, + {"ldexpf", {HIP_1060, HIP_0, HIP_0 }}, + {"logb", {HIP_1060, HIP_0, HIP_0 }}, + {"logbf", {HIP_1060, HIP_0, HIP_0 }}, + {"ilogb", {HIP_1060, HIP_0, HIP_0 }}, + {"ilogbf", {HIP_1060, HIP_0, HIP_0 }}, + {"scalbn", {HIP_1060, HIP_0, HIP_0 }}, + {"scalbnf", {HIP_1060, HIP_0, HIP_0 }}, + {"scalbln", {HIP_1060, HIP_0, HIP_0 }}, + {"scalblnf", {HIP_1060, HIP_0, HIP_0 }}, + {"frexp", {HIP_1060, HIP_0, HIP_0 }}, + {"frexpf", {HIP_1060, HIP_0, HIP_0 }}, + {"round", {HIP_1060, HIP_0, HIP_0 }}, + {"roundf", {HIP_1060, HIP_0, HIP_0 }}, + {"lround", {HIP_1060, HIP_0, HIP_0 }}, + {"lroundf", {HIP_1060, HIP_0, HIP_0 }}, + {"llround", {HIP_1060, HIP_0, HIP_0 }}, + {"llroundf", {HIP_1060, HIP_0, HIP_0 }}, + {"rint", {HIP_1060, HIP_0, HIP_0 }}, + {"rintf", {HIP_1060, HIP_0, HIP_0 }}, + {"lrint", {HIP_1060, HIP_0, HIP_0 
}}, + {"lrintf", {HIP_1060, HIP_0, HIP_0 }}, + {"llrint", {HIP_1060, HIP_0, HIP_0 }}, + {"llrintf", {HIP_1060, HIP_0, HIP_0 }}, + {"nearbyint", {HIP_1060, HIP_0, HIP_0 }}, + {"nearbyintf", {HIP_1060, HIP_0, HIP_0 }}, + {"ceil", {HIP_1060, HIP_0, HIP_0 }}, + {"trunc", {HIP_1060, HIP_0, HIP_0 }}, + {"truncf", {HIP_1060, HIP_0, HIP_0 }}, + {"fdim", {HIP_1060, HIP_0, HIP_0 }}, + {"fdimf", {HIP_1060, HIP_0, HIP_0 }}, + {"atan2", {HIP_1060, HIP_0, HIP_0 }}, + {"atan", {HIP_1060, HIP_0, HIP_0 }}, + {"acos", {HIP_1060, HIP_0, HIP_0 }}, + {"asin", {HIP_1060, HIP_0, HIP_0 }}, + {"hypot", {HIP_1060, HIP_0, HIP_0 }}, + {"rhypot", {HIP_1060, HIP_0, HIP_0 }}, + {"hypotf", {HIP_1060, HIP_0, HIP_0 }}, + {"rhypotf", {HIP_1060, HIP_0, HIP_0 }}, + {"norm3d", {HIP_1060, HIP_0, HIP_0 }}, + {"rnorm3d", {HIP_1060, HIP_0, HIP_0 }}, + {"norm4d", {HIP_1060, HIP_0, HIP_0 }}, + {"rnorm4d", {HIP_1060, HIP_0, HIP_0 }}, + {"norm", {HIP_1060, HIP_0, HIP_0 }}, + {"rnorm", {HIP_1060, HIP_0, HIP_0 }}, + {"rnormf", {HIP_1060, HIP_0, HIP_0 }}, + {"normf", {HIP_1060, HIP_0, HIP_0 }}, + {"norm3df", {HIP_1060, HIP_0, HIP_0 }}, + {"rnorm3df", {HIP_1060, HIP_0, HIP_0 }}, + {"norm4df", {HIP_1060, HIP_0, HIP_0 }}, + {"rnorm4df", {HIP_1060, HIP_0, HIP_0 }}, + {"cbrt", {HIP_1060, HIP_0, HIP_0 }}, + {"cbrtf", {HIP_1060, HIP_0, HIP_0 }}, + {"rcbrt", {HIP_1060, HIP_0, HIP_0 }}, + {"rcbrtf", {HIP_1060, HIP_0, HIP_0 }}, + {"sinpi", {HIP_1060, HIP_0, HIP_0 }}, + {"sinpif", {HIP_1060, HIP_0, HIP_0 }}, + {"cospi", {HIP_1060, HIP_0, HIP_0 }}, + {"cospif", {HIP_1060, HIP_0, HIP_0 }}, + {"sincospi", {HIP_1060, HIP_0, HIP_0 }}, + {"sincospif", {HIP_1060, HIP_0, HIP_0 }}, + {"pow", {HIP_1060, HIP_0, HIP_0 }}, + {"modf", {HIP_1090, HIP_0, HIP_0 }}, + {"fmod", {HIP_1060, HIP_0, HIP_0 }}, + {"remainder", {HIP_1060, HIP_0, HIP_0 }}, + {"remainderf", {HIP_1060, HIP_0, HIP_0 }}, + {"remquo", {HIP_1090, HIP_0, HIP_0 }}, + {"remquof", {HIP_1060, HIP_0, HIP_0 }}, + {"j0", {HIP_1060, HIP_0, HIP_0 }}, + {"j0f", {HIP_1060, HIP_0, 
HIP_0 }}, + {"j1", {HIP_1060, HIP_0, HIP_0 }}, + {"j1f", {HIP_1060, HIP_0, HIP_0 }}, + {"jn", {HIP_1060, HIP_0, HIP_0 }}, + {"jnf", {HIP_1060, HIP_0, HIP_0 }}, + {"y0", {HIP_1060, HIP_0, HIP_0 }}, + {"y0f", {HIP_1060, HIP_0, HIP_0 }}, + {"y1", {HIP_1060, HIP_0, HIP_0 }}, + {"y1f", {HIP_1060, HIP_0, HIP_0 }}, + {"yn", {HIP_1060, HIP_0, HIP_0 }}, + {"ynf", {HIP_1060, HIP_0, HIP_0 }}, + {"cyl_bessel_i0", {HIP_1090, HIP_0, HIP_0 }}, + {"cyl_bessel_i0f", {HIP_1090, HIP_0, HIP_0 }}, + {"cyl_bessel_i1", {HIP_1090, HIP_0, HIP_0 }}, + {"cyl_bessel_i1f", {HIP_1090, HIP_0, HIP_0 }}, + {"erf", {HIP_1060, HIP_0, HIP_0 }}, + {"erff", {HIP_1060, HIP_0, HIP_0 }}, + {"erfinv", {HIP_1060, HIP_0, HIP_0 }}, + {"erfinvf", {HIP_1060, HIP_0, HIP_0 }}, + {"erfc", {HIP_1060, HIP_0, HIP_0 }}, + {"erfcf", {HIP_1060, HIP_0, HIP_0 }}, + {"lgamma", {HIP_1060, HIP_0, HIP_0 }}, + {"erfcinv", {HIP_1060, HIP_0, HIP_0 }}, + {"erfcinvf", {HIP_1060, HIP_0, HIP_0 }}, + {"normcdfinv", {HIP_1060, HIP_0, HIP_0 }}, + {"normcdfinvf", {HIP_1060, HIP_0, HIP_0 }}, + {"normcdf", {HIP_1060, HIP_0, HIP_0 }}, + {"normcdff", {HIP_1060, HIP_0, HIP_0 }}, + {"erfcx", {HIP_1060, HIP_0, HIP_0 }}, + {"erfcxf", {HIP_1060, HIP_0, HIP_0 }}, + {"lgammaf", {HIP_1060, HIP_0, HIP_0 }}, + {"tgamma", {HIP_1060, HIP_0, HIP_0 }}, + {"tgammaf", {HIP_1060, HIP_0, HIP_0 }}, + {"copysign", {HIP_1060, HIP_0, HIP_0 }}, + {"copysignf", {HIP_1060, HIP_0, HIP_0 }}, + {"nextafter", {HIP_1060, HIP_0, HIP_0 }}, + {"nextafterf", {HIP_1090, HIP_0, HIP_0 }}, + {"nan", {HIP_1060, HIP_0, HIP_0 }}, + {"nanf", {HIP_1060, HIP_0, HIP_0 }}, + {"fma", {HIP_1060, HIP_0, HIP_0 }}, + {"fmaf", {HIP_1060, HIP_0, HIP_0 }}, + {"acosf", {HIP_1060, HIP_0, HIP_0 }}, + {"asinf", {HIP_1060, HIP_0, HIP_0 }}, + {"atanf", {HIP_1060, HIP_0, HIP_0 }}, + {"atan2f", {HIP_1060, HIP_0, HIP_0 }}, + {"cosf", {HIP_1060, HIP_0, HIP_0 }}, + {"sinf", {HIP_1060, HIP_0, HIP_0 }}, + {"tanf", {HIP_1060, HIP_0, HIP_0 }}, + {"coshf", {HIP_1060, HIP_0, HIP_0 }}, + {"sinhf", {HIP_1060, 
HIP_0, HIP_0 }}, + {"tanhf", {HIP_1060, HIP_0, HIP_0 }}, + {"expf", {HIP_1060, HIP_0, HIP_0 }}, + {"logf", {HIP_1060, HIP_0, HIP_0 }}, + {"log10f", {HIP_1060, HIP_0, HIP_0 }}, + {"modff", {HIP_1090, HIP_0, HIP_0 }}, + {"powf", {HIP_1060, HIP_0, HIP_0 }}, + {"sqrtf", {HIP_1060, HIP_0, HIP_0 }}, + {"ceilf", {HIP_1060, HIP_0, HIP_0 }}, + {"floorf", {HIP_1060, HIP_0, HIP_0 }}, + {"fmodf", {HIP_1060, HIP_0, HIP_0 }}, + {"signbit", {HIP_1060, HIP_0, HIP_0 }}, + {"isfinite", {HIP_1060, HIP_0, HIP_0 }}, + {"isnan", {HIP_1060, HIP_0, HIP_0 }}, + {"isinf", {HIP_1060, HIP_0, HIP_0 }}, + {"__mulhi", {HIP_1060, HIP_0, HIP_0 }}, + {"__umulhi", {HIP_1060, HIP_0, HIP_0 }}, + {"__mul64hi", {HIP_1060, HIP_0, HIP_0 }}, + {"__umul64hi", {HIP_1060, HIP_0, HIP_0 }}, + {"__int_as_float", {HIP_1060, HIP_0, HIP_0 }}, + {"__float_as_int", {HIP_1060, HIP_0, HIP_0 }}, + {"__float_as_uint", {HIP_1060, HIP_0, HIP_0 }}, + {"__uint_as_float", {HIP_1060, HIP_0, HIP_0 }}, + {"__syncthreads", {HIP_1060, HIP_0, HIP_0 }}, + {"__syncthreads_count", {HIP_3070, HIP_0, HIP_0 }}, + {"__syncthreads_and", {HIP_3070, HIP_0, HIP_0 }}, + {"__syncthreads_or", {HIP_3070, HIP_0, HIP_0 }}, + {"__threadfence", {HIP_1060, HIP_0, HIP_0 }}, + {"__threadfence_block", {HIP_1060, HIP_0, HIP_0 }}, + {"__threadfence_system", {HIP_1060, HIP_0, HIP_0 }}, + {"__saturatef", {HIP_1060, HIP_0, HIP_0 }}, + {"__sad", {HIP_1060, HIP_0, HIP_0 }}, + {"__usad", {HIP_1060, HIP_0, HIP_0 }}, + {"__mul24", {HIP_1060, HIP_0, HIP_0 }}, + {"__umul24", {HIP_1060, HIP_0, HIP_0 }}, + {"fdividef", {HIP_1060, HIP_0, HIP_0 }}, + {"__fdividef", {HIP_1060, HIP_0, HIP_0 }}, + {"__sinf", {HIP_1060, HIP_0, HIP_0 }}, + {"__cosf", {HIP_1060, HIP_0, HIP_0 }}, + {"__tanf", {HIP_1060, HIP_0, HIP_0 }}, + {"__sincosf", {HIP_1060, HIP_0, HIP_0 }}, + {"__expf", {HIP_1060, HIP_0, HIP_0 }}, + {"__exp10f", {HIP_1060, HIP_0, HIP_0 }}, + {"__log2f", {HIP_1060, HIP_0, HIP_0 }}, + {"__log10f", {HIP_1060, HIP_0, HIP_0 }}, + {"__logf", {HIP_1060, HIP_0, HIP_0 }}, + 
{"__powf", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2int_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2int_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2int_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2int_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2uint_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2uint_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2uint_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2uint_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__int2float_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__int2float_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__int2float_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__int2float_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__uint2float_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__uint2float_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__uint2float_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__uint2float_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2ll_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2ll_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2ll_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2ll_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2ull_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2ull_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2ull_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2ull_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__ll2float_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__ll2float_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__ll2float_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__ll2float_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__ull2float_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__ull2float_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__ull2float_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__ull2float_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__fadd_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__fsub_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__fmul_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__fmaf_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__frcp_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__fsqrt_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__frsqrt_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__fdiv_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__clz", {HIP_1060, HIP_0, HIP_0 
}}, + {"__ffs", {HIP_1060, HIP_0, HIP_0 }}, + {"__popc", {HIP_1060, HIP_0, HIP_0 }}, + {"__brev", {HIP_1060, HIP_0, HIP_0 }}, + {"__clzll", {HIP_1060, HIP_0, HIP_0 }}, + {"__ffsll", {HIP_1060, HIP_0, HIP_0 }}, + {"__popcll", {HIP_1060, HIP_0, HIP_0 }}, + {"__brevll", {HIP_1060, HIP_0, HIP_0 }}, + {"__byte_perm", {HIP_1060, HIP_0, HIP_0 }}, + {"__hadd", {HIP_1060, HIP_0, HIP_0 }}, + {"__rhadd", {HIP_1060, HIP_0, HIP_0 }}, + {"__uhadd", {HIP_1060, HIP_0, HIP_0 }}, + {"__urhadd", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2float_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2float_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2float_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2float_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2hiint", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2loint", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2int_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2int_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2int_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2int_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2ll_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2ll_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2ll_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2ll_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2uint_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2uint_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2uint_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2uint_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2ull_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2ull_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2ull_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__double2ull_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__double_as_longlong", {HIP_1060, HIP_0, HIP_0 }}, + {"__hiloint2double", {HIP_1060, HIP_0, HIP_0 }}, + {"__int2double_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__ll2double_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__ll2double_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__ll2double_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__ll2double_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__longlong_as_double", 
{HIP_1060, HIP_0, HIP_0 }}, + {"__uint2double_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__ull2double_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__ull2double_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__ull2double_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__ull2double_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2half", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2half_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2half_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2half_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2half_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2float", {HIP_1060, HIP_0, HIP_0 }}, + {"__float2half2_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__floats2half2_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__low2float", {HIP_1060, HIP_0, HIP_0 }}, + {"__high2float", {HIP_1060, HIP_0, HIP_0 }}, + {"__float22half2_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__half22float2", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2int_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2int_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2int_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2int_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__int2half_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__int2half_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__int2half_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__int2half_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2short_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2short_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2short_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2short_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__short2half_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__short2half_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__short2half_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__short2half_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2uint_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2uint_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2uint_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2uint_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__uint2half_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__uint2half_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__uint2half_rd", {HIP_1060, HIP_0, HIP_0 }}, + 
{"__uint2half_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2ushort_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2ushort_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2ushort_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2ushort_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__ushort2half_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__ushort2half_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__ushort2half_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__ushort2half_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2ull_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2ull_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2ull_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2ull_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__ull2half_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__ull2half_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__ull2half_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__ull2half_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2ll_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2ll_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2ll_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2ll_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"__ll2half_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__ll2half_rz", {HIP_1060, HIP_0, HIP_0 }}, + {"__ll2half_rd", {HIP_1060, HIP_0, HIP_0 }}, + {"__ll2half_ru", {HIP_1060, HIP_0, HIP_0 }}, + {"htrunc", {HIP_1060, HIP_0, HIP_0 }}, + {"hceil", {HIP_1060, HIP_0, HIP_0 }}, + {"hfloor", {HIP_1060, HIP_0, HIP_0 }}, + {"hrint", {HIP_1060, HIP_0, HIP_0 }}, + {"h2trunc", {HIP_1060, HIP_0, HIP_0 }}, + {"h2ceil", {HIP_1060, HIP_0, HIP_0 }}, + {"h2floor", {HIP_1060, HIP_0, HIP_0 }}, + {"h2rint", {HIP_1090, HIP_0, HIP_0 }}, + {"__half2half2", {HIP_1090, HIP_0, HIP_0 }}, + {"__lowhigh2highlow", {HIP_1060, HIP_0, HIP_0 }}, + {"__lows2half2", {HIP_1060, HIP_0, HIP_0 }}, + {"__highs2half2", {HIP_1060, HIP_0, HIP_0 }}, + {"__high2half", {HIP_1060, HIP_0, HIP_0 }}, + {"__low2half", {HIP_1060, HIP_0, HIP_0 }}, + {"__hisinf", {HIP_1060, HIP_0, HIP_0 }}, + {"__halves2half2", {HIP_1060, HIP_0, HIP_0 }}, + {"__low2half2", {HIP_1060, HIP_0, HIP_0 }}, + {"__high2half2", {HIP_1060, 
HIP_0, HIP_0 }}, + {"__half_as_short", {HIP_1060, HIP_0, HIP_0 }}, + {"__half_as_ushort", {HIP_1060, HIP_0, HIP_0 }}, + {"__short_as_half", {HIP_1090, HIP_0, HIP_0 }}, + {"__ushort_as_half", {HIP_1060, HIP_0, HIP_0 }}, + {"__ldg", {HIP_1060, HIP_0, HIP_0 }}, + {"__ldcg", {HIP_1090, HIP_0, HIP_0 }}, + {"__ldca", {HIP_1090, HIP_0, HIP_0 }}, + {"__ldcs", {HIP_1090, HIP_0, HIP_0 }}, + {"__heq2", {HIP_1060, HIP_0, HIP_0 }}, + {"__hne2", {HIP_1060, HIP_0, HIP_0 }}, + {"__hle2", {HIP_1060, HIP_0, HIP_0 }}, + {"__hge2", {HIP_1060, HIP_0, HIP_0 }}, + {"__hlt2", {HIP_1060, HIP_0, HIP_0 }}, + {"__hgt2", {HIP_1060, HIP_0, HIP_0 }}, + {"__hequ2", {HIP_1090, HIP_0, HIP_0 }}, + {"__hneu2", {HIP_1090, HIP_0, HIP_0 }}, + {"__hleu2", {HIP_1090, HIP_0, HIP_0 }}, + {"__hgeu2", {HIP_1090, HIP_0, HIP_0 }}, + {"__hltu2", {HIP_1090, HIP_0, HIP_0 }}, + {"__hgtu2", {HIP_1090, HIP_0, HIP_0 }}, + {"__hisnan2", {HIP_1060, HIP_0, HIP_0 }}, + {"__hadd2", {HIP_1060, HIP_0, HIP_0 }}, + {"__hsub2", {HIP_1060, HIP_0, HIP_0 }}, + {"__hmul2", {HIP_1060, HIP_0, HIP_0 }}, + {"__h2div", {HIP_1090, HIP_0, HIP_0 }}, + {"__hadd2_sat", {HIP_1060, HIP_0, HIP_0 }}, + {"__hsub2_sat", {HIP_1060, HIP_0, HIP_0 }}, + {"__hmul2_sat", {HIP_1060, HIP_0, HIP_0 }}, + {"__hfma2", {HIP_1060, HIP_0, HIP_0 }}, + {"__hfma2_sat", {HIP_1060, HIP_0, HIP_0 }}, + {"__hneg2", {HIP_1060, HIP_0, HIP_0 }}, + {"__hsub", {HIP_1060, HIP_0, HIP_0 }}, + {"__hmul", {HIP_1060, HIP_0, HIP_0 }}, + {"__hdiv", {HIP_1090, HIP_0, HIP_0 }}, + {"__hadd_sat", {HIP_1060, HIP_0, HIP_0 }}, + {"__hsub_sat", {HIP_1060, HIP_0, HIP_0 }}, + {"__hmul_sat", {HIP_1060, HIP_0, HIP_0 }}, + {"__hfma", {HIP_1060, HIP_0, HIP_0 }}, + {"__hfma_sat", {HIP_1060, HIP_0, HIP_0 }}, + {"__hneg", {HIP_1060, HIP_0, HIP_0 }}, + {"__habs2", {HIP_3050, HIP_0, HIP_0 }}, + {"__habs", {HIP_3050, HIP_0, HIP_0 }}, + {"__hbeq2", {HIP_1060, HIP_0, HIP_0 }}, + {"__hbne2", {HIP_1060, HIP_0, HIP_0 }}, + {"__hble2", {HIP_1060, HIP_0, HIP_0 }}, + {"__hbge2", {HIP_1060, HIP_0, HIP_0 }}, + 
{"__hblt2", {HIP_1060, HIP_0, HIP_0 }}, + {"__hbgt2", {HIP_1060, HIP_0, HIP_0 }}, + {"__hbequ2", {HIP_1090, HIP_0, HIP_0 }}, + {"__hbneu2", {HIP_1090, HIP_0, HIP_0 }}, + {"__hbleu2", {HIP_1090, HIP_0, HIP_0 }}, + {"__hbgeu2", {HIP_1090, HIP_0, HIP_0 }}, + {"__hbltu2", {HIP_1090, HIP_0, HIP_0 }}, + {"__hbgtu2", {HIP_1090, HIP_0, HIP_0 }}, + {"__heq", {HIP_1060, HIP_0, HIP_0 }}, + {"__hne", {HIP_1060, HIP_0, HIP_0 }}, + {"__hle", {HIP_1060, HIP_0, HIP_0 }}, + {"__hge", {HIP_1060, HIP_0, HIP_0 }}, + {"__hlt", {HIP_1060, HIP_0, HIP_0 }}, + {"__hgt", {HIP_1060, HIP_0, HIP_0 }}, + {"__hequ", {HIP_1090, HIP_0, HIP_0 }}, + {"__hneu", {HIP_1090, HIP_0, HIP_0 }}, + {"__hleu", {HIP_1090, HIP_0, HIP_0 }}, + {"__hgeu", {HIP_1090, HIP_0, HIP_0 }}, + {"__hltu", {HIP_1090, HIP_0, HIP_0 }}, + {"__hgtu", {HIP_1090, HIP_0, HIP_0 }}, + {"__hisnan", {HIP_1060, HIP_0, HIP_0 }}, + {"hsqrt", {HIP_1060, HIP_0, HIP_0 }}, + {"hrsqrt", {HIP_1060, HIP_0, HIP_0 }}, + {"hrcp", {HIP_1090, HIP_0, HIP_0 }}, + {"hlog", {HIP_1060, HIP_0, HIP_0 }}, + {"hlog2", {HIP_1060, HIP_0, HIP_0 }}, + {"hlog10", {HIP_1060, HIP_0, HIP_0 }}, + {"hexp", {HIP_1060, HIP_0, HIP_0 }}, + {"hexp2", {HIP_1060, HIP_0, HIP_0 }}, + {"hexp10", {HIP_1060, HIP_0, HIP_0 }}, + {"hcos", {HIP_1060, HIP_0, HIP_0 }}, + {"hsin", {HIP_1060, HIP_0, HIP_0 }}, + {"h2sqrt", {HIP_1060, HIP_0, HIP_0 }}, + {"h2rsqrt", {HIP_1060, HIP_0, HIP_0 }}, + {"h2rcp", {HIP_1060, HIP_0, HIP_0 }}, + {"h2log", {HIP_1060, HIP_0, HIP_0 }}, + {"h2log2", {HIP_1060, HIP_0, HIP_0 }}, + {"h2log10", {HIP_1060, HIP_0, HIP_0 }}, + {"h2exp", {HIP_1060, HIP_0, HIP_0 }}, + {"h2exp2", {HIP_1060, HIP_0, HIP_0 }}, + {"h2exp10", {HIP_1060, HIP_0, HIP_0 }}, + {"h2cos", {HIP_1060, HIP_0, HIP_0 }}, + {"h2sin", {HIP_1060, HIP_0, HIP_0 }}, + {"__shfl", {HIP_1060, HIP_0, HIP_0 }}, + {"__shfl_up", {HIP_1060, HIP_0, HIP_0 }}, + {"__shfl_down", {HIP_1060, HIP_0, HIP_0 }}, + {"__shfl_xor", {HIP_1060, HIP_0, HIP_0 }}, + {"atomicAdd", {HIP_1060, HIP_0, HIP_0 }}, + {"atomicSub", 
{HIP_1060, HIP_0, HIP_0 }}, + {"atomicExch", {HIP_1060, HIP_0, HIP_0 }}, + {"atomicMin", {HIP_1060, HIP_0, HIP_0 }}, + {"atomicMax", {HIP_1060, HIP_0, HIP_0 }}, + {"atomicInc", {HIP_1060, HIP_0, HIP_0 }}, + {"atomicDec", {HIP_1060, HIP_0, HIP_0 }}, + {"atomicAnd", {HIP_1060, HIP_0, HIP_0 }}, + {"atomicOr", {HIP_1060, HIP_0, HIP_0 }}, + {"atomicXor", {HIP_1060, HIP_0, HIP_0 }}, + {"atomicCAS", {HIP_1060, HIP_0, HIP_0 }}, + {"__all", {HIP_1060, HIP_0, HIP_0 }}, + {"__any", {HIP_1060, HIP_0, HIP_0 }}, + {"__ballot", {HIP_1060, HIP_0, HIP_0 }}, + {"clock64", {HIP_1060, HIP_0, HIP_0 }}, + {"clock", {HIP_1060, HIP_0, HIP_0 }}, + {"__dadd_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__ddiv_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__dmul_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__drcp_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__dsqrt_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__dsub_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__fma_rn", {HIP_1060, HIP_0, HIP_0 }}, + {"__assert_fail", {HIP_1090, HIP_0, HIP_0 }}, + {"__assertfail", {HIP_1090, HIP_0, HIP_0 }}, + {"atomicCAS_system", {HIP_4030, HIP_0, HIP_0 }}, + {"atomicSub_system", {HIP_4030, HIP_0, HIP_0 }}, + {"atomicAdd_system", {HIP_4030, HIP_0, HIP_0 }}, + {"atomicExch_system", {HIP_4030, HIP_0, HIP_0 }}, + {"atomicMin_system", {HIP_4030, HIP_0, HIP_0 }}, + {"atomicMax_system", {HIP_4030, HIP_0, HIP_0 }}, + {"atomicAnd_system", {HIP_4030, HIP_0, HIP_0 }}, + {"atomicOr_system", {HIP_4030, HIP_0, HIP_0 }}, + {"atomicXor_system", {HIP_4030, HIP_0, HIP_0 }}, + {"__funnelshift_l", {HIP_4040, HIP_0, HIP_0 }}, + {"__funnelshift_lc", {HIP_4040, HIP_0, HIP_0 }}, + {"__funnelshift_r", {HIP_4040, HIP_0, HIP_0 }}, + {"__funnelshift_rc", {HIP_4040, HIP_0, HIP_0 }}, }; const std::map CUDA_DEVICE_TYPE_NAME_MAP { From 0fdca48b6d97f0fdff02ac40a5d7e1b9f9aa196c Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Thu, 13 Oct 2022 20:31:10 +0200 Subject: [PATCH 20/43] [HIPIFY][format] CUDA2HIP headers mapping --- src/CUDA2HIP.cpp | 98 
++++++++++++++++++++++++------------------------ 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/src/CUDA2HIP.cpp b/src/CUDA2HIP.cpp index 1711966c..7708f478 100644 --- a/src/CUDA2HIP.cpp +++ b/src/CUDA2HIP.cpp @@ -25,63 +25,63 @@ THE SOFTWARE. // Maps CUDA header names to HIP header names const std::map CUDA_INCLUDE_MAP { // CUDA includes - {"cuda.h", {"hip/hip_runtime.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_DRIVER, 0}}, - {"cuda_runtime.h", {"hip/hip_runtime.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_RUNTIME, 0}}, - {"cuda_runtime_api.h", {"hip/hip_runtime_api.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, - {"channel_descriptor.h", {"hip/channel_descriptor.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, - {"device_functions.h", {"hip/device_functions.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, - {"driver_types.h", {"hip/driver_types.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, - {"cuda_fp16.h", {"hip/hip_fp16.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, - {"cuda_texture_types.h", {"hip/hip_texture_types.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, - {"texture_fetch_functions.h", {"", "", CONV_INCLUDE, API_RUNTIME, 0}}, - {"vector_types.h", {"hip/hip_vector_types.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, - {"cuda_profiler_api.h", {"hip/hip_runtime_api.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, - {"cooperative_groups.h", {"hip/hip_cooperative_groups.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, + {"cuda.h", {"hip/hip_runtime.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_DRIVER, 0}}, + {"cuda_runtime.h", {"hip/hip_runtime.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_RUNTIME, 0}}, + {"cuda_runtime_api.h", {"hip/hip_runtime_api.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, + {"channel_descriptor.h", {"hip/channel_descriptor.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, + {"device_functions.h", {"hip/device_functions.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, + {"driver_types.h", {"hip/driver_types.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, + {"cuda_fp16.h", {"hip/hip_fp16.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, + 
{"cuda_texture_types.h", {"hip/hip_texture_types.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, + {"texture_fetch_functions.h", {"", "", CONV_INCLUDE, API_RUNTIME, 0}}, + {"vector_types.h", {"hip/hip_vector_types.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, + {"cuda_profiler_api.h", {"hip/hip_runtime_api.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, + {"cooperative_groups.h", {"hip/hip_cooperative_groups.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, // cuComplex includes - {"cuComplex.h", {"hip/hip_complex.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_COMPLEX, 0}}, + {"cuComplex.h", {"hip/hip_complex.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_COMPLEX, 0}}, // cuBLAS includes - {"cublas.h", {"hipblas.h", "rocblas.h", CONV_INCLUDE_CUDA_MAIN_H, API_BLAS, 0}}, - {"cublas_v2.h", {"hipblas.h", "rocblas.h", CONV_INCLUDE_CUDA_MAIN_V2_H, API_BLAS, 0}}, - {"cublas_api.h", {"hipblas.h", "rocblas.h", CONV_INCLUDE, API_BLAS, 0}}, + {"cublas.h", {"hipblas.h", "rocblas.h", CONV_INCLUDE_CUDA_MAIN_H, API_BLAS, 0}}, + {"cublas_v2.h", {"hipblas.h", "rocblas.h", CONV_INCLUDE_CUDA_MAIN_V2_H, API_BLAS, 0}}, + {"cublas_api.h", {"hipblas.h", "rocblas.h", CONV_INCLUDE, API_BLAS, 0}}, // cuRAND includes - {"curand.h", {"hiprand.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_RAND, 0}}, - {"curand_kernel.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, - {"curand_discrete.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, - {"curand_discrete2.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, - {"curand_globals.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, - {"curand_lognormal.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, - {"curand_mrg32k3a.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, - {"curand_mtgp32.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, - {"curand_mtgp32_host.h", {"hiprand_mtgp32_host.h", "", CONV_INCLUDE, API_RAND, 0}}, - {"curand_mtgp32_kernel.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, - {"curand_mtgp32dc_p_11213.h", 
{"rocrand_mtgp32_11213.h", "", CONV_INCLUDE, API_RAND, 0}}, - {"curand_normal.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, - {"curand_normal_static.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, - {"curand_philox4x32_x.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, - {"curand_poisson.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, - {"curand_precalc.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, - {"curand_uniform.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, + {"curand.h", {"hiprand.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_RAND, 0}}, + {"curand_kernel.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, + {"curand_discrete.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, + {"curand_discrete2.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, + {"curand_globals.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, + {"curand_lognormal.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, + {"curand_mrg32k3a.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, + {"curand_mtgp32.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, + {"curand_mtgp32_host.h", {"hiprand_mtgp32_host.h", "", CONV_INCLUDE, API_RAND, 0}}, + {"curand_mtgp32_kernel.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, + {"curand_mtgp32dc_p_11213.h", {"rocrand_mtgp32_11213.h", "", CONV_INCLUDE, API_RAND, 0}}, + {"curand_normal.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, + {"curand_normal_static.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, + {"curand_philox4x32_x.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, + {"curand_poisson.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, + {"curand_precalc.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, + {"curand_uniform.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, // cuDNN includes - {"cudnn.h", {"hipDNN.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_DNN, 0}}, + {"cudnn.h", {"hipDNN.h", 
"", CONV_INCLUDE_CUDA_MAIN_H, API_DNN, 0}}, // cuFFT includes - {"cufft.h", {"hipfft.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_FFT, 0}}, - {"cufftXt.h", {"hipfftXt.h", "", CONV_INCLUDE, API_FFT, 0}}, + {"cufft.h", {"hipfft.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_FFT, 0}}, + {"cufftXt.h", {"hipfftXt.h", "", CONV_INCLUDE, API_FFT, 0}}, // cuSPARSE includes - {"cusparse.h", {"hipsparse.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_SPARSE, 0}}, - {"cusparse_v2.h", {"hipsparse.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_SPARSE, 0}}, + {"cusparse.h", {"hipsparse.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_SPARSE, 0}}, + {"cusparse_v2.h", {"hipsparse.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_SPARSE, 0}}, // CUB includes - {"cub/cub.cuh", {"hipcub/hipcub.hpp", "", CONV_INCLUDE_CUDA_MAIN_H, API_CUB, 0}}, + {"cub/cub.cuh", {"hipcub/hipcub.hpp", "", CONV_INCLUDE_CUDA_MAIN_H, API_CUB, 0}}, // CAFFE2 includes - {"caffe2/core/common_gpu.h", {"caffe2/core/hip/common_gpu.h", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}}, - {"caffe2/core/context_gpu.h", {"caffe2/core/hip/context_gpu.h", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}}, - {"caffe2/operators/operator_fallback_gpu.h", {"", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}}, - {"caffe2/operators/spatial_batch_norm_op.h", {"caffe2/operators/hip/spatial_batch_norm_op_miopen.hip", "", CONV_INCLUDE, API_CAFFE2, 0}}, - {"caffe2/operators/generate_proposals_op_util_nms_gpu.h", {"", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}}, - {"caffe2/operators/max_pool_with_index_gpu.h", {"", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}}, - {"caffe2/operators/rnn/recurrent_network_executor_gpu.h", {"", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}}, - {"caffe2/utils/math/reduce.cuh", {"caffe2/utils/math/hip/reduce.cuh", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}}, - {"caffe2/operators/gather_op.cuh", {"caffe2/operators/math/gather_op.cuh", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}}, - {"caffe2/core/common_cudnn.h", {"caffe2/core/hip/common_miopen.h", "", 
CONV_INCLUDE, API_CAFFE2, 0}}, + {"caffe2/core/common_gpu.h", {"caffe2/core/hip/common_gpu.h", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}}, + {"caffe2/core/context_gpu.h", {"caffe2/core/hip/context_gpu.h", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}}, + {"caffe2/operators/operator_fallback_gpu.h", {"", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}}, + {"caffe2/operators/spatial_batch_norm_op.h", {"caffe2/operators/hip/spatial_batch_norm_op_miopen.hip", "", CONV_INCLUDE, API_CAFFE2, 0}}, + {"caffe2/operators/generate_proposals_op_util_nms_gpu.h", {"", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}}, + {"caffe2/operators/max_pool_with_index_gpu.h", {"", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}}, + {"caffe2/operators/rnn/recurrent_network_executor_gpu.h", {"", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}}, + {"caffe2/utils/math/reduce.cuh", {"caffe2/utils/math/hip/reduce.cuh", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}}, + {"caffe2/operators/gather_op.cuh", {"caffe2/operators/math/gather_op.cuh", "", CONV_INCLUDE, API_CAFFE2, 0, UNSUPPORTED}}, + {"caffe2/core/common_cudnn.h", {"caffe2/core/hip/common_miopen.h", "", CONV_INCLUDE, API_CAFFE2, 0}}, // RTC includes {"nvrtc.h", {"hiprtc.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_RTC, 0}}, }; From ddd3931338db8eca107664d69c668a636365b99b Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 18 Oct 2022 20:41:33 +0200 Subject: [PATCH 21/43] [HIPIFY][misc] Separate Device functions and types + Device types are actually device/host types [ToDo] + Provide fp16 and fp8 (new) types --- .../CUDA_Device_API_supported_by_HIP.md | 4 +++ src/CUDA2HIP_Device_functions.cpp | 10 +----- src/CUDA2HIP_Device_types.cpp | 33 +++++++++++++++++++ 3 files changed, 38 insertions(+), 9 deletions(-) create mode 100644 src/CUDA2HIP_Device_types.cpp diff --git a/doc/markdown/CUDA_Device_API_supported_by_HIP.md b/doc/markdown/CUDA_Device_API_supported_by_HIP.md index 6f6f2450..b7d0df66 100644 --- 
a/doc/markdown/CUDA_Device_API_supported_by_HIP.md +++ b/doc/markdown/CUDA_Device_API_supported_by_HIP.md @@ -686,5 +686,9 @@ |`yn`| | | |`yn`|1.6.0| | | | |`ynf`| | | |`ynf`|1.6.0| | | | +## **2. Device Types** + +Unsupported + \*A - Added; D - Deprecated; R - Removed; E - Experimental \ No newline at end of file diff --git a/src/CUDA2HIP_Device_functions.cpp b/src/CUDA2HIP_Device_functions.cpp index 30321b9b..95be0ef3 100644 --- a/src/CUDA2HIP_Device_functions.cpp +++ b/src/CUDA2HIP_Device_functions.cpp @@ -1241,15 +1241,7 @@ const std::map HIP_DEVICE_FUNCTION_VER_MAP { {"__funnelshift_rc", {HIP_4040, HIP_0, HIP_0 }}, }; -const std::map CUDA_DEVICE_TYPE_NAME_MAP { -}; - -const std::map CUDA_DEVICE_TYPE_NAME_VER_MAP { -}; - -const std::map HIP_DEVICE_TYPE_NAME_VER_MAP { -}; - const std::map CUDA_DEVICE_FUNCTION_API_SECTION_MAP { {1, "Device Functions"}, + {2, "Device Types"}, }; diff --git a/src/CUDA2HIP_Device_types.cpp b/src/CUDA2HIP_Device_types.cpp new file mode 100644 index 00000000..37c09dcf --- /dev/null +++ b/src/CUDA2HIP_Device_types.cpp @@ -0,0 +1,33 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "CUDA2HIP.h" + +// Maps the names of CUDA Device/Host types to the corresponding HIP types +const std::map CUDA_DEVICE_TYPE_NAME_MAP { +}; + +const std::map CUDA_DEVICE_TYPE_NAME_VER_MAP { +}; + +const std::map HIP_DEVICE_TYPE_NAME_VER_MAP { +}; From dc686d65e9fd2451d4ef24574c18ad07cf3a6ee8 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Wed, 19 Oct 2022 10:11:16 +0200 Subject: [PATCH 22/43] [HIPIFY][doc] LLVM 15.0.3 is the latest supported LLVM release + No patches are needed + Updated README.md accordingly + Tested on Windows 10 and Ubuntu 21.10 --- README.md | 57 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index c39258de..1289bfc8 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ After applying all the matchers, the output HIP source is produced. `hipify-clang` requires: -1. [**LLVM+CLANG**](http://releases.llvm.org) of at least version [4.0.0](http://releases.llvm.org/download.html#4.0.0); the latest stable and recommended release: [**15.0.2**](https://github.com/llvm/llvm-project/releases/tag/llvmorg-15.0.2). +1. [**LLVM+CLANG**](http://releases.llvm.org) of at least version [4.0.0](http://releases.llvm.org/download.html#4.0.0); the latest stable and recommended release: [**15.0.3**](https://github.com/llvm/llvm-project/releases/tag/llvmorg-15.0.3). 2. [**CUDA**](https://developer.nvidia.com/cuda-downloads) of at least version [8.0](https://developer.nvidia.com/cuda-80-ga2-download-archive), the latest supported version is [**11.7.1**](https://developer.nvidia.com/cuda-downloads). @@ -172,8 +172,9 @@ After applying all the matchers, the output HIP source is produced. 14.0.5, 14.0.6,
15.0.0, - 15.0.1, - 15.0.2 + 15.0.1,
+ 15.0.2, + 15.0.3 11.7.1 LATEST STABLE CONFIG @@ -189,7 +190,7 @@ After applying all the matchers, the output HIP source is produced. In most cases, you can get a suitable version of `LLVM+CLANG` with your package manager. Failing that or having multiple versions of `LLVM`, you can [download a release archive](http://releases.llvm.org/), build or install it, and set -[CMAKE_PREFIX_PATH](https://cmake.org/cmake/help/v3.5/variable/CMAKE_PREFIX_PATH.html) so `cmake` can find it; for instance: `-DCMAKE_PREFIX_PATH=d:\LLVM\15.0.2\dist` +[CMAKE_PREFIX_PATH](https://cmake.org/cmake/help/v3.5/variable/CMAKE_PREFIX_PATH.html) so `cmake` can find it; for instance: `-DCMAKE_PREFIX_PATH=d:\LLVM\15.0.3\dist` ### hipify-clang: usage @@ -287,7 +288,7 @@ Run `Visual Studio 16 2019`, open the generated `LLVM.sln`, build all, and build **LLVM >= 10.0.0:** -1. download [`LLVM project`](https://github.com/llvm/llvm-project/releases/tag/llvmorg-15.0.2) sources; +1. download [`LLVM project`](https://github.com/llvm/llvm-project/releases/tag/llvmorg-15.0.3) sources; 2. 
build [`LLVM project`](http://llvm.org/docs/CMake.html): ```bash @@ -352,21 +353,21 @@ Run `Visual Studio 17 2022`, open the generated `LLVM.sln`, build all, build pro * Install `lit` into `python`: - - ***Linux***: `python /usr/llvm/15.0.2/llvm-project/llvm/utils/lit/setup.py install` + - ***Linux***: `python /usr/llvm/15.0.3/llvm-project/llvm/utils/lit/setup.py install` - - ***Windows***: `python d:/LLVM/15.0.2/llvm-project/llvm/utils/lit/setup.py install` + - ***Windows***: `python d:/LLVM/15.0.3/llvm-project/llvm/utils/lit/setup.py install` * Starting with LLVM 6.0.1 path to `llvm-lit` python script should be specified by the `LLVM_EXTERNAL_LIT` option: - - ***Linux***: `-DLLVM_EXTERNAL_LIT=/usr/llvm/15.0.2/build/bin/llvm-lit` + - ***Linux***: `-DLLVM_EXTERNAL_LIT=/usr/llvm/15.0.3/build/bin/llvm-lit` - - ***Windows***: `-DLLVM_EXTERNAL_LIT=d:/LLVM/15.0.2/build/Release/bin/llvm-lit.py` + - ***Windows***: `-DLLVM_EXTERNAL_LIT=d:/LLVM/15.0.3/build/Release/bin/llvm-lit.py` * `FileCheck`: - - ***Linux***: copy from `/usr/llvm/15.0.2/build/bin/` to `CMAKE_INSTALL_PREFIX/dist/bin` + - ***Linux***: copy from `/usr/llvm/15.0.3/build/bin/` to `CMAKE_INSTALL_PREFIX/dist/bin` - - ***Windows***: copy from `d:/LLVM/15.0.2/build/Release/bin` to `CMAKE_INSTALL_PREFIX/dist/bin` + - ***Windows***: copy from `d:/LLVM/15.0.3/build/Release/bin` to `CMAKE_INSTALL_PREFIX/dist/bin` - Or specify the path to `FileCheck` in `CMAKE_INSTALL_PREFIX` option @@ -388,7 +389,7 @@ Ubuntu 14: LLVM 4.0.0 - 7.1.0, CUDA 7.0 - 9.0, cuDNN 5.0.5 - 7.6.5 Ubuntu 16-18: LLVM 8.0.0 - 14.0.6, CUDA 8.0 - 10.2, cuDNN 5.1.10 - 8.0.5 -Ubuntu 20-21: LLVM 9.0.0 - 15.0.2, CUDA 8.0 - 11.7.1, cuDNN 5.1.10 - 8.5.0 +Ubuntu 20-21: LLVM 9.0.0 - 15.0.3, CUDA 8.0 - 11.7.1, cuDNN 5.1.10 - 8.5.0 Minimum build system requirements for the above configurations: @@ -405,11 +406,11 @@ cmake -DHIPIFY_CLANG_TESTS=1 \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=../dist \ - -DCMAKE_PREFIX_PATH=/usr/llvm/15.0.2/dist \ + 
-DCMAKE_PREFIX_PATH=/usr/llvm/15.0.3/dist \ -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda \ -DCUDA_DNN_ROOT_DIR=/usr/local/cuda \ -DCUDA_CUB_ROOT_DIR=/usr/CUB \ - -DLLVM_EXTERNAL_LIT=/usr/llvm/15.0.2/build/bin/llvm-lit \ + -DLLVM_EXTERNAL_LIT=/usr/llvm/15.0.3/build/bin/llvm-lit \ ../hipify ``` *A corresponding successful output:* @@ -427,14 +428,14 @@ cmake -- Detecting CXX compile features -- Detecting CXX compile features - done -- Found ZLIB: /usr/lib/x86_64-linux-gnu/libz.so (found version "1.2.11") --- Found LLVM 15.0.2: --- - CMake module path: /usr/llvm/15.0.2/dist/lib/cmake/llvm --- - Include path : /usr/llvm/15.0.2/dist/include --- - Binary path : /usr/llvm/15.0.2/dist/bin +-- Found LLVM 15.0.3: +-- - CMake module path: /usr/llvm/15.0.3/dist/lib/cmake/llvm +-- - Include path : /usr/llvm/15.0.3/dist/include +-- - Binary path : /usr/llvm/15.0.3/dist/bin -- Linker detection: GNU ld -- Found PythonInterp: /usr/bin/python (found suitable version "3.9.7", minimum required is "2.7") -- Found lit: /usr/local/bin/lit --- Found FileCheck: /usr/llvm/15.0.2/dist/bin/FileCheck +-- Found FileCheck: /usr/llvm/15.0.3/dist/bin/FileCheck -- Looking for pthread.h -- Looking for pthread.h - found -- Performing Test CMAKE_HAVE_LIBC_PTHREAD @@ -453,7 +454,7 @@ make test-hipify Running HIPify regression tests ======================================== CUDA 11.7 - will be used for testing -LLVM 15.0.2 - will be used for testing +LLVM 15.0.3 - will be used for testing x86_64 - Platform architecture Linux 5.13.0-21-generic - Platform OS 64 - hipify-clang binary bitness @@ -567,7 +568,7 @@ Testing Time: 6.22s | 11.0.1 - 11.1.0 | 7.0 - 11.2.2 | 7.6.5 - 8.0.5 | 2017.15.9.31, 2019.16.8.4 | 3.19.3 | 3.9.2 | | 12.0.0 - 13.0.1 | 7.0 - 11.5.1 | 7.6.5 - 8.3.2 | 2017.15.9.43, 2019.16.11.9 | 3.22.2 | 3.10.2 | | 14.0.0 - 14.0.6 | 7.0 - 11.7.1 | 8.0.5 - 8.4.1 | 2017.15.9.49, 2019.16.11.17, 2022.17.2.6 | 3.24.0 | 3.10.6 | -| 15.0.0 - 15.0.2 | 7.0 - 11.7.1 | 8.0.5 - 8.5.0 | 2017.15.9.50, 2019.16.11.18, 
2022.17.3.3 | 3.24.1 | 3.10.6 | +| 15.0.0 - 15.0.3 | 7.0 - 11.7.1 | 8.0.5 - 8.5.0 | 2017.15.9.50, 2019.16.11.18, 2022.17.3.3 | 3.24.1 | 3.10.6 | | 16.0.0git | 7.0 - 11.7.1 | 8.0.5 - 8.5.0 | 2017.15.9.50, 2019.16.11.18, 2022.17.3.3 | 3.24.1 | 3.10.6 | *Building with testing support by `Visual Studio 17 2022` on `Windows 10`:* @@ -580,23 +581,23 @@ cmake -DHIPIFY_CLANG_TESTS=1 \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=../dist \ - -DCMAKE_PREFIX_PATH=d:/LLVM/15.0.2/dist \ + -DCMAKE_PREFIX_PATH=d:/LLVM/15.0.3/dist \ -DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.7" \ -DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v11.7" \ -DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-11.7-windows-x64-v8.5.0 \ -DCUDA_CUB_ROOT_DIR=d:/GIT/cub \ - -DLLVM_EXTERNAL_LIT=d:/LLVM/15.0.2/build/Release/bin/llvm-lit.py \ + -DLLVM_EXTERNAL_LIT=d:/LLVM/15.0.3/build/Release/bin/llvm-lit.py \ ../hipify ``` *A corresponding successful output:* ```shell --- Found LLVM 15.0.2: --- - CMake module path: d:/LLVM/15.0.2/dist/lib/cmake/llvm --- - Include path : d:/LLVM/15.0.2/dist/include --- - Binary path : d:/LLVM/15.0.2/dist/bin +-- Found LLVM 15.0.3: +-- - CMake module path: d:/LLVM/15.0.3/dist/lib/cmake/llvm +-- - Include path : d:/LLVM/15.0.3/dist/include +-- - Binary path : d:/LLVM/15.0.3/dist/bin -- Found PythonInterp: c:/Program Files/Python39/python.exe (found suitable version "3.9.5", minimum required is "3.6") -- Found lit: c:/Program Files/Python39/Scripts/lit.exe --- Found FileCheck: d:/LLVM/15.0.2/dist/bin/FileCheck.exe +-- Found FileCheck: d:/LLVM/15.0.3/dist/bin/FileCheck.exe -- Found CUDA: c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.7 (found version "11.7") -- Configuring done -- Generating done From 1d7048c3ca0aef253bc4e8d24bf6e8ecb389603c Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Wed, 19 Oct 2022 11:22:04 +0200 Subject: [PATCH 23/43] [HIPIFY][fp16][fix] Add missing `fp16` functions appeared in CUDA 11.x + Updated 
the regenerated hipify-perl and CUDA_Device_API_supported_by_HIP.md --- bin/hipify-perl | 24 ++++++++++ .../CUDA_Device_API_supported_by_HIP.md | 24 ++++++++++ src/CUDA2HIP_Device_functions.cpp | 48 +++++++++++++++++++ 3 files changed, 96 insertions(+) diff --git a/bin/hipify-perl b/bin/hipify-perl index 572531a6..820d269a 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -5394,6 +5394,10 @@ sub warnUnsupportedDeviceFunctions { "__vabs4", "__vabs2", "__trap", + "__stwt", + "__stwb", + "__stcs", + "__stcg", "__signbitl", "__signbitf", "__signbit", @@ -5416,12 +5420,31 @@ sub warnUnsupportedDeviceFunctions { "__nv_cvt_double2_to_fp8x2", "__nv_cvt_bfloat16raw_to_fp8", "__nv_cvt_bfloat16raw2_to_fp8x2", + "__ldlu", + "__ldcv", "__isnanl", "__isnanf", "__isnan", "__isinfl", "__isinff", "__isinf", + "__hsub_rn", + "__hsub2_rn", + "__hmul_rn", + "__hmul2_rn", + "__hmin_nan", + "__hmin2_nan", + "__hmin2", + "__hmin", + "__hmax_nan", + "__hmax2_nan", + "__hmax2", + "__hmax", + "__hfma_relu", + "__hfma2_relu", + "__hcmadd", + "__hadd_rn", + "__hadd2_rn", "__fsub_rz", "__fsub_ru", "__fsub_rd", @@ -5458,6 +5481,7 @@ sub warnUnsupportedDeviceFunctions { "__drcp_rz", "__drcp_ru", "__drcp_rd", + "__double2half", "__dmul_rz", "__dmul_ru", "__dmul_rd", diff --git a/doc/markdown/CUDA_Device_API_supported_by_HIP.md b/doc/markdown/CUDA_Device_API_supported_by_HIP.md index b7d0df66..b111a146 100644 --- a/doc/markdown/CUDA_Device_API_supported_by_HIP.md +++ b/doc/markdown/CUDA_Device_API_supported_by_HIP.md @@ -33,6 +33,7 @@ |`__double2float_rn`| | | |`__double2float_rn`|1.6.0| | | | |`__double2float_ru`| | | |`__double2float_ru`|1.6.0| | | | |`__double2float_rz`| | | |`__double2float_rz`|1.6.0| | | | +|`__double2half`|11.0| | | | | | | | |`__double2hiint`| | | |`__double2hiint`|1.6.0| | | | |`__double2int_rd`| | | |`__double2int_rd`|1.6.0| | | | |`__double2int_rn`| | | |`__double2int_rn`|1.6.0| | | | @@ -140,7 +141,9 @@ |`__habs2`| | | |`__habs2`|3.5.0| | | | |`__hadd`| | | 
|`__hadd`|1.6.0| | | | |`__hadd2`| | | |`__hadd2`|1.6.0| | | | +|`__hadd2_rn`|11.6| | | | | | | | |`__hadd2_sat`| | | |`__hadd2_sat`|1.6.0| | | | +|`__hadd_rn`|11.6| | | | | | | | |`__hadd_sat`| | | |`__hadd_sat`|1.6.0| | | | |`__half22float2`| | | |`__half22float2`|1.6.0| | | | |`__half2float`| | | |`__half2float`|1.6.0| | | | @@ -184,6 +187,7 @@ |`__hbltu2`| | | |`__hbltu2`|1.9.0| | | | |`__hbne2`| | | |`__hbne2`|1.6.0| | | | |`__hbneu2`| | | |`__hbneu2`|1.9.0| | | | +|`__hcmadd`|11.1| | | | | | | | |`__hdiv`| | | |`__hdiv`|1.9.0| | | | |`__heq`| | | |`__heq`|1.6.0| | | | |`__heq2`| | | |`__heq2`|1.6.0| | | | @@ -191,7 +195,9 @@ |`__hequ2`| | | |`__hequ2`|1.9.0| | | | |`__hfma`| | | |`__hfma`|1.6.0| | | | |`__hfma2`| | | |`__hfma2`|1.6.0| | | | +|`__hfma2_relu`|11.0| | | | | | | | |`__hfma2_sat`| | | |`__hfma2_sat`|1.6.0| | | | +|`__hfma_relu`|11.0| | | | | | | | |`__hfma_sat`| | | |`__hfma_sat`|1.6.0| | | | |`__hge`| | | |`__hge`|1.6.0| | | | |`__hge2`| | | |`__hge2`|1.6.0| | | | @@ -217,9 +223,19 @@ |`__hlt2`| | | |`__hlt2`|1.6.0| | | | |`__hltu`| | | |`__hltu`|1.9.0| | | | |`__hltu2`| | | |`__hltu2`|1.9.0| | | | +|`__hmax`|11.0| | | | | | | | +|`__hmax2`|11.0| | | | | | | | +|`__hmax2_nan`|11.0| | | | | | | | +|`__hmax_nan`|11.0| | | | | | | | +|`__hmin`|11.0| | | | | | | | +|`__hmin2`|11.0| | | | | | | | +|`__hmin2_nan`|11.0| | | | | | | | +|`__hmin_nan`|11.0| | | | | | | | |`__hmul`| | | |`__hmul`|1.6.0| | | | |`__hmul2`| | | |`__hmul2`|1.6.0| | | | +|`__hmul2_rn`|11.6| | | | | | | | |`__hmul2_sat`| | | |`__hmul2_sat`|1.6.0| | | | +|`__hmul_rn`|11.6| | | | | | | | |`__hmul_sat`| | | |`__hmul_sat`|1.6.0| | | | |`__hne`| | | |`__hne`|1.6.0| | | | |`__hne2`| | | |`__hne2`|1.6.0| | | | @@ -229,7 +245,9 @@ |`__hneu2`| | | |`__hneu2`|1.9.0| | | | |`__hsub`| | | |`__hsub`|1.6.0| | | | |`__hsub2`| | | |`__hsub2`|1.6.0| | | | +|`__hsub2_rn`|11.6| | | | | | | | |`__hsub2_sat`| | | |`__hsub2_sat`|1.6.0| | | | +|`__hsub_rn`|11.6| | | | | | | | |`__hsub_sat`| | | 
|`__hsub_sat`|1.6.0| | | | |`__int2double_rn`| | | |`__int2double_rn`|1.6.0| | | | |`__int2float_rd`| | | |`__int2float_rd`|1.6.0| | | | @@ -250,7 +268,9 @@ |`__ldca`| | | |`__ldca`|1.9.0| | | | |`__ldcg`| | | |`__ldcg`|1.9.0| | | | |`__ldcs`| | | |`__ldcs`|1.9.0| | | | +|`__ldcv`|11.0| | | | | | | | |`__ldg`| | | |`__ldg`|1.6.0| | | | +|`__ldlu`|11.0| | | | | | | | |`__ll2double_rd`| | | |`__ll2double_rd`|1.6.0| | | | |`__ll2double_rn`| | | |`__ll2double_rn`|1.6.0| | | | |`__ll2double_ru`| | | |`__ll2double_ru`|1.6.0| | | | @@ -314,6 +334,10 @@ |`__signbitl`| | | | | | | | | |`__sincosf`| | | |`__sincosf`|1.6.0| | | | |`__sinf`| | | |`__sinf`|1.6.0| | | | +|`__stcg`|11.0| | | | | | | | +|`__stcs`|11.0| | | | | | | | +|`__stwb`|11.0| | | | | | | | +|`__stwt`|11.0| | | | | | | | |`__syncthreads`| | | |`__syncthreads`|1.6.0| | | | |`__syncthreads_and`| | | |`__syncthreads_and`|3.7.0| | | | |`__syncthreads_count`| | | |`__syncthreads_count`|3.7.0| | | | diff --git a/src/CUDA2HIP_Device_functions.cpp b/src/CUDA2HIP_Device_functions.cpp index 95be0ef3..3cd6c01e 100644 --- a/src/CUDA2HIP_Device_functions.cpp +++ b/src/CUDA2HIP_Device_functions.cpp @@ -339,6 +339,7 @@ const std::map CUDA_DEVICE_FUNCTION_MAP { {"__brevll", {"__brevll", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"__byte_perm", {"__byte_perm", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"__hadd", {"__hadd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hadd_rn", {"__hadd_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__rhadd", {"__rhadd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"__uhadd", {"__uhadd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"__urhadd", {"__urhadd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, @@ -581,6 +582,8 @@ const std::map CUDA_DEVICE_FUNCTION_MAP { {"__ldcg", {"__ldcg", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"__ldca", {"__ldca", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"__ldcs", {"__ldcs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__ldlu", {"__ldlu", "", CONV_DEVICE_FUNC, 
API_RUNTIME, 1, UNSUPPORTED}}, + {"__ldcv", {"__ldcv", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__heq2", {"__heq2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"__hne2", {"__hne2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"__hle2", {"__hle2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, @@ -595,22 +598,29 @@ const std::map CUDA_DEVICE_FUNCTION_MAP { {"__hgtu2", {"__hgtu2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"__hisnan2", {"__hisnan2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"__hadd2", {"__hadd2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hadd2_rn", {"__hadd2_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__hsub2", {"__hsub2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hsub2_rn", {"__hsub2_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__hmul2", {"__hmul2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hmul2_rn", {"__hmul2_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__h2div", {"__h2div", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"__hadd2_sat", {"__hadd2_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"__hsub2_sat", {"__hsub2_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"__hmul2_sat", {"__hmul2_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"__hfma2", {"__hfma2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"__hfma2_sat", {"__hfma2_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hfma2_relu", {"__hfma2_relu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__hneg2", {"__hneg2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"__hsub", {"__hsub", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hsub_rn", {"__hsub_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__hmul", {"__hmul", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hmul_rn", {"__hmul_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__hdiv", {"__hdiv", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"__hadd_sat", {"__hadd_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"__hsub_sat", {"__hsub_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, 
{"__hmul_sat", {"__hmul_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"__hfma", {"__hfma", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__hfma_relu", {"__hfma_relu", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__hfma_sat", {"__hfma_sat", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"__hneg", {"__hneg", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"__habs2", {"__habs2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, @@ -674,6 +684,20 @@ const std::map CUDA_DEVICE_FUNCTION_MAP { {"__funnelshift_lc", {"__funnelshift_lc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"__funnelshift_r", {"__funnelshift_r", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"__funnelshift_rc", {"__funnelshift_rc", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, + {"__double2half", {"__double2half", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__hmax", {"__hmax", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__hmax_nan", {"__hmax_nan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__hmax2", {"__hmax2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__hmax2_nan", {"__hmax2_nan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__hmin", {"__hmin", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__hmin_nan", {"__hmin_nan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__hmin2", {"__hmin2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__hmin2_nan", {"__hmin2_nan", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__stwb", {"__stwb", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__stcg", {"__stcg", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__stcs", {"__stcs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__stwt", {"__stwt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__hcmadd", {"__hcmadd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, // atomic functions {"atomicAdd", {"atomicAdd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"atomicAdd_system", {"atomicAdd_system", "", 
CONV_DEVICE_FUNC, API_RUNTIME, 1}}, @@ -722,6 +746,30 @@ const std::map CUDA_DEVICE_FUNCTION_VER_MAP { {"__shfl_up", {CUDA_75, CUDA_90, CUDA_0 }}, {"__shfl_down", {CUDA_75, CUDA_90, CUDA_0 }}, {"__shfl_xor", {CUDA_75, CUDA_90, CUDA_0 }}, + {"__double2half", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__hmax", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__hmax2", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__hmin", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__hmin2", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__ldlu", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__ldcv", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__stwb", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__stcg", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__stcs", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__stwt", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__hmax_nan", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__hmin_nan", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__hmax2_nan", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__hmin2_nan", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__hfma_relu", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__hfma2_relu", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__hcmadd", {CUDA_111, CUDA_0, CUDA_0 }}, + {"__hadd2_rn", {CUDA_116, CUDA_0, CUDA_0 }}, + {"__hsub2_rn", {CUDA_116, CUDA_0, CUDA_0 }}, + {"__hmul2_rn", {CUDA_116, CUDA_0, CUDA_0 }}, + {"__hadd_rn", {CUDA_116, CUDA_0, CUDA_0 }}, + {"__hsub_rn", {CUDA_116, CUDA_0, CUDA_0 }}, + {"__hmul_rn", {CUDA_116, CUDA_0, CUDA_0 }}, {"__nv_cvt_double_to_fp8", {CUDA_118, CUDA_0, CUDA_0 }}, {"__nv_cvt_double2_to_fp8x2", {CUDA_118, CUDA_0, CUDA_0 }}, {"__nv_cvt_float_to_fp8", {CUDA_118, CUDA_0, CUDA_0 }}, From a3217b067be3f7a4abd9bbe26df73ebafdc709ec Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Wed, 19 Oct 2022 23:25:26 +0200 Subject: [PATCH 24/43] [HIPIFY][fp16][fp8] Device types introduction - float point only + Introduced a new type: CONV_DEVICE_TYPE + Updated the regenerated hipify-perl and CUDA_Device_API_supported_by_HIP.md + [NOTE] Not all of the device types are device-only types: they might be host types as well, but this is not important for hipification (at least for now) 
--- bin/hipify-perl | 24 ++++++++++- .../CUDA_Device_API_supported_by_HIP.md | 23 +++++++++- src/CUDA2HIP.cpp | 1 + src/CUDA2HIP_Device_types.cpp | 43 +++++++++++++++++++ src/Statistics.cpp | 1 + src/Statistics.h | 1 + 6 files changed, 90 insertions(+), 3 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 820d269a..da21c6f5 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -855,7 +855,7 @@ push(@exclude_filelist, split(',', $exclude_files)); %exclude_dirhash = map { $_ => 1 } @exclude_dirlist; %exclude_filehash = map { $_ => 1 } @exclude_filelist; -@statNames = ("error", "init", "version", "device", "context", "module", "memory", "virtual_memory", "stream_ordered_memory", "addressing", "stream", "event", "external_resource_interop", "stream_memory", "execution", "graph", "occupancy", "texture", "surface", "peer", "graphics", "interactions", "profiler", "openGL", "D3D9", "D3D10", "D3D11", "VDPAU", "EGL", "thread", "complex", "library", "device_library", "device_function", "include", "include_cuda_main_header", "include_cuda_main_header_v2", "type", "literal", "numeric_literal", "define", "extern_shared", "kernel_launch"); +@statNames = ("error", "init", "version", "device", "context", "module", "memory", "virtual_memory", "stream_ordered_memory", "addressing", "stream", "event", "external_resource_interop", "stream_memory", "execution", "graph", "occupancy", "texture", "surface", "peer", "graphics", "interactions", "profiler", "openGL", "D3D9", "D3D10", "D3D11", "VDPAU", "EGL", "thread", "complex", "library", "device_library", "device_function", "device_type", "include", "include_cuda_main_header", "include_cuda_main_header_v2", "type", "literal", "numeric_literal", "define", "extern_shared", "kernel_launch"); sub totalStats { my %count = %{shift()}; @@ -3076,6 +3076,10 @@ sub simpleSubstitutions { subst("curand_uniform4", "hiprand_uniform4", "device_library"); subst("curand_uniform4_double", "hiprand_uniform4_double", "device_library"); 
subst("curand_uniform_double", "hiprand_uniform_double", "device_library"); + subst("__half", "__half", "device_type"); + subst("__half2", "__half2", "device_type"); + subst("__half2_raw", "__half2_raw", "device_type"); + subst("__half_raw", "__half_raw", "device_type"); subst("caffe2\/core\/common_cudnn.h", "caffe2\/core\/hip\/common_miopen.h", "include"); subst("caffe2\/operators\/spatial_batch_norm_op.h", "caffe2\/operators\/hip\/spatial_batch_norm_op_miopen.hip", "include"); subst("channel_descriptor.h", "hip\/channel_descriptor.h", "include"); @@ -6692,7 +6696,23 @@ sub warnUnsupportedFunctions { "bsrsv2Info", "bsrilu02Info", "bsric02Info", + "__nv_saturation_t", + "__nv_fp8x4_storage_t", + "__nv_fp8x4_e4m3", + "__nv_fp8x2_storage_t", + "__nv_fp8x2_e5m2", + "__nv_fp8x2_e4m3", + "__nv_fp8_storage_t", + "__nv_fp8_interpretation_t", + "__nv_fp8_e5m2", + "__nv_fp8_e4m3", + "__nv_bfloat162", + "__nv_bfloat16", "__curand_umul", + "__NV_SATFINITE", + "__NV_NOSAT", + "__NV_E5M2", + "__NV_E4M3", "__CUB_LP64__", "_CUB_ASM_PTR_SIZE_", "_CUB_ASM_PTR_", @@ -8384,7 +8404,7 @@ while (@ARGV) { transformHostFunctions(); # TODO: would like to move this code outside loop but it uses $_ which contains the whole file unless ($no_output) { - my $apiCalls = $ft{'error'} + $ft{'init'} + $ft{'version'} + $ft{'device'} + $ft{'context'} + $ft{'module'} + $ft{'memory'} + $ft{'virtual_memory'} + $ft{'stream_ordered_memory'} + $ft{'addressing'} + $ft{'stream'} + $ft{'event'} + $ft{'external_resource_interop'} + $ft{'stream_memory'} + $ft{'execution'} + $ft{'graph'} + $ft{'occupancy'} + $ft{'texture'} + $ft{'surface'} + $ft{'peer'} + $ft{'graphics'} + $ft{'interactions'} + $ft{'profiler'} + $ft{'openGL'} + $ft{'D3D9'} + $ft{'D3D10'} + $ft{'D3D11'} + $ft{'VDPAU'} + $ft{'EGL'} + $ft{'thread'} + $ft{'complex'} + $ft{'library'} + $ft{'device_library'} + $ft{'include'} + $ft{'include_cuda_main_header'} + $ft{'include_cuda_main_header_v2'} + $ft{'type'} + $ft{'literal'} + $ft{'numeric_literal'} + 
$ft{'define'}; + my $apiCalls = $ft{'error'} + $ft{'init'} + $ft{'version'} + $ft{'device'} + $ft{'context'} + $ft{'module'} + $ft{'memory'} + $ft{'virtual_memory'} + $ft{'stream_ordered_memory'} + $ft{'addressing'} + $ft{'stream'} + $ft{'event'} + $ft{'external_resource_interop'} + $ft{'stream_memory'} + $ft{'execution'} + $ft{'graph'} + $ft{'occupancy'} + $ft{'texture'} + $ft{'surface'} + $ft{'peer'} + $ft{'graphics'} + $ft{'interactions'} + $ft{'profiler'} + $ft{'openGL'} + $ft{'D3D9'} + $ft{'D3D10'} + $ft{'D3D11'} + $ft{'VDPAU'} + $ft{'EGL'} + $ft{'thread'} + $ft{'complex'} + $ft{'library'} + $ft{'device_library'} + $ft{'device_type'} + $ft{'include'} + $ft{'include_cuda_main_header'} + $ft{'include_cuda_main_header_v2'} + $ft{'type'} + $ft{'literal'} + $ft{'numeric_literal'} + $ft{'define'}; my $kernStuff = $hasDeviceCode + $ft{'kernel_launch'} + $ft{'device_function'}; my $totalCalls = $apiCalls + $kernStuff; $is_dos = m/\r\n$/; diff --git a/doc/markdown/CUDA_Device_API_supported_by_HIP.md b/doc/markdown/CUDA_Device_API_supported_by_HIP.md index b111a146..91128450 100644 --- a/doc/markdown/CUDA_Device_API_supported_by_HIP.md +++ b/doc/markdown/CUDA_Device_API_supported_by_HIP.md @@ -712,7 +712,28 @@ ## **2. 
Device Types** -Unsupported +|**CUDA**|**A**|**D**|**R**|**HIP**|**A**|**D**|**R**|**E**| +|:--|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:| +|`__NV_E4M3`|11.8| | | | | | | | +|`__NV_E5M2`|11.8| | | | | | | | +|`__NV_NOSAT`|11.8| | | | | | | | +|`__NV_SATFINITE`|11.8| | | | | | | | +|`__half`| | | |`__half`|1.6.0| | | | +|`__half2`| | | |`__half2`|1.6.0| | | | +|`__half2_raw`| | | |`__half2_raw`|1.9.0| | | | +|`__half_raw`| | | |`__half_raw`|1.9.0| | | | +|`__nv_bfloat16`|11.0| | | | | | | | +|`__nv_bfloat162`|11.0| | | | | | | | +|`__nv_fp8_e4m3`|11.8| | | | | | | | +|`__nv_fp8_e5m2`|11.8| | | | | | | | +|`__nv_fp8_interpretation_t`|11.8| | | | | | | | +|`__nv_fp8_storage_t`|11.8| | | | | | | | +|`__nv_fp8x2_e4m3`|11.8| | | | | | | | +|`__nv_fp8x2_e5m2`|11.8| | | | | | | | +|`__nv_fp8x2_storage_t`|11.8| | | | | | | | +|`__nv_fp8x4_e4m3`|11.8| | | | | | | | +|`__nv_fp8x4_storage_t`|11.8| | | | | | | | +|`__nv_saturation_t`|11.8| | | | | | | | \*A - Added; D - Deprecated; R - Removed; E - Experimental \ No newline at end of file diff --git a/src/CUDA2HIP.cpp b/src/CUDA2HIP.cpp index 7708f478..7f0f7c9e 100644 --- a/src/CUDA2HIP.cpp +++ b/src/CUDA2HIP.cpp @@ -113,6 +113,7 @@ const std::map &CUDA_RENAMES_MAP() { ret.insert(CUDA_CUB_FUNCTION_MAP.begin(), CUDA_CUB_FUNCTION_MAP.end()); ret.insert(CUDA_RTC_TYPE_NAME_MAP.begin(), CUDA_RTC_TYPE_NAME_MAP.end()); ret.insert(CUDA_RTC_FUNCTION_MAP.begin(), CUDA_RTC_FUNCTION_MAP.end()); + ret.insert(CUDA_DEVICE_TYPE_NAME_MAP.begin(), CUDA_DEVICE_TYPE_NAME_MAP.end()); return ret; }; diff --git a/src/CUDA2HIP_Device_types.cpp b/src/CUDA2HIP_Device_types.cpp index 37c09dcf..d5f60858 100644 --- a/src/CUDA2HIP_Device_types.cpp +++ b/src/CUDA2HIP_Device_types.cpp @@ -24,10 +24,53 @@ THE SOFTWARE. 
// Maps the names of CUDA Device/Host types to the corresponding HIP types const std::map CUDA_DEVICE_TYPE_NAME_MAP { + // float16 Precision Device types + {"__half", {"__half", "", CONV_DEVICE_TYPE, API_RUNTIME, 2}}, + {"__half_raw", {"__half_raw", "", CONV_DEVICE_TYPE, API_RUNTIME, 2}}, + {"__half2", {"__half2", "", CONV_DEVICE_TYPE, API_RUNTIME, 2}}, + {"__half2_raw", {"__half2_raw", "", CONV_DEVICE_TYPE, API_RUNTIME, 2}}, + // Bfloat16 Precision Device types + {"__nv_bfloat16", {"__hip_bfloat16", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, + {"__nv_bfloat162", {"__hip_bfloat162", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, + // float8 Precision Device types + {"__nv_fp8_storage_t", {"__hip_fp8_storage_t", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, + {"__nv_fp8x2_storage_t", {"__hip_fp8x2_storage_t", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, + {"__nv_fp8x4_storage_t", {"__hip_fp8x4_storage_t", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, + {"__nv_fp8_e5m2", {"__hip_fp8_e5m2", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, + {"__nv_fp8x2_e5m2", {"__hip_fp8x2_e5m2", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, + {"__nv_fp8_e4m3", {"__hip_fp8_e4m3", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, + {"__nv_fp8x2_e4m3", {"__hip_fp8x2_e4m3", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, + {"__nv_fp8x4_e4m3", {"__hip_fp8x4_e4m3", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, + {"__nv_saturation_t", {"__hip_saturation_t", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, + {"__NV_NOSAT", {"__HIP_NOSAT", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 2, UNSUPPORTED}}, + {"__NV_SATFINITE", {"__HIP_SATFINITE", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 2, UNSUPPORTED}}, + {"__nv_fp8_interpretation_t", {"__hip_fp8_interpretation_t", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, + {"__NV_E4M3", {"__HIP_E4M3", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 2, UNSUPPORTED}}, + {"__NV_E5M2", {"__HIP_E5M2", 
"", CONV_NUMERIC_LITERAL, API_RUNTIME, 2, UNSUPPORTED}}, }; const std::map CUDA_DEVICE_TYPE_NAME_VER_MAP { + {"__nv_bfloat16", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__nv_bfloat162", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__nv_fp8_storage_t", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_fp8x2_storage_t", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_fp8x4_storage_t", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_fp8_e5m2", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_fp8x2_e5m2", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_fp8_e4m3", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_fp8x2_e4m3", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_fp8x4_e4m3", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_saturation_t", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__NV_NOSAT", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__NV_SATFINITE", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_fp8_interpretation_t", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__NV_E4M3", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__NV_E5M2", {CUDA_118, CUDA_0, CUDA_0 }}, }; const std::map HIP_DEVICE_TYPE_NAME_VER_MAP { + {"__half", {HIP_1060, HIP_0, HIP_0 }}, + {"__half2", {HIP_1060, HIP_0, HIP_0 }}, + {"__half_raw", {HIP_1090, HIP_0, HIP_0 }}, + {"__half2_raw", {HIP_1090, HIP_0, HIP_0 }}, }; diff --git a/src/Statistics.cpp b/src/Statistics.cpp index dafb5f8c..ecfdb55e 100644 --- a/src/Statistics.cpp +++ b/src/Statistics.cpp @@ -62,6 +62,7 @@ const char *counterNames[NUM_CONV_TYPES] = { "library", // CONV_LIB_FUNC "device_library", // CONV_LIB_DEVICE_FUNC "device_function", // CONV_DEVICE_FUNC + "device_type", // CONV_DEVICE_TYPE "include", // CONV_INCLUDE "include_cuda_main_header", // CONV_INCLUDE_CUDA_MAIN_H "include_cuda_main_header_v2", // CONV_INCLUDE_CUDA_MAIN_V2_H diff --git a/src/Statistics.h b/src/Statistics.h index 85f1022a..9080be35 100644 --- a/src/Statistics.h +++ b/src/Statistics.h @@ -119,6 +119,7 @@ enum ConvTypes { CONV_LIB_FUNC, CONV_LIB_DEVICE_FUNC, CONV_DEVICE_FUNC, + CONV_DEVICE_TYPE, CONV_INCLUDE, CONV_INCLUDE_CUDA_MAIN_H, CONV_INCLUDE_CUDA_MAIN_V2_H, From 
f31b29aa08c776a0ec2e19c2d55ed930e4ac3981 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Thu, 20 Oct 2022 22:19:59 +0200 Subject: [PATCH 25/43] [HIPIFY][bfp16] `Bfloat16` Precision Intrinsics support - Part 1 + Updated the regenerated hipify-perl and CUDA_Device_API_supported_by_HIP.md accordingly --- bin/hipify-perl | 42 +++++++++ .../CUDA_Device_API_supported_by_HIP.md | 42 +++++++++ src/CUDA2HIP_Device_functions.cpp | 85 +++++++++++++++++++ 3 files changed, 169 insertions(+) diff --git a/bin/hipify-perl b/bin/hipify-perl index da21c6f5..3d850ea4 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -5397,6 +5397,14 @@ sub warnUnsupportedDeviceFunctions { "__vabsdiffs2", "__vabs4", "__vabs2", + "__ushort2bfloat16_rz", + "__ushort2bfloat16_ru", + "__ushort2bfloat16_rn", + "__ushort2bfloat16_rd", + "__uint2bfloat16_rz", + "__uint2bfloat16_ru", + "__uint2bfloat16_rn", + "__uint2bfloat16_rd", "__trap", "__stwt", "__stwb", @@ -5405,6 +5413,10 @@ sub warnUnsupportedDeviceFunctions { "__signbitl", "__signbitf", "__signbit", + "__short2bfloat16_rz", + "__short2bfloat16_ru", + "__short2bfloat16_rn", + "__short2bfloat16_rd", "__shfl_xor_sync", "__shfl_up_sync", "__shfl_sync", @@ -5432,6 +5444,10 @@ sub warnUnsupportedDeviceFunctions { "__isinfl", "__isinff", "__isinf", + "__int2bfloat16_rz", + "__int2bfloat16_ru", + "__int2bfloat16_rn", + "__int2bfloat16_rd", "__hsub_rn", "__hsub2_rn", "__hmul_rn", @@ -5467,6 +5483,13 @@ sub warnUnsupportedDeviceFunctions { "__fma_rz", "__fma_ru", "__fma_rd", + "__floats2bfloat162_rn", + "__float2bfloat16_rz", + "__float2bfloat16_ru", + "__float2bfloat16_rn", + "__float2bfloat16_rd", + "__float2bfloat162_rn", + "__float2bfloat16", "__finitel", "__finitef", "__finite", @@ -5486,6 +5509,7 @@ sub warnUnsupportedDeviceFunctions { "__drcp_ru", "__drcp_rd", "__double2half", + "__double2bfloat16", "__dmul_rz", "__dmul_ru", "__dmul_rd", @@ -5496,6 +5520,24 @@ sub warnUnsupportedDeviceFunctions { "__dadd_ru", "__dadd_rd", "__brkpt", + 
"__bfloat162ushort_rz", + "__bfloat162ushort_ru", + "__bfloat162ushort_rn", + "__bfloat162ushort_rd", + "__bfloat162uint_rz", + "__bfloat162uint_ru", + "__bfloat162uint_rn", + "__bfloat162uint_rd", + "__bfloat162short_rz", + "__bfloat162short_ru", + "__bfloat162short_rn", + "__bfloat162short_rd", + "__bfloat162int_rz", + "__bfloat162int_ru", + "__bfloat162int_rn", + "__bfloat162int_rd", + "__bfloat162float", + "__bfloat1622float2", "_Pow_int" ) { diff --git a/doc/markdown/CUDA_Device_API_supported_by_HIP.md b/doc/markdown/CUDA_Device_API_supported_by_HIP.md index 91128450..2d9ba637 100644 --- a/doc/markdown/CUDA_Device_API_supported_by_HIP.md +++ b/doc/markdown/CUDA_Device_API_supported_by_HIP.md @@ -10,6 +10,24 @@ |`__assert_fail`| | | |`__assert_fail`|1.9.0| | | | |`__assertfail`| | | |`__assertfail`|1.9.0| | | | |`__ballot`| | | |`__ballot`|1.6.0| | | | +|`__bfloat1622float2`|11.0| | | | | | | | +|`__bfloat162float`|11.0| | | | | | | | +|`__bfloat162int_rd`|11.0| | | | | | | | +|`__bfloat162int_rn`|11.0| | | | | | | | +|`__bfloat162int_ru`|11.0| | | | | | | | +|`__bfloat162int_rz`|11.0| | | | | | | | +|`__bfloat162short_rd`|11.0| | | | | | | | +|`__bfloat162short_rn`|11.0| | | | | | | | +|`__bfloat162short_ru`|11.0| | | | | | | | +|`__bfloat162short_rz`|11.0| | | | | | | | +|`__bfloat162uint_rd`|11.0| | | | | | | | +|`__bfloat162uint_rn`|11.0| | | | | | | | +|`__bfloat162uint_ru`|11.0| | | | | | | | +|`__bfloat162uint_rz`|11.0| | | | | | | | +|`__bfloat162ushort_rd`|11.0| | | | | | | | +|`__bfloat162ushort_rn`|11.0| | | | | | | | +|`__bfloat162ushort_ru`|11.0| | | | | | | | +|`__bfloat162ushort_rz`|11.0| | | | | | | | |`__brev`| | | |`__brev`|1.6.0| | | | |`__brevll`| | | |`__brevll`|1.6.0| | | | |`__brkpt`| | | | | | | | | @@ -29,6 +47,7 @@ |`__dmul_rn`| | | |`__dmul_rn`|1.6.0| | | | |`__dmul_ru`| | | | | | | | | |`__dmul_rz`| | | | | | | | | +|`__double2bfloat16`|11.0| | | | | | | | |`__double2float_rd`| | | |`__double2float_rd`|1.6.0| | | | 
|`__double2float_rn`| | | |`__double2float_rn`|1.6.0| | | | |`__double2float_ru`| | | |`__double2float_ru`|1.6.0| | | | @@ -82,6 +101,12 @@ |`__finitef`| | | | | | | | | |`__finitel`| | | | | | | | | |`__float22half2_rn`| | | |`__float22half2_rn`|1.6.0| | | | +|`__float2bfloat16`|11.0| | | | | | | | +|`__float2bfloat162_rn`|11.0| | | | | | | | +|`__float2bfloat16_rd`|11.0| | | | | | | | +|`__float2bfloat16_rn`|11.0| | | | | | | | +|`__float2bfloat16_ru`|11.0| | | | | | | | +|`__float2bfloat16_rz`|11.0| | | | | | | | |`__float2half`| | | |`__float2half`|1.6.0| | | | |`__float2half2_rn`| | | |`__float2half2_rn`|1.6.0| | | | |`__float2half_rd`| | | |`__float2half_rd`|1.6.0| | | | @@ -106,6 +131,7 @@ |`__float2ull_rz`| | | |`__float2ull_rz`|1.6.0| | | | |`__float_as_int`| | | |`__float_as_int`|1.6.0| | | | |`__float_as_uint`| | | |`__float_as_uint`|1.6.0| | | | +|`__floats2bfloat162_rn`|11.0| | | | | | | | |`__floats2half2_rn`| | | |`__floats2half2_rn`|1.6.0| | | | |`__fma_rd`| | | | | | | | | |`__fma_rn`| | | |`__fma_rn`|1.6.0| | | | @@ -249,6 +275,10 @@ |`__hsub2_sat`| | | |`__hsub2_sat`|1.6.0| | | | |`__hsub_rn`|11.6| | | | | | | | |`__hsub_sat`| | | |`__hsub_sat`|1.6.0| | | | +|`__int2bfloat16_rd`|11.0| | | | | | | | +|`__int2bfloat16_rn`|11.0| | | | | | | | +|`__int2bfloat16_ru`|11.0| | | | | | | | +|`__int2bfloat16_rz`|11.0| | | | | | | | |`__int2double_rn`| | | |`__int2double_rn`|1.6.0| | | | |`__int2float_rd`| | | |`__int2float_rd`|1.6.0| | | | |`__int2float_rn`| | | |`__int2float_rn`|1.6.0| | | | @@ -324,6 +354,10 @@ |`__shfl_up_sync`| | | | | | | | | |`__shfl_xor`|7.5|9.0| |`__shfl_xor`|1.6.0| | | | |`__shfl_xor_sync`| | | | | | | | | +|`__short2bfloat16_rd`|11.0| | | | | | | | +|`__short2bfloat16_rn`|11.0| | | | | | | | +|`__short2bfloat16_ru`|11.0| | | | | | | | +|`__short2bfloat16_rz`|11.0| | | | | | | | |`__short2half_rd`| | | |`__short2half_rd`|1.6.0| | | | |`__short2half_rn`| | | |`__short2half_rn`|1.6.0| | | | |`__short2half_ru`| | | 
|`__short2half_ru`|1.6.0| | | | @@ -348,6 +382,10 @@ |`__threadfence_system`| | | |`__threadfence_system`|1.6.0| | | | |`__trap`| | | | | | | | | |`__uhadd`| | | |`__uhadd`|1.6.0| | | | +|`__uint2bfloat16_rd`|11.0| | | | | | | | +|`__uint2bfloat16_rn`|11.0| | | | | | | | +|`__uint2bfloat16_ru`|11.0| | | | | | | | +|`__uint2bfloat16_rz`|11.0| | | | | | | | |`__uint2double_rn`| | | |`__uint2double_rn`|1.6.0| | | | |`__uint2float_rd`| | | |`__uint2float_rd`|1.6.0| | | | |`__uint2float_rn`| | | |`__uint2float_rn`|1.6.0| | | | @@ -375,6 +413,10 @@ |`__umulhi`| | | |`__umulhi`|1.6.0| | | | |`__urhadd`| | | |`__urhadd`|1.6.0| | | | |`__usad`| | | |`__usad`|1.6.0| | | | +|`__ushort2bfloat16_rd`|11.0| | | | | | | | +|`__ushort2bfloat16_rn`|11.0| | | | | | | | +|`__ushort2bfloat16_ru`|11.0| | | | | | | | +|`__ushort2bfloat16_rz`|11.0| | | | | | | | |`__ushort2half_rd`| | | |`__ushort2half_rd`|1.6.0| | | | |`__ushort2half_rn`| | | |`__ushort2half_rn`|1.6.0| | | | |`__ushort2half_ru`| | | |`__ushort2half_ru`|1.6.0| | | | diff --git a/src/CUDA2HIP_Device_functions.cpp b/src/CUDA2HIP_Device_functions.cpp index 3cd6c01e..b22a39f1 100644 --- a/src/CUDA2HIP_Device_functions.cpp +++ b/src/CUDA2HIP_Device_functions.cpp @@ -698,6 +698,49 @@ const std::map CUDA_DEVICE_FUNCTION_MAP { {"__stcs", {"__stcs", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__stwt", {"__stwt", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__hcmadd", {"__hcmadd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + // bfp16 functions + {"__double2bfloat16", {"__double2bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__float2bfloat16", {"__float2bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__float2bfloat16_rn", {"__float2bfloat16_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__float2bfloat16_rz", {"__float2bfloat16_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__float2bfloat16_rd", {"__float2bfloat16_rd", "", 
CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__float2bfloat16_ru", {"__float2bfloat16_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162float", {"__bfloat162float", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__float2bfloat162_rn", {"__float2bfloat162_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__floats2bfloat162_rn", {"__floats2bfloat162_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat1622float2", {"__bfloat1622float2", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162int_rn", {"__bfloat162int_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162int_rz", {"__bfloat162int_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162int_rd", {"__bfloat162int_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162int_ru", {"__bfloat162int_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__int2bfloat16_rn", {"__int2bfloat16_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__int2bfloat16_rz", {"__int2bfloat16_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__int2bfloat16_rd", {"__int2bfloat16_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__int2bfloat16_ru", {"__int2bfloat16_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162short_rn", {"__bfloat162short_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162short_rz", {"__bfloat162short_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162short_rd", {"__bfloat162short_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162short_ru", {"__bfloat162short_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__short2bfloat16_rn", {"__short2bfloat16_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__short2bfloat16_rz", {"__short2bfloat16_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__short2bfloat16_rd", 
{"__short2bfloat16_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__short2bfloat16_ru", {"__short2bfloat16_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162uint_rn", {"__bfloat162uint_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162uint_rz", {"__bfloat162uint_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162uint_rd", {"__bfloat162uint_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162uint_ru", {"__bfloat162uint_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__uint2bfloat16_rn", {"__uint2bfloat16_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__uint2bfloat16_rz", {"__uint2bfloat16_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__uint2bfloat16_rd", {"__uint2bfloat16_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__uint2bfloat16_ru", {"__uint2bfloat16_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162ushort_rn", {"__bfloat162ushort_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162ushort_rz", {"__bfloat162ushort_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162ushort_rd", {"__bfloat162ushort_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162ushort_ru", {"__bfloat162ushort_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__ushort2bfloat16_rn", {"__ushort2bfloat16_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__ushort2bfloat16_rz", {"__ushort2bfloat16_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__ushort2bfloat16_rd", {"__ushort2bfloat16_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__ushort2bfloat16_ru", {"__ushort2bfloat16_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, // atomic functions {"atomicAdd", {"atomicAdd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"atomicAdd_system", {"atomicAdd_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, 
@@ -763,6 +806,48 @@ const std::map CUDA_DEVICE_FUNCTION_VER_MAP { {"__hmin2_nan", {CUDA_110, CUDA_0, CUDA_0 }}, {"__hfma_relu", {CUDA_110, CUDA_0, CUDA_0 }}, {"__hfma2_relu", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__double2bfloat16", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__float2bfloat16", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__float2bfloat16_rn", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__float2bfloat16_rz", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__float2bfloat16_rd", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__float2bfloat16_ru", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162float", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__float2bfloat162_rn", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__floats2bfloat162_rn", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat1622float2", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162int_rn", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162int_rz", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162int_rd", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162int_ru", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__int2bfloat16_rn", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__int2bfloat16_rz", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__int2bfloat16_rd", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__int2bfloat16_ru", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162short_rn", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162short_rz", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162short_rd", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162short_ru", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__short2bfloat16_rn", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__short2bfloat16_rz", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__short2bfloat16_rd", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__short2bfloat16_ru", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162uint_rn", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162uint_rz", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162uint_rd", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162uint_ru", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__uint2bfloat16_rn", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__uint2bfloat16_rz", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__uint2bfloat16_rd", 
{CUDA_110, CUDA_0, CUDA_0 }}, + {"__uint2bfloat16_ru", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162ushort_rn", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162ushort_rz", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162ushort_rd", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162ushort_ru", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__ushort2bfloat16_rn", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__ushort2bfloat16_rz", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__ushort2bfloat16_rd", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__ushort2bfloat16_ru", {CUDA_110, CUDA_0, CUDA_0 }}, {"__hcmadd", {CUDA_111, CUDA_0, CUDA_0 }}, {"__hadd2_rn", {CUDA_116, CUDA_0, CUDA_0 }}, {"__hsub2_rn", {CUDA_116, CUDA_0, CUDA_0 }}, From 167e52ea686f7a15c9a5d29ef515c9a83e555079 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Sat, 22 Oct 2022 21:07:19 +0200 Subject: [PATCH 26/43] [HIPIFY][bfp16] `Bfloat16` Precision Intrinsics support - Part 2 - final + Updated the regenerated hipify-perl and CUDA_Device_API_supported_by_HIP.md accordingly [ToDo] + There are plenty of overridden `bfloat16` functions which are not supported yet; so take them first into account while implementing #662 --- bin/hipify-perl | 28 ++++++++++ .../CUDA_Device_API_supported_by_HIP.md | 28 ++++++++++ src/CUDA2HIP_Device_functions.cpp | 56 +++++++++++++++++++ 3 files changed, 112 insertions(+) diff --git a/bin/hipify-perl b/bin/hipify-perl index 3d850ea4..6549957e 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -5397,10 +5397,15 @@ sub warnUnsupportedDeviceFunctions { "__vabsdiffs2", "__vabs4", "__vabs2", + "__ushort_as_bfloat16", "__ushort2bfloat16_rz", "__ushort2bfloat16_ru", "__ushort2bfloat16_rn", "__ushort2bfloat16_rd", + "__ull2bfloat16_rz", + "__ull2bfloat16_ru", + "__ull2bfloat16_rn", + "__ull2bfloat16_rd", "__uint2bfloat16_rz", "__uint2bfloat16_ru", "__uint2bfloat16_rn", @@ -5413,6 +5418,7 @@ sub warnUnsupportedDeviceFunctions { "__signbitl", "__signbitf", "__signbit", + "__short_as_bfloat16", "__short2bfloat16_rz", "__short2bfloat16_ru", 
"__short2bfloat16_rn", @@ -5436,6 +5442,13 @@ sub warnUnsupportedDeviceFunctions { "__nv_cvt_double2_to_fp8x2", "__nv_cvt_bfloat16raw_to_fp8", "__nv_cvt_bfloat16raw2_to_fp8x2", + "__lows2bfloat162", + "__low2bfloat162", + "__low2bfloat16", + "__ll2bfloat16_rz", + "__ll2bfloat16_ru", + "__ll2bfloat16_rn", + "__ll2bfloat16_rd", "__ldlu", "__ldcv", "__isnanl", @@ -5460,9 +5473,13 @@ sub warnUnsupportedDeviceFunctions { "__hmax2_nan", "__hmax2", "__hmax", + "__highs2bfloat162", + "__high2bfloat162", + "__high2bfloat16", "__hfma_relu", "__hfma2_relu", "__hcmadd", + "__halves2bfloat162", "__hadd_rn", "__hadd2_rn", "__fsub_rz", @@ -5520,10 +5537,16 @@ sub warnUnsupportedDeviceFunctions { "__dadd_ru", "__dadd_rd", "__brkpt", + "__bfloat16_as_ushort", + "__bfloat16_as_short", "__bfloat162ushort_rz", "__bfloat162ushort_ru", "__bfloat162ushort_rn", "__bfloat162ushort_rd", + "__bfloat162ull_rz", + "__bfloat162ull_ru", + "__bfloat162ull_rn", + "__bfloat162ull_rd", "__bfloat162uint_rz", "__bfloat162uint_ru", "__bfloat162uint_rn", @@ -5532,11 +5555,16 @@ sub warnUnsupportedDeviceFunctions { "__bfloat162short_ru", "__bfloat162short_rn", "__bfloat162short_rd", + "__bfloat162ll_rz", + "__bfloat162ll_ru", + "__bfloat162ll_rn", + "__bfloat162ll_rd", "__bfloat162int_rz", "__bfloat162int_ru", "__bfloat162int_rn", "__bfloat162int_rd", "__bfloat162float", + "__bfloat162bfloat162", "__bfloat1622float2", "_Pow_int" ) diff --git a/doc/markdown/CUDA_Device_API_supported_by_HIP.md b/doc/markdown/CUDA_Device_API_supported_by_HIP.md index 2d9ba637..8983d3bc 100644 --- a/doc/markdown/CUDA_Device_API_supported_by_HIP.md +++ b/doc/markdown/CUDA_Device_API_supported_by_HIP.md @@ -11,11 +11,16 @@ |`__assertfail`| | | |`__assertfail`|1.9.0| | | | |`__ballot`| | | |`__ballot`|1.6.0| | | | |`__bfloat1622float2`|11.0| | | | | | | | +|`__bfloat162bfloat162`|11.0| | | | | | | | |`__bfloat162float`|11.0| | | | | | | | |`__bfloat162int_rd`|11.0| | | | | | | | |`__bfloat162int_rn`|11.0| | | | | | | | 
|`__bfloat162int_ru`|11.0| | | | | | | | |`__bfloat162int_rz`|11.0| | | | | | | | +|`__bfloat162ll_rd`|11.0| | | | | | | | +|`__bfloat162ll_rn`|11.0| | | | | | | | +|`__bfloat162ll_ru`|11.0| | | | | | | | +|`__bfloat162ll_rz`|11.0| | | | | | | | |`__bfloat162short_rd`|11.0| | | | | | | | |`__bfloat162short_rn`|11.0| | | | | | | | |`__bfloat162short_ru`|11.0| | | | | | | | @@ -24,10 +29,16 @@ |`__bfloat162uint_rn`|11.0| | | | | | | | |`__bfloat162uint_ru`|11.0| | | | | | | | |`__bfloat162uint_rz`|11.0| | | | | | | | +|`__bfloat162ull_rd`|11.0| | | | | | | | +|`__bfloat162ull_rn`|11.0| | | | | | | | +|`__bfloat162ull_ru`|11.0| | | | | | | | +|`__bfloat162ull_rz`|11.0| | | | | | | | |`__bfloat162ushort_rd`|11.0| | | | | | | | |`__bfloat162ushort_rn`|11.0| | | | | | | | |`__bfloat162ushort_ru`|11.0| | | | | | | | |`__bfloat162ushort_rz`|11.0| | | | | | | | +|`__bfloat16_as_short`|11.0| | | | | | | | +|`__bfloat16_as_ushort`|11.0| | | | | | | | |`__brev`| | | |`__brev`|1.6.0| | | | |`__brevll`| | | |`__brevll`|1.6.0| | | | |`__brkpt`| | | | | | | | | @@ -200,6 +211,7 @@ |`__half2ushort_rz`| | | |`__half2ushort_rz`|1.6.0| | | | |`__half_as_short`| | | |`__half_as_short`|1.6.0| | | | |`__half_as_ushort`| | | |`__half_as_ushort`|1.6.0| | | | +|`__halves2bfloat162`|11.0| | | | | | | | |`__halves2half2`| | | |`__halves2half2`|1.6.0| | | | |`__hbeq2`| | | |`__hbeq2`|1.6.0| | | | |`__hbequ2`| | | |`__hbequ2`|1.9.0| | | | @@ -233,9 +245,12 @@ |`__hgt2`| | | |`__hgt2`|1.6.0| | | | |`__hgtu`| | | |`__hgtu`|1.9.0| | | | |`__hgtu2`| | | |`__hgtu2`|1.9.0| | | | +|`__high2bfloat16`|11.0| | | | | | | | +|`__high2bfloat162`|11.0| | | | | | | | |`__high2float`| | | |`__high2float`|1.6.0| | | | |`__high2half`| | | |`__high2half`|1.6.0| | | | |`__high2half2`| | | |`__high2half2`|1.6.0| | | | +|`__highs2bfloat162`|11.0| | | | | | | | |`__highs2half2`| | | |`__highs2half2`|1.6.0| | | | |`__hiloint2double`| | | |`__hiloint2double`|1.6.0| | | | |`__hisinf`| | | |`__hisinf`|1.6.0| | | | @@ 
-301,6 +316,10 @@ |`__ldcv`|11.0| | | | | | | | |`__ldg`| | | |`__ldg`|1.6.0| | | | |`__ldlu`|11.0| | | | | | | | +|`__ll2bfloat16_rd`|11.0| | | | | | | | +|`__ll2bfloat16_rn`|11.0| | | | | | | | +|`__ll2bfloat16_ru`|11.0| | | | | | | | +|`__ll2bfloat16_rz`|11.0| | | | | | | | |`__ll2double_rd`| | | |`__ll2double_rd`|1.6.0| | | | |`__ll2double_rn`| | | |`__ll2double_rn`|1.6.0| | | | |`__ll2double_ru`| | | |`__ll2double_ru`|1.6.0| | | | @@ -317,10 +336,13 @@ |`__log2f`| | | |`__log2f`|1.6.0| | | | |`__logf`| | | |`__logf`|1.6.0| | | | |`__longlong_as_double`| | | |`__longlong_as_double`|1.6.0| | | | +|`__low2bfloat16`|11.0| | | | | | | | +|`__low2bfloat162`|11.0| | | | | | | | |`__low2float`| | | |`__low2float`|1.6.0| | | | |`__low2half`| | | |`__low2half`|1.6.0| | | | |`__low2half2`| | | |`__low2half2`|1.6.0| | | | |`__lowhigh2highlow`| | | |`__lowhigh2highlow`|1.6.0| | | | +|`__lows2bfloat162`|11.0| | | | | | | | |`__lows2half2`| | | |`__lows2half2`|1.6.0| | | | |`__mul24`| | | |`__mul24`|1.6.0| | | | |`__mul64hi`| | | |`__mul64hi`|1.6.0| | | | @@ -362,6 +384,7 @@ |`__short2half_rn`| | | |`__short2half_rn`|1.6.0| | | | |`__short2half_ru`| | | |`__short2half_ru`|1.6.0| | | | |`__short2half_rz`| | | |`__short2half_rz`|1.6.0| | | | +|`__short_as_bfloat16`|11.0| | | | | | | | |`__short_as_half`| | | |`__short_as_half`|1.9.0| | | | |`__signbit`| | | | | | | | | |`__signbitf`| | | | | | | | | @@ -396,6 +419,10 @@ |`__uint2half_ru`| | | |`__uint2half_ru`|1.6.0| | | | |`__uint2half_rz`| | | |`__uint2half_rz`|1.6.0| | | | |`__uint_as_float`| | | |`__uint_as_float`|1.6.0| | | | +|`__ull2bfloat16_rd`|11.0| | | | | | | | +|`__ull2bfloat16_rn`|11.0| | | | | | | | +|`__ull2bfloat16_ru`|11.0| | | | | | | | +|`__ull2bfloat16_rz`|11.0| | | | | | | | |`__ull2double_rd`| | | |`__ull2double_rd`|1.6.0| | | | |`__ull2double_rn`| | | |`__ull2double_rn`|1.6.0| | | | |`__ull2double_ru`| | | |`__ull2double_ru`|1.6.0| | | | @@ -421,6 +448,7 @@ |`__ushort2half_rn`| | | 
|`__ushort2half_rn`|1.6.0| | | | |`__ushort2half_ru`| | | |`__ushort2half_ru`|1.6.0| | | | |`__ushort2half_rz`| | | |`__ushort2half_rz`|1.6.0| | | | +|`__ushort_as_bfloat16`|11.0| | | | | | | | |`__ushort_as_half`| | | |`__ushort_as_half`|1.6.0| | | | |`__vabs2`| | | | | | | | | |`__vabs4`| | | | | | | | | diff --git a/src/CUDA2HIP_Device_functions.cpp b/src/CUDA2HIP_Device_functions.cpp index b22a39f1..07c2f797 100644 --- a/src/CUDA2HIP_Device_functions.cpp +++ b/src/CUDA2HIP_Device_functions.cpp @@ -741,6 +741,34 @@ const std::map CUDA_DEVICE_FUNCTION_MAP { {"__ushort2bfloat16_rz", {"__ushort2bfloat16_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__ushort2bfloat16_rd", {"__ushort2bfloat16_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, {"__ushort2bfloat16_ru", {"__ushort2bfloat16_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162ull_rn", {"__bfloat162ull_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162ull_rz", {"__bfloat162ull_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162ull_rd", {"__bfloat162ull_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162ull_ru", {"__bfloat162ull_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__ull2bfloat16_rn", {"__ull2bfloat16_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__ull2bfloat16_rz", {"__ull2bfloat16_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__ull2bfloat16_rd", {"__ull2bfloat16_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__ull2bfloat16_ru", {"__ull2bfloat16_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162ll_rn", {"__bfloat162ll_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162ll_rz", {"__bfloat162ll_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162ll_rd", {"__bfloat162ll_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162ll_ru", 
{"__bfloat162ll_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__ll2bfloat16_rn", {"__ll2bfloat16_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__ll2bfloat16_rz", {"__ll2bfloat16_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__ll2bfloat16_rd", {"__ll2bfloat16_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__ll2bfloat16_ru", {"__ll2bfloat16_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat162bfloat162", {"__bfloat162bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__lows2bfloat162", {"__lows2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__highs2bfloat162", {"__highs2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__high2bfloat16", {"__high2bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__low2bfloat16", {"__low2bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__halves2bfloat162", {"__halves2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__low2bfloat162", {"__low2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__high2bfloat162", {"__high2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat16_as_short", {"__bfloat16_as_short", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__bfloat16_as_ushort", {"__bfloat16_as_ushort", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__short_as_bfloat16", {"__short_as_bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, + {"__ushort_as_bfloat16", {"__ushort_as_bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}}, // atomic functions {"atomicAdd", {"atomicAdd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, {"atomicAdd_system", {"atomicAdd_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}}, @@ -848,6 +876,34 @@ const std::map CUDA_DEVICE_FUNCTION_VER_MAP { {"__ushort2bfloat16_rz", {CUDA_110, CUDA_0, CUDA_0 }}, {"__ushort2bfloat16_rd", {CUDA_110, CUDA_0,
CUDA_0 }}, {"__ushort2bfloat16_ru", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162ull_rn", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162ull_rz", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162ull_rd", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162ull_ru", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__ull2bfloat16_rn", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__ull2bfloat16_rz", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__ull2bfloat16_rd", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__ull2bfloat16_ru", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162ll_rn", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162ll_rz", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162ll_rd", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162ll_ru", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__ll2bfloat16_rn", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__ll2bfloat16_rz", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__ll2bfloat16_rd", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__ll2bfloat16_ru", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat162bfloat162", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__lows2bfloat162", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__highs2bfloat162", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__high2bfloat16", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__low2bfloat16", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__halves2bfloat162", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__low2bfloat162", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__high2bfloat162", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat16_as_short", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__bfloat16_as_ushort", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__short_as_bfloat16", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__ushort_as_bfloat16", {CUDA_110, CUDA_0, CUDA_0 }}, {"__hcmadd", {CUDA_111, CUDA_0, CUDA_0 }}, {"__hadd2_rn", {CUDA_116, CUDA_0, CUDA_0 }}, {"__hsub2_rn", {CUDA_116, CUDA_0, CUDA_0 }}, From f420847b06ca91bbd6c7a0f322bf28ae17279404 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Sun, 23 Oct 2022 13:15:32 +0200 Subject: [PATCH 27/43] [HIPIFY][fp] Added support of the missing float point device types + Updated the regenerated hipify-perl and 
CUDA_Device_API_supported_by_HIP.md accordingly --- bin/hipify-perl | 5 +++++ doc/markdown/CUDA_Device_API_supported_by_HIP.md | 5 +++++ src/CUDA2HIP_Device_types.cpp | 10 ++++++++++ 3 files changed, 20 insertions(+) diff --git a/bin/hipify-perl b/bin/hipify-perl index 6549957e..c66da9e6 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -5642,6 +5642,8 @@ sub warnUnsupportedFunctions { "nvrtcGetNumSupportedArchs", "nvrtcGetNVVMSize", "nvrtcGetNVVM", + "nv_bfloat162", + "nv_bfloat16", "memoryBarrier", "libraryPropertyType_t", "libraryPropertyType", @@ -6768,6 +6770,7 @@ sub warnUnsupportedFunctions { "bsric02Info", "__nv_saturation_t", "__nv_fp8x4_storage_t", + "__nv_fp8x4_e5m2", "__nv_fp8x4_e4m3", "__nv_fp8x2_storage_t", "__nv_fp8x2_e5m2", @@ -6776,6 +6779,8 @@ sub warnUnsupportedFunctions { "__nv_fp8_interpretation_t", "__nv_fp8_e5m2", "__nv_fp8_e4m3", + "__nv_bfloat16_raw", + "__nv_bfloat162_raw", "__nv_bfloat162", "__nv_bfloat16", "__curand_umul", diff --git a/doc/markdown/CUDA_Device_API_supported_by_HIP.md b/doc/markdown/CUDA_Device_API_supported_by_HIP.md index 8983d3bc..a8275f1b 100644 --- a/doc/markdown/CUDA_Device_API_supported_by_HIP.md +++ b/doc/markdown/CUDA_Device_API_supported_by_HIP.md @@ -794,6 +794,8 @@ |`__half_raw`| | | |`__half_raw`|1.9.0| | | | |`__nv_bfloat16`|11.0| | | | | | | | |`__nv_bfloat162`|11.0| | | | | | | | +|`__nv_bfloat162_raw`|11.0| | | | | | | | +|`__nv_bfloat16_raw`|11.0| | | | | | | | |`__nv_fp8_e4m3`|11.8| | | | | | | | |`__nv_fp8_e5m2`|11.8| | | | | | | | |`__nv_fp8_interpretation_t`|11.8| | | | | | | | @@ -802,8 +804,11 @@ |`__nv_fp8x2_e5m2`|11.8| | | | | | | | |`__nv_fp8x2_storage_t`|11.8| | | | | | | | |`__nv_fp8x4_e4m3`|11.8| | | | | | | | +|`__nv_fp8x4_e5m2`|11.8| | | | | | | | |`__nv_fp8x4_storage_t`|11.8| | | | | | | | |`__nv_saturation_t`|11.8| | | | | | | | +|`nv_bfloat16`|11.0| | | | | | | | +|`nv_bfloat162`|11.0| | | | | | | | \*A - Added; D - Deprecated; R - Removed; E - Experimental \ No newline at end of 
file diff --git a/src/CUDA2HIP_Device_types.cpp b/src/CUDA2HIP_Device_types.cpp index d5f60858..f621707a 100644 --- a/src/CUDA2HIP_Device_types.cpp +++ b/src/CUDA2HIP_Device_types.cpp @@ -31,7 +31,11 @@ const std::map CUDA_DEVICE_TYPE_NAME_MAP { {"__half2_raw", {"__half2_raw", "", CONV_DEVICE_TYPE, API_RUNTIME, 2}}, // Bfloat16 Precision Device types {"__nv_bfloat16", {"__hip_bfloat16", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, + {"nv_bfloat16", {"hip_bfloat16", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, + {"__nv_bfloat16_raw", {"__hip_bfloat16_raw", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, {"__nv_bfloat162", {"__hip_bfloat162", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, + {"nv_bfloat162", {"hip_bfloat162", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, + {"__nv_bfloat162_raw", {"__hip_bfloat162_raw", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, // float8 Precision Device types {"__nv_fp8_storage_t", {"__hip_fp8_storage_t", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, {"__nv_fp8x2_storage_t", {"__hip_fp8x2_storage_t", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, @@ -47,11 +51,16 @@ const std::map CUDA_DEVICE_TYPE_NAME_MAP { {"__nv_fp8_interpretation_t", {"__hip_fp8_interpretation_t", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, {"__NV_E4M3", {"__HIP_E4M3", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 2, UNSUPPORTED}}, {"__NV_E5M2", {"__HIP_E5M2", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 2, UNSUPPORTED}}, + {"__nv_fp8x4_e5m2", {"__hip_fp8x4_e5m2", "", CONV_DEVICE_TYPE, API_RUNTIME, 2, UNSUPPORTED}}, }; const std::map CUDA_DEVICE_TYPE_NAME_VER_MAP { {"__nv_bfloat16", {CUDA_110, CUDA_0, CUDA_0 }}, + {"nv_bfloat16", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__nv_bfloat16_raw", {CUDA_110, CUDA_0, CUDA_0 }}, {"__nv_bfloat162", {CUDA_110, CUDA_0, CUDA_0 }}, + {"nv_bfloat162", {CUDA_110, CUDA_0, CUDA_0 }}, + {"__nv_bfloat162_raw", {CUDA_110, CUDA_0, CUDA_0 }}, {"__nv_fp8_storage_t", {CUDA_118, CUDA_0,
CUDA_0 }}, {"__nv_fp8x2_storage_t", {CUDA_118, CUDA_0, CUDA_0 }}, {"__nv_fp8x4_storage_t", {CUDA_118, CUDA_0, CUDA_0 }}, @@ -66,6 +75,7 @@ const std::map CUDA_DEVICE_TYPE_NAME_VER_MAP { {"__nv_fp8_interpretation_t", {CUDA_118, CUDA_0, CUDA_0 }}, {"__NV_E4M3", {CUDA_118, CUDA_0, CUDA_0 }}, {"__NV_E5M2", {CUDA_118, CUDA_0, CUDA_0 }}, + {"__nv_fp8x4_e5m2", {CUDA_118, CUDA_0, CUDA_0 }}, }; const std::map HIP_DEVICE_TYPE_NAME_VER_MAP { From 9b2183c607e80c8b6cb87f6810e37460de14bc9b Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Sun, 23 Oct 2022 14:40:10 +0200 Subject: [PATCH 28/43] [HIPIFY][FFT][XT] Sync with cuFFT 11.8 + Updated the regenerated hipify-perl and CUFFT_API_supported_by_HIP.md --- bin/hipify-perl | 5 +++++ doc/markdown/CUFFT_API_supported_by_HIP.md | 7 ++++++- src/CUDA2HIP_FFT_API_functions.cpp | 3 +++ src/CUDA2HIP_FFT_API_types.cpp | 10 +++++++++- 4 files changed, 23 insertions(+), 2 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index c66da9e6..76fed4f4 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -5837,6 +5837,7 @@ sub warnUnsupportedFunctions { "cufftXtSetWorkAreaPolicy", "cufftXtSetWorkArea", "cufftXtSetGPUs", + "cufftXtSetDistribution", "cufftXtQueryType_t", "cufftXtQueryType", "cufftXtQueryPlan", @@ -5859,6 +5860,8 @@ sub warnUnsupportedFunctions { "cufftXt1dFactors", "cufftCompatibility_t", "cufftCompatibility", + "cufftBox3d_t", + "cufftBox3d", "cudnnWgradMode_t", "cudnnTransformTensorEx", "cudnnTransformTensor", @@ -7304,6 +7307,8 @@ sub warnUnsupportedFunctions { "CUFFT_XT_FORMAT_INPUT", "CUFFT_XT_FORMAT_INPLACE_SHUFFLED", "CUFFT_XT_FORMAT_INPLACE", + "CUFFT_XT_FORMAT_DISTRIBUTED_OUTPUT", + "CUFFT_XT_FORMAT_DISTRIBUTED_INPUT", "CUFFT_XT_FORMAT_1D_INPUT_SHUFFLED", "CUFFT_WORKAREA_USER", "CUFFT_WORKAREA_PERFORMANCE", diff --git a/doc/markdown/CUFFT_API_supported_by_HIP.md b/doc/markdown/CUFFT_API_supported_by_HIP.md index d9c3cbeb..163e5696 100644 --- a/doc/markdown/CUFFT_API_supported_by_HIP.md +++ 
b/doc/markdown/CUFFT_API_supported_by_HIP.md @@ -54,6 +54,8 @@ |`CUFFT_WORKAREA_PERFORMANCE`| | | | | | | | | |`CUFFT_WORKAREA_USER`|9.2| | | | | | | | |`CUFFT_XT_FORMAT_1D_INPUT_SHUFFLED`| | | | | | | | | +|`CUFFT_XT_FORMAT_DISTRIBUTED_INPUT`|11.8| | | | | | | | +|`CUFFT_XT_FORMAT_DISTRIBUTED_OUTPUT`|11.8| | | | | | | | |`CUFFT_XT_FORMAT_INPLACE`| | | | | | | | | |`CUFFT_XT_FORMAT_INPLACE_SHUFFLED`| | | | | | | | | |`CUFFT_XT_FORMAT_INPUT`| | | | | | | | | @@ -61,6 +63,8 @@ |`CUFFT_Z2D`| | | |`HIPFFT_Z2D`|1.7.0| | | | |`CUFFT_Z2Z`| | | |`HIPFFT_Z2Z`|1.7.0| | | | |`MAX_CUFFT_ERROR`| | | | | | | | | +|`cufftBox3d`|11.8| | | | | | | | +|`cufftBox3d_t`|11.8| | | | | | | | |`cufftCompatibility`| | | | | | | | | |`cufftCompatibility_t`| | | | | | | | | |`cufftComplex`| | | |`hipfftComplex`|1.7.0| | | | @@ -131,7 +135,7 @@ |`cufftSetWorkArea`| | | |`hipfftSetWorkArea`|1.7.0| | | | |`cufftXtClearCallback`| | | |`hipfftXtClearCallback`|4.3.0| | | | |`cufftXtExec`|8.0| | | | | | | | -|`cufftXtExecDescriptor`| | | | | | | | | +|`cufftXtExecDescriptor`|8.0| | | | | | | | |`cufftXtExecDescriptorC2C`| | | | | | | | | |`cufftXtExecDescriptorC2R`| | | | | | | | | |`cufftXtExecDescriptorD2Z`| | | | | | | | | @@ -146,6 +150,7 @@ |`cufftXtQueryPlan`| | | | | | | | | |`cufftXtSetCallback`| | | |`hipfftXtSetCallback`|4.3.0| | | | |`cufftXtSetCallbackSharedSize`| | | |`hipfftXtSetCallbackSharedSize`|4.3.0| | | | +|`cufftXtSetDistribution`|11.8| | | | | | | | |`cufftXtSetGPUs`| | | | | | | | | |`cufftXtSetWorkArea`| | | | | | | | | |`cufftXtSetWorkAreaPolicy`|9.2| | | | | | | | diff --git a/src/CUDA2HIP_FFT_API_functions.cpp b/src/CUDA2HIP_FFT_API_functions.cpp index a4db4976..01f9a2e2 100644 --- a/src/CUDA2HIP_FFT_API_functions.cpp +++ b/src/CUDA2HIP_FFT_API_functions.cpp @@ -84,6 +84,7 @@ const std::map CUDA_FFT_FUNCTION_MAP { {"cufftXtExec", {"hipfftXtExec", "", CONV_LIB_FUNC, API_FFT, 2, HIP_UNSUPPORTED}}, {"cufftXtExecDescriptor", {"hipfftXtExecDescriptor", "", CONV_LIB_FUNC, 
API_FFT, 2, HIP_UNSUPPORTED}}, {"cufftXtSetWorkAreaPolicy", {"hipfftXtSetWorkAreaPolicy", "", CONV_LIB_FUNC, API_FFT, 2, HIP_UNSUPPORTED}}, + {"cufftXtSetDistribution", {"hipfftXtSetDistribution", "", CONV_LIB_FUNC, API_FFT, 2, HIP_UNSUPPORTED}}, }; const std::map CUDA_FFT_FUNCTION_VER_MAP { @@ -93,7 +94,9 @@ const std::map CUDA_FFT_FUNCTION_VER_MAP { {"cufftXtMakePlanMany", {CUDA_80, CUDA_0, CUDA_0}}, {"cufftXtGetSizeMany", {CUDA_80, CUDA_0, CUDA_0}}, {"cufftXtExec", {CUDA_80, CUDA_0, CUDA_0}}, + {"cufftXtExecDescriptor", {CUDA_80, CUDA_0, CUDA_0}}, {"cufftXtSetWorkAreaPolicy", {CUDA_92, CUDA_0, CUDA_0}}, + {"cufftXtSetDistribution", {CUDA_118, CUDA_0, CUDA_0}}, }; const std::map HIP_FFT_FUNCTION_VER_MAP { diff --git a/src/CUDA2HIP_FFT_API_types.cpp b/src/CUDA2HIP_FFT_API_types.cpp index 9b003869..1c99d62c 100644 --- a/src/CUDA2HIP_FFT_API_types.cpp +++ b/src/CUDA2HIP_FFT_API_types.cpp @@ -77,7 +77,9 @@ const std::map CUDA_FFT_TYPE_NAME_MAP { {"CUFFT_XT_FORMAT_INPLACE", {"HIPFFT_XT_FORMAT_INPLACE", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x02 {"CUFFT_XT_FORMAT_INPLACE_SHUFFLED", {"HIPFFT_XT_FORMAT_INPLACE_SHUFFLED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x03 {"CUFFT_XT_FORMAT_1D_INPUT_SHUFFLED",{"HIPFFT_XT_FORMAT_1D_INPUT_SHUFFLED","", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x04 - {"CUFFT_FORMAT_UNDEFINED", {"HIPFFT_FORMAT_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x05 + {"CUFFT_XT_FORMAT_DISTRIBUTED_INPUT",{"HIPFFT_XT_FORMAT_DISTRIBUTED_INPUT","", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x05 + {"CUFFT_XT_FORMAT_DISTRIBUTED_OUTPUT",{"HIPFFT_XT_FORMAT_DISTRIBUTED_OUTPUT","", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x06 + {"CUFFT_FORMAT_UNDEFINED", {"HIPFFT_FORMAT_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x07 {"cufftXtCopyType_t", {"hipfftXtCopyType_t", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, 
{"cufftXtCopyType", {"hipfftXtCopyType", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, @@ -117,6 +119,8 @@ const std::map CUDA_FFT_TYPE_NAME_MAP { {"cufftHandle", {"hipfftHandle", "", CONV_TYPE, API_FFT, 1}}, {"cufftXt1dFactors_t", {"hipfftXt1dFactors_t", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, {"cufftXt1dFactors", {"hipfftXt1dFactors", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, + {"cufftBox3d_t", {"hipfftBox3d_t", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, + {"cufftBox3d", {"hipfftBox3d", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, }; const std::map CUDA_FFT_TYPE_NAME_VER_MAP { @@ -130,6 +134,10 @@ const std::map CUDA_FFT_TYPE_NAME_VER_MAP { {"cufftXtWorkAreaPolicy", {CUDA_92, CUDA_0, CUDA_0}}, {"CUFFT_WORKAREA_MINIMAL", {CUDA_92, CUDA_0, CUDA_0}}, {"CUFFT_WORKAREA_USER", {CUDA_92, CUDA_0, CUDA_0}}, + {"CUFFT_XT_FORMAT_DISTRIBUTED_INPUT",{CUDA_118, CUDA_0, CUDA_0}}, + {"CUFFT_XT_FORMAT_DISTRIBUTED_OUTPUT",{CUDA_118, CUDA_0, CUDA_0}}, + {"cufftBox3d_t", {CUDA_118, CUDA_0, CUDA_0}}, + {"cufftBox3d", {CUDA_118, CUDA_0, CUDA_0}}, }; const std::map HIP_FFT_TYPE_NAME_VER_MAP { From fb0685d0fe345e05bdd8c27ccef255bb21c5d294 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Sun, 23 Oct 2022 15:07:43 +0200 Subject: [PATCH 29/43] [HIPIFY][format] FFT data types + `HIP_UNSUPPORTED` -> `UNSUPPORTED` --- src/CUDA2HIP_FFT_API_types.cpp | 298 ++++++++++++++++----------------- 1 file changed, 149 insertions(+), 149 deletions(-) diff --git a/src/CUDA2HIP_FFT_API_types.cpp b/src/CUDA2HIP_FFT_API_types.cpp index 1c99d62c..8831f73f 100644 --- a/src/CUDA2HIP_FFT_API_types.cpp +++ b/src/CUDA2HIP_FFT_API_types.cpp @@ -26,163 +26,163 @@ THE SOFTWARE. 
const std::map CUDA_FFT_TYPE_NAME_MAP { // cuFFT defines - {"CUFFT_VER_MAJOR", {"HIPRAND_VER_MAJOR", "", CONV_DEFINE, API_FFT, 1, HIP_UNSUPPORTED}}, - {"CUFFT_VER_MINOR", {"HIPRAND_VER_MINOR", "", CONV_DEFINE, API_FFT, 1, HIP_UNSUPPORTED}}, - {"CUFFT_VER_PATCH", {"HIPRAND_VER_PATCH", "", CONV_DEFINE, API_FFT, 1, HIP_UNSUPPORTED}}, - {"CUFFT_VER_BUILD", {"HIPRAND_VER_BUILD", "", CONV_DEFINE, API_FFT, 1, HIP_UNSUPPORTED}}, - {"CUFFT_VERSION", {"HIPRAND_VERSION", "", CONV_DEFINE, API_FFT, 1, HIP_UNSUPPORTED}}, - {"CUFFT_FORWARD", {"HIPFFT_FORWARD", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // -1 - {"CUFFT_INVERSE", {"HIPFFT_BACKWARD", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 1 - {"CUFFT_COMPATIBILITY_DEFAULT", {"HIPFFT_COMPATIBILITY_DEFAULT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // CUFFT_COMPATIBILITY_FFTW_PADDING - {"MAX_CUFFT_ERROR", {"HIPFFT_MAX_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x11 + {"CUFFT_VER_MAJOR", {"HIPRAND_VER_MAJOR", "", CONV_DEFINE, API_FFT, 1, UNSUPPORTED}}, + {"CUFFT_VER_MINOR", {"HIPRAND_VER_MINOR", "", CONV_DEFINE, API_FFT, 1, UNSUPPORTED}}, + {"CUFFT_VER_PATCH", {"HIPRAND_VER_PATCH", "", CONV_DEFINE, API_FFT, 1, UNSUPPORTED}}, + {"CUFFT_VER_BUILD", {"HIPRAND_VER_BUILD", "", CONV_DEFINE, API_FFT, 1, UNSUPPORTED}}, + {"CUFFT_VERSION", {"HIPRAND_VERSION", "", CONV_DEFINE, API_FFT, 1, UNSUPPORTED}}, + {"CUFFT_FORWARD", {"HIPFFT_FORWARD", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // -1 + {"CUFFT_INVERSE", {"HIPFFT_BACKWARD", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 1 + {"CUFFT_COMPATIBILITY_DEFAULT", {"HIPFFT_COMPATIBILITY_DEFAULT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // CUFFT_COMPATIBILITY_FFTW_PADDING + {"MAX_CUFFT_ERROR", {"HIPFFT_MAX_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x11 // cuFFT enums - {"cufftResult_t", {"hipfftResult_t", "", CONV_TYPE, API_FFT, 1}}, - {"cufftResult", {"hipfftResult", "", CONV_TYPE, API_FFT, 1}}, - {"CUFFT_SUCCESS", 
{"HIPFFT_SUCCESS", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x0 0 - {"CUFFT_INVALID_PLAN", {"HIPFFT_INVALID_PLAN", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x1 1 - {"CUFFT_ALLOC_FAILED", {"HIPFFT_ALLOC_FAILED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x2 2 - {"CUFFT_INVALID_TYPE", {"HIPFFT_INVALID_TYPE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x3 3 - {"CUFFT_INVALID_VALUE", {"HIPFFT_INVALID_VALUE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x4 4 - {"CUFFT_INTERNAL_ERROR", {"HIPFFT_INTERNAL_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x5 5 - {"CUFFT_EXEC_FAILED", {"HIPFFT_EXEC_FAILED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x6 6 - {"CUFFT_SETUP_FAILED", {"HIPFFT_SETUP_FAILED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x7 7 - {"CUFFT_INVALID_SIZE", {"HIPFFT_INVALID_SIZE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x8 8 - {"CUFFT_UNALIGNED_DATA", {"HIPFFT_UNALIGNED_DATA", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x9 9 - {"CUFFT_INCOMPLETE_PARAMETER_LIST", {"HIPFFT_INCOMPLETE_PARAMETER_LIST", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xA 10 - {"CUFFT_INVALID_DEVICE", {"HIPFFT_INVALID_DEVICE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xB 11 - {"CUFFT_PARSE_ERROR", {"HIPFFT_PARSE_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xC 12 - {"CUFFT_NO_WORKSPACE", {"HIPFFT_NO_WORKSPACE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xD 13 - {"CUFFT_NOT_IMPLEMENTED", {"HIPFFT_NOT_IMPLEMENTED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xE 14 - {"CUFFT_LICENSE_ERROR", {"HIPFFT_LICENSE_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, - {"CUFFT_NOT_SUPPORTED", {"HIPFFT_NOT_SUPPORTED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x10 16 - - {"cufftType_t", {"hipfftType_t", "", CONV_TYPE, API_FFT, 1}}, - {"cufftType", {"hipfftType", "", CONV_TYPE, API_FFT, 1}}, - {"CUFFT_R2C", {"HIPFFT_R2C", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x2a - {"CUFFT_C2R", {"HIPFFT_C2R", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x2c - {"CUFFT_C2C", 
{"HIPFFT_C2C", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x29 - {"CUFFT_D2Z", {"HIPFFT_D2Z", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x6a - {"CUFFT_Z2D", {"HIPFFT_Z2D", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x6c - {"CUFFT_Z2Z", {"HIPFFT_Z2Z", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x69 - - {"cufftCompatibility_t", {"hipfftCompatibility_t", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, - {"cufftCompatibility", {"hipfftCompatibility", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, - {"CUFFT_COMPATIBILITY_FFTW_PADDING", {"HIPFFT_COMPATIBILITY_FFTW_PADDING", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x01 - - {"cufftXtSubFormat_t", {"hipfftXtSubFormat_t", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, - {"cufftXtSubFormat", {"hipfftXtSubFormat", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, - {"CUFFT_XT_FORMAT_INPUT", {"HIPFFT_XT_FORMAT_INPUT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x00 - {"CUFFT_XT_FORMAT_OUTPUT", {"HIPFFT_XT_FORMAT_OUTPUT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x01 - {"CUFFT_XT_FORMAT_INPLACE", {"HIPFFT_XT_FORMAT_INPLACE", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x02 - {"CUFFT_XT_FORMAT_INPLACE_SHUFFLED", {"HIPFFT_XT_FORMAT_INPLACE_SHUFFLED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x03 - {"CUFFT_XT_FORMAT_1D_INPUT_SHUFFLED",{"HIPFFT_XT_FORMAT_1D_INPUT_SHUFFLED","", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x04 - {"CUFFT_XT_FORMAT_DISTRIBUTED_INPUT",{"HIPFFT_XT_FORMAT_DISTRIBUTED_INPUT","", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x05 - {"CUFFT_XT_FORMAT_DISTRIBUTED_OUTPUT",{"HIPFFT_XT_FORMAT_DISTRIBUTED_OUTPUT","", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x06 - {"CUFFT_FORMAT_UNDEFINED", {"HIPFFT_FORMAT_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x07 - - {"cufftXtCopyType_t", {"hipfftXtCopyType_t", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, - 
{"cufftXtCopyType", {"hipfftXtCopyType", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, - {"CUFFT_COPY_HOST_TO_DEVICE", {"HIPFFT_COPY_HOST_TO_DEVICE", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x00 - {"CUFFT_COPY_DEVICE_TO_HOST", {"HIPFFT_COPY_DEVICE_TO_HOST", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x01 - {"CUFFT_COPY_DEVICE_TO_DEVICE", {"HIPFFT_COPY_DEVICE_TO_DEVICE", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x02 - {"CUFFT_COPY_UNDEFINED", {"HIPFFT_COPY_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x03 - - {"cufftXtQueryType_t", {"hipfftXtQueryType_t", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, - {"cufftXtQueryType", {"hipfftXtQueryType", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, - {"CUFFT_QUERY_1D_FACTORS", {"HIPFFT_QUERY_1D_FACTORS", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x00 - {"CUFFT_QUERY_UNDEFINED", {"HIPFFT_QUERY_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x01 - - {"cufftXtWorkAreaPolicy_t", {"hipfftXtWorkAreaPolicy_t", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, - {"cufftXtWorkAreaPolicy", {"hipfftXtWorkAreaPolicy", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, - {"CUFFT_WORKAREA_MINIMAL", {"HIPFFT_WORKAREA_MINIMAL", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0 - {"CUFFT_WORKAREA_USER", {"HIPFFT_WORKAREA_USER", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 1 - {"CUFFT_WORKAREA_PERFORMANCE", {"HIPFFT_WORKAREA_PERFORMANCE", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 2 - - {"cufftXtCallbackType_t", {"hipfftXtCallbackType_t", "", CONV_TYPE, API_FFT, 1}}, - {"cufftXtCallbackType", {"hipfftXtCallbackType", "", CONV_TYPE, API_FFT, 1}}, - {"CUFFT_CB_LD_COMPLEX", {"HIPFFT_CB_LD_COMPLEX", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x0 - {"CUFFT_CB_LD_COMPLEX_DOUBLE", {"HIPFFT_CB_LD_COMPLEX_DOUBLE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x1 - {"CUFFT_CB_LD_REAL", 
{"HIPFFT_CB_LD_REAL", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x2 - {"CUFFT_CB_LD_REAL_DOUBLE", {"HIPFFT_CB_LD_REAL_DOUBLE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x3 - {"CUFFT_CB_ST_COMPLEX", {"HIPFFT_CB_ST_COMPLEX", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x4 - {"CUFFT_CB_ST_COMPLEX_DOUBLE", {"HIPFFT_CB_ST_COMPLEX_DOUBLE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x5 - {"CUFFT_CB_ST_REAL", {"HIPFFT_CB_ST_REAL", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x6 - {"CUFFT_CB_ST_REAL_DOUBLE", {"HIPFFT_CB_ST_REAL_DOUBLE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x7 - {"CUFFT_CB_UNDEFINED", {"HIPFFT_CB_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x7 + {"cufftResult_t", {"hipfftResult_t", "", CONV_TYPE, API_FFT, 1}}, + {"cufftResult", {"hipfftResult", "", CONV_TYPE, API_FFT, 1}}, + {"CUFFT_SUCCESS", {"HIPFFT_SUCCESS", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x0 0 + {"CUFFT_INVALID_PLAN", {"HIPFFT_INVALID_PLAN", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x1 1 + {"CUFFT_ALLOC_FAILED", {"HIPFFT_ALLOC_FAILED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x2 2 + {"CUFFT_INVALID_TYPE", {"HIPFFT_INVALID_TYPE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x3 3 + {"CUFFT_INVALID_VALUE", {"HIPFFT_INVALID_VALUE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x4 4 + {"CUFFT_INTERNAL_ERROR", {"HIPFFT_INTERNAL_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x5 5 + {"CUFFT_EXEC_FAILED", {"HIPFFT_EXEC_FAILED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x6 6 + {"CUFFT_SETUP_FAILED", {"HIPFFT_SETUP_FAILED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x7 7 + {"CUFFT_INVALID_SIZE", {"HIPFFT_INVALID_SIZE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x8 8 + {"CUFFT_UNALIGNED_DATA", {"HIPFFT_UNALIGNED_DATA", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x9 9 + {"CUFFT_INCOMPLETE_PARAMETER_LIST", {"HIPFFT_INCOMPLETE_PARAMETER_LIST", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xA 10 + {"CUFFT_INVALID_DEVICE", {"HIPFFT_INVALID_DEVICE", "", CONV_NUMERIC_LITERAL, 
API_FFT, 1}}, // 0xB 11 + {"CUFFT_PARSE_ERROR", {"HIPFFT_PARSE_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xC 12 + {"CUFFT_NO_WORKSPACE", {"HIPFFT_NO_WORKSPACE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xD 13 + {"CUFFT_NOT_IMPLEMENTED", {"HIPFFT_NOT_IMPLEMENTED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xE 14 + {"CUFFT_LICENSE_ERROR", {"HIPFFT_LICENSE_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, + {"CUFFT_NOT_SUPPORTED", {"HIPFFT_NOT_SUPPORTED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x10 16 + + {"cufftType_t", {"hipfftType_t", "", CONV_TYPE, API_FFT, 1}}, + {"cufftType", {"hipfftType", "", CONV_TYPE, API_FFT, 1}}, + {"CUFFT_R2C", {"HIPFFT_R2C", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x2a + {"CUFFT_C2R", {"HIPFFT_C2R", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x2c + {"CUFFT_C2C", {"HIPFFT_C2C", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x29 + {"CUFFT_D2Z", {"HIPFFT_D2Z", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x6a + {"CUFFT_Z2D", {"HIPFFT_Z2D", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x6c + {"CUFFT_Z2Z", {"HIPFFT_Z2Z", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x69 + + {"cufftCompatibility_t", {"hipfftCompatibility_t", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"cufftCompatibility", {"hipfftCompatibility", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"CUFFT_COMPATIBILITY_FFTW_PADDING", {"HIPFFT_COMPATIBILITY_FFTW_PADDING", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x01 + + {"cufftXtSubFormat_t", {"hipfftXtSubFormat_t", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"cufftXtSubFormat", {"hipfftXtSubFormat", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"CUFFT_XT_FORMAT_INPUT", {"HIPFFT_XT_FORMAT_INPUT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x00 + {"CUFFT_XT_FORMAT_OUTPUT", {"HIPFFT_XT_FORMAT_OUTPUT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x01 + {"CUFFT_XT_FORMAT_INPLACE", {"HIPFFT_XT_FORMAT_INPLACE", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x02 + 
{"CUFFT_XT_FORMAT_INPLACE_SHUFFLED", {"HIPFFT_XT_FORMAT_INPLACE_SHUFFLED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x03 + {"CUFFT_XT_FORMAT_1D_INPUT_SHUFFLED", {"HIPFFT_XT_FORMAT_1D_INPUT_SHUFFLED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x04 + {"CUFFT_XT_FORMAT_DISTRIBUTED_INPUT", {"HIPFFT_XT_FORMAT_DISTRIBUTED_INPUT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x05 + {"CUFFT_XT_FORMAT_DISTRIBUTED_OUTPUT", {"HIPFFT_XT_FORMAT_DISTRIBUTED_OUTPUT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x06 + {"CUFFT_FORMAT_UNDEFINED", {"HIPFFT_FORMAT_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x07 + + {"cufftXtCopyType_t", {"hipfftXtCopyType_t", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"cufftXtCopyType", {"hipfftXtCopyType", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"CUFFT_COPY_HOST_TO_DEVICE", {"HIPFFT_COPY_HOST_TO_DEVICE", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x00 + {"CUFFT_COPY_DEVICE_TO_HOST", {"HIPFFT_COPY_DEVICE_TO_HOST", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x01 + {"CUFFT_COPY_DEVICE_TO_DEVICE", {"HIPFFT_COPY_DEVICE_TO_DEVICE", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x02 + {"CUFFT_COPY_UNDEFINED", {"HIPFFT_COPY_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x03 + + {"cufftXtQueryType_t", {"hipfftXtQueryType_t", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"cufftXtQueryType", {"hipfftXtQueryType", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"CUFFT_QUERY_1D_FACTORS", {"HIPFFT_QUERY_1D_FACTORS", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x00 + {"CUFFT_QUERY_UNDEFINED", {"HIPFFT_QUERY_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x01 + + {"cufftXtWorkAreaPolicy_t", {"hipfftXtWorkAreaPolicy_t", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"cufftXtWorkAreaPolicy", {"hipfftXtWorkAreaPolicy", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"CUFFT_WORKAREA_MINIMAL", 
{"HIPFFT_WORKAREA_MINIMAL", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0 + {"CUFFT_WORKAREA_USER", {"HIPFFT_WORKAREA_USER", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 1 + {"CUFFT_WORKAREA_PERFORMANCE", {"HIPFFT_WORKAREA_PERFORMANCE", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 2 + + {"cufftXtCallbackType_t", {"hipfftXtCallbackType_t", "", CONV_TYPE, API_FFT, 1}}, + {"cufftXtCallbackType", {"hipfftXtCallbackType", "", CONV_TYPE, API_FFT, 1}}, + {"CUFFT_CB_LD_COMPLEX", {"HIPFFT_CB_LD_COMPLEX", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x0 + {"CUFFT_CB_LD_COMPLEX_DOUBLE", {"HIPFFT_CB_LD_COMPLEX_DOUBLE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x1 + {"CUFFT_CB_LD_REAL", {"HIPFFT_CB_LD_REAL", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x2 + {"CUFFT_CB_LD_REAL_DOUBLE", {"HIPFFT_CB_LD_REAL_DOUBLE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x3 + {"CUFFT_CB_ST_COMPLEX", {"HIPFFT_CB_ST_COMPLEX", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x4 + {"CUFFT_CB_ST_COMPLEX_DOUBLE", {"HIPFFT_CB_ST_COMPLEX_DOUBLE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x5 + {"CUFFT_CB_ST_REAL", {"HIPFFT_CB_ST_REAL", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x6 + {"CUFFT_CB_ST_REAL_DOUBLE", {"HIPFFT_CB_ST_REAL_DOUBLE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x7 + {"CUFFT_CB_UNDEFINED", {"HIPFFT_CB_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x7 // cuFFT types - {"cufftReal", {"hipfftReal", "", CONV_TYPE, API_FFT, 1}}, - {"cufftDoubleReal", {"hipfftDoubleReal", "", CONV_TYPE, API_FFT, 1}}, - {"cufftComplex", {"hipfftComplex", "", CONV_TYPE, API_FFT, 1}}, - {"cufftDoubleComplex", {"hipfftDoubleComplex", "", CONV_TYPE, API_FFT, 1}}, - {"cufftHandle", {"hipfftHandle", "", CONV_TYPE, API_FFT, 1}}, - {"cufftXt1dFactors_t", {"hipfftXt1dFactors_t", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, - {"cufftXt1dFactors", {"hipfftXt1dFactors", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, - {"cufftBox3d_t", {"hipfftBox3d_t", "", CONV_TYPE, API_FFT, 
1, HIP_UNSUPPORTED}}, - {"cufftBox3d", {"hipfftBox3d", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, + {"cufftReal", {"hipfftReal", "", CONV_TYPE, API_FFT, 1}}, + {"cufftDoubleReal", {"hipfftDoubleReal", "", CONV_TYPE, API_FFT, 1}}, + {"cufftComplex", {"hipfftComplex", "", CONV_TYPE, API_FFT, 1}}, + {"cufftDoubleComplex", {"hipfftDoubleComplex", "", CONV_TYPE, API_FFT, 1}}, + {"cufftHandle", {"hipfftHandle", "", CONV_TYPE, API_FFT, 1}}, + {"cufftXt1dFactors_t", {"hipfftXt1dFactors_t", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"cufftXt1dFactors", {"hipfftXt1dFactors", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"cufftBox3d_t", {"hipfftBox3d_t", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"cufftBox3d", {"hipfftBox3d", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, }; const std::map CUDA_FFT_TYPE_NAME_VER_MAP { - {"CUFFT_VER_MAJOR", {CUDA_102, CUDA_0, CUDA_0}}, - {"CUFFT_VER_MINOR", {CUDA_102, CUDA_0, CUDA_0}}, - {"CUFFT_VER_PATCH", {CUDA_102, CUDA_0, CUDA_0}}, - {"CUFFT_VER_BUILD", {CUDA_102, CUDA_0, CUDA_0}}, - {"CUFFT_VERSION", {CUDA_102, CUDA_0, CUDA_0}}, - {"CUFFT_NOT_SUPPORTED", {CUDA_80, CUDA_0, CUDA_0}}, - {"cufftXtWorkAreaPolicy_t", {CUDA_92, CUDA_0, CUDA_0}}, - {"cufftXtWorkAreaPolicy", {CUDA_92, CUDA_0, CUDA_0}}, - {"CUFFT_WORKAREA_MINIMAL", {CUDA_92, CUDA_0, CUDA_0}}, - {"CUFFT_WORKAREA_USER", {CUDA_92, CUDA_0, CUDA_0}}, - {"CUFFT_XT_FORMAT_DISTRIBUTED_INPUT",{CUDA_118, CUDA_0, CUDA_0}}, - {"CUFFT_XT_FORMAT_DISTRIBUTED_OUTPUT",{CUDA_118, CUDA_0, CUDA_0}}, - {"cufftBox3d_t", {CUDA_118, CUDA_0, CUDA_0}}, - {"cufftBox3d", {CUDA_118, CUDA_0, CUDA_0}}, + {"CUFFT_VER_MAJOR", {CUDA_102, CUDA_0, CUDA_0}}, + {"CUFFT_VER_MINOR", {CUDA_102, CUDA_0, CUDA_0}}, + {"CUFFT_VER_PATCH", {CUDA_102, CUDA_0, CUDA_0}}, + {"CUFFT_VER_BUILD", {CUDA_102, CUDA_0, CUDA_0}}, + {"CUFFT_VERSION", {CUDA_102, CUDA_0, CUDA_0}}, + {"CUFFT_NOT_SUPPORTED", {CUDA_80, CUDA_0, CUDA_0}}, + {"cufftXtWorkAreaPolicy_t", {CUDA_92, CUDA_0, CUDA_0}}, + {"cufftXtWorkAreaPolicy", {CUDA_92, CUDA_0, 
CUDA_0}}, + {"CUFFT_WORKAREA_MINIMAL", {CUDA_92, CUDA_0, CUDA_0}}, + {"CUFFT_WORKAREA_USER", {CUDA_92, CUDA_0, CUDA_0}}, + {"CUFFT_XT_FORMAT_DISTRIBUTED_INPUT", {CUDA_118, CUDA_0, CUDA_0}}, + {"CUFFT_XT_FORMAT_DISTRIBUTED_OUTPUT", {CUDA_118, CUDA_0, CUDA_0}}, + {"cufftBox3d_t", {CUDA_118, CUDA_0, CUDA_0}}, + {"cufftBox3d", {CUDA_118, CUDA_0, CUDA_0}}, }; const std::map HIP_FFT_TYPE_NAME_VER_MAP { - {"HIPFFT_FORWARD", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_BACKWARD", {HIP_1070, HIP_0, HIP_0 }}, - {"hipfftResult_t", {HIP_1070, HIP_0, HIP_0 }}, - {"hipfftResult", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_SUCCESS", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_INVALID_PLAN", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_ALLOC_FAILED", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_INVALID_TYPE", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_INVALID_VALUE", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_INTERNAL_ERROR", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_EXEC_FAILED", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_SETUP_FAILED", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_INVALID_SIZE", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_UNALIGNED_DATA", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_INCOMPLETE_PARAMETER_LIST", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_INVALID_DEVICE", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_PARSE_ERROR", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_NO_WORKSPACE", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_NOT_IMPLEMENTED", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_NOT_SUPPORTED", {HIP_1070, HIP_0, HIP_0 }}, - {"hipfftType_t", {HIP_1070, HIP_0, HIP_0 }}, - {"hipfftType", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_R2C", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_C2R", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_C2C", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_D2Z", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_Z2D", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_Z2Z", {HIP_1070, HIP_0, HIP_0 }}, - {"hipfftReal", {HIP_1070, HIP_0, HIP_0 }}, - {"hipfftDoubleReal", {HIP_1070, HIP_0, HIP_0 }}, - {"hipfftComplex", {HIP_1070, HIP_0, HIP_0 }}, - 
{"hipfftDoubleComplex", {HIP_1070, HIP_0, HIP_0 }}, - {"hipfftHandle", {HIP_1070, HIP_0, HIP_0 }}, - {"hipfftXtCallbackType_t", {HIP_4030, HIP_0, HIP_0 }}, - {"hipfftXtCallbackType", {HIP_4030, HIP_0, HIP_0 }}, - {"HIPFFT_CB_LD_COMPLEX", {HIP_4030, HIP_0, HIP_0 }}, - {"HIPFFT_CB_LD_COMPLEX_DOUBLE", {HIP_4030, HIP_0, HIP_0 }}, - {"HIPFFT_CB_LD_REAL", {HIP_4030, HIP_0, HIP_0 }}, - {"HIPFFT_CB_LD_REAL_DOUBLE", {HIP_4030, HIP_0, HIP_0 }}, - {"HIPFFT_CB_ST_COMPLEX", {HIP_4030, HIP_0, HIP_0 }}, - {"HIPFFT_CB_ST_COMPLEX_DOUBLE", {HIP_4030, HIP_0, HIP_0 }}, - {"HIPFFT_CB_ST_REAL", {HIP_4030, HIP_0, HIP_0 }}, - {"HIPFFT_CB_ST_REAL_DOUBLE", {HIP_4030, HIP_0, HIP_0 }}, - {"HIPFFT_CB_UNDEFINED", {HIP_4030, HIP_0, HIP_0 }}, + {"HIPFFT_FORWARD", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_BACKWARD", {HIP_1070, HIP_0, HIP_0 }}, + {"hipfftResult_t", {HIP_1070, HIP_0, HIP_0 }}, + {"hipfftResult", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_SUCCESS", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_INVALID_PLAN", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_ALLOC_FAILED", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_INVALID_TYPE", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_INVALID_VALUE", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_INTERNAL_ERROR", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_EXEC_FAILED", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_SETUP_FAILED", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_INVALID_SIZE", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_UNALIGNED_DATA", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_INCOMPLETE_PARAMETER_LIST", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_INVALID_DEVICE", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_PARSE_ERROR", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_NO_WORKSPACE", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_NOT_IMPLEMENTED", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_NOT_SUPPORTED", {HIP_1070, HIP_0, HIP_0 }}, + {"hipfftType_t", {HIP_1070, HIP_0, HIP_0 }}, + {"hipfftType", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_R2C", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_C2R", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_C2C", 
{HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_D2Z", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_Z2D", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_Z2Z", {HIP_1070, HIP_0, HIP_0 }}, + {"hipfftReal", {HIP_1070, HIP_0, HIP_0 }}, + {"hipfftDoubleReal", {HIP_1070, HIP_0, HIP_0 }}, + {"hipfftComplex", {HIP_1070, HIP_0, HIP_0 }}, + {"hipfftDoubleComplex", {HIP_1070, HIP_0, HIP_0 }}, + {"hipfftHandle", {HIP_1070, HIP_0, HIP_0 }}, + {"hipfftXtCallbackType_t", {HIP_4030, HIP_0, HIP_0 }}, + {"hipfftXtCallbackType", {HIP_4030, HIP_0, HIP_0 }}, + {"HIPFFT_CB_LD_COMPLEX", {HIP_4030, HIP_0, HIP_0 }}, + {"HIPFFT_CB_LD_COMPLEX_DOUBLE", {HIP_4030, HIP_0, HIP_0 }}, + {"HIPFFT_CB_LD_REAL", {HIP_4030, HIP_0, HIP_0 }}, + {"HIPFFT_CB_LD_REAL_DOUBLE", {HIP_4030, HIP_0, HIP_0 }}, + {"HIPFFT_CB_ST_COMPLEX", {HIP_4030, HIP_0, HIP_0 }}, + {"HIPFFT_CB_ST_COMPLEX_DOUBLE", {HIP_4030, HIP_0, HIP_0 }}, + {"HIPFFT_CB_ST_REAL", {HIP_4030, HIP_0, HIP_0 }}, + {"HIPFFT_CB_ST_REAL_DOUBLE", {HIP_4030, HIP_0, HIP_0 }}, + {"HIPFFT_CB_UNDEFINED", {HIP_4030, HIP_0, HIP_0 }}, }; From d5e8601831bc75e4a834b7b038b2d18e4659a7a7 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 24 Oct 2022 14:35:58 +0200 Subject: [PATCH 30/43] [HIPIFY][doc] CUDA 11.8 is the latest supported release + Update README.md accordingly + Tested on Windows 10 and Ubuntu 21.10 --- README.md | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 1289bfc8..502296c0 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ After applying all the matchers, the output HIP source is produced. 1. [**LLVM+CLANG**](http://releases.llvm.org) of at least version [4.0.0](http://releases.llvm.org/download.html#4.0.0); the latest stable and recommended release: [**15.0.3**](https://github.com/llvm/llvm-project/releases/tag/llvmorg-15.0.3). -2. 
[**CUDA**](https://developer.nvidia.com/cuda-downloads) of at least version [8.0](https://developer.nvidia.com/cuda-80-ga2-download-archive), the latest supported version is [**11.7.1**](https://developer.nvidia.com/cuda-downloads). +2. [**CUDA**](https://developer.nvidia.com/cuda-downloads) of at least version [8.0](https://developer.nvidia.com/cuda-80-ga2-download-archive), the latest supported version is [**11.8.0**](https://developer.nvidia.com/cuda-downloads). @@ -160,7 +160,7 @@ After applying all the matchers, the output HIP source is produced. 14.0.2, 14.0.3, 14.0.4 - + - + @@ -199,14 +199,14 @@ To process a file, `hipify-clang` needs access to the same headers that would be For example: ```shell -./hipify-clang square.cu --cuda-path=/usr/local/cuda-11.7 -I /usr/local/cuda-11.7/samples/common/inc +./hipify-clang square.cu --cuda-path=/usr/local/cuda-11.8 -I /usr/local/cuda-11.8/samples/common/inc ``` `hipify-clang` arguments are given first, followed by a separator `'--'`, and then the arguments you'd pass to `clang` if you were compiling the input file. For example: ```bash -./hipify-clang cpp17.cu --cuda-path=/usr/local/cuda-11.7 -- -std=c++17 +./hipify-clang cpp17.cu --cuda-path=/usr/local/cuda-11.8 -- -std=c++17 ``` The [Clang manual for compiling CUDA](https://llvm.org/docs/CompileCudaWithLLVM.html#compiling-cuda-code) may be useful. @@ -327,9 +327,9 @@ Run `Visual Studio 17 2022`, open the generated `LLVM.sln`, build all, build pro - ***Linux***: `-DCUDA_TOOLKIT_ROOT_DIR=/usr/include` - - ***Windows***: `-DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.7"` + - ***Windows***: `-DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8"` - `-DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v11.7"` + `-DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v11.8"` 4. 
Ensure [`cuDNN`](https://developer.nvidia.com/rdp/cudnn-archive) of the version corresponding to CUDA's version is installed. @@ -337,7 +337,7 @@ Run `Visual Studio 17 2022`, open the generated `LLVM.sln`, build all, build pro - ***Linux***: `-DCUDA_DNN_ROOT_DIR=/usr/include` - - ***Windows***: `-DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-11.7-windows-x64-v8.5.0` + - ***Windows***: `-DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-11.8-windows-x64-v8.5.0` 5. Ensure [`CUB`](https://github.com/NVlabs/cub) of the version corresponding to CUDA's version is installed. @@ -389,7 +389,7 @@ Ubuntu 14: LLVM 4.0.0 - 7.1.0, CUDA 7.0 - 9.0, cuDNN 5.0.5 - 7.6.5 Ubuntu 16-18: LLVM 8.0.0 - 14.0.6, CUDA 8.0 - 10.2, cuDNN 5.1.10 - 8.0.5 -Ubuntu 20-21: LLVM 9.0.0 - 15.0.3, CUDA 8.0 - 11.7.1, cuDNN 5.1.10 - 8.5.0 +Ubuntu 20-21: LLVM 9.0.0 - 15.0.3, CUDA 8.0 - 11.8.0, cuDNN 5.1.10 - 8.5.0 Minimum build system requirements for the above configurations: @@ -441,7 +441,7 @@ cmake -- Performing Test CMAKE_HAVE_LIBC_PTHREAD -- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Success -- Found Threads: TRUE --- Found CUDA: /usr/local/cuda (found version "11.7") +-- Found CUDA: /usr/local/cuda (found version "11.8") -- Configuring done -- Generating done -- Build files have been written to: /usr/hipify/build @@ -453,7 +453,7 @@ make test-hipify ```shell Running HIPify regression tests ======================================== -CUDA 11.7 - will be used for testing +CUDA 11.8 - will be used for testing LLVM 15.0.3 - will be used for testing x86_64 - Platform architecture Linux 5.13.0-21-generic - Platform OS @@ -568,8 +568,8 @@ Testing Time: 6.22s | 11.0.1 - 11.1.0 | 7.0 - 11.2.2 | 7.6.5 - 8.0.5 | 2017.15.9.31, 2019.16.8.4 | 3.19.3 | 3.9.2 | | 12.0.0 - 13.0.1 | 7.0 - 11.5.1 | 7.6.5 - 8.3.2 | 2017.15.9.43, 2019.16.11.9 | 3.22.2 | 3.10.2 | | 14.0.0 - 14.0.6 | 7.0 - 11.7.1 | 8.0.5 - 8.4.1 | 2017.15.9.49, 2019.16.11.17, 2022.17.2.6 | 3.24.0 | 3.10.6 | -| 15.0.0 - 15.0.3 | 7.0 - 11.7.1 | 8.0.5 - 8.5.0 | 2017.15.9.50, 
2019.16.11.18, 2022.17.3.3 | 3.24.1 | 3.10.6 | -| 16.0.0git | 7.0 - 11.7.1 | 8.0.5 - 8.5.0 | 2017.15.9.50, 2019.16.11.18, 2022.17.3.3 | 3.24.1 | 3.10.6 | +| 15.0.0 - 15.0.3 | 7.0 - 11.8.0 | 8.0.5 - 8.5.0 | 2017.15.9.50, 2019.16.11.18, 2022.17.3.3 | 3.24.1 | 3.10.6 | +| 16.0.0git | 7.0 - 11.8.0 | 8.0.5 - 8.5.0 | 2017.15.9.50, 2019.16.11.18, 2022.17.3.3 | 3.24.1 | 3.10.6 | *Building with testing support by `Visual Studio 17 2022` on `Windows 10`:* @@ -582,9 +582,9 @@ cmake -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=../dist \ -DCMAKE_PREFIX_PATH=d:/LLVM/15.0.3/dist \ - -DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.7" \ - -DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v11.7" \ - -DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-11.7-windows-x64-v8.5.0 \ + -DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8" \ + -DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v11.8" \ + -DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-11.8-windows-x64-v8.5.0 \ -DCUDA_CUB_ROOT_DIR=d:/GIT/cub \ -DLLVM_EXTERNAL_LIT=d:/LLVM/15.0.3/build/Release/bin/llvm-lit.py \ ../hipify @@ -598,7 +598,7 @@ cmake -- Found PythonInterp: c:/Program Files/Python39/python.exe (found suitable version "3.9.5", minimum required is "3.6") -- Found lit: c:/Program Files/Python39/Scripts/lit.exe -- Found FileCheck: d:/LLVM/15.0.3/dist/bin/FileCheck.exe --- Found CUDA: c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.7 (found version "11.7") +-- Found CUDA: c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8 (found version "11.8") -- Configuring done -- Generating done -- Build files have been written to: d:/hipify/build From 6947bafcef55064e165aebef3bf6ab793aec205c Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 24 Oct 2022 16:23:58 +0200 Subject: [PATCH 31/43] [HIPIFY][DNN] Sync with cuDNN 8.6.0 + Update regenerated hipify-perl and CUDNN_API_supported_by_HIP.md --- bin/hipify-perl | 8 ++++++++ 
doc/markdown/CUDNN_API_supported_by_HIP.md | 8 ++++++++ src/CUDA2HIP_DNN_API_functions.cpp | 2 ++ src/CUDA2HIP_DNN_API_types.cpp | 14 ++++++++++++++ src/Statistics.cpp | 1 + src/Statistics.h | 1 + 6 files changed, 34 insertions(+) diff --git a/bin/hipify-perl b/bin/hipify-perl index 76fed4f4..e0ec2008 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -5977,6 +5977,7 @@ sub warnUnsupportedFunctions { "cudnnGetNormalizationBackwardWorkspaceSize", "cudnnGetMultiHeadAttnWeights", "cudnnGetMultiHeadAttnBuffers", + "cudnnGetMaxDeviceVersion", "cudnnGetFusedOpsVariantParamPackAttribute", "cudnnGetFusedOpsConstParamPackAttribute", "cudnnGetFoldedConvBackwardDataDescriptors", @@ -7398,6 +7399,8 @@ sub warnUnsupportedFunctions { "CUDNN_RESAMPLE_NEAREST", "CUDNN_RESAMPLE_MAXPOOL", "CUDNN_RESAMPLE_BILINEAR", + "CUDNN_RESAMPLE_AVGPOOL_INCLUDE_PADDING", + "CUDNN_RESAMPLE_AVGPOOL_EXCLUDE_PADDING", "CUDNN_RESAMPLE_AVGPOOL", "CUDNN_PTR_ZDATA", "CUDNN_PTR_YSUM", @@ -7570,6 +7573,9 @@ sub warnUnsupportedFunctions { "CUDNN_KNOB_TYPE_WINO_TILE", "CUDNN_KNOB_TYPE_USE_TEX", "CUDNN_KNOB_TYPE_TILE_SIZE", + "CUDNN_KNOB_TYPE_TILE_CGA_N", + "CUDNN_KNOB_TYPE_TILE_CGA_M", + "CUDNN_KNOB_TYPE_TILE_CGA", "CUDNN_KNOB_TYPE_TILEK", "CUDNN_KNOB_TYPE_SWIZZLE", "CUDNN_KNOB_TYPE_STAGES", @@ -7623,6 +7629,8 @@ sub warnUnsupportedFunctions { "CUDNN_DATA_UINT8", "CUDNN_DATA_INT8x32", "CUDNN_DATA_INT64", + "CUDNN_DATA_FP8_E5M2", + "CUDNN_DATA_FP8_E4M3", "CUDNN_DATA_BOOLEAN", "CUDNN_DATA_BFLOAT16", "CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC", diff --git a/doc/markdown/CUDNN_API_supported_by_HIP.md b/doc/markdown/CUDNN_API_supported_by_HIP.md index 4a3d5380..405b23c5 100644 --- a/doc/markdown/CUDNN_API_supported_by_HIP.md +++ b/doc/markdown/CUDNN_API_supported_by_HIP.md @@ -310,6 +310,8 @@ |`CUDNN_DATA_BOOLEAN`|8.3.0| | | | | | | | |`CUDNN_DATA_DOUBLE`|1.0.0| | |`HIPDNN_DATA_DOUBLE`| | | | | |`CUDNN_DATA_FLOAT`|1.0.0| | |`HIPDNN_DATA_FLOAT`| | | | | +|`CUDNN_DATA_FP8_E4M3`|8.6.0| | | | | | | | 
+|`CUDNN_DATA_FP8_E5M2`|8.6.0| | | | | | | | |`CUDNN_DATA_HALF`|3.0.0| | |`HIPDNN_DATA_HALF`| | | | | |`CUDNN_DATA_INT32`|6.0.0| | |`HIPDNN_DATA_INT32`| | | | | |`CUDNN_DATA_INT64`|8.1.0| | | | | | | | @@ -369,6 +371,9 @@ |`CUDNN_KNOB_TYPE_STAGES`|8.0.1| | | | | | | | |`CUDNN_KNOB_TYPE_SWIZZLE`|8.0.1| | | | | | | | |`CUDNN_KNOB_TYPE_TILEK`|8.0.1| | | | | | | | +|`CUDNN_KNOB_TYPE_TILE_CGA`|8.6.0| | | | | | | | +|`CUDNN_KNOB_TYPE_TILE_CGA_M`|8.6.0| | | | | | | | +|`CUDNN_KNOB_TYPE_TILE_CGA_N`|8.6.0| | | | | | | | |`CUDNN_KNOB_TYPE_TILE_SIZE`|8.0.1| | | | | | | | |`CUDNN_KNOB_TYPE_USE_TEX`|8.0.1| | | | | | | | |`CUDNN_KNOB_TYPE_WINO_TILE`|8.0.1| | | | | | | | @@ -566,6 +571,8 @@ |`CUDNN_REDUCE_TENSOR_NORM2`|6.0.0| | |`HIPDNN_REDUCE_TENSOR_NORM2`| | | | | |`CUDNN_REDUCE_TENSOR_NO_INDICES`|6.0.0| | |`HIPDNN_REDUCE_TENSOR_NO_INDICES`| | | | | |`CUDNN_RESAMPLE_AVGPOOL`|8.3.0| | | | | | | | +|`CUDNN_RESAMPLE_AVGPOOL_EXCLUDE_PADDING`|8.6.0| | | | | | | | +|`CUDNN_RESAMPLE_AVGPOOL_INCLUDE_PADDING`|8.6.0| | | | | | | | |`CUDNN_RESAMPLE_BILINEAR`|8.3.0| | | | | | | | |`CUDNN_RESAMPLE_MAXPOOL`|8.3.0| | | | | | | | |`CUDNN_RESAMPLE_NEAREST`|8.3.0| | | | | | | | @@ -944,6 +951,7 @@ |`cudnnGetFusedOpsConstParamPackAttribute`|7.6.0| | | | | | | | |`cudnnGetFusedOpsVariantParamPackAttribute`|7.6.0| | | | | | | | |`cudnnGetLRNDescriptor`|3.0.0| | |`hipdnnGetLRNDescriptor`| | | | | +|`cudnnGetMaxDeviceVersion`|8.6.0| | | | | | | | |`cudnnGetMultiHeadAttnBuffers`|7.5.0| | | | | | | | |`cudnnGetMultiHeadAttnWeights`|7.5.0| | | | | | | | |`cudnnGetNormalizationBackwardWorkspaceSize`|8.0.1| | | | | | | | diff --git a/src/CUDA2HIP_DNN_API_functions.cpp b/src/CUDA2HIP_DNN_API_functions.cpp index 3babf0da..d41f2dfc 100644 --- a/src/CUDA2HIP_DNN_API_functions.cpp +++ b/src/CUDA2HIP_DNN_API_functions.cpp @@ -27,6 +27,7 @@ const std::map CUDA_DNN_FUNCTION_MAP { {"cudnnGetVersion", {"hipdnnGetVersion", "", CONV_LIB_FUNC, API_DNN, 2}}, {"cudnnGetCudartVersion", {"hipdnnGetCudartVersion", "", 
CONV_LIB_FUNC, API_DNN, 2, HIP_UNSUPPORTED}}, + {"cudnnGetMaxDeviceVersion", {"hipdnnGetMaxDeviceVersion", "", CONV_LIB_FUNC, API_DNN, 2, HIP_UNSUPPORTED}}, {"cudnnQueryRuntimeError", {"hipdnnQueryRuntimeError", "", CONV_LIB_FUNC, API_DNN, 2, HIP_UNSUPPORTED}}, {"cudnnGetProperty", {"hipdnnGetProperty", "", CONV_LIB_FUNC, API_DNN, 2, HIP_UNSUPPORTED}}, {"cudnnGetErrorString", {"hipdnnGetErrorString", "", CONV_LIB_FUNC, API_DNN, 2}}, @@ -608,6 +609,7 @@ const std::map CUDA_DNN_FUNCTION_VER_MAP { {"cudnnSetRNNDescriptor_v5", {CUDNN_705, CUDNN_765, CUDNN_801}}, {"cudnnSetActivationDescriptorSwishBeta", {CUDNN_820, CUDA_0, CUDA_0}}, {"cudnnGetActivationDescriptorSwishBeta", {CUDNN_820, CUDA_0, CUDA_0}}, + {"cudnnGetMaxDeviceVersion", {CUDNN_860, CUDA_0, CUDA_0}}, }; const std::map HIP_DNN_FUNCTION_VER_MAP { diff --git a/src/CUDA2HIP_DNN_API_types.cpp b/src/CUDA2HIP_DNN_API_types.cpp index 7149587e..75d3bfb1 100644 --- a/src/CUDA2HIP_DNN_API_types.cpp +++ b/src/CUDA2HIP_DNN_API_types.cpp @@ -99,6 +99,8 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_DATA_BFLOAT16", {"HIPDNN_DATA_BFLOAT16", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 9 {"CUDNN_DATA_INT64", {"HIPDNN_DATA_INT64", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 10 {"CUDNN_DATA_BOOLEAN", {"HIPDNN_DATA_BOOLEAN", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 11 + {"CUDNN_DATA_FP8_E4M3", {"HIPDNN_DATA_FP8_E4M3", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 12 + {"CUDNN_DATA_FP8_E5M2", {"HIPDNN_DATA_FP8_E5M2", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 13 {"cudnnErrQueryMode_t", {"hipdnnErrQueryMode_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_ERRQUERY_RAWCODE", {"HIPDNN_ERRQUERY_RAWCODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 0 {"CUDNN_ERRQUERY_NONBLOCKING", {"HIPDNN_ERRQUERY_NONBLOCKING", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1 @@ -715,6 +717,9 @@ const std::map
CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_KNOB_TYPE_SPECFILT", {"HIPDNN_KNOB_TYPE_SPECFILT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_KNOB_TYPE_KERNEL_CFG", {"HIPDNN_KNOB_TYPE_KERNEL_CFG", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_KNOB_TYPE_WORKSPACE", {"HIPDNN_KNOB_TYPE_WORKSPACE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_KNOB_TYPE_TILE_CGA", {"HIPDNN_KNOB_TYPE_TILE_CGA", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_KNOB_TYPE_TILE_CGA_M", {"HIPDNN_KNOB_TYPE_TILE_CGA_M", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_KNOB_TYPE_TILE_CGA_N", {"HIPDNN_KNOB_TYPE_TILE_CGA_N", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_KNOB_TYPE_COUNTS", {"HIPDNN_KNOB_TYPE_COUNTS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"cudnnBackendHeurMode_t", {"hipdnnBackendHeurMode_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_HEUR_MODE_INSTANT", {"HIPDNN_HEUR_MODE_INSTANT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, @@ -744,6 +749,8 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_RESAMPLE_NEAREST", {"HIPDNN_RESAMPLE_NEAREST", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_RESAMPLE_BILINEAR", {"HIPDNN_RESAMPLE_BILINEAR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_RESAMPLE_AVGPOOL", {"HIPDNN_RESAMPLE_AVGPOOL", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_RESAMPLE_AVGPOOL_INCLUDE_PADDING", {"HIPDNN_RESAMPLE_AVGPOOL_INCLUDE_PADDING", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_RESAMPLE_AVGPOOL_EXCLUDE_PADDING", {"HIPDNN_RESAMPLE_AVGPOOL_EXCLUDE_PADDING", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_RESAMPLE_MAXPOOL", {"HIPDNN_RESAMPLE_MAXPOOL", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"cudnnSignalMode_t", {"hipdnnSignalMode_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_SIGNAL_SET", 
{"HIPDNN_SIGNAL_SET", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, @@ -1630,6 +1637,13 @@ const std::map CUDA_DNN_TYPE_NAME_VER_MAP { {"cudnnBackendNormFwdPhase_t", {CUDNN_850, CUDA_0, CUDA_0 }}, {"CUDNN_NORM_FWD_INFERENCE", {CUDNN_850, CUDA_0, CUDA_0 }}, {"CUDNN_NORM_FWD_TRAINING", {CUDNN_850, CUDA_0, CUDA_0 }}, + {"CUDNN_RESAMPLE_AVGPOOL_INCLUDE_PADDING", {CUDNN_860, CUDA_0, CUDA_0 }}, + {"CUDNN_RESAMPLE_AVGPOOL_EXCLUDE_PADDING", {CUDNN_860, CUDA_0, CUDA_0 }}, + {"CUDNN_KNOB_TYPE_TILE_CGA", {CUDNN_860, CUDA_0, CUDA_0 }}, + {"CUDNN_KNOB_TYPE_TILE_CGA_M", {CUDNN_860, CUDA_0, CUDA_0 }}, + {"CUDNN_KNOB_TYPE_TILE_CGA_N", {CUDNN_860, CUDA_0, CUDA_0 }}, + {"CUDNN_DATA_FP8_E4M3", {CUDNN_860, CUDA_0, CUDA_0 }}, + {"CUDNN_DATA_FP8_E5M2", {CUDNN_860, CUDA_0, CUDA_0 }}, }; const std::map HIP_DNN_TYPE_NAME_VER_MAP { diff --git a/src/Statistics.cpp b/src/Statistics.cpp index ecfdb55e..c95a9ff8 100644 --- a/src/Statistics.cpp +++ b/src/Statistics.cpp @@ -480,6 +480,7 @@ std::string Statistics::getCudaVersion(const cudaVersions& ver) { case CUDNN_830: return "8.3.0"; case CUDNN_840: return "8.4.0"; case CUDNN_850: return "8.5.0"; + case CUDNN_860: return "8.6.0"; } return ""; } diff --git a/src/Statistics.h b/src/Statistics.h index 9080be35..e15f1edc 100644 --- a/src/Statistics.h +++ b/src/Statistics.h @@ -236,6 +236,7 @@ enum cudaVersions { CUDNN_830 = 830, CUDNN_840 = 840, CUDNN_850 = 850, + CUDNN_860 = 860, }; enum hipVersions { From 0f560b68aa1cebc4f03db64ad331b52ba126844f Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 24 Oct 2022 16:32:54 +0200 Subject: [PATCH 32/43] [HIPIFY][doc] cuDNN 8.6.0 is the latest supported cuDNN release --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 502296c0..12e09c68 100644 --- a/README.md +++ b/README.md @@ -337,7 +337,7 @@ Run `Visual Studio 17 2022`, open the generated `LLVM.sln`, build all, build pro - ***Linux***: `-DCUDA_DNN_ROOT_DIR=/usr/include` - 
- ***Windows***: `-DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-11.8-windows-x64-v8.5.0` + - ***Windows***: `-DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-11.8-windows-x64-v8.6.0` 5. Ensure [`CUB`](https://github.com/NVlabs/cub) of the version corresponding to CUDA's version is installed. @@ -389,7 +389,7 @@ Ubuntu 14: LLVM 4.0.0 - 7.1.0, CUDA 7.0 - 9.0, cuDNN 5.0.5 - 7.6.5 Ubuntu 16-18: LLVM 8.0.0 - 14.0.6, CUDA 8.0 - 10.2, cuDNN 5.1.10 - 8.0.5 -Ubuntu 20-21: LLVM 9.0.0 - 15.0.3, CUDA 8.0 - 11.8.0, cuDNN 5.1.10 - 8.5.0 +Ubuntu 20-21: LLVM 9.0.0 - 15.0.3, CUDA 8.0 - 11.8.0, cuDNN 5.1.10 - 8.6.0 Minimum build system requirements for the above configurations: @@ -568,8 +568,8 @@ Testing Time: 6.22s | 11.0.1 - 11.1.0 | 7.0 - 11.2.2 | 7.6.5 - 8.0.5 | 2017.15.9.31, 2019.16.8.4 | 3.19.3 | 3.9.2 | | 12.0.0 - 13.0.1 | 7.0 - 11.5.1 | 7.6.5 - 8.3.2 | 2017.15.9.43, 2019.16.11.9 | 3.22.2 | 3.10.2 | | 14.0.0 - 14.0.6 | 7.0 - 11.7.1 | 8.0.5 - 8.4.1 | 2017.15.9.49, 2019.16.11.17, 2022.17.2.6 | 3.24.0 | 3.10.6 | -| 15.0.0 - 15.0.3 | 7.0 - 11.8.0 | 8.0.5 - 8.5.0 | 2017.15.9.50, 2019.16.11.18, 2022.17.3.3 | 3.24.1 | 3.10.6 | -| 16.0.0git | 7.0 - 11.8.0 | 8.0.5 - 8.5.0 | 2017.15.9.50, 2019.16.11.18, 2022.17.3.3 | 3.24.1 | 3.10.6 | +| 15.0.0 - 15.0.3 | 7.0 - 11.8.0 | 8.0.5 - 8.6.0 | 2017.15.9.50, 2019.16.11.18, 2022.17.3.3 | 3.24.1 | 3.10.6 | +| 16.0.0git | 7.0 - 11.8.0 | 8.0.5 - 8.6.0 | 2017.15.9.50, 2019.16.11.18, 2022.17.3.3 | 3.24.1 | 3.10.6 | *Building with testing support by `Visual Studio 17 2022` on `Windows 10`:* @@ -584,7 +584,7 @@ cmake -DCMAKE_PREFIX_PATH=d:/LLVM/15.0.3/dist \ -DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8" \ -DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v11.8" \ - -DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-11.8-windows-x64-v8.5.0 \ + -DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-11.8-windows-x64-v8.6.0 \ -DCUDA_CUB_ROOT_DIR=d:/GIT/cub \ -DLLVM_EXTERNAL_LIT=d:/LLVM/15.0.3/build/Release/bin/llvm-lit.py \ ../hipify From 
56b780c15a3d46158bdd13a0894c1db4ddaab889 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 25 Oct 2022 12:34:45 +0200 Subject: [PATCH 33/43] [HIPIFY][HIP][5.4.0] Sync - Part 1 + Bumped version to 5.4.0 + Unset all HIP_EXPERIMENTAL APIs + Updated the regenerated hipify-perl and some of the affected docs --- bin/hipify-perl | 216 ++++++------------ ...A_Driver_API_functions_supported_by_HIP.md | 94 ++++---- doc/markdown/CUDA_RTC_API_supported_by_HIP.md | 4 +- ..._Runtime_API_functions_supported_by_HIP.md | 48 ++-- src/CUDA2HIP_Driver_API_functions.cpp | 46 ++-- src/CUDA2HIP_Driver_API_types.cpp | 80 +++---- src/CUDA2HIP_RTC_API_functions.cpp | 8 +- src/CUDA2HIP_Runtime_API_functions.cpp | 40 ++-- src/CUDA2HIP_Runtime_API_types.cpp | 54 ++--- src/Statistics.cpp | 1 + src/Statistics.h | 3 +- 11 files changed, 263 insertions(+), 331 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index e0ec2008..1dbc1466 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -717,77 +717,7 @@ my %removed_funcs = ( ); my %experimental_funcs = ( - "nvrtcGetCUBINSize" => "5.3.0", - "nvrtcGetCUBIN" => "5.3.0", - "cudaUserObject_t" => "5.3.0", - "cudaUserObjectRetainFlags" => "5.3.0", - "cudaUserObjectRetain" => "5.3.0", - "cudaUserObjectRelease" => "5.3.0", - "cudaUserObjectNoDestructorSync" => "5.3.0", - "cudaUserObjectFlags" => "5.3.0", - "cudaUserObjectCreate" => "5.3.0", - "cudaMemoryTypeManaged" => "5.3.0", - "cudaLimitStackSize" => "5.3.0", - "cudaGraphUserObjectMove" => "5.3.0", - "cudaGraphUpload" => "5.3.0", - "cudaGraphRetainUserObject" => "5.3.0", - "cudaGraphReleaseUserObject" => "5.3.0", - "cudaGraphNodeTypeExtSemaphoreWait" => "5.3.0", - "cudaGraphNodeTypeExtSemaphoreSignal" => "5.3.0", - "cudaGraphMemAttributeType" => "5.3.0", - "cudaGraphMemAttrUsedMemHigh" => "5.3.0", - "cudaGraphMemAttrUsedMemCurrent" => "5.3.0", - "cudaGraphMemAttrReservedMemHigh" => "5.3.0", - "cudaGraphMemAttrReservedMemCurrent" => "5.3.0", - "cudaDeviceSetLimit" => "5.3.0", - "cudaDeviceSetGraphMemAttribute" => "5.3.0", - "cudaDeviceGraphMemTrim" => "5.3.0", -
"cudaDeviceSetGraphMemAttribute" => "5.3.0", - "cudaDeviceGraphMemTrim" => "5.3.0", - "cudaDeviceGetGraphMemAttribute" => "5.3.0", - "cuUserObjectRetain" => "5.3.0", - "cuUserObjectRelease" => "5.3.0", - "cuUserObjectCreate" => "5.3.0", - "cuLinkDestroy" => "5.3.0", - "cuLinkCreate_v2" => "5.3.0", - "cuLinkCreate" => "5.3.0", - "cuLinkComplete" => "5.3.0", - "cuLinkAddFile_v2" => "5.3.0", - "cuLinkAddFile" => "5.3.0", - "cuLinkAddData_v2" => "5.3.0", - "cuLinkAddData" => "5.3.0", - "cuGraphUpload" => "5.3.0", - "cuGraphRetainUserObject" => "5.3.0", - "cuGraphReleaseUserObject" => "5.3.0", - "cuDeviceSetGraphMemAttribute" => "5.3.0", - "cuDeviceGraphMemTrim" => "5.3.0", - "cuDeviceGetGraphMemAttribute" => "5.3.0", - "cuCtxSetLimit" => "5.3.0", - "CUuserObject_st" => "5.3.0", - "CUuserObject_flags_enum" => "5.3.0", - "CUuserObject_flags" => "5.3.0", - "CUuserObjectRetain_flags_enum" => "5.3.0", - "CUuserObjectRetain_flags" => "5.3.0", - "CUuserObject" => "5.3.0", - "CUjitInputType_enum" => "5.3.0", - "CUjitInputType" => "5.3.0", - "CUgraphMem_attribute_enum" => "5.3.0", - "CUgraphMem_attribute" => "5.3.0", - "CU_USER_OBJECT_NO_DESTRUCTOR_SYNC" => "5.3.0", - "CU_LIMIT_STACK_SIZE" => "5.3.0", - "CU_JIT_NUM_INPUT_TYPES" => "5.3.0", - "CU_JIT_INPUT_PTX" => "5.3.0", - "CU_JIT_INPUT_OBJECT" => "5.3.0", - "CU_JIT_INPUT_NVVM" => "5.3.0", - "CU_JIT_INPUT_LIBRARY" => "5.3.0", - "CU_JIT_INPUT_FATBINARY" => "5.3.0", - "CU_JIT_INPUT_CUBIN" => "5.3.0", - "CU_GRAPH_USER_OBJECT_MOVE" => "5.3.0", - "CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT" => "5.3.0", - "CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL" => "5.3.0", - "CU_GRAPH_MEM_ATTR_USED_MEM_HIGH" => "5.3.0", - "CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT" => "5.3.0", - "CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH" => "5.3.0", - "CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT" => "5.3.0", - "CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED" => "5.3.0" + ); $print_stats = 1 if $examine; @@ -925,77 +855,6 @@ sub subst { } sub experimentalSubstitutions { - 
subst("cudaDeviceSetLimit", "hipDeviceSetLimit", "device"); - subst("cuCtxSetLimit", "hipDeviceSetLimit", "context"); - subst("cuLinkAddData", "hiprtcLinkAddData", "module"); - subst("cuLinkAddData_v2", "hiprtcLinkAddData", "module"); - subst("cuLinkAddFile", "hiprtcLinkAddFile", "module"); - subst("cuLinkAddFile_v2", "hiprtcLinkAddFile", "module"); - subst("cuLinkComplete", "hiprtcLinkComplete", "module"); - subst("cuLinkCreate", "hiprtcLinkCreate", "module"); - subst("cuLinkCreate_v2", "hiprtcLinkCreate", "module"); - subst("cuLinkDestroy", "hiprtcLinkDestroy", "module"); - subst("cuDeviceGetGraphMemAttribute", "hipDeviceGetGraphMemAttribute", "graph"); - subst("cuDeviceGraphMemTrim", "hipDeviceGraphMemTrim", "graph"); - subst("cuDeviceSetGraphMemAttribute", "hipDeviceSetGraphMemAttribute", "graph"); - subst("cuGraphReleaseUserObject", "hipGraphReleaseUserObject", "graph"); - subst("cuGraphRetainUserObject", "hipGraphRetainUserObject", "graph"); - subst("cuGraphUpload", "hipGraphUpload", "graph"); - subst("cuUserObjectCreate", "hipUserObjectCreate", "graph"); - subst("cuUserObjectRelease", "hipUserObjectRelease", "graph"); - subst("cuUserObjectRetain", "hipUserObjectRetain", "graph"); - subst("cudaDeviceGetGraphMemAttribute", "hipDeviceGetGraphMemAttribute", "graph"); - subst("cudaDeviceGraphMemTrim", "hipDeviceGraphMemTrim", "graph"); - subst("cudaDeviceSetGraphMemAttribute", "hipDeviceSetGraphMemAttribute", "graph"); - subst("cudaGraphReleaseUserObject", "hipGraphReleaseUserObject", "graph"); - subst("cudaGraphRetainUserObject", "hipGraphRetainUserObject", "graph"); - subst("cudaGraphUpload", "hipGraphUpload", "graph"); - subst("cudaUserObjectCreate", "hipUserObjectCreate", "graph"); - subst("cudaUserObjectRelease", "hipUserObjectRelease", "graph"); - subst("cudaUserObjectRetain", "hipUserObjectRetain", "graph"); - subst("nvrtcGetCUBIN", "hiprtcGetBitcode", "library"); - subst("nvrtcGetCUBINSize", "hiprtcGetBitcodeSize", "library"); - 
subst("CUgraphMem_attribute", "hipGraphMemAttributeType", "type"); - subst("CUgraphMem_attribute_enum", "hipGraphMemAttributeType", "type"); - subst("CUjitInputType", "hiprtcJITInputType", "type"); - subst("CUjitInputType_enum", "hiprtcJITInputType", "type"); - subst("CUuserObject", "hipUserObject_t", "type"); - subst("CUuserObjectRetain_flags", "hipUserObjectRetainFlags", "type"); - subst("CUuserObjectRetain_flags_enum", "hipUserObjectRetainFlags", "type"); - subst("CUuserObject_flags", "hipUserObjectFlags", "type"); - subst("CUuserObject_flags_enum", "hipUserObjectFlags", "type"); - subst("CUuserObject_st", "hipUserObject", "type"); - subst("cudaGraphMemAttributeType", "hipGraphMemAttributeType", "type"); - subst("cudaUserObjectFlags", "hipUserObjectFlags", "type"); - subst("cudaUserObjectRetainFlags", "hipUserObjectRetainFlags", "type"); - subst("cudaUserObject_t", "hipUserObject_t", "type"); - subst("CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED", "hipDeviceAttributeVirtualMemoryManagementSupported", "numeric_literal"); - subst("CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT", "hipGraphMemAttrReservedMemCurrent", "numeric_literal"); - subst("CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH", "hipGraphMemAttrReservedMemHigh", "numeric_literal"); - subst("CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT", "hipGraphMemAttrUsedMemCurrent", "numeric_literal"); - subst("CU_GRAPH_MEM_ATTR_USED_MEM_HIGH", "hipGraphMemAttrUsedMemHigh", "numeric_literal"); - subst("CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL", "hipGraphNodeTypeExtSemaphoreSignal", "numeric_literal"); - subst("CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT", "hipGraphNodeTypeExtSemaphoreWait", "numeric_literal"); - subst("CU_GRAPH_USER_OBJECT_MOVE", "hipGraphUserObjectMove", "numeric_literal"); - subst("CU_JIT_INPUT_CUBIN", "HIPRTC_JIT_INPUT_CUBIN", "numeric_literal"); - subst("CU_JIT_INPUT_FATBINARY", "HIPRTC_JIT_INPUT_FATBINARY", "numeric_literal"); - subst("CU_JIT_INPUT_LIBRARY", "HIPRTC_JIT_INPUT_LIBRARY", "numeric_literal"); - 
subst("CU_JIT_INPUT_NVVM", "HIPRTC_JIT_INPUT_NVVM", "numeric_literal"); - subst("CU_JIT_INPUT_OBJECT", "HIPRTC_JIT_INPUT_OBJECT", "numeric_literal"); - subst("CU_JIT_INPUT_PTX", "HIPRTC_JIT_INPUT_PTX", "numeric_literal"); - subst("CU_JIT_NUM_INPUT_TYPES", "HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES", "numeric_literal"); - subst("CU_LIMIT_STACK_SIZE", "hipLimitStackSize", "numeric_literal"); - subst("CU_USER_OBJECT_NO_DESTRUCTOR_SYNC", "hipUserObjectNoDestructorSync", "numeric_literal"); - subst("cudaGraphMemAttrReservedMemCurrent", "hipGraphMemAttrReservedMemCurrent", "numeric_literal"); - subst("cudaGraphMemAttrReservedMemHigh", "hipGraphMemAttrReservedMemHigh", "numeric_literal"); - subst("cudaGraphMemAttrUsedMemCurrent", "hipGraphMemAttrUsedMemCurrent", "numeric_literal"); - subst("cudaGraphMemAttrUsedMemHigh", "hipGraphMemAttrUsedMemHigh", "numeric_literal"); - subst("cudaGraphNodeTypeExtSemaphoreSignal", "hipGraphNodeTypeExtSemaphoreSignal", "numeric_literal"); - subst("cudaGraphNodeTypeExtSemaphoreWait", "hipGraphNodeTypeExtSemaphoreWait", "numeric_literal"); - subst("cudaGraphUserObjectMove", "hipGraphUserObjectMove", "numeric_literal"); - subst("cudaLimitStackSize", "hipLimitStackSize", "numeric_literal"); - subst("cudaMemoryTypeManaged", "hipMemoryTypeManaged", "numeric_literal"); - subst("cudaUserObjectNoDestructorSync", "hipUserObjectNoDestructorSync", "numeric_literal"); } sub rocSubstitutions { @@ -1463,6 +1322,7 @@ sub simpleSubstitutions { subst("cudaDeviceGetStreamPriorityRange", "hipDeviceGetStreamPriorityRange", "device"); subst("cudaDeviceReset", "hipDeviceReset", "device"); subst("cudaDeviceSetCacheConfig", "hipDeviceSetCacheConfig", "device"); + subst("cudaDeviceSetLimit", "hipDeviceSetLimit", "device"); subst("cudaDeviceSetMemPool", "hipDeviceSetMemPool", "device"); subst("cudaDeviceSetSharedMemConfig", "hipDeviceSetSharedMemConfig", "device"); subst("cudaDeviceSynchronize", "hipDeviceSynchronize", "device"); @@ -1496,6 +1356,7 @@ sub 
simpleSubstitutions { subst("cuCtxPushCurrent_v2", "hipCtxPushCurrent", "context"); subst("cuCtxSetCacheConfig", "hipCtxSetCacheConfig", "context"); subst("cuCtxSetCurrent", "hipCtxSetCurrent", "context"); + subst("cuCtxSetLimit", "hipDeviceSetLimit", "context"); subst("cuCtxSetSharedMemConfig", "hipCtxSetSharedMemConfig", "context"); subst("cuCtxSynchronize", "hipCtxSynchronize", "context"); subst("cuDevicePrimaryCtxGetState", "hipDevicePrimaryCtxGetState", "context"); @@ -1506,6 +1367,14 @@ sub simpleSubstitutions { subst("cuDevicePrimaryCtxRetain", "hipDevicePrimaryCtxRetain", "context"); subst("cuDevicePrimaryCtxSetFlags", "hipDevicePrimaryCtxSetFlags", "context"); subst("cuDevicePrimaryCtxSetFlags_v2", "hipDevicePrimaryCtxSetFlags", "context"); + subst("cuLinkAddData", "hiprtcLinkAddData", "module"); + subst("cuLinkAddData_v2", "hiprtcLinkAddData", "module"); + subst("cuLinkAddFile", "hiprtcLinkAddFile", "module"); + subst("cuLinkAddFile_v2", "hiprtcLinkAddFile", "module"); + subst("cuLinkComplete", "hiprtcLinkComplete", "module"); + subst("cuLinkCreate", "hiprtcLinkCreate", "module"); + subst("cuLinkCreate_v2", "hiprtcLinkCreate", "module"); + subst("cuLinkDestroy", "hiprtcLinkDestroy", "module"); subst("cuModuleGetFunction", "hipModuleGetFunction", "module"); subst("cuModuleGetGlobal", "hipModuleGetGlobal", "module"); subst("cuModuleGetGlobal_v2", "hipModuleGetGlobal", "module"); @@ -1770,6 +1639,9 @@ sub simpleSubstitutions { subst("cudaLaunchHostFunc", "hipLaunchHostFunc", "execution"); subst("cudaLaunchKernel", "hipLaunchKernel", "execution"); subst("cudaSetupArgument", "hipSetupArgument", "execution"); + subst("cuDeviceGetGraphMemAttribute", "hipDeviceGetGraphMemAttribute", "graph"); + subst("cuDeviceGraphMemTrim", "hipDeviceGraphMemTrim", "graph"); + subst("cuDeviceSetGraphMemAttribute", "hipDeviceSetGraphMemAttribute", "graph"); subst("cuGraphAddChildGraphNode", "hipGraphAddChildGraphNode", "graph"); subst("cuGraphAddDependencies", 
"hipGraphAddDependencies", "graph"); subst("cuGraphAddEmptyNode", "hipGraphAddEmptyNode", "graph"); @@ -1814,7 +1686,16 @@ sub simpleSubstitutions { subst("cuGraphNodeGetDependencies", "hipGraphNodeGetDependencies", "graph"); subst("cuGraphNodeGetDependentNodes", "hipGraphNodeGetDependentNodes", "graph"); subst("cuGraphNodeGetType", "hipGraphNodeGetType", "graph"); + subst("cuGraphReleaseUserObject", "hipGraphReleaseUserObject", "graph"); subst("cuGraphRemoveDependencies", "hipGraphRemoveDependencies", "graph"); + subst("cuGraphRetainUserObject", "hipGraphRetainUserObject", "graph"); + subst("cuGraphUpload", "hipGraphUpload", "graph"); + subst("cuUserObjectCreate", "hipUserObjectCreate", "graph"); + subst("cuUserObjectRelease", "hipUserObjectRelease", "graph"); + subst("cuUserObjectRetain", "hipUserObjectRetain", "graph"); + subst("cudaDeviceGetGraphMemAttribute", "hipDeviceGetGraphMemAttribute", "graph"); + subst("cudaDeviceGraphMemTrim", "hipDeviceGraphMemTrim", "graph"); + subst("cudaDeviceSetGraphMemAttribute", "hipDeviceSetGraphMemAttribute", "graph"); subst("cudaGraphAddChildGraphNode", "hipGraphAddChildGraphNode", "graph"); subst("cudaGraphAddDependencies", "hipGraphAddDependencies", "graph"); subst("cudaGraphAddEmptyNode", "hipGraphAddEmptyNode", "graph"); @@ -1871,7 +1752,13 @@ sub simpleSubstitutions { subst("cudaGraphNodeGetDependencies", "hipGraphNodeGetDependencies", "graph"); subst("cudaGraphNodeGetDependentNodes", "hipGraphNodeGetDependentNodes", "graph"); subst("cudaGraphNodeGetType", "hipGraphNodeGetType", "graph"); + subst("cudaGraphReleaseUserObject", "hipGraphReleaseUserObject", "graph"); subst("cudaGraphRemoveDependencies", "hipGraphRemoveDependencies", "graph"); + subst("cudaGraphRetainUserObject", "hipGraphRetainUserObject", "graph"); + subst("cudaGraphUpload", "hipGraphUpload", "graph"); + subst("cudaUserObjectCreate", "hipUserObjectCreate", "graph"); + subst("cudaUserObjectRelease", "hipUserObjectRelease", "graph"); + 
subst("cudaUserObjectRetain", "hipUserObjectRetain", "graph"); subst("cuOccupancyMaxActiveBlocksPerMultiprocessor", "hipModuleOccupancyMaxActiveBlocksPerMultiprocessor", "occupancy"); subst("cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "occupancy"); subst("cuOccupancyMaxPotentialBlockSize", "hipModuleOccupancyMaxPotentialBlockSize", "occupancy"); @@ -3046,6 +2933,8 @@ sub simpleSubstitutions { subst("nvrtcCompileProgram", "hiprtcCompileProgram", "library"); subst("nvrtcCreateProgram", "hiprtcCreateProgram", "library"); subst("nvrtcDestroyProgram", "hiprtcDestroyProgram", "library"); + subst("nvrtcGetCUBIN", "hiprtcGetBitcode", "library"); + subst("nvrtcGetCUBINSize", "hiprtcGetBitcodeSize", "library"); subst("nvrtcGetErrorString", "hiprtcGetErrorString", "library"); subst("nvrtcGetLoweredName", "hiprtcGetLoweredName", "library"); subst("nvrtcGetPTX", "hiprtcGetCode", "library"); @@ -3227,6 +3116,8 @@ sub simpleSubstitutions { subst("CUgraphExec_st", "hipGraphExec", "type"); subst("CUgraphInstantiate_flags", "hipGraphInstantiateFlags", "type"); subst("CUgraphInstantiate_flags_enum", "hipGraphInstantiateFlags", "type"); + subst("CUgraphMem_attribute", "hipGraphMemAttributeType", "type"); + subst("CUgraphMem_attribute_enum", "hipGraphMemAttributeType", "type"); subst("CUgraphNode", "hipGraphNode_t", "type"); subst("CUgraphNodeType", "hipGraphNodeType", "type"); subst("CUgraphNodeType_enum", "hipGraphNodeType", "type"); @@ -3243,6 +3134,8 @@ sub simpleSubstitutions { subst("CUipcMemHandle", "hipIpcMemHandle_t", "type"); subst("CUipcMemHandle_st", "hipIpcMemHandle_st", "type"); subst("CUipcMemHandle_v1", "hipIpcMemHandle_t", "type"); + subst("CUjitInputType", "hiprtcJITInputType", "type"); + subst("CUjitInputType_enum", "hiprtcJITInputType", "type"); subst("CUjit_option", "hipJitOption", "type"); subst("CUjit_option_enum", "hipJitOption", "type"); subst("CUkernelNodeAttrID", "hipKernelNodeAttrID", 
"type"); @@ -3324,6 +3217,12 @@ sub simpleSubstitutions { subst("CUtexObject_v1", "hipTextureObject_t", "type"); subst("CUtexref", "hipTexRef", "type"); subst("CUtexref_st", "textureReference", "type"); + subst("CUuserObject", "hipUserObject_t", "type"); + subst("CUuserObjectRetain_flags", "hipUserObjectRetainFlags", "type"); + subst("CUuserObjectRetain_flags_enum", "hipUserObjectRetainFlags", "type"); + subst("CUuserObject_flags", "hipUserObjectFlags", "type"); + subst("CUuserObject_flags_enum", "hipUserObjectFlags", "type"); + subst("CUuserObject_st", "hipUserObject", "type"); subst("CUuuid", "hipUUID", "type"); subst("CUuuid_st", "hipUUID_t", "type"); subst("GLenum", "GLenum", "type"); @@ -3391,6 +3290,7 @@ sub simpleSubstitutions { subst("cudaGraphExecUpdateResult", "hipGraphExecUpdateResult", "type"); subst("cudaGraphExec_t", "hipGraphExec_t", "type"); subst("cudaGraphInstantiateFlags", "hipGraphInstantiateFlags", "type"); + subst("cudaGraphMemAttributeType", "hipGraphMemAttributeType", "type"); subst("cudaGraphNodeType", "hipGraphNodeType", "type"); subst("cudaGraphNode_t", "hipGraphNode_t", "type"); subst("cudaGraph_t", "hipGraph_t", "type"); @@ -3448,6 +3348,9 @@ sub simpleSubstitutions { subst("cudaTextureObject_t", "hipTextureObject_t", "type"); subst("cudaTextureReadMode", "hipTextureReadMode", "type"); subst("cudaUUID_t", "hipUUID", "type"); + subst("cudaUserObjectFlags", "hipUserObjectFlags", "type"); + subst("cudaUserObjectRetainFlags", "hipUserObjectRetainFlags", "type"); + subst("cudaUserObject_t", "hipUserObject_t", "type"); subst("cudnnActivationDescriptor_t", "hipdnnActivationDescriptor_t", "type"); subst("cudnnActivationMode_t", "hipdnnActivationMode_t", "type"); subst("cudnnBatchNormMode_t", "hipdnnBatchNormMode_t", "type"); @@ -4045,6 +3948,7 @@ sub simpleSubstitutions { subst("CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT", "hipDeviceAttributeTexturePitchAlignment", "numeric_literal"); subst("CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY", 
"hipDeviceAttributeTotalConstantMemory", "numeric_literal"); subst("CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING", "hipDeviceAttributeUnifiedAddressing", "numeric_literal"); + subst("CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED", "hipDeviceAttributeVirtualMemoryManagementSupported", "numeric_literal"); subst("CU_DEVICE_ATTRIBUTE_WARP_SIZE", "hipDeviceAttributeWarpSize", "numeric_literal"); subst("CU_DEVICE_P2P_ATTRIBUTE_ACCESS_ACCESS_SUPPORTED", "hipDevP2PAttrHipArrayAccessSupported", "numeric_literal"); subst("CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED", "hipDevP2PAttrAccessSupported", "numeric_literal"); @@ -4098,15 +4002,22 @@ sub simpleSubstitutions { subst("CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED", "hipGraphExecUpdateErrorTopologyChanged", "numeric_literal"); subst("CU_GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE", "hipGraphExecUpdateErrorUnsupportedFunctionChange", "numeric_literal"); subst("CU_GRAPH_EXEC_UPDATE_SUCCESS", "hipGraphExecUpdateSuccess", "numeric_literal"); + subst("CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT", "hipGraphMemAttrReservedMemCurrent", "numeric_literal"); + subst("CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH", "hipGraphMemAttrReservedMemHigh", "numeric_literal"); + subst("CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT", "hipGraphMemAttrUsedMemCurrent", "numeric_literal"); + subst("CU_GRAPH_MEM_ATTR_USED_MEM_HIGH", "hipGraphMemAttrUsedMemHigh", "numeric_literal"); subst("CU_GRAPH_NODE_TYPE_COUNT", "hipGraphNodeTypeCount", "numeric_literal"); subst("CU_GRAPH_NODE_TYPE_EMPTY", "hipGraphNodeTypeEmpty", "numeric_literal"); subst("CU_GRAPH_NODE_TYPE_EVENT_RECORD", "hipGraphNodeTypeEventRecord", "numeric_literal"); + subst("CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL", "hipGraphNodeTypeExtSemaphoreSignal", "numeric_literal"); + subst("CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT", "hipGraphNodeTypeExtSemaphoreWait", "numeric_literal"); subst("CU_GRAPH_NODE_TYPE_GRAPH", "hipGraphNodeTypeGraph", "numeric_literal"); subst("CU_GRAPH_NODE_TYPE_HOST", "hipGraphNodeTypeHost", 
"numeric_literal"); subst("CU_GRAPH_NODE_TYPE_KERNEL", "hipGraphNodeTypeKernel", "numeric_literal"); subst("CU_GRAPH_NODE_TYPE_MEMCPY", "hipGraphNodeTypeMemcpy", "numeric_literal"); subst("CU_GRAPH_NODE_TYPE_MEMSET", "hipGraphNodeTypeMemset", "numeric_literal"); subst("CU_GRAPH_NODE_TYPE_WAIT_EVENT", "hipGraphNodeTypeWaitEvent", "numeric_literal"); + subst("CU_GRAPH_USER_OBJECT_MOVE", "hipGraphUserObjectMove", "numeric_literal"); subst("CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS", "hipIpcMemLazyEnablePeerAccess", "numeric_literal"); subst("CU_JIT_CACHE_MODE", "hipJitOptionCacheMode", "numeric_literal"); subst("CU_JIT_ERROR_LOG_BUFFER", "hipJitOptionErrorLogBuffer", "numeric_literal"); @@ -4117,9 +4028,16 @@ sub simpleSubstitutions { subst("CU_JIT_GENERATE_LINE_INFO", "hipJitOptionGenerateLineInfo", "numeric_literal"); subst("CU_JIT_INFO_LOG_BUFFER", "hipJitOptionInfoLogBuffer", "numeric_literal"); subst("CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES", "hipJitOptionInfoLogBufferSizeBytes", "numeric_literal"); + subst("CU_JIT_INPUT_CUBIN", "HIPRTC_JIT_INPUT_CUBIN", "numeric_literal"); + subst("CU_JIT_INPUT_FATBINARY", "HIPRTC_JIT_INPUT_FATBINARY", "numeric_literal"); + subst("CU_JIT_INPUT_LIBRARY", "HIPRTC_JIT_INPUT_LIBRARY", "numeric_literal"); + subst("CU_JIT_INPUT_NVVM", "HIPRTC_JIT_INPUT_NVVM", "numeric_literal"); + subst("CU_JIT_INPUT_OBJECT", "HIPRTC_JIT_INPUT_OBJECT", "numeric_literal"); + subst("CU_JIT_INPUT_PTX", "HIPRTC_JIT_INPUT_PTX", "numeric_literal"); subst("CU_JIT_LOG_VERBOSE", "hipJitOptionLogVerbose", "numeric_literal"); subst("CU_JIT_MAX_REGISTERS", "hipJitOptionMaxRegisters", "numeric_literal"); subst("CU_JIT_NEW_SM3X_OPT", "hipJitOptionSm3xOpt", "numeric_literal"); + subst("CU_JIT_NUM_INPUT_TYPES", "HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES", "numeric_literal"); subst("CU_JIT_NUM_OPTIONS", "hipJitOptionNumOptions", "numeric_literal"); subst("CU_JIT_OPTIMIZATION_LEVEL", "hipJitOptionOptimizationLevel", "numeric_literal"); subst("CU_JIT_TARGET", "hipJitOptionTarget", 
"numeric_literal"); @@ -4130,6 +4048,7 @@ sub simpleSubstitutions { subst("CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE", "hipKernelNodeAttributeCooperative", "numeric_literal"); subst("CU_LIMIT_MALLOC_HEAP_SIZE", "hipLimitMallocHeapSize", "numeric_literal"); subst("CU_LIMIT_PRINTF_FIFO_SIZE", "hipLimitPrintfFifoSize", "numeric_literal"); + subst("CU_LIMIT_STACK_SIZE", "hipLimitStackSize", "numeric_literal"); subst("CU_MEMORYTYPE_ARRAY", "hipMemoryTypeArray", "numeric_literal"); subst("CU_MEMORYTYPE_DEVICE", "hipMemoryTypeDevice", "numeric_literal"); subst("CU_MEMORYTYPE_HOST", "hipMemoryTypeHost", "numeric_literal"); @@ -4252,6 +4171,7 @@ sub simpleSubstitutions { subst("CU_TR_ADDRESS_MODE_WRAP", "HIP_TR_ADDRESS_MODE_WRAP", "numeric_literal"); subst("CU_TR_FILTER_MODE_LINEAR", "HIP_TR_FILTER_MODE_LINEAR", "numeric_literal"); subst("CU_TR_FILTER_MODE_POINT", "HIP_TR_FILTER_MODE_POINT", "numeric_literal"); + subst("CU_USER_OBJECT_NO_DESTRUCTOR_SYNC", "hipUserObjectNoDestructorSync", "numeric_literal"); subst("NVRTC_ERROR_BUILTIN_OPERATION_FAILURE", "HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE", "numeric_literal"); subst("NVRTC_ERROR_COMPILATION", "HIPRTC_ERROR_COMPILATION", "numeric_literal"); subst("NVRTC_ERROR_INTERNAL_ERROR", "HIPRTC_ERROR_INTERNAL_ERROR", "numeric_literal"); @@ -4479,15 +4399,22 @@ sub simpleSubstitutions { subst("cudaGraphExecUpdateErrorUnsupportedFunctionChange", "hipGraphExecUpdateErrorUnsupportedFunctionChange", "numeric_literal"); subst("cudaGraphExecUpdateSuccess", "hipGraphExecUpdateSuccess", "numeric_literal"); subst("cudaGraphInstantiateFlagAutoFreeOnLaunch", "hipGraphInstantiateFlagAutoFreeOnLaunch", "numeric_literal"); + subst("cudaGraphMemAttrReservedMemCurrent", "hipGraphMemAttrReservedMemCurrent", "numeric_literal"); + subst("cudaGraphMemAttrReservedMemHigh", "hipGraphMemAttrReservedMemHigh", "numeric_literal"); + subst("cudaGraphMemAttrUsedMemCurrent", "hipGraphMemAttrUsedMemCurrent", "numeric_literal"); + subst("cudaGraphMemAttrUsedMemHigh", 
"hipGraphMemAttrUsedMemHigh", "numeric_literal"); subst("cudaGraphNodeTypeCount", "hipGraphNodeTypeCount", "numeric_literal"); subst("cudaGraphNodeTypeEmpty", "hipGraphNodeTypeEmpty", "numeric_literal"); subst("cudaGraphNodeTypeEventRecord", "hipGraphNodeTypeEventRecord", "numeric_literal"); + subst("cudaGraphNodeTypeExtSemaphoreSignal", "hipGraphNodeTypeExtSemaphoreSignal", "numeric_literal"); + subst("cudaGraphNodeTypeExtSemaphoreWait", "hipGraphNodeTypeExtSemaphoreWait", "numeric_literal"); subst("cudaGraphNodeTypeGraph", "hipGraphNodeTypeGraph", "numeric_literal"); subst("cudaGraphNodeTypeHost", "hipGraphNodeTypeHost", "numeric_literal"); subst("cudaGraphNodeTypeKernel", "hipGraphNodeTypeKernel", "numeric_literal"); subst("cudaGraphNodeTypeMemcpy", "hipGraphNodeTypeMemcpy", "numeric_literal"); subst("cudaGraphNodeTypeMemset", "hipGraphNodeTypeMemset", "numeric_literal"); subst("cudaGraphNodeTypeWaitEvent", "hipGraphNodeTypeWaitEvent", "numeric_literal"); + subst("cudaGraphUserObjectMove", "hipGraphUserObjectMove", "numeric_literal"); subst("cudaGraphicsRegisterFlagsNone", "hipGraphicsRegisterFlagsNone", "numeric_literal"); subst("cudaGraphicsRegisterFlagsReadOnly", "hipGraphicsRegisterFlagsReadOnly", "numeric_literal"); subst("cudaGraphicsRegisterFlagsSurfaceLoadStore", "hipGraphicsRegisterFlagsSurfaceLoadStore", "numeric_literal"); @@ -4497,6 +4424,7 @@ sub simpleSubstitutions { subst("cudaKernelNodeAttributeCooperative", "hipKernelNodeAttributeCooperative", "numeric_literal"); subst("cudaLimitMallocHeapSize", "hipLimitMallocHeapSize", "numeric_literal"); subst("cudaLimitPrintfFifoSize", "hipLimitPrintfFifoSize", "numeric_literal"); + subst("cudaLimitStackSize", "hipLimitStackSize", "numeric_literal"); subst("cudaMemAccessFlagsProtNone", "hipMemAccessFlagsProtNone", "numeric_literal"); subst("cudaMemAccessFlagsProtRead", "hipMemAccessFlagsProtRead", "numeric_literal"); subst("cudaMemAccessFlagsProtReadWrite", "hipMemAccessFlagsProtReadWrite", 
"numeric_literal"); @@ -4534,6 +4462,7 @@ sub simpleSubstitutions { subst("cudaMemcpyHostToHost", "hipMemcpyHostToHost", "numeric_literal"); subst("cudaMemoryTypeDevice", "hipMemoryTypeDevice", "numeric_literal"); subst("cudaMemoryTypeHost", "hipMemoryTypeHost", "numeric_literal"); + subst("cudaMemoryTypeManaged", "hipMemoryTypeManaged", "numeric_literal"); subst("cudaReadModeElementType", "hipReadModeElementType", "numeric_literal"); subst("cudaReadModeNormalizedFloat", "hipReadModeNormalizedFloat", "numeric_literal"); subst("cudaResViewFormatFloat1", "hipResViewFormatFloat1", "numeric_literal"); @@ -4587,6 +4516,7 @@ sub simpleSubstitutions { subst("cudaStreamCaptureStatusNone", "hipStreamCaptureStatusNone", "numeric_literal"); subst("cudaStreamSetCaptureDependencies", "hipStreamSetCaptureDependencies", "numeric_literal"); subst("cudaSuccess", "hipSuccess", "numeric_literal"); + subst("cudaUserObjectNoDestructorSync", "hipUserObjectNoDestructorSync", "numeric_literal"); subst("CUB_MAX", "CUB_MAX", "define"); subst("CUB_MIN", "CUB_MIN", "define"); subst("CUB_NAMESPACE_BEGIN", "BEGIN_HIPCUB_NAMESPACE", "define"); @@ -8363,7 +8293,7 @@ if ($help) { print STDERR "$USAGE\n"; } if ($version) { - print STDERR "HIP version 5.3.0\n"; + print STDERR "HIP version 5.4.0\n"; } while (@ARGV) { $fileName=shift (@ARGV); diff --git a/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md b/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md index 9ce898ad..5923086c 100644 --- a/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md +++ b/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md @@ -411,7 +411,7 @@ |`CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY`| | | |`hipDeviceAttributeTotalConstantMemory`|1.6.0| | | | |`CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING`| | | |`hipDeviceAttributeUnifiedAddressing`|4.3.0| | | | |`CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED`|10.2|11.2| | | | | | | -|`CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED`|11.2| | 
|`hipDeviceAttributeVirtualMemoryManagementSupported`|5.3.0| | |5.3.0| +|`CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED`|11.2| | |`hipDeviceAttributeVirtualMemoryManagementSupported`|5.3.0| | | | |`CU_DEVICE_ATTRIBUTE_WARP_SIZE`| | | |`hipDeviceAttributeWarpSize`|1.6.0| | | | |`CU_DEVICE_CPU`|8.0| | |`hipCpuDeviceId`|3.7.0| | | | |`CU_DEVICE_INVALID`|8.0| | |`hipInvalidDeviceId`|3.7.0| | | | @@ -599,16 +599,16 @@ |`CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED`|10.2| | |`hipGraphExecUpdateErrorTopologyChanged`|4.3.0| | | | |`CU_GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE`|11.2| | |`hipGraphExecUpdateErrorUnsupportedFunctionChange`|4.3.0| | | | |`CU_GRAPH_EXEC_UPDATE_SUCCESS`|10.2| | |`hipGraphExecUpdateSuccess`|4.3.0| | | | -|`CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT`|11.4| | |`hipGraphMemAttrReservedMemCurrent`|5.3.0| | |5.3.0| -|`CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH`|11.4| | |`hipGraphMemAttrReservedMemHigh`|5.3.0| | |5.3.0| -|`CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT`|11.4| | |`hipGraphMemAttrUsedMemCurrent`|5.3.0| | |5.3.0| -|`CU_GRAPH_MEM_ATTR_USED_MEM_HIGH`|11.4| | |`hipGraphMemAttrUsedMemHigh`|5.3.0| | |5.3.0| +|`CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT`|11.4| | |`hipGraphMemAttrReservedMemCurrent`|5.3.0| | | | +|`CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH`|11.4| | |`hipGraphMemAttrReservedMemHigh`|5.3.0| | | | +|`CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT`|11.4| | |`hipGraphMemAttrUsedMemCurrent`|5.3.0| | | | +|`CU_GRAPH_MEM_ATTR_USED_MEM_HIGH`|11.4| | |`hipGraphMemAttrUsedMemHigh`|5.3.0| | | | |`CU_GRAPH_NODE_TYPE_BATCH_MEM_OP`|11.7| | | | | | | | |`CU_GRAPH_NODE_TYPE_COUNT`|10.0| |11.0|`hipGraphNodeTypeCount`|4.3.0| | | | |`CU_GRAPH_NODE_TYPE_EMPTY`|10.0| | |`hipGraphNodeTypeEmpty`|4.3.0| | | | |`CU_GRAPH_NODE_TYPE_EVENT_RECORD`|11.1| | |`hipGraphNodeTypeEventRecord`|4.3.0| | | | -|`CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL`|11.2| | |`hipGraphNodeTypeExtSemaphoreSignal`|5.3.0| | |5.3.0| -|`CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT`|11.2| | 
|`hipGraphNodeTypeExtSemaphoreWait`|5.3.0| | |5.3.0| +|`CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL`|11.2| | |`hipGraphNodeTypeExtSemaphoreSignal`|5.3.0| | | | +|`CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT`|11.2| | |`hipGraphNodeTypeExtSemaphoreWait`|5.3.0| | | | |`CU_GRAPH_NODE_TYPE_GRAPH`|10.0| | |`hipGraphNodeTypeGraph`|4.3.0| | | | |`CU_GRAPH_NODE_TYPE_HOST`|10.0| | |`hipGraphNodeTypeHost`|4.3.0| | | | |`CU_GRAPH_NODE_TYPE_KERNEL`|10.0| | |`hipGraphNodeTypeKernel`|4.3.0| | | | @@ -617,7 +617,7 @@ |`CU_GRAPH_NODE_TYPE_MEM_ALLOC`|11.4| | | | | | | | |`CU_GRAPH_NODE_TYPE_MEM_FREE`|11.4| | | | | | | | |`CU_GRAPH_NODE_TYPE_WAIT_EVENT`|11.1| | |`hipGraphNodeTypeWaitEvent`|4.3.0| | | | -|`CU_GRAPH_USER_OBJECT_MOVE`|11.3| | |`hipGraphUserObjectMove`|5.3.0| | |5.3.0| +|`CU_GRAPH_USER_OBJECT_MOVE`|11.3| | |`hipGraphUserObjectMove`|5.3.0| | | | |`CU_IPC_HANDLE_SIZE`| | | |`HIP_IPC_HANDLE_SIZE`|1.6.0| | | | |`CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS`| | | |`hipIpcMemLazyEnablePeerAccess`|1.6.0| | | | |`CU_JIT_CACHE_MODE`| | | |`hipJitOptionCacheMode`|1.6.0| | | | @@ -637,17 +637,17 @@ |`CU_JIT_GLOBAL_SYMBOL_NAMES`| | | | | | | | | |`CU_JIT_INFO_LOG_BUFFER`| | | |`hipJitOptionInfoLogBuffer`|1.6.0| | | | |`CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES`| | | |`hipJitOptionInfoLogBufferSizeBytes`|1.6.0| | | | -|`CU_JIT_INPUT_CUBIN`| | | |`HIPRTC_JIT_INPUT_CUBIN`|5.3.0| | |5.3.0| -|`CU_JIT_INPUT_FATBINARY`| | | |`HIPRTC_JIT_INPUT_FATBINARY`|5.3.0| | |5.3.0| -|`CU_JIT_INPUT_LIBRARY`| | | |`HIPRTC_JIT_INPUT_LIBRARY`|5.3.0| | |5.3.0| -|`CU_JIT_INPUT_NVVM`|11.4| | |`HIPRTC_JIT_INPUT_NVVM`|5.3.0| | |5.3.0| -|`CU_JIT_INPUT_OBJECT`| | | |`HIPRTC_JIT_INPUT_OBJECT`|5.3.0| | |5.3.0| -|`CU_JIT_INPUT_PTX`| | | |`HIPRTC_JIT_INPUT_PTX`|5.3.0| | |5.3.0| +|`CU_JIT_INPUT_CUBIN`| | | |`HIPRTC_JIT_INPUT_CUBIN`|5.3.0| | | | +|`CU_JIT_INPUT_FATBINARY`| | | |`HIPRTC_JIT_INPUT_FATBINARY`|5.3.0| | | | +|`CU_JIT_INPUT_LIBRARY`| | | |`HIPRTC_JIT_INPUT_LIBRARY`|5.3.0| | | | +|`CU_JIT_INPUT_NVVM`|11.4| | |`HIPRTC_JIT_INPUT_NVVM`|5.3.0| | | 
| +|`CU_JIT_INPUT_OBJECT`| | | |`HIPRTC_JIT_INPUT_OBJECT`|5.3.0| | | | +|`CU_JIT_INPUT_PTX`| | | |`HIPRTC_JIT_INPUT_PTX`|5.3.0| | | | |`CU_JIT_LOG_VERBOSE`| | | |`hipJitOptionLogVerbose`|1.6.0| | | | |`CU_JIT_LTO`|11.4| | | | | | | | |`CU_JIT_MAX_REGISTERS`| | | |`hipJitOptionMaxRegisters`|1.6.0| | | | |`CU_JIT_NEW_SM3X_OPT`| | | |`hipJitOptionSm3xOpt`|1.6.0| | | | -|`CU_JIT_NUM_INPUT_TYPES`| | | |`HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES`|5.3.0| | |5.3.0| +|`CU_JIT_NUM_INPUT_TYPES`| | | |`HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES`|5.3.0| | | | |`CU_JIT_NUM_OPTIONS`| | | |`hipJitOptionNumOptions`|1.6.0| | | | |`CU_JIT_OPTIMIZATION_LEVEL`| | | |`hipJitOptionOptimizationLevel`|1.6.0| | | | |`CU_JIT_OPTIMIZE_UNUSED_DEVICE_VARIABLES`|11.7| | | | | | | | @@ -688,7 +688,7 @@ |`CU_LIMIT_MAX_L2_FETCH_GRANULARITY`|10.0| | | | | | | | |`CU_LIMIT_PERSISTING_L2_CACHE_SIZE`|11.0| | | | | | | | |`CU_LIMIT_PRINTF_FIFO_SIZE`| | | |`hipLimitPrintfFifoSize`|4.5.0| | | | -|`CU_LIMIT_STACK_SIZE`| | | |`hipLimitStackSize`|5.3.0| | |5.3.0| +|`CU_LIMIT_STACK_SIZE`| | | |`hipLimitStackSize`|5.3.0| | | | |`CU_MEMHOSTALLOC_DEVICEMAP`| | | |`hipHostMallocMapped`|1.6.0| | | | |`CU_MEMHOSTALLOC_PORTABLE`| | | |`hipHostMallocPortable`|1.6.0| | | | |`CU_MEMHOSTALLOC_WRITECOMBINED`| | | |`hipHostMallocWriteCombined`|1.6.0| | | | @@ -889,7 +889,7 @@ |`CU_TR_ADDRESS_MODE_WRAP`| | | |`HIP_TR_ADDRESS_MODE_WRAP`|3.5.0| | | | |`CU_TR_FILTER_MODE_LINEAR`| | | |`HIP_TR_FILTER_MODE_LINEAR`|3.5.0| | | | |`CU_TR_FILTER_MODE_POINT`| | | |`HIP_TR_FILTER_MODE_POINT`|3.5.0| | | | -|`CU_USER_OBJECT_NO_DESTRUCTOR_SYNC`|11.3| | |`hipUserObjectNoDestructorSync`|5.3.0| | |5.3.0| +|`CU_USER_OBJECT_NO_DESTRUCTOR_SYNC`|11.3| | |`hipUserObjectNoDestructorSync`|5.3.0| | | | |`CUaccessPolicyWindow`|11.0| | |`hipAccessPolicyWindow`|5.2.0| | | | |`CUaccessPolicyWindow_st`|11.0| | |`hipAccessPolicyWindow`|5.2.0| | | | |`CUaccessProperty`|11.0| | |`hipAccessProperty`|5.2.0| | | | @@ -1000,8 +1000,8 @@ |`CUgraphExec_st`|10.0| | 
|`hipGraphExec`|4.3.0| | | | |`CUgraphInstantiate_flags`|11.4| | |`hipGraphInstantiateFlags`|5.2.0| | | | |`CUgraphInstantiate_flags_enum`|11.4| | |`hipGraphInstantiateFlags`|5.2.0| | | | -|`CUgraphMem_attribute`|11.4| | |`hipGraphMemAttributeType`|5.3.0| | |5.3.0| -|`CUgraphMem_attribute_enum`|11.4| | |`hipGraphMemAttributeType`|5.3.0| | |5.3.0| +|`CUgraphMem_attribute`|11.4| | |`hipGraphMemAttributeType`|5.3.0| | | | +|`CUgraphMem_attribute_enum`|11.4| | |`hipGraphMemAttributeType`|5.3.0| | | | |`CUgraphNode`|10.0| | |`hipGraphNode_t`|4.3.0| | | | |`CUgraphNodeType`|10.0| | |`hipGraphNodeType`|4.3.0| | | | |`CUgraphNodeType_enum`|10.0| | |`hipGraphNodeType`|4.3.0| | | | @@ -1022,8 +1022,8 @@ |`CUipcMemHandle_v1`|11.3| | |`hipIpcMemHandle_t`|1.6.0| | | | |`CUipcMem_flags`| | | | | | | | | |`CUipcMem_flags_enum`| | | | | | | | | -|`CUjitInputType`| | | |`hiprtcJITInputType`|5.3.0| | |5.3.0| -|`CUjitInputType_enum`| | | |`hiprtcJITInputType`|5.3.0| | |5.3.0| +|`CUjitInputType`| | | |`hiprtcJITInputType`|5.3.0| | | | +|`CUjitInputType_enum`| | | |`hiprtcJITInputType`|5.3.0| | | | |`CUjit_cacheMode`| | | | | | | | | |`CUjit_cacheMode_enum`| | | | | | | | | |`CUjit_fallback`| | | | | | | | | @@ -1047,8 +1047,8 @@ |`CUlaunchConfig_st`|11.8| | | | | | | | |`CUlimit`| | | |`hipLimit_t`|1.6.0| | | | |`CUlimit_enum`| | | |`hipLimit_t`|1.6.0| | | | -|`CUlinkState`| | | |`hiprtcLinkState`|5.3.0| | |5.3.0| -|`CUlinkState_st`| | | |`ihiprtcLinkState`|5.3.0| | |5.3.0| +|`CUlinkState`| | | |`hiprtcLinkState`|5.3.0| | | | +|`CUlinkState_st`| | | |`ihiprtcLinkState`|5.3.0| | | | |`CUmemAccessDesc`|10.2| | |`hipMemAccessDesc`|5.2.0| | | | |`CUmemAccessDesc_st`|10.2| | |`hipMemAccessDesc`|5.2.0| | | | |`CUmemAccessDesc_v1`|11.3| | |`hipMemAccessDesc`|5.2.0| | | | @@ -1152,12 +1152,12 @@ |`CUtexObject_v1`|11.3| | |`hipTextureObject_t`|1.7.0| | | | |`CUtexref`| | | |`hipTexRef`|3.10.0| | | | |`CUtexref_st`| | | |`textureReference`|1.6.0| | | | -|`CUuserObject`|11.3| | 
|`hipUserObject_t`|5.3.0| | |5.3.0| -|`CUuserObjectRetain_flags`|11.3| | |`hipUserObjectRetainFlags`|5.3.0| | |5.3.0| -|`CUuserObjectRetain_flags_enum`|11.3| | |`hipUserObjectRetainFlags`|5.3.0| | |5.3.0| -|`CUuserObject_flags`|11.3| | |`hipUserObjectFlags`|5.3.0| | |5.3.0| -|`CUuserObject_flags_enum`|11.3| | |`hipUserObjectFlags`|5.3.0| | |5.3.0| -|`CUuserObject_st`|11.3| | |`hipUserObject`|5.3.0| | |5.3.0| +|`CUuserObject`|11.3| | |`hipUserObject_t`|5.3.0| | | | +|`CUuserObjectRetain_flags`|11.3| | |`hipUserObjectRetainFlags`|5.3.0| | | | +|`CUuserObjectRetain_flags_enum`|11.3| | |`hipUserObjectRetainFlags`|5.3.0| | | | +|`CUuserObject_flags`|11.3| | |`hipUserObjectFlags`|5.3.0| | | | +|`CUuserObject_flags_enum`|11.3| | |`hipUserObjectFlags`|5.3.0| | | | +|`CUuserObject_st`|11.3| | |`hipUserObject`|5.3.0| | | | |`CUuuid`| | | |`hipUUID`|5.2.0| | | | |`CUuuid_st`| | | |`hipUUID_t`|5.2.0| | | | |`GLenum`| | | |`GLenum`|5.1.0| | | | @@ -1262,7 +1262,7 @@ |`cuCtxResetPersistingL2Cache`|11.0| | | | | | | | |`cuCtxSetCacheConfig`| | | |`hipCtxSetCacheConfig`|1.9.0|1.9.0| | | |`cuCtxSetCurrent`| | | |`hipCtxSetCurrent`|1.6.0|1.9.0| | | -|`cuCtxSetLimit`| | | |`hipDeviceSetLimit`|5.3.0| | |5.3.0| +|`cuCtxSetLimit`| | | |`hipDeviceSetLimit`|5.3.0| | | | |`cuCtxSetSharedMemConfig`| | | |`hipCtxSetSharedMemConfig`|1.9.0|1.9.0| | | |`cuCtxSynchronize`| | | |`hipCtxSynchronize`|1.9.0|1.9.0| | | @@ -1277,14 +1277,14 @@ |**CUDA**|**A**|**D**|**R**|**HIP**|**A**|**D**|**R**|**E**| |:--|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:| -|`cuLinkAddData`| | | |`hiprtcLinkAddData`|5.3.0| | |5.3.0| -|`cuLinkAddData_v2`| | | |`hiprtcLinkAddData`|5.3.0| | |5.3.0| -|`cuLinkAddFile`| | | |`hiprtcLinkAddFile`|5.3.0| | |5.3.0| -|`cuLinkAddFile_v2`| | | |`hiprtcLinkAddFile`|5.3.0| | |5.3.0| -|`cuLinkComplete`| | | |`hiprtcLinkComplete`|5.3.0| | |5.3.0| -|`cuLinkCreate`| | | |`hiprtcLinkCreate`|5.3.0| | |5.3.0| -|`cuLinkCreate_v2`| | | |`hiprtcLinkCreate`|5.3.0| | |5.3.0| -|`cuLinkDestroy`| | | 
|`hiprtcLinkDestroy`|5.3.0| | |5.3.0| +|`cuLinkAddData`| | | |`hiprtcLinkAddData`|5.3.0| | | | +|`cuLinkAddData_v2`| | | |`hiprtcLinkAddData`|5.3.0| | | | +|`cuLinkAddFile`| | | |`hiprtcLinkAddFile`|5.3.0| | | | +|`cuLinkAddFile_v2`| | | |`hiprtcLinkAddFile`|5.3.0| | | | +|`cuLinkComplete`| | | |`hiprtcLinkComplete`|5.3.0| | | | +|`cuLinkCreate`| | | |`hiprtcLinkCreate`|5.3.0| | | | +|`cuLinkCreate_v2`| | | |`hiprtcLinkCreate`|5.3.0| | | | +|`cuLinkDestroy`| | | |`hiprtcLinkDestroy`|5.3.0| | | | |`cuModuleGetFunction`| | | |`hipModuleGetFunction`|1.6.0| | | | |`cuModuleGetGlobal`| | | |`hipModuleGetGlobal`|1.6.0| | | | |`cuModuleGetGlobal_v2`| | | |`hipModuleGetGlobal`|1.6.0| | | | @@ -1561,9 +1561,9 @@ |**CUDA**|**A**|**D**|**R**|**HIP**|**A**|**D**|**R**|**E**| |:--|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:| -|`cuDeviceGetGraphMemAttribute`|11.4| | |`hipDeviceGetGraphMemAttribute`|5.3.0| | |5.3.0| -|`cuDeviceGraphMemTrim`|11.4| | |`hipDeviceGraphMemTrim`|5.3.0| | |5.3.0| -|`cuDeviceSetGraphMemAttribute`|11.4| | |`hipDeviceSetGraphMemAttribute`|5.3.0| | |5.3.0| +|`cuDeviceGetGraphMemAttribute`|11.4| | |`hipDeviceGetGraphMemAttribute`|5.3.0| | | | +|`cuDeviceGraphMemTrim`|11.4| | |`hipDeviceGraphMemTrim`|5.3.0| | | | +|`cuDeviceSetGraphMemAttribute`|11.4| | |`hipDeviceSetGraphMemAttribute`|5.3.0| | | | |`cuGraphAddBatchMemOpNode`|11.7| | | | | | | | |`cuGraphAddChildGraphNode`|10.0| | |`hipGraphAddChildGraphNode`|5.0.0| | | | |`cuGraphAddDependencies`|10.0| | |`hipGraphAddDependencies`|4.5.0| | | | @@ -1632,13 +1632,13 @@ |`cuGraphNodeGetEnabled`|11.6| | | | | | | | |`cuGraphNodeGetType`|10.0| | |`hipGraphNodeGetType`|5.0.0| | | | |`cuGraphNodeSetEnabled`|11.6| | | | | | | | -|`cuGraphReleaseUserObject`|11.3| | |`hipGraphReleaseUserObject`|5.3.0| | |5.3.0| +|`cuGraphReleaseUserObject`|11.3| | |`hipGraphReleaseUserObject`|5.3.0| | | | |`cuGraphRemoveDependencies`|10.0| | |`hipGraphRemoveDependencies`|5.0.0| | | | -|`cuGraphRetainUserObject`|11.3| | 
|`hipGraphRetainUserObject`|5.3.0| | |5.3.0| -|`cuGraphUpload`|11.1| | |`hipGraphUpload`|5.3.0| | |5.3.0| -|`cuUserObjectCreate`|11.3| | |`hipUserObjectCreate`|5.3.0| | |5.3.0| -|`cuUserObjectRelease`|11.3| | |`hipUserObjectRelease`|5.3.0| | |5.3.0| -|`cuUserObjectRetain`|11.3| | |`hipUserObjectRetain`|5.3.0| | |5.3.0| +|`cuGraphRetainUserObject`|11.3| | |`hipGraphRetainUserObject`|5.3.0| | | | +|`cuGraphUpload`|11.1| | |`hipGraphUpload`|5.3.0| | | | +|`cuUserObjectCreate`|11.3| | |`hipUserObjectCreate`|5.3.0| | | | +|`cuUserObjectRelease`|11.3| | |`hipUserObjectRelease`|5.3.0| | | | +|`cuUserObjectRetain`|11.3| | |`hipUserObjectRetain`|5.3.0| | | | ## **22. Occupancy** diff --git a/doc/markdown/CUDA_RTC_API_supported_by_HIP.md b/doc/markdown/CUDA_RTC_API_supported_by_HIP.md index 22d82bba..ec6c5c48 100644 --- a/doc/markdown/CUDA_RTC_API_supported_by_HIP.md +++ b/doc/markdown/CUDA_RTC_API_supported_by_HIP.md @@ -27,8 +27,8 @@ |`nvrtcCompileProgram`| | | |`hiprtcCompileProgram`|2.6.0| | | | |`nvrtcCreateProgram`| | | |`hiprtcCreateProgram`|2.6.0| | | | |`nvrtcDestroyProgram`| | | |`hiprtcDestroyProgram`|2.6.0| | | | -|`nvrtcGetCUBIN`|11.1| | |`hiprtcGetBitcode`|5.3.0| | |5.3.0| -|`nvrtcGetCUBINSize`|11.1| | |`hiprtcGetBitcodeSize`|5.3.0| | |5.3.0| +|`nvrtcGetCUBIN`|11.1| | |`hiprtcGetBitcode`|5.3.0| | | | +|`nvrtcGetCUBINSize`|11.1| | |`hiprtcGetBitcodeSize`|5.3.0| | | | |`nvrtcGetErrorString`| | | |`hiprtcGetErrorString`|2.6.0| | | | |`nvrtcGetLoweredName`|8.0| | |`hiprtcGetLoweredName`|2.6.0| | | | |`nvrtcGetNVVM`|11.4| | | | | | | | diff --git a/doc/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md b/doc/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md index 685ce925..d9a3b2ce 100644 --- a/doc/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md +++ b/doc/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md @@ -20,7 +20,7 @@ |`cudaDeviceGetTexture1DLinearMaxWidth`|11.1| | | | | | | | |`cudaDeviceReset`| | | |`hipDeviceReset`|1.6.0| | | | 
|`cudaDeviceSetCacheConfig`| | | |`hipDeviceSetCacheConfig`|1.6.0| | | | -|`cudaDeviceSetLimit`| | | |`hipDeviceSetLimit`|5.3.0| | |5.3.0| +|`cudaDeviceSetLimit`| | | |`hipDeviceSetLimit`|5.3.0| | | | |`cudaDeviceSetMemPool`|11.2| | |`hipDeviceSetMemPool`|5.2.0| | | | |`cudaDeviceSetSharedMemConfig`| | | |`hipDeviceSetSharedMemConfig`|1.6.0| | | | |`cudaDeviceSynchronize`| | | |`hipDeviceSynchronize`|1.6.0| | | | @@ -422,9 +422,9 @@ |**CUDA**|**A**|**D**|**R**|**HIP**|**A**|**D**|**R**|**E**| |:--|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:| -|`cudaDeviceGetGraphMemAttribute`|11.4| | |`hipDeviceGetGraphMemAttribute`|5.3.0| | |5.3.0| -|`cudaDeviceGraphMemTrim`|11.4| | |`hipDeviceGraphMemTrim`|5.3.0| | |5.3.0| -|`cudaDeviceSetGraphMemAttribute`|11.4| | |`hipDeviceSetGraphMemAttribute`|5.3.0| | |5.3.0| +|`cudaDeviceGetGraphMemAttribute`|11.4| | |`hipDeviceGetGraphMemAttribute`|5.3.0| | | | +|`cudaDeviceGraphMemTrim`|11.4| | |`hipDeviceGraphMemTrim`|5.3.0| | | | +|`cudaDeviceSetGraphMemAttribute`|11.4| | |`hipDeviceSetGraphMemAttribute`|5.3.0| | | | |`cudaGraphAddChildGraphNode`|10.0| | |`hipGraphAddChildGraphNode`|5.0.0| | | | |`cudaGraphAddDependencies`|10.0| | |`hipGraphAddDependencies`|4.5.0| | | | |`cudaGraphAddEmptyNode`|10.0| | |`hipGraphAddEmptyNode`|4.5.0| | | | @@ -496,13 +496,13 @@ |`cudaGraphNodeGetDependentNodes`|11.0| | |`hipGraphNodeGetDependentNodes`|5.0.0| | | | |`cudaGraphNodeGetType`|11.0| | |`hipGraphNodeGetType`|5.0.0| | | | |`cudaGraphNodeSetEnabled`|11.6| | | | | | | | -|`cudaGraphReleaseUserObject`|11.3| | |`hipGraphReleaseUserObject`|5.3.0| | |5.3.0| +|`cudaGraphReleaseUserObject`|11.3| | |`hipGraphReleaseUserObject`|5.3.0| | | | |`cudaGraphRemoveDependencies`|11.0| | |`hipGraphRemoveDependencies`|5.0.0| | | | -|`cudaGraphRetainUserObject`|11.3| | |`hipGraphRetainUserObject`|5.3.0| | |5.3.0| -|`cudaGraphUpload`|11.1| | |`hipGraphUpload`|5.3.0| | |5.3.0| -|`cudaUserObjectCreate`|11.3| | |`hipUserObjectCreate`|5.3.0| | |5.3.0| -|`cudaUserObjectRelease`|11.3| 
| |`hipUserObjectRelease`|5.3.0| | |5.3.0| -|`cudaUserObjectRetain`|11.3| | |`hipUserObjectRetain`|5.3.0| | |5.3.0| +|`cudaGraphRetainUserObject`|11.3| | |`hipGraphRetainUserObject`|5.3.0| | | | +|`cudaGraphUpload`|11.1| | |`hipGraphUpload`|5.3.0| | | | +|`cudaUserObjectCreate`|11.3| | |`hipUserObjectCreate`|5.3.0| | | | +|`cudaUserObjectRelease`|11.3| | |`hipUserObjectRelease`|5.3.0| | | | +|`cudaUserObjectRetain`|11.3| | |`hipUserObjectRetain`|5.3.0| | | | ## **31. Driver Entry Point Access** @@ -1110,17 +1110,17 @@ Unsupported |`cudaGraphInstantiateFlagAutoFreeOnLaunch`|11.4| | |`hipGraphInstantiateFlagAutoFreeOnLaunch`|5.2.0| | | | |`cudaGraphInstantiateFlagUseNodePriority`|11.7| | | | | | | | |`cudaGraphInstantiateFlags`|11.4| | |`hipGraphInstantiateFlags`|5.2.0| | | | -|`cudaGraphMemAttrReservedMemCurrent`|11.4| | |`hipGraphMemAttrReservedMemCurrent`|5.3.0| | |5.3.0| -|`cudaGraphMemAttrReservedMemHigh`|11.4| | |`hipGraphMemAttrReservedMemHigh`|5.3.0| | |5.3.0| -|`cudaGraphMemAttrUsedMemCurrent`|11.4| | |`hipGraphMemAttrUsedMemCurrent`|5.3.0| | |5.3.0| -|`cudaGraphMemAttrUsedMemHigh`|11.4| | |`hipGraphMemAttrUsedMemHigh`|5.3.0| | |5.3.0| -|`cudaGraphMemAttributeType`|11.4| | |`hipGraphMemAttributeType`|5.3.0| | |5.3.0| +|`cudaGraphMemAttrReservedMemCurrent`|11.4| | |`hipGraphMemAttrReservedMemCurrent`|5.3.0| | | | +|`cudaGraphMemAttrReservedMemHigh`|11.4| | |`hipGraphMemAttrReservedMemHigh`|5.3.0| | | | +|`cudaGraphMemAttrUsedMemCurrent`|11.4| | |`hipGraphMemAttrUsedMemCurrent`|5.3.0| | | | +|`cudaGraphMemAttrUsedMemHigh`|11.4| | |`hipGraphMemAttrUsedMemHigh`|5.3.0| | | | +|`cudaGraphMemAttributeType`|11.4| | |`hipGraphMemAttributeType`|5.3.0| | | | |`cudaGraphNodeType`|10.0| | |`hipGraphNodeType`|4.3.0| | | | |`cudaGraphNodeTypeCount`|10.0| | |`hipGraphNodeTypeCount`|4.3.0| | | | |`cudaGraphNodeTypeEmpty`|10.0| | |`hipGraphNodeTypeEmpty`|4.3.0| | | | |`cudaGraphNodeTypeEventRecord`|11.1| | |`hipGraphNodeTypeEventRecord`|4.3.0| | | | 
-|`cudaGraphNodeTypeExtSemaphoreSignal`|11.4| | |`hipGraphNodeTypeExtSemaphoreSignal`|5.3.0| | |5.3.0| -|`cudaGraphNodeTypeExtSemaphoreWait`|11.4| | |`hipGraphNodeTypeExtSemaphoreWait`|5.3.0| | |5.3.0| +|`cudaGraphNodeTypeExtSemaphoreSignal`|11.4| | |`hipGraphNodeTypeExtSemaphoreSignal`|5.3.0| | | | +|`cudaGraphNodeTypeExtSemaphoreWait`|11.4| | |`hipGraphNodeTypeExtSemaphoreWait`|5.3.0| | | | |`cudaGraphNodeTypeGraph`|10.0| | |`hipGraphNodeTypeGraph`|4.3.0| | | | |`cudaGraphNodeTypeHost`|10.0| | |`hipGraphNodeTypeHost`|4.3.0| | | | |`cudaGraphNodeTypeKernel`|10.0| | |`hipGraphNodeTypeKernel`|4.3.0| | | | @@ -1130,7 +1130,7 @@ Unsupported |`cudaGraphNodeTypeMemset`|10.0| | |`hipGraphNodeTypeMemset`|4.3.0| | | | |`cudaGraphNodeTypeWaitEvent`|11.1| | |`hipGraphNodeTypeWaitEvent`|4.3.0| | | | |`cudaGraphNode_t`|10.0| | |`hipGraphNode_t`|4.3.0| | | | -|`cudaGraphUserObjectMove`|11.3| | |`hipGraphUserObjectMove`|5.3.0| | |5.3.0| +|`cudaGraphUserObjectMove`|11.3| | |`hipGraphUserObjectMove`|5.3.0| | | | |`cudaGraph_t`|10.0| | |`hipGraph_t`|4.3.0| | | | |`cudaGraphicsCubeFace`| | | | | | | | | |`cudaGraphicsCubeFaceNegativeX`| | | | | | | | | @@ -1200,7 +1200,7 @@ Unsupported |`cudaLimitMaxL2FetchGranularity`|10.0| | | | | | | | |`cudaLimitPersistingL2CacheSize`|11.0| | | | | | | | |`cudaLimitPrintfFifoSize`| | | |`hipLimitPrintfFifoSize`|4.5.0| | | | -|`cudaLimitStackSize`| | | |`hipLimitStackSize`|5.3.0| | |5.3.0| +|`cudaLimitStackSize`| | | |`hipLimitStackSize`|5.3.0| | | | |`cudaMemAccessDesc`|11.2| | |`hipMemAccessDesc`|5.2.0| | | | |`cudaMemAccessFlags`|11.2| | |`hipMemAccessFlags`|5.2.0| | | | |`cudaMemAccessFlagsProtNone`|11.2| | |`hipMemAccessFlagsProtNone`|5.2.0| | | | @@ -1258,7 +1258,7 @@ Unsupported |`cudaMemoryType`| | | |`hipMemoryType`|1.6.0| | | | |`cudaMemoryTypeDevice`| | | |`hipMemoryTypeDevice`|1.6.0| | | | |`cudaMemoryTypeHost`| | | |`hipMemoryTypeHost`|1.6.0| | | | -|`cudaMemoryTypeManaged`|10.0| | |`hipMemoryTypeManaged`|5.3.0| | |5.3.0| 
+|`cudaMemoryTypeManaged`|10.0| | |`hipMemoryTypeManaged`|5.3.0| | | | |`cudaMemoryTypeUnregistered`| | | | | | | | | |`cudaMemsetParams`|10.0| | |`hipMemsetParams`|4.3.0| | | | |`cudaMipmappedArray`| | | |`hipMipmappedArray`|1.7.0| | | | @@ -1369,10 +1369,10 @@ Unsupported |`cudaTextureTypeCubemap`| | | |`hipTextureTypeCubemap`|1.7.0| | | | |`cudaTextureTypeCubemapLayered`| | | |`hipTextureTypeCubemapLayered`|1.7.0| | | | |`cudaUUID_t`| | | |`hipUUID`|5.2.0| | | | -|`cudaUserObjectFlags`|11.3| | |`hipUserObjectFlags`|5.3.0| | |5.3.0| -|`cudaUserObjectNoDestructorSync`|11.3| | |`hipUserObjectNoDestructorSync`|5.3.0| | |5.3.0| -|`cudaUserObjectRetainFlags`|11.3| | |`hipUserObjectRetainFlags`|5.3.0| | |5.3.0| -|`cudaUserObject_t`|11.3| | |`hipUserObject_t`|5.3.0| | |5.3.0| +|`cudaUserObjectFlags`|11.3| | |`hipUserObjectFlags`|5.3.0| | | | +|`cudaUserObjectNoDestructorSync`|11.3| | |`hipUserObjectNoDestructorSync`|5.3.0| | | | +|`cudaUserObjectRetainFlags`|11.3| | |`hipUserObjectRetainFlags`|5.3.0| | | | +|`cudaUserObject_t`|11.3| | |`hipUserObject_t`|5.3.0| | | | |`libraryPropertyType`|8.0| | | | | | | | |`libraryPropertyType_t`|8.0| | | | | | | | |`surfaceReference`| | | |`surfaceReference`|1.9.0| | | | diff --git a/src/CUDA2HIP_Driver_API_functions.cpp b/src/CUDA2HIP_Driver_API_functions.cpp index 29d03116..6c148fea 100644 --- a/src/CUDA2HIP_Driver_API_functions.cpp +++ b/src/CUDA2HIP_Driver_API_functions.cpp @@ -119,7 +119,7 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { {"cuCtxResetPersistingL2Cache", {"hipCtxResetPersistingL2Cache", "", CONV_CONTEXT, API_DRIVER, 8, HIP_UNSUPPORTED}}, {"cuCtxSetCurrent", {"hipCtxSetCurrent", "", CONV_CONTEXT, API_DRIVER, 8, HIP_DEPRECATED}}, // cudaDeviceSetLimit - {"cuCtxSetLimit", {"hipDeviceSetLimit", "", CONV_CONTEXT, API_DRIVER, 8, HIP_EXPERIMENTAL}}, + {"cuCtxSetLimit", {"hipDeviceSetLimit", "", CONV_CONTEXT, API_DRIVER, 8}}, // cudaDeviceSetSharedMemConfig // TODO: rename to hipDeviceSetSharedMemConfig 
{"cuCtxSetSharedMemConfig", {"hipCtxSetSharedMemConfig", "", CONV_CONTEXT, API_DRIVER, 8, HIP_DEPRECATED}}, @@ -136,14 +136,14 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { // 10. Module Management // no analogues - {"cuLinkAddData", {"hiprtcLinkAddData", "", CONV_MODULE, API_DRIVER, 10, HIP_EXPERIMENTAL}}, - {"cuLinkAddData_v2", {"hiprtcLinkAddData", "", CONV_MODULE, API_DRIVER, 10, HIP_EXPERIMENTAL}}, - {"cuLinkAddFile", {"hiprtcLinkAddFile", "", CONV_MODULE, API_DRIVER, 10, HIP_EXPERIMENTAL}}, - {"cuLinkAddFile_v2", {"hiprtcLinkAddFile", "", CONV_MODULE, API_DRIVER, 10, HIP_EXPERIMENTAL}}, - {"cuLinkComplete", {"hiprtcLinkComplete", "", CONV_MODULE, API_DRIVER, 10, HIP_EXPERIMENTAL}}, - {"cuLinkCreate", {"hiprtcLinkCreate", "", CONV_MODULE, API_DRIVER, 10, HIP_EXPERIMENTAL}}, - {"cuLinkCreate_v2", {"hiprtcLinkCreate", "", CONV_MODULE, API_DRIVER, 10, HIP_EXPERIMENTAL}}, - {"cuLinkDestroy", {"hiprtcLinkDestroy", "", CONV_MODULE, API_DRIVER, 10, HIP_EXPERIMENTAL}}, + {"cuLinkAddData", {"hiprtcLinkAddData", "", CONV_MODULE, API_DRIVER, 10}}, + {"cuLinkAddData_v2", {"hiprtcLinkAddData", "", CONV_MODULE, API_DRIVER, 10}}, + {"cuLinkAddFile", {"hiprtcLinkAddFile", "", CONV_MODULE, API_DRIVER, 10}}, + {"cuLinkAddFile_v2", {"hiprtcLinkAddFile", "", CONV_MODULE, API_DRIVER, 10}}, + {"cuLinkComplete", {"hiprtcLinkComplete", "", CONV_MODULE, API_DRIVER, 10}}, + {"cuLinkCreate", {"hiprtcLinkCreate", "", CONV_MODULE, API_DRIVER, 10}}, + {"cuLinkCreate_v2", {"hiprtcLinkCreate", "", CONV_MODULE, API_DRIVER, 10}}, + {"cuLinkDestroy", {"hiprtcLinkDestroy", "", CONV_MODULE, API_DRIVER, 10}}, {"cuModuleGetFunction", {"hipModuleGetFunction", "", CONV_MODULE, API_DRIVER, 10}}, {"cuModuleGetGlobal", {"hipModuleGetGlobal", "", CONV_MODULE, API_DRIVER, 10}}, {"cuModuleGetGlobal_v2", {"hipModuleGetGlobal", "", CONV_MODULE, API_DRIVER, 10}}, @@ -687,7 +687,7 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { // cudaGraphExecEventWaitNodeSetEvent {"cuGraphExecEventWaitNodeSetEvent", 
{"hipGraphExecEventWaitNodeSetEvent", "", CONV_GRAPH, API_DRIVER, 21}}, // cudaGraphUpload - {"cuGraphUpload", {"hipGraphUpload", "", CONV_GRAPH, API_DRIVER, 21, HIP_EXPERIMENTAL}}, + {"cuGraphUpload", {"hipGraphUpload", "", CONV_GRAPH, API_DRIVER, 21}}, // cudaGraphAddExternalSemaphoresSignalNode {"cuGraphAddExternalSemaphoresSignalNode", {"hipGraphAddExternalSemaphoresSignalNode", "", CONV_GRAPH, API_DRIVER, 21, HIP_UNSUPPORTED}}, // cudaGraphExternalSemaphoresSignalNodeGetParams @@ -705,15 +705,15 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { // cudaGraphExecExternalSemaphoresWaitNodeSetParams {"cuGraphExecExternalSemaphoresWaitNodeSetParams", {"hipGraphExecExternalSemaphoresWaitNodeSetParams", "", CONV_GRAPH, API_DRIVER, 21, HIP_UNSUPPORTED}}, // cudaUserObjectCreate - {"cuUserObjectCreate", {"hipUserObjectCreate", "", CONV_GRAPH, API_DRIVER, 21, HIP_EXPERIMENTAL}}, + {"cuUserObjectCreate", {"hipUserObjectCreate", "", CONV_GRAPH, API_DRIVER, 21}}, // cudaUserObjectRetain - {"cuUserObjectRetain", {"hipUserObjectRetain", "", CONV_GRAPH, API_DRIVER, 21, HIP_EXPERIMENTAL}}, + {"cuUserObjectRetain", {"hipUserObjectRetain", "", CONV_GRAPH, API_DRIVER, 21}}, // cudaUserObjectRelease - {"cuUserObjectRelease", {"hipUserObjectRelease", "", CONV_GRAPH, API_DRIVER, 21, HIP_EXPERIMENTAL}}, + {"cuUserObjectRelease", {"hipUserObjectRelease", "", CONV_GRAPH, API_DRIVER, 21}}, // cudaGraphRetainUserObject - {"cuGraphRetainUserObject", {"hipGraphRetainUserObject", "", CONV_GRAPH, API_DRIVER, 21, HIP_EXPERIMENTAL}}, + {"cuGraphRetainUserObject", {"hipGraphRetainUserObject", "", CONV_GRAPH, API_DRIVER, 21}}, // cudaGraphReleaseUserObject - {"cuGraphReleaseUserObject", {"hipGraphReleaseUserObject", "", CONV_GRAPH, API_DRIVER, 21, HIP_EXPERIMENTAL}}, + {"cuGraphReleaseUserObject", {"hipGraphReleaseUserObject", "", CONV_GRAPH, API_DRIVER, 21}}, // cudaGraphAddMemAllocNode {"cuGraphAddMemAllocNode", {"hipGraphAddMemAllocNode", "", CONV_GRAPH, API_DRIVER, 21, HIP_UNSUPPORTED}}, // 
cudaGraphMemAllocNodeGetParams @@ -723,11 +723,11 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { // cudaGraphMemFreeNodeGetParams {"cuGraphMemFreeNodeGetParams", {"hipGraphMemFreeNodeGetParams", "", CONV_GRAPH, API_DRIVER, 21, HIP_UNSUPPORTED}}, // cudaDeviceGraphMemTrim - {"cuDeviceGraphMemTrim", {"hipDeviceGraphMemTrim", "", CONV_GRAPH, API_DRIVER, 21, HIP_EXPERIMENTAL}}, + {"cuDeviceGraphMemTrim", {"hipDeviceGraphMemTrim", "", CONV_GRAPH, API_DRIVER, 21}}, // cudaDeviceGetGraphMemAttribute - {"cuDeviceGetGraphMemAttribute", {"hipDeviceGetGraphMemAttribute", "", CONV_GRAPH, API_DRIVER, 21, HIP_EXPERIMENTAL}}, + {"cuDeviceGetGraphMemAttribute", {"hipDeviceGetGraphMemAttribute", "", CONV_GRAPH, API_DRIVER, 21}}, // cudaDeviceSetGraphMemAttribute - {"cuDeviceSetGraphMemAttribute", {"hipDeviceSetGraphMemAttribute", "", CONV_GRAPH, API_DRIVER, 21, HIP_EXPERIMENTAL}}, + {"cuDeviceSetGraphMemAttribute", {"hipDeviceSetGraphMemAttribute", "", CONV_GRAPH, API_DRIVER, 21}}, // cudaGraphInstantiateWithFlags {"cuGraphInstantiateWithFlags", {"hipGraphInstantiateWithFlags", "", CONV_GRAPH, API_DRIVER, 21}}, // @@ -1420,11 +1420,11 @@ const std::map HIP_DRIVER_FUNCTION_VER_MAP { {"hipMemRetainAllocationHandle", {HIP_5020, HIP_0, HIP_0 }}, {"hipMemSetAccess", {HIP_5020, HIP_0, HIP_0 }}, {"hipMemUnmap", {HIP_5020, HIP_0, HIP_0 }}, - {"hiprtcLinkCreate", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hiprtcLinkAddFile", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hiprtcLinkAddData", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hiprtcLinkComplete", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hiprtcLinkDestroy", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, + {"hiprtcLinkCreate", {HIP_5030, HIP_0, HIP_0 }}, + {"hiprtcLinkAddFile", {HIP_5030, HIP_0, HIP_0 }}, + {"hiprtcLinkAddData", {HIP_5030, HIP_0, HIP_0 }}, + {"hiprtcLinkComplete", {HIP_5030, HIP_0, HIP_0 }}, + {"hiprtcLinkDestroy", {HIP_5030, HIP_0, HIP_0 }}, }; const std::map CUDA_DRIVER_API_SECTION_MAP { diff --git 
a/src/CUDA2HIP_Driver_API_types.cpp b/src/CUDA2HIP_Driver_API_types.cpp index bc763129..474c3aa0 100644 --- a/src/CUDA2HIP_Driver_API_types.cpp +++ b/src/CUDA2HIP_Driver_API_types.cpp @@ -281,9 +281,9 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CUmemPoolPtrExportData_v1", {"hipMemPoolPtrExportData", "", CONV_TYPE, API_DRIVER, 1}}, // - {"CUuserObject_st", {"hipUserObject", "", CONV_TYPE, API_DRIVER, 1, HIP_EXPERIMENTAL}}, + {"CUuserObject_st", {"hipUserObject", "", CONV_TYPE, API_DRIVER, 1}}, // cudaUserObject_t - {"CUuserObject", {"hipUserObject_t", "", CONV_TYPE, API_DRIVER, 1, HIP_EXPERIMENTAL}}, + {"CUuserObject", {"hipUserObject_t", "", CONV_TYPE, API_DRIVER, 1}}, // {"CUexecAffinitySmCount_st", {"hipExecAffinitySmCount", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, @@ -714,7 +714,7 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { // no analogue {"CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED", {"hipDeviceAttributeVirtualAddressManagementSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED | CUDA_DEPRECATED}}, // 102 // no analogue - {"CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED", {"hipDeviceAttributeVirtualMemoryManagementSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, // 102 + {"CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED", {"hipDeviceAttributeVirtualMemoryManagementSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 102 // no analogue {"CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED", {"hipDeviceAttributeHandleTypePosixFileDescriptorSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 103 // no analogue @@ -1119,9 +1119,9 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { // cudaGraphNodeTypeEventRecord = 0x07 {"CU_GRAPH_NODE_TYPE_EVENT_RECORD", {"hipGraphNodeTypeEventRecord", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 7 // cudaGraphNodeTypeExtSemaphoreSignal - {"CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL", 
{"hipGraphNodeTypeExtSemaphoreSignal", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, // 8 + {"CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL", {"hipGraphNodeTypeExtSemaphoreSignal", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 8 // cudaGraphNodeTypeExtSemaphoreWait - {"CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT", {"hipGraphNodeTypeExtSemaphoreWait", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, // 9 + {"CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT", {"hipGraphNodeTypeExtSemaphoreWait", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 9 // cudaGraphNodeTypeMemAlloc {"CU_GRAPH_NODE_TYPE_MEM_ALLOC", {"hipGraphNodeTypeMemAlloc", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 10 // cudaGraphNodeTypeMemFree @@ -1252,23 +1252,23 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CU_TARGET_COMPUTE_90", {"hipJitTargetCompute90", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 90 // no analogue - {"CUjitInputType", {"hiprtcJITInputType", "", CONV_TYPE, API_DRIVER, 1, HIP_EXPERIMENTAL}}, - {"CUjitInputType_enum", {"hiprtcJITInputType", "", CONV_TYPE, API_DRIVER, 1, HIP_EXPERIMENTAL}}, + {"CUjitInputType", {"hiprtcJITInputType", "", CONV_TYPE, API_DRIVER, 1}}, + {"CUjitInputType_enum", {"hiprtcJITInputType", "", CONV_TYPE, API_DRIVER, 1}}, // CUjitInputType enum values - {"CU_JIT_INPUT_CUBIN", {"HIPRTC_JIT_INPUT_CUBIN", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, // 0 - {"CU_JIT_INPUT_PTX", {"HIPRTC_JIT_INPUT_PTX", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, - {"CU_JIT_INPUT_FATBINARY", {"HIPRTC_JIT_INPUT_FATBINARY", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, - {"CU_JIT_INPUT_OBJECT", {"HIPRTC_JIT_INPUT_OBJECT", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, - {"CU_JIT_INPUT_LIBRARY", {"HIPRTC_JIT_INPUT_LIBRARY", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, - {"CU_JIT_INPUT_NVVM", {"HIPRTC_JIT_INPUT_NVVM", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, - 
{"CU_JIT_NUM_INPUT_TYPES", {"HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, + {"CU_JIT_INPUT_CUBIN", {"HIPRTC_JIT_INPUT_CUBIN", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 0 + {"CU_JIT_INPUT_PTX", {"HIPRTC_JIT_INPUT_PTX", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_INPUT_FATBINARY", {"HIPRTC_JIT_INPUT_FATBINARY", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_INPUT_OBJECT", {"HIPRTC_JIT_INPUT_OBJECT", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_INPUT_LIBRARY", {"HIPRTC_JIT_INPUT_LIBRARY", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_INPUT_NVVM", {"HIPRTC_JIT_INPUT_NVVM", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_NUM_INPUT_TYPES", {"HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // cudaLimit {"CUlimit", {"hipLimit_t", "", CONV_TYPE, API_DRIVER, 1}}, {"CUlimit_enum", {"hipLimit_t", "", CONV_TYPE, API_DRIVER, 1}}, // CUlimit enum values // cudaLimitStackSize - {"CU_LIMIT_STACK_SIZE", {"hipLimitStackSize", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, // 0x00 + {"CU_LIMIT_STACK_SIZE", {"hipLimitStackSize", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 0x00 // cudaLimitPrintfFifoSize {"CU_LIMIT_PRINTF_FIFO_SIZE", {"hipLimitPrintfFifoSize", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 0x01 // cudaLimitMallocHeapSize @@ -2083,18 +2083,18 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CU_GRAPH_DEBUG_DOT_FLAGS_BATCH_MEM_OP_NODE_PARAMS", {"hipGraphDebugDotFlagsBatchMemOpNodeParams", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // 1<<13 // cudaUserObjectFlags - {"CUuserObject_flags", {"hipUserObjectFlags", "", CONV_TYPE, API_DRIVER, 1, HIP_EXPERIMENTAL}}, - {"CUuserObject_flags_enum", {"hipUserObjectFlags", "", CONV_TYPE, API_DRIVER, 1, HIP_EXPERIMENTAL}}, + {"CUuserObject_flags", {"hipUserObjectFlags", "", CONV_TYPE, API_DRIVER, 1}}, + {"CUuserObject_flags_enum", {"hipUserObjectFlags", "", CONV_TYPE, API_DRIVER, 
1}}, // CUuserObject_flags enum values // cudaUserObjectNoDestructorSync - {"CU_USER_OBJECT_NO_DESTRUCTOR_SYNC", {"hipUserObjectNoDestructorSync", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, // 1 + {"CU_USER_OBJECT_NO_DESTRUCTOR_SYNC", {"hipUserObjectNoDestructorSync", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 1 // cudaUserObjectRetainFlags - {"CUuserObjectRetain_flags", {"hipUserObjectRetainFlags", "", CONV_TYPE, API_DRIVER, 1, HIP_EXPERIMENTAL}}, - {"CUuserObjectRetain_flags_enum", {"hipUserObjectRetainFlags", "", CONV_TYPE, API_DRIVER, 1, HIP_EXPERIMENTAL}}, + {"CUuserObjectRetain_flags", {"hipUserObjectRetainFlags", "", CONV_TYPE, API_DRIVER, 1}}, + {"CUuserObjectRetain_flags_enum", {"hipUserObjectRetainFlags", "", CONV_TYPE, API_DRIVER, 1}}, // CUuserObjectRetain_flags enum values // cudaGraphUserObjectMove - {"CU_GRAPH_USER_OBJECT_MOVE", {"hipGraphUserObjectMove", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, // 1 + {"CU_GRAPH_USER_OBJECT_MOVE", {"hipGraphUserObjectMove", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 1 // no analogue {"CUexecAffinityType", {"hipExecAffinityType", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, @@ -2104,17 +2104,17 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CU_EXEC_AFFINITY_TYPE_MAX", {"hipExecAffinityTypeMax", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, // // cudaGraphMemAttributeType - {"CUgraphMem_attribute", {"hipGraphMemAttributeType", "", CONV_TYPE, API_DRIVER, 1, HIP_EXPERIMENTAL}}, - {"CUgraphMem_attribute_enum", {"hipGraphMemAttributeType", "", CONV_TYPE, API_DRIVER, 1, HIP_EXPERIMENTAL}}, + {"CUgraphMem_attribute", {"hipGraphMemAttributeType", "", CONV_TYPE, API_DRIVER, 1}}, + {"CUgraphMem_attribute_enum", {"hipGraphMemAttributeType", "", CONV_TYPE, API_DRIVER, 1}}, // CUgraphMem_attribute enum values // cudaGraphMemAttrUsedMemCurrent - {"CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT", {"hipGraphMemAttrUsedMemCurrent", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, 
HIP_EXPERIMENTAL}}, + {"CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT", {"hipGraphMemAttrUsedMemCurrent", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // cudaGraphMemAttrUsedMemHigh - {"CU_GRAPH_MEM_ATTR_USED_MEM_HIGH", {"hipGraphMemAttrUsedMemHigh", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, + {"CU_GRAPH_MEM_ATTR_USED_MEM_HIGH", {"hipGraphMemAttrUsedMemHigh", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // cudaGraphMemAttrReservedMemCurrent - {"CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT", {"hipGraphMemAttrReservedMemCurrent", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, + {"CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT", {"hipGraphMemAttrReservedMemCurrent", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // cudaGraphMemAttrReservedMemHigh - {"CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH", {"hipGraphMemAttrReservedMemHigh", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_EXPERIMENTAL}}, + {"CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH", {"hipGraphMemAttrReservedMemHigh", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // cudaGraphInstantiateFlags {"CUgraphInstantiate_flags", {"hipGraphInstantiateFlags", "", CONV_TYPE, API_DRIVER, 1}}, @@ -3455,17 +3455,17 @@ const std::map HIP_DRIVER_TYPE_NAME_VER_MAP { {"hipArraySparseSubresourceType", {HIP_5020, HIP_0, HIP_0 }}, {"hipArraySparseSubresourceTypeSparseLevel", {HIP_5020, HIP_0, HIP_0 }}, {"hipArraySparseSubresourceTypeMiptail", {HIP_5020, HIP_0, HIP_0 }}, - {"hipDeviceAttributeVirtualMemoryManagementSupported", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipUserObject", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipUserObject_t", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hiprtcJITInputType", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"HIPRTC_JIT_INPUT_CUBIN", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"HIPRTC_JIT_INPUT_PTX", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"HIPRTC_JIT_INPUT_FATBINARY", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"HIPRTC_JIT_INPUT_OBJECT", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"HIPRTC_JIT_INPUT_LIBRARY", 
{HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"HIPRTC_JIT_INPUT_NVVM", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"ihiprtcLinkState", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hiprtcLinkState", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipDeviceAttributeVirtualMemoryManagementSupported", {HIP_5030, HIP_0, HIP_0 }}, + {"hipUserObject", {HIP_5030, HIP_0, HIP_0 }}, + {"hipUserObject_t", {HIP_5030, HIP_0, HIP_0 }}, + {"hiprtcJITInputType", {HIP_5030, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_INPUT_CUBIN", {HIP_5030, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_INPUT_PTX", {HIP_5030, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_INPUT_FATBINARY", {HIP_5030, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_INPUT_OBJECT", {HIP_5030, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_INPUT_LIBRARY", {HIP_5030, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_INPUT_NVVM", {HIP_5030, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES", {HIP_5030, HIP_0, HIP_0 }}, + {"ihiprtcLinkState", {HIP_5030, HIP_0, HIP_0 }}, + {"hiprtcLinkState", {HIP_5030, HIP_0, HIP_0 }}, }; diff --git a/src/CUDA2HIP_RTC_API_functions.cpp b/src/CUDA2HIP_RTC_API_functions.cpp index 2763485b..263c1811 100644 --- a/src/CUDA2HIP_RTC_API_functions.cpp +++ b/src/CUDA2HIP_RTC_API_functions.cpp @@ -33,8 +33,8 @@ const std::map CUDA_RTC_FUNCTION_MAP { {"nvrtcCompileProgram", {"hiprtcCompileProgram", "", CONV_LIB_FUNC, API_RTC, 2}}, {"nvrtcGetPTXSize", {"hiprtcGetCodeSize", "", CONV_LIB_FUNC, API_RTC, 2}}, {"nvrtcGetPTX", {"hiprtcGetCode", "", CONV_LIB_FUNC, API_RTC, 2}}, - {"nvrtcGetCUBINSize", {"hiprtcGetBitcodeSize", "", CONV_LIB_FUNC, API_RTC, 2, HIP_EXPERIMENTAL}}, - {"nvrtcGetCUBIN", {"hiprtcGetBitcode", "", CONV_LIB_FUNC, API_RTC, 2, HIP_EXPERIMENTAL}}, + {"nvrtcGetCUBINSize", {"hiprtcGetBitcodeSize", "", CONV_LIB_FUNC, API_RTC, 2}}, + {"nvrtcGetCUBIN", {"hiprtcGetBitcode", "", CONV_LIB_FUNC, API_RTC, 2}}, {"nvrtcGetNVVMSize", {"hiprtcGetNVVMSize", "", CONV_LIB_FUNC, API_RTC, 2, HIP_UNSUPPORTED}}, {"nvrtcGetNVVM", 
{"hiprtcGetNVVM", "", CONV_LIB_FUNC, API_RTC, 2, HIP_UNSUPPORTED}}, {"nvrtcGetProgramLogSize", {"hiprtcGetProgramLogSize", "", CONV_LIB_FUNC, API_RTC, 2}}, @@ -66,8 +66,8 @@ const std::map HIP_RTC_FUNCTION_VER_MAP { {"hiprtcGetProgramLog", {HIP_2060, HIP_0, HIP_0 }}, {"hiprtcAddNameExpression", {HIP_2060, HIP_0, HIP_0 }}, {"hiprtcGetLoweredName", {HIP_2060, HIP_0, HIP_0 }}, - {"hiprtcGetBitcode", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hiprtcGetBitcodeSize", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, + {"hiprtcGetBitcode", {HIP_5030, HIP_0, HIP_0 }}, + {"hiprtcGetBitcodeSize", {HIP_5030, HIP_0, HIP_0 }}, }; const std::map CUDA_RTC_API_SECTION_MAP { diff --git a/src/CUDA2HIP_Runtime_API_functions.cpp b/src/CUDA2HIP_Runtime_API_functions.cpp index 353d3cf3..4a9ef995 100644 --- a/src/CUDA2HIP_Runtime_API_functions.cpp +++ b/src/CUDA2HIP_Runtime_API_functions.cpp @@ -52,7 +52,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // no analogue {"cudaDeviceSetCacheConfig", {"hipDeviceSetCacheConfig", "", CONV_DEVICE, API_RUNTIME, 1}}, // cuCtxSetLimit - {"cudaDeviceSetLimit", {"hipDeviceSetLimit", "", CONV_DEVICE, API_RUNTIME, 1, HIP_EXPERIMENTAL}}, + {"cudaDeviceSetLimit", {"hipDeviceSetLimit", "", CONV_DEVICE, API_RUNTIME, 1}}, // cuCtxSetSharedMemConfig {"cudaDeviceSetSharedMemConfig", {"hipDeviceSetSharedMemConfig", "", CONV_DEVICE, API_RUNTIME, 1}}, // cuCtxSynchronize @@ -800,7 +800,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // cuGraphExecEventWaitNodeSetEvent {"cudaGraphExecEventWaitNodeSetEvent", {"hipGraphExecEventWaitNodeSetEvent", "", CONV_GRAPH, API_RUNTIME, 30}}, // cuGraphUpload - {"cudaGraphUpload", {"hipGraphUpload", "", CONV_GRAPH, API_RUNTIME, 30, HIP_EXPERIMENTAL}}, + {"cudaGraphUpload", {"hipGraphUpload", "", CONV_GRAPH, API_RUNTIME, 30}}, // cuGraphAddExternalSemaphoresSignalNode {"cudaGraphAddExternalSemaphoresSignalNode", {"hipGraphAddExternalSemaphoresSignalNode", "", CONV_GRAPH, API_RUNTIME, 30, HIP_UNSUPPORTED}}, // 
cuGraphExternalSemaphoresSignalNodeGetParams @@ -818,15 +818,15 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // cuGraphExecExternalSemaphoresWaitNodeSetParams {"cudaGraphExecExternalSemaphoresWaitNodeSetParams", {"hipGraphExecExternalSemaphoresWaitNodeSetParams", "", CONV_GRAPH, API_RUNTIME, 30, HIP_UNSUPPORTED}}, // cuUserObjectCreate - {"cudaUserObjectCreate", {"hipUserObjectCreate", "", CONV_GRAPH, API_RUNTIME, 30, HIP_EXPERIMENTAL}}, + {"cudaUserObjectCreate", {"hipUserObjectCreate", "", CONV_GRAPH, API_RUNTIME, 30}}, // cuUserObjectRetain - {"cudaUserObjectRetain", {"hipUserObjectRetain", "", CONV_GRAPH, API_RUNTIME, 30, HIP_EXPERIMENTAL}}, + {"cudaUserObjectRetain", {"hipUserObjectRetain", "", CONV_GRAPH, API_RUNTIME, 30}}, // cuUserObjectRelease - {"cudaUserObjectRelease", {"hipUserObjectRelease", "", CONV_GRAPH, API_RUNTIME, 30, HIP_EXPERIMENTAL}}, + {"cudaUserObjectRelease", {"hipUserObjectRelease", "", CONV_GRAPH, API_RUNTIME, 30}}, // cuGraphRetainUserObject - {"cudaGraphRetainUserObject", {"hipGraphRetainUserObject", "", CONV_GRAPH, API_RUNTIME, 30, HIP_EXPERIMENTAL}}, + {"cudaGraphRetainUserObject", {"hipGraphRetainUserObject", "", CONV_GRAPH, API_RUNTIME, 30}}, // cuGraphReleaseUserObject - {"cudaGraphReleaseUserObject", {"hipGraphReleaseUserObject", "", CONV_GRAPH, API_RUNTIME, 30, HIP_EXPERIMENTAL}}, + {"cudaGraphReleaseUserObject", {"hipGraphReleaseUserObject", "", CONV_GRAPH, API_RUNTIME, 30}}, // cuGraphAddMemAllocNode {"cudaGraphAddMemAllocNode", {"hipGraphAddMemAllocNode", "", CONV_GRAPH, API_RUNTIME, 30, HIP_UNSUPPORTED}}, // cuGraphMemAllocNodeGetParams @@ -836,11 +836,11 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // cuGraphMemFreeNodeGetParams {"cudaGraphMemFreeNodeGetParams", {"hipGraphMemFreeNodeGetParams", "", CONV_GRAPH, API_RUNTIME, 30, HIP_UNSUPPORTED}}, // cuDeviceGraphMemTrim - {"cudaDeviceGraphMemTrim", {"hipDeviceGraphMemTrim", "", CONV_GRAPH, API_RUNTIME, 30, HIP_EXPERIMENTAL}}, + {"cudaDeviceGraphMemTrim", 
{"hipDeviceGraphMemTrim", "", CONV_GRAPH, API_RUNTIME, 30}}, // cuDeviceGetGraphMemAttribute - {"cudaDeviceGetGraphMemAttribute", {"hipDeviceGetGraphMemAttribute", "", CONV_GRAPH, API_RUNTIME, 30, HIP_EXPERIMENTAL}}, + {"cudaDeviceGetGraphMemAttribute", {"hipDeviceGetGraphMemAttribute", "", CONV_GRAPH, API_RUNTIME, 30}}, // cuDeviceSetGraphMemAttribute - {"cudaDeviceSetGraphMemAttribute", {"hipDeviceSetGraphMemAttribute", "", CONV_GRAPH, API_RUNTIME, 30, HIP_EXPERIMENTAL}}, + {"cudaDeviceSetGraphMemAttribute", {"hipDeviceSetGraphMemAttribute", "", CONV_GRAPH, API_RUNTIME, 30}}, // cuGraphInstantiateWithFlags {"cudaGraphInstantiateWithFlags", {"hipGraphInstantiateWithFlags", "", CONV_GRAPH, API_RUNTIME, 30}}, // cuGraphNodeSetEnabled @@ -1320,16 +1320,16 @@ const std::map HIP_RUNTIME_FUNCTION_VER_MAP { {"hipThreadExchangeStreamCaptureMode", {HIP_5020, HIP_0, HIP_0 }}, {"hipGraphKernelNodeSetAttribute", {HIP_5020, HIP_0, HIP_0 }}, {"hipGraphKernelNodeGetAttribute", {HIP_5020, HIP_0, HIP_0 }}, - {"hipDeviceSetLimit", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipGraphUpload", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipDeviceGetGraphMemAttribute", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipDeviceSetGraphMemAttribute", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipDeviceGraphMemTrim", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipUserObjectCreate", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipUserObjectRelease", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipUserObjectRetain", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipGraphRetainUserObject", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipGraphReleaseUserObject", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipDeviceSetLimit", {HIP_5030, HIP_0, HIP_0 }}, + {"hipGraphUpload", {HIP_5030, HIP_0, HIP_0 }}, + {"hipDeviceGetGraphMemAttribute", {HIP_5030, HIP_0, HIP_0 }}, + {"hipDeviceSetGraphMemAttribute", {HIP_5030, HIP_0, HIP_0 }}, + {"hipDeviceGraphMemTrim", {HIP_5030, HIP_0, HIP_0 }}, + {"hipUserObjectCreate", 
{HIP_5030, HIP_0, HIP_0 }}, + {"hipUserObjectRelease", {HIP_5030, HIP_0, HIP_0 }}, + {"hipUserObjectRetain", {HIP_5030, HIP_0, HIP_0 }}, + {"hipGraphRetainUserObject", {HIP_5030, HIP_0, HIP_0 }}, + {"hipGraphReleaseUserObject", {HIP_5030, HIP_0, HIP_0 }}, }; const std::map CUDA_RUNTIME_API_SECTION_MAP { diff --git a/src/CUDA2HIP_Runtime_API_types.cpp b/src/CUDA2HIP_Runtime_API_types.cpp index 41f36ae4..ff3c4986 100644 --- a/src/CUDA2HIP_Runtime_API_types.cpp +++ b/src/CUDA2HIP_Runtime_API_types.cpp @@ -1150,9 +1150,9 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { // CU_GRAPH_NODE_TYPE_EVENT_RECORD = 7 {"cudaGraphNodeTypeEventRecord", {"hipGraphNodeTypeEventRecord", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 0x07 // CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL = 8 - {"cudaGraphNodeTypeExtSemaphoreSignal", {"hipGraphNodeTypeExtSemaphoreSignal", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_EXPERIMENTAL}}, // 0x08 + {"cudaGraphNodeTypeExtSemaphoreSignal", {"hipGraphNodeTypeExtSemaphoreSignal", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 0x08 // CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT = 9 - {"cudaGraphNodeTypeExtSemaphoreWait", {"hipGraphNodeTypeExtSemaphoreWait", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_EXPERIMENTAL}}, // 0x09 + {"cudaGraphNodeTypeExtSemaphoreWait", {"hipGraphNodeTypeExtSemaphoreWait", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 0x09 // CU_GRAPH_NODE_TYPE_MEM_ALLOC = 10 {"cudaGraphNodeTypeMemAlloc", {"hipGraphNodeTypeMemAlloc", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 0x0a // CU_GRAPH_NODE_TYPE_MEM_FREE = 11 @@ -1186,7 +1186,7 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { {"cudaLimit", {"hipLimit_t", "", CONV_TYPE, API_RUNTIME, 36}}, // cudaLimit enum values // CU_LIMIT_STACK_SIZE - {"cudaLimitStackSize", {"hipLimitStackSize", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_EXPERIMENTAL}}, // 0x00 + {"cudaLimitStackSize", {"hipLimitStackSize", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 0x00 // 
CU_LIMIT_PRINTF_FIFO_SIZE {"cudaLimitPrintfFifoSize", {"hipLimitPrintfFifoSize", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 0x01 // CU_LIMIT_MALLOC_HEAP_SIZE @@ -1232,7 +1232,7 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { {"cudaMemoryTypeUnregistered", {"hipMemoryTypeUnregistered", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 0 {"cudaMemoryTypeHost", {"hipMemoryTypeHost", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 1 {"cudaMemoryTypeDevice", {"hipMemoryTypeDevice", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 2 - {"cudaMemoryTypeManaged", {"hipMemoryTypeManaged", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_EXPERIMENTAL}}, // 3 + {"cudaMemoryTypeManaged", {"hipMemoryTypeManaged", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 3 // CUmem_range_attribute {"cudaMemRangeAttribute", {"hipMemRangeAttribute", "", CONV_TYPE, API_RUNTIME, 36}}, @@ -1619,16 +1619,16 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { {"cudaStreamSetCaptureDependencies", {"hipStreamSetCaptureDependencies", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 0x1 // CUuserObject_flags - {"cudaUserObjectFlags", {"hipUserObjectFlags", "", CONV_TYPE, API_RUNTIME, 36, HIP_EXPERIMENTAL}}, + {"cudaUserObjectFlags", {"hipUserObjectFlags", "", CONV_TYPE, API_RUNTIME, 36}}, // cudaUserObjectFlags enum values // CU_USER_OBJECT_NO_DESTRUCTOR_SYNC - {"cudaUserObjectNoDestructorSync", {"hipUserObjectNoDestructorSync", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_EXPERIMENTAL}}, // 0x1 + {"cudaUserObjectNoDestructorSync", {"hipUserObjectNoDestructorSync", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 0x1 // CUuserObjectRetain_flags - {"cudaUserObjectRetainFlags", {"hipUserObjectRetainFlags", "", CONV_TYPE, API_RUNTIME, 36, HIP_EXPERIMENTAL}}, + {"cudaUserObjectRetainFlags", {"hipUserObjectRetainFlags", "", CONV_TYPE, API_RUNTIME, 36}}, // cudaUserObjectRetainFlags enum values // CU_GRAPH_USER_OBJECT_MOVE - {"cudaGraphUserObjectMove", {"hipGraphUserObjectMove", "", 
CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_EXPERIMENTAL}}, // 0x1 + {"cudaGraphUserObjectMove", {"hipGraphUserObjectMove", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 0x1 // CUflushGPUDirectRDMAWritesOptions {"cudaFlushGPUDirectRDMAWritesOptions", {"hipFlushGPUDirectRDMAWritesOptions", "", CONV_TYPE, API_RUNTIME, 36, HIP_UNSUPPORTED}}, @@ -1697,16 +1697,16 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { {"cudaGraphDebugDotFlagsHandles", {"hipGraphDebugDotFlagsHandles", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_UNSUPPORTED}}, // 1<<10 // CUgraphMem_attribute - {"cudaGraphMemAttributeType", {"hipGraphMemAttributeType", "", CONV_TYPE, API_RUNTIME, 36, HIP_EXPERIMENTAL}}, + {"cudaGraphMemAttributeType", {"hipGraphMemAttributeType", "", CONV_TYPE, API_RUNTIME, 36}}, // cudaGraphMemAttributeType enum values // CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT - {"cudaGraphMemAttrUsedMemCurrent", {"hipGraphMemAttrUsedMemCurrent", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_EXPERIMENTAL}}, + {"cudaGraphMemAttrUsedMemCurrent", {"hipGraphMemAttrUsedMemCurrent", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // CU_GRAPH_MEM_ATTR_USED_MEM_HIGH - {"cudaGraphMemAttrUsedMemHigh", {"hipGraphMemAttrUsedMemHigh", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_EXPERIMENTAL}}, + {"cudaGraphMemAttrUsedMemHigh", {"hipGraphMemAttrUsedMemHigh", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT - {"cudaGraphMemAttrReservedMemCurrent", {"hipGraphMemAttrReservedMemCurrent", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_EXPERIMENTAL}}, + {"cudaGraphMemAttrReservedMemCurrent", {"hipGraphMemAttrReservedMemCurrent", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH - {"cudaGraphMemAttrReservedMemHigh", {"hipGraphMemAttrReservedMemHigh", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36, HIP_EXPERIMENTAL}}, + {"cudaGraphMemAttrReservedMemHigh", {"hipGraphMemAttrReservedMemHigh", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 36}}, // 
CUgraphInstantiate_flags {"cudaGraphInstantiateFlags", {"hipGraphInstantiateFlags", "", CONV_TYPE, API_RUNTIME, 36}}, @@ -1769,7 +1769,7 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { {"cudaMemPool_t", {"hipMemPool_t", "", CONV_TYPE, API_RUNTIME, 36}}, // CUuserObject - {"cudaUserObject_t", {"hipUserObject_t", "", CONV_TYPE, API_RUNTIME, 36, HIP_EXPERIMENTAL}}, + {"cudaUserObject_t", {"hipUserObject_t", "", CONV_TYPE, API_RUNTIME, 36}}, // 5. Defines @@ -2574,17 +2574,17 @@ const std::map HIP_RUNTIME_TYPE_NAME_VER_MAP { {"hipMemPoolPtrExportData", {HIP_5020, HIP_0, HIP_0 }}, {"hipGraphInstantiateFlags", {HIP_5020, HIP_0, HIP_0 }}, {"hipGraphInstantiateFlagAutoFreeOnLaunch", {HIP_5020, HIP_0, HIP_0 }}, - {"hipMemoryTypeManaged", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipLimitStackSize", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipGraphNodeTypeExtSemaphoreSignal", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipGraphNodeTypeExtSemaphoreWait", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipGraphMemAttributeType", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipGraphMemAttrUsedMemCurrent", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipGraphMemAttrUsedMemHigh", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipGraphMemAttrReservedMemCurrent", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipGraphMemAttrReservedMemHigh", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipUserObjectFlags", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipUserObjectNoDestructorSync", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipUserObjectRetainFlags", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, - {"hipGraphUserObjectMove", {HIP_5030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipMemoryTypeManaged", {HIP_5030, HIP_0, HIP_0 }}, + {"hipLimitStackSize", {HIP_5030, HIP_0, HIP_0 }}, + {"hipGraphNodeTypeExtSemaphoreSignal", {HIP_5030, HIP_0, HIP_0 }}, + {"hipGraphNodeTypeExtSemaphoreWait", {HIP_5030, HIP_0, HIP_0 }}, + {"hipGraphMemAttributeType", {HIP_5030, HIP_0, HIP_0 }}, + {"hipGraphMemAttrUsedMemCurrent", {HIP_5030, 
HIP_0, HIP_0 }}, + {"hipGraphMemAttrUsedMemHigh", {HIP_5030, HIP_0, HIP_0 }}, + {"hipGraphMemAttrReservedMemCurrent", {HIP_5030, HIP_0, HIP_0 }}, + {"hipGraphMemAttrReservedMemHigh", {HIP_5030, HIP_0, HIP_0 }}, + {"hipUserObjectFlags", {HIP_5030, HIP_0, HIP_0 }}, + {"hipUserObjectNoDestructorSync", {HIP_5030, HIP_0, HIP_0 }}, + {"hipUserObjectRetainFlags", {HIP_5030, HIP_0, HIP_0 }}, + {"hipGraphUserObjectMove", {HIP_5030, HIP_0, HIP_0 }}, }; diff --git a/src/Statistics.cpp b/src/Statistics.cpp index c95a9ff8..248cebdc 100644 --- a/src/Statistics.cpp +++ b/src/Statistics.cpp @@ -544,6 +544,7 @@ std::string Statistics::getHipVersion(const hipVersions& ver) { case HIP_5011: return "5.1.1"; case HIP_5020: return "5.2.0"; case HIP_5030: return "5.3.0"; + case HIP_5040: return "5.4.0"; } return ""; } diff --git a/src/Statistics.h b/src/Statistics.h index e15f1edc..3cc3e915 100644 --- a/src/Statistics.h +++ b/src/Statistics.h @@ -296,7 +296,8 @@ enum hipVersions { HIP_5011 = 5011, HIP_5020 = 5020, HIP_5030 = 5030, - HIP_LATEST = HIP_5030, + HIP_5040 = 5040, + HIP_LATEST = HIP_5040, }; struct cudaAPIversions { From c9246005ead5f6ba6e30f3f5cb113a332c14536b Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 25 Oct 2022 14:27:25 +0200 Subject: [PATCH 34/43] [HIPIFY][HIP][5.4.0] Sync - Part 2 + Added new APIs + Renamed JIT APIs + Updated the regenerated hipify-perl, the affected synthetic tests, and docs --- bin/hipify-perl | 43 +++++------ ...A_Driver_API_functions_supported_by_HIP.md | 40 +++++------ src/CUDA2HIP_Driver_API_functions.cpp | 6 +- src/CUDA2HIP_Driver_API_types.cpp | 72 +++++++++---------- tests/unit_tests/synthetic/driver_enums.cu | 36 +++++----- 5 files changed, 100 insertions(+), 97 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 1dbc1466..0a1a0073 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -717,7 +717,8 @@ my %removed_funcs = ( ); my %experimental_funcs = ( - + "cuGetErrorString" => "5.4.0", + "cuGetErrorName" => "5.4.0" ); 
$print_stats = 1 if $examine; @@ -855,6 +856,8 @@ sub subst { } sub experimentalSubstitutions { + subst("cuGetErrorName", "hipDrvGetErrorName", "error"); + subst("cuGetErrorString", "hipDrvGetErrorString", "error"); } sub rocSubstitutions { @@ -4019,31 +4022,31 @@ sub simpleSubstitutions { subst("CU_GRAPH_NODE_TYPE_WAIT_EVENT", "hipGraphNodeTypeWaitEvent", "numeric_literal"); subst("CU_GRAPH_USER_OBJECT_MOVE", "hipGraphUserObjectMove", "numeric_literal"); subst("CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS", "hipIpcMemLazyEnablePeerAccess", "numeric_literal"); - subst("CU_JIT_CACHE_MODE", "hipJitOptionCacheMode", "numeric_literal"); - subst("CU_JIT_ERROR_LOG_BUFFER", "hipJitOptionErrorLogBuffer", "numeric_literal"); - subst("CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES", "hipJitOptionErrorLogBufferSizeBytes", "numeric_literal"); - subst("CU_JIT_FALLBACK_STRATEGY", "hipJitOptionFallbackStrategy", "numeric_literal"); - subst("CU_JIT_FAST_COMPILE", "hipJitOptionFastCompile", "numeric_literal"); - subst("CU_JIT_GENERATE_DEBUG_INFO", "hipJitOptionGenerateDebugInfo", "numeric_literal"); - subst("CU_JIT_GENERATE_LINE_INFO", "hipJitOptionGenerateLineInfo", "numeric_literal"); - subst("CU_JIT_INFO_LOG_BUFFER", "hipJitOptionInfoLogBuffer", "numeric_literal"); - subst("CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES", "hipJitOptionInfoLogBufferSizeBytes", "numeric_literal"); + subst("CU_JIT_CACHE_MODE", "HIPRTC_JIT_CACHE_MODE", "numeric_literal"); + subst("CU_JIT_ERROR_LOG_BUFFER", "HIPRTC_JIT_ERROR_LOG_BUFFER", "numeric_literal"); + subst("CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES", "HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES", "numeric_literal"); + subst("CU_JIT_FALLBACK_STRATEGY", "HIPRTC_JIT_FALLBACK_STRATEGY", "numeric_literal"); + subst("CU_JIT_FAST_COMPILE", "HIPRTC_JIT_FAST_COMPILE", "numeric_literal"); + subst("CU_JIT_GENERATE_DEBUG_INFO", "HIPRTC_JIT_GENERATE_DEBUG_INFO", "numeric_literal"); + subst("CU_JIT_GENERATE_LINE_INFO", "HIPRTC_JIT_GENERATE_LINE_INFO", "numeric_literal"); + 
subst("CU_JIT_INFO_LOG_BUFFER", "HIPRTC_JIT_INFO_LOG_BUFFER", "numeric_literal"); + subst("CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES", "HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES", "numeric_literal"); subst("CU_JIT_INPUT_CUBIN", "HIPRTC_JIT_INPUT_CUBIN", "numeric_literal"); subst("CU_JIT_INPUT_FATBINARY", "HIPRTC_JIT_INPUT_FATBINARY", "numeric_literal"); subst("CU_JIT_INPUT_LIBRARY", "HIPRTC_JIT_INPUT_LIBRARY", "numeric_literal"); subst("CU_JIT_INPUT_NVVM", "HIPRTC_JIT_INPUT_NVVM", "numeric_literal"); subst("CU_JIT_INPUT_OBJECT", "HIPRTC_JIT_INPUT_OBJECT", "numeric_literal"); subst("CU_JIT_INPUT_PTX", "HIPRTC_JIT_INPUT_PTX", "numeric_literal"); - subst("CU_JIT_LOG_VERBOSE", "hipJitOptionLogVerbose", "numeric_literal"); - subst("CU_JIT_MAX_REGISTERS", "hipJitOptionMaxRegisters", "numeric_literal"); - subst("CU_JIT_NEW_SM3X_OPT", "hipJitOptionSm3xOpt", "numeric_literal"); + subst("CU_JIT_LOG_VERBOSE", "HIPRTC_JIT_LOG_VERBOSE", "numeric_literal"); + subst("CU_JIT_MAX_REGISTERS", "HIPRTC_JIT_MAX_REGISTERS", "numeric_literal"); + subst("CU_JIT_NEW_SM3X_OPT", "HIPRTC_JIT_NEW_SM3X_OPT", "numeric_literal"); subst("CU_JIT_NUM_INPUT_TYPES", "HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES", "numeric_literal"); - subst("CU_JIT_NUM_OPTIONS", "hipJitOptionNumOptions", "numeric_literal"); - subst("CU_JIT_OPTIMIZATION_LEVEL", "hipJitOptionOptimizationLevel", "numeric_literal"); - subst("CU_JIT_TARGET", "hipJitOptionTarget", "numeric_literal"); - subst("CU_JIT_TARGET_FROM_CUCONTEXT", "hipJitOptionTargetFromContext", "numeric_literal"); - subst("CU_JIT_THREADS_PER_BLOCK", "hipJitOptionThreadsPerBlock", "numeric_literal"); - subst("CU_JIT_WALL_TIME", "hipJitOptionWallTime", "numeric_literal"); + subst("CU_JIT_NUM_OPTIONS", "HIPRTC_JIT_NUM_OPTIONS", "numeric_literal"); + subst("CU_JIT_OPTIMIZATION_LEVEL", "HIPRTC_JIT_OPTIMIZATION_LEVEL", "numeric_literal"); + subst("CU_JIT_TARGET", "HIPRTC_JIT_TARGET", "numeric_literal"); + subst("CU_JIT_TARGET_FROM_CUCONTEXT", "HIPRTC_JIT_TARGET_FROM_HIPCONTEXT", 
"numeric_literal"); + subst("CU_JIT_THREADS_PER_BLOCK", "HIPRTC_JIT_THREADS_PER_BLOCK", "numeric_literal"); + subst("CU_JIT_WALL_TIME", "HIPRTC_JIT_WALL_TIME", "numeric_literal"); subst("CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW", "hipKernelNodeAttributeAccessPolicyWindow", "numeric_literal"); subst("CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE", "hipKernelNodeAttributeCooperative", "numeric_literal"); subst("CU_LIMIT_MALLOC_HEAP_SIZE", "hipLimitMallocHeapSize", "numeric_literal"); @@ -6609,8 +6612,6 @@ sub warnUnsupportedFunctions { "cuGraphAddExternalSemaphoresSignalNode", "cuGraphAddBatchMemOpNode", "cuGetProcAddress", - "cuGetErrorString", - "cuGetErrorName", "cuGLUnregisterBufferObject", "cuGLUnmapBufferObjectAsync", "cuGLUnmapBufferObject", diff --git a/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md b/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md index 5923086c..510b653a 100644 --- a/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md +++ b/doc/markdown/CUDA_Driver_API_functions_supported_by_HIP.md @@ -620,36 +620,36 @@ |`CU_GRAPH_USER_OBJECT_MOVE`|11.3| | |`hipGraphUserObjectMove`|5.3.0| | | | |`CU_IPC_HANDLE_SIZE`| | | |`HIP_IPC_HANDLE_SIZE`|1.6.0| | | | |`CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS`| | | |`hipIpcMemLazyEnablePeerAccess`|1.6.0| | | | -|`CU_JIT_CACHE_MODE`| | | |`hipJitOptionCacheMode`|1.6.0| | | | +|`CU_JIT_CACHE_MODE`| | | |`HIPRTC_JIT_CACHE_MODE`|1.6.0| | | | |`CU_JIT_CACHE_OPTION_CA`| | | | | | | | | |`CU_JIT_CACHE_OPTION_CG`| | | | | | | | | |`CU_JIT_CACHE_OPTION_NONE`| | | | | | | | | -|`CU_JIT_ERROR_LOG_BUFFER`| | | |`hipJitOptionErrorLogBuffer`|1.6.0| | | | -|`CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES`| | | |`hipJitOptionErrorLogBufferSizeBytes`|1.6.0| | | | -|`CU_JIT_FALLBACK_STRATEGY`| | | |`hipJitOptionFallbackStrategy`|1.6.0| | | | -|`CU_JIT_FAST_COMPILE`| | | |`hipJitOptionFastCompile`|1.6.0| | | | +|`CU_JIT_ERROR_LOG_BUFFER`| | | |`HIPRTC_JIT_ERROR_LOG_BUFFER`|1.6.0| | | | +|`CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES`| | | 
|`HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES`|1.6.0| | | | +|`CU_JIT_FALLBACK_STRATEGY`| | | |`HIPRTC_JIT_FALLBACK_STRATEGY`|1.6.0| | | | +|`CU_JIT_FAST_COMPILE`| | | |`HIPRTC_JIT_FAST_COMPILE`|1.6.0| | | | |`CU_JIT_FMA`|11.4| | | | | | | | |`CU_JIT_FTZ`|11.4| | | | | | | | -|`CU_JIT_GENERATE_DEBUG_INFO`| | | |`hipJitOptionGenerateDebugInfo`|1.6.0| | | | -|`CU_JIT_GENERATE_LINE_INFO`| | | |`hipJitOptionGenerateLineInfo`|1.6.0| | | | +|`CU_JIT_GENERATE_DEBUG_INFO`| | | |`HIPRTC_JIT_GENERATE_DEBUG_INFO`|1.6.0| | | | +|`CU_JIT_GENERATE_LINE_INFO`| | | |`HIPRTC_JIT_GENERATE_LINE_INFO`|1.6.0| | | | |`CU_JIT_GLOBAL_SYMBOL_ADDRESSES`| | | | | | | | | |`CU_JIT_GLOBAL_SYMBOL_COUNT`| | | | | | | | | |`CU_JIT_GLOBAL_SYMBOL_NAMES`| | | | | | | | | -|`CU_JIT_INFO_LOG_BUFFER`| | | |`hipJitOptionInfoLogBuffer`|1.6.0| | | | -|`CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES`| | | |`hipJitOptionInfoLogBufferSizeBytes`|1.6.0| | | | +|`CU_JIT_INFO_LOG_BUFFER`| | | |`HIPRTC_JIT_INFO_LOG_BUFFER`|1.6.0| | | | +|`CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES`| | | |`HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES`|1.6.0| | | | |`CU_JIT_INPUT_CUBIN`| | | |`HIPRTC_JIT_INPUT_CUBIN`|5.3.0| | | | |`CU_JIT_INPUT_FATBINARY`| | | |`HIPRTC_JIT_INPUT_FATBINARY`|5.3.0| | | | |`CU_JIT_INPUT_LIBRARY`| | | |`HIPRTC_JIT_INPUT_LIBRARY`|5.3.0| | | | |`CU_JIT_INPUT_NVVM`|11.4| | |`HIPRTC_JIT_INPUT_NVVM`|5.3.0| | | | |`CU_JIT_INPUT_OBJECT`| | | |`HIPRTC_JIT_INPUT_OBJECT`|5.3.0| | | | |`CU_JIT_INPUT_PTX`| | | |`HIPRTC_JIT_INPUT_PTX`|5.3.0| | | | -|`CU_JIT_LOG_VERBOSE`| | | |`hipJitOptionLogVerbose`|1.6.0| | | | +|`CU_JIT_LOG_VERBOSE`| | | |`HIPRTC_JIT_LOG_VERBOSE`|1.6.0| | | | |`CU_JIT_LTO`|11.4| | | | | | | | -|`CU_JIT_MAX_REGISTERS`| | | |`hipJitOptionMaxRegisters`|1.6.0| | | | -|`CU_JIT_NEW_SM3X_OPT`| | | |`hipJitOptionSm3xOpt`|1.6.0| | | | +|`CU_JIT_MAX_REGISTERS`| | | |`HIPRTC_JIT_MAX_REGISTERS`|1.6.0| | | | +|`CU_JIT_NEW_SM3X_OPT`| | | |`HIPRTC_JIT_NEW_SM3X_OPT`|1.6.0| | | | |`CU_JIT_NUM_INPUT_TYPES`| | | 
|`HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES`|5.3.0| | | | -|`CU_JIT_NUM_OPTIONS`| | | |`hipJitOptionNumOptions`|1.6.0| | | | -|`CU_JIT_OPTIMIZATION_LEVEL`| | | |`hipJitOptionOptimizationLevel`|1.6.0| | | | +|`CU_JIT_NUM_OPTIONS`| | | |`HIPRTC_JIT_NUM_OPTIONS`|1.6.0| | | | +|`CU_JIT_OPTIMIZATION_LEVEL`| | | |`HIPRTC_JIT_OPTIMIZATION_LEVEL`|1.6.0| | | | |`CU_JIT_OPTIMIZE_UNUSED_DEVICE_VARIABLES`|11.7| | | | | | | | |`CU_JIT_PREC_DIV`|11.4| | | | | | | | |`CU_JIT_PREC_SQRT`|11.4| | | | | | | | @@ -657,10 +657,10 @@ |`CU_JIT_REFERENCED_KERNEL_NAMES`|11.7| | | | | | | | |`CU_JIT_REFERENCED_VARIABLE_COUNT`|11.7| | | | | | | | |`CU_JIT_REFERENCED_VARIABLE_NAMES`|11.7| | | | | | | | -|`CU_JIT_TARGET`| | | |`hipJitOptionTarget`|1.6.0| | | | -|`CU_JIT_TARGET_FROM_CUCONTEXT`| | | |`hipJitOptionTargetFromContext`|1.6.0| | | | -|`CU_JIT_THREADS_PER_BLOCK`| | | |`hipJitOptionThreadsPerBlock`|1.6.0| | | | -|`CU_JIT_WALL_TIME`| | | |`hipJitOptionWallTime`|1.6.0| | | | +|`CU_JIT_TARGET`| | | |`HIPRTC_JIT_TARGET`|1.6.0| | | | +|`CU_JIT_TARGET_FROM_CUCONTEXT`| | | |`HIPRTC_JIT_TARGET_FROM_HIPCONTEXT`|1.6.0| | | | +|`CU_JIT_THREADS_PER_BLOCK`| | | |`HIPRTC_JIT_THREADS_PER_BLOCK`|1.6.0| | | | +|`CU_JIT_WALL_TIME`| | | |`HIPRTC_JIT_WALL_TIME`|1.6.0| | | | |`CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW`|11.0| | |`hipKernelNodeAttributeAccessPolicyWindow`|5.2.0| | | | |`CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_DIMENSION`|11.8| | | | | | | | |`CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE`|11.8| | | | | | | | @@ -1182,8 +1182,8 @@ |**CUDA**|**A**|**D**|**R**|**HIP**|**A**|**D**|**R**|**E**| |:--|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:| -|`cuGetErrorName`| | | | | | | | | -|`cuGetErrorString`| | | | | | | | | +|`cuGetErrorName`| | | |`hipDrvGetErrorName`|5.4.0| | |5.4.0| +|`cuGetErrorString`| | | |`hipDrvGetErrorString`|5.4.0| | |5.4.0| ## **3. 
Initialization** diff --git a/src/CUDA2HIP_Driver_API_functions.cpp b/src/CUDA2HIP_Driver_API_functions.cpp index 6c148fea..eff0c0c9 100644 --- a/src/CUDA2HIP_Driver_API_functions.cpp +++ b/src/CUDA2HIP_Driver_API_functions.cpp @@ -27,10 +27,10 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { // 2. Error Handling // no analogue // NOTE: cudaGetErrorName and cuGetErrorName have different signatures - {"cuGetErrorName", {"hipDrvGetErrorName", "", CONV_ERROR, API_DRIVER, 2, HIP_UNSUPPORTED}}, + {"cuGetErrorName", {"hipDrvGetErrorName", "", CONV_ERROR, API_DRIVER, 2, HIP_EXPERIMENTAL}}, // no analogue // NOTE: cudaGetErrorString and cuGetErrorString have different signatures - {"cuGetErrorString", {"hipDrvGetErrorString", "", CONV_ERROR, API_DRIVER, 2, HIP_UNSUPPORTED}}, + {"cuGetErrorString", {"hipDrvGetErrorString", "", CONV_ERROR, API_DRIVER, 2, HIP_EXPERIMENTAL}}, // 3. Initialization // no analogue @@ -1425,6 +1425,8 @@ const std::map HIP_DRIVER_FUNCTION_VER_MAP { {"hiprtcLinkAddData", {HIP_5030, HIP_0, HIP_0 }}, {"hiprtcLinkComplete", {HIP_5030, HIP_0, HIP_0 }}, {"hiprtcLinkDestroy", {HIP_5030, HIP_0, HIP_0 }}, + {"hipDrvGetErrorName", {HIP_5040, HIP_0, HIP_0, HIP_LATEST}}, + {"hipDrvGetErrorString", {HIP_5040, HIP_0, HIP_0, HIP_LATEST}}, }; const std::map CUDA_DRIVER_API_SECTION_MAP { diff --git a/src/CUDA2HIP_Driver_API_types.cpp b/src/CUDA2HIP_Driver_API_types.cpp index 474c3aa0..2af7b1b7 100644 --- a/src/CUDA2HIP_Driver_API_types.cpp +++ b/src/CUDA2HIP_Driver_API_types.cpp @@ -1184,23 +1184,23 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CUjit_option", {"hipJitOption", "", CONV_TYPE, API_DRIVER, 1}}, {"CUjit_option_enum", {"hipJitOption", "", CONV_TYPE, API_DRIVER, 1}}, // CUjit_option enum values - {"CU_JIT_MAX_REGISTERS", {"hipJitOptionMaxRegisters", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 0 - {"CU_JIT_THREADS_PER_BLOCK", {"hipJitOptionThreadsPerBlock", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, - {"CU_JIT_WALL_TIME", {"hipJitOptionWallTime", "", 
CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, - {"CU_JIT_INFO_LOG_BUFFER", {"hipJitOptionInfoLogBuffer", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, - {"CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES", {"hipJitOptionInfoLogBufferSizeBytes", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, - {"CU_JIT_ERROR_LOG_BUFFER", {"hipJitOptionErrorLogBuffer", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, - {"CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES", {"hipJitOptionErrorLogBufferSizeBytes", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, - {"CU_JIT_OPTIMIZATION_LEVEL", {"hipJitOptionOptimizationLevel", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, - {"CU_JIT_TARGET_FROM_CUCONTEXT", {"hipJitOptionTargetFromContext", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, - {"CU_JIT_TARGET", {"hipJitOptionTarget", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, - {"CU_JIT_FALLBACK_STRATEGY", {"hipJitOptionFallbackStrategy", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, - {"CU_JIT_GENERATE_DEBUG_INFO", {"hipJitOptionGenerateDebugInfo", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, - {"CU_JIT_LOG_VERBOSE", {"hipJitOptionLogVerbose", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, - {"CU_JIT_GENERATE_LINE_INFO", {"hipJitOptionGenerateLineInfo", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, - {"CU_JIT_CACHE_MODE", {"hipJitOptionCacheMode", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, - {"CU_JIT_NEW_SM3X_OPT", {"hipJitOptionSm3xOpt", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, - {"CU_JIT_FAST_COMPILE", {"hipJitOptionFastCompile", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_MAX_REGISTERS", {"HIPRTC_JIT_MAX_REGISTERS", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // 0 + {"CU_JIT_THREADS_PER_BLOCK", {"HIPRTC_JIT_THREADS_PER_BLOCK", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_WALL_TIME", {"HIPRTC_JIT_WALL_TIME", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_INFO_LOG_BUFFER", {"HIPRTC_JIT_INFO_LOG_BUFFER", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES", {"HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES", "", 
CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_ERROR_LOG_BUFFER", {"HIPRTC_JIT_ERROR_LOG_BUFFER", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES", {"HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_OPTIMIZATION_LEVEL", {"HIPRTC_JIT_OPTIMIZATION_LEVEL", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_TARGET_FROM_CUCONTEXT", {"HIPRTC_JIT_TARGET_FROM_HIPCONTEXT", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_TARGET", {"HIPRTC_JIT_TARGET", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_FALLBACK_STRATEGY", {"HIPRTC_JIT_FALLBACK_STRATEGY", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_GENERATE_DEBUG_INFO", {"HIPRTC_JIT_GENERATE_DEBUG_INFO", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_LOG_VERBOSE", {"HIPRTC_JIT_LOG_VERBOSE", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_GENERATE_LINE_INFO", {"HIPRTC_JIT_GENERATE_LINE_INFO", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_CACHE_MODE", {"HIPRTC_JIT_CACHE_MODE", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_NEW_SM3X_OPT", {"HIPRTC_JIT_NEW_SM3X_OPT", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_FAST_COMPILE", {"HIPRTC_JIT_FAST_COMPILE", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, {"CU_JIT_GLOBAL_SYMBOL_NAMES", {"hipJitGlobalSymbolNames", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, {"CU_JIT_GLOBAL_SYMBOL_ADDRESSES", {"hipJitGlobalSymbolAddresses", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, {"CU_JIT_GLOBAL_SYMBOL_COUNT", {"hipJitGlobalSymbolCount", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, @@ -1214,7 +1214,7 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CU_JIT_REFERENCED_VARIABLE_NAMES", {"hipJitReferencedVariableNames", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, {"CU_JIT_REFERENCED_VARIABLE_COUNT", {"hipJitReferencedVariableCount", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, 
{"CU_JIT_OPTIMIZE_UNUSED_DEVICE_VARIABLES", {"hipJitOptimizeUnusedDeviceVariables", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1, HIP_UNSUPPORTED}}, - {"CU_JIT_NUM_OPTIONS", {"hipJitOptionNumOptions", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, + {"CU_JIT_NUM_OPTIONS", {"HIPRTC_JIT_NUM_OPTIONS", "", CONV_NUMERIC_LITERAL, API_DRIVER, 1}}, // no analogue {"CUjit_target", {"hipJitTarget", "", CONV_TYPE, API_DRIVER, 1, HIP_UNSUPPORTED}}, @@ -3108,24 +3108,24 @@ const std::map HIP_DRIVER_TYPE_NAME_VER_MAP { {"hipMemRangeAttributeAccessedBy", {HIP_3070, HIP_0, HIP_0 }}, {"hipMemRangeAttributeLastPrefetchLocation", {HIP_3070, HIP_0, HIP_0 }}, {"hipJitOption", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionMaxRegisters", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionThreadsPerBlock", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionWallTime", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionInfoLogBuffer", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionInfoLogBufferSizeBytes", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionErrorLogBuffer", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionErrorLogBufferSizeBytes", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionOptimizationLevel", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionTargetFromContext", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionTarget", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionFallbackStrategy", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionGenerateDebugInfo", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionLogVerbose", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionGenerateLineInfo", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionCacheMode", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionSm3xOpt", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionFastCompile", {HIP_1060, HIP_0, HIP_0 }}, - {"hipJitOptionNumOptions", {HIP_1060, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_MAX_REGISTERS", {HIP_1060, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_THREADS_PER_BLOCK", {HIP_1060, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_WALL_TIME", {HIP_1060, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_INFO_LOG_BUFFER", {HIP_1060, HIP_0, 
HIP_0 }}, + {"HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES", {HIP_1060, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_ERROR_LOG_BUFFER", {HIP_1060, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES", {HIP_1060, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_OPTIMIZATION_LEVEL", {HIP_1060, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_TARGET_FROM_HIPCONTEXT", {HIP_1060, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_TARGET", {HIP_1060, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_FALLBACK_STRATEGY", {HIP_1060, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_GENERATE_DEBUG_INFO", {HIP_1060, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_LOG_VERBOSE", {HIP_1060, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_GENERATE_LINE_INFO", {HIP_1060, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_CACHE_MODE", {HIP_1060, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_NEW_SM3X_OPT", {HIP_1060, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_FAST_COMPILE", {HIP_1060, HIP_0, HIP_0 }}, + {"HIPRTC_JIT_NUM_OPTIONS", {HIP_1060, HIP_0, HIP_0 }}, {"hipFuncCache_t", {HIP_1060, HIP_0, HIP_0 }}, {"hipFuncCachePreferNone", {HIP_1060, HIP_0, HIP_0 }}, {"hipFuncCachePreferShared", {HIP_1060, HIP_0, HIP_0 }}, diff --git a/tests/unit_tests/synthetic/driver_enums.cu b/tests/unit_tests/synthetic/driver_enums.cu index 8bd9ddf8..8368e648 100644 --- a/tests/unit_tests/synthetic/driver_enums.cu +++ b/tests/unit_tests/synthetic/driver_enums.cu @@ -313,21 +313,21 @@ int main() { // CHECK: hipJitOption jit_option; // CHECK-NEXT: hipJitOption jit_option_enum; - // CHECK-NEXT: hipJitOption JIT_MAX_REGISTERS = hipJitOptionMaxRegisters; - // CHECK-NEXT: hipJitOption JIT_THREADS_PER_BLOCK = hipJitOptionThreadsPerBlock; - // CHECK-NEXT: hipJitOption JIT_WALL_TIME = hipJitOptionWallTime; - // CHECK-NEXT: hipJitOption JIT_INFO_LOG_BUFFER = hipJitOptionInfoLogBuffer; - // CHECK-NEXT: hipJitOption JIT_INFO_LOG_BUFFER_SIZE_BYTES = hipJitOptionInfoLogBufferSizeBytes; - // CHECK-NEXT: hipJitOption JIT_ERROR_LOG_BUFFER = hipJitOptionErrorLogBuffer; - // CHECK-NEXT: hipJitOption JIT_ERROR_LOG_BUFFER_SIZE_BYTES = hipJitOptionErrorLogBufferSizeBytes; - // CHECK-NEXT: 
hipJitOption JIT_OPTIMIZATION_LEVEL = hipJitOptionOptimizationLevel; - // CHECK-NEXT: hipJitOption JIT_TARGET_FROM_CUCONTEXT = hipJitOptionTargetFromContext; - // CHECK-NEXT: hipJitOption JIT_TARGET = hipJitOptionTarget; - // CHECK-NEXT: hipJitOption JIT_FALLBACK_STRATEGY = hipJitOptionFallbackStrategy; - // CHECK-NEXT: hipJitOption JIT_GENERATE_DEBUG_INFO = hipJitOptionGenerateDebugInfo; - // CHECK-NEXT: hipJitOption JIT_LOG_VERBOSE = hipJitOptionLogVerbose; - // CHECK-NEXT: hipJitOption JIT_GENERATE_LINE_INFO = hipJitOptionGenerateLineInfo; - // CHECK-NEXT: hipJitOption JIT_CACHE_MODE = hipJitOptionCacheMode; + // CHECK-NEXT: hipJitOption JIT_MAX_REGISTERS = HIPRTC_JIT_MAX_REGISTERS; + // CHECK-NEXT: hipJitOption JIT_THREADS_PER_BLOCK = HIPRTC_JIT_THREADS_PER_BLOCK; + // CHECK-NEXT: hipJitOption JIT_WALL_TIME = HIPRTC_JIT_WALL_TIME; + // CHECK-NEXT: hipJitOption JIT_INFO_LOG_BUFFER = HIPRTC_JIT_INFO_LOG_BUFFER; + // CHECK-NEXT: hipJitOption JIT_INFO_LOG_BUFFER_SIZE_BYTES = HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES; + // CHECK-NEXT: hipJitOption JIT_ERROR_LOG_BUFFER = HIPRTC_JIT_ERROR_LOG_BUFFER; + // CHECK-NEXT: hipJitOption JIT_ERROR_LOG_BUFFER_SIZE_BYTES = HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES; + // CHECK-NEXT: hipJitOption JIT_OPTIMIZATION_LEVEL = HIPRTC_JIT_OPTIMIZATION_LEVEL; + // CHECK-NEXT: hipJitOption JIT_TARGET_FROM_CUCONTEXT = HIPRTC_JIT_TARGET_FROM_HIPCONTEXT; + // CHECK-NEXT: hipJitOption JIT_TARGET = HIPRTC_JIT_TARGET; + // CHECK-NEXT: hipJitOption JIT_FALLBACK_STRATEGY = HIPRTC_JIT_FALLBACK_STRATEGY; + // CHECK-NEXT: hipJitOption JIT_GENERATE_DEBUG_INFO = HIPRTC_JIT_GENERATE_DEBUG_INFO; + // CHECK-NEXT: hipJitOption JIT_LOG_VERBOSE = HIPRTC_JIT_LOG_VERBOSE; + // CHECK-NEXT: hipJitOption JIT_GENERATE_LINE_INFO = HIPRTC_JIT_GENERATE_LINE_INFO; + // CHECK-NEXT: hipJitOption JIT_CACHE_MODE = HIPRTC_JIT_CACHE_MODE; CUjit_option jit_option; CUjit_option_enum jit_option_enum; CUjit_option JIT_MAX_REGISTERS = CU_JIT_MAX_REGISTERS; @@ -346,7 +346,7 @@ int 
main() { CUjit_option JIT_GENERATE_LINE_INFO = CU_JIT_GENERATE_LINE_INFO; CUjit_option JIT_CACHE_MODE = CU_JIT_CACHE_MODE; - // CHECK: hipJitOption JIT_NUM_OPTIONS = hipJitOptionNumOptions; + // CHECK: hipJitOption JIT_NUM_OPTIONS = HIPRTC_JIT_NUM_OPTIONS; CUjit_option JIT_NUM_OPTIONS = CU_JIT_NUM_OPTIONS; // CHECK: hipLimit_t limit; @@ -642,8 +642,8 @@ int main() { CUdevice_P2PAttribute DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED = CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED; CUdevice_P2PAttribute DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED = CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED; - // CHECK: hipJitOption JIT_NEW_SM3X_OPT = hipJitOptionSm3xOpt; - // CHECK-NEXT: hipJitOption JIT_FAST_COMPILE = hipJitOptionFastCompile; + // CHECK: hipJitOption JIT_NEW_SM3X_OPT = HIPRTC_JIT_NEW_SM3X_OPT; + // CHECK-NEXT: hipJitOption JIT_FAST_COMPILE = HIPRTC_JIT_FAST_COMPILE; CUjit_option JIT_NEW_SM3X_OPT = CU_JIT_NEW_SM3X_OPT; CUjit_option JIT_FAST_COMPILE = CU_JIT_FAST_COMPILE; From b229737aa68ba8673a18d55a8ebcc5facfe15a0e Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 31 Oct 2022 14:47:40 +0100 Subject: [PATCH 35/43] [HIPIFY][doc][tests] Python 3.11.0 is the latest supported release + Tested on Windows 10 and Ubuntu 21.10 + Update 3rd parties versions --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 12e09c68..ff026b05 100644 --- a/README.md +++ b/README.md @@ -568,8 +568,8 @@ Testing Time: 6.22s | 11.0.1 - 11.1.0 | 7.0 - 11.2.2 | 7.6.5 - 8.0.5 | 2017.15.9.31, 2019.16.8.4 | 3.19.3 | 3.9.2 | | 12.0.0 - 13.0.1 | 7.0 - 11.5.1 | 7.6.5 - 8.3.2 | 2017.15.9.43, 2019.16.11.9 | 3.22.2 | 3.10.2 | | 14.0.0 - 14.0.6 | 7.0 - 11.7.1 | 8.0.5 - 8.4.1 | 2017.15.9.49, 2019.16.11.17, 2022.17.2.6 | 3.24.0 | 3.10.6 | -| 15.0.0 - 15.0.3 | 7.0 - 11.8.0 | 8.0.5 - 8.6.0 | 2017.15.9.50, 2019.16.11.18, 2022.17.3.3 | 3.24.1 | 3.10.6 | -| 16.0.0git | 7.0 - 11.8.0 | 8.0.5 - 8.6.0 | 2017.15.9.50, 2019.16.11.18, 2022.17.3.3 | 3.24.1 | 
3.10.6 | +| 15.0.0 - 15.0.3 | 7.0 - 11.8.0 | 8.0.5 - 8.6.0 | 2017.15.9.50, 2019.16.11.20, 2022.17.3.6 | 3.24.2 | 3.11.0 | +| 16.0.0git | 7.0 - 11.8.0 | 8.0.5 - 8.6.0 | 2017.15.9.50, 2019.16.11.20, 2022.17.3.6 | 3.24.2 | 3.11.0 | *Building with testing support by `Visual Studio 17 2022` on `Windows 10`:* @@ -595,8 +595,8 @@ cmake -- - CMake module path: d:/LLVM/15.0.3/dist/lib/cmake/llvm -- - Include path : d:/LLVM/15.0.3/dist/include -- - Binary path : d:/LLVM/15.0.3/dist/bin --- Found PythonInterp: c:/Program Files/Python39/python.exe (found suitable version "3.9.5", minimum required is "3.6") --- Found lit: c:/Program Files/Python39/Scripts/lit.exe +-- Found PythonInterp: c:/Program Files/Python311/python.exe (found suitable version "3.11.0", minimum required is "3.6") +-- Found lit: c:/Program Files/Python311/Scripts/lit.exe -- Found FileCheck: d:/LLVM/15.0.3/dist/bin/FileCheck.exe -- Found CUDA: c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8 (found version "11.8") -- Configuring done From 857d29f1dbbeacbc5ce7a58ad47b4f094ec36394 Mon Sep 17 00:00:00 2001 From: Ashutosh Mishra Date: Fri, 28 Oct 2022 16:00:43 +0530 Subject: [PATCH 36/43] SWDEV-364233 - Wrong runpath in hipify-clang Correcting runpath of hipify-clang from runpath to correct rpath Signed-off-by: Ashutosh Mishra --- CMakeLists.txt | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b04de886..0c6806bb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,12 +24,12 @@ if(MSVC AND MSVC_VERSION VERSION_LESS "1900") endif() include_directories(${LLVM_INCLUDE_DIRS}) -link_directories(${LLVM_LIBRARY_DIRS}) add_definitions(${LLVM_DEFINITIONS}) file(GLOB_RECURSE HIPIFY_SOURCES src/*.cpp) file(GLOB_RECURSE HIPIFY_HEADERS src/*.h) add_llvm_executable(hipify-clang ${HIPIFY_SOURCES} ${HIPIFY_HEADERS}) +target_link_directories(hipify-clang PRIVATE ${LLVM_LIBRARY_DIRS}) set(CMAKE_CXX_COMPILER ${LLVM_TOOLS_BINARY_DIR}/clang++) 
set(CMAKE_C_COMPILER ${LLVM_TOOLS_BINARY_DIR}/clang) @@ -131,7 +131,14 @@ install( PATTERN "openmp_wrappers" EXCLUDE) option(FILE_REORG_BACKWARD_COMPATIBILITY "Enable File Reorg with backward compatibility" ON) + if(UNIX) + + #get rid of any RPATH definitions already + set_target_properties(hipify-clang PROPERTIES INSTALL_RPATH "") + #set RPATH for the binary + set_target_properties(hipify-clang PROPERTIES LINK_FLAGS "-Wl,--disable-new-dtags -Wl,--rpath,$ORIGIN/../lib" ) + if(FILE_REORG_BACKWARD_COMPATIBILITY) include(hipify-backward-compat.cmake) endif() From 2480a81250b404db5f800835ba464ad047060292 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 1 Nov 2022 18:23:42 +0100 Subject: [PATCH 37/43] [HIPIFY][#584][DNN][MIOpen] cuDNN -> MIOpen - Part 1 + Introduced the `API_DNN` as yet another API to hipify under the `--roc` option + Introduced a new synthetic test `cudnn2miopen.cu` + Added just a few cudnn2miopen mappings [ToDo] + Add the needed changes in the `hipify-perl` script generation + Start to generate yet another CUDA2HIP Markdown doc regarding cuDNN support in MIOpen + Decide what to do with the still being generated but not being published `CUDNN_API_supported_by_HIP.md` for cuDNN support in the obsolete hipDNN --- src/CUDA2HIP.cpp | 2 +- src/CUDA2HIP_DNN_API_types.cpp | 32 +++++++++---------- src/Statistics.cpp | 2 +- .../synthetic/libraries/cudnn2miopen.cu | 30 +++++++++++++++++ 4 files changed, 48 insertions(+), 18 deletions(-) create mode 100644 tests/unit_tests/synthetic/libraries/cudnn2miopen.cu diff --git a/src/CUDA2HIP.cpp b/src/CUDA2HIP.cpp index 7f0f7c9e..e5e53e9c 100644 --- a/src/CUDA2HIP.cpp +++ b/src/CUDA2HIP.cpp @@ -62,7 +62,7 @@ const std::map CUDA_INCLUDE_MAP { {"curand_precalc.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, {"curand_uniform.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND, 0}}, // cuDNN includes - {"cudnn.h", {"hipDNN.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_DNN, 0}}, + {"cudnn.h", {"hipDNN.h",
"miopen/miopen.h", CONV_INCLUDE_CUDA_MAIN_H, API_DNN, 0}}, // cuFFT includes {"cufft.h", {"hipfft.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_FFT, 0}}, {"cufftXt.h", {"hipfftXt.h", "", CONV_INCLUDE, API_FFT, 0}}, diff --git a/src/CUDA2HIP_DNN_API_types.cpp b/src/CUDA2HIP_DNN_API_types.cpp index 75d3bfb1..de977487 100644 --- a/src/CUDA2HIP_DNN_API_types.cpp +++ b/src/CUDA2HIP_DNN_API_types.cpp @@ -59,22 +59,22 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_OPS_TRAIN_PATCH", {"HIPDNN_OPS_TRAIN_PATCH", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // cuDNN enums - {"cudnnStatus_t", {"hipdnnStatus_t", "", CONV_TYPE, API_DNN, 1}}, - {"CUDNN_STATUS_SUCCESS", {"HIPDNN_STATUS_SUCCESS", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 0 - {"CUDNN_STATUS_NOT_INITIALIZED", {"HIPDNN_STATUS_NOT_INITIALIZED", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 1 - {"CUDNN_STATUS_ALLOC_FAILED", {"HIPDNN_STATUS_ALLOC_FAILED", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 2 - {"CUDNN_STATUS_BAD_PARAM", {"HIPDNN_STATUS_BAD_PARAM", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 3 - {"CUDNN_STATUS_INTERNAL_ERROR", {"HIPDNN_STATUS_INTERNAL_ERROR", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 4 - {"CUDNN_STATUS_INVALID_VALUE", {"HIPDNN_STATUS_INVALID_VALUE", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 5 - {"CUDNN_STATUS_ARCH_MISMATCH", {"HIPDNN_STATUS_ARCH_MISMATCH", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 6 - {"CUDNN_STATUS_MAPPING_ERROR", {"HIPDNN_STATUS_MAPPING_ERROR", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 7 - {"CUDNN_STATUS_EXECUTION_FAILED", {"HIPDNN_STATUS_EXECUTION_FAILED", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 8 - {"CUDNN_STATUS_NOT_SUPPORTED", {"HIPDNN_STATUS_NOT_SUPPORTED", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 9 - {"CUDNN_STATUS_LICENSE_ERROR", {"HIPDNN_STATUS_LICENSE_ERROR", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 10 - {"CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING", {"HIPDNN_STATUS_RUNTIME_PREREQUISITE_MISSING", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 11 - 
{"CUDNN_STATUS_RUNTIME_IN_PROGRESS", {"HIPDNN_STATUS_RUNTIME_IN_PROGRESS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 12 - {"CUDNN_STATUS_RUNTIME_FP_OVERFLOW", {"HIPDNN_STATUS_RUNTIME_FP_OVERFLOW", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 13 - {"CUDNN_STATUS_VERSION_MISMATCH", {"HIPDNN_STATUS_VERSION_MISMATCH", "", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 14 + {"cudnnStatus_t", {"hipdnnStatus_t", "miopenStatus_t", CONV_TYPE, API_DNN, 1}}, + {"CUDNN_STATUS_SUCCESS", {"HIPDNN_STATUS_SUCCESS", "miopenStatusSuccess", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 0 + {"CUDNN_STATUS_NOT_INITIALIZED", {"HIPDNN_STATUS_NOT_INITIALIZED", "miopenStatusNotInitialized", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 1 + {"CUDNN_STATUS_ALLOC_FAILED", {"HIPDNN_STATUS_ALLOC_FAILED", "miopenStatusAllocFailed", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 2 // 4 + {"CUDNN_STATUS_BAD_PARAM", {"HIPDNN_STATUS_BAD_PARAM", "miopenStatusBadParm", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 3 + {"CUDNN_STATUS_INTERNAL_ERROR", {"HIPDNN_STATUS_INTERNAL_ERROR", "miopenStatusInternalError", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 4 // 5 + {"CUDNN_STATUS_INVALID_VALUE", {"HIPDNN_STATUS_INVALID_VALUE", "miopenStatusInvalidValue", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 5 // 2 + {"CUDNN_STATUS_ARCH_MISMATCH", {"HIPDNN_STATUS_ARCH_MISMATCH", "", CONV_NUMERIC_LITERAL, API_DNN, 1, ROC_UNSUPPORTED}}, // 6 + {"CUDNN_STATUS_MAPPING_ERROR", {"HIPDNN_STATUS_MAPPING_ERROR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, ROC_UNSUPPORTED}}, // 7 + {"CUDNN_STATUS_EXECUTION_FAILED", {"HIPDNN_STATUS_EXECUTION_FAILED", "", CONV_NUMERIC_LITERAL, API_DNN, 1, ROC_UNSUPPORTED}}, // 8 + {"CUDNN_STATUS_NOT_SUPPORTED", {"HIPDNN_STATUS_NOT_SUPPORTED", "miopenStatusUnsupportedOp", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 9 // 8 + {"CUDNN_STATUS_LICENSE_ERROR", {"HIPDNN_STATUS_LICENSE_ERROR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, ROC_UNSUPPORTED}}, // 10 + {"CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING", 
{"HIPDNN_STATUS_RUNTIME_PREREQUISITE_MISSING", "", CONV_NUMERIC_LITERAL, API_DNN, 1, ROC_UNSUPPORTED}}, // 11 + {"CUDNN_STATUS_RUNTIME_IN_PROGRESS", {"HIPDNN_STATUS_RUNTIME_IN_PROGRESS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 12 + {"CUDNN_STATUS_RUNTIME_FP_OVERFLOW", {"HIPDNN_STATUS_RUNTIME_FP_OVERFLOW", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 13 + {"CUDNN_STATUS_VERSION_MISMATCH", {"HIPDNN_STATUS_VERSION_MISMATCH", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 14 {"cudnnRuntimeTag_t", {"hipdnnRuntimeTag_t", "", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, {"cudnnConvolutionMode_t", {"hipdnnConvolutionMode_t", "", CONV_TYPE, API_DNN, 1}}, {"CUDNN_CONVOLUTION", {"HIPDNN_CONVOLUTION", "", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 0 diff --git a/src/Statistics.cpp b/src/Statistics.cpp index 248cebdc..8c8ef69f 100644 --- a/src/Statistics.cpp +++ b/src/Statistics.cpp @@ -349,7 +349,7 @@ void Statistics::setActive(const std::string &name) { } bool Statistics::isToRoc(const hipCounter &counter) { - return TranslateToRoc && counter.apiType == API_BLAS; + return TranslateToRoc && (counter.apiType == API_BLAS || counter.apiType == API_DNN); } bool Statistics::isHipExperimental(const hipCounter& counter) { diff --git a/tests/unit_tests/synthetic/libraries/cudnn2miopen.cu b/tests/unit_tests/synthetic/libraries/cudnn2miopen.cu new file mode 100644 index 00000000..d7a9ebb3 --- /dev/null +++ b/tests/unit_tests/synthetic/libraries/cudnn2miopen.cu @@ -0,0 +1,30 @@ +// RUN: %run_test hipify "%s" "%t" %hipify_args 3 --skip-excluded-preprocessor-conditional-blocks --experimental -roc %clang_args -D__CUDA_API_VERSION_INTERNAL + +// CHECK: #include +#include +#include +// CHECK: #include "miopen/miopen.h" +#include "cudnn.h" + +int main() { + printf("15. 
cuDNN API to MIOpen API synthetic test\n"); + + // CHECK: miopenStatus_t dnnStatus_t; + // CHECK-NEXT: miopenStatus_t STATUS_SUCCESS = miopenStatusSuccess; + // CHECK-NEXT: miopenStatus_t STATUS_NOT_INITIALIZED = miopenStatusNotInitialized; + // CHECK-NEXT: miopenStatus_t STATUS_ALLOC_FAILED = miopenStatusAllocFailed; + // CHECK-NEXT: miopenStatus_t STATUS_BAD_PARAM = miopenStatusBadParm; + // CHECK-NEXT: miopenStatus_t STATUS_INTERNAL_ERROR = miopenStatusInternalError; + // CHECK-NEXT: miopenStatus_t STATUS_INVALID_VALUE = miopenStatusInvalidValue; + // CHECK-NEXT: miopenStatus_t STATUS_NOT_SUPPORTED = miopenStatusUnsupportedOp; + cudnnStatus_t dnnStatus_t; + cudnnStatus_t STATUS_SUCCESS = CUDNN_STATUS_SUCCESS; + cudnnStatus_t STATUS_NOT_INITIALIZED = CUDNN_STATUS_NOT_INITIALIZED; + cudnnStatus_t STATUS_ALLOC_FAILED = CUDNN_STATUS_ALLOC_FAILED; + cudnnStatus_t STATUS_BAD_PARAM = CUDNN_STATUS_BAD_PARAM; + cudnnStatus_t STATUS_INTERNAL_ERROR = CUDNN_STATUS_INTERNAL_ERROR; + cudnnStatus_t STATUS_INVALID_VALUE = CUDNN_STATUS_INVALID_VALUE; + cudnnStatus_t STATUS_NOT_SUPPORTED = CUDNN_STATUS_NOT_SUPPORTED; + + return 0; +} From 20226b86700d49dcfd237692b548b955f81ba36f Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Wed, 2 Nov 2022 23:47:48 +0100 Subject: [PATCH 38/43] [HIPIFY][doc] LLVM 15.0.4 is the latest supported LLVM release + No patches are needed + Updated README.md accordingly + Tested on Windows 10 and Ubuntu 21.10 --- README.md | 55 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index ff026b05..2040e605 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ After applying all the matchers, the output HIP source is produced. `hipify-clang` requires: -1. 
[**LLVM+CLANG**](http://releases.llvm.org) of at least version [4.0.0](http://releases.llvm.org/download.html#4.0.0); the latest stable and recommended release: [**15.0.3**](https://github.com/llvm/llvm-project/releases/tag/llvmorg-15.0.3). +1. [**LLVM+CLANG**](http://releases.llvm.org) of at least version [4.0.0](http://releases.llvm.org/download.html#4.0.0); the latest stable and recommended release: [**15.0.4**](https://github.com/llvm/llvm-project/releases/tag/llvmorg-15.0.4). 2. [**CUDA**](https://developer.nvidia.com/cuda-downloads) of at least version [8.0](https://developer.nvidia.com/cuda-80-ga2-download-archive), the latest supported version is [**11.8.0**](https://developer.nvidia.com/cuda-downloads). @@ -174,7 +174,8 @@ After applying all the matchers, the output HIP source is produced. 15.0.0, 15.0.1,
15.0.2, - 15.0.3 + 15.0.3,
+ 15.0.4
@@ -190,7 +191,7 @@ After applying all the matchers, the output HIP source is produced. In most cases, you can get a suitable version of `LLVM+CLANG` with your package manager. Failing that or having multiple versions of `LLVM`, you can [download a release archive](http://releases.llvm.org/), build or install it, and set -[CMAKE_PREFIX_PATH](https://cmake.org/cmake/help/v3.5/variable/CMAKE_PREFIX_PATH.html) so `cmake` can find it; for instance: `-DCMAKE_PREFIX_PATH=d:\LLVM\15.0.3\dist` +[CMAKE_PREFIX_PATH](https://cmake.org/cmake/help/v3.5/variable/CMAKE_PREFIX_PATH.html) so `cmake` can find it; for instance: `-DCMAKE_PREFIX_PATH=d:\LLVM\15.0.4\dist` ### hipify-clang: usage @@ -288,7 +289,7 @@ Run `Visual Studio 16 2019`, open the generated `LLVM.sln`, build all, and build **LLVM >= 10.0.0:** -1. download [`LLVM project`](https://github.com/llvm/llvm-project/releases/tag/llvmorg-15.0.3) sources; +1. download [`LLVM project`](https://github.com/llvm/llvm-project/releases/tag/llvmorg-15.0.4) sources; 2. 
build [`LLVM project`](http://llvm.org/docs/CMake.html): ```bash @@ -353,21 +354,21 @@ Run `Visual Studio 17 2022`, open the generated `LLVM.sln`, build all, build pro * Install `lit` into `python`: - - ***Linux***: `python /usr/llvm/15.0.3/llvm-project/llvm/utils/lit/setup.py install` + - ***Linux***: `python /usr/llvm/15.0.4/llvm-project/llvm/utils/lit/setup.py install` - - ***Windows***: `python d:/LLVM/15.0.3/llvm-project/llvm/utils/lit/setup.py install` + - ***Windows***: `python d:/LLVM/15.0.4/llvm-project/llvm/utils/lit/setup.py install` * Starting with LLVM 6.0.1 path to `llvm-lit` python script should be specified by the `LLVM_EXTERNAL_LIT` option: - - ***Linux***: `-DLLVM_EXTERNAL_LIT=/usr/llvm/15.0.3/build/bin/llvm-lit` + - ***Linux***: `-DLLVM_EXTERNAL_LIT=/usr/llvm/15.0.4/build/bin/llvm-lit` - - ***Windows***: `-DLLVM_EXTERNAL_LIT=d:/LLVM/15.0.3/build/Release/bin/llvm-lit.py` + - ***Windows***: `-DLLVM_EXTERNAL_LIT=d:/LLVM/15.0.4/build/Release/bin/llvm-lit.py` * `FileCheck`: - - ***Linux***: copy from `/usr/llvm/15.0.3/build/bin/` to `CMAKE_INSTALL_PREFIX/dist/bin` + - ***Linux***: copy from `/usr/llvm/15.0.4/build/bin/` to `CMAKE_INSTALL_PREFIX/dist/bin` - - ***Windows***: copy from `d:/LLVM/15.0.3/build/Release/bin` to `CMAKE_INSTALL_PREFIX/dist/bin` + - ***Windows***: copy from `d:/LLVM/15.0.4/build/Release/bin` to `CMAKE_INSTALL_PREFIX/dist/bin` - Or specify the path to `FileCheck` in `CMAKE_INSTALL_PREFIX` option @@ -389,7 +390,7 @@ Ubuntu 14: LLVM 4.0.0 - 7.1.0, CUDA 7.0 - 9.0, cuDNN 5.0.5 - 7.6.5 Ubuntu 16-18: LLVM 8.0.0 - 14.0.6, CUDA 8.0 - 10.2, cuDNN 5.1.10 - 8.0.5 -Ubuntu 20-21: LLVM 9.0.0 - 15.0.3, CUDA 8.0 - 11.8.0, cuDNN 5.1.10 - 8.6.0 +Ubuntu 20-21: LLVM 9.0.0 - 15.0.4, CUDA 8.0 - 11.8.0, cuDNN 5.1.10 - 8.6.0 Minimum build system requirements for the above configurations: @@ -406,11 +407,11 @@ cmake -DHIPIFY_CLANG_TESTS=1 \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=../dist \ - -DCMAKE_PREFIX_PATH=/usr/llvm/15.0.3/dist \ + 
-DCMAKE_PREFIX_PATH=/usr/llvm/15.0.4/dist \ -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda \ -DCUDA_DNN_ROOT_DIR=/usr/local/cuda \ -DCUDA_CUB_ROOT_DIR=/usr/CUB \ - -DLLVM_EXTERNAL_LIT=/usr/llvm/15.0.3/build/bin/llvm-lit \ + -DLLVM_EXTERNAL_LIT=/usr/llvm/15.0.4/build/bin/llvm-lit \ ../hipify ``` *A corresponding successful output:* @@ -428,14 +429,14 @@ cmake -- Detecting CXX compile features -- Detecting CXX compile features - done -- Found ZLIB: /usr/lib/x86_64-linux-gnu/libz.so (found version "1.2.11") --- Found LLVM 15.0.3: --- - CMake module path: /usr/llvm/15.0.3/dist/lib/cmake/llvm --- - Include path : /usr/llvm/15.0.3/dist/include --- - Binary path : /usr/llvm/15.0.3/dist/bin +-- Found LLVM 15.0.4: +-- - CMake module path: /usr/llvm/15.0.4/dist/lib/cmake/llvm +-- - Include path : /usr/llvm/15.0.4/dist/include +-- - Binary path : /usr/llvm/15.0.4/dist/bin -- Linker detection: GNU ld -- Found PythonInterp: /usr/bin/python (found suitable version "3.9.7", minimum required is "2.7") -- Found lit: /usr/local/bin/lit --- Found FileCheck: /usr/llvm/15.0.3/dist/bin/FileCheck +-- Found FileCheck: /usr/llvm/15.0.4/dist/bin/FileCheck -- Looking for pthread.h -- Looking for pthread.h - found -- Performing Test CMAKE_HAVE_LIBC_PTHREAD @@ -454,7 +455,7 @@ make test-hipify Running HIPify regression tests ======================================== CUDA 11.8 - will be used for testing -LLVM 15.0.3 - will be used for testing +LLVM 15.0.4 - will be used for testing x86_64 - Platform architecture Linux 5.13.0-21-generic - Platform OS 64 - hipify-clang binary bitness @@ -568,7 +569,7 @@ Testing Time: 6.22s | 11.0.1 - 11.1.0 | 7.0 - 11.2.2 | 7.6.5 - 8.0.5 | 2017.15.9.31, 2019.16.8.4 | 3.19.3 | 3.9.2 | | 12.0.0 - 13.0.1 | 7.0 - 11.5.1 | 7.6.5 - 8.3.2 | 2017.15.9.43, 2019.16.11.9 | 3.22.2 | 3.10.2 | | 14.0.0 - 14.0.6 | 7.0 - 11.7.1 | 8.0.5 - 8.4.1 | 2017.15.9.49, 2019.16.11.17, 2022.17.2.6 | 3.24.0 | 3.10.6 | -| 15.0.0 - 15.0.3 | 7.0 - 11.8.0 | 8.0.5 - 8.6.0 | 2017.15.9.50, 2019.16.11.20, 
2022.17.3.6 | 3.24.2 | 3.11.0 | +| 15.0.0 - 15.0.4 | 7.0 - 11.8.0 | 8.0.5 - 8.6.0 | 2017.15.9.50, 2019.16.11.20, 2022.17.3.6 | 3.24.2 | 3.11.0 | | 16.0.0git | 7.0 - 11.8.0 | 8.0.5 - 8.6.0 | 2017.15.9.50, 2019.16.11.20, 2022.17.3.6 | 3.24.2 | 3.11.0 | *Building with testing support by `Visual Studio 17 2022` on `Windows 10`:* @@ -581,23 +582,23 @@ cmake -DHIPIFY_CLANG_TESTS=1 \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=../dist \ - -DCMAKE_PREFIX_PATH=d:/LLVM/15.0.3/dist \ + -DCMAKE_PREFIX_PATH=d:/LLVM/15.0.4/dist \ -DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8" \ -DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v11.8" \ -DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-11.8-windows-x64-v8.6.0 \ -DCUDA_CUB_ROOT_DIR=d:/GIT/cub \ - -DLLVM_EXTERNAL_LIT=d:/LLVM/15.0.3/build/Release/bin/llvm-lit.py \ + -DLLVM_EXTERNAL_LIT=d:/LLVM/15.0.4/build/Release/bin/llvm-lit.py \ ../hipify ``` *A corresponding successful output:* ```shell --- Found LLVM 15.0.3: --- - CMake module path: d:/LLVM/15.0.3/dist/lib/cmake/llvm --- - Include path : d:/LLVM/15.0.3/dist/include --- - Binary path : d:/LLVM/15.0.3/dist/bin +-- Found LLVM 15.0.4: +-- - CMake module path: d:/LLVM/15.0.4/dist/lib/cmake/llvm +-- - Include path : d:/LLVM/15.0.4/dist/include +-- - Binary path : d:/LLVM/15.0.4/dist/bin -- Found PythonInterp: c:/Program Files/Python311/python.exe (found suitable version "3.11.0", minimum required is "3.6") -- Found lit: c:/Program Files/Python311/Scripts/lit.exe --- Found FileCheck: d:/LLVM/15.0.3/dist/bin/FileCheck.exe +-- Found FileCheck: d:/LLVM/15.0.4/dist/bin/FileCheck.exe -- Found CUDA: c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8 (found version "11.8") -- Configuring done -- Generating done From dc3eb1bd3dde68158aca3448c99d8fb01dbc01a3 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Fri, 4 Nov 2022 19:50:30 +0100 Subject: [PATCH 39/43] [HIPIFY][rocBLAS][#SWDEV-365549] Sync with rocBLAS - functions only - Part 1 + 
Started syncing with the latest rocBLAS + Started populating rocm APIs with HIP versions + Added ROCm 1.6.4 version + Fixed #SWDEV-365549: `cublasGetStatusString` -> `rocblas_status_to_string` + Updated the regenerated hipify-perl accordingly + Docs are unchanged because documentation for rocBLAS is not generated yet [ToDo] + rocBLAS synthetic tests + Add CUDA2ROC documentation for rocBLAS --- bin/hipify-perl | 2 +- src/CUDA2HIP_BLAS_API_functions.cpp | 56 ++++++++++++++++++++++++++++- src/Statistics.cpp | 1 + src/Statistics.h | 1 + 4 files changed, 58 insertions(+), 2 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 0a1a0073..13a248c5 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1036,6 +1036,7 @@ sub rocSubstitutions { subst("cublasGetMatrixAsync", "rocblas_get_matrix_async", "library"); subst("cublasGetPointerMode", "rocblas_get_pointer_mode", "library"); subst("cublasGetPointerMode_v2", "rocblas_set_pointer_mode", "library"); + subst("cublasGetStatusString", "rocblas_status_to_string", "library"); subst("cublasGetStream", "rocblas_get_stream", "library"); subst("cublasGetStream_v2", "rocblas_get_stream", "library"); subst("cublasGetVector", "rocblas_get_vector", "library"); @@ -8155,7 +8156,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasIamaxEx", "cublasGetVersion_v2", "cublasGetVersion", - "cublasGetStatusString", "cublasGetStatusName", "cublasGetSmCountTarget", "cublasGetProperty", diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index 72186afa..0e0dd529 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -41,7 +41,7 @@ const std::map CUDA_BLAS_FUNCTION_MAP { {"cublasGetSmCountTarget", {"hipblasGetSmCountTarget", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, {"cublasSetSmCountTarget", {"hipblasSetSmCountTarget", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, {"cublasGetStatusName", {"hipblasGetStatusName", "", CONV_LIB_FUNC, API_BLAS, 4, 
UNSUPPORTED}}, - {"cublasGetStatusString", {"hipblasGetStatusString", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, + {"cublasGetStatusString", {"hipblasGetStatusString", "rocblas_status_to_string", CONV_LIB_FUNC, API_BLAS, 4, HIP_UNSUPPORTED}}, // Blas logging {"cublasLogCallback", {"hipblasLogCallback", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, @@ -953,6 +953,60 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"hipblasNrm2Ex", {HIP_4010, HIP_0, HIP_0 }}, {"hipblasRotEx", {HIP_4010, HIP_0, HIP_0 }}, {"hipblasScalEx", {HIP_4010, HIP_0, HIP_0 }}, + {"rocblas_status_to_string", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_sscal", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_dscal", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_cscal", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_zscal", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_csscal", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zdscal", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_scopy", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_dcopy", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_ccopy", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_zcopy", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_sdot", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_ddot", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_hdot", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_cdotu", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_zdotu", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_cdotc", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zdotc", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_sswap", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_dswap", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_cswap", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_zswap", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_saxpy", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_daxpy", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_caxpy", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_zaxpy", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_sasum", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_dasum", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_scasum", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_dzasum", {HIP_1050, HIP_0, 
HIP_0 }}, + {"rocblas_snrm2", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_dnrm2", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_scnrm2", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_dznrm2", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_isamax", {HIP_1064, HIP_0, HIP_0 }}, + {"rocblas_idamax", {HIP_1064, HIP_0, HIP_0 }}, + {"rocblas_icamax", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_izamax", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_isamin", {HIP_1064, HIP_0, HIP_0 }}, + {"rocblas_idamin", {HIP_1064, HIP_0, HIP_0 }}, + {"rocblas_icamin", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_izamin", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_srot", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_drot", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_crot", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_csrot", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zrot", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zdrot", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_srotg", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_drotg", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_crotg", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zrotg", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_srotm", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_drotm", {HIP_3050, HIP_0, HIP_0 }}, }; const std::map CUDA_BLAS_API_SECTION_MAP { diff --git a/src/Statistics.cpp b/src/Statistics.cpp index 8c8ef69f..7925cbde 100644 --- a/src/Statistics.cpp +++ b/src/Statistics.cpp @@ -494,6 +494,7 @@ std::string Statistics::getHipVersion(const hipVersions& ver) { case HIP_1052: return "1.5.2"; case HIP_1060: return "1.6.0"; case HIP_1061: return "1.6.1"; + case HIP_1064: return "1.6.4"; case HIP_1070: return "1.7.0"; case HIP_1071: return "1.7.1"; case HIP_1080: return "1.8.0"; diff --git a/src/Statistics.h b/src/Statistics.h index 3cc3e915..79dd6a78 100644 --- a/src/Statistics.h +++ b/src/Statistics.h @@ -246,6 +246,7 @@ enum hipVersions { HIP_1052 = 1052, HIP_1060 = 1060, HIP_1061 = 1061, + HIP_1064 = 1064, HIP_1070 = 1070, HIP_1071 = 1071, HIP_1080 = 1080, From c946b685d3ba58b73309b4d100f3005a5e841ee4 Mon Sep 17 
00:00:00 2001 From: Evgeny Mankov Date: Sat, 5 Nov 2022 21:27:44 +0100 Subject: [PATCH 40/43] [HIPIFY][rocBLAS][#SWDEV-365549] Sync with rocBLAS - functions only - Part 2 --- src/CUDA2HIP_BLAS_API_functions.cpp | 48 +++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index 0e0dd529..142f2cc4 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -1007,6 +1007,54 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_zrotg", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_srotm", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_drotm", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_srotmg", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_drotmg", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_sgbmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dgbmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_cgbmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zgbmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_sgemv", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_dgemv", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_cgemv", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_zgemv", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_chbmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zhbmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_chemv", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_zhemv", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_cher", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zher", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_cher2", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zher2", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_chpmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zhpmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_chpr", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zhpr", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_chpr2", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zhpr2", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_strmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dtrmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ctrmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ztrmv", 
{HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_stpmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dtpmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ctpmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ztpmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_stbmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dtbmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ctbmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ztbmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_stbsv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dtbsv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ctbsv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ztbsv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_strsv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dtrsv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ctrsv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ztrsv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_stpsv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dtpsv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ctpsv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ztpsv", {HIP_3050, HIP_0, HIP_0 }}, }; const std::map CUDA_BLAS_API_SECTION_MAP { From a42975e1da4b437763baa94479342070c1df461c Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Sun, 6 Nov 2022 17:48:50 +0100 Subject: [PATCH 41/43] [HIPIFY][rocBLAS][fix] Sync with rocBLAS - functions only - Part 3 + Continued syncing with the latest rocBLAS + Continued populating rocm APIs with HIP versions + Fix error in mapping `cublasCherk_v2` -> `rocblas_cherk` (was `rocblas_cherkx`) --- bin/hipify-perl | 2 +- src/CUDA2HIP_BLAS_API_functions.cpp | 52 ++++++++++++++++++++++++++++- 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 13a248c5..6fd17ab8 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -897,7 +897,7 @@ sub rocSubstitutions { subst("cublasCher2k_v2", "rocblas_cher2k", "library"); subst("cublasCher_v2", "rocblas_cher", "library"); subst("cublasCherk", "rocblas_cherk", "library"); - subst("cublasCherk_v2", "rocblas_cherkx", "library"); + subst("cublasCherk_v2", "rocblas_cherk", 
"library"); subst("cublasCherkx", "rocblas_cherkx", "library"); subst("cublasChpmv", "rocblas_chpmv", "library"); subst("cublasChpmv_v2", "rocblas_chpmv", "library"); diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index 142f2cc4..524895f3 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -541,7 +541,7 @@ const std::map CUDA_BLAS_FUNCTION_MAP { {"cublasCsyrk3mEx", {"hipblasCsyrk3mEx", "", CONV_LIB_FUNC, API_BLAS, 8, UNSUPPORTED}}, // HERK - {"cublasCherk_v2", {"hipblasCherk", "rocblas_cherkx", CONV_LIB_FUNC, API_BLAS, 7}}, + {"cublasCherk_v2", {"hipblasCherk", "rocblas_cherk", CONV_LIB_FUNC, API_BLAS, 7}}, // IO in Int8 complex/cuComplex, computation in cuComplex {"cublasCherkEx", {"hipblasCherkEx", "", CONV_LIB_FUNC, API_BLAS, 8, UNSUPPORTED}}, // IO in Int8 complex/cuComplex, computation in cuComplex, Gaussian math @@ -1055,6 +1055,56 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_dtpsv", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_ctpsv", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_ztpsv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ssymv", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_dsymv", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_csymv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zsymv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_sspmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dspmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ssbmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dsbmv", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_sger", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_dger", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_cgeru", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zgeru", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_cgerc", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zgerc", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_sspr", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dspr", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_sspr2", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dspr2", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ssyr", 
{HIP_1071, HIP_0, HIP_0 }}, + {"rocblas_dsyr", {HIP_1071, HIP_0, HIP_0 }}, + {"rocblas_csyr", {HIP_1071, HIP_0, HIP_0 }}, + {"rocblas_zsyr", {HIP_1071, HIP_0, HIP_0 }}, + {"rocblas_ssyr2", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dsyr2", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_csyr2", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zsyr2", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_chemm", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zhemm", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_cherk", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zherk", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_cher2k", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zher2k", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_cherkx", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zherkx", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ssymm", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dsymm", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_csymm", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zsymm", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ssyrk", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dsyrk", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_csyrk", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zsyrk", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ssyr2k", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dsyr2k", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_csyr2k", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zsyr2k", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ssyrkx", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dsyrkx", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_csyrkx", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zsyrkx", {HIP_3050, HIP_0, HIP_0 }}, }; const std::map CUDA_BLAS_API_SECTION_MAP { From 98dae997db167f3417f170f5de21e4a7c9b3d157 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 7 Nov 2022 15:36:25 +0100 Subject: [PATCH 42/43] [HIPIFY][rocBLAS][fix] Sync with rocBLAS - functions only - Part 4 + Continued syncing with the latest rocBLAS + Continued populating rocm APIs with HIP versions + Added missing mappings `cublasInit` -> `rocblas_initialize` and `cublasGetVersion` -> 
`rocblas_get_version_string` + Updated the regenerated hipify-perl accordingly --- bin/hipify-perl | 4 +-- src/CUDA2HIP_BLAS_API_functions.cpp | 50 +++++++++++++++++++++++++++-- 2 files changed, 50 insertions(+), 4 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 6fd17ab8..6ca70c13 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1041,6 +1041,7 @@ sub rocSubstitutions { subst("cublasGetStream_v2", "rocblas_get_stream", "library"); subst("cublasGetVector", "rocblas_get_vector", "library"); subst("cublasGetVectorAsync", "rocblas_get_vector_async", "library"); + subst("cublasGetVersion", "rocblas_get_version_string", "library"); subst("cublasHgemm", "rocblas_hgemm", "library"); subst("cublasHgemmBatched", "rocblas_hgemm_batched", "library"); subst("cublasHgemmStridedBatched", "rocblas_hgemm_strided_batched", "library"); @@ -1052,6 +1053,7 @@ sub rocSubstitutions { subst("cublasIdamax_v2", "rocblas_idamax", "library"); subst("cublasIdamin", "rocblas_idamin", "library"); subst("cublasIdamin_v2", "rocblas_idamin", "library"); + subst("cublasInit", "rocblas_initialize", "library"); subst("cublasIsamax", "rocblas_isamax", "library"); subst("cublasIsamax_v2", "rocblas_isamax", "library"); subst("cublasIsamin", "rocblas_isamin", "library"); @@ -8151,11 +8153,9 @@ sub warnRocOnlyUnsupportedFunctions { "cublasMath_t", "cublasLoggerConfigure", "cublasLogCallback", - "cublasInit", "cublasIaminEx", "cublasIamaxEx", "cublasGetVersion_v2", - "cublasGetVersion", "cublasGetStatusName", "cublasGetSmCountTarget", "cublasGetProperty", diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index 524895f3..a6d88cb0 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -26,9 +26,9 @@ THE SOFTWARE. 
const std::map CUDA_BLAS_FUNCTION_MAP { // Blas management functions - {"cublasInit", {"hipblasInit", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, + {"cublasInit", {"hipblasInit", "rocblas_initialize", CONV_LIB_FUNC, API_BLAS, 4, HIP_UNSUPPORTED}}, {"cublasShutdown", {"hipblasShutdown", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, - {"cublasGetVersion", {"hipblasGetVersion", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, + {"cublasGetVersion", {"hipblasGetVersion", "rocblas_get_version_string", CONV_LIB_FUNC, API_BLAS, 4, HIP_UNSUPPORTED}}, {"cublasGetError", {"hipblasGetError", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, {"cublasAlloc", {"hipblasAlloc", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, {"cublasFree", {"hipblasFree", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, @@ -1105,6 +1105,52 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_dsyrkx", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_csyrkx", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_zsyrkx", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_strmm_outofplace", {HIP_5000, HIP_0, HIP_0 }}, + {"rocblas_dtrmm_outofplace", {HIP_5000, HIP_0, HIP_0 }}, + {"rocblas_ctrmm_outofplace", {HIP_5000, HIP_0, HIP_0 }}, + {"rocblas_ztrmm_outofplace", {HIP_5000, HIP_0, HIP_0 }}, + {"rocblas_strsm", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_dtrsm", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_ctrsm", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ztrsm", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_strsm_batched", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dtrsm_batched", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ctrsm_batched", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ztrsm_batched", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_sgemm", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_dgemm", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_hgemm", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_cgemm", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_zgemm", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_sgemm_batched", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dgemm_batched", {HIP_3050, HIP_0, 
HIP_0 }}, + {"rocblas_hgemm_batched", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_cgemm_batched", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zgemm_batched", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_sgemm_strided_batched", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_dgemm_strided_batched", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_hgemm_strided_batched", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_cgemm_strided_batched", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_zgemm_strided_batched", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_sdgmm", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_ddgmm", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_cdgmm", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zdgmm", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_sgeam", {HIP_1064, HIP_0, HIP_0 }}, + {"rocblas_dgeam", {HIP_1064, HIP_0, HIP_0 }}, + {"rocblas_cgeam", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_zgeam", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_gemm_ex", {HIP_1082, HIP_0, HIP_0 }}, + {"rocblas_gemm_batched_ex", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_gemm_strided_batched_ex", {HIP_1090, HIP_0, HIP_0 }}, + {"rocblas_axpy_ex", {HIP_3090, HIP_0, HIP_0 }}, + {"rocblas_dot_ex", {HIP_4010, HIP_0, HIP_0 }}, + {"rocblas_dotc_ex", {HIP_4010, HIP_0, HIP_0 }}, + {"rocblas_nrm2_ex", {HIP_4010, HIP_0, HIP_0 }}, + {"rocblas_rot_ex", {HIP_4010, HIP_0, HIP_0 }}, + {"rocblas_scal_ex", {HIP_4010, HIP_0, HIP_0 }}, + {"rocblas_initialize", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_get_version_string", {HIP_2000, HIP_0, HIP_0 }}, }; const std::map CUDA_BLAS_API_SECTION_MAP { From 017a37864308af6ed122bd57a7338bba879c7dc8 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 7 Nov 2022 16:17:12 +0100 Subject: [PATCH 43/43] [HIPIFY][rocBLAS][fix] Sync with rocBLAS - functions only - Part 5 - final + Finished syncing with the latest rocBLAS + Finished populating rocm APIs with HIP versions + Added missing mappings `cublasGetAtomicsMode` -> `rocblas_get_atomics_mode` and `cublasSetAtomicsMode` -> `rocblas_set_atomics_mode` + Excluded `cublasGetVersion` 
-> `rocblas_get_version_string` due to different signatures + Updated the regenerated hipify-perl accordingly + Documentation is unchanged due to `rocBLAS` doc is not generated yet (sync and testing is incomplete) --- bin/hipify-perl | 6 +++--- src/CUDA2HIP_BLAS_API_functions.cpp | 23 +++++++++++++++++++---- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 6ca70c13..23da6adc 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1032,6 +1032,7 @@ sub rocSubstitutions { subst("cublasGemmBatchedEx", "rocblas_gemm_batched_ex", "library"); subst("cublasGemmEx", "rocblas_gemm_ex", "library"); subst("cublasGemmStridedBatchedEx", "rocblas_gemm_strided_batched_ex", "library"); + subst("cublasGetAtomicsMode", "rocblas_get_atomics_mode", "library"); subst("cublasGetMatrix", "rocblas_get_matrix", "library"); subst("cublasGetMatrixAsync", "rocblas_get_matrix_async", "library"); subst("cublasGetPointerMode", "rocblas_get_pointer_mode", "library"); @@ -1041,7 +1042,6 @@ sub rocSubstitutions { subst("cublasGetStream_v2", "rocblas_get_stream", "library"); subst("cublasGetVector", "rocblas_get_vector", "library"); subst("cublasGetVectorAsync", "rocblas_get_vector_async", "library"); - subst("cublasGetVersion", "rocblas_get_version_string", "library"); subst("cublasHgemm", "rocblas_hgemm", "library"); subst("cublasHgemmBatched", "rocblas_hgemm_batched", "library"); subst("cublasHgemmStridedBatched", "rocblas_hgemm_strided_batched", "library"); @@ -1078,6 +1078,7 @@ sub rocSubstitutions { subst("cublasSdgmm", "rocblas_sdgmm", "library"); subst("cublasSdot", "rocblas_sdot", "library"); subst("cublasSdot_v2", "rocblas_sdot", "library"); + subst("cublasSetAtomicsMode", "rocblas_set_atomics_mode", "library"); subst("cublasSetMatrix", "rocblas_set_matrix", "library"); subst("cublasSetMatrixAsync", "rocblas_set_matrix_async", "library"); subst("cublasSetPointerMode", "rocblas_set_pointer_mode", "library"); @@ -8145,7 +8146,6 @@ sub 
warnRocOnlyUnsupportedFunctions { "cublasSetMathMode", "cublasSetLoggerCallback", "cublasSetKernelStream", - "cublasSetAtomicsMode", "cublasRotmgEx", "cublasRotmEx", "cublasRotgEx", @@ -8156,6 +8156,7 @@ sub warnRocOnlyUnsupportedFunctions { "cublasIaminEx", "cublasIamaxEx", "cublasGetVersion_v2", + "cublasGetVersion", "cublasGetStatusName", "cublasGetSmCountTarget", "cublasGetProperty", @@ -8163,7 +8164,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasGetLoggerCallback", "cublasGetError", "cublasGetCudartVersion", - "cublasGetAtomicsMode", "cublasFree", "cublasDtrttp", "cublasDtpttr", diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index a6d88cb0..a912fa1f 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -28,13 +28,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // Blas management functions {"cublasInit", {"hipblasInit", "rocblas_initialize", CONV_LIB_FUNC, API_BLAS, 4, HIP_UNSUPPORTED}}, {"cublasShutdown", {"hipblasShutdown", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, - {"cublasGetVersion", {"hipblasGetVersion", "rocblas_get_version_string", CONV_LIB_FUNC, API_BLAS, 4, HIP_UNSUPPORTED}}, + {"cublasGetVersion", {"hipblasGetVersion", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, {"cublasGetError", {"hipblasGetError", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, {"cublasAlloc", {"hipblasAlloc", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, {"cublasFree", {"hipblasFree", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, {"cublasSetKernelStream", {"hipblasSetKernelStream", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, - {"cublasGetAtomicsMode", {"hipblasGetAtomicsMode", "", CONV_LIB_FUNC, API_BLAS, 4, ROC_UNSUPPORTED}}, - {"cublasSetAtomicsMode", {"hipblasSetAtomicsMode", "", CONV_LIB_FUNC, API_BLAS, 4, ROC_UNSUPPORTED}}, + {"cublasGetAtomicsMode", {"hipblasGetAtomicsMode", "rocblas_get_atomics_mode", CONV_LIB_FUNC, API_BLAS, 4}}, + {"cublasSetAtomicsMode", 
{"hipblasSetAtomicsMode", "rocblas_set_atomics_mode", CONV_LIB_FUNC, API_BLAS, 4}}, {"cublasGetMathMode", {"hipblasGetMathMode", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, {"cublasSetMathMode", {"hipblasSetMathMode", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, {"cublasMigrateComputeType", {"hipblasMigrateComputeType", "", CONV_LIB_FUNC, API_BLAS, 4, UNSUPPORTED}}, @@ -1150,7 +1150,22 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_rot_ex", {HIP_4010, HIP_0, HIP_0 }}, {"rocblas_scal_ex", {HIP_4010, HIP_0, HIP_0 }}, {"rocblas_initialize", {HIP_3050, HIP_0, HIP_0 }}, - {"rocblas_get_version_string", {HIP_2000, HIP_0, HIP_0 }}, + {"rocblas_create_handle", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_destroy_handle", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_set_stream", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_get_stream", {HIP_1050, HIP_0, HIP_0 }}, + {"rocblas_set_pointer_mode", {HIP_1060, HIP_0, HIP_0 }}, + {"rocblas_get_pointer_mode", {HIP_1060, HIP_0, HIP_0 }}, + {"rocblas_set_atomics_mode", {HIP_3080, HIP_0, HIP_0 }}, + {"rocblas_get_atomics_mode", {HIP_3080, HIP_0, HIP_0 }}, + {"rocblas_set_vector", {HIP_1060, HIP_0, HIP_0 }}, + {"rocblas_get_vector", {HIP_1060, HIP_0, HIP_0 }}, + {"rocblas_set_matrix", {HIP_1060, HIP_0, HIP_0 }}, + {"rocblas_get_matrix", {HIP_1060, HIP_0, HIP_0 }}, + {"rocblas_set_vector_async", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_get_vector_async", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_set_matrix_async", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_get_matrix_async", {HIP_3050, HIP_0, HIP_0 }}, }; const std::map CUDA_BLAS_API_SECTION_MAP {
11.7.111.7.1 works only with the patch
due to the clang's bug 54609
patch for 14.0.0**
patch for 14.0.1**
@@ -175,7 +175,7 @@ After applying all the matchers, the output HIP source is produced. 15.0.1,
15.0.2, 15.0.3
11.7.111.8.0 LATEST STABLE CONFIG
11.8.0 LATEST STABLE CONFIG