Skip to content

Commit

Permalink
Add support for sm_101 and sm_101a to NV_TARGET (NVIDIA#3166)
Browse files Browse the repository at this point in the history
  • Loading branch information
bernhardmgruber authored Dec 15, 2024
1 parent 1393082 commit c80fce9
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 2 deletions.
32 changes: 32 additions & 0 deletions libcudacxx/include/nv/detail/__target_macros
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#define _NV_TARGET_ARCH_TO_SELECTOR_890 nv::target::sm_89
#define _NV_TARGET_ARCH_TO_SELECTOR_900 nv::target::sm_90
#define _NV_TARGET_ARCH_TO_SELECTOR_1000 nv::target::sm_100
#define _NV_TARGET_ARCH_TO_SELECTOR_1010 nv::target::sm_101

#define _NV_TARGET_ARCH_TO_SM_350 35
#define _NV_TARGET_ARCH_TO_SM_370 37
Expand All @@ -52,6 +53,7 @@
#define _NV_TARGET_ARCH_TO_SM_890 89
#define _NV_TARGET_ARCH_TO_SM_900 90
#define _NV_TARGET_ARCH_TO_SM_1000 100
#define _NV_TARGET_ARCH_TO_SM_1010 101

// Only enable when compiling for CUDA/stdpar
#if defined(_NV_COMPILER_NVCXX) && defined(_NVHPC_CUDA)
Expand All @@ -73,6 +75,7 @@
# define _NV_TARGET_VAL_SM_89 nv::target::sm_89
# define _NV_TARGET_VAL_SM_90 nv::target::sm_90
# define _NV_TARGET_VAL_SM_100 nv::target::sm_100
# define _NV_TARGET_VAL_SM_101 nv::target::sm_101

# define _NV_TARGET___NV_IS_HOST nv::target::is_host
# define _NV_TARGET___NV_IS_DEVICE nv::target::is_device
Expand Down Expand Up @@ -108,6 +111,7 @@
# define _NV_TARGET_VAL_SM_89 890
# define _NV_TARGET_VAL_SM_90 900
# define _NV_TARGET_VAL_SM_100 1000
# define _NV_TARGET_VAL_SM_101 1010

# if defined(__CUDA_ARCH__)
# define _NV_TARGET_VAL __CUDA_ARCH__
Expand Down Expand Up @@ -155,6 +159,7 @@
# define _NV_TARGET_VAL_SM_89 890
# define _NV_TARGET_VAL_SM_90 900
# define _NV_TARGET_VAL_SM_100 1000
# define _NV_TARGET_VAL_SM_101 1010

# define _NV_TARGET_VAL 0

Expand Down Expand Up @@ -185,6 +190,7 @@
#define _NV_TARGET___NV_PROVIDES_SM_89 (_NV_TARGET_PROVIDES(_NV_TARGET_VAL_SM_89))
#define _NV_TARGET___NV_PROVIDES_SM_90 (_NV_TARGET_PROVIDES(_NV_TARGET_VAL_SM_90))
#define _NV_TARGET___NV_PROVIDES_SM_100 (_NV_TARGET_PROVIDES(_NV_TARGET_VAL_SM_100))
#define _NV_TARGET___NV_PROVIDES_SM_101 (_NV_TARGET_PROVIDES(_NV_TARGET_VAL_SM_101))

#define _NV_TARGET___NV_IS_EXACTLY_SM_35 (_NV_TARGET_IS_EXACTLY(_NV_TARGET_VAL_SM_35))
#define _NV_TARGET___NV_IS_EXACTLY_SM_37 (_NV_TARGET_IS_EXACTLY(_NV_TARGET_VAL_SM_37))
Expand All @@ -203,6 +209,7 @@
#define _NV_TARGET___NV_IS_EXACTLY_SM_89 (_NV_TARGET_IS_EXACTLY(_NV_TARGET_VAL_SM_89))
#define _NV_TARGET___NV_IS_EXACTLY_SM_90 (_NV_TARGET_IS_EXACTLY(_NV_TARGET_VAL_SM_90))
#define _NV_TARGET___NV_IS_EXACTLY_SM_100 (_NV_TARGET_IS_EXACTLY(_NV_TARGET_VAL_SM_100))
#define _NV_TARGET___NV_IS_EXACTLY_SM_101 (_NV_TARGET_IS_EXACTLY(_NV_TARGET_VAL_SM_101))

#define NV_PROVIDES_SM_35 __NV_PROVIDES_SM_35
#define NV_PROVIDES_SM_37 __NV_PROVIDES_SM_37
Expand All @@ -221,6 +228,7 @@
#define NV_PROVIDES_SM_89 __NV_PROVIDES_SM_89
#define NV_PROVIDES_SM_90 __NV_PROVIDES_SM_90
#define NV_PROVIDES_SM_100 __NV_PROVIDES_SM_100
#define NV_PROVIDES_SM_101 __NV_PROVIDES_SM_101

#define NV_IS_EXACTLY_SM_35 __NV_IS_EXACTLY_SM_35
#define NV_IS_EXACTLY_SM_37 __NV_IS_EXACTLY_SM_37
Expand All @@ -239,11 +247,13 @@
#define NV_IS_EXACTLY_SM_89 __NV_IS_EXACTLY_SM_89
#define NV_IS_EXACTLY_SM_90 __NV_IS_EXACTLY_SM_90
#define NV_IS_EXACTLY_SM_100 __NV_IS_EXACTLY_SM_100
#define NV_IS_EXACTLY_SM_101 __NV_IS_EXACTLY_SM_101

// Disable SM_90a, SM_100a and SM_101a support on non-supporting compilers
// by mapping each architecture-specific feature query to NV_NO_TARGET.
// Will re-enable for nvcc below.
#define NV_HAS_FEATURE_SM_90a NV_NO_TARGET
#define NV_HAS_FEATURE_SM_100a NV_NO_TARGET
#define NV_HAS_FEATURE_SM_101a NV_NO_TARGET

#define NV_IS_HOST __NV_IS_HOST
#define NV_IS_DEVICE __NV_IS_DEVICE
Expand Down Expand Up @@ -365,6 +375,12 @@
# define _NV_TARGET_BOOL___NV_IS_EXACTLY_SM_100 0
# endif

// Evaluate the sm_101 exact-match predicate once and record the result as a
// 0/1 macro. The name must stay `_NV_TARGET_BOOL_` + condition name because
// _NV_ARCH_COND_CAT1 finds it by token pasting.
# if (_NV_TARGET___NV_IS_EXACTLY_SM_101)
# define _NV_TARGET_BOOL___NV_IS_EXACTLY_SM_101 1
# else
# define _NV_TARGET_BOOL___NV_IS_EXACTLY_SM_101 0
# endif

// Re-enable sm_90a support in nvcc.
# undef NV_HAS_FEATURE_SM_90a
# define NV_HAS_FEATURE_SM_90a __NV_HAS_FEATURE_SM_90a
Expand All @@ -374,6 +390,7 @@
# define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_90a 0
# endif

// Re-enable sm_100a support in nvcc.
# undef NV_HAS_FEATURE_SM_100a
# define NV_HAS_FEATURE_SM_100a __NV_HAS_FEATURE_SM_100a
# if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 1000) && defined(__CUDA_ARCH_FEAT_SM100_ALL))
Expand All @@ -382,6 +399,15 @@
# define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_100a 0
# endif

// Re-enable sm_101a support in nvcc.
// Replaces the NV_NO_TARGET fallback with the real feature query. The query
// evaluates to 1 only during device compilation (__CUDA_ARCH__ defined) for
// an architecture value of at least 1010 when __CUDA_ARCH_FEAT_SM101_ALL is
// defined, and to 0 otherwise.
// NOTE(review): __CUDA_ARCH_FEAT_SM101_ALL is presumably set by nvcc only when
// targeting sm_101a -- confirm against the compiler documentation.
# undef NV_HAS_FEATURE_SM_101a
# define NV_HAS_FEATURE_SM_101a __NV_HAS_FEATURE_SM_101a
# if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 1010) && defined(__CUDA_ARCH_FEAT_SM101_ALL))
# define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_101a 1
# else
# define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_101a 0
# endif

# if (_NV_TARGET_IS_HOST)
# define _NV_TARGET_BOOL___NV_IS_HOST 1
# define _NV_TARGET_BOOL___NV_IS_DEVICE 0
Expand Down Expand Up @@ -497,6 +523,12 @@
# define _NV_TARGET_BOOL___NV_PROVIDES_SM_100 0
# endif

// Evaluate the sm_101 "provides" predicate once and record the result as a
// 0/1 macro. The name must stay `_NV_TARGET_BOOL_` + condition name because
// _NV_ARCH_COND_CAT1 finds it by token pasting.
# if (_NV_TARGET___NV_PROVIDES_SM_101)
# define _NV_TARGET_BOOL___NV_PROVIDES_SM_101 1
# else
# define _NV_TARGET_BOOL___NV_PROVIDES_SM_101 0
# endif

// Map a condition name (e.g. __NV_PROVIDES_SM_101) to its precomputed 0/1
// macro by pasting it onto the `_NV_TARGET_BOOL_` prefix.
# define _NV_ARCH_COND_CAT1(cond) _NV_TARGET_BOOL_##cond
// Same lookup, with the result passed through _NV_EVAL (defined elsewhere;
// presumably forces an additional macro-expansion pass -- TODO confirm).
# define _NV_ARCH_COND_CAT(cond) _NV_EVAL(_NV_ARCH_COND_CAT1(cond))

Expand Down
9 changes: 7 additions & 2 deletions libcudacxx/include/nv/target
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,10 @@ constexpr base_int_t sm_87_bit = 1 << 14;
constexpr base_int_t sm_89_bit = 1 << 15;
constexpr base_int_t sm_90_bit = 1 << 16;
constexpr base_int_t sm_100_bit = 1 << 17;
constexpr base_int_t sm_101_bit = 1 << 18;
constexpr base_int_t all_devices =
sm_35_bit | sm_37_bit | sm_50_bit | sm_52_bit | sm_53_bit | sm_60_bit | sm_61_bit | sm_62_bit | sm_70_bit | sm_72_bit
| sm_75_bit | sm_80_bit | sm_86_bit | sm_87_bit | sm_89_bit | sm_90_bit | sm_100_bit;
| sm_75_bit | sm_80_bit | sm_86_bit | sm_87_bit | sm_89_bit | sm_90_bit | sm_100_bit | sm_101_bit;

// Store a set of targets as a set of bits
struct _NV_BITSET_ATTRIBUTE target_description
Expand Down Expand Up @@ -101,6 +102,7 @@ enum class sm_selector : base_int_t
sm_89 = 89,
sm_90 = 90,
sm_100 = 100,
sm_101 = 101,
};

constexpr base_int_t toint(sm_selector a)
Expand All @@ -127,12 +129,14 @@ constexpr base_int_t bitexact(sm_selector a)
: toint(a) == 89 ? sm_89_bit
: toint(a) == 90 ? sm_90_bit
: toint(a) == 100 ? sm_100_bit
: toint(a) == 101 ? sm_101_bit
: 0;
}

constexpr base_int_t bitrounddown(sm_selector a)
{
return toint(a) >= 100 ? sm_100_bit
return toint(a) >= 101 ? sm_101_bit
: toint(a) >= 100 ? sm_100_bit
: toint(a) >= 90 ? sm_90_bit
: toint(a) >= 89 ? sm_89_bit
: toint(a) >= 87 ? sm_87_bit
Expand Down Expand Up @@ -209,6 +213,7 @@ constexpr sm_selector sm_87 = sm_selector::sm_87;
constexpr sm_selector sm_89 = sm_selector::sm_89;
constexpr sm_selector sm_90 = sm_selector::sm_90;
constexpr sm_selector sm_100 = sm_selector::sm_100;
constexpr sm_selector sm_101 = sm_selector::sm_101;

using detail::is_exactly;
using detail::provides;
Expand Down

0 comments on commit c80fce9

Please sign in to comment.