Skip to content

Commit dc920c9

Browse files
authored
Do not include extended floating point headers if they are not needed (#2956)
Fixes #2933
1 parent efee771 commit dc920c9

File tree

9 files changed

+75
-22
lines changed

9 files changed

+75
-22
lines changed

c2h/include/c2h/generators.h

+18-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,24 @@
3535
#include <c2h/vector.h>
3636

3737
#if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA
38-
# include <cub/util_type.cuh> // for <cuda_fp8.h>
38+
# if defined(_CCCL_HAS_NVFP16)
39+
# include <cuda_fp16.h>
40+
# endif // _CCCL_HAS_NVFP16
41+
42+
# if defined(_CCCL_HAS_NVBF16)
43+
_CCCL_DIAG_PUSH
44+
_CCCL_DIAG_SUPPRESS_CLANG("-Wunused-function")
45+
# include <cuda_bf16.h>
46+
_CCCL_DIAG_POP
47+
48+
# if _CCCL_CUDACC_AT_LEAST(11, 8)
49+
// cuda_fp8.h resets default for C4127, so we have to guard the inclusion
50+
_CCCL_DIAG_PUSH
51+
# include <cuda_fp8.h>
52+
_CCCL_DIAG_POP
53+
# endif // _CCCL_CUDACC_AT_LEAST(11, 8)
54+
# endif // _CCCL_HAS_NVBF16
55+
3956
# if defined(__CUDA_FP8_TYPES_EXIST__)
4057
namespace std
4158
{

cub/cub/detail/fast_modulo_division.cuh

+3-3
Original file line numberDiff line numberDiff line change
@@ -37,16 +37,16 @@
3737
# pragma system_header
3838
#endif // no system header
3939

40+
#include <cub/detail/type_traits.cuh> // implicit_prom_t
41+
#include <cub/util_type.cuh> // CUB_IS_INT128_ENABLED
42+
4043
#include <cuda/cmath> // cuda::std::ceil_div
4144
#include <cuda/std/bit> // std::has_single_bit
4245
#include <cuda/std/climits> // CHAR_BIT
4346
#include <cuda/std/cstdint> // uint64_t
4447
#include <cuda/std/limits> // numeric_limits
4548
#include <cuda/std/type_traits> // std::is_integral
4649

47-
#include "cub/detail/type_traits.cuh" // implicit_prom_t
48-
#include "cub/util_type.cuh" // CUB_IS_INT128_ENABLED
49-
5050
#if defined(CCCL_ENABLE_DEVICE_ASSERTIONS)
5151
_CCCL_NV_DIAG_SUPPRESS(186) // pointless comparison of unsigned integer with zero
5252
#endif // CCCL_ENABLE_DEVICE_ASSERTIONS

cub/cub/thread/thread_operators.cuh

+11
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,17 @@
5656
#include <cuda/std/type_traits> // cuda::std::common_type
5757
#include <cuda/std/utility> // cuda::std::forward
5858

59+
#if defined(_CCCL_HAS_NVFP16)
60+
# include <cuda_fp16.h>
61+
#endif // _CCCL_HAS_NVFP16
62+
63+
#if defined(_CCCL_HAS_NVBF16)
64+
_CCCL_DIAG_PUSH
65+
_CCCL_DIAG_SUPPRESS_CLANG("-Wunused-function")
66+
# include <cuda_bf16.h>
67+
_CCCL_DIAG_POP
68+
#endif // _CCCL_HAS_NVBF16
69+
5970
CUB_NAMESPACE_BEGIN
6071

6172
// TODO(bgruber): deprecate in C++17 with a note: "replace by decltype(cuda::std::not_fn(EqualityOp{}))"

cub/cub/thread/thread_reduce.cuh

+11
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,17 @@
5454
#include <cuda/std/cstdint> // uint16_t
5555
#include <cuda/std/functional> // cuda::std::plus
5656

57+
#if defined(_CCCL_HAS_NVFP16)
58+
# include <cuda_fp16.h>
59+
#endif // _CCCL_HAS_NVFP16
60+
61+
#if defined(_CCCL_HAS_NVBF16)
62+
_CCCL_DIAG_PUSH
63+
_CCCL_DIAG_SUPPRESS_CLANG("-Wunused-function")
64+
# include <cuda_bf16.h>
65+
_CCCL_DIAG_POP
66+
#endif // _CCCL_HAS_NVBF16
67+
5768
CUB_NAMESPACE_BEGIN
5869

5970
//! @rst

cub/cub/util_type.cuh

+9
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,16 @@
5050
#include <cuda/std/limits>
5151
#include <cuda/std/type_traits>
5252

53+
#if defined(_CCCL_HAS_NVFP16)
54+
# include <cuda_fp16.h>
55+
#endif // _CCCL_HAS_NVFP16
56+
5357
#if defined(_CCCL_HAS_NVBF16)
58+
_CCCL_DIAG_PUSH
59+
_CCCL_DIAG_SUPPRESS_CLANG("-Wunused-function")
60+
# include <cuda_bf16.h>
61+
_CCCL_DIAG_POP
62+
5463
# if _CCCL_CUDACC_AT_LEAST(11, 8)
5564
// cuda_fp8.h resets default for C4127, so we have to guard the inclusion
5665
_CCCL_DIAG_PUSH

libcudacxx/include/cuda/std/__cccl/extended_floating_point.h

-11
Original file line numberDiff line numberDiff line change
@@ -39,15 +39,4 @@
3939
# endif
4040
#endif // !_CCCL_HAS_NVBF16
4141

42-
#if defined(_CCCL_HAS_NVFP16)
43-
# include <cuda_fp16.h>
44-
#endif // _CCCL_HAS_NVFP16
45-
46-
#if defined(_CCCL_HAS_NVBF16)
47-
_CCCL_DIAG_PUSH
48-
_CCCL_DIAG_SUPPRESS_CLANG("-Wunused-function")
49-
# include <cuda_bf16.h>
50-
_CCCL_DIAG_POP
51-
#endif // _CCCL_HAS_NVFP16
52-
5342
#endif // __CCCL_EXTENDED_FLOATING_POINT_H

libcudacxx/include/cuda/std/__type_traits/is_extended_floating_point.h

+11-7
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,17 @@
2222

2323
#include <cuda/std/__type_traits/integral_constant.h>
2424

25+
#if defined(_LIBCUDACXX_HAS_NVFP16)
26+
# include <cuda_fp16.h>
27+
#endif // _LIBCUDACXX_HAS_NVFP16
28+
29+
#if defined(_LIBCUDACXX_HAS_NVBF16)
30+
_CCCL_DIAG_PUSH
31+
_CCCL_DIAG_SUPPRESS_CLANG("-Wunused-function")
32+
# include <cuda_bf16.h>
33+
_CCCL_DIAG_POP
34+
#endif // _LIBCUDACXX_HAS_NVBF16
35+
2536
_LIBCUDACXX_BEGIN_NAMESPACE_STD
2637

2738
template <class _Tp>
@@ -39,8 +50,6 @@ _CCCL_INLINE_VAR constexpr bool __is_extended_floating_point_v
3950
#endif // !_CCCL_NO_VARIABLE_TEMPLATES
4051

4152
#if defined(_LIBCUDACXX_HAS_NVFP16)
42-
# include <cuda_fp16.h>
43-
4453
template <>
4554
struct __is_extended_floating_point<__half> : true_type
4655
{};
@@ -52,11 +61,6 @@ _CCCL_INLINE_VAR constexpr bool __is_extended_floating_point_v<__half> = true;
5261
#endif // _LIBCUDACXX_HAS_NVFP16
5362

5463
#if defined(_LIBCUDACXX_HAS_NVBF16)
55-
_CCCL_DIAG_PUSH
56-
_CCCL_DIAG_SUPPRESS_CLANG("-Wunused-function")
57-
# include <cuda_bf16.h>
58-
_CCCL_DIAG_POP
59-
6064
template <>
6165
struct __is_extended_floating_point<__nv_bfloat16> : true_type
6266
{};

libcudacxx/include/cuda/std/__type_traits/promote.h

+1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#ifdef _LIBCUDACXX_HAS_NVFP16
2929
# include <cuda_fp16.h>
3030
#endif // _LIBCUDACXX_HAS_NVFP16
31+
3132
#ifdef _LIBCUDACXX_HAS_NVBF16
3233
_CCCL_DIAG_PUSH
3334
_CCCL_DIAG_SUPPRESS_CLANG("-Wunused-function")

thrust/thrust/system/cuda/detail/sort.h

+11
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,17 @@
6060

6161
# include <cstdint>
6262

63+
# if defined(_CCCL_HAS_NVFP16)
64+
# include <cuda_fp16.h>
65+
# endif // _CCCL_HAS_NVFP16
66+
67+
# if defined(_CCCL_HAS_NVBF16)
68+
_CCCL_DIAG_PUSH
69+
_CCCL_DIAG_SUPPRESS_CLANG("-Wunused-function")
70+
# include <cuda_bf16.h>
71+
_CCCL_DIAG_POP
72+
# endif // _CCCL_HAS_NVBF16
73+
6374
THRUST_NAMESPACE_BEGIN
6475
namespace cuda_cub
6576
{

0 commit comments

Comments
 (0)