14
14
#endif
15
15
16
16
#define NCCL_MAJOR 2
17
- #define NCCL_MINOR 15
18
- #define NCCL_PATCH 1
17
+ #define NCCL_MINOR 20
18
+ #define NCCL_PATCH 3
19
19
#define NCCL_SUFFIX ""
20
20
21
- #define NCCL_VERSION_CODE 21510
21
+ #define NCCL_VERSION_CODE 22003
22
22
#define NCCL_VERSION (X ,Y ,Z ) (((X) <= 2 && (Y) <= 8) ? (X) * 1000 + (Y) * 100 + (Z) : (X) * 10000 + (Y) * 100 + (Z))
23
23
24
24
#ifdef __cplusplus
@@ -42,15 +42,24 @@ typedef enum { ncclSuccess = 0,
42
42
ncclInProgress = 7 ,
43
43
ncclNumResults = 8 } ncclResult_t ;
44
44
45
+ #define NCCL_CONFIG_UNDEF_INT INT_MIN
46
+ #define NCCL_CONFIG_UNDEF_PTR NULL
47
+ #define NCCL_SPLIT_NOCOLOR -1
48
+
45
49
/* Communicator configuration. Users can assign values to attributes to specify the
46
50
* behavior of a communicator. */
47
- typedef struct ncclConfig_v21400 {
51
+ typedef struct ncclConfig_v21700 {
48
52
/* attributes that users should never touch. */
49
53
size_t size ;
50
54
unsigned int magic ;
51
55
unsigned int version ;
52
56
/* attributes that users are able to customize. */
53
57
int blocking ;
58
+ int cgaClusterSize ;
59
+ int minCTAs ;
60
+ int maxCTAs ;
61
+ const char * netName ;
62
+ int splitShare ;
54
63
} ncclConfig_t ;
55
64
56
65
/* Config initializer must be assigned to initialize config structure when it is created.
@@ -59,9 +68,23 @@ typedef struct ncclConfig_v21400 {
59
68
sizeof(ncclConfig_t), /* size */ \
60
69
0xcafebeef , /* magic */ \
61
70
NCCL_VERSION (NCCL_MAJOR , NCCL_MINOR , NCCL_PATCH ), /* version */ \
62
- 1 /* blocking */ \
71
+ NCCL_CONFIG_UNDEF_INT , /* blocking */ \
72
+ NCCL_CONFIG_UNDEF_INT , /* cgaClusterSize */ \
73
+ NCCL_CONFIG_UNDEF_INT , /* minCTAs */ \
74
+ NCCL_CONFIG_UNDEF_INT , /* maxCTAs */ \
75
+ NCCL_CONFIG_UNDEF_PTR , /* netName */ \
76
+ NCCL_CONFIG_UNDEF_INT /* splitShare */ \
63
77
}
64
78
79
+ /* NCCL malloc and free functions for all types of NCCL optimizations
80
+ * (e.g. user buffer registration). The actual allocated size might
81
+ * be larger than requested due to granularity requirements. */
82
+ ncclResult_t ncclMemAlloc (void * * ptr , size_t size );
83
+ ncclResult_t pncclMemAlloc (void * * ptr , size_t size );
84
+
85
+ ncclResult_t ncclMemFree (void * ptr );
86
+ ncclResult_t pncclMemFree (void * ptr );
87
+
65
88
/* Return the NCCL_VERSION_CODE of the NCCL library in the supplied integer.
66
89
* This integer is coded with the MAJOR, MINOR and PATCH level of the
67
90
* NCCL library
@@ -119,6 +142,10 @@ ncclResult_t pncclCommAbort(ncclComm_t comm);
119
142
const char * ncclGetErrorString (ncclResult_t result );
120
143
const char * pncclGetErrorString (ncclResult_t result );
121
144
145
+ /* Returns a human-readable message of the last error that occurred. */
146
+ const char * ncclGetLastError (ncclComm_t comm );
147
+ const char * pncclGetLastError (ncclComm_t comm );
148
+
122
149
/* Checks whether the comm has encountered any asynchronous errors */
123
150
ncclResult_t ncclCommGetAsyncError (ncclComm_t comm , ncclResult_t * asyncError );
124
151
ncclResult_t pncclCommGetAsyncError (ncclComm_t comm , ncclResult_t * asyncError );
@@ -135,6 +162,16 @@ ncclResult_t pncclCommCuDevice(const ncclComm_t comm, int* device);
135
162
ncclResult_t ncclCommUserRank (const ncclComm_t comm , int * rank );
136
163
ncclResult_t pncclCommUserRank (const ncclComm_t comm , int * rank );
137
164
165
+
166
+ /* Register CUDA buffer for zero-copy operation */
167
+ ncclResult_t ncclCommRegister (const ncclComm_t comm , void * buff , size_t size , void * * handle );
168
+ ncclResult_t pncclCommRegister (const ncclComm_t comm , void * buff , size_t size , void * * handle );
169
+
170
+ /* Deregister CUDA buffer */
171
+ ncclResult_t ncclCommDeregister (const ncclComm_t comm , void * handle );
172
+ ncclResult_t pncclCommDeregister (const ncclComm_t comm , void * handle );
173
+
174
+
138
175
/* Reduction operation selector */
139
176
typedef enum { ncclNumOps_dummy = 5 } ncclRedOp_dummy_t ;
140
177
typedef enum { ncclSum = 0 ,
0 commit comments