Skip to content

Commit eb4950f

Browse files
committed
Ensure NVLS and NVLSTree chunksizes are matched
This prevents failures when the NVLS chunk size is smaller than the NVLSTree chunk size, and avoids a silent fallback to a lower value, should NCCL change its defaults. Signed-off-by: Raghu Raja <[email protected]>
1 parent c5e9b22 commit eb4950f

File tree

1 file changed

+10
-0
lines changed

1 file changed

+10
-0
lines changed

src/platform-aws.c

+10
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,9 @@ int platform_init(const char **provider_filter)
447447
* Setting this unconditionally without relying on ncclGetVersion symbol
448448
* being available, since the parameter did not exist in versions prior
449449
* to v2.20.
450+
*
451+
* The NVLSTree chunk size cannot be larger than the NVLS chunk size,
452+
* so we default both to 512KiB (values already set in the environment are kept).
450453
*/
451454
NCCL_OFI_INFO(NCCL_INIT | NCCL_NET, "Setting NCCL_NVLSTREE_MAX_CHUNKSIZE to 512KiB");
452455
ret = setenv("NCCL_NVLSTREE_MAX_CHUNKSIZE", "524288", 0);
@@ -456,6 +459,13 @@ int platform_init(const char **provider_filter)
456459
goto exit;
457460
}
458461

462+
NCCL_OFI_INFO(NCCL_INIT | NCCL_NET, "Setting NCCL_NVLS_CHUNKSIZE to 512KiB");
463+
ret = setenv("NCCL_NVLS_CHUNKSIZE", "524288", 0);
464+
if (ret != 0) {
465+
NCCL_OFI_WARN("Unable to set NCCL_NVLS_CHUNKSIZE");
466+
ret = -errno;
467+
goto exit;
468+
}
459469
#endif
460470

461471
/*

0 commit comments

Comments
 (0)