Skip to content

Commit 9e7afda

Browse files
committed
sharp: Add trace option at rank 0 for SHARP collectives
1 parent 4045cc8 commit 9e7afda

File tree

1 file changed

+12
-0
lines changed

1 file changed

+12
-0
lines changed

src/sharp_plugin.c

+12
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@ NCCL_PARAM(SharpGroupSizeThresh, "SHARP_GROUP_SIZE_THRESH", 2);
3434
NCCL_PARAM(SharpV3Datatypes, "SHARP_V3_DATATYPES", 2);
3535
NCCL_PARAM(SharpDisableRS, "SHARP_DISABLE_REDUCE_SCATTER", 0);
3636
NCCL_PARAM(SharpDisableAG, "SHARP_DISABLE_ALLGATHER", 0);
37+
NCCL_PARAM(enableSharpTrace, "SHARP_COLL_TRACE", 0);
3738

3839
enum ncclSharpRequestType {
3940
NCCL_SHARP_REQ_SHARP_COLL,
@@ -500,6 +501,9 @@ ncclResult_t ncclSharpIallreduce(void* collComm, void* sendData, void* recvData,
500501
reduce_spec.op = op_type;
501502
reduce_spec.aggr_mode = SHARP_AGGREGATION_NONE;
502503

504+
if (ncclParamenableSharpTrace() && cComm->rank == 0)
505+
INFO(NCCL_COLL, "Allreduce count:%d, op:%d dtype:%d ", count, op_type, sharp_type);
506+
503507
#if BLOCKING==0
504508
if (SHARP_COLL_SUCCESS != sharp_coll_do_allreduce_nb(cComm->sharpCollComm, &reduce_spec, &req->sharpRequest)) {
505509
WARN("SHARP allreduce failed\n");
@@ -546,6 +550,10 @@ ncclResult_t ncclSharpIallgather(void* collComm, void* sendData, int nRecvParts,
546550
gather_spec.size = recvParts[0].size;
547551
gather_spec.offset = windowOffset;
548552

553+
if (ncclParamenableSharpTrace() && cComm->rank == 0)
554+
INFO(NCCL_COLL, "Allgather size:%lu bytesPerRank:%lu windowOffset:%lu windowBytes:%lu",
555+
recvParts[0].size, bytesPerRank, windowOffset, windowBytes);
556+
549557
#if BLOCKING==0
550558
if (SHARP_COLL_SUCCESS != sharp_coll_do_allgather_nb(cComm->sharpCollComm, &gather_spec, &req->sharpRequest)) {
551559
WARN("SHARP Allgather failed\n");
@@ -611,6 +619,10 @@ ncclResult_t ncclSharpIreducescatter(void* collComm, int nSendParts, ncclNetSGE_
611619
reduce_spec.op = op_type;
612620
reduce_spec.aggr_mode = SHARP_AGGREGATION_NONE;
613621

622+
if (ncclParamenableSharpTrace() && cComm->rank == 0)
623+
INFO(NCCL_COLL, "ReduceScatter bytesPerRank:%lu windowOffset:%lu windowBytes:%lu op_type:%d dtype:%d",
624+
bytesPerRank, windowOffset, windowBytes, op_type, sharp_type);
625+
614626
#if BLOCKING==0
615627
if (SHARP_COLL_SUCCESS != sharp_coll_do_reduce_scatter_nb(cComm->sharpCollComm, &reduce_spec, &req->sharpRequest)) {
616628
WARN("SHARP reduce_scatter failed\n");

0 commit comments

Comments
 (0)