Skip to content

Commit

Permalink
UCP RMA: Address review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
tvegas1 committed Nov 18, 2024
1 parent a6b9270 commit 3cf2f96
Showing 1 changed file with 10 additions and 10 deletions.
20 changes: 10 additions & 10 deletions src/ucx_rma_plugin.c
Original file line number Diff line number Diff line change
Expand Up @@ -726,7 +726,8 @@ static ucs_status_t nccl_ucp_shared_put(nccl_ucp_comm_t *comm, void *va,
return UCS_PTR_STATUS(status_ptr);
}

static int nccl_ucp_mh_update(nccl_ucp_comm_t *comm, nccl_ucp_memh_t *mh) {
static ncclResult_t nccl_ucp_mh_update(nccl_ucp_comm_t *comm,
nccl_ucp_memh_t *mh) {
ucs_status_t status;
nccl_ucp_packed_rkey_t *packed, *remote;

Expand All @@ -741,11 +742,16 @@ static int nccl_ucp_mh_update(nccl_ucp_comm_t *comm, nccl_ucp_memh_t *mh) {

status = nccl_ucp_shared_put(comm, packed, sizeof(*packed), remote,
&comm->inflight_rkey);
if (UCS_STATUS_IS_ERR(status)) {
WARN("Failed to send packed rkey");
return ncclSystemError;
}

comm->inflight_rkey += (status == UCS_INPROGRESS);
mh->sent = !UCS_STATUS_IS_ERR(status);
mh->sent = 1;
}

return mh->sent == 0;
return ncclSuccess;
}

static ncclResult_t nccl_ucx_rma_regmr(void *reg_comm, void *data, size_t size,
Expand Down Expand Up @@ -781,7 +787,6 @@ static ncclResult_t nccl_ucx_rma_irecv(void *recv_comm, int n, void **data,
void **request) {
nccl_ucp_comm_t *comm = recv_comm;
nccl_ucp_memh_t **mh = (nccl_ucp_memh_t**)mhandle;
int missed = 0;
nccl_ucp_req_t *req;
nccl_ucp_rtr_t *rtr;
nccl_ucp_atp_t *atp;
Expand All @@ -804,7 +809,7 @@ static ncclResult_t nccl_ucx_rma_irecv(void *recv_comm, int n, void **data,
*request = NULL;

for (i = 0; i < n; i++) {
missed += nccl_ucp_mh_update(comm, mh[i]);
NCCLCHECK(nccl_ucp_mh_update(comm, mh[i]));

rtr->chunk[i].data = (uint64_t)data[i];
rtr->chunk[i].rkey_id = mh[i]->rkey_id;
Expand All @@ -822,11 +827,6 @@ static ncclResult_t nccl_ucx_rma_irecv(void *recv_comm, int n, void **data,
memcpy(atp->sizes, sizes, sizeof(*sizes) * n);
}

if (missed) {
ucp_worker_progress(comm->worker->ucp_worker);
return ncclSuccess;
}

remote = &comm->remote.share->rtr[comm->rtr_id & NCCL_UCP_RING_MASK];
status = nccl_ucp_shared_put(
comm, rtr, sizeof(*rtr) - (NCCL_UCP_MAX_RECV - n) * sizeof(*rtr->chunk),
Expand Down

0 comments on commit 3cf2f96

Please sign in to comment.