diff --git a/src/ucp/proto/proto_init.c b/src/ucp/proto/proto_init.c index 741e6489dad..c8ab972efbc 100644 --- a/src/ucp/proto/proto_init.c +++ b/src/ucp/proto/proto_init.c @@ -475,6 +475,7 @@ ucp_proto_common_init_send_perf(const ucp_proto_common_init_params_t *params, { ucp_proto_perf_node_t *child_perf_node; ucs_linear_func_t send_overhead; + ucs_memory_type_t remote_mem_type; ucs_status_t status; send_perf->node = ucp_proto_perf_node_new_data("send-ovrh", ""); @@ -494,10 +495,21 @@ ucp_proto_common_init_send_perf(const ucp_proto_common_init_params_t *params, send_overhead = UCS_LINEAR_FUNC_ZERO; } else { ucs_assert(reg_md_map == 0); + if ((params->flags & UCP_PROTO_COMMON_INIT_FLAG_REMOTE_ACCESS) && + (params->super.rkey_config_key != NULL)) { + remote_mem_type = params->super.rkey_config_key->mem_type; + } else { + remote_mem_type = UCS_MEMORY_TYPE_HOST; + } + ucs_print("%s |TX init send selp memt %s rkey mt %s config key %p", + ucp_proto_id_field(params->super.proto_id, name), + ucs_memory_type_names[params->super.select_param->mem_type], + ucs_memory_type_names[remote_mem_type], + params->super.rkey_config_key); status = ucp_proto_init_buffer_copy_time( - params->super.worker, "send copy", UCS_MEMORY_TYPE_HOST, - params->super.select_param->mem_type, params->memtype_op, - &send_overhead, &child_perf_node); + params->super.worker, "send copy", + params->super.select_param->mem_type, remote_mem_type, + params->memtype_op, &send_overhead, &child_perf_node); if (status != UCS_OK) { ucp_proto_perf_node_deref(&send_perf->node); return status; @@ -598,8 +610,11 @@ ucp_proto_common_init_recv_perf(const ucp_proto_common_init_params_t *params, if (params->super.rkey_config_key == NULL) { /* Assume same memory type as sender */ recv_mem_type = select_param->mem_type; + ucs_print("%s |RX selp memt %s, recvmt the same",ucp_proto_id_field(params->super.proto_id, name), ucs_memory_type_names[select_param->mem_type]) ; } else { recv_mem_type = params->super.rkey_config_key->mem_type; + ucs_print("%s|RX selp memt %s, recvmt %s (from rkey)", + ucp_proto_id_field(params->super.proto_id, name), ucs_memory_type_names[select_param->mem_type], ucs_memory_type_names[params->super.rkey_config_key->mem_type]) ; } /* Silence cppcheck */ diff --git a/src/ucp/proto/proto_select.h b/src/ucp/proto/proto_select.h index 9441c3c0044..c3371d4a33f 100644 --- a/src/ucp/proto/proto_select.h +++ b/src/ucp/proto/proto_select.h @@ -82,6 +82,8 @@ struct ucp_proto_select_param { uint8_t sys_dev; /* Reply buffer system device */ } UCS_S_PACKED reply; + uint8_t mem_type_flags; + /* Align struct size to uint64_t */ uint8_t padding[2]; diff --git a/src/ucp/rndv/rndv_get.c b/src/ucp/rndv/rndv_get.c index a77d920fc4c..fc850c30095 100644 --- a/src/ucp/rndv/rndv_get.c +++ b/src/ucp/rndv/rndv_get.c @@ -277,6 +277,7 @@ ucp_proto_rndv_get_mtype_fetch_completion(uct_completion_t *uct_comp) ucp_proto_rndv_mtype_copy(req, req->send.rndv.mdesc->ptr, ucp_proto_rndv_mtype_get_req_memh(req), + req->send.rndv.mdesc->memh->mem_type, uct_ep_put_zcopy, ucp_proto_rndv_get_mtype_unpack_completion, "out to"); diff --git a/src/ucp/rndv/rndv_mtype.inl b/src/ucp/rndv/rndv_mtype.inl index b28e657678f..18911e50879 100644 --- a/src/ucp/rndv/rndv_mtype.inl +++ b/src/ucp/rndv/rndv_mtype.inl @@ -128,10 +128,12 @@ ucp_proto_rndv_mtype_next_iov(ucp_request_t *req, static UCS_F_ALWAYS_INLINE ucs_status_t ucp_proto_rndv_mtype_copy( ucp_request_t *req, void *buffer, uct_mem_h memh, - uct_ep_put_zcopy_func_t copy_func, uct_completion_callback_t comp_func, - const char *mode) + ucs_memory_type_t frag_mem_type, uct_ep_put_zcopy_func_t copy_func, + uct_completion_callback_t comp_func, const char *mode) { - ucp_ep_h mtype_ep = ucp_proto_rndv_req_mtype_ep(req); + ucp_ep_h mtype_ep = ucp_proto_rndv_mtype_ep( + req->send.ep->worker, frag_mem_type, + req->send.state.dt_iter.mem_info.type); ucp_lane_index_t lane = ucp_ep_config(mtype_ep)->key.rma_bw_lanes[0]; ucp_context_t UCS_V_UNUSED *context = req->send.ep->worker->context; ucs_status_t status; @@ -140,7 +142,7 @@ static UCS_F_ALWAYS_INLINE ucs_status_t ucp_proto_rndv_mtype_copy( ucp_trace_req(req, "buffer %p copy-%s %p %s-%s using memtype-ep %p lane[%d]", buffer, mode, req->send.state.dt_iter.type.contig.buffer, ucs_memory_type_names[req->send.state.dt_iter.mem_info.type], - ucs_memory_type_names[req->send.rndv.mdesc->memh->mem_type], + ucs_memory_type_names[frag_mem_type], mtype_ep, lane); ucp_proto_completion_init(&req->send.state.uct_comp, comp_func); diff --git a/src/ucp/rndv/rndv_put.c b/src/ucp/rndv/rndv_put.c index fecee38c2fb..4fddf4a8216 100644 --- a/src/ucp/rndv/rndv_put.c +++ b/src/ucp/rndv/rndv_put.c @@ -517,6 +517,7 @@ ucp_proto_rndv_put_mtype_copy_progress(uct_pending_req_t *uct_req) req->flags |= UCP_REQUEST_FLAG_PROTO_INITIALIZED; ucp_proto_rndv_mtype_copy(req, req->send.rndv.mdesc->ptr, ucp_proto_rndv_mtype_get_req_memh(req), + req->send.rndv.mdesc->memh->mem_type, uct_ep_get_zcopy, ucp_proto_rndv_put_mtype_pack_completion, "in from"); diff --git a/src/ucp/rndv/rndv_rkey_ptr.c b/src/ucp/rndv/rndv_rkey_ptr.c index 4e3d68d6f20..3c69cf9dad5 100644 --- a/src/ucp/rndv/rndv_rkey_ptr.c +++ b/src/ucp/rndv/rndv_rkey_ptr.c @@ -323,7 +323,7 @@ ucp_proto_rndv_rkey_ptr_mtype_copy_progress(uct_pending_req_t *uct_req) req->flags |= UCP_REQUEST_FLAG_PROTO_INITIALIZED; ucp_proto_rndv_mtype_copy(req, ppln_data->local_ptr, ppln_data->uct_memh, - uct_ep_get_zcopy, + UCS_MEMORY_TYPE_HOST, uct_ep_get_zcopy, ucp_proto_rndv_rkey_ptr_mtype_copy_completion, "in from"); diff --git a/src/ucp/rndv/rndv_rtr.c b/src/ucp/rndv/rndv_rtr.c index 17889dcaa67..e84eb1cdb0c 100644 --- a/src/ucp/rndv/rndv_rtr.c +++ b/src/ucp/rndv/rndv_rtr.c @@ -337,6 +337,7 @@ ucp_proto_rndv_rtr_mtype_data_received(ucp_request_t *req, int in_buffer) the remote address we published - the rendezvous fragment */ ucp_proto_rndv_mtype_copy(req, req->send.rndv.mdesc->ptr, ucp_proto_rndv_mtype_get_req_memh(req), + req->send.rndv.mdesc->memh->mem_type, uct_ep_put_zcopy, ucp_proto_rndv_rtr_mtype_copy_completion, "out to"); @@ -416,6 +417,9 @@ ucp_proto_rndv_rtr_mtype_probe(const ucp_proto_init_params_t *init_params) params.mem_info.type = frag_mem_type; + ucs_print("RTR mtype | localmt %s, remote %s", + ucs_memory_type_names[frag_mem_type], + ucs_memory_type_names[init_params->select_param->mem_type]); status = ucp_proto_init_buffer_copy_time( init_params->worker, "rtr/mtype unpack", frag_mem_type, init_params->select_param->mem_type, UCT_EP_OP_PUT_ZCOPY,