diff --git a/src/components/tl/ucp/alltoall/alltoall.c b/src/components/tl/ucp/alltoall/alltoall.c index 3803d96426..3409ac3aec 100644 --- a/src/components/tl/ucp/alltoall/alltoall.c +++ b/src/components/tl/ucp/alltoall/alltoall.c @@ -72,8 +72,8 @@ ucc_status_t ucc_tl_ucp_alltoall_pairwise_init(ucc_base_coll_args_t *coll_args, } ucc_status_t ucc_tl_ucp_alltoall_onesided_init(ucc_base_coll_args_t *coll_args, - ucc_base_team_t *team, - ucc_coll_task_t **task_h) + ucc_base_team_t *team, + ucc_coll_task_t **task_h) { ucc_tl_ucp_team_t *tl_team = ucc_derived_of(team, ucc_tl_ucp_team_t); ucc_tl_ucp_task_t *task; @@ -99,7 +99,12 @@ ucc_status_t ucc_tl_ucp_alltoall_onesided_init(ucc_base_coll_args_t *coll_args, *task_h = &task->super; task->super.post = ucc_tl_ucp_alltoall_onesided_start; task->super.progress = ucc_tl_ucp_alltoall_onesided_progress; - status = UCC_OK; + + status = ucc_tl_ucp_coll_dynamic_segment_init(&coll_args->args, task); + if (UCC_OK != status) { + tl_error(UCC_TL_TEAM_LIB(tl_team), + "failed to initialize dynamic segments"); + } out: return status; } diff --git a/src/components/tl/ucp/alltoall/alltoall_onesided.c b/src/components/tl/ucp/alltoall/alltoall_onesided.c index f84f98ca5e..e30babd07e 100644 --- a/src/components/tl/ucp/alltoall/alltoall_onesided.c +++ b/src/components/tl/ucp/alltoall/alltoall_onesided.c @@ -27,9 +27,14 @@ ucc_status_t ucc_tl_ucp_alltoall_onesided_start(ucc_coll_task_t *ctask) long *pSync = TASK_ARGS(task).global_work_buffer; ucc_memory_type_t mtype = TASK_ARGS(task).src.info.mem_type; ucc_rank_t peer; + ucc_status_t status; ucc_tl_ucp_task_reset(task, UCC_INPROGRESS); - ucc_tl_ucp_coll_dynamic_segments(&TASK_ARGS(task), task); + status = ucc_tl_ucp_coll_dynamic_segment_exchange(&TASK_ARGS(task), task); + if (UCC_OK != status) { + task->super.status = status; + goto out; + } /* TODO: change when support for library-based work buffers is complete */ nelems = (nelems / gsize) * ucc_dt_size(TASK_ARGS(task).src.info.datatype); @@ 
-66,4 +71,5 @@ void ucc_tl_ucp_alltoall_onesided_progress(ucc_coll_task_t *ctask) pSync[0] = 0; task->super.status = UCC_OK; + ucc_tl_ucp_coll_dynamic_segment_finalize(task); } diff --git a/src/components/tl/ucp/alltoallv/alltoallv_onesided.c b/src/components/tl/ucp/alltoallv/alltoallv_onesided.c index 5c446a6941..96a425dc2b 100644 --- a/src/components/tl/ucp/alltoallv/alltoallv_onesided.c +++ b/src/components/tl/ucp/alltoallv/alltoallv_onesided.c @@ -26,10 +26,15 @@ ucc_status_t ucc_tl_ucp_alltoallv_onesided_start(ucc_coll_task_t *ctask) size_t rdt_size = ucc_dt_size(TASK_ARGS(task).dst.info_v.datatype); ucc_memory_type_t mtype = TASK_ARGS(task).src.info_v.mem_type; ucc_rank_t peer; + ucc_status_t status; size_t sd_disp, dd_disp, data_size; ucc_tl_ucp_task_reset(task, UCC_INPROGRESS); - ucc_tl_ucp_coll_dynamic_segments(&TASK_ARGS(task), task); + status = ucc_tl_ucp_coll_dynamic_segment_exchange(&TASK_ARGS(task), task); + if (UCC_OK != status) { + task->super.status = status; + goto out; + } /* perform a put to each member peer using the peer's index in the * destination displacement. 
*/ @@ -70,6 +75,7 @@ void ucc_tl_ucp_alltoallv_onesided_progress(ucc_coll_task_t *ctask) pSync[0] = 0; task->super.status = UCC_OK; + ucc_tl_ucp_coll_dynamic_segment_finalize(task); } ucc_status_t ucc_tl_ucp_alltoallv_onesided_init(ucc_base_coll_args_t *coll_args, @@ -100,7 +106,12 @@ ucc_status_t ucc_tl_ucp_alltoallv_onesided_init(ucc_base_coll_args_t *coll_args, *task_h = &task->super; task->super.post = ucc_tl_ucp_alltoallv_onesided_start; task->super.progress = ucc_tl_ucp_alltoallv_onesided_progress; - status = UCC_OK; + + status = ucc_tl_ucp_coll_dynamic_segment_init(&coll_args->args, task); + if (UCC_OK != status) { + tl_error(UCC_TL_TEAM_LIB(tl_team), + "failed to initialize dynamic segments"); + } out: return status; } diff --git a/src/components/tl/ucp/tl_ucp.c b/src/components/tl/ucp/tl_ucp.c index 051f213f2e..a7ca9862e2 100644 --- a/src/components/tl/ucp/tl_ucp.c +++ b/src/components/tl/ucp/tl_ucp.c @@ -189,11 +189,6 @@ ucc_config_field_t ucc_tl_ucp_lib_config_table[] = { ucc_offsetof(ucc_tl_ucp_lib_config_t, use_reordering), UCC_CONFIG_TYPE_BOOL}, - {"USE_DYNAMIC_SEGMENTS", "n", - "Use dynamic segments in TL UCP for onesided collectives", - ucc_offsetof(ucc_tl_ucp_lib_config_t, use_dynamic_segments), - UCC_CONFIG_TYPE_BOOL}, - {"USE_XGVMI", "n", "Use XGVMI for onesided collectives", ucc_offsetof(ucc_tl_ucp_lib_config_t, use_xgvmi), diff --git a/src/components/tl/ucp/tl_ucp.h b/src/components/tl/ucp/tl_ucp.h index e8edef393f..6a4f40ed25 100644 --- a/src/components/tl/ucp/tl_ucp.h +++ b/src/components/tl/ucp/tl_ucp.h @@ -74,7 +74,6 @@ typedef struct ucc_tl_ucp_lib_config { uint32_t alltoallv_hybrid_pairwise_num_posts; ucc_ternary_auto_value_t use_topo; int use_reordering; - int use_dynamic_segments; int use_xgvmi; } ucc_tl_ucp_lib_config_t; @@ -98,28 +97,6 @@ typedef struct ucc_tl_ucp_lib { UCC_CLASS_DECLARE(ucc_tl_ucp_lib_t, const ucc_base_lib_params_t *, const ucc_base_config_t *); -/* dynamic segments stored in a flat buffer. 
An example with 4 segments on - * two PEs, with segments stored two at a time (collective with src/dst pair): - -rva/key => (rva, len, key size, key) tuple - -+-----------------------------+-----------------------------+ -| seg group 0 (seg 0 + seg 1) | seg group 1 (seg 2 + seg 3) | -+--------------+--------------+--------------+--------------+ -| rva/key pe 0 | rva/key pe 1 | rva/key pe 0 | rva/key pe 1 | -+--------------+--------------+--------------+--------------+ -*/ -typedef struct ucc_tl_ucp_dynamic_seg { - void *dyn_buff; /* flat buffer with rva, keys, etc. */ - size_t buff_size; - size_t *seg_groups; /* segment to segment group mapping */ - size_t *seg_group_start; /* offset of dyn_buff to start of seg group */ - size_t *seg_group_size; /* storage size of a seg group */ - size_t *starting_seg; /* starting seg for a seg group */ - size_t *num_seg_per_group; - size_t num_groups; -} ucc_tl_ucp_dynamic_seg_t; - typedef struct ucc_tl_ucp_remote_info { void * va_base; size_t len; @@ -146,13 +123,14 @@ typedef struct ucc_tl_ucp_context { uint32_t service_worker_throttling_count; ucc_mpool_t req_mp; ucc_tl_ucp_remote_info_t *remote_info; - ucc_tl_ucp_remote_info_t *dynamic_remote_info; - ucc_tl_ucp_dynamic_seg_t dyn_seg; ucp_rkey_h * rkeys; uint64_t n_rinfo_segs; - uint64_t n_dynrinfo_segs; uint64_t ucp_memory_types; int topo_required; + ucc_tl_ucp_remote_info_t *dynamic_remote_info; + void *dyn_seg_buf; + ucp_rkey_h *dyn_rkeys; + size_t n_dynrinfo_segs; } ucc_tl_ucp_context_t; UCC_CLASS_DECLARE(ucc_tl_ucp_context_t, const ucc_base_context_params_t *, const ucc_base_config_t *); @@ -216,38 +194,8 @@ extern ucc_config_field_t ucc_tl_ucp_lib_config_table[]; #define UCC_TL_UCP_REMOTE_RKEY(_ctx, _rank, _seg) \ ((_ctx)->rkeys[_rank * _ctx->n_rinfo_segs + _seg]) -#define UCC_TL_UCP_DYN_REMOTE_RKEY(_ctx, _rank, _size, _seg) \ - ((_ctx)->rkeys[_size * _ctx->n_rinfo_segs + _rank * _ctx->n_dynrinfo_segs + _seg]) - -#define UCC_TL_UCP_REMOTE_DYN_RVA(_ctx, _rank, _seg) \ - 
*(uint64_t *)(PTR_OFFSET(_ctx->dyn_seg.dyn_buff, \ - _ctx->dyn_seg.seg_group_start[_seg] \ - + _ctx->dyn_seg.seg_group_size[_ctx->dyn_seg.seg_groups[_seg]] * _rank \ - + (_seg - _ctx->dyn_seg.starting_seg[_seg]) * sizeof(uint64_t))) - -#define UCC_TL_UCP_REMOTE_DYN_LEN(_ctx, _rank, _seg) \ - *(uint64_t *)(PTR_OFFSET(_ctx->dyn_seg.dyn_buff, \ - sizeof(uint64_t) \ - * _ctx->dyn_seg.num_seg_per_group[_ctx->dyn_seg.seg_groups[_seg]] \ - + _ctx->dyn_seg.seg_group_start[_seg] \ - + _ctx->dyn_seg.seg_group_size[_ctx->dyn_seg.seg_groups[_seg]] * _rank \ - + (_seg - _ctx->dyn_seg.starting_seg[_seg]) * sizeof(uint64_t))) - -#define UCC_TL_UCP_REMOTE_DYN_KEY_SIZE(_ctx, _rank, _seg) \ - *(uint64_t *)(PTR_OFFSET(_ctx->dyn_seg.dyn_buff, \ - 2 * sizeof(uint64_t) \ - * _ctx->dyn_seg.num_seg_per_group[_ctx->dyn_seg.seg_groups[_seg]] \ - + _ctx->dyn_seg.seg_group_start[_seg] \ - + _ctx->dyn_seg.seg_group_size[_ctx->dyn_seg.seg_groups[_seg]] * _rank \ - + (_seg - _ctx->dyn_seg.starting_seg[_seg]) * sizeof(uint64_t))) - -#define UCC_TL_UCP_REMOTE_DYN_KEY(_ctx, _rank, _offset, _seg) \ - (PTR_OFFSET(_ctx->dyn_seg.dyn_buff, \ - 3 * sizeof(uint64_t) \ - * _ctx->dyn_seg.num_seg_per_group[_ctx->dyn_seg.seg_groups[_seg]] \ - + _ctx->dyn_seg.seg_group_start[_seg] \ - + _ctx->dyn_seg.seg_group_size[_ctx->dyn_seg.seg_groups[_seg]] * _rank \ - + _offset)) +#define UCC_TL_UCP_DYN_REMOTE_RKEY(_ctx, _rank, _seg) \ + ((_ctx)->dyn_rkeys[_rank * _ctx->n_dynrinfo_segs + _seg]) extern ucs_memory_type_t ucc_memtype_to_ucs[UCC_MEMORY_TYPE_LAST+1]; diff --git a/src/components/tl/ucp/tl_ucp_coll.c b/src/components/tl/ucp/tl_ucp_coll.c index c25f1319a7..1293ad852f 100644 --- a/src/components/tl/ucp/tl_ucp_coll.c +++ b/src/components/tl/ucp/tl_ucp_coll.c @@ -155,10 +155,9 @@ ucc_status_t ucc_tl_ucp_coll_finalize(ucc_coll_task_t *coll_task) return UCC_OK; } -static void ucc_tl_ucp_pack_data(ucc_tl_ucp_context_t *ctx, int starting_index, - void *pack) +static void ucc_tl_ucp_pack_data(ucc_tl_ucp_context_t *ctx, 
void *pack) { - uint64_t nsegs = ctx->n_dynrinfo_segs - starting_index; + uint64_t nsegs = ctx->n_dynrinfo_segs; uint64_t offset = 0; size_t section_offset = sizeof(uint64_t) * nsegs; void *keys; @@ -175,19 +174,17 @@ static void ucc_tl_ucp_pack_data(ucc_tl_ucp_context_t *ctx, int starting_index, keys = PTR_OFFSET(pack, (section_offset * 3)); for (i = 0; i < nsegs; i++) { - int index = i + starting_index; - rvas[i] = (uint64_t)ctx->dynamic_remote_info[index].va_base; - lens[i] = ctx->dynamic_remote_info[index].len; - key_sizes[i] = ctx->dynamic_remote_info[index].packed_key_len; - memcpy(PTR_OFFSET(keys, offset), - ctx->dynamic_remote_info[index].packed_key, - ctx->dynamic_remote_info[index].packed_key_len); - offset += ctx->dynamic_remote_info[index].packed_key_len; + rvas[i] = (uint64_t)ctx->dynamic_remote_info[i].va_base; + lens[i] = ctx->dynamic_remote_info[i].len; + key_sizes[i] = ctx->dynamic_remote_info[i].packed_key_len; + memcpy(PTR_OFFSET(keys, offset), ctx->dynamic_remote_info[i].packed_key, + ctx->dynamic_remote_info[i].packed_key_len); + offset += ctx->dynamic_remote_info[i].packed_key_len; } } -ucc_status_t ucc_tl_ucp_memmap_append_segment(ucc_tl_ucp_task_t *task, - ucc_mem_map_t *map, int segid) +ucc_status_t ucc_tl_ucp_memmap_segment(ucc_tl_ucp_task_t *task, + ucc_mem_map_t *map, int segid) { ucc_tl_ucp_team_t *tl_team = UCC_TL_UCP_TASK_TEAM(task); ucc_tl_ucp_context_t *tl_ctx = UCC_TL_UCP_TEAM_CTX(tl_team); @@ -195,122 +192,147 @@ ucc_status_t ucc_tl_ucp_memmap_append_segment(ucc_tl_ucp_task_t *task, ucp_mem_map_params_t mmap_params; ucp_mem_h mh; - // map the memory + /* map the memory */ if (map->resource != NULL) { mmap_params.field_mask = UCP_MEM_MAP_PARAM_FIELD_EXPORTED_MEMH_BUFFER; - mmap_params.exported_memh_buffer = map->resource; - - ucs_status = ucp_mem_map(tl_ctx->worker.ucp_context, &mmap_params, &mh); - if (ucs_status == UCS_ERR_UNREACHABLE) { - tl_error(tl_ctx->super.super.lib, "exported memh is unsupported"); - return 
ucs_status_to_ucc_status(ucs_status); - } else if (ucs_status < UCS_OK) { - tl_error(tl_ctx->super.super.lib, - "ucp_mem_map failed with error code: %d", ucs_status); - return ucs_status_to_ucc_status(ucs_status); - } - /* generate rkeys / packed keys */ - - tl_ctx->dynamic_remote_info[segid].va_base = map->address; - tl_ctx->dynamic_remote_info[segid].len = map->len; - tl_ctx->dynamic_remote_info[segid].mem_h = mh; + mmap_params.exported_memh_buffer = map->resource; tl_ctx->dynamic_remote_info[segid].packed_memh = map->resource; - ucs_status = - ucp_rkey_pack(tl_ctx->worker.ucp_context, mh, - &tl_ctx->dynamic_remote_info[segid].packed_key, - &tl_ctx->dynamic_remote_info[segid].packed_key_len); - if (UCS_OK != ucs_status) { - tl_error(tl_ctx->super.super.lib, - "failed to pack UCP key with error code: %d", ucs_status); - return ucs_status_to_ucc_status(ucs_status); - } } else { mmap_params.field_mask = UCP_MEM_MAP_PARAM_FIELD_ADDRESS | UCP_MEM_MAP_PARAM_FIELD_LENGTH; - mmap_params.address = map->address; - mmap_params.length = map->len; - - ucs_status = ucp_mem_map(tl_ctx->worker.ucp_context, &mmap_params, &mh); - if (ucs_status != UCS_OK) { - tl_error(UCC_TASK_LIB(task), "failure in ucp_mem_map %s", - ucs_status_string(ucs_status)); - return ucs_status_to_ucc_status(ucs_status); - } - tl_ctx->dynamic_remote_info[segid].va_base = map->address; - tl_ctx->dynamic_remote_info[segid].len = map->len; - tl_ctx->dynamic_remote_info[segid].mem_h = mh; + mmap_params.address = map->address; + mmap_params.length = map->len; tl_ctx->dynamic_remote_info[segid].packed_memh = NULL; - ucs_status = - ucp_rkey_pack(tl_ctx->worker.ucp_context, mh, - &tl_ctx->dynamic_remote_info[segid].packed_key, - &tl_ctx->dynamic_remote_info[segid].packed_key_len); - if (UCS_OK != ucs_status) { - tl_error(tl_ctx->super.super.lib, - "failed to pack UCP key with error code: %d", ucs_status); - return ucs_status_to_ucc_status(ucs_status); + } + /* map exported memory handle */ + ucs_status = 
ucp_mem_map(tl_ctx->worker.ucp_context, &mmap_params, &mh); + if (ucs_status == UCS_ERR_UNREACHABLE) { + tl_error(tl_ctx->super.super.lib, "exported memh is unsupported"); + return UCC_ERR_MEM_MAP_FAILURE; + } else if (ucs_status < UCS_OK) { + tl_error(tl_ctx->super.super.lib, + "ucp_mem_map failed with error code: %d", ucs_status); + return UCC_ERR_MEM_MAP_FAILURE; + } + /* generate rkeys / packed keys */ + tl_ctx->dynamic_remote_info[segid].va_base = map->address; + tl_ctx->dynamic_remote_info[segid].len = map->len; + tl_ctx->dynamic_remote_info[segid].mem_h = mh; + ucs_status = + ucp_rkey_pack(tl_ctx->worker.ucp_context, mh, + &tl_ctx->dynamic_remote_info[segid].packed_key, + &tl_ctx->dynamic_remote_info[segid].packed_key_len); + if (UCS_OK != ucs_status) { + tl_error(tl_ctx->super.super.lib, + "failed to pack UCP key with error code: %d", ucs_status); + return ucs_status_to_ucc_status(ucs_status); + } + + return UCC_OK; +} + +ucc_status_t ucc_tl_ucp_coll_dynamic_segment_init(ucc_coll_args_t *coll_args, + ucc_tl_ucp_task_t *task) +{ + ucc_tl_ucp_team_t *tl_team = UCC_TL_UCP_TASK_TEAM(task); + ucc_tl_ucp_context_t *ctx = UCC_TL_UCP_TEAM_CTX(tl_team); + int i = 0; + ucc_status_t status; + ucc_mem_map_t *maps = coll_args->mem_map.segments; + size_t n_segments = coll_args->mem_map.n_segments; + + if (n_segments == 0) { + maps = ucc_calloc(2, sizeof(ucc_mem_map_t)); + if (!maps) { + return UCC_ERR_NO_MEMORY; } + + maps[0].address = coll_args->src.info.buffer; + maps[0].len = (coll_args->src.info.count / UCC_TL_TEAM_SIZE(tl_team)) * + ucc_dt_size(coll_args->src.info.datatype); + maps[0].resource = NULL; + + maps[1].address = coll_args->dst.info.buffer; + maps[1].len = (coll_args->dst.info.count / UCC_TL_TEAM_SIZE(tl_team)) * + ucc_dt_size(coll_args->dst.info.datatype); + maps[1].resource = NULL; + + n_segments = 2; + } + + ctx->dynamic_remote_info = + ucc_calloc(n_segments, sizeof(ucc_mem_map_t), "dynamic remote info"); + /* map memory and fill in local segment 
information */ + for (i = 0; i < n_segments; i++) { + status = ucc_tl_ucp_memmap_segment(task, &maps[i], i); + if (status != UCC_OK) { + tl_error(UCC_TASK_LIB(task), "failed to memory map a segment"); + goto failed_memory_map; + } + ++ctx->n_dynrinfo_segs; + } + if (coll_args->mem_map.n_segments == 0) { + free(maps); } return UCC_OK; +failed_memory_map: + for (i = 0; i < ctx->n_dynrinfo_segs; i++) { + if (ctx->dynamic_remote_info[i].mem_h) { + ucp_mem_unmap(ctx->worker.ucp_context, + ctx->dynamic_remote_info[i].mem_h); + } + if (ctx->dynamic_remote_info[i].packed_key) { + ucp_rkey_buffer_release(ctx->dynamic_remote_info[i].packed_key); + } + if (ctx->dynamic_remote_info[i].packed_memh) { + ucp_rkey_buffer_release(ctx->dynamic_remote_info[i].packed_memh); + } + } + ctx->n_dynrinfo_segs = 0; + if (coll_args->mem_map.n_segments == 0) { + free(maps); + } + return status; } -ucc_status_t ucc_tl_ucp_coll_dynamic_segments(ucc_coll_args_t *coll_args, - ucc_tl_ucp_task_t *task) +ucc_status_t +ucc_tl_ucp_coll_dynamic_segment_exchange(ucc_coll_args_t *coll_args, + ucc_tl_ucp_task_t *task) { - ucc_tl_ucp_team_t *tl_team = UCC_TL_UCP_TASK_TEAM(task); - ucc_tl_ucp_lib_t *tl_lib = UCC_TL_UCP_TEAM_LIB(tl_team); - ucc_tl_ucp_context_t *ctx = UCC_TL_UCP_TEAM_CTX(tl_team); - int i = 0; + ucc_tl_ucp_team_t *tl_team = UCC_TL_UCP_TASK_TEAM(task); + ucc_tl_ucp_context_t *ctx = UCC_TL_UCP_TEAM_CTX(tl_team); + int i = 0; ucc_status_t status; - if (tl_lib->cfg.use_dynamic_segments && coll_args->mem_map.n_segments > 0) { - int starting_index = ctx->n_dynrinfo_segs; - size_t seg_pack_size = 0; - size_t *global_size = NULL; - size_t team_size = UCC_TL_TEAM_SIZE(tl_team); + if (ctx->n_dynrinfo_segs) { + size_t seg_pack_size = 0; + size_t *global_size = NULL; + size_t team_size = UCC_TL_TEAM_SIZE(tl_team); ucc_team_t *core_team = UCC_TL_CORE_TEAM(UCC_TL_UCP_TASK_TEAM(task)); - ucc_subset_t subset = {.map = tl_team->ctx_map, - .myrank = core_team->rank}; + ucc_subset_t subset = {.map = 
tl_team->ctx_map, + .myrank = core_team->rank}; ucc_service_coll_req_t *scoll_req; void *ex_buffer; - ptrdiff_t old_offset; - - /* increase dynamic remote info size */ - ctx->dynamic_remote_info = ucc_realloc( - ctx->dynamic_remote_info, - sizeof(ucc_tl_ucp_remote_info_t) * - (ctx->n_dynrinfo_segs + coll_args->mem_map.n_segments), - "dyn remote info"); - if (!ctx->dynamic_remote_info) { - tl_error(UCC_TASK_LIB(task), "Out of Memory"); - return UCC_ERR_NO_MEMORY; - } - for (i = 0; i < coll_args->mem_map.n_segments; i++) { - /* map the buffer and populate the dynamic_remote_info segments */ - status = ucc_tl_ucp_memmap_append_segment( - task, &coll_args->mem_map.segments[i], starting_index + i); - if (status != UCC_OK) { - tl_error(UCC_TASK_LIB(task), "failed to memory map a segment"); - goto failed_memory_map; - } - seg_pack_size += - sizeof(uint64_t) * 3 + - ctx->dynamic_remote_info[starting_index + i].packed_key_len; + for (i = 0; i < ctx->n_dynrinfo_segs; i++) { + seg_pack_size += sizeof(uint64_t) * 3 + + ctx->dynamic_remote_info[i].packed_key_len; } global_size = ucc_calloc(core_team->size, sizeof(size_t)); if (!global_size) { tl_error(UCC_TASK_LIB(task), "Out of Memory"); - goto failed_memory_map; + return UCC_ERR_NO_MEMORY; } /* allgather on the new segments size */ status = ucc_service_allgather(core_team, &seg_pack_size, global_size, sizeof(uint64_t), subset, &scoll_req); if (status < UCC_OK) { - tl_error(UCC_TASK_LIB(task), "failed to perform a service allgather"); + tl_error(UCC_TASK_LIB(task), + "failed to perform a service allgather"); ucc_free(global_size); - goto failed_memory_map; + goto failed_size_exch; } while (UCC_INPROGRESS == (status = ucc_service_coll_test(scoll_req))) { } @@ -318,7 +340,7 @@ ucc_status_t ucc_tl_ucp_coll_dynamic_segments(ucc_coll_args_t *coll_args, tl_error(UCC_TASK_LIB(task), "failed on the allgather"); ucc_service_coll_finalize(scoll_req); ucc_free(global_size); - goto failed_memory_map; + goto failed_size_exch; } 
ucc_service_coll_finalize(scoll_req); for (i = 0; i < core_team->size; i++) { @@ -329,126 +351,100 @@ ucc_status_t ucc_tl_ucp_coll_dynamic_segments(ucc_coll_args_t *coll_args, ucc_free(global_size); /* pack the dynamic_remote_info segments */ - ctx->n_dynrinfo_segs += coll_args->mem_map.n_segments; ex_buffer = ucc_malloc(seg_pack_size, "ex pack size"); if (!ex_buffer) { tl_error(UCC_TASK_LIB(task), "Out of Memory"); status = UCC_ERR_NO_MEMORY; - goto failed_memory_map; - } - ucc_tl_ucp_pack_data(ctx, starting_index, ex_buffer); - - old_offset = ctx->dyn_seg.buff_size; - ctx->dyn_seg.buff_size += seg_pack_size * core_team->size; - ctx->dyn_seg.dyn_buff = ucc_realloc(ctx->dyn_seg.dyn_buff, - ctx->dyn_seg.buff_size, "dyn buff"); - if (!ctx->dyn_seg.dyn_buff) { - status = UCC_ERR_NO_MEMORY; - tl_error(UCC_TASK_LIB(task), "Out of Memory"); - goto failed_memory_map; - } - ctx->dyn_seg.seg_groups = ucc_realloc( - ctx->dyn_seg.seg_groups, sizeof(uint64_t) * ctx->n_dynrinfo_segs, - "n_dynrinfo_segs"); - if (!ctx->dyn_seg.seg_groups) { - status = UCC_ERR_NO_MEMORY; - tl_error(UCC_TASK_LIB(task), "Out of Memory"); - goto failed_memory_map; - } - ctx->dyn_seg.seg_group_start = ucc_realloc( - ctx->dyn_seg.seg_group_start, - sizeof(uint64_t) * ctx->n_dynrinfo_segs, "n_dynrinfo_segs"); - if (!ctx->dyn_seg.seg_group_start) { - status = UCC_ERR_NO_MEMORY; - tl_error(UCC_TASK_LIB(task), "Out of Memory"); - goto failed_memory_map; - } - ctx->dyn_seg.seg_group_size = ucc_realloc( - ctx->dyn_seg.seg_group_size, - sizeof(uint64_t) * ctx->dyn_seg.num_groups + 1, "n_dynrinfo_segs"); - if (!ctx->dyn_seg.seg_group_size) { - status = UCC_ERR_NO_MEMORY; - tl_error(UCC_TASK_LIB(task), "Out of Memory"); - goto failed_memory_map; + goto failed_size_exch; } + ucc_tl_ucp_pack_data(ctx, ex_buffer); - ctx->dyn_seg.starting_seg = ucc_realloc( - ctx->dyn_seg.starting_seg, sizeof(uint64_t) * ctx->n_dynrinfo_segs, - "n_dynrinfo_segs"); - if (!ctx->dyn_seg.starting_seg) { + ctx->dyn_seg_buf = 
ucc_calloc(1, team_size * seg_pack_size, "dyn buff"); + if (!ctx->dyn_seg_buf) { status = UCC_ERR_NO_MEMORY; tl_error(UCC_TASK_LIB(task), "Out of Memory"); - goto failed_memory_map; - } - ctx->dyn_seg.num_seg_per_group = ucc_realloc( - ctx->dyn_seg.num_seg_per_group, - sizeof(uint64_t) * ctx->dyn_seg.num_groups + 1, "n_dynrinfo_segs"); - if (!ctx->dyn_seg.num_seg_per_group) { - status = UCC_ERR_NO_MEMORY; - tl_error(UCC_TASK_LIB(task), "Out of Memory"); - goto failed_memory_map; - } - - ctx->dyn_seg.num_groups += 1; - ctx->dyn_seg.num_seg_per_group[ctx->dyn_seg.num_groups - 1] = - coll_args->mem_map.n_segments; - ctx->dyn_seg.seg_group_size[ctx->dyn_seg.num_groups - 1] = - seg_pack_size; - if (starting_index == 0) { - for (i = starting_index; i < ctx->n_dynrinfo_segs; i++) { - ctx->dyn_seg.seg_groups[i] = 0; - ctx->dyn_seg.seg_group_start[i] = 0; - ctx->dyn_seg.starting_seg[i] = starting_index; - } - } else { - for (i = starting_index; i < ctx->n_dynrinfo_segs; i++) { - ctx->dyn_seg.seg_groups[i] = - ctx->dyn_seg.seg_groups[starting_index - 1] + 1; - ctx->dyn_seg.seg_group_start[i] = old_offset; - ctx->dyn_seg.starting_seg[i] = starting_index; - } + goto failed_data_exch; } /* allgather on the new segments (packed) */ - status = ucc_service_allgather( - core_team, ex_buffer, PTR_OFFSET(ctx->dyn_seg.dyn_buff, old_offset), - seg_pack_size, subset, &scoll_req); + status = ucc_service_allgather(core_team, ex_buffer, ctx->dyn_seg_buf, + seg_pack_size, subset, &scoll_req); if (status < UCC_OK) { tl_error(UCC_TASK_LIB(task), "failed on the allgather"); - goto failed_memory_map; + goto failed_data_exch; } while (UCC_INPROGRESS == (status = ucc_service_coll_test(scoll_req))) { } if (status < UCC_OK) { tl_error(UCC_TASK_LIB(task), "failed on the allgather"); ucc_service_coll_finalize(scoll_req); - goto failed_memory_map; + goto failed_data_exch; } /* done with allgather */ ucc_service_coll_finalize(scoll_req); - ctx->rkeys = ucc_realloc(ctx->rkeys, - team_size * 
sizeof(ucp_rkey_h) * - (ctx->n_rinfo_segs + ctx->n_dynrinfo_segs), - "rkeys"); - memset(PTR_OFFSET(ctx->rkeys, team_size * sizeof(ucp_rkey_h) * - (ctx->n_rinfo_segs + starting_index)), - 0, - team_size * sizeof(ucp_rkey_h) * coll_args->mem_map.n_segments); + ctx->dyn_rkeys = + ucc_calloc(1, team_size * sizeof(ucp_rkey_h) * ctx->n_dynrinfo_segs, + "dyn rkeys"); ucc_free(ex_buffer); } return UCC_OK; -failed_memory_map: +failed_data_exch: +failed_size_exch: for (i = 0; i < coll_args->mem_map.n_segments; i++) { if (ctx->dynamic_remote_info[ctx->n_dynrinfo_segs + i].mem_h) { - ucp_mem_unmap(ctx->worker.ucp_context, ctx->dynamic_remote_info[ctx->n_dynrinfo_segs + i].mem_h); + ucp_mem_unmap( + ctx->worker.ucp_context, + ctx->dynamic_remote_info[ctx->n_dynrinfo_segs + i].mem_h); } if (ctx->dynamic_remote_info[ctx->n_dynrinfo_segs + i].packed_key) { - ucp_rkey_buffer_release(ctx->dynamic_remote_info[ctx->n_dynrinfo_segs + i].packed_key); + ucp_rkey_buffer_release( + ctx->dynamic_remote_info[ctx->n_dynrinfo_segs + i].packed_key); } } return status; } +void ucc_tl_ucp_coll_dynamic_segment_finalize(ucc_tl_ucp_task_t *task) +{ + ucc_tl_ucp_team_t *tl_team = UCC_TL_UCP_TASK_TEAM(task); + ucc_tl_ucp_context_t *ctx = UCC_TL_UCP_TEAM_CTX(tl_team); + int i = 0; + int j = 0; + /* free library resources, unmap user resources */ + if (ctx->dyn_seg_buf) { + /* unmap and release packed buffers */ + for (i = 0; i < ctx->n_dynrinfo_segs; i++) { + if (ctx->dynamic_remote_info[i].mem_h) { + ucp_mem_unmap(ctx->worker.ucp_context, + ctx->dynamic_remote_info[i].mem_h); + } + if (ctx->dynamic_remote_info[i].packed_key) { + ucp_rkey_buffer_release(ctx->dynamic_remote_info[i].packed_key); + } + if (ctx->dynamic_remote_info[i].packed_memh) { + ucp_rkey_buffer_release( + ctx->dynamic_remote_info[i].packed_memh); + } + } + /* destroy rkeys */ + for (i = 0; i < UCC_TL_TEAM_SIZE(tl_team); i++) { + for (j = 0; j < ctx->n_dynrinfo_segs; j++) { + if (UCC_TL_UCP_DYN_REMOTE_RKEY(ctx, i, j)) { + 
ucp_rkey_destroy(UCC_TL_UCP_DYN_REMOTE_RKEY(ctx, i, j)); + } + } + } + free(ctx->dynamic_remote_info); + free(ctx->dyn_rkeys); + free(ctx->dyn_seg_buf); + + ctx->dynamic_remote_info = NULL; + ctx->dyn_rkeys = NULL; + ctx->dyn_seg_buf = NULL; + ctx->n_dynrinfo_segs = 0; + } +} + ucc_status_t ucc_tl_ucp_coll_init(ucc_base_coll_args_t *coll_args, ucc_base_team_t *team, ucc_coll_task_t **task_h) diff --git a/src/components/tl/ucp/tl_ucp_coll.h b/src/components/tl/ucp/tl_ucp_coll.h index bbcf9a40e9..fb5ec627ab 100644 --- a/src/components/tl/ucp/tl_ucp_coll.h +++ b/src/components/tl/ucp/tl_ucp_coll.h @@ -487,7 +487,13 @@ ucc_tl_ucp_get_radix_from_range(ucc_tl_ucp_team_t *team, return radix; } -ucc_status_t ucc_tl_ucp_coll_dynamic_segments(ucc_coll_args_t *coll_args, - ucc_tl_ucp_task_t *task); +ucc_status_t ucc_tl_ucp_coll_dynamic_segment_init(ucc_coll_args_t *coll_args, + ucc_tl_ucp_task_t *task); + +ucc_status_t +ucc_tl_ucp_coll_dynamic_segment_exchange(ucc_coll_args_t *coll_args, + ucc_tl_ucp_task_t *task); + +void ucc_tl_ucp_coll_dynamic_segment_finalize(ucc_tl_ucp_task_t *task); #endif diff --git a/src/components/tl/ucp/tl_ucp_context.c b/src/components/tl/ucp/tl_ucp_context.c index 4516814bcc..7ad94560ab 100644 --- a/src/components/tl/ucp/tl_ucp_context.c +++ b/src/components/tl/ucp/tl_ucp_context.c @@ -161,18 +161,16 @@ UCC_CLASS_INIT_FUNC(ucc_tl_ucp_context_t, UCP_CHECK(ucp_config_read(prefix, NULL, &ucp_config), "failed to read ucp configuration", err_cfg_read, self); - ucp_params.field_mask = - UCP_PARAM_FIELD_FEATURES | UCP_PARAM_FIELD_TAG_SENDER_MASK | UCP_PARAM_FIELD_NAME; - ucp_params.features = UCP_FEATURE_TAG | UCP_FEATURE_AM; - if (((params->params.mask & UCC_CONTEXT_PARAM_FIELD_MEM_PARAMS) || - lib->cfg.use_dynamic_segments)) { - ucp_params.features |= UCP_FEATURE_RMA | UCP_FEATURE_AMO64; - } + ucp_params.field_mask = UCP_PARAM_FIELD_FEATURES | + UCP_PARAM_FIELD_TAG_SENDER_MASK | + UCP_PARAM_FIELD_NAME; + ucp_params.features = + UCP_FEATURE_TAG | 
UCP_FEATURE_AM | UCP_FEATURE_RMA | UCP_FEATURE_AMO64; if (lib->cfg.use_xgvmi) { ucp_params.features |= UCP_FEATURE_EXPORTED_MEMH; } ucp_params.tag_sender_mask = UCC_TL_UCP_TAG_SENDER_MASK; - ucp_params.name = "UCC_UCP_CONTEXT"; + ucp_params.name = "UCC_UCP_CONTEXT"; if (params->estimated_num_ppn > 0) { ucp_params.field_mask |= UCP_PARAM_FIELD_ESTIMATED_NUM_PPN; @@ -253,10 +251,11 @@ UCC_CLASS_INIT_FUNC(ucc_tl_ucp_context_t, self->remote_info = NULL; self->dynamic_remote_info = NULL; + self->dyn_seg_buf = NULL; self->n_rinfo_segs = 0; self->n_dynrinfo_segs = 0; self->rkeys = NULL; - memset(&self->dyn_seg, 0, sizeof(self->dyn_seg)); + self->dyn_rkeys = NULL; if (params->params.mask & UCC_CONTEXT_PARAM_FIELD_MEM_PARAMS && params->params.mask & UCC_CONTEXT_PARAM_FIELD_OOB) { ucc_status = ucc_tl_ucp_ctx_remote_populate( @@ -345,12 +344,6 @@ ucc_status_t ucc_tl_ucp_rinfo_destroy(ucc_tl_ucp_context_t *ctx) ucp_rkey_destroy(UCC_TL_UCP_REMOTE_RKEY(ctx, i, j)); } } - for (j = 0; j < ctx->n_dynrinfo_segs; j++) { - if (UCC_TL_UCP_REMOTE_RKEY(ctx, i, ctx->n_rinfo_segs + j)) { - ucp_rkey_destroy( - UCC_TL_UCP_REMOTE_RKEY(ctx, i, ctx->n_rinfo_segs + j)); - } - } } for (i = 0; i < ctx->n_rinfo_segs; i++) { if (ctx->remote_info[i].mem_h) { @@ -366,9 +359,6 @@ ucc_status_t ucc_tl_ucp_rinfo_destroy(ucc_tl_ucp_context_t *ctx) ucp_mem_unmap(ctx->worker.ucp_context, ctx->dynamic_remote_info[i].mem_h); } - if (ctx->dynamic_remote_info[i].packed_key) { - ucp_rkey_buffer_release(ctx->dynamic_remote_info[i].packed_key); - } } ucc_free(ctx->dynamic_remote_info); } @@ -376,7 +366,6 @@ ucc_status_t ucc_tl_ucp_rinfo_destroy(ucc_tl_ucp_context_t *ctx) ucc_free(ctx->rkeys); ctx->remote_info = NULL; ctx->rkeys = NULL; - ctx->dynamic_remote_info = NULL; return UCC_OK; } diff --git a/src/components/tl/ucp/tl_ucp_sendrecv.h b/src/components/tl/ucp/tl_ucp_sendrecv.h index 70cdafa689..009a95ac1e 100644 --- a/src/components/tl/ucp/tl_ucp_sendrecv.h +++ b/src/components/tl/ucp/tl_ucp_sendrecv.h @@ -225,6 
+225,37 @@ static inline ucc_status_t ucc_tl_ucp_send_nz(void *buffer, size_t msglen, dest_group_rank, team, task); } +/** + * Find the segment of @p rinfo whose [va_base, va_base + len) range contains + * @p va. + * + * The packed-key size (key_sizes[i]) of every segment skipped before the + * match is added to *key_offset, so the caller must zero *key_offset before + * each lookup; on a match it is then the byte offset of the matching + * packed key. + * + * @return index of the matching segment, or -1 if none of the nr_segments + * segments contains @p va. + */ +static inline int resolve_segment(const void *va, size_t *key_sizes, + ptrdiff_t *key_offset, size_t nr_segments, + ucc_tl_ucp_remote_info_t *rinfo) +{ + size_t i; + uint64_t base; + uint64_t end; + + for (i = 0; i < nr_segments; i++) { + base = (uint64_t)rinfo[i].va_base; + end = base + rinfo[i].len; + if ((uint64_t)va >= base && (uint64_t)va < end) { + return (int)i; + } + *key_offset += (ptrdiff_t)key_sizes[i]; + } + return -1; +} + static inline ucc_status_t ucc_tl_ucp_resolve_p2p_by_va(ucc_tl_ucp_team_t *team, void *va, size_t msglen, ucp_ep_h *ep, ucc_rank_t peer, uint64_t *rva, @@ -232,14 +263,13 @@ ucc_tl_ucp_resolve_p2p_by_va(ucc_tl_ucp_team_t *team, void *va, size_t msglen, { ucc_tl_ucp_context_t *ctx = UCC_TL_UCP_TEAM_CTX(team); ptrdiff_t key_offset = 0; - const size_t section_offset = sizeof(uint64_t) * ctx->n_rinfo_segs; + size_t section_offset = sizeof(uint64_t) * ctx->n_rinfo_segs; ucc_rank_t core_rank; uint64_t *rvas; uint64_t *key_sizes; void *keys; void *offset; ptrdiff_t base_offset; - int i; *segment = -1; core_rank = ucc_ep_map_eval(UCC_TL_TEAM_MAP(team), peer); @@ -253,69 +283,55 @@ ucc_tl_ucp_resolve_p2p_by_va(ucc_tl_ucp_team_t *team, void *va, size_t msglen, rvas = (uint64_t *)base_offset; key_sizes = PTR_OFFSET(base_offset, (section_offset * 2)); keys = PTR_OFFSET(base_offset, (section_offset * 3)); - for (i = 0; i < ctx->n_rinfo_segs; i++) { - uint64_t base = (uint64_t)ctx->remote_info[i].va_base; - uint64_t end = base + ctx->remote_info[i].len; - if ((uint64_t)va >= base && (uint64_t)va < end) { - *segment = i; - *rva = rvas[i] + ((uint64_t)va - (uint64_t)base); - if (ucc_unlikely(NULL == - UCC_TL_UCP_REMOTE_RKEY(ctx, peer, *segment))) { - ucs_status_t ucs_status = ucp_ep_rkey_unpack( - *ep, PTR_OFFSET(keys, key_offset), - &UCC_TL_UCP_REMOTE_RKEY(ctx, peer, *segment)); - if (UCS_OK != ucs_status) { - return 
ucs_status_to_ucc_status(ucs_status); - } + /* first try the segments described by ctx->remote_info */ + *segment = resolve_segment(va, key_sizes, &key_offset, ctx->n_rinfo_segs, + ctx->remote_info); + if (*segment >= 0) { + *rva = rvas[*segment] + + ((uint64_t)va - (uint64_t)ctx->remote_info[*segment].va_base); + *packed_memh = (ctx->remote_info[*segment].packed_memh) + ? ctx->remote_info[*segment].mem_h + : NULL; + if (ucc_unlikely(NULL == UCC_TL_UCP_REMOTE_RKEY(ctx, peer, *segment))) { + ucs_status_t ucs_status = ucp_ep_rkey_unpack( + *ep, PTR_OFFSET(keys, key_offset), + &UCC_TL_UCP_REMOTE_RKEY(ctx, peer, *segment)); + if (UCS_OK != ucs_status) { + return ucs_status_to_ucc_status(ucs_status); } - *rkey = UCC_TL_UCP_REMOTE_RKEY(ctx, peer, *segment); - *packed_memh = (ctx->remote_info[i].packed_memh) - ? ctx->remote_info[i].mem_h - : NULL; - return UCC_OK; } - key_offset += key_sizes[i]; + *rkey = UCC_TL_UCP_REMOTE_RKEY(ctx, peer, *segment); + return UCC_OK; } - if (0 > *segment) { - key_offset = 0; - for (i = 0; i < ctx->n_dynrinfo_segs; i++) { - uint64_t base = (uint64_t)ctx->dynamic_remote_info[i].va_base; - uint64_t end = base + ctx->dynamic_remote_info[i].len; - uint64_t check_base = (uint64_t)va; - uint64_t check_end = check_base + msglen; - size_t num_keys = 0; - void *packed_key = NULL; - size_t team_size = UCC_TL_TEAM_SIZE(team); - if (check_base >= base && check_base < end && check_end <= end) { - *segment = i; - *rva = UCC_TL_UCP_REMOTE_DYN_RVA(ctx, peer, i); - num_keys = *segment - ctx->dyn_seg.starting_seg[*segment]; - for (int j = 0; j < num_keys; j++) { - key_offset += UCC_TL_UCP_REMOTE_DYN_KEY_SIZE( - ctx, peer, ctx->dyn_seg.starting_seg[*segment] + j); - } - packed_key = - UCC_TL_UCP_REMOTE_DYN_KEY(ctx, peer, key_offset, *segment); - /* dynamic segment keys should be placed AFTER - * the ctx's keys (i.e., num_static_segs + segment_number) */ - if (ucc_unlikely(NULL == UCC_TL_UCP_DYN_REMOTE_RKEY( - ctx, peer, team_size, *segment))) { - ucs_status_t ucs_status = - ucp_ep_rkey_unpack(*ep, packed_key, - 
&UCC_TL_UCP_DYN_REMOTE_RKEY( - ctx, peer, team_size, *segment)); - if (UCS_OK != ucs_status) { - return ucs_status_to_ucc_status(ucs_status); - } - } - *rkey = - UCC_TL_UCP_DYN_REMOTE_RKEY(ctx, peer, team_size, *segment); - *packed_memh = (ctx->dynamic_remote_info[i].packed_memh) - ? ctx->dynamic_remote_info[i].mem_h - : NULL; - return UCC_OK; + + /* not found there: retry with the dynamic segments; their packed keys are + * read from ctx->dyn_seg_buf, so the offset accumulated above must not carry over */ + key_offset = 0; + section_offset = sizeof(uint64_t) * ctx->n_dynrinfo_segs; + base_offset = (ptrdiff_t)(ctx->dyn_seg_buf); + rvas = (uint64_t *)base_offset; + key_sizes = PTR_OFFSET(base_offset, (section_offset * 2)); + keys = PTR_OFFSET(base_offset, (section_offset * 3)); + *segment = resolve_segment(va, key_sizes, &key_offset, ctx->n_dynrinfo_segs, + ctx->dynamic_remote_info); + if (*segment >= 0) { + *rva = rvas[*segment] + + ((uint64_t)va - + (uint64_t)ctx->dynamic_remote_info[*segment].va_base); + *packed_memh = (ctx->dynamic_remote_info[*segment].packed_memh) + ? ctx->dynamic_remote_info[*segment].mem_h + : NULL; + if (ucc_unlikely(NULL == + UCC_TL_UCP_DYN_REMOTE_RKEY(ctx, peer, *segment))) { + ucs_status_t ucs_status = ucp_ep_rkey_unpack( + *ep, PTR_OFFSET(keys, key_offset), + &UCC_TL_UCP_DYN_REMOTE_RKEY(ctx, peer, *segment)); + if (UCS_OK != ucs_status) { + return ucs_status_to_ucc_status(ucs_status); } } + *rkey = UCC_TL_UCP_DYN_REMOTE_RKEY(ctx, peer, *segment); + return UCC_OK; } tl_error( diff --git a/src/ucc/api/ucc.h b/src/ucc/api/ucc.h index 6d28276683..49a21f7401 100644 --- a/src/ucc/api/ucc.h +++ b/src/ucc/api/ucc.h @@ -890,15 +890,25 @@ typedef struct ucc_oob_coll { typedef ucc_oob_coll_t ucc_context_oob_coll_t; typedef ucc_oob_coll_t ucc_team_oob_coll_t; +/** + * @ingroup UCC_CONTEXT_DT + */ +typedef enum { + UCC_MEM_MAP_TYPE_SEND_BUF, + UCC_MEM_MAP_TYPE_RECV_BUF, + UCC_MEM_MAP_TYPE_SEND_RECV_BUF, +} ucc_mem_map_usage_t; + /** * * @ingroup UCC_CONTEXT_DT */ typedef struct ucc_mem_map { - void * address; /*!< the address of a buffer to be attached to + void *address; /*!< the address of a buffer to be attached to a UCC 
context */ - size_t len; /*!< the length of the buffer */ - void * resource; /*!< resource associated with the address. + size_t len; /*!< the length of the buffer */ + ucc_mem_map_usage_t type; /*!< the usage type of buffer being mapped. */ + void *resource; /*!< resource associated with the address. examples of resources include memory keys. */ } ucc_mem_map_t; @@ -1873,25 +1883,19 @@ typedef struct ucc_coll_args { to 0. */ ucc_coll_callback_t cb; double timeout; /*!< Timeout in seconds */ - ucc_mem_map_params_t mem_map; /*!< Memory regions to be used - for the current and/or - future one-sided collectives. - If set, the designated regions - will be mapped and information - exchanged with the team - associated with the collective - via an allgather operation. - Memory is unmapped during - context destruction. - It is recommended to use this - option sparingly due to the - increased overhead. Not necessary - for two-sided collectives. */ struct { uint64_t start; int64_t stride; uint64_t size; } active_set; + ucc_mem_map_params_t mem_map; /*!< Memory regions to be used + for the current collective. + If set, the designated regions + will be mapped and information + exchanged. Memory is unmapped + at collective completion. Not + necessary for two-sided + collectives. 
*/ } ucc_coll_args_t; /** diff --git a/src/ucc/api/ucc_status.h b/src/ucc/api/ucc_status.h index 90d25b463b..49ab4e1d7e 100644 --- a/src/ucc/api/ucc_status.h +++ b/src/ucc/api/ucc_status.h @@ -40,6 +40,7 @@ typedef enum { UCC_ERR_NO_MESSAGE = -6, /*!< General purpose return code without specific error */ UCC_ERR_NOT_FOUND = -7, UCC_ERR_TIMED_OUT = -8, + UCC_ERR_MEM_MAP_FAILURE = -9, UCC_ERR_LAST = -100, } ucc_status_t; diff --git a/src/utils/ucc_status.c b/src/utils/ucc_status.c index 2e2285eb61..430d4a96d5 100644 --- a/src/utils/ucc_status.c +++ b/src/utils/ucc_status.c @@ -34,6 +34,8 @@ const char *ucc_status_string(ucc_status_t status) return "Not found"; case UCC_ERR_TIMED_OUT: return "Timeout expired"; + case UCC_ERR_MEM_MAP_FAILURE: + return "Failed to memory map address"; default: snprintf(error_str, sizeof(error_str) - 1, "Unknown error %d", status); return error_str;