-
Notifications
You must be signed in to change notification settings - Fork 420
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
UCT/API: Add new MD resource query API implementation #10161
base: master
Are you sure you want to change the base?
Changes from 2 commits
1e716b3
dce5abb
dd27566
7d61d6f
5c65954
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -122,7 +122,7 @@ static const char *size_limit_to_str(size_t min_size, size_t max_size) | |
} | ||
|
||
static void print_iface_info(uct_worker_h worker, uct_md_h md, | ||
uct_tl_resource_desc_t *resource) | ||
uct_tl_resource_desc_v2_t *resource) | ||
{ | ||
char buf[256] = {0}; | ||
uct_iface_params_t iface_params = { | ||
|
@@ -131,8 +131,8 @@ static void print_iface_info(uct_worker_h worker, uct_md_h md, | |
UCT_IFACE_PARAM_FIELD_STATS_ROOT | | ||
UCT_IFACE_PARAM_FIELD_RX_HEADROOM, | ||
.open_mode = UCT_IFACE_OPEN_MODE_DEVICE, | ||
.mode.device.tl_name = resource->tl_name, | ||
.mode.device.dev_name = resource->dev_name, | ||
.mode.device.tl_name = resource->desc.tl_name, | ||
.mode.device.dev_name = resource->desc.dev_name, | ||
.stats_root = ucs_stats_get_root(), | ||
.rx_headroom = 0 | ||
}; | ||
|
@@ -142,18 +142,19 @@ static void print_iface_info(uct_worker_h worker, uct_md_h md, | |
ucs_status_t status; | ||
uct_iface_h iface; | ||
|
||
status = uct_md_iface_config_read(md, resource->tl_name, NULL, NULL, &iface_config); | ||
status = uct_md_iface_config_read(md, resource->desc.tl_name, NULL, NULL, &iface_config); | ||
if (status != UCS_OK) { | ||
return; | ||
} | ||
|
||
printf("# Transport: %s\n", resource->tl_name); | ||
printf("# Device: %s\n", resource->dev_name); | ||
printf("# Type: %s\n", uct_device_type_names[resource->dev_type]); | ||
printf("# Transport: %s\n", resource->desc.tl_name); | ||
printf("# Device: %s\n", resource->desc.dev_name); | ||
printf("# Type: %s\n", uct_device_type_names[resource->desc.dev_type]); | ||
printf("# Flags: 0x%zx\n", resource->flags); | ||
printf("# System device: %s", | ||
ucs_topo_sys_device_get_name(resource->sys_device)); | ||
if (resource->sys_device != UCS_SYS_DEVICE_ID_UNKNOWN) { | ||
printf(" (%d)", resource->sys_device); | ||
ucs_topo_sys_device_get_name(resource->desc.sys_device)); | ||
if (resource->desc.sys_device != UCS_SYS_DEVICE_ID_UNKNOWN) { | ||
printf(" (%d)", resource->desc.sys_device); | ||
} | ||
printf("\n"); | ||
|
||
|
@@ -348,7 +349,7 @@ static void print_iface_info(uct_worker_h worker, uct_md_h md, | |
} | ||
|
||
static ucs_status_t print_tl_info(uct_md_h md, const char *tl_name, | ||
uct_tl_resource_desc_t *resources, | ||
uct_tl_resource_desc_v2_t *resources, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. pass by const? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fixed |
||
unsigned num_resources, | ||
int print_opts, | ||
ucs_config_print_flags_t print_flags) | ||
|
@@ -375,7 +376,7 @@ static ucs_status_t print_tl_info(uct_md_h md, const char *tl_name, | |
printf("# (No supported devices found)\n"); | ||
} | ||
for (i = 0; i < num_resources; ++i) { | ||
ucs_assert(!strcmp(tl_name, resources[i].tl_name)); | ||
ucs_assert(!strcmp(tl_name, resources[i].desc.tl_name)); | ||
print_iface_info(worker, md, &resources[i]); | ||
} | ||
|
||
|
@@ -392,14 +393,15 @@ static void print_md_info(uct_component_h component, | |
const char *req_tl_name) | ||
{ | ||
UCS_STRING_BUFFER_ONSTACK(strb, 256); | ||
uct_tl_resource_desc_t *resources, tmp; | ||
uct_tl_resource_desc_v2_t *resources, tmp; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. seems we can use old API here There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. using the new one allows to dump the added |
||
unsigned resource_index, j, num_resources, count; | ||
ucs_memory_type_t mem_type; | ||
ucs_status_t status; | ||
const char *tl_name; | ||
uct_md_config_t *md_config; | ||
uct_md_attr_v2_t md_attr; | ||
uct_md_h md; | ||
uct_md_query_tl_resources_params_t params; | ||
|
||
status = uct_md_config_read(component, NULL, NULL, &md_config); | ||
if (status != UCS_OK) { | ||
|
@@ -413,7 +415,9 @@ static void print_md_info(uct_component_h component, | |
goto out; | ||
} | ||
|
||
status = uct_md_query_tl_resources(md, &resources, &num_resources); | ||
params.field_mask = 0; | ||
status = uct_md_query_tl_resources_v2(md, &resources, &num_resources, | ||
&params); | ||
if (status != UCS_OK) { | ||
printf("# < failed to query memory domain resources >\n"); | ||
goto out_close_md; | ||
|
@@ -426,7 +430,7 @@ static void print_md_info(uct_component_h component, | |
if (req_tl_name != NULL) { | ||
resource_index = 0; | ||
while (resource_index < num_resources) { | ||
if (!strcmp(resources[resource_index].tl_name, req_tl_name)) { | ||
if (!strcmp(resources[resource_index].desc.tl_name, req_tl_name)) { | ||
break; | ||
} | ||
++resource_index; | ||
|
@@ -519,10 +523,10 @@ static void print_md_info(uct_component_h component, | |
resource_index = 0; | ||
while (resource_index < num_resources) { | ||
/* Gather all resources for this transport */ | ||
tl_name = resources[resource_index].tl_name; | ||
tl_name = resources[resource_index].desc.tl_name; | ||
count = 1; | ||
for (j = resource_index + 1; j < num_resources; ++j) { | ||
if (!strcmp(tl_name, resources[j].tl_name)) { | ||
if (!strcmp(tl_name, resources[j].desc.tl_name)) { | ||
tmp = resources[count + resource_index]; | ||
resources[count + resource_index] = resources[j]; | ||
resources[j] = tmp; | ||
|
@@ -539,7 +543,7 @@ static void print_md_info(uct_component_h component, | |
} | ||
|
||
out_free_list: | ||
uct_release_tl_resource_list(resources); | ||
uct_release_tl_resource_list_v2(resources); | ||
out_close_md: | ||
uct_md_close(md); | ||
out: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1112,31 +1112,36 @@ static int ucp_tl_resource_is_same_device(const uct_tl_resource_desc_t *resource | |
static void ucp_add_tl_resource_if_enabled( | ||
ucp_context_h context, ucp_md_index_t md_index, | ||
const ucp_config_t *config, const ucs_string_set_t *aux_tls, | ||
const uct_tl_resource_desc_t *resource, unsigned *num_resources_p, | ||
const uct_tl_resource_desc_v2_t *resource, unsigned *num_resources_p, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe you can name it There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fixed |
||
uint64_t dev_cfg_masks[], uint64_t *tl_cfg_mask) | ||
{ | ||
uint8_t rsc_flags; | ||
ucp_rsc_index_t dev_index, i; | ||
|
||
if (ucp_is_resource_enabled(resource, config, aux_tls, &rsc_flags, | ||
if (ucp_is_resource_enabled(&resource->desc, config, aux_tls, &rsc_flags, | ||
dev_cfg_masks, tl_cfg_mask)) { | ||
if ((resource->sys_device != UCS_SYS_DEVICE_ID_UNKNOWN) && | ||
(resource->sys_device >= UCP_MAX_SYS_DEVICES)) { | ||
if ((resource->desc.sys_device != UCS_SYS_DEVICE_ID_UNKNOWN) && | ||
(resource->desc.sys_device >= UCP_MAX_SYS_DEVICES)) { | ||
ucs_diag(UCT_TL_RESOURCE_DESC_FMT | ||
" system device is %d, which exceeds the maximal " | ||
"supported (%d), system locality may be ignored", | ||
UCT_TL_RESOURCE_DESC_ARG(resource), resource->sys_device, | ||
UCT_TL_RESOURCE_DESC_ARG(&resource->desc), resource->desc.sys_device, | ||
UCP_MAX_SYS_DEVICES); | ||
} | ||
context->tl_rscs[context->num_tls].tl_rsc = *resource; | ||
context->tl_rscs[context->num_tls].tl_rsc = resource->desc; | ||
context->tl_rscs[context->num_tls].md_index = md_index; | ||
context->tl_rscs[context->num_tls].tl_name_csum = | ||
ucs_crc16_string(resource->tl_name); | ||
ucs_crc16_string(resource->desc.tl_name); | ||
context->tl_rscs[context->num_tls].flags = rsc_flags; | ||
if (resource->flags & UCT_TL_RESOURCE_DESC_FLAG_INTER_NODE) { | ||
context->tl_rscs[context->num_tls].flags |= | ||
UCP_TL_RSC_FLAG_INTER_NODE; | ||
} | ||
|
||
dev_index = 0; | ||
for (i = 0; i < context->num_tls; ++i) { | ||
if (ucp_tl_resource_is_same_device(&context->tl_rscs[i].tl_rsc, resource)) { | ||
if (ucp_tl_resource_is_same_device(&context->tl_rscs[i].tl_rsc, | ||
&resource->desc)) { | ||
dev_index = context->tl_rscs[i].dev_index; | ||
break; | ||
} else { | ||
|
@@ -1159,16 +1164,19 @@ ucp_add_tl_resources(ucp_context_h context, ucp_md_index_t md_index, | |
uint64_t *tl_cfg_mask) | ||
{ | ||
ucp_tl_md_t *md = &context->tl_mds[md_index]; | ||
uct_tl_resource_desc_t *tl_resources; | ||
uct_tl_resource_desc_v2_t *tl_resources; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. seems we can use old API here There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we need v2 because we call |
||
ucp_tl_resource_desc_t *tmp; | ||
unsigned num_tl_resources; | ||
ucs_status_t status; | ||
ucp_rsc_index_t i; | ||
uct_md_query_tl_resources_params_t params; | ||
|
||
*num_resources_p = 0; | ||
*num_resources_p = 0; | ||
params.field_mask = 0; | ||
|
||
/* check what are the available uct resources */ | ||
status = uct_md_query_tl_resources(md->md, &tl_resources, &num_tl_resources); | ||
status = uct_md_query_tl_resources_v2(md->md, &tl_resources, | ||
&num_tl_resources, &params); | ||
if (status != UCS_OK) { | ||
ucs_error("Failed to query resources: %s", ucs_status_string(status)); | ||
goto out; | ||
|
@@ -1197,18 +1205,18 @@ ucp_add_tl_resources(ucp_context_h context, ucp_md_index_t md_index, | |
/* copy only the resources enabled by user configuration */ | ||
context->tl_rscs = tmp; | ||
for (i = 0; i < num_tl_resources; ++i) { | ||
ucs_string_set_addf(&avail_devices[tl_resources[i].dev_type], | ||
"'%s'(%s)", tl_resources[i].dev_name, | ||
ucs_string_set_addf(&avail_devices[tl_resources[i].desc.dev_type], | ||
"'%s'(%s)", tl_resources[i].desc.dev_name, | ||
context->tl_cmpts[md->cmpt_index].attr.name); | ||
ucs_string_set_add(avail_tls, tl_resources[i].tl_name); | ||
ucs_string_set_add(avail_tls, tl_resources[i].desc.tl_name); | ||
ucp_add_tl_resource_if_enabled(context, md_index, config, aux_tls, | ||
&tl_resources[i], num_resources_p, | ||
dev_cfg_masks, tl_cfg_mask); | ||
} | ||
|
||
status = UCS_OK; | ||
free_resources: | ||
uct_release_tl_resource_list(tl_resources); | ||
uct_release_tl_resource_list_v2(tl_resources); | ||
out: | ||
return status; | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1050,6 +1050,83 @@ int uct_iface_is_reachable_v2(uct_iface_h iface, | |
const uct_iface_is_reachable_params_t *params); | ||
|
||
|
||
/** | ||
* @ingroup UCT_RESOURCE | ||
* @brief Parameters passed to @ref uct_md_query_tl_resources_v2. | ||
*/ | ||
typedef struct uct_md_query_tl_resources_params { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. struct name seems redundant There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fixed |
||
/** | ||
* Mask of valid fields. No fields are defined yet, so it must currently be | ||
* set to zero. Fields not specified in this mask will be ignored. | ||
* Provides ABI compatibility with respect to adding new fields. | ||
*/ | ||
uint64_t field_mask; | ||
} uct_md_query_tl_resources_params_t; | ||
|
||
|
||
/** | ||
* @ingroup UCT_RESOURCE | ||
* @brief Capability flags of @ref uct_tl_resource_desc_v2_t. | ||
* | ||
* The enumeration defines bit mask of capabilities in @ref | ||
* uct_tl_resource_desc_v2_t::flags, set by @ref uct_md_query_tl_resources_v2. | ||
*/ | ||
enum { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe give it a name and @reference in uct_tl_resource_desc_t There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fixed |
||
/** | ||
* If set, the resource supports inter-node communications. | ||
*/ | ||
UCT_TL_RESOURCE_DESC_FLAG_INTER_NODE = UCS_BIT(0) | ||
}; | ||
|
||
|
||
/** | ||
* @ingroup UCT_RESOURCE | ||
* @brief Communication resource descriptor. | ||
* | ||
* Resource descriptor of a standalone communication resource, extended with | ||
* additional resource flags. | ||
*/ | ||
typedef struct uct_tl_resource_desc_v2 { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. redundant struct name |
||
uct_tl_resource_desc_t desc; /**< Main resource descriptor */ | ||
uint64_t flags; /**< Associated resource flags */ | ||
} uct_tl_resource_desc_v2_t; | ||
|
||
|
||
/** | ||
* @ingroup UCT_RESOURCE | ||
* @brief Query for transport resources. | ||
* | ||
* This routine queries the @ref uct_md_h "memory domain" for communication | ||
* resources that are available for it. | ||
* | ||
* @param [in] md Handle to memory domain. | ||
* @param [out] resources_p Filled with a pointer to an array of resource | ||
* descriptors. | ||
* @param [out] num_resources_p Filled with the number of resources in the array. | ||
* @param [in] params Parameters as defined in @ref | ||
* uct_md_query_tl_resources_params_t. | ||
* | ||
* @return Error code. | ||
*/ | ||
ucs_status_t | ||
uct_md_query_tl_resources_v2(uct_md_h md, | ||
uct_tl_resource_desc_v2_t **resources_p, | ||
unsigned *num_resources_p, | ||
uct_md_query_tl_resources_params_t *params); | ||
rakhmets marked this conversation as resolved.
Show resolved
Hide resolved
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can params be const? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i think no |
||
|
||
|
||
/** | ||
* @ingroup UCT_RESOURCE | ||
* @brief Release the list of resources returned from @ref uct_md_query_tl_resources_v2. | ||
* | ||
* This routine releases the memory associated with the list of resources | ||
* allocated by @ref uct_md_query_tl_resources_v2. | ||
* | ||
* @param [in] resources Array of resource descriptors to release. | ||
*/ | ||
void uct_release_tl_resource_list_v2(uct_tl_resource_desc_v2_t *resources); | ||
|
||
|
||
/** | ||
* @ingroup UCT_RESOURCE | ||
* @brief Connect endpoint to a remote endpoint. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
const?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fixed