Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MCA variable scope and coll tuned rules file update onto v4.1.x #12928

Merged
merged 8 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 11 additions & 10 deletions ompi/mca/coll/adapt/coll_adapt_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
* Copyright (c) 2014-2020 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -114,39 +115,39 @@ static int adapt_register(void)
we should have a high priority */
cs->adapt_priority = 0;
(void) mca_base_component_var_register(c, "priority", "Priority of the adapt coll component",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->adapt_priority);
MCA_BASE_VAR_SCOPE_ALL, &cs->adapt_priority);

cs->adapt_verbose = ompi_coll_base_framework.framework_verbose;
(void) mca_base_component_var_register(c, "verbose",
"Verbose level (default set to the collective framework verbosity)",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->adapt_verbose);
MCA_BASE_VAR_SCOPE_ALL, &cs->adapt_verbose);

cs->adapt_context_free_list_min = 64;
(void) mca_base_component_var_register(c, "context_free_list_min",
"Minimum number of segments in context free list",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
MCA_BASE_VAR_SCOPE_ALL,
&cs->adapt_context_free_list_min);

cs->adapt_context_free_list_max = 1024;
(void) mca_base_component_var_register(c, "context_free_list_max",
"Maximum number of segments in context free list",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
MCA_BASE_VAR_SCOPE_ALL,
&cs->adapt_context_free_list_max);

cs->adapt_context_free_list_inc = 32;
(void) mca_base_component_var_register(c, "context_free_list_inc",
"Increasement number of segments in context free list",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
MCA_BASE_VAR_SCOPE_ALL,
&cs->adapt_context_free_list_inc);
ompi_coll_adapt_ibcast_register();
ompi_coll_adapt_ireduce_register();
Expand Down
22 changes: 12 additions & 10 deletions ompi/mca/coll/adapt/coll_adapt_ibcast.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
* Copyright (c) 2014-2020 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -33,8 +34,9 @@ int ompi_coll_adapt_ibcast_register(void)

mca_coll_adapt_component.adapt_ibcast_algorithm = 1;
mca_base_component_var_register(c, "bcast_algorithm",
"Algorithm of broadcast, 0: tuned, 1: binomial, 2: in_order_binomial, 3: binary, 4: pipeline, 5: chain, 6: linear", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_READONLY,
"Algorithm of broadcast, 0: tuned, 1: binomial, 2: in_order_binomial, 3: binary, 4: pipeline, 5: chain, 6: linear",
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_ALL,
&mca_coll_adapt_component.adapt_ibcast_algorithm);
if( (mca_coll_adapt_component.adapt_ibcast_algorithm < 0) ||
(mca_coll_adapt_component.adapt_ibcast_algorithm >= OMPI_COLL_ADAPT_ALGORITHM_COUNT) ) {
Expand All @@ -44,33 +46,33 @@ int ompi_coll_adapt_ibcast_register(void)
mca_coll_adapt_component.adapt_ibcast_segment_size = 0;
mca_base_component_var_register(c, "bcast_segment_size",
"Segment size in bytes used by default for bcast algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
MCA_BASE_VAR_SCOPE_ALL,
&mca_coll_adapt_component.adapt_ibcast_segment_size);

mca_coll_adapt_component.adapt_ibcast_max_send_requests = 2;
mca_base_component_var_register(c, "bcast_max_send_requests",
"Maximum number of send requests",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
MCA_BASE_VAR_SCOPE_ALL,
&mca_coll_adapt_component.adapt_ibcast_max_send_requests);

mca_coll_adapt_component.adapt_ibcast_max_recv_requests = 3;
mca_base_component_var_register(c, "bcast_max_recv_requests",
"Maximum number of receive requests",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
MCA_BASE_VAR_SCOPE_ALL,
&mca_coll_adapt_component.adapt_ibcast_max_recv_requests);

mca_coll_adapt_component.adapt_ibcast_synchronous_send = true;
(void) mca_base_component_var_register(c, "bcast_synchronous_send",
"Whether to use synchronous send operations during setup of bcast operations",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
MCA_BASE_VAR_SCOPE_ALL,
&mca_coll_adapt_component.adapt_ibcast_synchronous_send);

mca_coll_adapt_component.adapt_ibcast_context_free_list = NULL;
Expand Down
34 changes: 18 additions & 16 deletions ompi/mca/coll/adapt/coll_adapt_ireduce.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2020 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -36,8 +37,9 @@ int ompi_coll_adapt_ireduce_register(void)

mca_coll_adapt_component.adapt_ireduce_algorithm = 1;
mca_base_component_var_register(c, "reduce_algorithm",
"Algorithm of reduce, 1: binomial, 2: in_order_binomial, 3: binary, 4: pipeline, 5: chain, 6: linear", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_READONLY,
"Algorithm of reduce, 1: binomial, 2: in_order_binomial, 3: binary, 4: pipeline, 5: chain, 6: linear",
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_ALL,
&mca_coll_adapt_component.adapt_ireduce_algorithm);
if( (mca_coll_adapt_component.adapt_ireduce_algorithm < 0) ||
(mca_coll_adapt_component.adapt_ireduce_algorithm > OMPI_COLL_ADAPT_ALGORITHM_COUNT) ) {
Expand All @@ -47,58 +49,58 @@ int ompi_coll_adapt_ireduce_register(void)
mca_coll_adapt_component.adapt_ireduce_segment_size = 163740;
mca_base_component_var_register(c, "reduce_segment_size",
"Segment size in bytes used by default for reduce algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0,
MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
MCA_BASE_VAR_SCOPE_ALL,
&mca_coll_adapt_component.adapt_ireduce_segment_size);

mca_coll_adapt_component.adapt_ireduce_max_send_requests = 2;
mca_base_component_var_register(c, "reduce_max_send_requests",
"Maximum number of send requests",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
MCA_BASE_VAR_SCOPE_ALL,
&mca_coll_adapt_component.adapt_ireduce_max_send_requests);

mca_coll_adapt_component.adapt_ireduce_max_recv_requests = 3;
mca_base_component_var_register(c, "reduce_max_recv_requests",
"Maximum number of receive requests per peer",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
MCA_BASE_VAR_SCOPE_ALL,
&mca_coll_adapt_component.adapt_ireduce_max_recv_requests);

mca_coll_adapt_component.adapt_inbuf_free_list_min = 10;
mca_base_component_var_register(c, "inbuf_free_list_min",
"Minimum number of segment in inbuf free list",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
MCA_BASE_VAR_SCOPE_ALL,
&mca_coll_adapt_component.adapt_inbuf_free_list_min);

mca_coll_adapt_component.adapt_inbuf_free_list_max = 10000;
mca_base_component_var_register(c, "inbuf_free_list_max",
"Maximum number of segment in inbuf free list",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
MCA_BASE_VAR_SCOPE_ALL,
&mca_coll_adapt_component.adapt_inbuf_free_list_max);


mca_coll_adapt_component.adapt_inbuf_free_list_inc = 10;
mca_base_component_var_register(c, "inbuf_free_list_inc",
"Number of segments to allocate when growing the inbuf free list",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
MCA_BASE_VAR_SCOPE_ALL,
&mca_coll_adapt_component.adapt_inbuf_free_list_inc);

mca_coll_adapt_component.adapt_ireduce_synchronous_send = true;
(void) mca_base_component_var_register(c, "reduce_synchronous_send",
"Whether to use synchronous send operations during setup of reduce operations",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
MCA_BASE_VAR_SCOPE_ALL,
&mca_coll_adapt_component.adapt_ireduce_synchronous_send);

mca_coll_adapt_component.adapt_ireduce_context_free_list = NULL;
Expand Down
21 changes: 21 additions & 0 deletions ompi/mca/coll/base/coll_base_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
* All rights reserved.
* Copyright (c) 2014-2020 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -437,6 +438,26 @@ int ompi_coll_base_file_peek_next_char_is(FILE *fptr, int *fileline, int expecte
} while (1);
}

/**
 * Peek at the next non-blank character of the stream and tell whether it is
 * a decimal digit.
 *
 * Spaces and tabs in front of that character are consumed and dropped; any
 * other character (a newline included) stops the scan. The character that
 * stopped the scan is handed back to the stream with ungetc(), so the next
 * read by the caller still sees it.
 *
 * @param fptr  stream to look at
 *
 * @return the result of isdigit() on the peeked character: non-zero when it
 *         is a digit, 0 otherwise.
 */
int ompi_coll_base_file_peek_next_char_isdigit(FILE *fptr)
{
    int peeked;

    /* drop leading blanks: only ' ' and '\t' are skipped */
    for (peeked = fgetc(fptr); (' ' == peeked) || ('\t' == peeked); peeked = fgetc(fptr)) {
        /* nothing to do, the blank is simply discarded */
    }

    /* give the first non-blank back so that it stays readable */
    ungetc(peeked, fptr);

    /* the answer is based on the character we just pushed back */
    return isdigit(peeked);
}

/**
* There are certainly simpler implementation for this function when performance
* is not a critical point. But, as this function is used during the collective
Expand Down
2 changes: 2 additions & 0 deletions ompi/mca/coll/base/coll_base_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
* All rights reserved.
* Copyright (c) 2014-2020 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -185,6 +186,7 @@ int ompi_coll_base_file_getnext_string(FILE *fptr, int *fileline, char** val);
* eat the value, otherwise put it back into the file.
*/
int ompi_coll_base_file_peek_next_char_is(FILE *fptr, int *fileline, int expected);
/**
 * Skip any spaces and tabs, then report whether the next character in the
 * stream is a decimal digit. That character is put back into the stream and
 * is therefore not consumed. Returns non-zero for a digit, 0 otherwise.
 */
int ompi_coll_base_file_peek_next_char_isdigit(FILE *fptr);

/* Miscellaneous function */
const char* mca_coll_base_colltype_to_str(int collid);
Expand Down
9 changes: 5 additions & 4 deletions ompi/mca/coll/basic/coll_basic_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -91,16 +92,16 @@ basic_register(void)
mca_coll_basic_priority = 10;
(void) mca_base_component_var_register(&mca_coll_basic_component.collm_version, "priority",
"Priority of the basic coll component",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
MCA_BASE_VAR_SCOPE_ALL,
&mca_coll_basic_priority);
mca_coll_basic_crossover = 4;
(void) mca_base_component_var_register(&mca_coll_basic_component.collm_version, "crossover",
"Minimum number of processes in a communicator before using the logarithmic algorithms",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
MCA_BASE_VAR_SCOPE_ALL,
&mca_coll_basic_crossover);

return OMPI_SUCCESS;
Expand Down
Loading
Loading