Skip to content

Commit

Permalink
Merge pull request #12928 from burlen/v4.1.x
Browse files Browse the repository at this point in the history
MCA variable scope and coll tuned rules file update onto v4.1.x
  • Loading branch information
jsquyres authored Dec 17, 2024
2 parents c2cc293 + f7011c6 commit 23b6d34
Show file tree
Hide file tree
Showing 10 changed files with 222 additions and 142 deletions.
21 changes: 11 additions & 10 deletions ompi/mca/coll/adapt/coll_adapt_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
* Copyright (c) 2014-2020 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -114,39 +115,39 @@ static int adapt_register(void)
we should have a high priority */
cs->adapt_priority = 0;
(void) mca_base_component_var_register(c, "priority", "Priority of the adapt coll component",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->adapt_priority);
MCA_BASE_VAR_SCOPE_ALL, &cs->adapt_priority);

cs->adapt_verbose = ompi_coll_base_framework.framework_verbose;
(void) mca_base_component_var_register(c, "verbose",
"Verbose level (default set to the collective framework verbosity)",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->adapt_verbose);
MCA_BASE_VAR_SCOPE_ALL, &cs->adapt_verbose);

cs->adapt_context_free_list_min = 64;
(void) mca_base_component_var_register(c, "context_free_list_min",
"Minimum number of segments in context free list",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
MCA_BASE_VAR_SCOPE_ALL,
&cs->adapt_context_free_list_min);

cs->adapt_context_free_list_max = 1024;
(void) mca_base_component_var_register(c, "context_free_list_max",
"Maximum number of segments in context free list",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
MCA_BASE_VAR_SCOPE_ALL,
&cs->adapt_context_free_list_max);

cs->adapt_context_free_list_inc = 32;
(void) mca_base_component_var_register(c, "context_free_list_inc",
"Increasement number of segments in context free list",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
MCA_BASE_VAR_SCOPE_ALL,
&cs->adapt_context_free_list_inc);
ompi_coll_adapt_ibcast_register();
ompi_coll_adapt_ireduce_register();
Expand Down
22 changes: 12 additions & 10 deletions ompi/mca/coll/adapt/coll_adapt_ibcast.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
* Copyright (c) 2014-2020 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -33,8 +34,9 @@ int ompi_coll_adapt_ibcast_register(void)

mca_coll_adapt_component.adapt_ibcast_algorithm = 1;
mca_base_component_var_register(c, "bcast_algorithm",
"Algorithm of broadcast, 0: tuned, 1: binomial, 2: in_order_binomial, 3: binary, 4: pipeline, 5: chain, 6: linear", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_READONLY,
"Algorithm of broadcast, 0: tuned, 1: binomial, 2: in_order_binomial, 3: binary, 4: pipeline, 5: chain, 6: linear",
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_ALL,
&mca_coll_adapt_component.adapt_ibcast_algorithm);
if( (mca_coll_adapt_component.adapt_ibcast_algorithm < 0) ||
(mca_coll_adapt_component.adapt_ibcast_algorithm >= OMPI_COLL_ADAPT_ALGORITHM_COUNT) ) {
Expand All @@ -44,33 +46,33 @@ int ompi_coll_adapt_ibcast_register(void)
mca_coll_adapt_component.adapt_ibcast_segment_size = 0;
mca_base_component_var_register(c, "bcast_segment_size",
"Segment size in bytes used by default for bcast algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
MCA_BASE_VAR_SCOPE_ALL,
&mca_coll_adapt_component.adapt_ibcast_segment_size);

mca_coll_adapt_component.adapt_ibcast_max_send_requests = 2;
mca_base_component_var_register(c, "bcast_max_send_requests",
"Maximum number of send requests",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
MCA_BASE_VAR_SCOPE_ALL,
&mca_coll_adapt_component.adapt_ibcast_max_send_requests);

mca_coll_adapt_component.adapt_ibcast_max_recv_requests = 3;
mca_base_component_var_register(c, "bcast_max_recv_requests",
"Maximum number of receive requests",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
MCA_BASE_VAR_SCOPE_ALL,
&mca_coll_adapt_component.adapt_ibcast_max_recv_requests);

mca_coll_adapt_component.adapt_ibcast_synchronous_send = true;
(void) mca_base_component_var_register(c, "bcast_synchronous_send",
"Whether to use synchronous send operations during setup of bcast operations",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
MCA_BASE_VAR_SCOPE_ALL,
&mca_coll_adapt_component.adapt_ibcast_synchronous_send);

mca_coll_adapt_component.adapt_ibcast_context_free_list = NULL;
Expand Down
34 changes: 18 additions & 16 deletions ompi/mca/coll/adapt/coll_adapt_ireduce.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2020 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -36,8 +37,9 @@ int ompi_coll_adapt_ireduce_register(void)

mca_coll_adapt_component.adapt_ireduce_algorithm = 1;
mca_base_component_var_register(c, "reduce_algorithm",
"Algorithm of reduce, 1: binomial, 2: in_order_binomial, 3: binary, 4: pipeline, 5: chain, 6: linear", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_READONLY,
"Algorithm of reduce, 1: binomial, 2: in_order_binomial, 3: binary, 4: pipeline, 5: chain, 6: linear",
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_ALL,
&mca_coll_adapt_component.adapt_ireduce_algorithm);
if( (mca_coll_adapt_component.adapt_ireduce_algorithm < 0) ||
(mca_coll_adapt_component.adapt_ireduce_algorithm > OMPI_COLL_ADAPT_ALGORITHM_COUNT) ) {
Expand All @@ -47,58 +49,58 @@ int ompi_coll_adapt_ireduce_register(void)
mca_coll_adapt_component.adapt_ireduce_segment_size = 163740;
mca_base_component_var_register(c, "reduce_segment_size",
"Segment size in bytes used by default for reduce algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0,
MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
MCA_BASE_VAR_SCOPE_ALL,
&mca_coll_adapt_component.adapt_ireduce_segment_size);

mca_coll_adapt_component.adapt_ireduce_max_send_requests = 2;
mca_base_component_var_register(c, "reduce_max_send_requests",
"Maximum number of send requests",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
MCA_BASE_VAR_SCOPE_ALL,
&mca_coll_adapt_component.adapt_ireduce_max_send_requests);

mca_coll_adapt_component.adapt_ireduce_max_recv_requests = 3;
mca_base_component_var_register(c, "reduce_max_recv_requests",
"Maximum number of receive requests per peer",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
MCA_BASE_VAR_SCOPE_ALL,
&mca_coll_adapt_component.adapt_ireduce_max_recv_requests);

mca_coll_adapt_component.adapt_inbuf_free_list_min = 10;
mca_base_component_var_register(c, "inbuf_free_list_min",
"Minimum number of segment in inbuf free list",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
MCA_BASE_VAR_SCOPE_ALL,
&mca_coll_adapt_component.adapt_inbuf_free_list_min);

mca_coll_adapt_component.adapt_inbuf_free_list_max = 10000;
mca_base_component_var_register(c, "inbuf_free_list_max",
"Maximum number of segment in inbuf free list",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
MCA_BASE_VAR_SCOPE_ALL,
&mca_coll_adapt_component.adapt_inbuf_free_list_max);


mca_coll_adapt_component.adapt_inbuf_free_list_inc = 10;
mca_base_component_var_register(c, "inbuf_free_list_inc",
"Number of segments to allocate when growing the inbuf free list",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
MCA_BASE_VAR_SCOPE_ALL,
&mca_coll_adapt_component.adapt_inbuf_free_list_inc);

mca_coll_adapt_component.adapt_ireduce_synchronous_send = true;
(void) mca_base_component_var_register(c, "reduce_synchronous_send",
"Whether to use synchronous send operations during setup of reduce operations",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
MCA_BASE_VAR_SCOPE_ALL,
&mca_coll_adapt_component.adapt_ireduce_synchronous_send);

mca_coll_adapt_component.adapt_ireduce_context_free_list = NULL;
Expand Down
21 changes: 21 additions & 0 deletions ompi/mca/coll/base/coll_base_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
* All rights reserved.
* Copyright (c) 2014-2020 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -437,6 +438,26 @@ int ompi_coll_base_file_peek_next_char_is(FILE *fptr, int *fileline, int expecte
} while (1);
}

/**
 * Peek at the next non-blank character of the stream.
 *
 * Leading spaces and tabs are consumed and discarded. The first character
 * that is neither a space nor a tab (a newline included) is pushed back onto
 * the stream, so the next read will see it again.
 *
 * @param fptr  stream to read from
 *
 * @return non-zero if that character is a decimal digit, 0 otherwise
 *         (0 is also returned when the end of the file is reached).
 */
int ompi_coll_base_file_peek_next_char_isdigit(FILE *fptr)
{
    int ch;

    /* swallow blanks; anything else (EOF included) terminates the scan */
    do {
        ch = fgetc(fptr);
    } while ((' ' == ch) || ('\t' == ch));

    /* hand the character back to the stream; pushing back EOF is a no-op */
    ungetc(ch, fptr);

    return isdigit(ch);
}

/**
* There are certainly simpler implementation for this function when performance
* is not a critical point. But, as this function is used during the collective
Expand Down
2 changes: 2 additions & 0 deletions ompi/mca/coll/base/coll_base_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
* All rights reserved.
* Copyright (c) 2014-2020 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -185,6 +186,7 @@ int ompi_coll_base_file_getnext_string(FILE *fptr, int *fileline, char** val);
* eat the value, otherwise put it back into the file.
*/
int ompi_coll_base_file_peek_next_char_is(FILE *fptr, int *fileline, int expected);
int ompi_coll_base_file_peek_next_char_isdigit(FILE *fptr);

/* Miscellaneous function */
const char* mca_coll_base_colltype_to_str(int collid);
Expand Down
9 changes: 5 additions & 4 deletions ompi/mca/coll/basic/coll_basic_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -91,16 +92,16 @@ basic_register(void)
mca_coll_basic_priority = 10;
(void) mca_base_component_var_register(&mca_coll_basic_component.collm_version, "priority",
"Priority of the basic coll component",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
MCA_BASE_VAR_SCOPE_ALL,
&mca_coll_basic_priority);
mca_coll_basic_crossover = 4;
(void) mca_base_component_var_register(&mca_coll_basic_component.collm_version, "crossover",
"Minimum number of processes in a communicator before using the logarithmic algorithms",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
MCA_BASE_VAR_SCOPE_ALL,
&mca_coll_basic_crossover);

return OMPI_SUCCESS;
Expand Down
Loading

0 comments on commit 23b6d34

Please sign in to comment.