verbs: add FI_VERBS_TOS to select the RDMA CM type of service

Adds an integer parameter "tos" (documented as FI_VERBS_TOS; valid range
-1..255, default -1 meaning "leave the system default untouched"). When a
value is configured, vrb_rdma_set_tos() applies it through
rdma_set_option(RDMA_OPTION_ID_TOS) once the QP has been set up in
vrb_create_ini_qp(), vrb_ep_create_tgt_qp(), vrb_ep_enable() and
vrb_eq_addr_resolved_event(). A failure is only logged via VRB_WARN_ERRNO;
it does not fail the calling function. The Windows rdma_set_option() stub
sets errno to ENOSYS and returns -1.

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. Tabs and blank context lines were rebuilt
from the hunk counts; if a hunk is rejected on whitespace, retry with
`git apply --ignore-whitespace`.

diff --git a/man/fi_verbs.7.md b/man/fi_verbs.7.md
index 0d42c06e63b..cdecaf99bf0 100644
--- a/man/fi_verbs.7.md
+++ b/man/fi_verbs.7.md
@@ -157,6 +157,11 @@ The verbs provider checks for the following environment variables.
 
 ### Common variables:
 
+*FI_VERBS_TOS*
+: RDMA CM ToS value. If unset or set to -1, then the ToS will not be
+  explicitly set and the system default will be used. Valid range is -1
+  through 255.
+
 *FI_VERBS_TX_SIZE*
 : Default maximum tx context size (default: 384)
 
diff --git a/prov/verbs/src/verbs_domain_xrc.c b/prov/verbs/src/verbs_domain_xrc.c
index 2c95c948a52..6e706e9111b 100644
--- a/prov/verbs/src/verbs_domain_xrc.c
+++ b/prov/verbs/src/verbs_domain_xrc.c
@@ -67,6 +67,10 @@ static int vrb_create_ini_qp(struct vrb_xrc_ep *ep)
 			 "XRC INI QP rdma_create_qp_ex failed %d\n", -ret);
 		return ret;
 	}
+
+	if (vrb_rdma_set_tos(ep->base_ep.id))
+		VRB_WARN_ERRNO(FI_LOG_EP_CTRL, "vrb_rdma_set_tos");
+
 	return FI_SUCCESS;
 #else /* VERBS_HAVE_XRC */
 	return -FI_ENOSYS;
@@ -400,6 +404,9 @@ int vrb_ep_create_tgt_qp(struct vrb_xrc_ep *ep, uint32_t tgt_qpn)
 	}
 	ep->tgt_ibv_qp = ep->tgt_id->qp;
 
+	if (vrb_rdma_set_tos(ep->tgt_id))
+		VRB_WARN_ERRNO(FI_LOG_EP_CTRL, "vrb_rdma_set_tos");
+
 	return FI_SUCCESS;
 #else /* VERBS_HAVE_XRC */
 	return -FI_ENOSYS;
diff --git a/prov/verbs/src/verbs_ep.c b/prov/verbs/src/verbs_ep.c
index 63aea82778d..b4c2e4b8c21 100644
--- a/prov/verbs/src/verbs_ep.c
+++ b/prov/verbs/src/verbs_ep.c
@@ -1063,6 +1063,9 @@ static int vrb_ep_enable(struct fid_ep *ep_fid)
 			/* Allow shared XRC INI QP not controlled by RDMA CM
 			 * to share same post functions as RC QP. */
 			ep->ibv_qp = ep->id->qp;
+
+			if (vrb_rdma_set_tos(ep->id))
+				VRB_WARN_ERRNO(FI_LOG_EP_CTRL, "vrb_rdma_set_tos");
 		}
 		break;
 	case FI_EP_DGRAM:
diff --git a/prov/verbs/src/verbs_eq.c b/prov/verbs/src/verbs_eq.c
index f9bc78a828f..590dc6d2107 100644
--- a/prov/verbs/src/verbs_eq.c
+++ b/prov/verbs/src/verbs_eq.c
@@ -893,6 +893,9 @@ vrb_eq_addr_resolved_event(struct vrb_ep *ep)
 		/* Allow shared XRC INI QP not controlled by RDMA CM
 		 * to share same post functions as RC QP. */
 		ep->ibv_qp = ep->id->qp;
+
+		if (vrb_rdma_set_tos(ep->id))
+			VRB_WARN_ERRNO(FI_LOG_EP_CTRL, "vrb_rdma_set_tos");
 	}
 
 	assert(ep->ibv_qp);
diff --git a/prov/verbs/src/verbs_init.c b/prov/verbs/src/verbs_init.c
index 67b999d2a61..279ee91e14b 100644
--- a/prov/verbs/src/verbs_init.c
+++ b/prov/verbs/src/verbs_init.c
@@ -45,6 +45,7 @@ static const char *local_node = "localhost";
 #define VERBS_DEFAULT_MIN_RNR_TIMER 12
 
 struct vrb_gl_data vrb_gl_data = {
+	.tos			= VERBS_TOS_UNSET,
 	.def_tx_size		= 384,
 	.def_rx_size		= 384,
 	.def_tx_iov_limit	= 4,
@@ -637,6 +638,14 @@ static int vrb_get_param_str(const char *param_name,
 int vrb_read_params(void)
 {
 	/* Common parameters */
+	if (vrb_get_param_int("tos", "RDMA CM ToS value. If unset or set to -1, then "
+			      "the ToS will not be explicitly set and the system "
+			      "default will be used. Valid range is -1 through 255.",
+			      &vrb_gl_data.tos) ||
+	    (vrb_gl_data.tos < -1 || vrb_gl_data.tos > 255)) {
+		VRB_WARN(FI_LOG_CORE, "Invalid value of ToS\n");
+		return -FI_EINVAL;
+	}
 	if (vrb_get_param_int("tx_size", "Default maximum tx context size",
 			      &vrb_gl_data.def_tx_size) ||
 	    (vrb_gl_data.def_tx_size < 0)) {
diff --git a/prov/verbs/src/verbs_ofi.h b/prov/verbs/src/verbs_ofi.h
index 0c54f879c45..efaa6e9c130 100644
--- a/prov/verbs/src/verbs_ofi.h
+++ b/prov/verbs/src/verbs_ofi.h
@@ -161,6 +161,8 @@
 #define VERBS_ANY_DOMAIN "verbs_any_domain"
 #define VERBS_ANY_FABRIC "verbs_any_fabric"
 
+#define VERBS_TOS_UNSET (-1)
+
 #ifdef HAVE_FABRIC_PROFILE
 struct vrb_profile;
 typedef struct vrb_profile vrb_profile_t;
@@ -176,6 +178,7 @@ extern ofi_mutex_t vrb_init_mutex;
 extern struct dlist_entry verbs_devs;
 
 extern struct vrb_gl_data {
+	int	tos;
 	int	def_tx_size;
 	int	def_rx_size;
 	int	def_tx_iov_limit;
@@ -1050,6 +1053,16 @@ vrb_free_recv_wr(struct vrb_progress *progress, struct vrb_recv_wr *wr)
 	ofi_buf_free(wr);
 }
 
+static inline int vrb_rdma_set_tos(struct rdma_cm_id *id)
+{
+	if (vrb_gl_data.tos == VERBS_TOS_UNSET)
+		return 0;
+
+	uint8_t tos = vrb_gl_data.tos;
+	return rdma_set_option(id, RDMA_OPTION_ID, RDMA_OPTION_ID_TOS, &tos,
+			       sizeof(tos));
+}
+
 int vrb_ep_ops_open(struct fid *fid, const char *name,
 		    uint64_t flags, void **ops, void *context);
 
diff --git a/prov/verbs/src/windows/verbs_nd_rdma.c b/prov/verbs/src/windows/verbs_nd_rdma.c
index 3235a21d03e..b44e36f01aa 100644
--- a/prov/verbs/src/windows/verbs_nd_rdma.c
+++ b/prov/verbs/src/windows/verbs_nd_rdma.c
@@ -240,6 +240,13 @@ int rdma_destroy_id(struct rdma_cm_id *id)
 	return 0;
 }
 
+int rdma_set_option(struct rdma_cm_id *id, int level, int optname,
+		    void *optval, size_t optlen)
+{
+	errno = ENOSYS;
+	return -1;
+}
+
 int rdma_migrate_id(struct rdma_cm_id *id, struct rdma_event_channel *channel)
 {
 	struct nd_cm_id *id_nd;