Skip to content

Commit d31560e

Browse files
committed
prov/efa: Disable resource management when user set FI_OPT_EFA_RNR_RETRY
If a user is explicitly asking for a retry count that means they also want to manage the resources themselves and the EFA provider should disable resource management to prevent implicit retries. Also remove the unused function. Signed-off-by: Jessie Yang <[email protected]>
1 parent b30ce18 commit d31560e

File tree

5 files changed

+33
-21
lines changed

5 files changed

+33
-21
lines changed

prov/efa/src/rdm/efa_rdm_ep.h

-21
Original file line numberDiff line numberDiff line change
@@ -263,27 +263,6 @@ struct efa_domain *efa_rdm_ep_domain(struct efa_rdm_ep *ep)
263263

264264
void efa_rdm_ep_post_internal_rx_pkts(struct efa_rdm_ep *ep);
265265

266-
/**
267-
* @brief return whether this endpoint should write error cq entry for RNR.
268-
*
269-
* For an endpoint to write RNR completion, two conditions must be met:
270-
*
271-
* First, the end point must be able to receive RNR completion from rdma-core,
272-
* which means rnr_etry must be less then EFA_RNR_INFINITE_RETRY.
273-
*
274-
* Second, the app need to request this feature when opening endpoint
275-
* (by setting info->domain_attr->resource_mgmt to FI_RM_DISABLED).
276-
* The setting was saved as efa_rdm_ep->handle_resource_management.
277-
*
278-
* @param[in] ep endpoint
279-
*/
280-
static inline
281-
bool efa_rdm_ep_should_write_rnr_completion(struct efa_rdm_ep *ep)
282-
{
283-
return (efa_env.rnr_retry < EFA_RNR_INFINITE_RETRY) &&
284-
(ep->handle_resource_management == FI_RM_DISABLED);
285-
}
286-
287266
/*
288267
* @brief: check whether we should use p2p for this transaction
289268
*

prov/efa/src/rdm/efa_rdm_ep_fiops.c

+6
Original file line numberDiff line numberDiff line change
@@ -1697,6 +1697,12 @@ static int efa_rdm_ep_setopt(fid_t fid, int level, int optname,
16971697
return -FI_ENOSYS;
16981698
}
16991699
efa_rdm_ep->base_ep.rnr_retry = *(size_t *)optval;
1700+
/*
1701+
* If a user is explicitly asking for a retry count that means
1702+
* they also want to manage the resources themselves and the EFA
1703+
* provider should disable resource management to prevent implicit retries.
1704+
*/
1705+
efa_rdm_ep->handle_resource_management = FI_RM_DISABLED;
17001706
break;
17011707
case FI_OPT_FI_HMEM_P2P:
17021708
if (optlen != sizeof(int))

prov/efa/test/efa_unit_test_ep.c

+25
Original file line numberDiff line numberDiff line change
@@ -858,6 +858,31 @@ void test_efa_rdm_ep_setopt_shared_memory_permitted(struct efa_resource **state)
858858
assert_null(ep->shm_ep);
859859
}
860860

861+
/* Test resource management is disabled when user sets FI_OPT_EFA_RNR_RETRY */
862+
void test_efa_rdm_ep_setopt_rnr_retry(struct efa_resource **state)
863+
{
864+
struct efa_resource *resource = *state;
865+
struct efa_rdm_ep *ep;
866+
size_t rnr_retry = 5;
867+
868+
resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM);
869+
870+
efa_unit_test_resource_construct_with_hints(
871+
resource, FI_EP_RDM, FI_VERSION(1, 14), resource->hints, false,
872+
true);
873+
874+
assert_int_equal(fi_setopt(&resource->ep->fid, FI_OPT_ENDPOINT,
875+
FI_OPT_EFA_RNR_RETRY, &rnr_retry,
876+
sizeof(rnr_retry)), 0);
877+
878+
assert_int_equal(fi_enable(resource->ep), 0);
879+
880+
ep = container_of(resource->ep, struct efa_rdm_ep,
881+
base_ep.util_ep.ep_fid);
882+
883+
assert_int_equal(ep->handle_resource_management, FI_RM_DISABLED);
884+
}
885+
861886
/**
862887
* @brief Test fi_enable with different optval of fi_setopt for
863888
* FI_OPT_EFA_WRITE_IN_ORDER_ALIGNED_128_BYTES optname.

prov/efa/test/efa_unit_tests.c

+1
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ int main(void)
9292
cmocka_unit_test_setup_teardown(test_efa_rdm_ep_getopt_undersized_optlen, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
9393
cmocka_unit_test_setup_teardown(test_efa_rdm_ep_getopt_oversized_optlen, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
9494
cmocka_unit_test_setup_teardown(test_efa_rdm_ep_setopt_shared_memory_permitted, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
95+
cmocka_unit_test_setup_teardown(test_efa_rdm_ep_setopt_rnr_retry, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
9596
cmocka_unit_test_setup_teardown(test_efa_rdm_ep_enable_qp_in_order_aligned_128_bytes_good, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
9697
cmocka_unit_test_setup_teardown(test_efa_rdm_ep_enable_qp_in_order_aligned_128_bytes_bad, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
9798
cmocka_unit_test_setup_teardown(test_efa_rdm_ep_pkt_pool_flags, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),

prov/efa/test/efa_unit_tests.h

+1
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ void test_efa_rdm_ep_send_with_shm_no_copy();
117117
void test_efa_rdm_ep_rma_without_caps();
118118
void test_efa_rdm_ep_atomic_without_caps();
119119
void test_efa_rdm_ep_setopt_shared_memory_permitted();
120+
void test_efa_rdm_ep_setopt_rnr_retry();
120121
void test_efa_rdm_ep_enable_qp_in_order_aligned_128_bytes_good();
121122
void test_efa_rdm_ep_enable_qp_in_order_aligned_128_bytes_bad();
122123
void test_efa_rdm_ep_user_zcpy_rx_disabled();

0 commit comments

Comments
 (0)