Skip to content

Commit

Permalink
Test efa direct
Browse files: browse the repository at this point in the history
Signed-off-by: Jessie Yang <[email protected]>
  • Loading branch information
jiaxiyan committed Jan 23, 2025
1 parent 3d04127 commit 6627c7f
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 4 deletions.
10 changes: 6 additions & 4 deletions fabtests/benchmarks/benchmark_shared.c
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,7 @@ int pingpong_rma(enum ft_rma_opcodes rma_op, struct fi_rma_iov *remote)
return EXIT_FAILURE;
}

inject_size = 0;
/* Init rx_buf with invalid iteration number.
* This must be done before the sender sends any data.
*/
Expand All @@ -333,7 +334,7 @@ int pingpong_rma(enum ft_rma_opcodes rma_op, struct fi_rma_iov *remote)
if (rma_op == FT_RMA_WRITE)
*(tx_buf + opts.transfer_size - 1) = (char)i;

if (opts.transfer_size <= inject_size)
if (opts.transfer_size < inject_size)
ret = ft_inject_rma(rma_op, remote, ep,
remote_fi_addr,
opts.transfer_size);
Expand All @@ -359,7 +360,7 @@ int pingpong_rma(enum ft_rma_opcodes rma_op, struct fi_rma_iov *remote)
if (rma_op == FT_RMA_WRITE)
*(tx_buf + opts.transfer_size - 1) = (char)i;

if (opts.transfer_size <= inject_size)
if (opts.transfer_size < inject_size)
ret = ft_inject_rma(rma_op, remote, ep,
remote_fi_addr,
opts.transfer_size);
Expand Down Expand Up @@ -602,6 +603,7 @@ int bandwidth_rma(enum ft_rma_opcodes rma_op, struct fi_rma_iov *remote)
if (ft_check_opts(FT_OPT_VERIFY_DATA))
inject_size = 0;

inject_size = 0;
ret = ft_sync();
if (ret)
return ret;
Expand Down Expand Up @@ -629,7 +631,7 @@ int bandwidth_rma(enum ft_rma_opcodes rma_op, struct fi_rma_iov *remote)
}
switch (rma_op) {
case FT_RMA_WRITE:
if (opts.transfer_size <= inject_size) {
if (opts.transfer_size < inject_size) {
ret = ft_post_rma_inject(FT_RMA_WRITE, tx_buf + offset,
opts.transfer_size, remote);
} else if (opts.use_fi_more) {
Expand All @@ -656,7 +658,7 @@ int bandwidth_rma(enum ft_rma_opcodes rma_op, struct fi_rma_iov *remote)
rx_seq++;

} else {
if (opts.transfer_size <= inject_size) {
if (opts.transfer_size < inject_size) {
ret = ft_post_rma_inject(FT_RMA_WRITEDATA,
tx_buf + offset,
opts.transfer_size,
Expand Down
2 changes: 2 additions & 0 deletions fabtests/common/shared.c
Original file line number Diff line number Diff line change
Expand Up @@ -2565,6 +2565,8 @@ ssize_t ft_post_rx_buf(struct fid_ep *ep, size_t size, void *ctx,
void *op_buf, void *op_mr_desc, uint64_t op_tag)
{
size = MAX(size, FT_MAX_CTRL_MSG) + ft_rx_prefix_size();
if (opts.max_msg_size)
size = MIN(size, opts.max_msg_size);
if (hints->caps & FI_TAGGED) {
op_tag = op_tag ? op_tag : rx_seq;
FT_POST(fi_trecv, ft_progress, rxcq, rx_seq, &rx_cq_cntr,
Expand Down
46 changes: 46 additions & 0 deletions fabtests/pytest/efa/test_rma_bw.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,49 @@ def test_rma_bw_use_fi_more(cmdline_args, operation_type, rma_bw_completion_sema
timeout = max(540, cmdline_args.timeout)
efa_run_client_server_test(cmdline_args, command, "short", rma_bw_completion_semantic,
"host_to_host", inject_message_size, timeout=timeout)


def test_rma_bw_direct(cmdline_args, rma_operation_type,
                       rma_bw_completion_semantic, rma_bw_memory_type,
                       message_size, zcpy_recv_max_msg_size):
    """Run fi_rma_bw on an RDM endpoint with --max-msg-size set.

    The command line combines zcpy_recv_max_msg_size (passed to
    --max-msg-size), rma_operation_type (passed to -o) and
    perf_progress_model_cli, and is executed as a "short" client/server
    test with the given completion semantic, memory type and message size.
    """
    command = " ".join([
        f"fi_rma_bw -e rdm --max-msg-size {zcpy_recv_max_msg_size}",
        "-o",
        rma_operation_type,
        perf_progress_model_cli,
    ])

    # rma_bw test with data verification takes longer to finish, so never
    # run with less than 540 as the timeout.
    run_timeout = max(540, cmdline_args.timeout)

    efa_run_client_server_test(
        cmdline_args,
        command,
        "short",
        rma_bw_completion_semantic,
        rma_bw_memory_type,
        message_size,
        timeout=run_timeout,
    )

@pytest.mark.functional
def test_rma_bw_range_direct(cmdline_args, rma_operation_type, rma_bw_completion_semantic,
                             message_size, rma_bw_memory_type, zcpy_recv_max_msg_size):
    """Functional fi_rma_bw run on an RDM endpoint with --max-msg-size set.

    Builds the command from zcpy_recv_max_msg_size (--max-msg-size) and
    rma_operation_type (-o) and executes it as a "short" client/server test.
    """
    base_command = f"fi_rma_bw -e rdm --max-msg-size {zcpy_recv_max_msg_size}"
    command = " ".join([base_command, "-o", rma_operation_type])

    # rma_bw test with data verification takes longer to finish, so never
    # run with less than 540 as the timeout.
    run_timeout = max(540, cmdline_args.timeout)

    efa_run_client_server_test(
        cmdline_args,
        command,
        "short",
        rma_bw_completion_semantic,
        rma_bw_memory_type,
        message_size,
        timeout=run_timeout,
    )

# Runs in serial mode because this test takes a lot of memory.
@pytest.mark.serial
@pytest.mark.functional
# TODO Add "writedata", "write" back in when EFA firmware bug is fixed
@pytest.mark.parametrize("operation_type", ["read"])
def test_rma_bw_1G_direct(cmdline_args, operation_type, rma_bw_completion_semantic, zcpy_recv_max_msg_size):
    """Run fi_rma_bw with a 1073741824-byte message and --max-msg-size set.

    The test uses 2 as the iteration type, host-to-host memory, and 0 as the
    warmup iteration type.
    """
    run_timeout = max(540, cmdline_args.timeout)

    # "-W 1" is required: the default window size is 64, resulting in 128GB
    # being registered, which exceeds the max number of registered host pages.
    base_command = f"fi_rma_bw -e rdm -W 1 --max-msg-size {zcpy_recv_max_msg_size}"
    command = " ".join([base_command, "-o", operation_type])

    one_gib = 1 << 30  # 1073741824 bytes
    efa_run_client_server_test(
        cmdline_args,
        command,
        2,
        completion_semantic=rma_bw_completion_semantic,
        message_size=one_gib,
        memory_type="host_to_host",
        warmup_iteration_type=0,
        timeout=run_timeout,
    )

@pytest.mark.functional
@pytest.mark.parametrize("operation_type", ["writedata", "write"])
def test_rma_bw_use_fi_more_direct(cmdline_args, operation_type, rma_bw_completion_semantic,
                                   inject_message_size, zcpy_recv_max_msg_size):
    """Run fi_rma_bw with --use-fi-more and --max-msg-size set.

    Parametrized over the "writedata" and "write" operations; runs as a
    "short" host-to-host client/server test using inject_message_size as
    the message size.
    """
    base_command = f"fi_rma_bw -e rdm --use-fi-more --max-msg-size {zcpy_recv_max_msg_size}"
    command = " ".join([base_command, "-o", operation_type])

    # rma_bw test with data verification takes longer to finish, so never
    # run with less than 540 as the timeout.
    run_timeout = max(540, cmdline_args.timeout)

    efa_run_client_server_test(
        cmdline_args,
        command,
        "short",
        rma_bw_completion_semantic,
        "host_to_host",
        inject_message_size,
        timeout=run_timeout,
    )
21 changes: 21 additions & 0 deletions fabtests/pytest/efa/test_rma_pingpong.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,24 @@ def test_rma_pingpong_range_no_inject(cmdline_args, operation_type, rma_bw_compl
command = "fi_rma_pingpong -e rdm -j 0"
command = command + " -o " + operation_type
efa_run_client_server_test(cmdline_args, command, "short", rma_bw_completion_semantic, memory_type_bi_dir, rma_pingpong_message_size)


@pytest.mark.parametrize("operation_type", ["writedata"])
@pytest.mark.parametrize("iteration_type",
                         [pytest.param("short", marks=pytest.mark.short),
                          pytest.param("standard", marks=pytest.mark.standard)])
def test_rma_pingpong_direct(cmdline_args, iteration_type, operation_type, rma_bw_completion_semantic,
                             memory_type_bi_dir, rma_pingpong_message_size, zcpy_recv_max_msg_size):
    """Run fi_rma_pingpong on an RDM endpoint with --max-msg-size set.

    Parametrized over the "short" and "standard" iteration types for the
    "writedata" operation. The command line also carries
    perf_progress_model_cli.
    """
    command = " ".join([
        f"fi_rma_pingpong -e rdm --max-msg-size {zcpy_recv_max_msg_size}",
        "-o",
        operation_type,
        perf_progress_model_cli,
    ])

    efa_run_client_server_test(
        cmdline_args,
        command,
        iteration_type,
        rma_bw_completion_semantic,
        memory_type_bi_dir,
        rma_pingpong_message_size,
    )


@pytest.mark.functional
@pytest.mark.parametrize("operation_type", ["writedata"])
def test_rma_pingpong_range_direct(cmdline_args, operation_type, rma_bw_completion_semantic,
                                   rma_pingpong_message_size, memory_type_bi_dir, zcpy_recv_max_msg_size):
    """Functional fi_rma_pingpong run on an RDM endpoint with --max-msg-size set.

    Executes the "writedata" operation as a "short" client/server test.
    """
    base_command = f"fi_rma_pingpong -e rdm --max-msg-size {zcpy_recv_max_msg_size}"
    command = " ".join([base_command, "-o", operation_type])

    efa_run_client_server_test(
        cmdline_args,
        command,
        "short",
        rma_bw_completion_semantic,
        memory_type_bi_dir,
        rma_pingpong_message_size,
    )
2 changes: 2 additions & 0 deletions prov/efa/src/efa_domain.c
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,8 @@ int efa_domain_open(struct fid_fabric *fabric_fid, struct fi_info *info,
goto err_free;
}
efa_domain->util_domain.domain_fid.ops = &efa_ops_domain_rdm;
efa_domain->util_domain.domain_fid.ops->endpoint = efa_ep_open;
efa_domain->util_domain.domain_fid.ops->cq_open = efa_cq_open;
} else {
assert(EFA_EP_TYPE_IS_DGRAM(info));
efa_domain->util_domain.domain_fid.ops = &efa_ops_domain_dgram;
Expand Down

0 comments on commit 6627c7f

Please sign in to comment.