Skip to content

Commit

Permalink
UCT/IB/MLX5/DV: convert stack allocation at uct_ib_mlx5_devx_md_open() to heap
Browse files Browse the repository at this point in the history
  • Loading branch information
michal-shalev committed Sep 16, 2024
1 parent 27e63b9 commit 66bbe34
Showing 1 changed file with 27 additions and 13 deletions.
40 changes: 27 additions & 13 deletions src/uct/ib/mlx5/dv/ib_mlx5dv_md.c
Original file line number Diff line number Diff line change
Expand Up @@ -2107,11 +2107,13 @@ ucs_status_t uct_ib_mlx5_devx_md_open(struct ibv_device *ibv_device,
const uct_ib_md_config_t *md_config,
uct_ib_md_t **p_md)
{
char out[UCT_IB_MLX5DV_ST_SZ_BYTES(query_hca_cap_out)] = {};
char in[UCT_IB_MLX5DV_ST_SZ_BYTES(query_hca_cap_in)] = {};
char cap_2_out[UCT_IB_MLX5DV_ST_SZ_BYTES(query_hca_cap_out)] = {};
ucs_status_t status = UCS_OK;
uint8_t lag_state = 0;
size_t out_len = UCT_IB_MLX5DV_ST_SZ_BYTES(query_hca_cap_out);
size_t in_len = UCT_IB_MLX5DV_ST_SZ_BYTES(query_hca_cap_in);
char *out = ucs_calloc(1, out_len, "out");
char *in = ucs_calloc(1, in_len, "in");
char *cap_2_out = ucs_calloc(1, out_len, "cap_2_out");
ucs_status_t status = UCS_OK;
uint8_t lag_state = 0;
void *cap_2;
uint8_t log_max_qp;
uint16_t vhca_id;
Expand All @@ -2125,20 +2127,26 @@ ucs_status_t uct_ib_mlx5_devx_md_open(struct ibv_device *ibv_device,
ucs_mpool_params_t mp_params;
int ksm_atomic;

if ((out == NULL) || (in == NULL) || (cap_2_out == NULL)) {
ucs_error("Failed to allocate memory for HCA capability buffers");
status = UCS_ERR_NO_MEMORY;
goto err;
}

if (!mlx5dv_is_supported(ibv_device)) {
status = UCS_ERR_UNSUPPORTED;
goto err;
goto err_free_buffers;
}

if (md_config->devx == UCS_NO) {
status = UCS_ERR_UNSUPPORTED;
goto err;
goto err_free_buffers;
}

ctx = uct_ib_mlx5_devx_open_device(ibv_device);
if (ctx == NULL) {
status = UCS_ERR_UNSUPPORTED;
goto err;
goto err_free_buffers;
}

md = ucs_derived_of(uct_ib_md_alloc(sizeof(*md), "ib_mlx5_devx_md", ctx),
Expand Down Expand Up @@ -2166,7 +2174,7 @@ ucs_status_t uct_ib_mlx5_devx_md_open(struct ibv_device *ibv_device,
UCT_IB_MLX5DV_SET(query_hca_cap_in, in, opcode, UCT_IB_MLX5_CMD_OP_QUERY_HCA_CAP);
UCT_IB_MLX5DV_SET(query_hca_cap_in, in, op_mod, UCT_IB_MLX5_HCA_CAP_OPMOD_GET_CUR |
(UCT_IB_MLX5_CAP_GENERAL << 1));
ret = mlx5dv_devx_general_cmd(ctx, in, sizeof(in), out, sizeof(out));
ret = mlx5dv_devx_general_cmd(ctx, in, in_len, out, out_len);
if (ret != 0) {
if ((errno == EPERM) || (errno == EPROTONOSUPPORT) ||
(errno == EOPNOTSUPP)) {
Expand Down Expand Up @@ -2285,7 +2293,7 @@ ucs_status_t uct_ib_mlx5_devx_md_open(struct ibv_device *ibv_device,

vhca_id = UCT_IB_MLX5DV_GET(cmd_hca_cap, cap, vhca_id);

status = uct_ib_mlx5_devx_query_cap_2(ctx, cap_2_out, sizeof(cap_2_out));
status = uct_ib_mlx5_devx_query_cap_2(ctx, cap_2_out, out_len);
if (status == UCS_OK) {
cap_2 = UCT_IB_MLX5DV_ADDR_OF(query_hca_cap_out, cap_2_out, capability);

Expand All @@ -2303,8 +2311,7 @@ ucs_status_t uct_ib_mlx5_devx_md_open(struct ibv_device *ibv_device,

UCT_IB_MLX5DV_SET(query_hca_cap_in, in, op_mod, UCT_IB_MLX5_HCA_CAP_OPMOD_GET_CUR |
(UCT_IB_MLX5_CAP_ATOMIC << 1));
status = uct_ib_mlx5_devx_general_cmd(ctx, in, sizeof(in), out,
sizeof(out),
status = uct_ib_mlx5_devx_general_cmd(ctx, in, in_len, out, out_len,
"QUERY_HCA_CAP, ATOMIC", 0);
if (status != UCS_OK) {
goto err_lru_cleanup;
Expand Down Expand Up @@ -2406,6 +2413,9 @@ ucs_status_t uct_ib_mlx5_devx_md_open(struct ibv_device *ibv_device,
uct_ib_mlx5_devx_init_flush_mr(md);

*p_md = &md->super;
ucs_free(out);
ucs_free(in);
ucs_free(cap_2_out);
return UCS_OK;

err_dbrec_mpool_cleanup:
Expand All @@ -2419,6 +2429,10 @@ ucs_status_t uct_ib_mlx5_devx_md_open(struct ibv_device *ibv_device,
uct_ib_md_free(&md->super);
err_free_context:
uct_ib_md_device_context_close(ctx);
err_free_buffers:
ucs_free(out);
ucs_free(in);
ucs_free(cap_2_out);
err:
if ((status == UCS_ERR_UNSUPPORTED) && (md_config->devx == UCS_YES)) {
ucs_error("DEVX requested but not supported by %s",
Expand Down Expand Up @@ -2542,7 +2556,7 @@ uct_ib_mlx5_devx_md_get_counter_set_id(uct_ib_mlx5_md_t *md, uint8_t port_num)
UCT_IB_MLX5DV_SET(query_qp_in, in, opcode, UCT_IB_MLX5_CMD_OP_QUERY_QP);
UCT_IB_MLX5DV_SET(query_qp_in, in, qpn, dummy_qp->qp_num);

ret = mlx5dv_devx_qp_query(dummy_qp, in, sizeof(in), out, sizeof(out));
ret = mlx5dv_devx_qp_query(dummy_qp, in, in_len, out, out_len);
if (ret) {
ucs_diag("mlx5dv_devx_qp_query(%s:%d, DUMMY_QP, QPN=0x%x) failed, "
"syndrome 0x%x: %m",
Expand Down

0 comments on commit 66bbe34

Please sign in to comment.