Commit 620e8cc

prov/efa: Refactor dmabuf reg
Introduce a boolean dmabuf_supported in efa_hmem_info that records
whether dmabuf is supported for each hmem iface. When dmabuf is
supported, retrieve the dmabuf fd and use ibv_reg_dmabuf_mr to register
memory; otherwise, fall back to ibv_reg_mr. Always use ibv_reg_dmabuf_mr
when FI_MR_DMABUF is set. Remove the macros in efa_mr_reg_ibv_mr and
combine the duplicated logic across hmem ifaces.

Signed-off-by: Jessie Yang <[email protected]>
1 parent 43ca6ac commit 620e8cc
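
In short, domain initialization now probes each hmem interface once and records the result in dmabuf_supported; registration time just consults the flag. Below is a minimal, self-contained sketch of that decision logic, with stand-in types; the real implementation is efa_mr_reg_ibv_mr in the efa_mr.c diff further down.

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for struct efa_hmem_info; only the field this commit adds. */
struct hmem_info { bool dmabuf_supported; };

enum reg_path { REG_DMABUF, REG_PLAIN_IBV_REG_MR, REG_FAIL };

/* Mirrors the branch order of the refactored efa_mr_reg_ibv_mr():
 * FI_MR_DMABUF demands dmabuf support and never falls back; otherwise
 * dmabuf is preferred when the interface probe succeeded at domain
 * init, and ibv_reg_mr() is the fallback. */
static enum reg_path choose_reg_path(const struct hmem_info *info,
				     bool flag_fi_mr_dmabuf)
{
	if (flag_fi_mr_dmabuf)
		return info->dmabuf_supported ? REG_DMABUF : REG_FAIL;
	return info->dmabuf_supported ? REG_DMABUF : REG_PLAIN_IBV_REG_MR;
}

int main(void)
{
	struct hmem_info cuda = { .dmabuf_supported = true };
	struct hmem_info sys  = { .dmabuf_supported = false };

	printf("%d %d %d\n",
	       choose_reg_path(&cuda, false),  /* 0: dmabuf */
	       choose_reg_path(&sys, false),   /* 1: plain ibv_reg_mr */
	       choose_reg_path(&sys, true));   /* 2: fail, FI_MR_DMABUF unsupported */
	return 0;
}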

File tree

4 files changed (+79 -91 lines)

prov/efa/src/efa_hmem.c (+25 -18)

@@ -97,6 +97,8 @@ static int efa_domain_hmem_info_init_system(struct efa_domain *efa_domain)
 	info->p2p_disabled_by_user = false;
 	info->p2p_required_by_impl = false;
 	info->p2p_supported_by_device = true;
+	info->dmabuf_supported = false;
+
 	efa_domain_hmem_info_init_protocol_thresholds(efa_domain, FI_HMEM_SYSTEM);
 	return 0;
 }
@@ -137,6 +139,7 @@ static int efa_domain_hmem_info_init_cuda(struct efa_domain *efa_domain)
 
 	info->initialized = true;
 	info->p2p_disabled_by_user = false;
+	info->dmabuf_supported = false;
 
 	/* If user is using libfabric API 1.18 or later, by default EFA provider is permitted to
 	 * use CUDA library to support CUDA memory, therefore p2p is not required.
@@ -146,26 +149,24 @@ static int efa_domain_hmem_info_init_cuda(struct efa_domain *efa_domain)
 	else
 		info->p2p_required_by_impl = true;
 
-#if HAVE_EFA_DMABUF_MR
-	ret = cuda_get_dmabuf_fd(ptr, len, &dmabuf_fd, &dmabuf_offset);
+	ret = ofi_hmem_get_dmabuf_fd(FI_HMEM_CUDA, ptr, len, &dmabuf_fd, &dmabuf_offset);
 	if (ret == FI_SUCCESS) {
-		ibv_mr = ibv_reg_dmabuf_mr(g_device_list[0].ibv_pd, dmabuf_offset,
+		ibv_mr = efa_mr_reg_ibv_dmabuf_mr(efa_domain->ibv_pd, dmabuf_offset,
 					   len, (uint64_t)ptr, dmabuf_fd, ibv_access);
 		if (!ibv_mr) {
 			EFA_INFO(FI_LOG_DOMAIN,
 				 "Unable to register CUDA device buffer via dmabuf: %s. "
 				 "Fall back to ibv_reg_mr\n", fi_strerror(-errno));
-			ibv_mr = ibv_reg_mr(g_device_list[0].ibv_pd, ptr, len, ibv_access);
+			ibv_mr = ibv_reg_mr(efa_domain->ibv_pd, ptr, len, ibv_access);
+		} else {
+			info->dmabuf_supported = true;
 		}
 	} else {
 		EFA_INFO(FI_LOG_DOMAIN,
 			 "Unable to retrieve dmabuf fd of CUDA device buffer: %d. "
 			 "Fall back to ibv_reg_mr\n", ret);
-		ibv_mr = ibv_reg_mr(g_device_list[0].ibv_pd, ptr, len, ibv_access);
+		ibv_mr = ibv_reg_mr(efa_domain->ibv_pd, ptr, len, ibv_access);
 	}
-#else
-	ibv_mr = ibv_reg_mr(g_device_list[0].ibv_pd, ptr, len, ibv_access);
-#endif
 
 	if (!ibv_mr) {
 		info->p2p_supported_by_device = false;
@@ -247,22 +248,27 @@ static int efa_domain_hmem_info_init_neuron(struct efa_domain *efa_domain)
 	info->p2p_disabled_by_user = false;
 	/* Neuron currently requires P2P */
 	info->p2p_required_by_impl = true;
+	info->dmabuf_supported = false;
 
-#if HAVE_EFA_DMABUF_MR
-	ret = neuron_get_dmabuf_fd(ptr, (uint64_t)len, &dmabuf_fd, &offset);
+	ret = ofi_hmem_get_dmabuf_fd(FI_HMEM_NEURON, ptr, (uint64_t)len, &dmabuf_fd, &offset);
 	if (ret == FI_SUCCESS) {
-		ibv_mr = ibv_reg_dmabuf_mr(
-				g_device_list[0].ibv_pd, offset,
+		ibv_mr = efa_mr_reg_ibv_dmabuf_mr(
+				efa_domain->ibv_pd, offset,
 				len, (uint64_t)ptr, dmabuf_fd, ibv_access);
-	} else if (ret == -FI_ENOPROTOOPT) {
-		EFA_INFO(FI_LOG_MR,
+		if (!ibv_mr) {
+			EFA_INFO(FI_LOG_DOMAIN,
+				 "Unable to register neuron device buffer via dmabuf: %s. "
+				 "Fall back to ibv_reg_mr\n", fi_strerror(-errno));
+			ibv_mr = ibv_reg_mr(efa_domain->ibv_pd, ptr, len, ibv_access);
+		} else {
+			info->dmabuf_supported = true;
+		}
+	} else {
+		EFA_INFO(FI_LOG_DOMAIN,
 			 "Unable to retrieve dmabuf fd of Neuron device buffer, "
 			 "Fall back to ibv_reg_mr\n");
-		ibv_mr = ibv_reg_mr(g_device_list[0].ibv_pd, ptr, len, ibv_access);
+		ibv_mr = ibv_reg_mr(efa_domain->ibv_pd, ptr, len, ibv_access);
 	}
-#else
-	ibv_mr = ibv_reg_mr(g_device_list[0].ibv_pd, ptr, len, ibv_access);
-#endif
 
 	if (!ibv_mr) {
 		info->p2p_supported_by_device = false;
@@ -325,6 +331,7 @@ static int efa_domain_hmem_info_init_synapseai(struct efa_domain *efa_domain)
 	/* SynapseAI currently requires P2P */
 	info->p2p_required_by_impl = true;
 	info->p2p_supported_by_device = true;
+	info->dmabuf_supported = true;
 	efa_domain_hmem_info_init_protocol_thresholds(efa_domain, FI_HMEM_SYNAPSEAI);
 
 	/* Only the long read protocol is supported */
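
The CUDA and Neuron hunks above now share one probe shape: get a dmabuf fd via ofi_hmem_get_dmabuf_fd, attempt a trial dmabuf registration, and set info->dmabuf_supported only if both steps succeed; any failure falls back to ibv_reg_mr. A compilable sketch of that pattern follows, using hypothetical stub helpers (stub_get_dmabuf_fd and stub_reg_dmabuf_mr are stand-ins, not provider APIs):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Hypothetical stand-ins for ofi_hmem_get_dmabuf_fd() and
 * efa_mr_reg_ibv_dmabuf_mr(); they only model success/failure. */
static int stub_get_dmabuf_fd(void *ptr, size_t len, int *fd, uint64_t *off)
{ (void)ptr; (void)len; *fd = 3; *off = 0; return 0; }
static void *stub_reg_dmabuf_mr(int fd, uint64_t off, size_t len)
{ (void)fd; (void)off; (void)len; return (void *)1; }

/* The init-time probe: dmabuf_supported becomes true only when fd
 * retrieval AND the trial dmabuf registration both succeed. */
static bool probe_dmabuf_supported(void *ptr, size_t len)
{
	int fd;
	uint64_t off;

	if (stub_get_dmabuf_fd(ptr, len, &fd, &off) != 0)
		return false;	/* no fd: stay on ibv_reg_mr */
	return stub_reg_dmabuf_mr(fd, off, len) != NULL;
}

int main(void)
{
	char buf[16];
	return probe_dmabuf_supported(buf, sizeof(buf)) ? 0 : 1;
}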

prov/efa/src/efa_hmem.h (+1 -0)

@@ -26,6 +26,7 @@ struct efa_hmem_info {
 	bool p2p_disabled_by_user;    /* Did the user disable p2p via FI_OPT_FI_HMEM_P2P? */
 	bool p2p_required_by_impl;    /* Is p2p required for this interface? */
 	bool p2p_supported_by_device; /* do we support p2p with this device */
+	bool dmabuf_supported;
 
 	size_t max_intra_eager_size; /* Maximum message size to use eager protocol for intra-node */
 	size_t max_medium_msg_size;
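
The flag is tracked per interface; the registration path in efa_mr.c below gates the dmabuf branch on it with a check of this shape:

	if (efa_mr->domain->hmem_info[mr_attr->iface].dmabuf_supported) {
		/* retrieve the dmabuf fd and register via efa_mr_reg_ibv_dmabuf_mr() */
	}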

prov/efa/src/efa_mr.c (+27 -73)

@@ -475,30 +475,6 @@ struct fi_ops efa_mr_ops = {
 	.ops_open = fi_no_ops_open,
 };
 
-#if HAVE_EFA_DMABUF_MR
-
-static inline
-struct ibv_mr *efa_mr_reg_ibv_dmabuf_mr(struct ibv_pd *pd, uint64_t offset,
-					size_t len, uint64_t iova, int fd, int access)
-{
-	return ibv_reg_dmabuf_mr(pd, offset, len, iova, fd, access);
-}
-
-#else
-
-static inline
-struct ibv_mr *efa_mr_reg_ibv_dmabuf_mr(struct ibv_pd *pd, uint64_t offset,
-					size_t len, uint64_t iova, int fd, int access)
-{
-	EFA_WARN(FI_LOG_MR,
-		 "ibv_reg_dmabuf_mr is required for memory"
-		 " registration with FI_MR_DMABUF flags, but "
-		 " not available in the current rdma-core library."
-		 " please build libfabric with rdma-core >= 34.0\n");
-	return NULL;
-}
-
-#endif
 /**
  * @brief Register a memory buffer with rdma-core api.
  *
@@ -511,7 +487,20 @@ struct ibv_mr *efa_mr_reg_ibv_dmabuf_mr(struct ibv_pd *pd, uint64_t offset,
 static struct ibv_mr *efa_mr_reg_ibv_mr(struct efa_mr *efa_mr, struct fi_mr_attr *mr_attr,
 					int access, const uint64_t flags)
 {
-	if (flags & FI_MR_DMABUF)
+	int dmabuf_fd;
+	uint64_t offset;
+	int ret;
+
+	assert(efa_mr->domain->hmem_info[mr_attr->iface].p2p_supported_by_device);
+
+	if (flags & FI_MR_DMABUF) {
+		if (OFI_UNLIKELY(!efa_mr->domain->hmem_info[mr_attr->iface].dmabuf_supported)) {
+			EFA_WARN(FI_LOG_MR, "Requested FI_MR_DMABUF, but dmabuf is not supported.\n");
+			return NULL;
+		}
+
+		EFA_INFO(FI_LOG_MR, "FI_MR_DMABUF is set. Registering dmabuf mr with fd: %d, offset: %lu, len: %zu\n",
+			 mr_attr->dmabuf->fd, mr_attr->dmabuf->offset, mr_attr->dmabuf->len);
 		return efa_mr_reg_ibv_dmabuf_mr(
 			efa_mr->domain->ibv_pd,
 			mr_attr->dmabuf->offset,
@@ -520,64 +509,29 @@ static struct ibv_mr *efa_mr_reg_ibv_mr(struct efa_mr *efa_mr, struct fi_mr_attr
 		mr_attr->dmabuf->fd,
 		access
 	);
+	}
 
-	/*
-	 * TODO: remove the synapseai and neuron blocks by onboarding the
-	 * ofi_hmem_get_dmabuf_fd API.
-	 */
-#if HAVE_SYNAPSEAI
-	if (efa_mr_is_synapseai(efa_mr)) {
-		int dmabuf_fd;
-		uint64_t offset;
-		int ret;
-
-		ret = synapseai_get_dmabuf_fd(mr_attr->mr_iov->iov_base,
-					      (uint64_t) mr_attr->mr_iov->iov_len,
-					      &dmabuf_fd, &offset);
+	if (efa_mr->domain->hmem_info[mr_attr->iface].dmabuf_supported) {
+		ret = ofi_hmem_get_dmabuf_fd(
+				mr_attr->iface,
+				mr_attr->mr_iov->iov_base,
+				(uint64_t) mr_attr->mr_iov->iov_len,
+				&dmabuf_fd, &offset);
 		if (ret != FI_SUCCESS) {
-			EFA_WARN(FI_LOG_MR, "Unable to get dmabuf fd for Gaudi device buffer \n");
+			EFA_WARN(FI_LOG_MR, "Unable to get dmabuf fd for device buffer. errno: %d, err_msg: %s\n",
+				 ret, fi_strerror(-ret));
 			return NULL;
 		}
+		EFA_INFO(FI_LOG_MR, "Registering dmabuf mr with fd: %d, offset: %lu, len: %zu\n",
+			 dmabuf_fd, offset, mr_attr->mr_iov->iov_len);
 		return efa_mr_reg_ibv_dmabuf_mr(efa_mr->domain->ibv_pd, offset,
 						mr_attr->mr_iov->iov_len,
 						(uint64_t)mr_attr->mr_iov->iov_base,
 						dmabuf_fd, access);
 	}
-#endif
-
-#if HAVE_NEURON
-	if (efa_mr_is_neuron(efa_mr)) {
-		int dmabuf_fd;
-		uint64_t offset;
-		int ret;
-
-		ret = neuron_get_dmabuf_fd(
-			mr_attr->mr_iov->iov_base,
-			mr_attr->mr_iov->iov_len,
-			&dmabuf_fd,
-			&offset);
-
-		if (ret == FI_SUCCESS) {
-			/* Success => invoke ibv_reg_dmabuf_mr */
-			return efa_mr_reg_ibv_dmabuf_mr(
-				efa_mr->domain->ibv_pd, 0,
-				mr_attr->mr_iov->iov_len,
-				(uint64_t)mr_attr->mr_iov->iov_base,
-				dmabuf_fd, access);
-		} else if (ret == -FI_ENOPROTOOPT) {
-			/* Protocol not availabe => fallback */
-			EFA_INFO(FI_LOG_MR,
-				 "Unable to get dmabuf fd for Neuron device buffer, "
-				 "Fall back to ibv_reg_mr\n");
-			return ibv_reg_mr(
-				efa_mr->domain->ibv_pd,
-				(void *)mr_attr->mr_iov->iov_base,
-				mr_attr->mr_iov->iov_len, access);
-		}
-		return NULL;
-	}
-#endif
 
+	EFA_INFO(FI_LOG_MR, "Dmabuf is not supported. Registering memory via ibv_reg_mr with addr: %lu, len: %zu\n",
+		 (uint64_t)mr_attr->mr_iov->iov_base, mr_attr->mr_iov->iov_len);
 	return ibv_reg_mr(efa_mr->domain->ibv_pd,
 			  (void *)mr_attr->mr_iov->iov_base,
 			  mr_attr->mr_iov->iov_len, access);
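
For context, here is a caller-side sketch of the FI_MR_DMABUF branch exercised above. It assumes libfabric >= 1.20, where struct fi_mr_dmabuf and the FI_MR_DMABUF flag are available; the dmabuf fd itself would come from the device runtime (e.g. CUDA's cuMemGetHandleForAddressRange) and is passed in opaquely here:

#include <rdma/fabric.h>
#include <rdma/fi_domain.h>

static int register_dmabuf(struct fid_domain *domain, int dmabuf_fd,
			   void *base, size_t len, struct fid_mr **mr_out)
{
	struct fi_mr_dmabuf dmabuf = {
		.fd = dmabuf_fd,
		.offset = 0,
		.len = len,
		.base_addr = base,
	};
	struct fi_mr_attr attr = {
		.dmabuf = &dmabuf,
		.iov_count = 1,
		.access = FI_SEND | FI_RECV,
		.iface = FI_HMEM_CUDA,	/* assumption: a CUDA device buffer */
	};

	/* FI_MR_DMABUF makes the provider take the dmabuf branch in
	 * efa_mr_reg_ibv_mr() unconditionally; with this commit it fails
	 * cleanly when the interface's dmabuf_supported flag is false. */
	return fi_mr_regattr(domain, &attr, FI_MR_DMABUF, mr_out);
}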

prov/efa/src/efa_mr.h (+26 -0)

@@ -6,6 +6,7 @@
 
 #include <stdbool.h>
 #include <ofi_mr.h>
+#include <infiniband/verbs.h>
 
 /*
  * Descriptor returned for FI_HMEM peer memory registrations
@@ -35,6 +36,31 @@ struct efa_mr {
 	bool needs_sync;
 };
 
+#if HAVE_EFA_DMABUF_MR
+
+static inline
+struct ibv_mr *efa_mr_reg_ibv_dmabuf_mr(struct ibv_pd *pd, uint64_t offset,
+					size_t len, uint64_t iova, int fd, int access)
+{
+	return ibv_reg_dmabuf_mr(pd, offset, len, iova, fd, access);
+}
+
+#else
+
+static inline
+struct ibv_mr *efa_mr_reg_ibv_dmabuf_mr(struct ibv_pd *pd, uint64_t offset,
+					size_t len, uint64_t iova, int fd, int access)
+{
+	EFA_WARN(FI_LOG_MR,
+		 "ibv_reg_dmabuf_mr is required for memory"
+		 " registration with FI_MR_DMABUF flags, but "
+		 " not available in the current rdma-core library."
+		 " please build libfabric with rdma-core >= 34.0\n");
+	return NULL;
+}
+
+#endif
+
 extern int efa_mr_cache_enable;
 extern size_t efa_mr_max_cached_count;
 extern size_t efa_mr_max_cached_size;
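
Moving the efa_mr_reg_ibv_dmabuf_mr() pair from efa_mr.c into this header makes it callable from both efa_mr.c and the domain-init probes in efa_hmem.c, which is why the <infiniband/verbs.h> include is added. On builds against rdma-core older than 34.0 (no HAVE_EFA_DMABUF_MR), the stub variant always returns NULL, so the CUDA and Neuron probes fall back to ibv_reg_mr at init and leave dmabuf_supported false for those interfaces.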
