@@ -97,6 +97,8 @@ static int efa_domain_hmem_info_init_system(struct efa_domain *efa_domain)
97
97
info -> p2p_disabled_by_user = false;
98
98
info -> p2p_required_by_impl = false;
99
99
info -> p2p_supported_by_device = true;
100
+ info -> dmabuf_supported = false;
101
+
100
102
efa_domain_hmem_info_init_protocol_thresholds (efa_domain , FI_HMEM_SYSTEM );
101
103
return 0 ;
102
104
}
@@ -137,6 +139,7 @@ static int efa_domain_hmem_info_init_cuda(struct efa_domain *efa_domain)
137
139
138
140
info -> initialized = true;
139
141
info -> p2p_disabled_by_user = false;
142
+ info -> dmabuf_supported = false;
140
143
141
144
/* If user is using libfabric API 1.18 or later, by default EFA provider is permitted to
142
145
* use CUDA library to support CUDA memory, therefore p2p is not required.
@@ -146,26 +149,24 @@ static int efa_domain_hmem_info_init_cuda(struct efa_domain *efa_domain)
146
149
else
147
150
info -> p2p_required_by_impl = true;
148
151
149
- #if HAVE_EFA_DMABUF_MR
150
- ret = cuda_get_dmabuf_fd (ptr , len , & dmabuf_fd , & dmabuf_offset );
152
+ ret = ofi_hmem_get_dmabuf_fd (FI_HMEM_CUDA , ptr , len , & dmabuf_fd , & dmabuf_offset );
151
153
if (ret == FI_SUCCESS ) {
152
- ibv_mr = ibv_reg_dmabuf_mr ( g_device_list [ 0 ]. ibv_pd , dmabuf_offset ,
154
+ ibv_mr = efa_mr_reg_ibv_dmabuf_mr ( efa_domain -> ibv_pd , dmabuf_offset ,
153
155
len , (uint64_t )ptr , dmabuf_fd , ibv_access );
154
156
if (!ibv_mr ) {
155
157
EFA_INFO (FI_LOG_DOMAIN ,
156
158
"Unable to register CUDA device buffer via dmabuf: %s. "
157
159
"Fall back to ibv_reg_mr\n" , fi_strerror (- errno ));
158
- ibv_mr = ibv_reg_mr (g_device_list [0 ].ibv_pd , ptr , len , ibv_access );
160
+ ibv_mr = ibv_reg_mr (efa_domain -> ibv_pd , ptr , len , ibv_access );
161
+ } else {
162
+ info -> dmabuf_supported = true;
159
163
}
160
164
} else {
161
165
EFA_INFO (FI_LOG_DOMAIN ,
162
166
"Unable to retrieve dmabuf fd of CUDA device buffer: %d. "
163
167
"Fall back to ibv_reg_mr\n" , ret );
164
- ibv_mr = ibv_reg_mr (g_device_list [ 0 ]. ibv_pd , ptr , len , ibv_access );
168
+ ibv_mr = ibv_reg_mr (efa_domain -> ibv_pd , ptr , len , ibv_access );
165
169
}
166
- #else
167
- ibv_mr = ibv_reg_mr (g_device_list [0 ].ibv_pd , ptr , len , ibv_access );
168
- #endif
169
170
170
171
if (!ibv_mr ) {
171
172
info -> p2p_supported_by_device = false;
@@ -247,22 +248,27 @@ static int efa_domain_hmem_info_init_neuron(struct efa_domain *efa_domain)
247
248
info -> p2p_disabled_by_user = false;
248
249
/* Neuron currently requires P2P */
249
250
info -> p2p_required_by_impl = true;
251
+ info -> dmabuf_supported = false;
250
252
251
- #if HAVE_EFA_DMABUF_MR
252
- ret = neuron_get_dmabuf_fd (ptr , (uint64_t )len , & dmabuf_fd , & offset );
253
+ ret = ofi_hmem_get_dmabuf_fd (FI_HMEM_NEURON , ptr , (uint64_t )len , & dmabuf_fd , & offset );
253
254
if (ret == FI_SUCCESS ) {
254
- ibv_mr = ibv_reg_dmabuf_mr (
255
- g_device_list [ 0 ]. ibv_pd , offset ,
255
+ ibv_mr = efa_mr_reg_ibv_dmabuf_mr (
256
+ efa_domain -> ibv_pd , offset ,
256
257
len , (uint64_t )ptr , dmabuf_fd , ibv_access );
257
- } else if (ret == - FI_ENOPROTOOPT ) {
258
- EFA_INFO (FI_LOG_MR ,
258
+ if (!ibv_mr ) {
259
+ EFA_INFO (FI_LOG_DOMAIN ,
260
+ "Unable to register neuron device buffer via dmabuf: %s. "
261
+ "Fall back to ibv_reg_mr\n" , fi_strerror (- errno ));
262
+ ibv_mr = ibv_reg_mr (efa_domain -> ibv_pd , ptr , len , ibv_access );
263
+ } else {
264
+ info -> dmabuf_supported = true;
265
+ }
266
+ } else {
267
+ EFA_INFO (FI_LOG_DOMAIN ,
259
268
"Unable to retrieve dmabuf fd of Neuron device buffer, "
260
269
"Fall back to ibv_reg_mr\n" );
261
- ibv_mr = ibv_reg_mr (g_device_list [ 0 ]. ibv_pd , ptr , len , ibv_access );
270
+ ibv_mr = ibv_reg_mr (efa_domain -> ibv_pd , ptr , len , ibv_access );
262
271
}
263
- #else
264
- ibv_mr = ibv_reg_mr (g_device_list [0 ].ibv_pd , ptr , len , ibv_access );
265
- #endif
266
272
267
273
if (!ibv_mr ) {
268
274
info -> p2p_supported_by_device = false;
@@ -325,6 +331,7 @@ static int efa_domain_hmem_info_init_synapseai(struct efa_domain *efa_domain)
325
331
/* SynapseAI currently requires P2P */
326
332
info -> p2p_required_by_impl = true;
327
333
info -> p2p_supported_by_device = true;
334
+ info -> dmabuf_supported = true;
328
335
efa_domain_hmem_info_init_protocol_thresholds (efa_domain , FI_HMEM_SYNAPSEAI );
329
336
330
337
/* Only the long read protocol is supported */
0 commit comments