Skip to content

Commit

Permalink
accelerator/rocm: add large BAR check
Browse files Browse the repository at this point in the history
check whether the device has large BAR support enabled. If not, set the
rocm_copy_D2H and H2D thresholds to 0, i.e. use hipMemcpy(Async) for all
data transfers. Data center (Instinct) devices usually have large BAR
enabled, for gaming PCs its not always set by the user.

The PR also does a little bit of cleanup in the error handling of the
lazy_init() routine.

Signed-off-by: Edgar Gabriel <[email protected]>
  • Loading branch information
edgargabriel committed Dec 16, 2024
1 parent 9ba5034 commit 4df81a1
Showing 1 changed file with 71 additions and 17 deletions.
88 changes: 71 additions & 17 deletions opal/mca/accelerator/rocm/accelerator_rocm_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ static int accelerator_rocm_component_register(void)

int opal_accelerator_rocm_lazy_init()
{
hipError_t hip_err;
int err = OPAL_SUCCESS;

/* Double checked locking to avoid having to
Expand All @@ -196,41 +197,94 @@ int opal_accelerator_rocm_lazy_init()
goto out;
}

err = hipGetDeviceCount(&opal_accelerator_rocm_num_devices);
if (hipSuccess != err) {
hip_err = hipGetDeviceCount(&opal_accelerator_rocm_num_devices);
if (hipSuccess != hip_err) {
opal_output(0, "Failed to query device count, err=%d %s\n",
err, hipGetErrorString(err));
err = OPAL_ERROR;
hip_err, hipGetErrorString(hip_err));
err = OPAL_ERROR;
goto out;
}

hipStream_t memcpy_stream;
err = hipStreamCreate(&memcpy_stream);
if (hipSuccess != err) {
hip_err = hipStreamCreate(&memcpy_stream);
if (hipSuccess != hip_err) {
opal_output(0, "Could not create hipStream, err=%d %s\n",
err, hipGetErrorString(err));
err = OPAL_ERROR; // we got hipErrorInvalidValue, pretty bad
hip_err, hipGetErrorString(hip_err));
err = OPAL_ERROR; // we got hipErrorInvalidValue, pretty bad
goto out;
}

opal_accelerator_rocm_MemcpyStream = malloc(sizeof(hipStream_t));
if (NULL == opal_accelerator_rocm_MemcpyStream) {
opal_output(0, "Could not allocate hipStream\n");
err = OPAL_ERR_OUT_OF_RESOURCE;
goto out;
}
*opal_accelerator_rocm_MemcpyStream = memcpy_stream;

opal_accelerator_rocm_mem_bw = malloc(sizeof(float)*opal_accelerator_rocm_num_devices);
if (NULL == opal_accelerator_rocm_mem_bw) {
opal_output(0, "Could not allocate memory_bw array\n");
err = OPAL_ERR_OUT_OF_RESOURCE;
goto out;
}

for (int i = 0; i < opal_accelerator_rocm_num_devices; ++i) {
int mem_clock_rate; // kHz
err = hipDeviceGetAttribute(&mem_clock_rate,
hipDeviceAttributeMemoryClockRate,
i);
hip_err = hipDeviceGetAttribute(&mem_clock_rate,
hipDeviceAttributeMemoryClockRate,
i);
if (hipSuccess != hip_err) {
opal_output(0, "Failed to query device MemoryClockRate, err=%d %s\n",
hip_err, hipGetErrorString(hip_err));
err = OPAL_ERROR;
goto out;
}

int bus_width; // bit
err = hipDeviceGetAttribute(&bus_width,
hipDeviceAttributeMemoryBusWidth,
i);
/* bw = clock_rate * bus width * 2bit multiplier
* See https://forums.developer.nvidia.com/t/memory-clock-rate/107940
*/
hip_err = hipDeviceGetAttribute(&bus_width,
hipDeviceAttributeMemoryBusWidth,
i);
if (hipSuccess != hip_err) {
opal_output(0, "Failed to query device MemoryBusWidth, err=%d %s\n",
hip_err, hipGetErrorString(hip_err));
err = OPAL_ERROR;
goto out;
}

/* bw = clock_rate * bus width * 2bit multiplier */
float bw = ((float)mem_clock_rate*(float)bus_width*2.0) / 1024 / 1024 / 8;
opal_accelerator_rocm_mem_bw[i] = bw;
}

#if HIP_VERSION >= 60000000
int dev_id;
hip_err = hipGetDevice(&dev_id);
if (hipSuccess != hip_err) {
opal_output(0, "error retrieving current device");
err = OPAL_ERROR;
goto out;
}

int has_large_bar = 0;
hip_err = hipDeviceGetAttribute (&has_large_bar, hipDeviceAttributeIsLargeBar,
dev_id);
if (hipSuccess != hip_err) {
opal_output(0, "error retrieving current device");
err = OPAL_ERROR;
goto out;
}

if (0 == has_large_bar) {
// Without large BAR we have to use hipMemcpy(Async) for all data transfers
opal_output(0, "Large BAR support is not enabled on current device. "
"Enable large BAR support in BIOS (Above 4G Encoding) for "
"better performance\n.");
opal_accelerator_rocm_memcpyH2D_limit = 0;
opal_accelerator_rocm_memcpyD2H_limit = 0;
}
#endif

err = OPAL_SUCCESS;
opal_atomic_wmb();
accelerator_rocm_init_complete = true;
Expand Down

0 comments on commit 4df81a1

Please sign in to comment.