Skip to content

accelerator/rocm: add large BAR check #12982

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 16, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 71 additions & 17 deletions opal/mca/accelerator/rocm/accelerator_rocm_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ static int accelerator_rocm_component_register(void)

int opal_accelerator_rocm_lazy_init()
{
hipError_t hip_err;
int err = OPAL_SUCCESS;

/* Double checked locking to avoid having to
Expand All @@ -196,41 +197,94 @@ int opal_accelerator_rocm_lazy_init()
goto out;
}

err = hipGetDeviceCount(&opal_accelerator_rocm_num_devices);
if (hipSuccess != err) {
hip_err = hipGetDeviceCount(&opal_accelerator_rocm_num_devices);
if (hipSuccess != hip_err) {
opal_output(0, "Failed to query device count, err=%d %s\n",
err, hipGetErrorString(err));
err = OPAL_ERROR;
hip_err, hipGetErrorString(hip_err));
err = OPAL_ERROR;
goto out;
}

hipStream_t memcpy_stream;
err = hipStreamCreate(&memcpy_stream);
if (hipSuccess != err) {
hip_err = hipStreamCreate(&memcpy_stream);
if (hipSuccess != hip_err) {
opal_output(0, "Could not create hipStream, err=%d %s\n",
err, hipGetErrorString(err));
err = OPAL_ERROR; // we got hipErrorInvalidValue, pretty bad
hip_err, hipGetErrorString(hip_err));
err = OPAL_ERROR; // we got hipErrorInvalidValue, pretty bad
goto out;
}

opal_accelerator_rocm_MemcpyStream = malloc(sizeof(hipStream_t));
if (NULL == opal_accelerator_rocm_MemcpyStream) {
opal_output(0, "Could not allocate hipStream\n");
err = OPAL_ERR_OUT_OF_RESOURCE;
goto out;
}
*opal_accelerator_rocm_MemcpyStream = memcpy_stream;

opal_accelerator_rocm_mem_bw = malloc(sizeof(float)*opal_accelerator_rocm_num_devices);
if (NULL == opal_accelerator_rocm_mem_bw) {
opal_output(0, "Could not allocate memory_bw array\n");
err = OPAL_ERR_OUT_OF_RESOURCE;
goto out;
}

for (int i = 0; i < opal_accelerator_rocm_num_devices; ++i) {
int mem_clock_rate; // kHz
err = hipDeviceGetAttribute(&mem_clock_rate,
hipDeviceAttributeMemoryClockRate,
i);
hip_err = hipDeviceGetAttribute(&mem_clock_rate,
hipDeviceAttributeMemoryClockRate,
i);
if (hipSuccess != hip_err) {
opal_output(0, "Failed to query device MemoryClockRate, err=%d %s\n",
hip_err, hipGetErrorString(hip_err));
err = OPAL_ERROR;
goto out;
}

int bus_width; // bit
err = hipDeviceGetAttribute(&bus_width,
hipDeviceAttributeMemoryBusWidth,
i);
/* bw = clock_rate * bus width * 2bit multiplier
* See https://forums.developer.nvidia.com/t/memory-clock-rate/107940
*/
hip_err = hipDeviceGetAttribute(&bus_width,
hipDeviceAttributeMemoryBusWidth,
i);
if (hipSuccess != hip_err) {
opal_output(0, "Failed to query device MemoryBusWidth, err=%d %s\n",
hip_err, hipGetErrorString(hip_err));
err = OPAL_ERROR;
goto out;
}

/* bw = clock_rate * bus width * 2bit multiplier */
float bw = ((float)mem_clock_rate*(float)bus_width*2.0) / 1024 / 1024 / 8;
opal_accelerator_rocm_mem_bw[i] = bw;
}

#if HIP_VERSION >= 60000000
int dev_id;
hip_err = hipGetDevice(&dev_id);
if (hipSuccess != hip_err) {
opal_output(0, "error retrieving current device");
err = OPAL_ERROR;
goto out;
}

int has_large_bar = 0;
hip_err = hipDeviceGetAttribute (&has_large_bar, hipDeviceAttributeIsLargeBar,
dev_id);
if (hipSuccess != hip_err) {
opal_output(0, "error retrieving current device");
err = OPAL_ERROR;
goto out;
}

if (0 == has_large_bar) {
// Without large BAR we have to use hipMemcpy(Async) for all data transfers
opal_output(0, "Large BAR support is not enabled on current device. "
"Enable large BAR support in BIOS (Above 4G Encoding) for "
"better performance\n.");
opal_accelerator_rocm_memcpyH2D_limit = 0;
opal_accelerator_rocm_memcpyD2H_limit = 0;
}
#endif

err = OPAL_SUCCESS;
opal_atomic_wmb();
accelerator_rocm_init_complete = true;
Expand Down
Loading