Skip to content

Commit cb8dd56

Browse files
authored
Merge pull request #6747 from devreal/rdma-fetchop-local-v4.0.x
OSC rdma: make sure accumulating in shared memory is safe
2 parents 0cd5a5a + 900f0fa commit cb8dd56

File tree

3 files changed

+16
-3
lines changed

3 files changed

+16
-3
lines changed

ompi/mca/osc/rdma/osc_rdma.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,9 @@ struct ompi_osc_rdma_module_t {
145145

146146
bool acc_use_amo;
147147

148+
/** whether the group is located on a single node */
149+
bool single_node;
150+
148151
/** flavor of this window */
149152
int flavor;
150153

ompi/mca/osc/rdma/osc_rdma_accumulate.c

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -889,10 +889,19 @@ int ompi_osc_rdma_rget_accumulate_internal (ompi_osc_rdma_sync_t *sync, const vo
889889
(void) ompi_osc_rdma_lock_acquire_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));
890890
}
891891

892+
/* accumulate in (shared) memory if there is only a single node
893+
* OR if the peer has a local base and we have an exclusive lock
894+
* OR if the peer has a local base and other processes won't try to use the network either */
895+
bool use_shared_mem = module->single_node ||
896+
(ompi_osc_rdma_peer_local_base (peer) &&
897+
(ompi_osc_rdma_peer_is_exclusive (peer) ||
898+
!module->acc_single_intrinsic));
899+
892900
/* if the datatype is small enough (and the count is 1) then try to directly use the hardware to execute
893901
* the atomic operation. this should be safe in all cases as either 1) the user has assured us they will
894-
* never use atomics with count > 1, 2) we have the accumulate lock, or 3) we have an exclusive lock */
895-
if (origin_extent <= 8 && 1 == origin_count && !ompi_osc_rdma_peer_local_base (peer)) {
902+
* never use atomics with count > 1, 2) we have the accumulate lock, or 3) we have an exclusive lock.
903+
* avoid using the NIC if the operation can be done directly in shared memory. */
904+
if (origin_extent <= 8 && 1 == origin_count && !use_shared_mem) {
896905
if (module->acc_use_amo && ompi_datatype_is_predefined (origin_datatype)) {
897906
if (NULL == result_addr) {
898907
ret = ompi_osc_rdma_acc_single_atomic (sync, origin_addr, origin_datatype, origin_extent, peer, target_address,

ompi/mca/osc/rdma/osc_rdma_component.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -543,7 +543,8 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
543543
local_size = ompi_comm_size (shared_comm);
544544

545545
/* CPU atomics can be used if every process is on the same node or the NIC allows mixing CPU and NIC atomics */
546-
module->use_cpu_atomics = local_size == global_size || (module->selected_btl->btl_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB);
546+
module->single_node = local_size == global_size;
547+
module->use_cpu_atomics = module->single_node || (module->selected_btl->btl_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB);
547548

548549
if (1 == local_size) {
549550
/* no point using a shared segment if there are no other processes on this node */

0 commit comments

Comments
 (0)