From 6d0bdb523ab2d9a35b5dd2c77787012231d2b2c7 Mon Sep 17 00:00:00 2001 From: Wei Zhang Date: Tue, 7 Sep 2021 03:18:06 +0000 Subject: [PATCH 1/2] osc/rdma: do not set use_memory_registration in allocate_state_shared Currently, in function allocate_state_shared, "module->use_memory_registration" is set to false when all MPI ranks are on the same instance (local_size == global_size). This is harmful when there is only one btl in use, in which case the selected btl should determine whether memory registration should be used. For example, btl/ofi uses memory registration even on the same instance. This is unnecessary when there are two btls in use, in which case btls that use memory registration have been excluded in function ompi_osc_rdma_query_alternate_btls. Therefore, this commit removes the setting of module->use_memory_registration in allocate_state_shared. Signed-off-by: Wei Zhang --- ompi/mca/osc/rdma/osc_rdma_component.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/ompi/mca/osc/rdma/osc_rdma_component.c b/ompi/mca/osc/rdma/osc_rdma_component.c index 74398060081..18aefd83c3d 100644 --- a/ompi/mca/osc/rdma/osc_rdma_component.c +++ b/ompi/mca/osc/rdma/osc_rdma_component.c @@ -605,10 +605,6 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s return allocate_state_single (module, base, size); } - if (local_size == global_size) { - module->use_memory_registration = false; - } - OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "allocating shared internal state"); local_rank_array_size = sizeof (ompi_osc_rdma_rank_data_t) * RANK_ARRAY_COUNT (module); From 8b1984238d0ffcb930752e27e11c5c1b4ba9c397 Mon Sep 17 00:00:00 2001 From: Wei Zhang Date: Tue, 7 Sep 2021 21:12:17 +0000 Subject: [PATCH 2/2] osc/rdma: setup the state_endpoint and state_btl_index of first peer In function "allocate_state_shared", the peer->state_endpoint was copied from the 1st peer (a.k.a. local_leader). 
However, state_endpoint and state_btl_index of the 1st peer were not set, causing all peers' state_endpoint to be NULL. This patch addresses the issue by setting the 1st peer's state_endpoint and state_btl_index from its data_endpoint and data_btl_index. Signed-off-by: Wei Zhang --- ompi/mca/osc/rdma/osc_rdma_component.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ompi/mca/osc/rdma/osc_rdma_component.c b/ompi/mca/osc/rdma/osc_rdma_component.c index 18aefd83c3d..a2e8dca6fa7 100644 --- a/ompi/mca/osc/rdma/osc_rdma_component.c +++ b/ompi/mca/osc/rdma/osc_rdma_component.c @@ -784,7 +784,10 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s peer->state_handle = (mca_btl_base_registration_handle_t *) state_region->btl_handle_data; } peer->state = (osc_rdma_counter_t) ((uintptr_t) state_region->base + state_base + module->state_size * i); - if (i > 0) { + if (i==0) { + peer->state_endpoint = peer->data_endpoint; + peer->state_btl_index = peer->data_btl_index; + } else { peer->state_endpoint = local_leader->state_endpoint; peer->state_btl_index = local_leader->state_btl_index; }