From 72cd4eaa901e5fc37e0d4bcf143378d28d35717c Mon Sep 17 00:00:00 2001 From: "Mamzi Bayatpour mbayatpour@nvidia.com ()" Date: Mon, 14 Mar 2022 11:48:38 -0700 Subject: [PATCH] Fixes in OSC/UCX: - Failure in window Post and Complete routines - Failure in Put with 1 byte for Dynamic windows - Failure in Accumulate with noncontig dt - Hang in PSCW - Flush every attached win in fence for dynamic win Signed-off-by: Mamzi Bayatpour Co-authored-by: Tomislav Janjusic --- ompi/mca/osc/ucx/osc_ucx_active_target.c | 33 ++++++++++++++++++------ ompi/mca/osc/ucx/osc_ucx_comm.c | 1 + ompi/mca/osc/ucx/osc_ucx_component.c | 2 +- 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/ompi/mca/osc/ucx/osc_ucx_active_target.c b/ompi/mca/osc/ucx/osc_ucx_active_target.c index ea6c956cb9c..1bc095a758a 100644 --- a/ompi/mca/osc/ucx/osc_ucx_active_target.c +++ b/ompi/mca/osc/ucx/osc_ucx_active_target.c @@ -74,9 +74,20 @@ int ompi_osc_ucx_fence(int mpi_assert, struct ompi_win_t *win) { } if (!(mpi_assert & MPI_MODE_NOPRECEDE)) { - ret = opal_common_ucx_wpmem_flush(module->mem, OPAL_COMMON_UCX_SCOPE_WORKER, 0/*ignore*/); - if (ret != OMPI_SUCCESS) { - return ret; + if (module->flavor == MPI_WIN_FLAVOR_DYNAMIC) { + int i; + for (i = 0; i < OMPI_OSC_UCX_ATTACH_MAX; i++) { + ret = opal_common_ucx_wpmem_flush(module->local_dynamic_win_info[i].mem, + OPAL_COMMON_UCX_SCOPE_WORKER, 0/*ignore*/); + if (ret != OMPI_SUCCESS) { + return ret; + } + } + } else { + ret = opal_common_ucx_wpmem_flush(module->mem, OPAL_COMMON_UCX_SCOPE_WORKER, 0/*ignore*/); + if (ret != OMPI_SUCCESS) { + return ret; + } } } @@ -181,14 +192,14 @@ int ompi_osc_ucx_complete(struct ompi_win_t *win) { for (i = 0; i < size; i++) { uint64_t remote_addr = module->state_addrs[module->start_grp_ranks[i]] + OSC_UCX_STATE_COMPLETE_COUNT_OFFSET; // write to state.complete_count on remote side - ret = opal_common_ucx_wpmem_post(module->mem, UCP_ATOMIC_POST_OP_ADD, + ret = opal_common_ucx_wpmem_post(module->state_mem, UCP_ATOMIC_POST_OP_ADD, 1, module->start_grp_ranks[i], sizeof(uint64_t), remote_addr); if (ret != OMPI_SUCCESS) { OSC_UCX_VERBOSE(1, "opal_common_ucx_mem_post failed: %d", ret); } - ret = opal_common_ucx_wpmem_flush(module->mem, OPAL_COMMON_UCX_SCOPE_EP, + ret = opal_common_ucx_wpmem_flush(module->state_mem, OPAL_COMMON_UCX_SCOPE_EP, module->start_grp_ranks[i]); if (ret != OMPI_SUCCESS) { return ret; @@ -243,10 +254,13 @@ int ompi_osc_ucx_post(struct ompi_group_t *group, int mpi_assert, struct ompi_wi uint64_t remote_addr = module->state_addrs[ranks_in_win_grp[i]] + OSC_UCX_STATE_POST_INDEX_OFFSET; // write to state.post_index on remote side uint64_t curr_idx = 0, result = 0; + + /* do fop first to get an post index */ - ret = opal_common_ucx_wpmem_fetch(module->mem, UCP_ATOMIC_FETCH_OP_FADD, + ret = opal_common_ucx_wpmem_fetch(module->state_mem, UCP_ATOMIC_FETCH_OP_FADD, 1, ranks_in_win_grp[i], &result, sizeof(result), remote_addr); + if (ret != OMPI_SUCCESS) { ret = OMPI_ERROR; goto cleanup; @@ -258,9 +272,12 @@ int ompi_osc_ucx_post(struct ompi_group_t *group, int mpi_assert, struct ompi_wi /* do cas to send post message */ do { - ret = opal_common_ucx_wpmem_cmpswp(module->mem, 0, result, - myrank + 1, &result, sizeof(result), + + result = myrank + 1; + ret = opal_common_ucx_wpmem_cmpswp(module->state_mem, 0, result, + ranks_in_win_grp[i], &result, sizeof(result), remote_addr); + if (ret != OMPI_SUCCESS) { ret = OMPI_ERROR; goto cleanup; diff --git a/ompi/mca/osc/ucx/osc_ucx_comm.c b/ompi/mca/osc/ucx/osc_ucx_comm.c index efeea95c347..115f5a7b7df 100644 --- a/ompi/mca/osc/ucx/osc_ucx_comm.c +++ b/ompi/mca/osc/ucx/osc_ucx_comm.c @@ -656,6 +656,7 @@ int accumulate_req(const void *origin_addr, int origin_count, if (ret != OMPI_SUCCESS) { return ret; } + temp_count *= target_count; } ompi_datatype_get_true_extent(temp_dt, &temp_lb, &temp_extent); temp_addr = free_ptr = malloc(temp_extent * temp_count); diff --git a/ompi/mca/osc/ucx/osc_ucx_component.c b/ompi/mca/osc/ucx/osc_ucx_component.c index 95c18c117d0..db3d89302b9 100644 --- a/ompi/mca/osc/ucx/osc_ucx_component.c +++ b/ompi/mca/osc/ucx/osc_ucx_component.c @@ -584,7 +584,7 @@ int ompi_osc_find_attached_region_position(ompi_osc_dynamic_win_info_t *dynamic_ if (dynamic_wins[mid_index].base > base) { return ompi_osc_find_attached_region_position(dynamic_wins, min_index, mid_index-1, base, len, insert); - } else if (base + len < dynamic_wins[mid_index].base + dynamic_wins[mid_index].size) { + } else if (base + len <= dynamic_wins[mid_index].base + dynamic_wins[mid_index].size) { return mid_index; } else { return ompi_osc_find_attached_region_position(dynamic_wins, mid_index+1, max_index,