osc/rdma, btl: fix two issues with one-sided #9594
Changes from all commits
6e7984d
d9bdca0
f5ab467
e889cdd
11dea3c
ded237a
465af9a

@@ -357,6 +357,7 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int mpi_assert, ompi_win_t *win)
         return ret;
     }
 
+
Review comment: ?

 int ompi_osc_rdma_start_atomic (ompi_group_t *group, int mpi_assert, ompi_win_t *win)
 {
     ompi_osc_rdma_module_t *module = GET_MODULE(win);

@@ -590,6 +591,82 @@ int ompi_osc_rdma_test_atomic (ompi_win_t *win, int *flag)
     return OMPI_SUCCESS;
 }
 
+/**
+ * This function implements a different barrier mechanism for Fence,
Review comment: different than what?
+ * when any of the selected btls does not support remote completion.
+ * This barrier is based on imposing the MCA_BTL_IN_ORDER_RDMA_ATOMICS
+ * ordering requirement on selected btls.
+ */
+static
+int ompi_osc_rdma_fence_barrier_by_ordered_channel (ompi_win_t *win)
+{
+    ompi_osc_rdma_module_t *module = GET_MODULE(win);
+    ompi_osc_rdma_state_t *state = module->state;
+    ompi_osc_rdma_sync_t *sync = &module->all_sync;
+    ompi_osc_rdma_peer_t **peers;
+    ompi_group_t *group;
+    int num_peers;
+    int ret;
+
+    assert(module->btl_order == MCA_BTL_IN_ORDER_RDMA_ATOMICS);
+    OPAL_THREAD_LOCK(&module->lock);
+
+    if (ompi_comm_size(module->comm) == 1) {
+        OPAL_THREAD_UNLOCK(&(module->lock));
+        return OMPI_SUCCESS;
+    }
+
+    ret = ompi_comm_group(module->comm, &group);
+    if (OMPI_SUCCESS != ret) {
+        OPAL_THREAD_UNLOCK(&(module->lock));
+        return ret;
Review comment: indenting
+    }
+
+    num_peers = sync->num_peers;
+    assert(ompi_group_size(group) == num_peers);
+    peers = ompi_osc_rdma_get_peers(module, group);
+    if (NULL == peers) {
+        OPAL_THREAD_UNLOCK(&(module->lock));
+        return OMPI_ERR_OUT_OF_RESOURCE;
+    }
+
+    module->state->num_fenced_peers = 0;
+    OPAL_THREAD_UNLOCK(&(module->lock));
+    ret = module->comm->c_coll->coll_barrier(module->comm, module->comm->c_coll->coll_barrier_module);
+    if (ret) {
Review comment: no implicit cast from int to bool
+        OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "barrier failed!");
+        return ret;
+    }
+
+    /* for each process in the group increment their number of fenced peers */
+    for (int i = 0 ; i < num_peers; ++i) {
Review comment: be consistent in spacing around ;
+        ompi_osc_rdma_peer_t *peer = peers[i];
+        intptr_t target = (intptr_t) peer->state + offsetof (ompi_osc_rdma_state_t, num_fenced_peers);
Review comment: no space between offsetof and argument.
+
+        /* the usage of peer local state requires the selected btls to support remote completion;
+         * if that were the case, this function would not have been called
+         */
+        assert (!ompi_osc_rdma_peer_local_state (peer));
+        ret = ompi_osc_rdma_lock_btl_op (module, peer, target, MCA_BTL_ATOMIC_ADD, 1, true);
Review comment: I'm not sure this is sufficient. BTL operations (including RDMA operations) are not implicitly ordered; an ordering flag must be set on every operation that must be ordered for this to be guaranteed to work. TCP doesn't take advantage of reordering (and I don't think OFI does either), so you likely won't see this in testing, but the verbs BTL did have an ordering problem and we haven't audited the other BTLs, so we should really follow the spec.
+        if (OMPI_SUCCESS != ret) {
+            return ret;
+        }
+    }
+
+    ompi_osc_rdma_release_peers (peers, num_peers);
+    ompi_group_free (&group);
+
+    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "increased fenced_peer counter of all peers");
+    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "waiting for all peers to increase my counter");
+    while (num_peers != state->num_fenced_peers) {
+        ompi_osc_rdma_progress (module);
+        opal_atomic_mb ();
+    }
+
+    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "received fence message from all peers");
+    return OMPI_SUCCESS;
+}
+
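On the ordering concern raised in the review comment above: BTL puts and atomics carry an explicit order argument, and only operations tagged with the same ordering class are kept in order, so the tag has to travel with every operation issued during the epoch, not just the final counter update. Below is a minimal sketch of that idea, assuming the BTL 3.x btl_put signature; the wrapper name and the OSC_RDMA_FENCE_ORDER constant are hypothetical illustrations, not part of this patch.

#include "opal/mca/btl/btl.h"

/* Hypothetical ordering class shared by all operations in a fence epoch.
 * A real implementation would use whatever ordering value the btl assigns;
 * MCA_BTL_NO_ORDER would opt an operation out of ordering entirely. */
#define OSC_RDMA_FENCE_ORDER 0

/* Issue a put that stays ordered with respect to every other operation
 * tagged OSC_RDMA_FENCE_ORDER on this endpoint (a sketch, not the patch). */
static int osc_rdma_ordered_put (mca_btl_base_module_t *btl,
                                 struct mca_btl_base_endpoint_t *endpoint,
                                 void *local_address, uint64_t remote_address,
                                 struct mca_btl_base_registration_handle_t *local_handle,
                                 struct mca_btl_base_registration_handle_t *remote_handle,
                                 size_t size, mca_btl_base_rdma_completion_fn_t cbfunc,
                                 void *cbcontext)
{
    /* the order tag goes on *every* ordered operation; tagging only the
     * final atomic leaves the btl free to reorder the earlier transfers */
    return btl->btl_put (btl, endpoint, local_address, remote_address,
                         local_handle, remote_handle, size, /*flags=*/ 0,
                         /*order=*/ OSC_RDMA_FENCE_ORDER, cbfunc, cbcontext, NULL);
}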
 int ompi_osc_rdma_fence_atomic (int mpi_assert, ompi_win_t *win)
 {
     ompi_osc_rdma_module_t *module = GET_MODULE(win);

@@ -627,7 +704,18 @@ int ompi_osc_rdma_fence_atomic (int mpi_assert, ompi_win_t *win)
     ompi_osc_rdma_sync_rdma_complete (&module->all_sync);
 
-    /* ensure all writes to my memory are complete (both local stores, and RMA operations) */
-    ret = module->comm->c_coll->coll_barrier(module->comm, module->comm->c_coll->coll_barrier_module);
+    if (module->btl_support_remote_completion) {
+        /* if all selected btls support remote completion, then all RMA operations have finished
+         * on the remote side. A barrier is enough to complete the fence.
Review comment: indenting
+         */
+        ret = module->comm->c_coll->coll_barrier(module->comm, module->comm->c_coll->coll_barrier_module);
+    } else {
+        /*
+         * if any selected btl does not support remote completion, we have to send a completion
+         * message (through the same endpoint as the data transfer) to every peer, then wait
+         * for a message from every peer.
+         */
+        ret = ompi_osc_rdma_fence_barrier_by_ordered_channel(win);
+    }
 
     if (mpi_assert & MPI_MODE_NOSUCCEED) {
         /* as specified in MPI-3 p 438 3-5 the fence can end an epoch. it isn't explicitly
Review comment: The commit message where you introduce the local leader fix appears to have two different commits in it; please properly merge.