Skip to content

Commit 8d130e1

Browse files
committed
UCX osc: properly release exclusive lock to avoid lockup
Signed-off-by: Joseph Schuchart <[email protected]>
(cherry picked from commit 08cb638)
1 parent 83f6c57 commit 8d130e1

File tree

1 file changed

+7
-9
lines changed

1 file changed

+7
-9
lines changed

ompi/mca/osc/ucx/osc_ucx_passive_target.c

Lines changed: 7 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -90,21 +90,19 @@ static inline int start_exclusive(ompi_osc_ucx_module_t *module, int target) {
9090
}
9191

9292
static inline int end_exclusive(ompi_osc_ucx_module_t *module, int target) {
93-
uint64_t result_value = 0;
9493
ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, target);
9594
ucp_rkey_h rkey = (module->state_info_array)[target].rkey;
9695
uint64_t remote_addr = (module->state_info_array)[target].addr + OSC_UCX_STATE_LOCK_OFFSET;
97-
int ret;
96+
ucs_status_t status;
9897

99-
ret = opal_common_ucx_atomic_fetch(ep, UCP_ATOMIC_FETCH_OP_SWAP, TARGET_LOCK_UNLOCKED,
100-
&result_value, sizeof(result_value),
101-
remote_addr, rkey, mca_osc_ucx_component.ucp_worker);
102-
if (OMPI_SUCCESS != ret) {
103-
return ret;
98+
status = ucp_atomic_post(ep, UCP_ATOMIC_POST_OP_ADD,
99+
-((int64_t)TARGET_LOCK_EXCLUSIVE), sizeof(uint64_t),
100+
remote_addr, rkey);
101+
if (UCS_OK != status) {
102+
OSC_UCX_VERBOSE(1, "ucp_atomic_post(OP_ADD) failed: %d", status);
103+
return OMPI_ERROR;
104104
}
105105

106-
assert(result_value >= TARGET_LOCK_EXCLUSIVE);
107-
108106
return OMPI_SUCCESS;
109107
}
110108

0 commit comments

Comments
 (0)