Skip to content

Commit 3eadd81

Browse files
authored
Merge pull request #9938 from awlauria/btl_remote_completion_v5.0.x
v5.0.x: btl: introduce flag MCA_BTL_FLAGS_RDMA_REMOTE_COMPLETION
2 parents d402f3b + 3ec41b6 commit 3eadd81

File tree

5 files changed

+55
-3
lines changed

5 files changed

+55
-3
lines changed

opal/mca/btl/base/btl_base_am_rdma.c

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -364,6 +364,7 @@ static inline int mca_btl_base_am_rdma_advance(mca_btl_base_module_t *btl,
364364
mca_btl_base_rdma_context_t *context,
365365
bool send_descriptor)
366366
{
367+
int ret;
367368
const size_t remaining = context->total_size - context->sent;
368369

369370
if (0 == remaining) {
@@ -401,7 +402,12 @@ static inline int mca_btl_base_am_rdma_advance(mca_btl_base_module_t *btl,
401402
}
402403

403404
if (send_descriptor) {
404-
return btl->btl_send(btl, endpoint, descriptor, mca_btl_base_rdma_tag(hdr->type));
405+
assert(0 != (descriptor->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK));
406+
ret = btl->btl_send(btl, endpoint, descriptor, mca_btl_base_rdma_tag(hdr->type));
407+
if (ret == 1) {
408+
ret = OPAL_SUCCESS;
409+
}
410+
return ret;
405411
}
406412

407413
/* queue for later to avoid btl_send in callback */
@@ -606,7 +612,14 @@ static int mca_btl_base_am_rdma_respond(mca_btl_base_module_t *btl,
606612

607613
send_descriptor->des_cbfunc = NULL;
608614

615+
/* There is no callback for the response descriptor, therefore it is
616+
* safe to treat 0 and 1 return codes the same
617+
*/
609618
int ret = btl->btl_send(btl, endpoint, send_descriptor, mca_btl_base_rdma_resp_tag());
619+
if (ret == 1) {
620+
ret = OPAL_SUCCESS;
621+
}
622+
610623
if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) {
611624
*descriptor = send_descriptor;
612625
}
@@ -779,11 +792,12 @@ static int mca_btl_base_am_rdma_progress(void)
779792
mca_btl_base_rdma_context_t *context = \
780793
(mca_btl_base_rdma_context_t *) \
781794
descriptor->descriptor->des_context; \
795+
assert(0 != (descriptor->descriptor->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK)); \
782796
int ret = descriptor->btl->btl_send(descriptor->btl, \
783797
descriptor->endpoint, \
784798
descriptor->descriptor, \
785799
mca_btl_base_rdma_tag(context->type)); \
786-
if (OPAL_SUCCESS == ret) { \
800+
if (OPAL_SUCCESS == ret || 1 == ret) { \
787801
opal_list_remove_item(&default_module.queued_initiator_descriptors, \
788802
&descriptor->super); \
789803
} \
@@ -1132,5 +1146,28 @@ int mca_btl_base_am_rdma_init(mca_btl_base_module_t *btl)
11321146
OBJ_CONSTRUCT(&default_module, mca_btl_base_am_rdma_module_t);
11331147
}
11341148

1149+
/* This section checks whether we can claim support of remote completion.
1150+
*
1151+
* In terms of remote completion, we are mainly interested in put and atomic ops,
1152+
* because get, atomic fops and atomic cswap support remote completion by their nature.
1153+
*
1154+
* For active message put (AM put), the target side will send a response, and the initiator
1155+
* side will wait for the response to complete the put operation. Thus if AM put is based on send,
1156+
* it supports remote completion. (If AM put is based on get, it does not support remote
1157+
* completion because the target side does not wait for get's completion to send response).
1158+
*
1159+
* Active message RDMA/atomics does not implement atomic ops. Users are supposed to
1160+
* use atomic fops (unless the btl supports atomic ops natively).
1161+
*
1162+
* In all, the conditions for AM rdma to claim support of remote completion are:
1163+
* 1. AM put is enabled (which means the btl does not support put)
1164+
* 2. AM put does not use get (so it must use send)
1165+
* 3. btl does not have native atomics ops support.
1166+
*/
1167+
if ((btl->btl_flags & MCA_BTL_FLAGS_PUT_AM) && !mca_btl_base_rdma_use_rdma_get(btl) &&
1168+
!(btl->btl_flags & MCA_BTL_FLAGS_ATOMIC_OPS)) {
1169+
btl->btl_flags |= MCA_BTL_FLAGS_RDMA_REMOTE_COMPLETION;
1170+
}
1171+
11351172
return OPAL_SUCCESS;
11361173
}

opal/mca/btl/btl.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,16 @@ typedef uint8_t mca_btl_base_tag_t;
263263
/* The BTL has active-message based atomics */
264264
#define MCA_BTL_FLAGS_ATOMIC_AM_FOP 0x400000
265265

266+
/** The BTL's RDMA/atomic operations support remote completion.
267+
* When the BTL reports the completion of an RDMA/atomic operation
268+
* on the initiator side, the operation has also finished on the target side.
269+
*
270+
* Note, this flag is for put and atomic write operations. Operations
271+
* like get, atomic fetch and atomic swap support remote
272+
* completion by nature.
273+
*/
274+
#define MCA_BTL_FLAGS_RDMA_REMOTE_COMPLETION 0x800000
275+
266276
/* Default exclusivity levels */
267277
#define MCA_BTL_EXCLUSIVITY_HIGH (64 * 1024) /* internal loopback */
268278
#define MCA_BTL_EXCLUSIVITY_DEFAULT 1024 /* GM/IB/etc. */

opal/mca/btl/ofi/btl_ofi_module.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -390,8 +390,10 @@ mca_btl_ofi_module_t *mca_btl_ofi_module_alloc(int mode)
390390
module->super.btl_register_mem = mca_btl_ofi_register_mem;
391391
module->super.btl_deregister_mem = mca_btl_ofi_deregister_mem;
392392

393+
/* btl/ofi supports remote completion because it requires the FI_DELIVERY_COMPLETE capability
394+
*/
393395
module->super.btl_flags |= MCA_BTL_FLAGS_ATOMIC_FOPS | MCA_BTL_FLAGS_ATOMIC_OPS
394-
| MCA_BTL_FLAGS_RDMA;
396+
| MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_RDMA_REMOTE_COMPLETION;
395397

396398
module->super.btl_atomic_flags = MCA_BTL_ATOMIC_SUPPORTS_ADD | MCA_BTL_ATOMIC_SUPPORTS_SWAP
397399
| MCA_BTL_ATOMIC_SUPPORTS_CSWAP

opal/mca/btl/self/btl_self_component.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,8 @@ static int mca_btl_self_component_register(void)
107107
mca_btl_self.btl_rdma_pipeline_frag_size = INT_MAX;
108108
mca_btl_self.btl_min_rdma_pipeline_size = 0;
109109
mca_btl_self.btl_flags = MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_SEND;
110+
/* for self, remote completion is local completion */
111+
mca_btl_self.btl_flags |= MCA_BTL_FLAGS_RDMA_REMOTE_COMPLETION;
110112
mca_btl_self.btl_bandwidth = 100;
111113
mca_btl_self.btl_latency = 0;
112114
mca_btl_base_param_register(&mca_btl_self_component.super.btl_version, &mca_btl_self);

opal/mca/btl/ugni/btl_ugni_component.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,7 @@ static int btl_ugni_component_register(void)
469469
mca_btl_ugni_module.super.btl_flags = MCA_BTL_FLAGS_SEND | MCA_BTL_FLAGS_RDMA
470470
| MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_ATOMIC_OPS
471471
| MCA_BTL_FLAGS_ATOMIC_FOPS;
472+
mca_btl_ugni_module.super.btl_flags |= MCA_BTL_FLAGS_RDMA_REMOTE_COMPLETION;
472473
mca_btl_ugni_module.super.btl_atomic_flags = MCA_BTL_ATOMIC_SUPPORTS_ADD
473474
| MCA_BTL_ATOMIC_SUPPORTS_AND
474475
| MCA_BTL_ATOMIC_SUPPORTS_OR

0 commit comments

Comments
 (0)