Skip to content

Commit 81a1934

Browse files
committed
osc/rdma: Use BTL am-rdma explicit interface
Switch from using the implicit BTL interface (where the am-rdma interface just extends missing functionality in the BTL) to the new explicit interface (where the OSC RDMA interface is the only maintainer of the BTL list). With this change, alternate BTLs do not have to support REMOTE_COMPLETION to be selected (because the AM RDMA interface always provides remote completion when we request it, as this patch does). Any BTL that supports Active Messages (i.e., all of them) should be able to support the OSC RDMA required semantics, eliminating the problem of creating windows with no serviceable BTLs. Signed-off-by: Brian Barrett <[email protected]>
1 parent 3bb30e7 commit 81a1934

File tree

6 files changed

+240
-148
lines changed

6 files changed

+240
-148
lines changed

ompi/mca/osc/rdma/osc_rdma.h

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
#include "ompi/mca/osc/osc.h"
4545
#include "ompi/mca/osc/base/base.h"
4646
#include "opal/mca/btl/btl.h"
47+
#include "opal/mca/btl/base/btl_base_am_rdma.h"
4748
#include "ompi/memchecker.h"
4849
#include "ompi/op/op.h"
4950
#include "opal/align.h"
@@ -255,6 +256,8 @@ struct ompi_osc_rdma_module_t {
255256
/** lock for peer hash table/array */
256257
opal_mutex_t peer_lock;
257258

259+
/* ******************* communication *********************** */
260+
258261
/* we currently support two modes of operation, a single
259262
* accelerated btl (which can use memory registration and can use
260263
* btl_flush() and one or more alternate btls, which cannot use
@@ -265,18 +268,27 @@ struct ompi_osc_rdma_module_t {
265268

266269
union {
267270
struct {
268-
struct mca_btl_base_module_t *accelerated_btl;
271+
mca_btl_base_module_t *accelerated_btl;
269272
};
270273
struct {
271-
struct mca_btl_base_module_t **alternate_btls;
274+
mca_btl_base_am_rdma_module_t **alternate_am_rdmas;
272275
uint8_t alternate_btl_count;
273276
};
274277
};
275278

276-
/** Only true if one BTL is in use. Memory registration is only supported when
277-
* using a single BTL. */
279+
/** Does the selected BTL require memory registration? This field
280+
will be false when alternate BTLs are used, and the value
281+
when an accelerated BTL is used depends on the registration
282+
requirements of the underlying BTL. */
278283
bool use_memory_registration;
279284

285+
size_t put_alignment;
286+
size_t get_alignment;
287+
size_t put_limit;
288+
size_t get_limit;
289+
290+
uint32_t atomic_flags;
291+
280292
/** registered fragment used for locally buffered RDMA transfers */
281293
struct ompi_osc_rdma_frag_t *rdma_frag;
282294

@@ -650,8 +662,15 @@ static inline mca_btl_base_module_t *ompi_osc_rdma_selected_btl (ompi_osc_rdma_m
650662
return module->accelerated_btl;
651663
} else {
652664
assert(btl_index < module->alternate_btl_count);
653-
return module->alternate_btls[btl_index];
665+
return module->alternate_am_rdmas[btl_index]->btl;
654666
}
655667
}
656668

669+
670+
static inline mca_btl_base_am_rdma_module_t *ompi_osc_rdma_selected_am_rdma(ompi_osc_rdma_module_t *module, uint8_t btl_index) {
671+
assert(!module->use_accelerated_btl);
672+
assert(btl_index < module->alternate_btl_count);
673+
return module->alternate_am_rdmas[btl_index];
674+
}
675+
657676
#endif /* OMPI_OSC_RDMA_H */

ompi/mca/osc/rdma/osc_rdma_accumulate.c

Lines changed: 18 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -164,13 +164,11 @@ static int ompi_osc_rdma_fetch_and_op_atomic (ompi_osc_rdma_sync_t *sync, const
164164
mca_btl_base_registration_handle_t *target_handle, ompi_op_t *op, ompi_osc_rdma_request_t *req)
165165
{
166166
ompi_osc_rdma_module_t *module = sync->module;
167-
mca_btl_base_module_t *selected_btl = ompi_osc_rdma_selected_btl (module, peer->data_btl_index);
168-
int32_t atomic_flags = selected_btl->btl_atomic_flags;
169167
int btl_op, flags;
170168
int64_t origin;
171169

172-
if ((8 != extent && !((MCA_BTL_ATOMIC_SUPPORTS_32BIT & atomic_flags) && 4 == extent)) ||
173-
(!(OMPI_DATATYPE_FLAG_DATA_INT & dt->super.flags) && !(MCA_BTL_ATOMIC_SUPPORTS_FLOAT & atomic_flags)) ||
170+
if ((8 != extent && !((MCA_BTL_ATOMIC_SUPPORTS_32BIT & module->atomic_flags) && 4 == extent)) ||
171+
(!(OMPI_DATATYPE_FLAG_DATA_INT & dt->super.flags) && !(MCA_BTL_ATOMIC_SUPPORTS_FLOAT & module->atomic_flags)) ||
174172
!ompi_op_is_intrinsic (op) || (0 == ompi_osc_rdma_op_mapping[op->op_type])) {
175173
return OMPI_ERR_NOT_SUPPORTED;
176174
}
@@ -242,13 +240,11 @@ static int ompi_osc_rdma_acc_single_atomic (ompi_osc_rdma_sync_t *sync, const vo
242240
ompi_op_t *op, ompi_osc_rdma_request_t *req)
243241
{
244242
ompi_osc_rdma_module_t *module = sync->module;
245-
mca_btl_base_module_t *selected_btl = ompi_osc_rdma_selected_btl (module, peer->data_btl_index);
246-
int32_t atomic_flags = selected_btl->btl_atomic_flags;
247243
int btl_op, flags;
248244
int64_t origin;
249245

250-
if ((8 != extent && !((MCA_BTL_ATOMIC_SUPPORTS_32BIT & atomic_flags) && 4 == extent)) ||
251-
(!(OMPI_DATATYPE_FLAG_DATA_INT & dt->super.flags) && !(MCA_BTL_ATOMIC_SUPPORTS_FLOAT & atomic_flags)) ||
246+
if ((8 != extent && !((MCA_BTL_ATOMIC_SUPPORTS_32BIT & module->atomic_flags) && 4 == extent)) ||
247+
(!(OMPI_DATATYPE_FLAG_DATA_INT & dt->super.flags) && !(MCA_BTL_ATOMIC_SUPPORTS_FLOAT & module->atomic_flags)) ||
252248
!ompi_op_is_intrinsic (op) || (0 == ompi_osc_rdma_op_mapping[op->op_type])) {
253249
return OMPI_ERR_NOT_SUPPORTED;
254250
}
@@ -663,13 +659,11 @@ static inline int ompi_osc_rdma_cas_atomic (ompi_osc_rdma_sync_t *sync, const vo
663659
bool lock_acquired)
664660
{
665661
ompi_osc_rdma_module_t *module = sync->module;
666-
mca_btl_base_module_t *btl = ompi_osc_rdma_selected_btl (module, peer->data_btl_index);
667-
int32_t atomic_flags = btl->btl_atomic_flags;
668662
const size_t size = datatype->super.size;
669663
int64_t compare, source;
670664
int flags, ret;
671665

672-
if (8 != size && !(4 == size && (MCA_BTL_ATOMIC_SUPPORTS_32BIT & atomic_flags))) {
666+
if (8 != size && !(4 == size && (MCA_BTL_ATOMIC_SUPPORTS_32BIT & module->atomic_flags))) {
673667
return OMPI_ERR_NOT_SUPPORTED;
674668
}
675669

@@ -717,7 +711,6 @@ static inline int cas_rdma (ompi_osc_rdma_sync_t *sync, const void *source_addr,
717711
mca_btl_base_registration_handle_t *target_handle, bool lock_acquired)
718712
{
719713
ompi_osc_rdma_module_t *module = sync->module;
720-
mca_btl_base_module_t *btl = ompi_osc_rdma_selected_btl (module, peer->data_btl_index);
721714
unsigned long len = datatype->super.size;
722715
mca_btl_base_registration_handle_t *local_handle = NULL;
723716
ompi_osc_rdma_frag_t *frag = NULL;
@@ -742,18 +735,21 @@ static inline int cas_rdma (ompi_osc_rdma_sync_t *sync, const void *source_addr,
742735
return OMPI_SUCCESS;
743736
}
744737

745-
if (btl->btl_register_mem && len > btl->btl_put_local_registration_threshold) {
746-
do {
747-
ret = ompi_osc_rdma_frag_alloc (module, len, &frag, &ptr);
748-
if (OPAL_UNLIKELY(OMPI_SUCCESS == ret)) {
749-
break;
750-
}
738+
if (module->use_memory_registration) {
739+
mca_btl_base_module_t *btl = ompi_osc_rdma_selected_btl (module, peer->data_btl_index);
740+
if (len > btl->btl_put_local_registration_threshold) {
741+
do {
742+
ret = ompi_osc_rdma_frag_alloc(module, len, &frag, &ptr);
743+
if (OPAL_UNLIKELY(OMPI_SUCCESS == ret)) {
744+
break;
745+
}
751746

752-
ompi_osc_rdma_progress (module);
753-
} while (1);
747+
ompi_osc_rdma_progress (module);
748+
} while (1);
754749

755-
memcpy (ptr, source_addr, len);
756-
local_handle = frag->handle;
750+
memcpy(ptr, source_addr, len);
751+
local_handle = frag->handle;
752+
}
757753
}
758754

759755
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "RDMA compare-and-swap initiating blocking btl put...");

ompi/mca/osc/rdma/osc_rdma_btl_comm.h

Lines changed: 55 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,17 @@ ompi_osc_rdma_btl_put(ompi_osc_rdma_module_t *module, uint8_t btl_index,
3636
mca_btl_base_rdma_completion_fn_t cbfunc,
3737
void *cbcontext, void *cbdata)
3838
{
39-
mca_btl_base_module_t *btl = ompi_osc_rdma_selected_btl(module, btl_index);
40-
41-
return btl->btl_put(btl, endpoint, local_address, remote_address,
42-
local_handle, remote_handle, size, flags, order,
43-
cbfunc, cbcontext, cbdata);
39+
if (module->use_accelerated_btl) {
40+
mca_btl_base_module_t *btl = ompi_osc_rdma_selected_btl(module, btl_index);
41+
return btl->btl_put(btl, endpoint, local_address, remote_address,
42+
local_handle, remote_handle, size, flags, order,
43+
cbfunc, cbcontext, cbdata);
44+
} else {
45+
mca_btl_base_am_rdma_module_t *am_rdma = ompi_osc_rdma_selected_am_rdma(module, btl_index);
46+
return am_rdma->am_btl_put(am_rdma, endpoint, local_address, remote_address,
47+
local_handle, remote_handle, size, flags, order,
48+
cbfunc, cbcontext, cbdata);
49+
}
4450
}
4551

4652

@@ -54,11 +60,18 @@ ompi_osc_rdma_btl_get(ompi_osc_rdma_module_t *module, uint8_t btl_index,
5460
mca_btl_base_rdma_completion_fn_t cbfunc,
5561
void *cbcontext, void *cbdata)
5662
{
57-
mca_btl_base_module_t *btl = ompi_osc_rdma_selected_btl(module, btl_index);
5863

59-
return btl->btl_get(btl, endpoint, local_address, remote_address,
60-
local_handle, remote_handle, size, flags, order,
61-
cbfunc, cbcontext, cbdata);
64+
if (module->use_accelerated_btl) {
65+
mca_btl_base_module_t *btl = ompi_osc_rdma_selected_btl(module, btl_index);
66+
return btl->btl_get(btl, endpoint, local_address, remote_address,
67+
local_handle, remote_handle, size, flags, order,
68+
cbfunc, cbcontext, cbdata);
69+
} else {
70+
mca_btl_base_am_rdma_module_t *am_rdma = ompi_osc_rdma_selected_am_rdma(module, btl_index);
71+
return am_rdma->am_btl_get(am_rdma, endpoint, local_address, remote_address,
72+
local_handle, remote_handle, size, flags, order,
73+
cbfunc, cbcontext, cbdata);
74+
}
6275
}
6376

6477

@@ -71,6 +84,9 @@ ompi_osc_rdma_btl_atomic_op(ompi_osc_rdma_module_t *module, uint8_t btl_index,
7184
{
7285
mca_btl_base_module_t *btl = ompi_osc_rdma_selected_btl(module, btl_index);
7386

87+
/* the AM BTL interface does not currently support op calls */
88+
assert(module->use_accelerated_btl);
89+
7490
return btl->btl_atomic_op(btl, endpoint, remote_address, remote_handle,
7591
op, operand, flags, order,
7692
cbfunc, cbcontext, cbdata);
@@ -87,12 +103,19 @@ ompi_osc_rdma_btl_atomic_fop(ompi_osc_rdma_module_t *module, uint8_t btl_index,
87103
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
88104

89105
{
90-
mca_btl_base_module_t *btl = ompi_osc_rdma_selected_btl(module, btl_index);
91-
92-
return btl->btl_atomic_fop(btl, endpoint, local_address, remote_address,
93-
local_handle, remote_handle,
94-
op, operand, flags, order,
95-
cbfunc, cbcontext, cbdata);
106+
if (module->use_accelerated_btl) {
107+
mca_btl_base_module_t *btl = ompi_osc_rdma_selected_btl(module, btl_index);
108+
return btl->btl_atomic_fop(btl, endpoint, local_address, remote_address,
109+
local_handle, remote_handle,
110+
op, operand, flags, order,
111+
cbfunc, cbcontext, cbdata);
112+
} else {
113+
mca_btl_base_am_rdma_module_t *am_rdma = ompi_osc_rdma_selected_am_rdma(module, btl_index);
114+
return am_rdma->am_btl_atomic_fop(am_rdma, endpoint, local_address, remote_address,
115+
local_handle, remote_handle,
116+
op, operand, flags, order,
117+
cbfunc, cbcontext, cbdata);
118+
}
96119
}
97120

98121

@@ -105,12 +128,19 @@ ompi_osc_rdma_btl_atomic_cswap(ompi_osc_rdma_module_t *module, uint8_t btl_index
105128
uint64_t compare, uint64_t value, int flags, int order,
106129
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
107130
{
108-
mca_btl_base_module_t *btl = ompi_osc_rdma_selected_btl(module, btl_index);
109-
110-
return btl->btl_atomic_cswap(btl, endpoint, local_address, remote_address,
111-
local_handle, remote_handle,
112-
compare, value, flags, order,
113-
cbfunc, cbcontext, cbdata);
131+
if (module->use_accelerated_btl) {
132+
mca_btl_base_module_t *btl = ompi_osc_rdma_selected_btl(module, btl_index);
133+
return btl->btl_atomic_cswap(btl, endpoint, local_address, remote_address,
134+
local_handle, remote_handle,
135+
compare, value, flags, order,
136+
cbfunc, cbcontext, cbdata);
137+
} else {
138+
mca_btl_base_am_rdma_module_t *am_rdma = ompi_osc_rdma_selected_am_rdma(module, btl_index);
139+
return am_rdma->am_btl_atomic_cswap(am_rdma, endpoint, local_address, remote_address,
140+
local_handle, remote_handle,
141+
compare, value, flags, order,
142+
cbfunc, cbcontext, cbdata);
143+
}
114144
}
115145

116146

@@ -195,7 +225,10 @@ ompi_osc_rdma_btl_op(ompi_osc_rdma_module_t *module, uint8_t btl_index,
195225
mca_btl_base_module_t *selected_btl = ompi_osc_rdma_selected_btl (module, btl_index);
196226
int ret;
197227

198-
if (!(selected_btl->btl_flags & MCA_BTL_FLAGS_ATOMIC_OPS)) {
228+
/* if using the AM RDMA interface with alternate BTLs or if the
229+
accelerated BTL does not support atomic ops, emulate the atomic
230+
op over a fetch and atomic op */
231+
if (!module->use_accelerated_btl || !(selected_btl->btl_flags & MCA_BTL_FLAGS_ATOMIC_OPS)) {
199232
return ompi_osc_rdma_btl_fop (module, btl_index, endpoint, address, address_handle, op, operand, flags,
200233
NULL, wait_for_completion, cbfunc, cbdata, cbcontext);
201234
}

0 commit comments

Comments
 (0)